mirror of
https://github.com/netzbegruenung/green-spider.git
synced 2024-05-05 10:33:39 +02:00
Marian Steinbach
618e29d763
* CLI: remove 'jobs' command, add 'manager' * Add job definition * Move jobs to manage folder * Rename jobs to manager * Add rq and redis dependencies * Add docker-compose YAML * Downgrade to alpine 3.8 * Adjust paths in Dockerfile, remove entrypoint * Rename 'make spiderjobs' to 'make jobs' * Fix docker execution * Adapt 'make jobs' * Fix metadata scheme * Add docker dependency * Randomize queue (a bit) * Use latest image, remove debug output * Make docker-compose file downwards-compatible * Use latest instead of dev image tag * Update docker-compose.yaml * Adapt job start script * Fix redis connection in manager * Add support for increasing timeout via environment variable * Adapt load_in_browser to cookies table schema change * Fix execution * Mitigate yaml warning * Bump some dependency versions * Report resource usage stats for each job * checks/load_in_browser: Return DOM size, prevent multiple page loads * Update .dockerignore * Code update * Script update * Update README.md * WIP * WIP commit * Update Dockerfile to alpine:edge and chromium v90 * Update TestCertificateChecker * Set defaults for __init__ function * Detect sunflower theme * Update unit test for new datetime (zero-basing) * Set logging prefs from Chromium in a new way * Move datastore client instantiation As it is not needed for all commands * Change green-directory repository URL * Add git settings for cloning green-directory * Pin alpine version 3.14, fix py3-cryptography * Use plain docker build progress output * Add volumes to 'make test' docker run command * Fix bug * Update example command in README * Update dependencies * Add creation of Kubernetes jobs
181 lines
6.3 KiB
Python
181 lines
6.3 KiB
Python
import httpretty
|
|
from httpretty import httprettified
|
|
import unittest
|
|
|
|
from checks import html_head, page_content
|
|
from checks import load_feeds
|
|
from checks.config import Config
|
|
from datetime import datetime
|
|
|
|
from pprint import pprint
|
|
|
|
@httprettified
class TestFeed(unittest.TestCase):
    """Tests for the load_feeds checker against mocked RSS 2.0 feeds.

    Each test serves a feed document from a fake HTTP endpoint (via
    httpretty) and verifies the metadata the checker derives from it.
    """

    def _run_checker(self, feed_body):
        """Serve *feed_body* at the mocked feed URL and run the check.

        Registers the feed under http://example.com/feed.xml, fakes a
        previous html_head result that links to that feed (as the real
        spider pipeline would have produced), executes the checker and
        returns its raw result dict.
        """
        httpretty.register_uri(
            httpretty.GET,
            'http://example.com/feed.xml',
            body=feed_body,
            adding_headers={
                "Content-type": "application/rss+xml",
            })

        # Fake the outcome of the html_head check for the page that
        # references the feed.
        previous_results = {
            'html_head': {
                'http://example.com/': {
                    'link_rss_atom': ['http://example.com/feed.xml']
                }
            }
        }

        config = Config(urls=['http://example.com/'])
        checker = load_feeds.Checker(config=config,
                                     previous_results=previous_results)

        result = checker.run()
        pprint(result)
        return result

    def test_feed_rss2(self):
        """
        Checks RSS 2.0
        """

        feed = """<?xml version="1.0"?>
        <rss version="2.0">
            <channel>
                <title>Liftoff News</title>
                <link>http://liftoff.msfc.nasa.gov/</link>
                <description>Liftoff to Space Exploration.</description>
                <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
                <item>
                    <title>Star City</title>
                    <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
                    <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
                    <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
                </item>
                <item>
                    <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st.</description>
                    <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
                    <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
                </item>
            </channel>
        </rss>
        """

        result = self._run_checker(feed)

        # Two dated entries: first/latest dates and their average
        # interval must be derived from the item pubDate values.
        self.assertEqual(result['http://example.com/feed.xml'], {
            'exception': None,
            'average_interval': 340359,
            'first_entry': datetime(2003, 5, 30, 11, 6, 42),
            'latest_entry': datetime(2003, 6, 3, 9, 39, 21),
            'num_entries': 2,
            'title': 'Liftoff News',
        })

    def test_empty_feed_rss2(self):
        """
        Checks RSS 2.0
        """

        feed = """<?xml version="1.0"?>
        <rss version="2.0">
            <channel>
                <title>Empty Feed</title>
                <link>http://example.com/</link>
                <pubDate></pubDate>
            </channel>
        </rss>
        """

        result = self._run_checker(feed)

        # A feed with no items yields zero entries and no date-derived
        # metadata, but still reports its title and no exception.
        self.assertEqual(result, {
            'http://example.com/feed.xml': {
                'exception': None,
                'title': 'Empty Feed',
                'latest_entry': None,
                'first_entry': None,
                'average_interval': None,
                'num_entries': 0,
            }
        })

    def test_feed_rss2_without_dates(self):
        """
        Checks RSS 2.0
        """

        feed = """<?xml version="1.0"?>
        <rss version="2.0">
            <channel>
                <title>Liftoff News</title>
                <link>http://liftoff.msfc.nasa.gov/</link>
                <description>Liftoff to Space Exploration.</description>
                <item>
                    <title>Star City</title>
                    <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
                    <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
                </item>
                <item>
                    <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st.</description>
                    <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
                </item>
            </channel>
        </rss>
        """

        result = self._run_checker(feed)

        # Items are counted even when they carry no pubDate, but all
        # date-derived fields stay None.
        self.assertEqual(result, {
            'http://example.com/feed.xml': {
                'exception': None,
                'title': 'Liftoff News',
                'latest_entry': None,
                'first_entry': None,
                'average_interval': None,
                'num_entries': 2,
            }
        })
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Allow running this test module directly (outside a test runner).
    unittest.main()
|