green-spider/checks/load_feeds_test.py

import httpretty
from httpretty import httprettified
import unittest

from checks import html_head, page_content
from checks import load_feeds
from checks.config import Config

from datetime import datetime
from pprint import pprint
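
# The @httprettified decorator from httpretty activates HTTP mocking around each
# test, so the feed URLs registered below are served from the inline XML strings
# instead of being fetched over the network.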
@httprettified
class TestFeed(unittest.TestCase):

    def test_feed_rss2(self):
        """
        Checks RSS 2.0
        """
        feed = """<?xml version="1.0"?>
            <rss version="2.0">
                <channel>
                    <title>Liftoff News</title>
                    <link>http://liftoff.msfc.nasa.gov/</link>
                    <description>Liftoff to Space Exploration.</description>
                    <pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
                    <item>
                        <title>Star City</title>
                        <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
                        <pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
                        <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
                    </item>
                    <item>
                        <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
                        <pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
                        <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
                    </item>
                </channel>
            </rss>
        """
        feed_url = 'http://example.com/feed.xml'
        httpretty.register_uri(httpretty.GET, feed_url,
                               body=feed,
                               adding_headers={
                                   "Content-type": "application/rss+xml",
                               })

        # mocking a previous result from some page
        results = {
            'html_head': {
                'http://example.com/': {
                    'link_rss_atom': ['http://example.com/feed.xml']
                }
            }
        }

        config = Config(urls=['http://example.com/'])
        checker = load_feeds.Checker(config=config, previous_results=results)
        result = checker.run()
        pprint(result)
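
        # The two items are dated Fri, 30 May 2003 11:06:42 GMT and
        # Tue, 03 Jun 2003 09:39:21 GMT, i.e. 340,359 seconds apart, which is
        # presumably what the checker reports as the average interval between
        # consecutive entries.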
        self.assertEqual(result['http://example.com/feed.xml'], {
            'exception': None,
            'average_interval': 340359,
            'first_entry': datetime(2003, 5, 30, 11, 6, 42),
            'latest_entry': datetime(2003, 6, 3, 9, 39, 21),
            'num_entries': 2,
            'title': 'Liftoff News',
        })

    def test_empty_feed_rss2(self):
        """
        Checks an RSS 2.0 feed that contains no items
        """
        feed = """<?xml version="1.0"?>
            <rss version="2.0">
                <channel>
                    <title>Empty Feed</title>
                    <link>http://example.com/</link>
                    <pubDate></pubDate>
                </channel>
            </rss>
        """
        feed_url = 'http://example.com/feed.xml'
        httpretty.register_uri(httpretty.GET, feed_url,
                               body=feed,
                               adding_headers={
                                   "Content-type": "application/rss+xml",
                               })

        # mocking a previous result from some page
        results = {
            'html_head': {
                'http://example.com/': {
                    'link_rss_atom': ['http://example.com/feed.xml']
                }
            }
        }

        config = Config(urls=['http://example.com/'])
        checker = load_feeds.Checker(config=config, previous_results=results)
        result = checker.run()
        pprint(result)

        self.assertEqual(result, {
            'http://example.com/feed.xml': {
                'exception': None,
                'title': 'Empty Feed',
                'latest_entry': None,
                'first_entry': None,
                'average_interval': None,
                'num_entries': 0,
            }
        })

    def test_feed_rss2_without_dates(self):
        """
        Checks an RSS 2.0 feed whose items carry no pubDate
        """
        feed = """<?xml version="1.0"?>
            <rss version="2.0">
                <channel>
                    <title>Liftoff News</title>
                    <link>http://liftoff.msfc.nasa.gov/</link>
                    <description>Liftoff to Space Exploration.</description>
                    <item>
                        <title>Star City</title>
                        <link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
                        <guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
                    </item>
                    <item>
                        <description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
                        <guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
                    </item>
                </channel>
            </rss>
        """
        feed_url = 'http://example.com/feed.xml'
        httpretty.register_uri(httpretty.GET, feed_url,
                               body=feed,
                               adding_headers={
                                   "Content-type": "application/rss+xml",
                               })

        # mocking a previous result from some page
        results = {
            'html_head': {
                'http://example.com/': {
                    'link_rss_atom': ['http://example.com/feed.xml']
                }
            }
        }

        config = Config(urls=['http://example.com/'])
        checker = load_feeds.Checker(config=config, previous_results=results)
        result = checker.run()
        pprint(result)
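
        # Without pubDate elements the items can still be counted, but the
        # date-derived fields (first/latest entry and average interval) are
        # expected to come back as None.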
        self.assertEqual(result, {
            'http://example.com/feed.xml': {
                'exception': None,
                'title': 'Liftoff News',
                'latest_entry': None,
                'first_entry': None,
                'average_interval': None,
                'num_entries': 2,
            }
        })


if __name__ == '__main__':
    unittest.main()