From 75c30c3e55081cae4a5bdf7d5e2a280b31842b96 Mon Sep 17 00:00:00 2001 From: Daniil Lapezo Date: Sun, 10 Nov 2019 20:15:28 +0300 Subject: [PATCH 1/3] Added rss-reader.py --- rss-reader.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 rss-reader.py diff --git a/rss-reader.py b/rss-reader.py new file mode 100644 index 0000000..e69de29 From a88fb405886801eb94be7fde17e5324def63f5ad Mon Sep 17 00:00:00 2001 From: Daniil Lapezo Date: Tue, 26 Nov 2019 17:45:17 +0300 Subject: [PATCH 2/3] Added argparser functionality and implemented feed reader --- .gitignore | 2 +- rss-reader.py | 0 rss_feed.py | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++ rss_reader.py | 26 ++++++++++++ 4 files changed, 134 insertions(+), 1 deletion(-) delete mode 100644 rss-reader.py create mode 100644 rss_feed.py create mode 100644 rss_reader.py diff --git a/.gitignore b/.gitignore index 894a44c..ef81fd7 100644 --- a/.gitignore +++ b/.gitignore @@ -99,6 +99,6 @@ venv.bak/ # mkdocs documentation /site - +.idea # mypy .mypy_cache/ diff --git a/rss-reader.py b/rss-reader.py deleted file mode 100644 index e69de29..0000000 diff --git a/rss_feed.py b/rss_feed.py new file mode 100644 index 0000000..8b37c22 --- /dev/null +++ b/rss_feed.py @@ -0,0 +1,107 @@ +from bs4 import BeautifulSoup as BSoup +from urllib.request import Request +from urllib.request import urlopen +import re +import logging +import json +from rss_reader import argparser, get_rss + + +""" List of selected data""" +args = argparser() +soup = BSoup(get_rss(args.source), "xml") +title = [] +date = [] +description = [] +link = [] +image_link = [] + + +def get_title(item): + """Getting Title""" + titles = item.find('title').string + title.append(titles.replace("'", "'").replace(""", "")) + logging.info('Get title success') + + +def get_date(item): + """Getting Date""" + date.append(item.find('pubDate').string) + logging.info('Get date success') + + +def get_description(item): + """Getting Description""" + 
descript = item.find('description').string + descript = descript.replace("'", "'").replace(""", "").replace(">", "").replace("  ", "\n") + """Formatting Description Text""" + description.append(re.sub('<.*?>', '', descript)) + logging.info('Get description success') + + +def get_link(item): + """Getting Link""" + link.append(item.find('link').string) + logging.info('Get link source success') + media_link = item.find_all('media:content') + images = [] # List of image links for one item + for img_link in media_link: + if (img_link.get('type') == 'image/jpeg') or (not img_link.get('type')): + images.append(img_link.get('url')) + image_link.append(images) + logging.info('Get image link success') + + +def get_feed(): + """Getting Feed """ + logging.info("Limit is: (%s) " % str(args.limit)) + logging.info("Find tags in feed.") + items = soup.find_all('item', limit=args.limit) + for item in items: + get_link(item) + get_date(item) + get_description(item) + get_title(item) + else: + logging.info("All goods:)") + + +def print_news(): + """Outputs News""" + feed = soup.title.string + print("\nFeed: " + feed + "\n") + for number in range(0, len(title)): + print('Title: ' + title[number]) + print('Date: ' + date[number]) + print('Link: ' + link[number]) + print('\nNews: ' + description[number]) + if image_link[number]: + print('\nImage link: ') + print('\n'.join(image_link[number])) + print('\n\n') + else: + logging.info(' Feed #' + str(number) + ' doesn\'t has image') + print('\nImage link: None\n\n') + logging.info("All news are printed") + + +def get_news(): + """Getting News""" + logging.info("Start parsing feeds") + get_feed() + feed_data = soup.title.string + print("\nFeed: " + feed_data + "\n") + print_news() + + +def main(): + open('logger.log', 'w').close() + if argparser().verbose: + logging.basicConfig(format=u'%(filename)s[LINE:%(lineno)d]# %(levelname)-8s [%(asctime)s] %(message)s', + level=logging.DEBUG, filename='logger.log') + + +get_news() + +if __name__ == 
'__main__': + main() diff --git a/rss_reader.py b/rss_reader.py new file mode 100644 index 0000000..9666833 --- /dev/null +++ b/rss_reader.py @@ -0,0 +1,26 @@ +import argparse +import logging +from urllib.request import Request +from urllib.request import urlopen + + +def argparser(): + """Set argparser logic""" + parser = argparse.ArgumentParser(description='base starter') + parser.add_argument('source', action='store', help='Rss url authentication', type=str) + parser.add_argument('--json', action='store_true', help='outputs result as JSON') + parser.add_argument('--version', action='version', version='1.0', help='print version information') + parser.add_argument('--verbose', action='store_true', help='outputs verbose status information') + parser.add_argument('--limit', action='store', help='limits the number of topics', type=int, default=1) + return parser.parse_args() + + +def get_rss(url): + logging.info('URL opened for news reading: %s' % url) + request = Request(url) + logging.info('Read our request') + rss = urlopen(request).read() + return rss + +argparser() + From 6be812c80c342d80d797e71ae5beae6956c070cb Mon Sep 17 00:00:00 2001 From: Daniil Lapezo Date: Tue, 26 Nov 2019 17:53:55 +0300 Subject: [PATCH 3/3] Completed Iteration 1 --- rss_feed.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/rss_feed.py b/rss_feed.py index 8b37c22..a93a9d6 100644 --- a/rss_feed.py +++ b/rss_feed.py @@ -6,7 +6,6 @@ import json from rss_reader import argparser, get_rss - """ List of selected data""" args = argparser() soup = BSoup(get_rss(args.source), "xml") @@ -94,11 +93,22 @@ def get_news(): print_news() +def json(): + print(json.dumps({'title': soup.find('title').string, + 'news': [{'Title': title[number], + 'Date': date[number], + 'Link': link[number], + 'Feed': description[number], + 'Image link': image_link[number] + } for number in range(0, len(title))]}, ensure_ascii=False, indent=4)) + + def main(): open('logger.log', 'w').close() 
if argparser().verbose: logging.basicConfig(format=u'%(filename)s[LINE:%(lineno)d]# %(levelname)-8s [%(asctime)s] %(message)s', - level=logging.DEBUG, filename='logger.log') + level=logging.DEBUG, + filename='logger.log') get_news()