diff --git a/.gitignore b/.gitignore index 894a44c..94a37c8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,7 @@ -# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class - -# C extensions *.so - -# Distribution / packaging .Python build/ develop-eggs/ @@ -24,18 +19,10 @@ wheels/ .installed.cfg *.egg MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec - -# Installer logs pip-log.txt pip-delete-this-directory.txt - -# Unit test / coverage reports htmlcov/ .tox/ .coverage @@ -46,42 +33,20 @@ coverage.xml *.cover .hypothesis/ .pytest_cache/ - -# Translations *.mo *.pot - -# Django stuff: *.log local_settings.py db.sqlite3 - -# Flask stuff: instance/ .webassets-cache - -# Scrapy stuff: .scrapy - -# Sphinx documentation docs/_build/ - -# PyBuilder target/ - -# Jupyter Notebook .ipynb_checkpoints - -# pyenv .python-version - -# celery beat schedule file celerybeat-schedule - -# SageMath parsed files *.sage.py - -# Environments .env .venv env/ @@ -89,16 +54,13 @@ venv/ ENV/ env.bak/ venv.bak/ - -# Spyder project settings .spyderproject .spyproject - -# Rope project settings .ropeproject - -# mkdocs documentation /site - -# mypy .mypy_cache/ +.idea/ +*.sqlite3 +*.html +*.pdf +__cache__/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..d2fe208 --- /dev/null +++ b/README.md @@ -0,0 +1,36 @@ +# Welcome +This project was created for the EPAM Python Courses 2019. + +## Installing + +First, this app runs on Python version >=3.8. 
+ +### Download: + +`git clone https://github.com/TeRRoRlsT/PythonHomework.git` + +### Setup: +Go to repository **PythonHomework** and execute the command: + +`python3.8 -m pip install .` + +or + +`pip install .` + +## Running +To view the help for running project go to **PythonHomework/rssreader** folder and execute the command: + +`python3.8 rss_reader.py --help` + +### SQLite3 +This application uses SQLite3 database to cache all downloaded news articles. +If you use '--date YYYYMMDD' the application will load news articles from the DB with the date after the given date. + +## Tests +For run unittest go to **PythonHomework** folder and execute the command: + +`python3.8 -m unittest tests` + + ## Authors +* Sergey Pivovar - BSUIR 2019 \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rssreader/__init__.py b/rssreader/__init__.py new file mode 100644 index 0000000..b2a95f9 --- /dev/null +++ b/rssreader/__init__.py @@ -0,0 +1 @@ +__version__ = "1.2" diff --git a/rssreader/feed_parser.py b/rssreader/feed_parser.py new file mode 100644 index 0000000..9f2b8cf --- /dev/null +++ b/rssreader/feed_parser.py @@ -0,0 +1,346 @@ +import datetime +import html +import logging +from abc import ABC, abstractmethod + +__all__ = ['Parser'] + + +class Tag(ABC): + """ + Abstract class for working with tags as a class structure. + """ + + def __init__(self, **kwargs): + for arg, val in kwargs.items(): + self.__setattr__(arg, val) + + @abstractmethod + def link(self): + """ + Get media object source link. + + :return: media object source URL + :rtype: str + """ + + @abstractmethod + def format_link(self, ind): + """ + Get formatted link to output in the links section. + + :param ind: Sequence number in the queue + :type ind: int + :return: string to output in links section + :rtype: str + """ + + @abstractmethod + def __str__(self): + """ + Get string to output tag in the description section. 
+ + :return: string to output tag in the description section + """ + + +class A(Tag): + """ + Class for work with tag `a` (link) as a class struct. + """ + href = None + + def __str__(self): + """ + Get string to output tag in description section. + + :return: string to output tag in the description section + """ + return "[link {}]" + + def link(self): + """ + Get media object source link. + + :return: media object source URL + :rtype: str + """ + return self.href + + def format_link(self, ind): + """ + Get formatted link to output in the links section. + + :param ind: Sequence number in the queue + :type ind: int + :return: string to output in the links section + :rtype: str + """ + return f"[{ind}]: {self.href} (link)" + + +class Img(Tag): + """ + Class for work with tag img (image) as a class struct. + """ + src = None + alt = None + width = None + height = None + + def __str__(self): + """ + Get string to output tag in description section. + + :return: string to output tag in the description section + """ + return "[Image {}: %s] " % self.alt + + def link(self): + """ + Get media object source link. + + :return: media object source URL + :rtype: str + """ + return self.src + + def format_link(self, ind): + """ + Get formatted link to output in the links section. + + :param ind: Sequence number in the queue + :type ind: int + :return: string to output in the links section + :rtype: str + """ + return f"[{ind}]: {self.src} (image)" + + +class HTMLParser: + """ + A class for parse news articles from response struct of module "feedparser". + Methods return JSON format of news articles or dict with info about given article. + """ + _table = { + 'a': A, + 'img': Img, + } + + def __init__(self): + self._tags = [] + + def parse(self, response, limit): + """ + A method of parsing news articles and creating object models for easy access. 
+ + :param response: response struct for parse + :param limit: required number of articles to show + :type response: dict + :type limit: int + :return: return a dict {'title': str, 'articles': list). + Title is header of RSS Source. + Articles is a list of dicts with articles info which was created from parsed feeds + :rtype: dict + """ + logging.info("Getting list of limited articles") + raw_articles = self._get_limited_articles(response, limit) + + logging.info("Completed. Converting each article to dict") + nice_articles = [self._article_to_dict(article) for article in raw_articles] + + logging.info("Completed. Clear articles from HTML escapes") + articles = [self._clear_from_html(article) for article in nice_articles] + + logging.info("Getting a RSS source title") + title = response['feed']['title'] + + return {'title': title, 'articles': articles} + + def _clear_from_html(self, elem): + """ + Method to clear html escapes from all fields of article. + + :param elem: article to clear from HTML escapes + :return: clean article + """ + if type(elem) == str: + return html.unescape(elem) + elif type(elem) == dict: + return {self._clear_from_html(k): self._clear_from_html(v) for k, v in elem.items()} + elif type(elem) == list: + return [self._clear_from_html(el) for el in elem] + else: + return elem + + @staticmethod + def _get_limited_articles(response, limit): + """ + Method of limiting parsing articles from response struct. + If limit is None return articles given length, else return all available articles. + + :param response: response struct for parse + :param limit: limit of output news articles + :type response: dict + :type limit: int or None + :return: news articles of limited length + :rtype: dict + """ + result = response['entries'] + if limit is not None: + logging.info(f"Completed. Loaded {min(limit, len(result))} articles with limit {limit}") + return result[0:min(limit, len(result))] + else: + logging.info(f"Completed. 
Loaded {len(result)} articles without any limit") + return result + + @staticmethod + def _get_next_tag(line): + """ + Method for getting startpos and endpos of tag in given string line. + + :param line: line with html tag + :type line: str + :return: (startpos, endpos) is a position of next tag in line if line have a tag, else None + :rtype: tuple or None + """ + if (startpos := line.find('<')) != -1 and (endpos := line.find('>')) != -1: + return startpos, endpos + 1 + else: + return None + + def _create_tag(self, params): + """ + Method for creating Tag struct class from params. + + :param params: info for creating tag + :type params: dict + :return: tag object if creating was successful, else None + :rtype: Tag or None + """ + try: + tag_type = next(iter(params)) + params.pop(tag_type) + return self._table[tag_type](**params) + except KeyError: + return None + + def _get_params_from_line(self, tag_line): + """ + Method for getting all parameters from html tag string line. + If parameter have a value params save value. Else value is True. + + :param tag_line: line with tag parameters + :type tag_line: str + :return: dict with parsed parameters + :rtype: dict + """ + params = {} + tag_line = tag_line.strip('<>') + strings, tag_line = self._get_all_strings(tag_line) + words = tag_line.split() + for param in words: + pair = param.split('=') + if len(pair) == 1: + params.update({pair[0]: True}) + else: + params.update({pair[0]: strings.pop(0)}) + + return params + + @staticmethod + def _get_all_strings(tag_line): + """ + Method of cutting all string in quotes \"...\". + + :param tag_line: line with tag info and strings + :type tag_line: str + :return: tuple (strings, tag_line). + strings is a list with all cutting strings. 
+ tag_line is a given string parameter without cutting strings + :rtype: tuple + """ + strings = [] + while (start_ind := tag_line.find('"')) != -1: + end_ind = tag_line.find('"', start_ind + 1) + 1 + strings.append(tag_line[start_ind + 1: end_ind - 1]) + tag_line = tag_line[:start_ind] + tag_line[end_ind:] + return strings, tag_line + + def _process_description(self, desc, fill_desc=True, fill_links=True): + """ + Method processing description. Return description of specific format. + + :param desc: description of news article with useless info and tags + :type desc: str + :return: tuple (description, links). + description is description without useless info and tags. With inserts links or not. + links is list with formatted strings with links from all created tag objects + :rtype: tuple + """ + self._tags.clear() + index_of_tag = 1 + links = [] + while (pos_tag := self._get_next_tag(desc)) is not None: + first_quotes, last_quotes = pos_tag + full_tag_line = desc[first_quotes: last_quotes] + parameters = self._get_params_from_line(full_tag_line) + obj_tag = self._create_tag(parameters) + if obj_tag is not None: + self._tags.append(obj_tag) + if fill_desc: + desc = desc[:first_quotes] + str(obj_tag).format(index_of_tag) + desc[last_quotes:] + else: + desc = desc[:first_quotes] + desc[last_quotes:] + if fill_links: + links.append(obj_tag.format_link(index_of_tag)) + else: + links.append(obj_tag.link()) + index_of_tag += 1 + else: + desc = desc[:first_quotes] + desc[last_quotes:] + + return desc, links + + def _article_to_dict(self, article): + """ + Method for converting article info into dict of specific format. 
+ + :param article: article for converting into dict of specific format + :type article: dict + :return: dict of specific format + :rtype: dict + """ + + dec_description, dec_links = self._process_description(article['description']) + description, links = self._process_description(article['description'], False, False) + + images = [obj for obj in self._tags if isinstance(obj, Img)] + + media = [ + {"src": image.src, + "alt": image.alt, + "width": image.width, + "height": image.height} for image in images + ] + + try: + date = datetime.datetime(*article['published_parsed'][:6]).strftime("%a, %d %b %Y %H:%M") + except (AttributeError, ValueError): + date = 'None' + + result = { + 'title': article['title'], + 'description': description, + 'dec_description': dec_description, + 'link': article['link'], + 'pubDate': date, + 'media': media, + 'links': links, + 'dec_links': dec_links, + } + + return result + + +Parser = HTMLParser() diff --git a/rssreader/feed_parser_tests.py b/rssreader/feed_parser_tests.py new file mode 100644 index 0000000..6ecf1d2 --- /dev/null +++ b/rssreader/feed_parser_tests.py @@ -0,0 +1,306 @@ +import html +import time +from unittest import TestCase, main + +from rssreader.feed_parser import A, Img, HTMLParser + +__all__ = ['TestA', 'TestImg', 'TestHTMLParser'] + + +def equals_classes_a(obj1: A, obj2: A): + return obj1.href == obj2.href + + +def equals_classes_img(obj1: Img, obj2: Img): + return obj1.src == obj2.src and \ + obj1.alt == obj2.alt and \ + obj1.width == obj2.width and \ + obj1.height == obj2.height + + +class TestImg(TestCase): + def setUp(self): + self.fields = { + 'src': 'some_src', + 'alt': 'some_alt', + 'style': 'some_style', + 'width': 'some_width', + 'height': 'some_height', + } + self.img = Img(**self.fields) + + def test_create_obj(self): + self.assertTrue(issubclass(type(self.img), Img)) + + self.assertFalse(self.img is Img) + + def test_creating_fields(self): + self.assertEqual(self.img.src, self.fields['src']) + + 
self.assertEqual(self.img.alt, self.fields['alt']) + + self.assertEqual(self.img.width, self.fields['width']) + + self.assertEqual(self.img.height, self.fields['height']) + + def test_method_link(self): + self.assertEqual(self.img.link(), self.img.src) + + self.assertEqual(self.img.link(), self.fields['src']) + + self.assertNotEqual(self.img.link(), 'src') + + def test_method_format_link(self): + ind = 1 + self.assertEqual(self.img.format_link(ind), f"[{ind}]: {self.img.src} (image)") + + self.assertEqual(self.img.format_link(2), "[2]: some_src (image)") + + self.assertNotEqual(self.img.format_link(3), "[1]: some_src (image)") + + def test_method_str(self): + self.assertEqual(str(self.img), "[Image {}: %s] " % self.img.alt) + + self.assertNotEqual(str(self.img), "[Image {}: {}]") + + +class TestA(TestCase): + def setUp(self): + self.fields = { + 'href': 'some_href', + 'style': 'some_style', + 'align': 'some_align', + } + self.a = A(**self.fields) + + def test_create_obj(self): + self.assertTrue(issubclass(type(self.a), A)) + + self.assertFalse(self.a is A) + + def test_creating_fields(self): + self.assertEqual(self.a.href, self.fields['href']) + + self.assertNotEqual(self.a.href, 'href') + + self.assertEqual(self.a.style, self.fields['style']) + + def test_method_link(self): + self.assertEqual(self.a.link(), self.a.href) + + self.assertEqual(self.a.link(), self.fields['href']) + + self.assertNotEqual(self.a.link(), 'href') + + def test_method_format_link(self): + ind = 1 + self.assertEqual(self.a.format_link(ind), f"[{ind}]: {self.a.href} (link)") + + self.assertEqual(self.a.format_link(2), "[2]: some_href (link)") + + self.assertNotEqual(self.a.format_link(3), "[1]: some_href (link)") + + def test_method_str(self): + self.assertEqual(str(self.a), "[link {}]") + + self.assertNotEqual(str(self.a), "[link ]") + + +class TestHTMLParser(TestCase): + def setUp(self): + date = time.struct_time((2019, 11, 26, 20, 53, 11, 1, 330, 0)) + self.response = { + 'feed': { + 
'title': 'Yahoo News - Latest News & Headlines', + }, + 'entries': [{ + 'title': 'Some title', + 'description': '

Alt of image 2Some long description


', + 'link': 'some long link', + 'published_parsed': date, + }] + } + self.article = { + 'title': 'Some title', + 'description': '

Alt of image 2Some long description


', + 'link': 'some long link', + 'published_parsed': date, + } + self.article_parsed = { + 'title': 'Some title', + 'description': 'Some long description', + 'dec_description': '[link 1][Image 2: Alt of image 2] Some long description', + 'link': 'some long link', + 'pubDate': 'Tue, 26 Nov 2019 20:53', + 'media': [{ + 'src': 'some long link to source of image 2', + 'alt': 'Alt of image 2', + 'width': '130', + 'height': '86' + }], + 'links': [ + 'some long link', + 'some long link to source of image 2' + ], + 'dec_links': [ + '[1]: some long link (link)', + '[2]: some long link to source of image 2 (image)' + ] + } + self.response_parsed = { + 'title': 'Yahoo News - Latest News & Headlines', + 'articles': [{ + 'title': 'Some title', + 'description': 'Some long description', + 'dec_description': '[link 1][Image 2: Alt of image 2] Some long description', + 'link': 'some long link', + 'pubDate': 'Tue, 26 Nov 2019 20:53', + 'media': [{ + 'src': 'some long link to source of image 2', + 'alt': 'Alt of image 2', + 'width': '130', + 'height': '86' + }], + 'links': [ + 'some long link', + 'some long link to source of image 2' + ], + 'dec_links': [ + '[1]: some long link (link)', + '[2]: some long link to source of image 2 (image)' + ] + }] + } + + self.parser = HTMLParser() + + def test_parse(self): + self.assertEqual(self.parser.parse(self.response, 1), self.response_parsed) + + def test_clear_from_html(self): + string = 'some_string' + self.assertEqual(self.parser._clear_from_html(html.escape(string)), string) + + dict_with_html_string = { + 1: html.escape('st&ri`ng'), + 'a': html.escape('s"tr>>i@ng\''), + html.escape('s"tr>>i@ng\''): html.escape('s"tr>>i@ng\'') + } + result = { + 1: 'st&ri`ng', + 'a': 's"tr>>i@ng\'', + 's"tr>>i@ng\'': 's"tr>>i@ng\'' + } + self.assertEqual(self.parser._clear_from_html(dict_with_html_string), result) + + list_with_html_string = [ + 1, + html.escape('st&ri`ng'), + 'a', + html.escape('s"tr>>i@ng\'') + ] + result = [ + 1, + 'st&ri`ng', + 'a', + 
's"tr>>i@ng\'', + ] + self.assertEqual(self.parser._clear_from_html(list_with_html_string), result) + + def test_get_limited_articles(self): + self.assertEqual(self.parser._get_limited_articles(self.response, 1), [self.article]) + + def test_get_next_tag(self): + line_with_tags = '' + self.assertEqual(self.parser._get_next_tag(line_with_tags), (0, len(line_with_tags))) + + line_with_tags = 'some_alt' + self.assertEqual(self.parser._get_next_tag(line_with_tags), (0, len(line_with_tags))) + + line_with_tags = '" \ + f"" \ + f"" \ + f"" \ + f"RSS Feeds" \ + f"" \ + f"" \ + f"

{articles['title']}

" \ + f"{''.join([self._print_article(art) for art in articles['articles']])}" \ + f"" \ + f"" + + with open(print_to, 'w') as file: + file.write(html_text) + + def _print_article(self, article, **kwargs): + """ + Method for output one article use HTML. + + :param article: article to output + :param kwargs: optional params. Use to extend a count given params in base method + :type article: dict + :return: string with html version of given article + :rtype: str + """ + result = "
" \ + "

" + result += '' \ + '{} (Link to original)' \ + '' \ + '

'.format(article['link'], article['title']) + for image in article['media']: + attrs = " ".join([f"{k}=\"{v}\"" for k, v in image.items()]) + result += "
" \ + "".format(attrs) + result += f'

' \ + f'Published: {article["pubDate"]}' \ + f'

' + result += f'

' \ + f'{article["description"]}' \ + f'

' \ + f'
' \ + f'

' \ + f'Links:' \ + f'

' + for i in range(len(article['dec_links'])): + result += f'' \ + f'{article["dec_links"][i]}' \ + f'' \ + f'
' + result += "
" + return result + + +class OutputController: + """ + Main OutputController class such working with all controllers in this module. + """ + + @staticmethod + def print(articles, to_pdf=None, to_html=None, to_json=False, colorize=False): + """ + Method for the choice and run procedure of output given articles. + The output method depends on a given parameters. + HTML output - to_html = 'filename' + PDF output - to_pdf = 'filename' + JSON output - to_json = True + Default start sample output. + + :param articles: articles for output + :param to_pdf: filename for output in PDF + :param to_html: filename for output using HTML + :param to_json: Print given articles in JSON format + :param colorize: Print result in colorized mode + :type articles: dict + :type to_pdf: str + :type to_html: str + :type to_json: bool + :type colorize: bool + """ + if to_html is not None: + HTMLPrintController().print_to(articles, filename=to_html) + if to_pdf is not None: + PDFPrintController().print_to(articles, filename=to_pdf) + + if to_json: + JSONPrintController().print_to(articles) + else: + SamplePrintController().print_to(articles, colorize=colorize) diff --git a/rssreader/output_controller_tests.py b/rssreader/output_controller_tests.py new file mode 100644 index 0000000..ef1c1c1 --- /dev/null +++ b/rssreader/output_controller_tests.py @@ -0,0 +1,201 @@ +import json +import unittest +from unittest.mock import patch, MagicMock + +from rssreader.output_controller import (SamplePrintController, + JSONPrintController, + OutputController) + + +class TestSamplePrintController(unittest.TestCase): + def setUp(self): + self.articles = { + 'title': 'Yahoo News - Latest News & Headlines', + 'articles': [{ + 'title': 'Some title', + 'description': 'Some long description', + 'dec_description': '[link 1][Image 2: Alt of image 2] Some long description', + 'link': 'some long link', + 'pubDate': 'Tue, 26 Nov 2019 20:53', + 'media': [{ + 'src': 'some long link to source of image 2', + 'alt': 'Alt 
of image 2', + 'width': '130', + 'height': '86' + }], + 'links': [ + 'some long link', + 'some long link to source of image 2' + ], + 'dec_links': [ + '[1]: some long link (link)', + '[2]: some long link to source of image 2 (image)' + ] + }] + } + + self.printer = SamplePrintController() + + def test_print_to(self): + # without colorize param + self.printer._print_article = MagicMock(return_value=True) + self.printer._print_title = MagicMock(return_value=False) + with patch('builtins.print') as base_print_mock: + self.assertIsNone(self.printer.print_to(self.articles)) + + self.assertEqual(self.printer._print_article.call_count, 1) + + self.printer._print_title.assert_called_with('Yahoo News - Latest News & Headlines', colorize=False) + self.assertEqual(self.printer._print_title.call_count, 1) + + self.assertEqual(base_print_mock.call_count, 0) + + def test_print_to_colorize(self): + # with colorize=True + self.printer._print_article = MagicMock(return_value=True) + self.printer._print_title = MagicMock(return_value=False) + with patch('builtins.print') as base_print_mock: + self.assertIsNone(self.printer.print_to(self.articles, colorize=True)) + + self.assertEqual(self.printer._print_article.call_count, 1) + + self.printer._print_title.assert_called_with('Yahoo News - Latest News & Headlines', colorize=True) + self.assertEqual(self.printer._print_title.call_count, 1) + + self.assertEqual(base_print_mock.call_count, 0) + + def test_print_article(self): + with patch('builtins.print') as base_print_mock: + self.assertIsNone(self.printer._print_article(self.articles['articles'][0], colorize=False)) + + self.assertEqual(base_print_mock.call_count, 4) + + def test_print_article_colorize(self): + with patch('builtins.print') as base_print_mock: + self.assertIsNone(self.printer._print_article(self.articles['articles'][0], colorize=True)) + + self.assertEqual(base_print_mock.call_count, 4) + + def test_print_title(self): + with patch('builtins.print') as base_print_mock: + 
self.assertIsNone(self.printer._print_title(self.articles['title'], colorize=False)) + + self.assertEqual(base_print_mock.call_count, 1) + + def test_print_title_colorize(self): + with patch('builtins.print') as base_print_mock: + self.assertIsNone(self.printer._print_title(self.articles['title'], colorize=True)) + + self.assertEqual(base_print_mock.call_count, 1) + + +class TestJSONPrintController(unittest.TestCase): + def setUp(self): + self.articles = { + 'title': 'Yahoo News - Latest News & Headlines', + 'articles': [{ + 'title': 'Some title', + 'description': 'Some long description', + 'dec_description': '[link 1][Image 2: Alt of image 2] Some long description', + 'link': 'some long link', + 'pubDate': 'Tue, 26 Nov 2019 20:53', + 'media': [{ + 'src': 'some long link to source of image 2', + 'alt': 'Alt of image 2', + 'width': '130', + 'height': '86' + }], + 'links': [ + 'some long link', + 'some long link to source of image 2' + ], + 'dec_links': [ + '[1]: some long link (link)', + '[2]: some long link to source of image 2 (image)' + ] + }] + } + + self.printer = JSONPrintController() + + def test_print_to(self): + with patch('json.dumps') as json_mock: + json_mock.return_value = '{"title": "Yahoo News - Latest News & Headlines", "articles": [{"title": "Some title", "description": "Some long description", "dec_description": "[link 1][Image 2: Alt of image 2] Some long description", "link": "some long link", "pubDate": "Tue, 26 Nov 2019 20:53", "media": [{"src": "some long link to source of image 2", "alt": "Alt of image 2", "width": "130", "height": "86"}], "links": ["some long link", "some long link to source of image 2"], "dec_links": ["[1]: some long link (link)", "[2]: some long link to source of image 2 (image)"]}]}' + with patch('builtins.print') as print_mock: + self.assertIsNone(self.printer.print_to(self.articles)) + + self.assertEqual(print_mock.call_count, 1) + self.assertEqual(json_mock.call_count, 1) + + json_mock.assert_called_with(self.articles) 
+ print_mock.assert_called_with(json.dumps(self.articles)) + + +class TestOutputController(unittest.TestCase): + def setUp(self): + self.controller = OutputController() + + self.articles = { + 'title': 'Yahoo News - Latest News & Headlines', + 'articles': [{ + 'title': 'Some title', + 'description': 'Some long description', + 'dec_description': '[link 1][Image 2: Alt of image 2] Some long description', + 'link': 'some long link', + 'pubDate': 'Tue, 26 Nov 2019 20:53', + 'media': [{ + 'src': 'some long link to source of image 2', + 'alt': 'Alt of image 2', + 'width': '130', + 'height': '86' + }], + 'links': [ + 'some long link', + 'some long link to source of image 2' + ], + 'dec_links': [ + '[1]: some long link (link)', + '[2]: some long link to source of image 2 (image)' + ] + }] + } + + self.filename = 'filename' + + def test_print(self): + # Sample + with patch('rssreader.output_controller.SamplePrintController.print_to') as chosen_printer: + self.assertIsNone(self.controller.print(self.articles)) + + chosen_printer.assert_called_once_with(self.articles, colorize=False) + + with patch('rssreader.output_controller.SamplePrintController.print_to') as chosen_printer: + self.assertIsNone(self.controller.print(self.articles, colorize=True)) + + chosen_printer.assert_called_once_with(self.articles, colorize=True) + + # JSON + with patch('rssreader.output_controller.JSONPrintController.print_to') as chosen_printer: + self.assertIsNone(self.controller.print(self.articles, to_json=True)) + + chosen_printer.assert_called_once_with(self.articles) + + # PDF + with patch('rssreader.output_controller.PDFPrintController.print_to') as chosen_printer: + with patch('rssreader.output_controller.SamplePrintController.print_to') as standard_printer: + self.assertIsNone(self.controller.print(self.articles, to_pdf='filename')) + + standard_printer.assert_called_once_with(self.articles, colorize=False) + chosen_printer.assert_called_once_with(self.articles, filename='filename') + + # 
HTML + with patch('rssreader.output_controller.HTMLPrintController.print_to') as chosen_printer: + with patch('rssreader.output_controller.SamplePrintController.print_to') as standard_printer: + self.assertIsNone(self.controller.print(self.articles, to_html='filename')) + + standard_printer.assert_called_once_with(self.articles, colorize=False) + chosen_printer.assert_called_once_with(self.articles, filename='filename') + + +if __name__ == '__main__': + unittest.main() diff --git a/rssreader/rss_reader.py b/rssreader/rss_reader.py new file mode 100644 index 0000000..53f8273 --- /dev/null +++ b/rssreader/rss_reader.py @@ -0,0 +1,136 @@ +""" +Python RSS reader + +Designed to download news from the entered url. + +Opportunities: + * Get version + * Conversion to JSON + * Logging + * Limiting articles + * Caching news feeds in SQLite database + * Converting to PDF and HTML formats + * Print in colorize mode + +For information enter + "python3.8 rss_reader -h" +in terminal to find more information. + +""" +__package__ = 'rssreader' + +import argparse +import datetime +import logging + +import feedparser + +from rssreader.feed_parser import * +from rssreader.output_controller import * +from rssreader.storage import * + + +class RSSReader: + def __call__(self, source, limit, date, **kwargs): + """ + Procedure executing program. Get additional setting parameters and running. + + :param source: URL for downloading news articles + :param limit: limit news topics if this parameter provided + :param date: print cached articles by date + :param kwargs: optional parameter for control behavior of output method. 
+ Use one from this parameters: + * to_json: bool - output in JSON or not + * to_pdf: str - string filename for output + * to_html: str - string filename for output + * colorize: bool - print the result in colorized mode + Default start sample output + :type source: str + :type limit: int or None + :type date: str or None + :type kwargs: dict + """ + if limit and limit < 1: + print(f"Error: Impossible parse 0 and less RSS Feeds") + exit(0) + + if not date: + logging.info("Start loading articles from RSS source") + articles = self._get_articles_from_url(source, limit) + logging.info("Completed. Saving articles in cache") + count = StorageController().save(source, articles['articles'], articles['title']) + logging.info(f"Completed. {count} articles was saved in cache") + else: + logging.info("Start loading from cache") + try: + logging.info("Check date format") + datetime.datetime.strptime(date, "%Y%m%d") + except ValueError: + print(f"Error format date {date}. Need '%Y%m%d'") + exit(0) + logging.info("Date is correct. Start loading by date") + articles = StorageController().load(source, date, limit) + + if len(articles['articles']) < 1: + print(f"No news articles for output") + exit(0) + + logging.info("All articles was successfully loaded") + + OutputController.print(articles, **kwargs) + + @staticmethod + def _get_articles_from_url(source, limit): + logging.info("Completed. Check the availability of URL.") + + if 'status' not in (response := feedparser.parse(source.strip())) or len(response['entries']) == 0: + print(f"Error: Impossible parse RSS Feeds from url '{source}'") + exit(0) + + logging.info("Completed. Check status code of response.") + + if response['status'] in range(200, 300): + logging.info(f"Status code {response['status']}. 
Getting articles from '{source}' was successful") + else: + print(f"Error connecting with URL '{source.strip()}' with status code {response['status']}.") + exit(0) + + return Parser.parse(response, limit) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('source', action='store', type=str, help='RSS URL') + parser.add_argument('--version', action='store_true', help='Print version info') + parser.add_argument('--json', action='store_true', help='Print result as JSON in stdout') + parser.add_argument('--verbose', action='store_true', help='Outputs verbose status messages') + parser.add_argument('--limit', type=int, help='Limit news topics if this parameter provided') + parser.add_argument('--date', type=str, help='Print cached articles by date') + parser.add_argument('--to-pdf', type=str, help='Print result as PDF in entered file') + parser.add_argument('--to-html', type=str, help='Print result as HTML in entered file') + parser.add_argument('--colorize', action='store_true', help='Print the result of the utility in colorized mode') + + settings = parser.parse_args() + + output = { + 'colorize': settings.colorize, + 'to_json': settings.json, + 'to_pdf': settings.to_pdf, + 'to_html': settings.to_html, + } + + if settings.version: + print(f'RSS Reader {__import__(__package__).__version__}') + + if settings.verbose: + logging.basicConfig(level=logging.INFO) + logging.info("Logging enabled") + + RSSReader()(settings.source, + settings.limit, + settings.date, + **output) + + +if __name__ == '__main__': + main() diff --git a/rssreader/rss_reader_tests.py b/rssreader/rss_reader_tests.py new file mode 100644 index 0000000..514b2a8 --- /dev/null +++ b/rssreader/rss_reader_tests.py @@ -0,0 +1,97 @@ +import time +import unittest +from unittest.mock import patch, MagicMock + +from rssreader.rss_reader import RSSReader + +__all__ = ['TestRSSReader'] + + +class TestRSSReader(unittest.TestCase): + def setUp(self): + date = time.struct_time((2019, 11, 26, 
20, 53, 11, 1, 330, 0)) + self.reader = RSSReader() + self.source = "https://news.yahoo.com/rss/" + self.response = { + 'feed': { + 'title': 'Yahoo News - Latest News & Headlines', + }, + 'status': 200, + 'entries': [{ + 'title': 'Some title', + 'description': '

Alt of image 2Some long description


', + 'link': 'some long link', + 'published_parsed': date, + }] + } + self.response_parsed = { + 'title': 'Yahoo News - Latest News & Headlines', + 'articles': [{ + 'title': 'Some title', + 'description': 'Some long description', + 'dec_description': '[link 1][Image 2: Alt of image 2] Some long description', + 'link': 'some long link', + 'pubDate': 'Tue, 26 Nov 2019 20:53', + 'media': [{ + 'src': 'some long link to source of image 2', + 'alt': 'Alt of image 2', + 'width': '130', + 'height': '86' + }], + 'links': [ + 'some long link', + 'some long link to source of image 2' + ], + 'dec_links': [ + '[1]: some long link (link)', + '[2]: some long link to source of image 2 (image)' + ] + }] + } + + def test_get_articles_from_url(self): + limit = 1 + with patch('rssreader.feed_parser.Parser.parse') as feed_parser_mock: + feed_parser_mock.return_value = 'Successful' + with patch('feedparser.parse') as feedparser_mock: + feedparser_mock.return_value = self.response + self.assertEqual(self.reader._get_articles_from_url(self.source, limit), 'Successful') + + feedparser_mock.assert_called_with(self.source.strip()) + feed_parser_mock.assert_called_with(self.response, limit) + + def test_call_save(self): + limit = 1 + self.reader._get_articles_from_url = MagicMock(return_value=self.response_parsed) + with patch('rssreader.output_controller.OutputController.print') as print_mock: + + # Time for crutches :) + with patch('rssreader.storage.controller.StorageController.__init__') as crutch: + crutch.return_value = None # remove creating db file + # How to replace a class object on MagicMock without crutch? 
+ + with patch('rssreader.storage.controller.StorageController.save') as storage_mock: + storage_mock.return_value = 1 + self.assertIsNone(self.reader(self.source, limit, None)) + + print_mock.assert_called_with(self.response_parsed) + + def test_call_load(self): + limit = 1 + date = '20191122' + with patch('rssreader.output_controller.OutputController.print') as print_mock: + # Time for crutches :) + with patch('rssreader.storage.controller.StorageController.__init__') as crutch: + crutch.return_value = None # remove creating db file + # How to replace a class object on MagicMock without crutch? + + with patch('rssreader.storage.controller.StorageController.load') as storage_mock: + storage_mock.return_value = self.response_parsed + self.assertIsNone(self.reader(self.source, limit, date)) + + print_mock.assert_called_once_with(self.response_parsed) + storage_mock.assert_called_with(self.source, date, limit) + + +if __name__ == '__main__': + unittest.main() diff --git a/rssreader/storage/__init__.py b/rssreader/storage/__init__.py new file mode 100644 index 0000000..038fb33 --- /dev/null +++ b/rssreader/storage/__init__.py @@ -0,0 +1 @@ +from .controller import * diff --git a/rssreader/storage/controller.py b/rssreader/storage/controller.py new file mode 100644 index 0000000..a318ef9 --- /dev/null +++ b/rssreader/storage/controller.py @@ -0,0 +1,75 @@ +""" +Module with controllers for work with db. + +""" +import logging + +from peewee import PeeweeException + +from .managers import * + +__all__ = ['StorageController'] + + +class StorageController: + """ + Controller for loading and saving articles in database. 
+ """ + + def __init__(self): + try: + DB_HANDLE.connect() + self.articles = ArticleManager() + self.sources = SourceManager() + except PeeweeException as e: + print(e) + return + + def load(self, url, date, limit): + """ + Method for loading limited articles from database + + :param url: source URL for getting articles from db + :param date: date from which need to load articles in string + :param limit: limit of articles for loading + :type url: str + :type date: str + :type limit: int + :return: list of dicts of articles with date after a given date + :rtype: list + """ + + clr_url = url.strip('/\\') + logging.info(f"Start loading articles from storage") + articles = self.sources.get_articles_with_data_from(clr_url, date) + + logging.info(f"Completed. Cutting list of articles") + if limit is not None: + articles['articles'] = [article for i, article in enumerate(articles['articles']) if i < limit] + + logging.info(f"Completed. Convert to dict each article") + articles['articles'] = [article.to_dict() for article in articles['articles']] + + return articles + + def save(self, url, articles, title): + """ + Method for saving parsed articles. + + :param url: string URL of RSS source + :param articles: parsed articles + :param title: title of RSS source + :type url: str + :type articles: list + :type title: str + :return: count of new created articles in db + :rtype: int + """ + clr_url = url.strip('/\\') + + logging.info(f"Getting source model") + source = self.sources.get_or_create(clr_url, title) + + logging.info(f"Completed. 
Saving articles in chosen source model") + count = self.articles.create_and_return(articles, source) + return count diff --git a/rssreader/storage/controller_tests.py b/rssreader/storage/controller_tests.py new file mode 100644 index 0000000..d67f533 --- /dev/null +++ b/rssreader/storage/controller_tests.py @@ -0,0 +1,14 @@ +import unittest + +from .controller import StorageController + +__all__ = ['StorageController'] + + +class TestStorageController(unittest.TestCase): + def test_something(self): + self.assertEqual(True, True) + + +if __name__ == '__main__': + unittest.main() diff --git a/rssreader/storage/managers/__init__.py b/rssreader/storage/managers/__init__.py new file mode 100644 index 0000000..26d7a36 --- /dev/null +++ b/rssreader/storage/managers/__init__.py @@ -0,0 +1,2 @@ +from .article_manager import ArticleManager, DB_HANDLE +from .source_manager import SourceManager, DB_HANDLE diff --git a/rssreader/storage/managers/article_manager.py b/rssreader/storage/managers/article_manager.py new file mode 100644 index 0000000..766ca8d --- /dev/null +++ b/rssreader/storage/managers/article_manager.py @@ -0,0 +1,26 @@ +""" +Module manager of database model Article. + +""" +from rssreader.storage.models import Article, DB_HANDLE + +__all__ = ['ArticleManager'] + + +class ArticleManager: + def __init__(self): + Article.create_table() + + @staticmethod + def create_and_return(structs, source): + """ + Method for creating articles in list in db. 
Return count of created objects + + :param structs: list of articles structs + :param source: model Source object of feeds source + :type structs: list + :type source: Source + :return: count of new created objects + :rtype: int + """ + return len([art for struct in structs if (art := Article.from_struct(struct, source)) is not None]) diff --git a/rssreader/storage/managers/article_manager_tests.py b/rssreader/storage/managers/article_manager_tests.py new file mode 100644 index 0000000..481eff3 --- /dev/null +++ b/rssreader/storage/managers/article_manager_tests.py @@ -0,0 +1,14 @@ +import unittest + +from .article_manager import ArticleManager + +__all__ = ['TestArticleManager'] + + +class TestArticleManager(unittest.TestCase): + def test_something(self): + self.assertEqual(True, True) + + +if __name__ == '__main__': + unittest.main() diff --git a/rssreader/storage/managers/source_manager.py b/rssreader/storage/managers/source_manager.py new file mode 100644 index 0000000..03749c1 --- /dev/null +++ b/rssreader/storage/managers/source_manager.py @@ -0,0 +1,51 @@ +""" +Module manager of database model Article. + +""" +import logging + +from rssreader.storage.models import Source, DB_HANDLE + +__all__ = ['SourceManager'] + + +class SourceManager: + def __init__(self): + Source.create_table() + + @staticmethod + def get_or_create(url, title): + """ + Method for safe getting a Source model object. + + :param url: string link for init object + :param title: title of feeds source + :type url: str + :type title: str + :return: Source object. If object with such data is founded return it, + else created new object and return it. + :rtype: Source + """ + return Source.get_or_create(url, title=title) + + @staticmethod + def get_articles_with_data_from(url, date): + """ + Method to getting articles with date after a given date. 
+ + :param url: URL-key for getting Source object + :param date: date for query + :type url: str + :type date: str + :return: dict with title of a rss source and founded articles + :rtype dict + """ + logging.info(f"Getting source model") + source = Source.get_or_create(url) + + logging.info(f"Completed. Getting articles from source") + articles = source.sort_by_date(date) + return { + 'title': source.title, + 'articles': articles, + } diff --git a/rssreader/storage/managers/source_manager_tests.py b/rssreader/storage/managers/source_manager_tests.py new file mode 100644 index 0000000..1fb75cc --- /dev/null +++ b/rssreader/storage/managers/source_manager_tests.py @@ -0,0 +1,14 @@ +import unittest + +from .source_manager import SourceManager + +__all__ = ['TestSourceManager'] + + +class TestSourceManager(unittest.TestCase): + def test_something(self): + self.assertEqual(True, True) + + +if __name__ == '__main__': + unittest.main() diff --git a/rssreader/storage/models.py b/rssreader/storage/models.py new file mode 100644 index 0000000..d971463 --- /dev/null +++ b/rssreader/storage/models.py @@ -0,0 +1,146 @@ +""" +Module with description of models in database. +Used SQLite3 database and peewee module for work with it. + +""" + +import datetime +import json + +import peewee + +__all__ = ['DB_HANDLE', 'Source', 'Article'] + +STORAGE_DATABASE = 'storage.sqlite3' + +DB_HANDLE = peewee.SqliteDatabase(STORAGE_DATABASE) + + +class Source(peewee.Model): + """ + Model for containing rss feed sources in SQLite database. + + Fields: + title: title of article + url: absolute URL to RSS source + """ + title = peewee.TextField(null=True) + url = peewee.TextField(unique=True) + + class Meta: + database = DB_HANDLE + db_table = "sources" + + @classmethod + def get_or_create(cls, url, title=None): + """ + Rewriten base method of safe getting Source model object. 
+ + :param url: string link for init object + :param title: title of feeds source + :type url: str + :type title: str + :return: Source object. If object with such data is founded return it, + else created new object and return it. + :rtype: Source + """ + try: + return super().get(Source.url == url) + except peewee.DoesNotExist: + return cls.create(url=url, title=title) + + def sort_by_date(self, date): + """ + Method for get list of articles with a date after the given date. + + :param date: datetime for searching articles in string + :type date: str + :return: a list with of articles with a date after the given date + :rtype: list + """ + return self.articles.select().where(Article.pubDate >= date) + + +class Article(peewee.Model): + """ + Model for containing rss feed article in SQLite database. + All objects of this model ordered by pubDate field. + + Fields: + title: title of article + description: description of article + dec_description: decorated description of article + link: absolute URL to article + pubDate: date of publication article + media: all media objects from article + source: absolute URL to containing RSS source + links: all links from article without any formatting + dec_links: decorated links from article in special format + """ + title = peewee.TextField() + description = peewee.TextField() + dec_description = peewee.TextField() + link = peewee.CharField(unique=True) + pubDate = peewee.DateTimeField() + media = peewee.TextField() + source = peewee.ForeignKeyField(Source, backref='articles') + links = peewee.TextField() + dec_links = peewee.TextField() + + class Meta: + database = DB_HANDLE + db_table = "articles" + order_by = ('-pubDate',) + + @classmethod + def from_struct(cls, struct, source): + """ + Class method for creating Article model object from given dict. + Object creating with safe load a pub date. If RSS feed have no pub date, + the article will be saved with the date of adding to the db. 
+ + :param struct: dictionary with info about article + :param source: Source object of source feeds. Used for connect sources with articles + :type struct: dict + :type source: Source + :return: return Article object if no objects in db with such link. Else None + :rtype: Article or None + """ + try: + if struct['pubDate'] != 'None': + date = datetime.datetime.strptime(struct['pubDate'], "%a, %d %b %Y %H:%M") + else: + date = datetime.datetime.now() + + return cls.create( + title=struct['title'], + description=struct['description'], + dec_description=struct['dec_description'], + link=struct['link'], + pubDate=date, + media=json.dumps(struct['media']), + source=source, + links=json.dumps(struct['links']), + dec_links=json.dumps(struct['dec_links']) + ) + except peewee.IntegrityError: + return None + + def to_dict(self): + """ + Method for converting model objects to dict with all info. + + :return: dict with article info + :rtype: dict + """ + return { + 'title': self.title, + 'description': self.description, + 'dec_description': self.dec_description, + 'link': self.link, + 'pubDate': self.pubDate.strftime("%a, %d %b %Y %H:%M"), + 'media': json.loads(self.media), + 'source': self.source.url, + 'links': json.loads(self.links), + 'dec_links': json.loads(self.dec_links), + } diff --git a/rssreader/storage/models_tests.py b/rssreader/storage/models_tests.py new file mode 100644 index 0000000..79ec14d --- /dev/null +++ b/rssreader/storage/models_tests.py @@ -0,0 +1,19 @@ +import unittest + +from .models import Source, Source + +__all__ = ['TestSourceModel', 'TestArticleModel'] + + +class TestSourceModel(unittest.TestCase): + def test_something(self): + self.assertEqual(True, True) + + +class TestArticleModel(unittest.TestCase): + def test_something(self): + self.assertEqual(True, True) + + +if __name__ == '__main__': + unittest.main() diff --git a/rssreader/storage/tests.py b/rssreader/storage/tests.py new file mode 100644 index 0000000..40a9623 --- /dev/null +++ 
b/rssreader/storage/tests.py @@ -0,0 +1,9 @@ +import unittest + +from .models_tests import * +from .controller_tests import * +from .managers.article_manager_tests import * +from .managers.source_manager_tests import * + +if __name__ == '__main__': + unittest.main() diff --git a/rssreader/tests.py b/rssreader/tests.py new file mode 100644 index 0000000..4b2349c --- /dev/null +++ b/rssreader/tests.py @@ -0,0 +1,9 @@ +import unittest + +from .storage.tests import * +from .feed_parser_tests import * +from .output_controller_tests import * +from .rss_reader_tests import * + +if __name__ == '__main__': + unittest.main() diff --git a/rssreader_server/api_v1/__init__.py b/rssreader_server/api_v1/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rssreader_server/api_v1/admin.py b/rssreader_server/api_v1/admin.py new file mode 100644 index 0000000..0ea393a --- /dev/null +++ b/rssreader_server/api_v1/admin.py @@ -0,0 +1,5 @@ +from django.contrib import admin +from .models import * + +admin.site.register(Source) +admin.site.register(Article) diff --git a/rssreader_server/api_v1/apps.py b/rssreader_server/api_v1/apps.py new file mode 100644 index 0000000..9e01182 --- /dev/null +++ b/rssreader_server/api_v1/apps.py @@ -0,0 +1,5 @@ +from django.apps import AppConfig + + +class RssreaderAppConfig(AppConfig): + name = 'api_v1' diff --git a/rssreader_server/api_v1/converters.py b/rssreader_server/api_v1/converters.py new file mode 100644 index 0000000..88ac959 --- /dev/null +++ b/rssreader_server/api_v1/converters.py @@ -0,0 +1,301 @@ +""" +Module for output the result of the utility and printing in different formats. +Recommend use only class OutputController with parameters: + * to_json: bool - output in JSON or not + * to_pdf: str - string filename for output + * to_html: str - string filename for output +Default start sample output. 
+ +""" +import json +import os +from abc import ABC + +from fpdf import FPDF +from requests import get + +__all__ = ['SamplePrintResponseConverter', 'JSONPrintResponseConverter', + 'PDFPrintResponseConverter', 'HTMLPrintResponseConverter'] + + +class BaseResponseConverter(ABC): + cache_folder = '__cache__' + + def print(self, articles, filename, **kwargs): + """ + Procedure for output of news articles. + + :param articles: dict with title and list of news articles + :param filename: name of the file output + :param kwargs: optional params. Use to extend a count given params in base method + :type articles: dict + """ + + def _print_article(self, article, **kwargs): + """ + Method for output given articles in given PDF file. + + :param article: article to output + :param kwargs: optional params. Use to extend a count given params in base method + :type article: dict + """ + + def _print_title(self, title, **kwargs): + """ + Method for output given title. + + :param title: title to output + :param kwargs: optional params. Use to extend a count given params in base method + :type title: str + """ + + +class SamplePrintResponseConverter(BaseResponseConverter): + """ + Class controller for sample output in standard out. + """ + delimiter = "#" * 80 + + def print(self, articles, filename, **kwargs): + """ + Method for output of given articles if given filename. + + :param articles: articles for output in file + :param filename: name of file to output + :return: path to file with result + :rtype: str + """ + if (title := articles.get('title', None)) is not None: + response_result = f"Feed: {title}\n" + + for article in articles['articles']: + response_result += self._print_article(article) + + with open(os.path.join(self.cache_folder, filename), 'w') as file: + file.write(response_result) + + return filename + + def _print_article(self, article, **kwargs): + """ + Method for output articles in PDF format. 
+ + :param article: current dict with article info for output + :type dict + """ + response_result = f"Title: {article['title']}\n" \ + f"Date: {article['pubDate']}\n" \ + f"Link: {article['link']}\n" \ + f"\n" \ + f"{article['dec_description']}\n" \ + f"\n" \ + f"Links:" + + for link in article['dec_links']: + response_result += f"\n{link}" + response_result += f"\n{self.delimiter}" + + return response_result + + +class JSONPrintResponseConverter(BaseResponseConverter): + """ + Class controller for output JSON form of articles in standard out. + """ + + def print(self, articles, filename, **kwargs): + """ + Method for output of given articles if given filename. + + :param articles: articles for output in file + :param filename: name of file to output + :return: path to file with result + :rtype: str + """ + with open(os.path.join(self.cache_folder, filename), 'w') as file: + file.write(json.dumps(articles)) + + return filename + + +class PDFPrintResponseConverter(BaseResponseConverter): + """ + Class controller for output given articles in PDF in file. + """ + image_ext = 'jpg' + cache_folder = "__cache__" + extension = '.pdf' + delimiter_before = "_" * 59 + delimiter_after = "%d".rjust(55, '_').ljust(59, '_') + + def print(self, articles, filename, **kwargs): + """ + Method for output of given articles if given filename. + + :param articles: articles for output in file + :param filename: name of file to output + :return: path to file with result + :rtype: str + """ + writer = FPDF() + writer.add_page() + self._print_title(articles['title'], writer=writer) + + for i, article in enumerate(articles['articles']): + self._print_article(article, writer=writer, ind=i) + + writer.output(os.path.join(self.cache_folder, filename)) + + # os.removedirs(self.cache_folder) + + return filename + + def _print_title(self, title, **kwargs): + """ + Method for output title of RSS Feeds. 
+ + :param title: title of RSS Feed + :rtype: dict + """ + writer = kwargs['writer'] + writer.set_font('Courier', 'B', 20) + writer.multi_cell(0, 30, title, align='C') + + def _print_article(self, article, **kwargs): + """ + Method for output articles in PDF format. + + :param article: current dict with article info for output + :type dict + """ + writer = kwargs['writer'] + ind = kwargs['ind'] + + article = self._clean_each_elem_article(article) + + writer.set_font("Courier", 'B', 15) + writer.multi_cell(0, 10, self.delimiter_before) + + writer.set_font("Courier", "B", 13) + writer.multi_cell(0, 7, f"Title: {article['title']}", align="L") + + writer.set_font("Courier", "BI", 11) + writer.multi_cell(0, 10, f"Date: {article['pubDate']}", align='R') + + for img in article['media']: + self._draw_image(writer, img) + + writer.set_font("Courier", size=12) + writer.multi_cell(0, 5, article['description'], align='L') + + writer.set_font("Courier", "BI", size=9) + writer.multi_cell(0, 10, f"Link: {article['link']}", align='L') + + writer.set_font("Courier", 'B', 15) + writer.multi_cell(0, 10, self.delimiter_after % (ind + 1)) + + def _clean_each_elem_article(self, elem): + """ + Recursive method for cleaning errors with encoding 'latin-1' for output ready text in PDF file. + Go throw all elements of given objects and remove error with encoding 'latin-1'. + + :param elem: current element for checking and removing errors with encoding + :return: recursive call this method if givn object is collection, else string + """ + if type(elem) == str: + return elem.encode('latin-1', 'replace').decode('latin-1') + elif type(elem) == dict: + return {k: self._clean_each_elem_article(v) for k, v in elem.items()} + elif type(elem) == list: + return [self._clean_each_elem_article(el) for el in elem] + + def _draw_image(self, writer, image): + """ + Method for draw image in file by given FPDF writer. 
+ + :param writer: FPDF object for drawing in file + :param image: dict with info about image + :type writer: fpdf.FPDF + :type image: dict + """ + try: + image_name = f"{image['src'].split('/')[-1]}.{self.image_ext}" + image_path = self._download_to(image['src'], image_name) + writer.image(image_path, type=self.image_ext, link=image['src'], x=(writer.w - 50) // 2) + except (ValueError, TypeError, RuntimeError): + writer.set_font("Courier", 'B', 10) + writer.multi_cell(0, 3, f"NO IMAGE: {image['alt']}", align='C') + + def _download_to(self, link, filename): + """ + Method for downloading image by link in given file. Return path to downloaded image. + + :param link: link to image + :param filename: name of file, such will be rewriten. + :type link: str + :type filename: str + :return: absolute path to downloaded image + :rtype: str + """ + if not os.path.exists(os.path.join(self.cache_folder)): + os.mkdir(os.path.join(self.cache_folder)) + img_data = get(link).content + ready_image_path = os.path.join(self.cache_folder, filename) + with open(ready_image_path, 'wb') as handler: + handler.write(img_data) + + return ready_image_path + + +class HTMLPrintResponseConverter(BaseResponseConverter): + """ + Class controller for output given articles using HTML in file. + """ + extension = '.html' + + def print(self, articles, filename, **kwargs): + html_text = f"" \ + f"" \ + f"" \ + f"" \ + f"RSS Feeds" \ + f"" \ + f"" \ + f"

{articles['title']}

" \ + f"{''.join([self._print_article(art) for art in articles['articles']])}" \ + f"" \ + f"" + + with open(os.path.join(self.cache_folder, filename), 'w') as file: + file.write(html_text) + + return filename + + def _print_article(self, article, **kwargs): + result = "
" \ + "

" + result += '' \ + '{} (Link to original)' \ + '' \ + '

'.format(article['link'], article['title']) + for image in article['media']: + attrs = " ".join([f"{k}=\"{v}\"" for k, v in image.items()]) + result += "
" \ + "".format(attrs) + result += f'

' \ + f'Published: {article["pubDate"]}' \ + f'

' + result += f'

' \ + f'{article["description"]}' \ + f'

' \ + f'
' \ + f'

' \ + f'Links:' \ + f'

' + for i in range(len(article['dec_links'])): + result += f'' \ + f'{article["dec_links"][i]}' \ + f'' \ + f'
' + result += "
" + return result diff --git a/rssreader_server/api_v1/feed_parser.py b/rssreader_server/api_v1/feed_parser.py new file mode 100644 index 0000000..9f2b8cf --- /dev/null +++ b/rssreader_server/api_v1/feed_parser.py @@ -0,0 +1,346 @@ +import datetime +import html +import logging +from abc import ABC, abstractmethod + +__all__ = ['Parser'] + + +class Tag(ABC): + """ + Abstract class for working with tags as a class structure. + """ + + def __init__(self, **kwargs): + for arg, val in kwargs.items(): + self.__setattr__(arg, val) + + @abstractmethod + def link(self): + """ + Get media object source link. + + :return: media object source URL + :rtype: str + """ + + @abstractmethod + def format_link(self, ind): + """ + Get formatted link to output in the links section. + + :param ind: Sequence number in the queue + :type ind: int + :return: string to output in links section + :rtype: str + """ + + @abstractmethod + def __str__(self): + """ + Get string to output tag in the description section. + + :return: string to output tag in the description section + """ + + +class A(Tag): + """ + Class for work with tag `a` (link) as a class struct. + """ + href = None + + def __str__(self): + """ + Get string to output tag in description section. + + :return: string to output tag in the description section + """ + return "[link {}]" + + def link(self): + """ + Get media object source link. + + :return: media object source URL + :rtype: str + """ + return self.href + + def format_link(self, ind): + """ + Get formatted link to output in the links section. + + :param ind: Sequence number in the queue + :type ind: int + :return: string to output in the links section + :rtype: str + """ + return f"[{ind}]: {self.href} (link)" + + +class Img(Tag): + """ + Class for work with tag img (image) as a class struct. + """ + src = None + alt = None + width = None + height = None + + def __str__(self): + """ + Get string to output tag in description section. 
+ + :return: string to output tag in the description section + """ + return "[Image {}: %s] " % self.alt + + def link(self): + """ + Get media object source link. + + :return: media object source URL + :rtype: str + """ + return self.src + + def format_link(self, ind): + """ + Get formatted link to output in the links section. + + :param ind: Sequence number in the queue + :type ind: int + :return: string to output in the links section + :rtype: str + """ + return f"[{ind}]: {self.src} (image)" + + +class HTMLParser: + """ + A class for parse news articles from response struct of module "feedparser". + Methods return JSON format of news articles or dict with info about given article. + """ + _table = { + 'a': A, + 'img': Img, + } + + def __init__(self): + self._tags = [] + + def parse(self, response, limit): + """ + A method of parsing news articles and creating object models for easy access. + + :param response: response struct for parse + :param limit: required number of articles to show + :type response: dict + :type limit: int + :return: return a dict {'title': str, 'articles': list). + Title is header of RSS Source. + Articles is a list of dicts with articles info which was created from parsed feeds + :rtype: dict + """ + logging.info("Getting list of limited articles") + raw_articles = self._get_limited_articles(response, limit) + + logging.info("Completed. Converting each article to dict") + nice_articles = [self._article_to_dict(article) for article in raw_articles] + + logging.info("Completed. Clear articles from HTML escapes") + articles = [self._clear_from_html(article) for article in nice_articles] + + logging.info("Getting a RSS source title") + title = response['feed']['title'] + + return {'title': title, 'articles': articles} + + def _clear_from_html(self, elem): + """ + Method to clear html escapes from all fields of article. 
+ + :param elem: article to clear from HTML escapes + :return: clean article + """ + if type(elem) == str: + return html.unescape(elem) + elif type(elem) == dict: + return {self._clear_from_html(k): self._clear_from_html(v) for k, v in elem.items()} + elif type(elem) == list: + return [self._clear_from_html(el) for el in elem] + else: + return elem + + @staticmethod + def _get_limited_articles(response, limit): + """ + Method of limiting parsing articles from response struct. + If limit is None return articles given length, else return all available articles. + + :param response: response struct for parse + :param limit: limit of output news articles + :type response: dict + :type limit: int or None + :return: news articles of limited length + :rtype: dict + """ + result = response['entries'] + if limit is not None: + logging.info(f"Completed. Loaded {min(limit, len(result))} articles with limit {limit}") + return result[0:min(limit, len(result))] + else: + logging.info(f"Completed. Loaded {len(result)} articles without any limit") + return result + + @staticmethod + def _get_next_tag(line): + """ + Method for getting startpos and endpos of tag in given string line. + + :param line: line with html tag + :type line: str + :return: (startpos, endpos) is a position of next tag in line if line have a tag, else None + :rtype: tuple or None + """ + if (startpos := line.find('<')) != -1 and (endpos := line.find('>')) != -1: + return startpos, endpos + 1 + else: + return None + + def _create_tag(self, params): + """ + Method for creating Tag struct class from params. + + :param params: info for creating tag + :type params: dict + :return: tag object if creating was successful, else None + :rtype: Tag or None + """ + try: + tag_type = next(iter(params)) + params.pop(tag_type) + return self._table[tag_type](**params) + except KeyError: + return None + + def _get_params_from_line(self, tag_line): + """ + Method for getting all parameters from html tag string line. 
+ If parameter have a value params save value. Else value is True. + + :param tag_line: line with tag parameters + :type tag_line: str + :return: dict with parsed parameters + :rtype: dict + """ + params = {} + tag_line = tag_line.strip('<>') + strings, tag_line = self._get_all_strings(tag_line) + words = tag_line.split() + for param in words: + pair = param.split('=') + if len(pair) == 1: + params.update({pair[0]: True}) + else: + params.update({pair[0]: strings.pop(0)}) + + return params + + @staticmethod + def _get_all_strings(tag_line): + """ + Method of cutting all string in quotes \"...\". + + :param tag_line: line with tag info and strings + :type tag_line: str + :return: tuple (strings, tag_line). + strings is a list with all cutting strings. + tag_line is a given string parameter without cutting strings + :rtype: tuple + """ + strings = [] + while (start_ind := tag_line.find('"')) != -1: + end_ind = tag_line.find('"', start_ind + 1) + 1 + strings.append(tag_line[start_ind + 1: end_ind - 1]) + tag_line = tag_line[:start_ind] + tag_line[end_ind:] + return strings, tag_line + + def _process_description(self, desc, fill_desc=True, fill_links=True): + """ + Method processing description. Return description of specific format. + + :param desc: description of news article with useless info and tags + :type desc: str + :return: tuple (description, links). + description is description without useless info and tags. With inserts links or not. 
+ links is list with formatted strings with links from all created tag objects + :rtype: tuple + """ + self._tags.clear() + index_of_tag = 1 + links = [] + while (pos_tag := self._get_next_tag(desc)) is not None: + first_quotes, last_quotes = pos_tag + full_tag_line = desc[first_quotes: last_quotes] + parameters = self._get_params_from_line(full_tag_line) + obj_tag = self._create_tag(parameters) + if obj_tag is not None: + self._tags.append(obj_tag) + if fill_desc: + desc = desc[:first_quotes] + str(obj_tag).format(index_of_tag) + desc[last_quotes:] + else: + desc = desc[:first_quotes] + desc[last_quotes:] + if fill_links: + links.append(obj_tag.format_link(index_of_tag)) + else: + links.append(obj_tag.link()) + index_of_tag += 1 + else: + desc = desc[:first_quotes] + desc[last_quotes:] + + return desc, links + + def _article_to_dict(self, article): + """ + Method for converting article info into dict of specific format. + + :param article: article for converting into dict of specific format + :type article: dict + :return: dict of specific format + :rtype: dict + """ + + dec_description, dec_links = self._process_description(article['description']) + description, links = self._process_description(article['description'], False, False) + + images = [obj for obj in self._tags if isinstance(obj, Img)] + + media = [ + {"src": image.src, + "alt": image.alt, + "width": image.width, + "height": image.height} for image in images + ] + + try: + date = datetime.datetime(*article['published_parsed'][:6]).strftime("%a, %d %b %Y %H:%M") + except (AttributeError, ValueError): + date = 'None' + + result = { + 'title': article['title'], + 'description': description, + 'dec_description': dec_description, + 'link': article['link'], + 'pubDate': date, + 'media': media, + 'links': links, + 'dec_links': dec_links, + } + + return result + + +Parser = HTMLParser() diff --git a/rssreader_server/api_v1/managers/__init__.py b/rssreader_server/api_v1/managers/__init__.py new file mode 100644 
class ArticleManager:
    """
    Thin manager around the Article model for bulk creation.
    """

    @staticmethod
    def create_and_return(structs, source):
        """
        Store every article struct for the given source and count the new rows.

        A struct for which ``Article.from_struct`` returns None is not counted.

        :param structs: list of articles structs
        :param source: model Source object of feeds source
        :type structs: list
        :type source: Source
        :return: count of new created objects
        :rtype: int
        """
        created = 0
        for struct in structs:
            article = Article.from_struct(struct, source)
            if article is not None:
                created += 1
        return created
class Article(models.Model):
    """
    Cached news article that belongs to one RSS ``Source``.

    ``media``, ``links`` and ``dec_links`` are stored as JSON-encoded text.
    ``link`` is unique, so storing the same article twice is rejected by the
    database.
    """
    # Textual date format used for 'pubDate' in article structs.
    PUB_DATE_FORMAT = "%a, %d %b %Y %H:%M"

    title = models.TextField()
    description = models.TextField()
    dec_description = models.TextField()
    link = models.TextField(unique=True)
    pubDate = models.DateTimeField()
    media = models.TextField()
    source = models.ForeignKey('Source', on_delete=models.CASCADE, related_name='articles')
    links = models.TextField()
    dec_links = models.TextField()

    class Meta:
        ordering = ['pubDate']

    @classmethod
    def from_struct(cls, struct, source):
        """
        Create and store an article from a parsed article struct.

        :param struct: article struct with the keys title, description,
                       dec_description, link, pubDate, media, links, dec_links
        :param source: Source object the article belongs to
        :type struct: dict
        :type source: Source
        :return: the created Article, or None when the database rejects the row
                 (for example because an article with the same link already exists)
        :rtype: Article or None
        """
        # The project settings enable USE_TZ, so timestamps are made timezone-aware
        # (UTC) before they reach the DateTimeField instead of being passed naive.
        utc = datetime.timezone.utc
        if struct['pubDate'] != 'None':
            published = datetime.datetime.strptime(struct['pubDate'], cls.PUB_DATE_FORMAT).replace(tzinfo=utc)
        else:
            # No publication date in the feed: fall back to the time of caching.
            published = datetime.datetime.now(tz=utc)

        try:
            return cls.objects.create(
                title=struct['title'],
                description=struct['description'],
                dec_description=struct['dec_description'],
                link=struct['link'],
                pubDate=published,
                media=json.dumps(struct['media']),
                source=source,
                links=json.dumps(struct['links']),
                dec_links=json.dumps(struct['dec_links']),
            )
        except IntegrityError:
            return None

    def to_dict(self):
        """
        Convert the stored article back into an article struct.

        :return: dict with decoded JSON fields, a formatted 'pubDate' and the
                 URL of the source under 'source'
        :rtype: dict
        """
        return {
            'title': self.title,
            'description': self.description,
            'dec_description': self.dec_description,
            'link': self.link,
            'pubDate': self.pubDate.strftime(self.PUB_DATE_FORMAT),
            'media': json.loads(self.media),
            'source': self.source.url,
            'links': json.loads(self.links),
            'dec_links': json.loads(self.dec_links),
        }
@staticmethod
def load(url, date, limit):
    """
    Load cached articles of one source, optionally limited in number.

    :param url: source URL for getting articles from db; leading and trailing
                slashes and backslashes are stripped before the lookup
    :param date: only articles dated on or after this value are returned
    :param limit: maximum number of articles to return, or None for no limit
    :type url: str
    :type date: datetime.datetime
    :type limit: int or None
    :return: dict with the source title under 'title' and the list of article
             dicts under 'articles'
    :rtype: dict
    """
    clr_url = url.strip('/\\')
    result = SourceManager.get_articles_with_data_from(clr_url, date)

    found = result['articles']
    if limit is not None:
        # Slice instead of enumerating everything, so only the first `limit`
        # articles are fetched. A negative limit yields no articles.
        found = found[:max(limit, 0)]

    result['articles'] = [article.to_dict() for article in found]
    return result
def load(self, articles, filename):
    """
    Convert the articles with this builder's converter and serve the result file.

    :param articles: dict with articles for converting and output
    :param filename: name of file for output
    :type articles: dict
    :type filename: str
    :return: response that delivers the generated file
    :rtype: FileResponse
    """
    produced_name = self.load_result(articles, filename)
    file_path = os.path.join(self.from_folder, produced_name)
    # The open handle is handed over to the response, so it must stay open here.
    stream = open(file_path, 'rb')
    return FileResponse(stream, filename=file_path, content_type='application/txt')
class ResponseController:
    """
    Chooses the response builder that matches the requested output format.
    """

    @staticmethod
    def load_result_into_file(articles, to_pdf=None, to_html=None, to_json=None, to_sample=None):
        """
        Build the file response for the first requested format.

        Formats are checked in the order HTML, PDF, JSON; when none of them is
        requested the plain sample format is used with ``to_sample`` as file name.

        :param articles: dict with articles for converting and output
        :param to_pdf: file name for PDF output, if requested
        :param to_html: file name for HTML output, if requested
        :param to_json: file name for JSON output, if requested
        :param to_sample: file name for the fallback plain output
        :return: response produced by the selected builder
        """
        by_priority = (
            (HTMLResponse, to_html),
            (PDFResponse, to_pdf),
            (JSONResponse, to_json),
        )
        for builder, target in by_priority:
            if target is not None:
                return builder().load(articles, filename=target)

        return SampleResponse().load(articles, filename=to_sample)
def _execute(self, url, limit, date, **kwargs):
    """
    Main method of processing request.

    Without ``date`` the feed is downloaded from ``url``, stored and returned;
    with ``date`` the articles are loaded from storage instead.

    :param url: URL RSS
    :param limit: count of output articles, if given
    :param date: date in format '%Y%m%d'; when given, cached articles are loaded from storage
    :param kwargs: optional parameters 'to_html', 'to_json' and 'to_pdf'
    :type url: str
    :type limit: str
    :type date: str
    :type kwargs: dict
    :return: http response of processing request
    :rtype: HttpResponse
    """
    if not url:
        # Every later step calls string methods on the URL, so reject a missing one early.
        return HttpResponse("Parameter 'url' is required", status=404)

    if limit in (None, ''):
        # An absent or empty parameter means "no limit".
        limit = None
    else:
        try:
            limit = int(limit)
        except (ValueError, TypeError):
            return HttpResponse(f"Bad given value limit '{limit}'", status=404)
        if limit < 1:
            return HttpResponse(f"Bad given value limit '{limit}'", status=404)

    if not date:
        articles = self._get_articles_from_url(url, limit)
        if isinstance(articles, HttpResponse):
            # The downloader reports failures as a ready-made error response;
            # pass it on instead of treating it as a dict of articles.
            return articles

        StorageController.save(url, articles['articles'], articles['title'])
    else:
        try:
            since = datetime.datetime.strptime(date, "%Y%m%d")
        except ValueError:
            return HttpResponse(f"Error format date {date}. Need '%Y%m%d'", status=404)
        articles = StorageController.load(url, since, limit)

    return ResponseController.load_result_into_file(articles,
                                                    to_html=kwargs.get('to_html', None),
                                                    to_json=kwargs.get('to_json', None),
                                                    to_pdf=kwargs.get('to_pdf', None),
                                                    to_sample=datetime.datetime.now().strftime("%d%m%Y%H%M%S"))
def show_help_view(request):
    """
    Method for output info about.

    The parameter names listed in the help text match the names read from the
    query string by the news view (``to_pdf``, ``to_html``, ``to_json``).

    :param request: incoming request; not inspected
    :return: http response with info about API of current app
    :rtype: HttpResponse
    """
    usage = (
        "Method GET is used to pass parameters.\n"
        "Optional parameters:"
        "\n\t* url=URL            RSS URL"
        "\n\t* limit=LIMIT        Limit news topics if this parameter provided"
        "\n\t* date=DATE          Print cached articles by date"
        "\n\t* to_json=TO_JSON    Print result as JSON in file `TO_JSON`"
        "\n\t* to_pdf=TO_PDF      Print result as PDF in file `TO_PDF`"
        "\n\t* to_html=TO_HTML    Print result as HTML in file `TO_HTML`"
    )
    # <pre> keeps the line breaks and tabs of the usage text; unlike the obsolete
    # <plaintext> element it can be closed again.
    html_result = (
        "<!DOCTYPE html>"
        "<html>"
        "<head>"
        "<title>RSS Feeds</title>"
        "</head>"
        "<body>"
        f"<pre>{usage}</pre>"
        "</body>"
        "</html>"
    )
    return HttpResponse(html_result)
"""Django settings for the rssreader_server project."""
import os

BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# NOTE(review): the fallback value is committed to the repository and therefore not
# secret; set DJANGO_SECRET_KEY in any real deployment.
SECRET_KEY = os.environ.get('DJANGO_SECRET_KEY', 'ny#t%v)^98(0afy8#+$dbrp^strv6#+#_nif)w8o2%dbffx5ft')

DEBUG = False

# With DEBUG switched off Django serves only the hosts listed here, so an empty list
# rejects every request. Hosts come from a comma-separated environment variable and
# default to the local machine.
ALLOWED_HOSTS = [
    host.strip()
    for host in os.environ.get('DJANGO_ALLOWED_HOSTS', 'localhost,127.0.0.1,[::1]').split(',')
    if host.strip()
]

INSTALLED_APPS = [
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'api_v1',
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'rssreader_server.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'rssreader_server.wsgi.application'

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
    }
}

AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'UTC'

USE_I18N = True

USE_L10N = True

USE_TZ = True

STATIC_URL = '/static/'
from setuptools import setup, find_packages

# Read the README with an explicit encoding so the build does not depend on the
# platform's default locale.
with open('README.md', encoding='utf-8') as file:
    LONG_DESCRIPTION = file.read()

setup(
    name='rss-reader',
    # The version is taken from the package itself, which is imported for that purpose.
    version=__import__('rssreader').__version__,
    description="RSS News Reader for EPAM Python Courses",
    long_description=LONG_DESCRIPTION,
    long_description_content_type='text/markdown',
    author="Pivovar Sergey",
    author_email="pivovar-ser-leon@inbox.ru",
    url="https://github.com/TeRRoRlsT/PythonHomework.git",

    packages=find_packages(),

    python_requires='>=3.8',
    install_requires=['feedparser', 'requests', 'fpdf', 'peewee', 'colorama'],

    entry_points={
        'console_scripts': [
            'rss-reader=rssreader.rss_reader:main',
        ]
    },
    classifiers=[
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
    ],
)