diff --git a/.gitignore b/.gitignore index 894a44c..94a37c8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,7 @@ -# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class - -# C extensions *.so - -# Distribution / packaging .Python build/ develop-eggs/ @@ -24,18 +19,10 @@ wheels/ .installed.cfg *.egg MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec - -# Installer logs pip-log.txt pip-delete-this-directory.txt - -# Unit test / coverage reports htmlcov/ .tox/ .coverage @@ -46,42 +33,20 @@ coverage.xml *.cover .hypothesis/ .pytest_cache/ - -# Translations *.mo *.pot - -# Django stuff: *.log local_settings.py db.sqlite3 - -# Flask stuff: instance/ .webassets-cache - -# Scrapy stuff: .scrapy - -# Sphinx documentation docs/_build/ - -# PyBuilder target/ - -# Jupyter Notebook .ipynb_checkpoints - -# pyenv .python-version - -# celery beat schedule file celerybeat-schedule - -# SageMath parsed files *.sage.py - -# Environments .env .venv env/ @@ -89,16 +54,13 @@ venv/ ENV/ env.bak/ venv.bak/ - -# Spyder project settings .spyderproject .spyproject - -# Rope project settings .ropeproject - -# mkdocs documentation /site - -# mypy .mypy_cache/ +.idea/ +*.sqlite3 +*.html +*.pdf +__cache__/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..d2fe208 --- /dev/null +++ b/README.md @@ -0,0 +1,36 @@ +# Welcome +This project was created for the EPAM Python Courses 2019. + +## Installing + +First, this app runs on Python version >=3.8. 
+ +### Download: + +`git clone https://github.com/TeRRoRlsT/PythonHomework.git` + +### Setup: +Go to repository **PythonHomework** and execute the command: + +`python3.8 -m pip install .` + +or + +`pip install .` + +## Running +To view the help for running project go to **PythonHomework/rssreader** folder and execute the command: + +`python3.8 rss_reader.py --help` + +### SQLite3 +This application uses SQLite3 database to cache all downloaded news articles. +If you use '--date YYYYMMDD' the application will load news articles from the DB with the date after the given date. + +## Tests +For run unittest go to **PythonHomework** folder and execute the command: + +`python3.8 -m unittest tests` + + ## Authors +* Sergey Pivovar - BSUIR 2019 \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rssreader/__init__.py b/rssreader/__init__.py new file mode 100644 index 0000000..b2a95f9 --- /dev/null +++ b/rssreader/__init__.py @@ -0,0 +1 @@ +__version__ = "1.2" diff --git a/rssreader/feed_parser.py b/rssreader/feed_parser.py new file mode 100644 index 0000000..9f2b8cf --- /dev/null +++ b/rssreader/feed_parser.py @@ -0,0 +1,346 @@ +import datetime +import html +import logging +from abc import ABC, abstractmethod + +__all__ = ['Parser'] + + +class Tag(ABC): + """ + Abstract class for working with tags as a class structure. + """ + + def __init__(self, **kwargs): + for arg, val in kwargs.items(): + self.__setattr__(arg, val) + + @abstractmethod + def link(self): + """ + Get media object source link. + + :return: media object source URL + :rtype: str + """ + + @abstractmethod + def format_link(self, ind): + """ + Get formatted link to output in the links section. + + :param ind: Sequence number in the queue + :type ind: int + :return: string to output in links section + :rtype: str + """ + + @abstractmethod + def __str__(self): + """ + Get string to output tag in the description section. 
+ + :return: string to output tag in the description section + """ + + +class A(Tag): + """ + Class for work with tag `a` (link) as a class struct. + """ + href = None + + def __str__(self): + """ + Get string to output tag in description section. + + :return: string to output tag in the description section + """ + return "[link {}]" + + def link(self): + """ + Get media object source link. + + :return: media object source URL + :rtype: str + """ + return self.href + + def format_link(self, ind): + """ + Get formatted link to output in the links section. + + :param ind: Sequence number in the queue + :type ind: int + :return: string to output in the links section + :rtype: str + """ + return f"[{ind}]: {self.href} (link)" + + +class Img(Tag): + """ + Class for work with tag img (image) as a class struct. + """ + src = None + alt = None + width = None + height = None + + def __str__(self): + """ + Get string to output tag in description section. + + :return: string to output tag in the description section + """ + return "[Image {}: %s] " % self.alt + + def link(self): + """ + Get media object source link. + + :return: media object source URL + :rtype: str + """ + return self.src + + def format_link(self, ind): + """ + Get formatted link to output in the links section. + + :param ind: Sequence number in the queue + :type ind: int + :return: string to output in the links section + :rtype: str + """ + return f"[{ind}]: {self.src} (image)" + + +class HTMLParser: + """ + A class for parse news articles from response struct of module "feedparser". + Methods return JSON format of news articles or dict with info about given article. + """ + _table = { + 'a': A, + 'img': Img, + } + + def __init__(self): + self._tags = [] + + def parse(self, response, limit): + """ + A method of parsing news articles and creating object models for easy access. 
+ + :param response: response struct for parse + :param limit: required number of articles to show + :type response: dict + :type limit: int + :return: return a dict {'title': str, 'articles': list). + Title is header of RSS Source. + Articles is a list of dicts with articles info which was created from parsed feeds + :rtype: dict + """ + logging.info("Getting list of limited articles") + raw_articles = self._get_limited_articles(response, limit) + + logging.info("Completed. Converting each article to dict") + nice_articles = [self._article_to_dict(article) for article in raw_articles] + + logging.info("Completed. Clear articles from HTML escapes") + articles = [self._clear_from_html(article) for article in nice_articles] + + logging.info("Getting a RSS source title") + title = response['feed']['title'] + + return {'title': title, 'articles': articles} + + def _clear_from_html(self, elem): + """ + Method to clear html escapes from all fields of article. + + :param elem: article to clear from HTML escapes + :return: clean article + """ + if type(elem) == str: + return html.unescape(elem) + elif type(elem) == dict: + return {self._clear_from_html(k): self._clear_from_html(v) for k, v in elem.items()} + elif type(elem) == list: + return [self._clear_from_html(el) for el in elem] + else: + return elem + + @staticmethod + def _get_limited_articles(response, limit): + """ + Method of limiting parsing articles from response struct. + If limit is None return articles given length, else return all available articles. + + :param response: response struct for parse + :param limit: limit of output news articles + :type response: dict + :type limit: int or None + :return: news articles of limited length + :rtype: dict + """ + result = response['entries'] + if limit is not None: + logging.info(f"Completed. Loaded {min(limit, len(result))} articles with limit {limit}") + return result[0:min(limit, len(result))] + else: + logging.info(f"Completed. 
Loaded {len(result)} articles without any limit") + return result + + @staticmethod + def _get_next_tag(line): + """ + Method for getting startpos and endpos of tag in given string line. + + :param line: line with html tag + :type line: str + :return: (startpos, endpos) is a position of next tag in line if line have a tag, else None + :rtype: tuple or None + """ + if (startpos := line.find('<')) != -1 and (endpos := line.find('>')) != -1: + return startpos, endpos + 1 + else: + return None + + def _create_tag(self, params): + """ + Method for creating Tag struct class from params. + + :param params: info for creating tag + :type params: dict + :return: tag object if creating was successful, else None + :rtype: Tag or None + """ + try: + tag_type = next(iter(params)) + params.pop(tag_type) + return self._table[tag_type](**params) + except KeyError: + return None + + def _get_params_from_line(self, tag_line): + """ + Method for getting all parameters from html tag string line. + If parameter have a value params save value. Else value is True. + + :param tag_line: line with tag parameters + :type tag_line: str + :return: dict with parsed parameters + :rtype: dict + """ + params = {} + tag_line = tag_line.strip('<>') + strings, tag_line = self._get_all_strings(tag_line) + words = tag_line.split() + for param in words: + pair = param.split('=') + if len(pair) == 1: + params.update({pair[0]: True}) + else: + params.update({pair[0]: strings.pop(0)}) + + return params + + @staticmethod + def _get_all_strings(tag_line): + """ + Method of cutting all string in quotes \"...\". + + :param tag_line: line with tag info and strings + :type tag_line: str + :return: tuple (strings, tag_line). + strings is a list with all cutting strings. 
+ tag_line is a given string parameter without cutting strings + :rtype: tuple + """ + strings = [] + while (start_ind := tag_line.find('"')) != -1: + end_ind = tag_line.find('"', start_ind + 1) + 1 + strings.append(tag_line[start_ind + 1: end_ind - 1]) + tag_line = tag_line[:start_ind] + tag_line[end_ind:] + return strings, tag_line + + def _process_description(self, desc, fill_desc=True, fill_links=True): + """ + Method processing description. Return description of specific format. + + :param desc: description of news article with useless info and tags + :type desc: str + :return: tuple (description, links). + description is description without useless info and tags. With inserts links or not. + links is list with formatted strings with links from all created tag objects + :rtype: tuple + """ + self._tags.clear() + index_of_tag = 1 + links = [] + while (pos_tag := self._get_next_tag(desc)) is not None: + first_quotes, last_quotes = pos_tag + full_tag_line = desc[first_quotes: last_quotes] + parameters = self._get_params_from_line(full_tag_line) + obj_tag = self._create_tag(parameters) + if obj_tag is not None: + self._tags.append(obj_tag) + if fill_desc: + desc = desc[:first_quotes] + str(obj_tag).format(index_of_tag) + desc[last_quotes:] + else: + desc = desc[:first_quotes] + desc[last_quotes:] + if fill_links: + links.append(obj_tag.format_link(index_of_tag)) + else: + links.append(obj_tag.link()) + index_of_tag += 1 + else: + desc = desc[:first_quotes] + desc[last_quotes:] + + return desc, links + + def _article_to_dict(self, article): + """ + Method for converting article info into dict of specific format. 
+ + :param article: article for converting into dict of specific format + :type article: dict + :return: dict of specific format + :rtype: dict + """ + + dec_description, dec_links = self._process_description(article['description']) + description, links = self._process_description(article['description'], False, False) + + images = [obj for obj in self._tags if isinstance(obj, Img)] + + media = [ + {"src": image.src, + "alt": image.alt, + "width": image.width, + "height": image.height} for image in images + ] + + try: + date = datetime.datetime(*article['published_parsed'][:6]).strftime("%a, %d %b %Y %H:%M") + except (AttributeError, ValueError): + date = 'None' + + result = { + 'title': article['title'], + 'description': description, + 'dec_description': dec_description, + 'link': article['link'], + 'pubDate': date, + 'media': media, + 'links': links, + 'dec_links': dec_links, + } + + return result + + +Parser = HTMLParser() diff --git a/rssreader/feed_parser_tests.py b/rssreader/feed_parser_tests.py new file mode 100644 index 0000000..6ecf1d2 --- /dev/null +++ b/rssreader/feed_parser_tests.py @@ -0,0 +1,306 @@ +import html +import time +from unittest import TestCase, main + +from rssreader.feed_parser import A, Img, HTMLParser + +__all__ = ['TestA', 'TestImg', 'TestHTMLParser'] + + +def equals_classes_a(obj1: A, obj2: A): + return obj1.href == obj2.href + + +def equals_classes_img(obj1: Img, obj2: Img): + return obj1.src == obj2.src and \ + obj1.alt == obj2.alt and \ + obj1.width == obj2.width and \ + obj1.height == obj2.height + + +class TestImg(TestCase): + def setUp(self): + self.fields = { + 'src': 'some_src', + 'alt': 'some_alt', + 'style': 'some_style', + 'width': 'some_width', + 'height': 'some_height', + } + self.img = Img(**self.fields) + + def test_create_obj(self): + self.assertTrue(issubclass(type(self.img), Img)) + + self.assertFalse(self.img is Img) + + def test_creating_fields(self): + self.assertEqual(self.img.src, self.fields['src']) + + 
self.assertEqual(self.img.alt, self.fields['alt']) + + self.assertEqual(self.img.width, self.fields['width']) + + self.assertEqual(self.img.height, self.fields['height']) + + def test_method_link(self): + self.assertEqual(self.img.link(), self.img.src) + + self.assertEqual(self.img.link(), self.fields['src']) + + self.assertNotEqual(self.img.link(), 'src') + + def test_method_format_link(self): + ind = 1 + self.assertEqual(self.img.format_link(ind), f"[{ind}]: {self.img.src} (image)") + + self.assertEqual(self.img.format_link(2), "[2]: some_src (image)") + + self.assertNotEqual(self.img.format_link(3), "[1]: some_src (image)") + + def test_method_str(self): + self.assertEqual(str(self.img), "[Image {}: %s] " % self.img.alt) + + self.assertNotEqual(str(self.img), "[Image {}: {}]") + + +class TestA(TestCase): + def setUp(self): + self.fields = { + 'href': 'some_href', + 'style': 'some_style', + 'align': 'some_align', + } + self.a = A(**self.fields) + + def test_create_obj(self): + self.assertTrue(issubclass(type(self.a), A)) + + self.assertFalse(self.a is A) + + def test_creating_fields(self): + self.assertEqual(self.a.href, self.fields['href']) + + self.assertNotEqual(self.a.href, 'href') + + self.assertEqual(self.a.style, self.fields['style']) + + def test_method_link(self): + self.assertEqual(self.a.link(), self.a.href) + + self.assertEqual(self.a.link(), self.fields['href']) + + self.assertNotEqual(self.a.link(), 'href') + + def test_method_format_link(self): + ind = 1 + self.assertEqual(self.a.format_link(ind), f"[{ind}]: {self.a.href} (link)") + + self.assertEqual(self.a.format_link(2), "[2]: some_href (link)") + + self.assertNotEqual(self.a.format_link(3), "[1]: some_href (link)") + + def test_method_str(self): + self.assertEqual(str(self.a), "[link {}]") + + self.assertNotEqual(str(self.a), "[link ]") + + +class TestHTMLParser(TestCase): + def setUp(self): + date = time.struct_time((2019, 11, 26, 20, 53, 11, 1, 330, 0)) + self.response = { + 'feed': { + 
'title': 'Yahoo News - Latest News & Headlines', + }, + 'entries': [{ + 'title': 'Some title', + 'description': '
',
+ 'link': 'some long link',
+ 'published_parsed': date,
+ }]
+ }
+ self.article = {
+ 'title': 'Some title',
+ 'description': '
' \
+ f'Published: {article["pubDate"]}' \
+ f' ' \
+ f'{article["description"]}' \
+ f' ' \
+ f'Links:' \
+ f' ' \
+ f'Published: {article["pubDate"]}' \
+ f' ' \
+ f'{article["description"]}' \
+ f' ' \
+ f'Links:' \
+ f'
',
+ 'link': 'some long link',
+ 'published_parsed': date,
+ }
+ self.article_parsed = {
+ 'title': 'Some title',
+ 'description': 'Some long description',
+ 'dec_description': '[link 1][Image 2: Alt of image 2] Some long description',
+ 'link': 'some long link',
+ 'pubDate': 'Tue, 26 Nov 2019 20:53',
+ 'media': [{
+ 'src': 'some long link to source of image 2',
+ 'alt': 'Alt of image 2',
+ 'width': '130',
+ 'height': '86'
+ }],
+ 'links': [
+ 'some long link',
+ 'some long link to source of image 2'
+ ],
+ 'dec_links': [
+ '[1]: some long link (link)',
+ '[2]: some long link to source of image 2 (image)'
+ ]
+ }
+ self.response_parsed = {
+ 'title': 'Yahoo News - Latest News & Headlines',
+ 'articles': [{
+ 'title': 'Some title',
+ 'description': 'Some long description',
+ 'dec_description': '[link 1][Image 2: Alt of image 2] Some long description',
+ 'link': 'some long link',
+ 'pubDate': 'Tue, 26 Nov 2019 20:53',
+ 'media': [{
+ 'src': 'some long link to source of image 2',
+ 'alt': 'Alt of image 2',
+ 'width': '130',
+ 'height': '86'
+ }],
+ 'links': [
+ 'some long link',
+ 'some long link to source of image 2'
+ ],
+ 'dec_links': [
+ '[1]: some long link (link)',
+ '[2]: some long link to source of image 2 (image)'
+ ]
+ }]
+ }
+
+ self.parser = HTMLParser()
+
+    def test_parse(self):  # parse() with limit=1 must equal the expected parsed fixture
+        self.assertEqual(self.parser.parse(self.response, 1), self.response_parsed)
+
+    def test_clear_from_html(self):
+        # HTML escapes must be removed from plain strings, from dict keys and
+        # values, and from list items; non-string members stay untouched.
+        unescape = self.parser._clear_from_html
+        ampersand, quotes = 'st&ri`ng', 's"tr>>i@ng\''
+
+        plain = 'some_string'
+        self.assertEqual(unescape(html.escape(plain)), plain)
+
+        escaped_mapping = {
+            1: html.escape(ampersand),
+            'a': html.escape(quotes),
+            html.escape(quotes): html.escape(quotes),
+        }
+        expected_mapping = {
+            1: ampersand,
+            'a': quotes,
+            quotes: quotes,
+        }
+        self.assertEqual(unescape(escaped_mapping), expected_mapping)
+
+        escaped_items = [
+            1,
+            html.escape(ampersand),
+            'a',
+            html.escape(quotes),
+        ]
+        expected_items = [1, ampersand, 'a', quotes]
+        self.assertEqual(unescape(escaped_items), expected_items)
+
+    def test_get_limited_articles(self):  # limit=1 must return a list holding only the raw article
+        self.assertEqual(self.parser._get_limited_articles(self.response, 1), [self.article])
+
+ def test_get_next_tag(self):
+ line_with_tags = ''
+ self.assertEqual(self.parser._get_next_tag(line_with_tags), (0, len(line_with_tags)))
+
+ line_with_tags = ''
+ self.assertEqual(self.parser._get_next_tag(line_with_tags), (0, len(line_with_tags)))
+
+ line_with_tags = '" \
+ f"" \
+ f"" \
+ f"" \
+ f"
{articles['title']}
" \
+ f"{''.join([self._print_article(art) for art in articles['articles']])}" \
+ f"" \
+ f""
+
+ with open(print_to, 'w') as file:
+ file.write(html_text)
+
+ def _print_article(self, article, **kwargs):
+ """
+ Method for output one article use HTML.
+
+ :param article: article to output
+ :param kwargs: optional params. Use to extend a count given params in base method
+ :type article: dict
+ :return: string with html version of given article
+ :rtype: str
+ """
+ result = ""
+ result += '' \
+ '{} (Link to original)' \
+ '' \
+ '
'.format(article['link'], article['title'])
+ for image in article['media']:
+ attrs = " ".join([f"{k}=\"{v}\"" for k, v in image.items()])
+ result += "
" \
+ "".format(attrs)
+ result += f'
' \
+ f'
'
+ result += "
',
+ 'link': 'some long link',
+ 'published_parsed': date,
+ }]
+ }
+ self.response_parsed = {
+ 'title': 'Yahoo News - Latest News & Headlines',
+ 'articles': [{
+ 'title': 'Some title',
+ 'description': 'Some long description',
+ 'dec_description': '[link 1][Image 2: Alt of image 2] Some long description',
+ 'link': 'some long link',
+ 'pubDate': 'Tue, 26 Nov 2019 20:53',
+ 'media': [{
+ 'src': 'some long link to source of image 2',
+ 'alt': 'Alt of image 2',
+ 'width': '130',
+ 'height': '86'
+ }],
+ 'links': [
+ 'some long link',
+ 'some long link to source of image 2'
+ ],
+ 'dec_links': [
+ '[1]: some long link (link)',
+ '[2]: some long link to source of image 2 (image)'
+ ]
+ }]
+ }
+
+    def test_get_articles_from_url(self):
+        # feedparser.parse gets the stripped source; its result is passed on to Parser.parse.
+        max_count, marker = 1, 'Successful'
+        with patch('rssreader.feed_parser.Parser.parse', return_value=marker) as parser_stub, \
+                patch('feedparser.parse', return_value=self.response) as download_stub:
+            outcome = self.reader._get_articles_from_url(self.source, max_count)
+
+            self.assertEqual(outcome, marker)
+            download_stub.assert_called_with(self.source.strip())
+            parser_stub.assert_called_with(self.response, max_count)
+
+    def test_call_save(self):
+        # date=None: the call must return None and print the freshly fetched articles.
+        max_count = 1
+        self.reader._get_articles_from_url = MagicMock(return_value=self.response_parsed)
+
+        # StorageController.__init__ is stubbed so that no database file is created;
+        # save() is stubbed to report one stored article.
+        storage = 'rssreader.storage.controller.StorageController'
+        with patch('rssreader.output_controller.OutputController.print') as printer, \
+                patch(storage + '.__init__', return_value=None), \
+                patch(storage + '.save', return_value=1):
+            outcome = self.reader(self.source, max_count, None)
+
+            self.assertIsNone(outcome)
+            printer.assert_called_with(self.response_parsed)
+
+    def test_call_load(self):
+        # With a date the call must return None, ask storage for the articles
+        # and print exactly once what storage returned.
+        max_count, since = 1, '20191122'
+
+        # StorageController.__init__ is stubbed so that no database file is created.
+        storage = 'rssreader.storage.controller.StorageController'
+        with patch('rssreader.output_controller.OutputController.print') as printer, \
+                patch(storage + '.__init__', return_value=None), \
+                patch(storage + '.load', return_value=self.response_parsed) as loader:
+            outcome = self.reader(self.source, max_count, since)
+
+            self.assertIsNone(outcome)
+            printer.assert_called_once_with(self.response_parsed)
+            loader.assert_called_with(self.source, since, max_count)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/rssreader/storage/__init__.py b/rssreader/storage/__init__.py
new file mode 100644
index 0000000..038fb33
--- /dev/null
+++ b/rssreader/storage/__init__.py
@@ -0,0 +1 @@
+from .controller import *
diff --git a/rssreader/storage/controller.py b/rssreader/storage/controller.py
new file mode 100644
index 0000000..a318ef9
--- /dev/null
+++ b/rssreader/storage/controller.py
@@ -0,0 +1,75 @@
+"""
+Module with controllers for work with db.
+
+"""
+import logging
+
+from peewee import PeeweeException
+
+from .managers import *
+
+__all__ = ['StorageController']
+
+
+class StorageController:
+    """
+    Entry point for loading and saving articles in the database.
+    """
+
+    def __init__(self):
+        try:
+            # reuse_if_open: creating a second controller must not fail on the shared handle
+            DB_HANDLE.connect(reuse_if_open=True)
+            self.articles = ArticleManager()
+            self.sources = SourceManager()
+        except PeeweeException as e:
+            logging.error("Storage initialisation failed: %s", e)
+
+    def load(self, url, date, limit):
+        """
+        Load at most `limit` cached articles of a source.
+
+        :param url: source URL for getting articles from db
+        :param date: date from which need to load articles in string
+        :param limit: limit of articles for loading, None means no limit
+        :type url: str
+        :type date: str
+        :type limit: int or None
+        :return: dict with the source 'title' and the list of 'articles' dicts
+        :rtype: dict
+        """
+        from itertools import islice
+
+        clr_url = url.strip('/\\')
+        logging.info("Start loading articles from storage")
+        articles = self.sources.get_articles_with_data_from(clr_url, date)
+
+        logging.info("Completed. Cutting list of articles")
+        # islice stops reading after `limit` rows; a negative limit yields nothing
+        stop = None if limit is None else max(limit, 0)
+        selected = islice(articles['articles'], stop)
+
+        logging.info("Completed. Convert to dict each article")
+        articles['articles'] = [article.to_dict() for article in selected]
+        return articles
+
+    def save(self, url, articles, title):
+        """
+        Store parsed articles under their RSS source.
+
+        :param url: string URL of RSS source
+        :param articles: parsed articles
+        :param title: title of RSS source
+        :type url: str
+        :type articles: list
+        :type title: str
+        :return: count of new created articles in db
+        :rtype: int
+        """
+        clr_url = url.strip('/\\')
+
+        logging.info("Getting source model")
+        source = self.sources.get_or_create(clr_url, title)
+
+        logging.info("Completed. Saving articles in chosen source model")
+        return self.articles.create_and_return(articles, source)
diff --git a/rssreader/storage/controller_tests.py b/rssreader/storage/controller_tests.py
new file mode 100644
index 0000000..d67f533
--- /dev/null
+++ b/rssreader/storage/controller_tests.py
@@ -0,0 +1,14 @@
+import unittest
+
+from .controller import StorageController
+
+__all__ = ['TestStorageController']
+
+
+class TestStorageController(unittest.TestCase):
+    def test_something(self):  # placeholder: asserts a tautology, calls no StorageController code
+        self.assertEqual(True, True)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/rssreader/storage/managers/__init__.py b/rssreader/storage/managers/__init__.py
new file mode 100644
index 0000000..26d7a36
--- /dev/null
+++ b/rssreader/storage/managers/__init__.py
@@ -0,0 +1,2 @@
+from .article_manager import ArticleManager, DB_HANDLE
+from .source_manager import SourceManager, DB_HANDLE
diff --git a/rssreader/storage/managers/article_manager.py b/rssreader/storage/managers/article_manager.py
new file mode 100644
index 0000000..766ca8d
--- /dev/null
+++ b/rssreader/storage/managers/article_manager.py
@@ -0,0 +1,26 @@
+"""
+Module manager of database model Article.
+
+"""
+from rssreader.storage.models import Article, DB_HANDLE
+
+__all__ = ['ArticleManager']
+
+
+class ArticleManager:
+    def __init__(self):
+        Article.create_table()  # create the articles table through the model
+
+    @staticmethod
+    def create_and_return(structs, source):
+        """
+        Store every given article struct and report how many objects were created.
+
+        :param structs: article dicts to store
+        :param source: Source model object the articles belong to
+        :type structs: list
+        :type source: Source
+        :return: number of structs for which Article.from_struct returned an object
+        :rtype: int
+        """
+        return sum(Article.from_struct(struct, source) is not None for struct in structs)
diff --git a/rssreader/storage/managers/article_manager_tests.py b/rssreader/storage/managers/article_manager_tests.py
new file mode 100644
index 0000000..481eff3
--- /dev/null
+++ b/rssreader/storage/managers/article_manager_tests.py
@@ -0,0 +1,14 @@
+import unittest
+
+from .article_manager import ArticleManager
+
+__all__ = ['TestArticleManager']
+
+
+class TestArticleManager(unittest.TestCase):
+    def test_something(self):  # placeholder: asserts a tautology, calls no ArticleManager code
+        self.assertEqual(True, True)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/rssreader/storage/managers/source_manager.py b/rssreader/storage/managers/source_manager.py
new file mode 100644
index 0000000..03749c1
--- /dev/null
+++ b/rssreader/storage/managers/source_manager.py
@@ -0,0 +1,51 @@
+"""
+Module manager of database model Source.
+
+"""
+import logging
+
+from rssreader.storage.models import Source, DB_HANDLE
+
+__all__ = ['SourceManager']
+
+
+class SourceManager:
+    def __init__(self):
+        Source.create_table()  # create the sources table through the model
+
+    @staticmethod
+    def get_or_create(url, title):
+        """
+        Return the Source stored for the URL, creating it when it is missing.
+
+        :param url: URL identifying the RSS source
+        :param title: title given to the source if it has to be created
+        :type url: str
+        :type title: str
+        :return: the existing Source for this URL, otherwise a newly
+            created one
+        :rtype: Source
+        """
+        return Source.get_or_create(url, title=title)
+
+    @staticmethod
+    def get_articles_with_data_from(url, date):
+        """
+        Collect the articles of a source published on or after the given date.
+        An unknown URL is stored as a new source without a title, because the
+        lookup goes through Source.get_or_create.
+
+        :param url: URL identifying the RSS source
+        :param date: lower bound of the publication date
+        :type url: str
+        :type date: str
+        :return: dict with keys 'title' (source title) and 'articles'
+        :rtype: dict
+        """
+        logging.info("Getting source model")
+        feed = Source.get_or_create(url)
+
+        logging.info("Completed. Getting articles from source")
+        result = {'title': feed.title}
+        result['articles'] = feed.sort_by_date(date)
+        return result
diff --git a/rssreader/storage/managers/source_manager_tests.py b/rssreader/storage/managers/source_manager_tests.py
new file mode 100644
index 0000000..1fb75cc
--- /dev/null
+++ b/rssreader/storage/managers/source_manager_tests.py
@@ -0,0 +1,14 @@
+import unittest
+
+from .source_manager import SourceManager
+
+__all__ = ['TestSourceManager']
+
+
+class TestSourceManager(unittest.TestCase):
+    def test_something(self):  # placeholder: asserts a tautology, calls no SourceManager code
+        self.assertEqual(True, True)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/rssreader/storage/models.py b/rssreader/storage/models.py
new file mode 100644
index 0000000..d971463
--- /dev/null
+++ b/rssreader/storage/models.py
@@ -0,0 +1,146 @@
+"""
+Module with description of models in database.
+Used SQLite3 database and peewee module for work with it.
+
+"""
+
+import datetime
+import json
+
+import peewee
+
+__all__ = ['DB_HANDLE', 'Source', 'Article']
+
+STORAGE_DATABASE = 'storage.sqlite3'
+
+DB_HANDLE = peewee.SqliteDatabase(STORAGE_DATABASE)
+
+
+class Source(peewee.Model):
+    """
+    RSS feed source stored in the SQLite database.
+
+    Fields: title (title of the source, may be NULL) and url (unique URL of the source).
+    """
+    title = peewee.TextField(null=True)
+    url = peewee.TextField(unique=True)
+
+    class Meta:
+        database = DB_HANDLE
+        db_table = "sources"
+
+    @classmethod
+    def get_or_create(cls, url, title=None):
+        """
+        Return the Source with the given URL, creating it when none exists.
+
+        :param url: URL of the RSS source, used as lookup key
+        :param title: title stored only when a new Source is created
+        :return: the found or newly created Source object
+        :rtype: Source
+        """
+        try:
+            return super().get(Source.url == url)
+        except peewee.DoesNotExist:
+            return cls.create(url=url, title=title)
+
+    def sort_by_date(self, date):
+        """
+        Select the articles of this source published on or after the given date.
+
+        :param date: lower bound, a 'YYYYMMDD' string or a datetime object
+        :return: peewee query yielding the matching Article objects
+        """
+        if isinstance(date, str):
+            try:
+                # SQLite keeps datetimes as 'YYYY-MM-DD HH:MM:SS' text; a raw 'YYYYMMDD'
+                # string would be compared as text and miss same-year articles.
+                date = datetime.datetime.strptime(date, "%Y%m%d")
+            except ValueError:
+                pass  # other formats are left to peewee's own date handling
+        return self.articles.select().where(Article.pubDate >= date)
+
+
+class Article(peewee.Model):
+    """
+    One RSS feed article cached in the SQLite database.
+    NOTE(review): Meta.order_by is presumably ignored by peewee 3 - confirm.
+
+    Fields:
+        title: title of article
+        description: description of article
+        dec_description: decorated description of article
+        link: absolute URL to article, unique per article
+        pubDate: date of publication article
+        media: JSON text with all media objects from article
+        source: Source object of the containing RSS source
+        links: JSON text with all links from article without any formatting
+        dec_links: JSON text with decorated links from article
+    """
+    title = peewee.TextField()
+    description = peewee.TextField()
+    dec_description = peewee.TextField()
+    link = peewee.CharField(unique=True)
+    pubDate = peewee.DateTimeField()
+    media = peewee.TextField()
+    source = peewee.ForeignKeyField(Source, backref='articles')
+    links = peewee.TextField()
+    dec_links = peewee.TextField()
+
+    class Meta:
+        database = DB_HANDLE
+        db_table = "articles"
+        order_by = ('-pubDate',)
+
+    @classmethod
+    def from_struct(cls, struct, source):
+        """
+        Create and store an Article from a parsed article dict.
+
+        A 'pubDate' of 'None' (no date in the feed) is replaced by the current time.
+
+        :param struct: dictionary with info about article
+        :param source: Source object the article is attached to
+        :type struct: dict
+        :type source: Source
+        :return: the created Article, or None when peewee raises IntegrityError
+            (for instance because the unique link is already stored)
+        :rtype: Article or None
+        """
+        raw_date = struct['pubDate']
+        published = datetime.datetime.now() if raw_date == 'None' \
+            else datetime.datetime.strptime(raw_date, "%a, %d %b %Y %H:%M")
+        values = dict(
+            title=struct['title'],
+            description=struct['description'],
+            dec_description=struct['dec_description'],
+            link=struct['link'],
+            pubDate=published,
+            media=json.dumps(struct['media']),
+            source=source,
+            links=json.dumps(struct['links']),
+            dec_links=json.dumps(struct['dec_links']),
+        )
+        try:
+            return cls.create(**values)
+        except peewee.IntegrityError:
+            return None
+
+    def to_dict(self):
+        """
+        Build a plain dict from the fields of this article.
+
+        :return: dict with article info; JSON text columns are decoded again
+        :rtype: dict
+        """
+        # Text columns are copied as they are.
+        plain = ('title', 'description', 'dec_description', 'link')
+        result = {name: getattr(self, name) for name in plain}
+
+        # Remaining keys are converted: formatted date, decoded JSON, source URL.
+        result['pubDate'] = self.pubDate.strftime("%a, %d %b %Y %H:%M")
+        result['media'] = json.loads(self.media)
+        result['source'] = self.source.url
+        result['links'] = json.loads(self.links)
+        result['dec_links'] = json.loads(self.dec_links)
+        return result
diff --git a/rssreader/storage/models_tests.py b/rssreader/storage/models_tests.py
new file mode 100644
index 0000000..79ec14d
--- /dev/null
+++ b/rssreader/storage/models_tests.py
@@ -0,0 +1,19 @@
+import unittest
+
+# Both models are imported for the two test cases below
+# (the second name used to be a duplicate of Source).
+from .models import Article, Source
+
+__all__ = ['TestSourceModel', 'TestArticleModel']
+
+
+class TestSourceModel(unittest.TestCase):
+    """Placeholder test case for the Source model; no model behaviour is exercised yet."""
+
+    def test_something(self):
+        """Trivially passing check that keeps the test case discoverable."""
+        self.assertTrue(True)
+
+
+class TestArticleModel(unittest.TestCase):
+    """Placeholder test case for the Article model; no model behaviour is exercised yet."""
+
+    def test_something(self):
+        """Trivially passing check that keeps the test case discoverable."""
+        self.assertTrue(True)
+
+
+# Run the test cases of this module when it is executed as the main module.
+if __name__ == '__main__':
+ unittest.main()
diff --git a/rssreader/storage/tests.py b/rssreader/storage/tests.py
new file mode 100644
index 0000000..40a9623
--- /dev/null
+++ b/rssreader/storage/tests.py
@@ -0,0 +1,9 @@
+# Aggregator for the storage package tests: the star imports below pull the test
+# classes exported by each test module into this namespace, where unittest.main()
+# looks for them when this module is run as the main module.
+# NOTE(review): the relative imports need the module to be run as part of its package
+# (e.g. with ``python -m``), not as a bare script path - confirm the intended invocation.
+import unittest
+
+from .models_tests import *
+from .controller_tests import *
+from .managers.article_manager_tests import *
+from .managers.source_manager_tests import *
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/rssreader/tests.py b/rssreader/tests.py
new file mode 100644
index 0000000..4b2349c
--- /dev/null
+++ b/rssreader/tests.py
@@ -0,0 +1,9 @@
+# Aggregator for the rssreader package tests: the star imports below pull the test
+# classes exported by each test module into this namespace, where unittest.main()
+# looks for them when this module is run as the main module.
+# NOTE(review): the relative imports need the module to be run as part of its package
+# (e.g. with ``python -m``), not as a bare script path - confirm the intended invocation.
+import unittest
+
+from .storage.tests import *
+from .feed_parser_tests import *
+from .output_controller_tests import *
+from .rss_reader_tests import *
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/rssreader_server/api_v1/__init__.py b/rssreader_server/api_v1/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/rssreader_server/api_v1/admin.py b/rssreader_server/api_v1/admin.py
new file mode 100644
index 0000000..0ea393a
--- /dev/null
+++ b/rssreader_server/api_v1/admin.py
@@ -0,0 +1,5 @@
+from django.contrib import admin
+
+# Explicit imports instead of a wildcard: only these two models are used here.
+from .models import Article, Source
+
+# Expose both models in the Django admin site with the default admin options.
+admin.site.register(Source)
+admin.site.register(Article)
diff --git a/rssreader_server/api_v1/apps.py b/rssreader_server/api_v1/apps.py
new file mode 100644
index 0000000..9e01182
--- /dev/null
+++ b/rssreader_server/api_v1/apps.py
@@ -0,0 +1,5 @@
+from django.apps import AppConfig
+
+
+class RssreaderAppConfig(AppConfig):
+ """Django application configuration for the ``api_v1`` app."""
+ # Name of the application this configuration applies to.
+ name = 'api_v1'
diff --git a/rssreader_server/api_v1/converters.py b/rssreader_server/api_v1/converters.py
new file mode 100644
index 0000000..88ac959
--- /dev/null
+++ b/rssreader_server/api_v1/converters.py
@@ -0,0 +1,301 @@
+"""
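+Converters that render news articles into files of different formats.
+Public classes (see ``__all__``): SamplePrintResponseConverter (plain text),
+JSONPrintResponseConverter, PDFPrintResponseConverter and HTMLPrintResponseConverter.
+Each converter's ``print(articles, filename)`` writes the result to ``filename`` inside
+its ``cache_folder`` and returns ``filename``.
+NOTE(review): this text used to recommend an ``OutputController`` class with the options
+ * to_json: bool - output in JSON or not
+ * to_pdf: str - filename for the PDF output
+ * to_html: str - filename for the HTML output
+and sample (plain-text) output by default; no such class is listed in ``__all__`` here,
+so confirm whether that advice still applies.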
+
+"""
+import json
+import os
+from abc import ABC
+
+from fpdf import FPDF
+from requests import get
+
+__all__ = ['SamplePrintResponseConverter', 'JSONPrintResponseConverter',
+ 'PDFPrintResponseConverter', 'HTMLPrintResponseConverter']
+
+
+class BaseResponseConverter(ABC):
+ """
+ Common base for the response converters of this module.
+
+ The three methods below have docstring-only bodies (they return None); subclasses
+ override the ones they need. None of them is marked abstract, so deriving from
+ ABC does not by itself prevent instantiation.
+ """
+ # Folder (relative path) in which converters place the files they write.
+ cache_folder = '__cache__'
+
+ def print(self, articles, filename, **kwargs):
+ """
+ Output the given news articles into a file.
+
+ :param articles: dict with the feed title and the list of news articles
+ :param filename: name of the output file
+ :param kwargs: optional params; lets subclasses accept extra arguments
+ :type articles: dict
+ """
+
+ def _print_article(self, article, **kwargs):
+ """
+ Output a single article in the format of the concrete converter.
+
+ :param article: article to output
+ :param kwargs: optional params; lets subclasses accept extra arguments
+ :type article: dict
+ """
+
+ def _print_title(self, title, **kwargs):
+ """
+ Output the given feed title.
+
+ :param title: title to output
+ :param kwargs: optional params; lets subclasses accept extra arguments
+ :type title: str
+ """
+
+
+class SamplePrintResponseConverter(BaseResponseConverter):
+    """
+    Converter that renders the articles as plain text and writes them into a file.
+    """
+    delimiter = "#" * 80
+
+    def print(self, articles, filename, **kwargs):
+        """
+        Write the plain-text form of the given articles into the given file.
+
+        The file is created inside ``cache_folder`` (the folder is created when
+        missing). A "Feed: <title>" header is written only when ``articles`` has a
+        non-None 'title'.
+
+        :param articles: dict with an optional 'title' and a list under 'articles'
+        :param filename: name of the file to write inside the cache folder
+        :type articles: dict
+        :type filename: str
+        :return: the given filename (not the full path)
+        :rtype: str
+        """
+        # Collect the pieces first, so a missing title cannot leave the text undefined.
+        parts = []
+        if (title := articles.get('title', None)) is not None:
+            parts.append(f"Feed: {title}\n")
+        # NOTE(review): consecutive articles are joined without a separating newline,
+        # so the next "Title:" follows the delimiter on the same line - confirm intent.
+        parts.extend(self._print_article(article) for article in articles['articles'])
+
+        os.makedirs(self.cache_folder, exist_ok=True)
+        with open(os.path.join(self.cache_folder, filename), 'w') as file:
+            file.write("".join(parts))
+
+        return filename
+
+    def _print_article(self, article, **kwargs):
+        """
+        Build the plain-text form of one article.
+
+        :param article: dict with the keys 'title', 'pubDate', 'link',
+            'dec_description' and 'dec_links'
+        :type article: dict
+        :return: text of the article, ending with the delimiter line (no trailing newline)
+        :rtype: str
+        """
+        lines = [
+            f"Title: {article['title']}",
+            f"Date: {article['pubDate']}",
+            f"Link: {article['link']}",
+            "",
+            f"{article['dec_description']}",
+            "",
+            "Links:",
+            *(f"{link}" for link in article['dec_links']),
+            self.delimiter,
+        ]
+        return "\n".join(lines)
+
+
+class JSONPrintResponseConverter(BaseResponseConverter):
+    """
+    Converter that serializes the articles to JSON and writes them into a file.
+    """
+
+    def print(self, articles, filename, **kwargs):
+        """
+        Write the JSON form of the given articles into the given file.
+
+        The file is created inside ``cache_folder`` (the folder is created when missing).
+
+        :param articles: JSON-serializable structure with the articles
+        :param filename: name of the file to write inside the cache folder
+        :type filename: str
+        :return: the given filename (not the full path)
+        :rtype: str
+        """
+        os.makedirs(self.cache_folder, exist_ok=True)
+        with open(os.path.join(self.cache_folder, filename), 'w') as file:
+            json.dump(articles, file)
+
+        return filename
+
+
+class PDFPrintResponseConverter(BaseResponseConverter):
+    """
+    Converter that renders the given articles into a PDF file using FPDF.
+    """
+    image_ext = 'jpg'
+    cache_folder = "__cache__"
+    extension = '.pdf'
+    delimiter_before = "_" * 59
+    # Underscore rule of the same width with a "%d" placeholder for the article number.
+    delimiter_after = "%d".rjust(55, '_').ljust(59, '_')
+    # Seconds to wait for an image download before giving up.
+    download_timeout = 10
+
+    def print(self, articles, filename, **kwargs):
+        """
+        Write the given articles as a PDF document into the given file.
+
+        The file is created inside ``cache_folder`` (the folder is created when
+        missing). The feed title is printed only when ``articles`` has a non-None 'title'.
+
+        :param articles: dict with an optional 'title' and a list under 'articles'
+        :param filename: name of the file to write inside the cache folder
+        :type articles: dict
+        :type filename: str
+        :return: the given filename (not the full path)
+        :rtype: str
+        """
+        writer = FPDF()
+        writer.add_page()
+        if (title := articles.get('title')) is not None:
+            self._print_title(title, writer=writer)
+
+        for i, article in enumerate(articles['articles']):
+            self._print_article(article, writer=writer, ind=i)
+
+        # The folder exists only if an image was downloaded before; make sure of it.
+        os.makedirs(self.cache_folder, exist_ok=True)
+        writer.output(os.path.join(self.cache_folder, filename))
+
+        return filename
+
+    def _print_title(self, title, **kwargs):
+        """
+        Print the title of the RSS feed centered at the current position.
+
+        :param title: title of RSS Feed
+        :param kwargs: must contain 'writer', the FPDF object to draw with
+        :type title: str
+        """
+        writer = kwargs['writer']
+        writer.set_font('Courier', 'B', 20)
+        # The title gets the same latin-1 clean-up as the article texts.
+        writer.multi_cell(0, 30, self._clean_each_elem_article(title), align='C')
+
+    def _print_article(self, article, **kwargs):
+        """
+        Print one article: delimiter, title, date, images, description, link, delimiter.
+
+        :param article: dict with the keys 'title', 'pubDate', 'media',
+            'description' and 'link'
+        :param kwargs: must contain 'writer' (FPDF object) and 'ind' (0-based article index)
+        :type article: dict
+        """
+        writer = kwargs['writer']
+        ind = kwargs['ind']
+
+        article = self._clean_each_elem_article(article)
+
+        writer.set_font("Courier", 'B', 15)
+        writer.multi_cell(0, 10, self.delimiter_before)
+
+        writer.set_font("Courier", "B", 13)
+        writer.multi_cell(0, 7, f"Title: {article['title']}", align="L")
+
+        writer.set_font("Courier", "BI", 11)
+        writer.multi_cell(0, 10, f"Date: {article['pubDate']}", align='R')
+
+        for img in article['media']:
+            self._draw_image(writer, img)
+
+        writer.set_font("Courier", size=12)
+        writer.multi_cell(0, 5, article['description'], align='L')
+
+        writer.set_font("Courier", "BI", size=9)
+        writer.multi_cell(0, 10, f"Link: {article['link']}", align='L')
+
+        writer.set_font("Courier", 'B', 15)
+        # The closing delimiter carries the 1-based number of the article.
+        writer.multi_cell(0, 10, self.delimiter_after % (ind + 1))
+
+    def _clean_each_elem_article(self, elem):
+        """
+        Recursively replace characters that cannot be encoded in 'latin-1'.
+
+        Strings are re-encoded with the 'replace' error handler; dicts and lists are
+        processed element by element; any other value is returned unchanged.
+
+        :param elem: current element to clean
+        :return: cleaned copy of the element (same structure)
+        """
+        if isinstance(elem, str):
+            return elem.encode('latin-1', 'replace').decode('latin-1')
+        if isinstance(elem, dict):
+            return {k: self._clean_each_elem_article(v) for k, v in elem.items()}
+        if isinstance(elem, list):
+            return [self._clean_each_elem_article(el) for el in elem]
+        # Numbers, None and other values carry no text to clean; keep them as they are.
+        return elem
+
+    def _draw_image(self, writer, image):
+        """
+        Download the image and draw it with the given FPDF writer.
+
+        Drawing is best-effort: when the image cannot be downloaded or drawn, a
+        "NO IMAGE: <alt>" line is printed instead.
+
+        :param writer: FPDF object for drawing in file
+        :param image: dict with info about image ('src' and, optionally, 'alt')
+        :type writer: fpdf.FPDF
+        :type image: dict
+        """
+        try:
+            source = image['src']
+            image_name = f"{source.split('/')[-1]}.{self.image_ext}"
+            image_path = self._download_to(source, image_name)
+            writer.image(image_path, type=self.image_ext, link=source, x=(writer.w - 50) // 2)
+        except (KeyError, ValueError, TypeError, RuntimeError, OSError):
+            # OSError also covers failed downloads: requests' exceptions derive from IOError.
+            writer.set_font("Courier", 'B', 10)
+            writer.multi_cell(0, 3, f"NO IMAGE: {image.get('alt', '')}", align='C')
+
+    def _download_to(self, link, filename):
+        """
+        Download the image by link into the given file of the cache folder.
+
+        :param link: link to image
+        :param filename: name of file inside the cache folder; it is overwritten
+        :type link: str
+        :type filename: str
+        :return: path to the downloaded image (cache folder joined with filename)
+        :rtype: str
+        :raises requests.RequestException: on network errors, timeouts or HTTP error statuses
+        """
+        os.makedirs(self.cache_folder, exist_ok=True)
+        # A timeout keeps one unresponsive image host from blocking the whole export.
+        response = get(link, timeout=self.download_timeout)
+        response.raise_for_status()
+        ready_image_path = os.path.join(self.cache_folder, filename)
+        with open(ready_image_path, 'wb') as handler:
+            handler.write(response.content)
+
+        return ready_image_path
+
+
+class HTMLPrintResponseConverter(BaseResponseConverter):
+ """
+ Class controller for output given articles using HTML in file.
+ """
+ extension = '.html'
+
+ def print(self, articles, filename, **kwargs):
+ """
+ Write the HTML form of the given articles into the given file.
+
+ The text is built from articles['title'] followed by the concatenated results of
+ self._print_article for every item of articles['articles'], and is written into
+ ``filename`` inside ``cache_folder``.
+
+ :param articles: dict with 'title' and a list under 'articles'
+ :param filename: name of the file to write inside the cache folder
+ :return: the given filename (not the full path)
+ :rtype: str
+ """
+ # NOTE(review): the empty f"" pieces and the line break inside the title literal
+ # suggest that the HTML tags of this template were lost from this copy - restore
+ # them from the original file. The values are inserted without HTML escaping.
+ html_text = f"" \
+ f"" \
+ f"{articles['title']}
" \
+ f"{''.join([self._print_article(art) for art in articles['articles']])}" \
+ f"" \
+ f""
+
+ with open(os.path.join(self.cache_folder, filename), 'w') as file:
+ file.write(html_text)
+
+ return filename
+
+ def _print_article(self, article, **kwargs):
+ result = ""
+ result += '' \
+ '{} (Link to original)' \
+ '' \
+ '
'.format(article['link'], article['title'])
+ for image in article['media']:
+ attrs = " ".join([f"{k}=\"{v}\"" for k, v in image.items()])
+ result += "
" \
+ "".format(attrs)
+ result += f'
' \
+ f'
'
+ result += "