diff --git a/.gitignore b/.gitignore index 894a44c..3da6272 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,6 @@ venv.bak/ # mypy .mypy_cache/ + +#PyCharm +.idea \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..540b720 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include requirements.txt \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..48ee4ca --- /dev/null +++ b/README.md @@ -0,0 +1,55 @@ +# RSS reader + +Python RSS reader - command-line utility. + +## [Usage]: + * positional/required arguments: + * source .. RSS URL + * optional arguments: + * -h, --help .. Show help message and exit. + * --version .. Print version info. + * --json .. Print result as JSON in stdout. + * --verbose .. Outputs verbose status messages. + * --limit .. Limit news topics if this parameter is provided. + * --date .. Return cached news from the specified day. Format is YYYYMMDD. + * --to-html .. Convert news into html format and save a file to the specified path. + * --to-pdf .. Convert news into pdf format and save a file to the specified path. + * --colorize .. Output in colorized mode + + ## [Cache] + Cached RSS feeds are stored in the `cache.db` file inside the `~/.rss-reader` folder. The cache uses sqlite3 to store RSS feeds. + Every time the utility runs, the cache module stores the news parsed from the RSS feed, or updates it if the news already exists. + +## [Converter] + News can be converted into `HTML` and `PDF` formats. If the file already exists at the specified path, it will be overwritten. + + + ## [JSON structure] +
+{
+  "title": "Yahoo News - Latest News & Headlines",
+  "url": "https://news.yahoo.com/rss/",
+  "image": "http://l.yimg.com/rz/d/yahoo_news_en-US_s_f_p_168x21_news.png",
+  "entries": [
+    {
+      "entry": {
+        "link": "https://news.yahoo.com/1-protesters-burn-tyres-southern-113205795.html",
+        "body": {
+          "title": "UPDATE 3-Iraq protesters burn shrine entrance in holy city, PM quitting 'not enough'",
+          "date": "Sat, 30 Nov 2019 - [11:32:05]",
+          "links": [
+            {
+              "rel": "alternate",
+              "type": "text/html",
+              "href": "https://news.yahoo.com/1-protesters-burn-tyres-southern-113205795.html"
+            }
+          ],
+          "media": [],
+          "description": "Iraqi protesters set fire to the entrance of a shrine in the southern holy city of Najaf on Saturday and security forces fired tear gas to disperse them, police and a demonstrator at the scene said, risking more bloodshed after a rare day of calm.  The demonstrator sent a video to Reuters of a doorway to the Hakim shrine blazing as protesters cheered and filmed it on their mobile phones.  The incident took place during one of the bloodiest weeks of Iraq\u2019s anti-government unrest, which erupted last month."
+        }
+      }
+    }
+  ]
+}
+
+
import setuptools
from src import conf
from pathlib import Path

# Directory that holds setup.py, requirements.txt and README.md
here = Path(__file__).parent


def get_install_requirements():
    """
    Read the pinned dependencies from the requirements.txt next to setup.py.

    Blank lines and ``#`` comment lines are skipped so they never reach
    ``install_requires`` as empty or invalid requirement strings.
    :return: list
    """
    with open(here.joinpath('requirements.txt'), 'r', encoding='utf-8') as file:
        stripped_lines = (line.strip() for line in file)
        return [line for line in stripped_lines if line and not line.startswith('#')]


with open(here.joinpath('README.md'), encoding='utf-8') as f:
    long_description = f.read()


setuptools.setup(
    name=conf.__package__,
    version=conf.__version__,
    license='MIT',
    author=conf.__author__,
    author_email=conf.__email__,
    # The short summary comes from conf; the README (markdown) is the long
    # description. The two were previously swapped.
    description=conf.__description__,
    long_description=long_description,
    long_description_content_type='text/markdown',
    url=conf.__url__,
    packages=setuptools.find_packages(),
    include_package_data=True,
    package_data={
        '': ['*.jinja2', '*.yaml', '*.yml'],
    },
    install_requires=get_install_requirements(),
    python_requires='>=3.6',
    entry_points={
        'console_scripts':
            ['%s = src.rss_reader:main' % conf.__package__]
    }
)
class Cache(Singleton):
    """
    This class stores and updates cached feed data and gives access
    to the cached feed entries

    Attributes:
        _db_name attribute contain default name of database
        cache_default attribute contain default cache date to retrieve,
            a YYYYMMDD string computed once when the class is created
    """

    _db_name = 'cache.db'

    cache_default = datetime.today().strftime('%Y%m%d')

    def __init__(self) -> None:
        """
        This constructor locates (or creates) the cache database file
        and opens the sqlite3 db layer instance used to work with it
        """
        self._cache_db_file = self._storage_initialize()
        self._db = Sqlite(str(self._cache_db_file))

    def _storage_initialize(self) -> Path:
        """
        This method checks that the cache folder and the cache database exist
        and creates them when they are missing. Return path to database
        :return: Path
        """
        cache_path = Path.home().joinpath('.' + conf.__package__)

        if not cache_path.exists():
            # parents/exist_ok keep a concurrent run or a missing parent from crashing
            cache_path.mkdir(parents=True, exist_ok=True)
            Logger.log(f'Created {conf.__package__} local dir with path: {cache_path}')

        cache_file = cache_path.joinpath(self._db_name)

        if not cache_file.exists():
            Sqlite.create_database(str(cache_file))
            Logger.log(f'Created local storage with path: {cache_file}')

        Logger.log(f'Cache local storage with path: {cache_file}')

        return cache_file

    def append_feeds(self, feed: Map, feed_entities_list: list) -> None:
        """
        This method append or update feeds entries from Feed to cache storage.
        The database connection is closed once all entries are written
        :param feed: Map
        :param feed_entities_list: list
        :return: None
        """
        Logger.log(f'Check on feed cache exist on url: {feed.url}')

        feed_id = self._db.find_where('feeds', 'url', feed.url, 'like')

        if not feed_id:
            feed_id = self._insert_feed_data(feed)

        Logger.log('Start caching feeds: \n')

        for feed_entry in feed_entities_list:
            if not self._db.find_where('feeds_entries', 'link', feed_entry.link, 'like'):
                Logger.log(f'Caching feed {color.blue(feed_entry.link)} INSERTED')
            else:
                Logger.log(f'Caching feed {color.blue(feed_entry.link)} UPDATED')

            self._insert_feed_entry_into_cache(feed_id, feed_entry)

        print("\n")
        Logger.log('Cached feeds was updated')

        self._db.close()

    def _insert_feed_entry_into_cache(self, feed_id: int, entry: FeedEntry) -> None:
        """
        This method insert feed entry of rss feed into cache storage:
        the entry general data first, then its links and media
        :param feed_id: int
        :param entry: FeedEntry
        :return: None
        """
        self._write_feed_entry_general(entry, feed_id)

        # Must be read right after the general insert: links and media
        # reference the row that was just written
        feed_entry_id = self._db.cursor.lastrowid

        self._write_feed_entry_links(feed_entry_id, entry)
        self._write_feed_entry_media(feed_entry_id, entry)

    def _insert_feed_data(self, feed: Map) -> int:
        """
        This method store rss feed data into cache storage
        :param feed: Map
        :return: int id of the inserted feed row
        """
        Logger.log(f'Add feed cache exist on url: {feed.url}')

        self._db.write(
            'feeds',
            ['url', 'encoding', 'image'],
            [feed.url, feed.encoding, feed.image]
        )

        return self._db.cursor.lastrowid

    def _write_feed_entry_general(self, entry: FeedEntry, feed_id: int) -> None:
        """
        Insert feed entry general data into cache driver.
        Title and description are html-escaped before they are stored
        :param entry: FeedEntry
        :param feed_id: int
        :return: None
        """
        return self._db.write(
            'feeds_entries',
            ['feed_id', 'title', 'description', 'link', 'published'],
            [
                feed_id,
                html.escape(entry.title),
                html.escape(entry.description),
                entry.link,
                entry.published,
            ]
        )

    def _write_feed_entry_links(self, feed_entry_id: int, entry: FeedEntry) -> None:
        """
        Insert all feed entry links into cache driver
        :param feed_entry_id: int
        :param entry: FeedEntry
        :return: None
        """
        # No `return` inside the loop: every link must be written, not only the first
        for link in entry.links:
            self._db.write(
                'feed_entry_links',
                ['feed_entry_id', 'href', 'type'],
                [feed_entry_id, link.href, link.type]
            )

    def _write_feed_entry_media(self, feed_entry_id: int, entry: FeedEntry) -> None:
        """
        Insert all feed entry media into cache driver
        :param feed_entry_id: int
        :param entry: FeedEntry
        :return: None
        """
        # No `return` inside the loop: every media item must be written, not only the first
        for media in entry.media:
            self._db.write(
                'feed_entry_media',
                ['feed_entry_id', 'url', 'additional'],
                [feed_entry_id, media.url, html.escape(media.alt)]
            )

    def load_feeds_entries(self, url: str, date, limit: int = 100) -> list:
        """
        This method load feed entries from cache storage to Feed.
        If cache entries not found raised Exception
        :param url: str
        :param date: date/datetime object, or a YYYYMMDD string such as cache_default
        :param limit: int
        :return: list
        :raises Exception: when nothing is cached for the requested day
        """
        if isinstance(date, str):
            # cache_default is a YYYYMMDD string; the code below needs a date object
            date = datetime.strptime(date, '%Y%m%d')

        Logger.log(
            f'Load file from cache storage '
            f'{date.strftime("from %d, %b %Y")}'
            f'{(date + timedelta(days=1)).strftime(" to %d, %b %Y")}'
        )

        # Normalize to midnight so the range covers the whole requested day
        date = datetime.combine(date, datetime.min.time())

        cache_list = self._get_specify_by_date(url, date, limit)

        if not cache_list:
            raise Exception(
                f'Cache retrieve nothing. Storage for specified data is empty '
                f'{date.strftime("from %d, %b %Y")}'
                f'{(date + timedelta(days=1)).strftime(" to %d, %b %Y")}'
            )
        # TODO: wrap into CacheEntry
        return self._db.map_data(cache_list)

    def load_feed_general(self, url: str) -> Map:
        """
        This method load feed general data by url
        :param url: str
        :return: Map with the first matching row of the feeds table
        :raises IndexError: when no feed with this url is cached
        """
        return self._db.map_data(
            self._db.where(
                'feeds',
                ['url', '=', url],
                limit=1
            )
        )[0]

    def _get_specify_by_date(self, url: str, date, limit: int = 100) -> list:
        """
        Retrieve cache data from storage for the day that starts at the given date,
        each entry extended with its cached links and media
        :param url: str
        :param date: datetime
        :param limit: int
        :return: list
        """
        feed_id = self._db.find_where('feeds', 'url', url, 'like')

        cache_general_data = self._db.where(
            'feeds_entries',
            ['feed_id', '=', feed_id],
            ['published', '>=', date],
            ['published', '<=', date + timedelta(days=1)],
            limit=limit
        )

        output_cache = []

        for cache_entry in self._db.map_data(cache_general_data):
            cache_entry['links'] = self._db.map_data(
                self._db.where('feed_entry_links', ['feed_entry_id', '=', cache_entry['id']])
            )

            cache_entry['media'] = self._db.map_data(
                self._db.where('feed_entry_media', ['feed_entry_id', '=', cache_entry['id']])
            )

            output_cache.append(cache_entry)

        return output_cache
class Sqlite:
    """This class provided layer over sqlite3 for standard crud operation
    and help store cache into database.

    Values are always passed to sqlite3 as bound ``?`` parameters, so quotes
    inside urls or titles cannot break (or inject into) a statement. Table
    names, column names and operators are still interpolated and must come
    from trusted code, never from user input.
    """

    # Statement templates with `?` placeholders for the values. They are kept
    # on the class because the value-bearing statements need placeholders
    # instead of text spliced into quotes.
    _WRITE_SQL = 'INSERT OR REPLACE INTO {0} ({1}) VALUES ({2});'
    _FIND_WHERE_SQL = 'SELECT id FROM {0} WHERE {1} {2} ?;'
    _WHERE_SQL = 'SELECT * FROM {0} WHERE {1} LIMIT ?;'
    _GET_SQL = 'SELECT {1} FROM {0} LIMIT ?;'
    _GET_LAST_SQL = 'SELECT {1} FROM {0} ORDER BY rowid DESC LIMIT 1;'

    def __init__(self, path: str) -> None:
        """
        This constructor start open connection to sqlite database
        :param path: str
        """
        self.conn = None
        self.cursor = None

        self.open(path)

    def open(self, path: str) -> None:
        """
        This method try to open sqlite connection and set current connection cursor.
        On sqlite3 errors the process exits with the error as message
        :param path: str
        :return: None
        """
        try:
            # isolation_level=None puts the connection into autocommit mode
            self.conn = sqlite3.connect(path, isolation_level=None)
            self.conn.row_factory = sqlite3.Row
            self.cursor = self.conn.cursor()

        except sqlite3.Error as e:
            sys.exit(e)

    def close(self) -> None:
        """
        This method commit changes and close connection.
        Calling it again after the connection was closed is a no-op
        :return: None
        """
        if self.conn:
            self.conn.commit()
            self.cursor.close()
            self.conn.close()
            # Drop the references so a second close() does not touch a closed handle
            self.cursor = None
            self.conn = None

    @staticmethod
    def _normalize_limit(limit) -> int:
        """Turn a limit into an int for ``LIMIT ?``; None means no limit (-1 in SQLite)"""
        return -1 if limit is None else int(limit)

    @classmethod
    def map_data(cls, data) -> list:
        """
        This method wrap retrieving data to Map object for proper usage
        :param data: sqlite3.Cursor or an iterable of rows
        :return: list
        """
        if isinstance(data, sqlite3.Cursor):
            return [Map(row) for row in data.fetchall()]

        return [Map(row) for row in data]

    @classmethod
    def create_database(cls, path: str) -> None:
        """
        This method create cache storage database from sqlite_scripts.
        On sqlite3 errors the process exits with the error as message
        :param path: str
        :return: None
        """
        conn = None

        try:
            conn = sqlite3.connect(path, isolation_level=None)
            cursor = conn.cursor()

            for table in ('feeds', 'feeds_entries', 'feed_entry_links', 'feed_entry_media'):
                cursor.executescript(scripts.create_db_tables[table])

            cursor.close()

        except sqlite3.Error as e:
            sys.exit(e)

        finally:
            # The setup connection is local and always released
            # (it used to be leaked on the class)
            if conn is not None:
                conn.close()

    def get(self, table: str, columns: str, limit: int = 100) -> list:
        """
        This method retrieve data from specific table
        :param table: str
        :param columns: str comma separated column list
        :param limit: int
        :return: list
        """
        # table fills {0} and columns fills {1}; the arguments used to be swapped
        query = self._GET_SQL.format(table, columns)
        self.cursor.execute(query, [self._normalize_limit(limit)])

        return self.cursor.fetchall()

    def get_last(self, table: str, columns: str):
        """
        This method retrieve last (most recently inserted) entry from specific table
        :param table: str
        :param columns: str
        :return: sqlite3.Row or None when the table is empty
        """
        self.cursor.execute(self._GET_LAST_SQL.format(table, columns))

        return self.cursor.fetchone()

    def where(self, table: str, *where: list, limit: int = 100) -> list:
        """
        This method retrieve data with specific where statements
        :param table: str
        :param where: list of [column, operator, value] triples joined with AND
        :param limit: int
        :return: list
        """
        # Without conditions fall back to an always-true clause so the SQL stays valid
        clause = ' AND '.join('{} {} ?'.format(item[0], item[1]) for item in where) or '1 = 1'
        # Values are bound as text, matching what the previous inline formatting produced
        params = [str(item[2]) for item in where]
        params.append(self._normalize_limit(limit))

        self.cursor.execute(self._WHERE_SQL.format(table, clause), params)

        return self.cursor.fetchall()

    def find_where(self, table: str, column: str, value, type: str = '=') -> int:
        """
        This method retrieve id from single entry found by specific statement
        :param table: str
        :param column: str
        :param value: Union[int, str]
        :param type: str comparison operator, e.g. '=' or 'like'
        :return: int id of the first match, or False when nothing matches
        """
        query = self._FIND_WHERE_SQL.format(table, column, type)

        self.cursor.execute(query, [str(value)])
        row = self.cursor.fetchone()

        return row[0] if row is not None else False

    def write(self, table: str, columns: list, data: list) -> None:
        """
        This method write provided data (insert, or replace on a unique conflict)
        :param table: str
        :param columns: list
        :param data: list of values, one per column
        :return: None
        """
        query = self._WRITE_SQL.format(
            table, ', '.join(columns), ', '.join('?' for _ in data)
        )

        # str() keeps the stored representation identical to the previous quoted literals
        self.cursor.execute(query, [str(item) for item in data])

    def query(self, sql: str, *args):
        """
        This method provide wrap on query for further methods usage
        :param sql: str
        :param args: values bound to the `?` placeholders of sql
        :return: sqlite3.Cursor
        """
        self.cursor = self.conn.cursor()

        return self.cursor.execute(sql, args)

    def __enter__(self):
        """Allow usage as a context manager"""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """ Close connection on exit"""
        self.close()
class ConverterAbstract(ABC):
    """
    This interface provided for implementing by utility converter
    for render data into appropriate format

    Attributes:
        _media_img_ext attribute contain default media img parse extension
    """

    _media_img_ext: str = '.jpg'

    def __init__(self, path: str, limit: int) -> None:
        """
        This interface constructor prepares the output path and
        remembers the limit of entries to render
        :param path: str
        :param limit: int
        """
        self._path_initialize(Path(path))
        self._limit = limit

    @abstractmethod
    def render(self, feed: 'Feed') -> str:
        """This abstract method should be implemented for render
        entry point of utility converter and bundle all render parts in one """
        pass

    @abstractmethod
    def _entry_render(self, entry: 'FeedEntry'):
        """This abstract method should be implemented for render
        single feed entry"""
        pass

    @abstractmethod
    def _media_render(self, entry: 'FeedEntry'):
        """This abstract method should be implemented for render
        media of single feed entry"""
        pass

    def _init_template_processor(self, template_path: str) -> None:
        """
        This method create converter template processor which
        provide access to work with stored templates. Should be
        calling in render method to provide converter template path
        :param template_path: str path resolved against this module's folder
        :return: None
        """
        path = str(Path(__file__).parent.joinpath(template_path).as_posix())

        self._template_processor = jinja2.Environment(
            loader=jinja2.FileSystemLoader(path), trim_blocks=True
        )

    def _path_initialize(self, path: Path) -> None:
        """
        This method initialize converter storing path
        for render file and media data
        :param path: Path
        :return: None
        """
        self._path = path

        self._path.parent.mkdir(parents=True, exist_ok=True)

        # Warn only when the output file itself is about to be replaced,
        # not whenever its parent folder happens to exist
        if self._path.exists():
            Logger.log(f'Caution - file {self._path} would be overriding')

        self._media_path = Path.home()\
            .joinpath('.' + conf.__package__)\
            .joinpath('media')

        self._media_path.mkdir(parents=True, exist_ok=True)

    def _download_media(self, media_url: str):
        """
        This method storing remote media in local
        storage and provide path to downloaded files
        :param media_url: str
        :return: Path of the downloaded file, or False when nothing was downloaded
        """
        if not media_url:
            # Nothing to fetch; urlretrieve('') would raise ValueError
            return False

        # Local import keeps this block self-contained.
        import hashlib

        # str hash() is salted per process, so it gives a new file name on every
        # run; a digest keeps the name stable for the same url
        digest = hashlib.sha256(media_url.encode('utf-8')).hexdigest()
        media_file_name = str(int(digest, 16) % 10 ** 10)

        media_file = self._media_path.joinpath(media_file_name + self._media_img_ext)

        try:
            request.urlretrieve(media_url, media_file)
            media_file.chmod(0o755)

        except (OSError, ValueError):
            # OSError covers HTTPError/URLError and local file errors;
            # ValueError is raised for malformed urls
            Logger.log(f'Image with url {media_url} did not download')
            return False

        return media_file

    def _save_render_file(self, output: str, encoding: str = 'UTF-8') -> None:
        """
        This method store converted file. Called when render was
        completed and all output data can be saved
        :param output: str
        :param encoding: str
        :return: None
        """
        with open(self._path, 'w', encoding=encoding) as file:
            file.write(output)

        Path(self._path).chmod(0o755)
class HtmlConverter(ConverterAbstract):
    """
    This class implements ConverterAbstract interface and convert
    rss data into html format

    Attributes:
        _log_Converter attribute contain log name converter
        _template_path attribute contain templates path
    """

    _log_Converter: str = 'HTML'
    _template_path: Path = Path(__file__).parent.joinpath('templates')

    def render(self, feed: Feed) -> str:
        """
        This method is implementation of render abstract method
        render all templates and save them into file.
        The process exits once the file is saved, so the default
        console output is skipped
        :param feed: Feed
        :return:
        """
        Logger.log(
            f'Converter option choosen. Default output was declined.\n'
            f'Initialize {self._log_Converter} converter render'
        )

        self._init_template_processor(self._template_path)

        entries = feed.entities_list

        if self._limit is not None:
            # zip stops at the shorter side, so at most `_limit` entries are taken
            entries = [entry for _, entry in zip(range(self._limit), entries)]

        render_feeds_entries = [self._entry_render(entry) for entry in entries]

        Logger.log('Process all render entries')

        self._save_render_file(
            self._template_processor.get_template('layout.html.jinja2').render(
                feeds_entries=render_feeds_entries,
                url=feed.feeds_url,
                logo=self._download_media(feed.feeds_image),
                title=feed.feeds_title,
                encoding=feed.feeds_encoding,
            )
        )

        Logger.log(f'{self._log_Converter} render complete. You can check it in: {self._path}')
        # Successful conversion: exit status 0 (status 1 would report a failure to the shell)
        sys.exit(0)

    def _media_render(self, media: list) -> list:
        """
        This method is implementation of _media_render abstract method
        render media for single feed entry
        :param media: list
        :return: list of rendered media blocks, one per media item
        """
        media_output = []

        for item in media:
            media_file = self._download_media(item.url)

            if not media_file:
                # Keep the result a list and keep the other media: returning the
                # bare string made the entry template iterate it char by char
                media_output.append(
                    self._template_processor.get_template('empty_media.html.jinja2').render()
                )
                continue

            media_output.append(
                self._template_processor.get_template('media.html.jinja2').render(
                    src=item.get('url', ''), alt=item.get('alt', '')
                )
            )

        return media_output

    def _entry_render(self, entry: FeedEntry) -> str:
        """
        This method is implementation of _entry_render abstract method
        render single feed entry
        :param entry: FeedEntry
        :return: str
        """
        return self._template_processor.get_template('entry.html.jinja2').render(
            media=self._media_render(entry.media),
            title=entry.title,
            date=entry.published,
            description=entry.description,
            link=entry.link,
            links=entry.links
        )
+ Image for this block did not download! +
diff --git a/src/components/converter/html/templates/entry.html.jinja2 b/src/components/converter/html/templates/entry.html.jinja2 new file mode 100644 index 0000000..9b3e5e2 --- /dev/null +++ b/src/components/converter/html/templates/entry.html.jinja2 @@ -0,0 +1,33 @@ +
+

+ {{ title }} +

+
+
+ {% for item in media %} + {{ item }} + {% endfor %} +
+
+

+ {{ description }} +

+ links: + +
+ {% for link in links %} + {{ link['href'] }}
+ {% endfor %} +
+ + source: {{ link }} +
+

+ {{ date }} +

+
+
+
+
+
+
diff --git a/src/components/converter/html/templates/layout.html.jinja2 b/src/components/converter/html/templates/layout.html.jinja2 new file mode 100644 index 0000000..bee515a --- /dev/null +++ b/src/components/converter/html/templates/layout.html.jinja2 @@ -0,0 +1,26 @@ + + + + {{ title }} + + +
+
+

{{ title }}

+

+ {{ url }} +

+ {% if logo %} + + {% endif %} +
+
+ {% for entry in feeds_entries %} + {{ entry }} + {% endfor %} +
+
+ + diff --git a/src/components/converter/html/templates/media.html.jinja2 b/src/components/converter/html/templates/media.html.jinja2 new file mode 100644 index 0000000..71664a5 --- /dev/null +++ b/src/components/converter/html/templates/media.html.jinja2 @@ -0,0 +1,3 @@ +
+ {{alt}} +
diff --git a/src/components/converter/html/templates/style.css.jinja2 b/src/components/converter/html/templates/style.css.jinja2 new file mode 100644 index 0000000..6f9e9b1 --- /dev/null +++ b/src/components/converter/html/templates/style.css.jinja2 @@ -0,0 +1,57 @@ +@charset "UTF-8"; +@import url("https://fonts.googleapis.com/css?family=Pacifico|Source+Sans+Pro:400,700"); + +@page { + margin: 3cm; + @bottom-left { + color: #1ee494; + font-family: Pacifico; + content: '♥ rss-reader'; + font-size: 6pt; + } + @bottom-center { + color: #333; + content: "Page " counter(page) " of " counter(pages); + font-family: Pacifico; + font-size: 13pt; + } + @bottom-right { + color: #c1a; + content: 'rss-reader♥ '; + font-family: Pacifico; + font-size: 6pt; + } +} + +html { + color: #14213d; + font-family: Source Sans Pro; + font-size: 11pt; + line-height: 1.6; +} + +html body { + margin: 0; +} + +html h1 { + color: #1ee494; + font-family: Pacifico; + font-size: 40pt; + margin: 0; +} +.wrapper{ + position:relative; +} +.media{ + display:inline-block; +} +img{ + max-width: 14rem !important; + margin: 0 1rem; +} +.logo{ + text-align:center; + margin: 3rem auto; + max-width: 20rem; +} \ No newline at end of file diff --git a/src/components/converter/pdf/__init__.py b/src/components/converter/pdf/__init__.py new file mode 100644 index 0000000..ace1f5d --- /dev/null +++ b/src/components/converter/pdf/__init__.py @@ -0,0 +1 @@ +from .pdf_converter import PdfConverter \ No newline at end of file diff --git a/src/components/converter/pdf/pdf_converter.py b/src/components/converter/pdf/pdf_converter.py new file mode 100644 index 0000000..3ae5d47 --- /dev/null +++ b/src/components/converter/pdf/pdf_converter.py @@ -0,0 +1,35 @@ +"""This module contain class representing html utility converter """ + +from src.components.converter.html.html_converter import HtmlConverter +from pathlib import Path +from weasyprint import HTML, CSS + + +class PdfConverter(HtmlConverter): + """ + This class 
class PdfConverter(HtmlConverter):
    """
    This class extends HtmlConverter and saves the rendered
    rss data as a pdf document.

    The html produced by the inherited render pipeline is handed to
    weasyprint, so no separate pdf rendering logic is needed

    Attributes:
        _log_Converter attribute contain log name converter

    """

    _log_Converter = 'PDF'

    def _save_render_file(self, output: str, encoding: str = 'UTF-8'):
        """
        This method overrides _save_render_file and writes the rendered html
        as a pdf file, styled with the style.css.jinja2 template
        :param output: str
        :param encoding: str
        :return: None
        """
        document = HTML(string=output, encoding=encoding)

        style_template = self._template_processor.get_template('style.css.jinja2')
        stylesheet = CSS(string=style_template.render())

        document.write_pdf(target=self._path, stylesheets=[stylesheet])

        target_file = Path(self._path)
        target_file.chmod(0o755)
class FeedProperty(ABC):
    """Trait for Feed class. Contain all properties, which Feed use out of class """

    @property
    def entities_list(self) -> list:
        """Property provide value of feed parsed or cached entities"""
        return self._decide_output()

    @property
    def feeds_title(self) -> str:
        """Property provide value of feed general title"""
        return self._feeds_title

    @property
    def feeds_url(self) -> str:
        """Property provide value of feed origin url"""
        return self._url

    @property
    def feeds_image(self) -> str:
        """Property provide value of feed image url"""
        return self._feeds_image

    @property
    def feeds_encoding(self) -> str:
        """Property provide value of feed encoding"""
        return self._feeds_encoding


class Feed(FeedProperty):
    """This class represent parsing feed process,
    manage caching module, output data in proper way to console output"""

    def __init__(self, args: ArgumentParser) -> None:
        """
        This method initialize start required data for Feed class and call parser to parse feeds
        :param args: parsed console arguments (json, colorize, date, limit, source)
        """
        self._is_json = args.json
        self._is_colorize = args.colorize
        self._cache_date = args.date
        self._limit = args.limit
        self._url = args.source
        self._entities_list = []

        Logger.log('Initialize console variables')

        self._parse_feeds()

    def show_feeds(self) -> None:
        """
        This method using for output processed data into console into appropriate way
        :return: None
        """
        Logger.log(
            f'Preparation for output feeds. '
            f'Output type: {"JSON" if self._is_json else "DEFAULT"}. '
            f'Feeds choosen: {self._limit}'
        )

        FeedFormatter.is_json = self._is_json

        top_data_output = Map({
            'url': self._url,
            'title': self._feeds_title,
            'image': self._feeds_image,
            'encoding': self._feeds_encoding
        })

        output = FeedFormatter.generate_output(
            self._decide_output(),
            self._limit,
            top_data_output,
            self._is_colorize,
        )

        print(output)

    def _decide_output(self) -> list:
        """
        This method decides which data is used - cached entries when a
        cache date is set, otherwise the entries that were just parsed
        :return: List
        """
        if self._cache_date:
            return Cache().load_feeds_entries(self._url, self._cache_date, self._limit)

        return self._entities_list

    def _parse_feeds(self) -> None:
        """
        This method parsing feeds from provided url and process calls
        append entries to entries list and store to cache.
        When the source cannot be opened the cached feed data is used instead
        :return: None
        :raises ValueError: when feedparser flags the parsed feed as bozo
        """
        try:
            # Reachability probe only; the response is closed right away
            with url.urlopen(self._url):
                pass
        except (url.HTTPError, url.URLError):
            return self._setup_cache_on_unavailable_source()

        Logger.log(f'Start parsing data from url: {self._url}')

        parse_data = feedparser.parse(self._url)

        if parse_data['bozo']:
            raise ValueError("Bozo Exception. Wrong validate or no access to the Internet")

        self._set_global_feed_data(parse_data)

        Logger.log('Generate feeds instances')

        for item in parse_data.entries:
            self._append_feed_entry(item)

        if self._entities_list:
            self._store_cache_instances()

    def _setup_cache_on_unavailable_source(self) -> None:
        """
        This method check cache date on source unavailable
        and set data of relevant cache url
        :return: None
        """
        Logger.log('Something wrong with your source. Only cache available')

        if not self._cache_date:
            # Local import keeps this block self-contained.
            from datetime import datetime

            # Cache.cache_default is a YYYYMMDD string, while the cache loader
            # works with date objects, so it is parsed here
            self._cache_date = datetime.strptime(Cache.cache_default, '%Y%m%d')
            Logger.log(f'Cache set to: {Cache.cache_default}')

        feed_general = Cache().load_feed_general(self._url)

        # NOTE(review): the url is used as the title here - presumably because
        # the feeds cache table keeps no title column; confirm
        self._feeds_title = feed_general.url
        self._feeds_image = feed_general.image
        self._feeds_encoding = feed_general.encoding

        Logger.log('Cache feed data setup')

    def _set_global_feed_data(self, parse_data: 'feedparser.FeedParserDict') -> None:
        """
        This method set all global feed data to Feed instance
        :param parse_data: feedparser.FeedParserDict
        :return: None
        """
        Logger.log('Setting global feed data')

        self._feeds_title = parse_data.feed.title
        self._feeds_encoding = parse_data.encoding

        try:
            self._feeds_image = parse_data.feed.image.href

        except (AttributeError, KeyError):
            # The feed has no image; an empty string marks it as missing
            self._feeds_image = ''
            Logger.log('Cannot find feed image.')

    def _append_feed_entry(self, entry: 'feedparser.FeedParserDict') -> None:
        """
        This method wrap feed parser entry into FeedEntry class and append it to Feed list
        :param entry: feedparser.FeedParserDict
        :return: None
        """
        self._entities_list.append(FeedEntry(entry))

    def _store_cache_instances(self) -> None:
        """
        This method initialize Cache module and provide data to store in cache storage
        :return: None
        """
        cache_params = Map({
            'url': self._url,
            'encoding': self._feeds_encoding,
            'image': self._feeds_image
        })

        Cache().append_feeds(cache_params, self._entities_list)
Attributes: + _soup attribute provide access for work with BeautifulSoup interface + """ + + _soup: BeautifulSoup = BeautifulSoup + + def __init__(self, entry: feedparser.FeedParserDict) -> None: + """ + This constructor init demand data for feed entry and formatting it + :param entry: feedparser.FeedParserDict + """ + self.link = entry.link + self.title = html.unescape(entry.title) + self.description = self._process_description(entry.summary) + self.published = self._process_published(entry) + + self.links: list= self._process_links(entry.links) + self.media: list= self._process_media(entry.summary) + + def _process_links(self, links) -> list: + """ + Getting entry links and processing to check links + :param links: + :return: + """ + return [link for link in links if link.get('href', False)] + + def _process_media(self, summary: str) -> list: + """ + Getting entry text and retrieve media data from it + :param summary: + :return: + """ + return [Map({ + 'url': media.get('src'), + 'alt': html.escape(media.get('alt', '')) + }) for media in self._soup(summary, 'lxml').find_all(['img']) if media.get('src', False)] + + def _process_description(self, description:str) -> str: + """ + Getting entry text and formatting it into more readable format + :param description: + :return: str + """ + return html.unescape( + self._soup(description, 'lxml').get_text() + ) + + def _process_published(self, entry: feedparser.FeedParserDict) -> str: + """ + Retrieve tuple of published data and process it into readable format + :param entry: feedparser.FeedParserDict + :return: str + """ + return datetime(*entry.published_parsed[:6]) diff --git a/src/components/feed/feed_formatter.py b/src/components/feed/feed_formatter.py new file mode 100644 index 0000000..6a4bcbe --- /dev/null +++ b/src/components/feed/feed_formatter.py @@ -0,0 +1,201 @@ +"""this module contain class for various formatter output parsing data in console """ + +import json +from fabulous import color +from fabulous.text 
import Text +from datetime import datetime +from src.components.helper import Map + + +class FeedFormatter: + """ + This class provide a way for formatting data into console depending on the selected options + Attributes: + is_json store state of output case + """ + + is_json = False + + @classmethod + def generate_output(cls, entries: list, limit: int, top_data_output: Map, is_colorize: bool=False) -> str: + """ + This method decide which way rss feed should be printed + :param entries: list + :param limit: int + :param top_data_output: Map + :param is_colorize: bool + :return: + """ + if not cls.is_json: + return cls._default_output(entries, limit, top_data_output, is_colorize) + + return cls._json_output(entries, limit, top_data_output) + + @classmethod + def _default_output(cls, entries: list, limit: int, top_data_output: Map, is_colorize) -> str: + """ + This method render data for default output case + :param entries: list + :param limit: int + :param top_data_output: Map + :param is_colorize: bool + :return: + """ + if is_colorize: + print(Text("Console Rss Reader!", fsize=19, color='#f44a41', shadow=False, skew=4)) + + formatted_feeds = ''.join(cls._colorize_single_feed_format_default(feed) for feed in entries[:limit]) + else: + formatted_feeds = ''.join(cls._single_feed_format_default(feed) for feed in entries[:limit]) + + if is_colorize: + return 'Feed: {0}\nUrl: {1}\n\n{2}'.format( + color.highlight_black(top_data_output.title), + color.highlight_red(top_data_output.url), + formatted_feeds + ) + + return 'Feed: {0}\nUrl: {1}\n\n{2}'.format( + f'——— {top_data_output.title} ———', + f'——— {top_data_output.url} ———', + formatted_feeds + ) + + @classmethod + def _json_output(cls, entries: list, limit: int, top_data_output: Map) -> str: + """ + This method render data for json output case + :param entries: list + :param limit: int + :param top_data_output: Map + :return: + """ + formatted_feeds = [cls._single_feed_format_json(feed) for feed in entries[:limit]] 
+ + output = json.dumps({ + "title": top_data_output.title, + "url": top_data_output.url, + "image": top_data_output.image, + "entries" : formatted_feeds, + }, indent=2, sort_keys=False, ensure_ascii=False) + + return output.encode(top_data_output.encoding).decode() + + @classmethod + def _single_feed_format_default(cls, entry: object) ->str: + """ + This method render single entry for default output + :param entry: object + :return: str + """ + return f'\ + \r{cls._delimiter()}\n\n\ + \rTitle: {entry.title}\n\ + \rDate: {cls.human_date(entry.published)}\n\ + \rLink:{entry.link}\n\n\ + \r{entry.description}\n\n\ + \rMedia: {cls.format_media(entry.media)}\n\ + \rLinks: {cls.format_links(entry.links)}\n' + + @classmethod + def _colorize_single_feed_format_default(cls, entry: object) -> str: + """ + This method render single entry for default output with colorizee option + :param entry: object + :return: str + """ + return f'\ + \r{color.highlight_red(cls._delimiter())}\n\n\ + \r{color.italic(color.magenta("Title"))}: {color.highlight_magenta(entry.title)}\n\ + \r{color.bold(color.yellow("Date"))}: {color.highlight_yellow(cls.human_date(entry.published))}\n\ + \r{color.bold(color.blue("Link"))}: {color.highlight_blue(entry.link)}\n\n\ + \r{color.highlight_green(entry.description)}\n\n\ + \r{color.bold(color.blue("Media"))}: {color.bold(cls.format_media(entry.media))}\n\ + \r{color.bold(color.blue("Links"))}: {color.bold(cls.format_links(entry.links))}\n' + + @classmethod + def _single_feed_format_json(cls, entry: object) -> str: + """ + This method render single entry for json output + :param entry: object + :return: str + """ + return { + "entry": { + "link": entry.link, + "body": { + "title": entry.title, + "date": str(cls.human_date(entry.published)), + "links": [{ + 'href':link.href, + 'type': link.type, + } for link in entry.links], + "media": [{ + 'url':media.url, + 'additional': media.additional, + } for media in entry.media], + "description": entry.description 
+ } + } + } + + @staticmethod + def format_links(links: list) -> str: + """ + This static method beautifying provided links + :param entry: object + :return: str + """ + if not links: + return '———— No data ————' + + def formatted(link, count): + return f'[{count}] {link["href"]} ({link["type"]})\n' + + return ''.join( + formatted(link, count) for count, link in enumerate(links, start=1) + ) + + @staticmethod + def format_media(media: list) -> str: + """ + This static method beautifying provided media urls + :param media:list + :return: str + """ + + if not media: + return '———— No data ————' + + def formatted(media): + return f' {media["url"]}\n' + + return ''.join(formatted(item) for item in media) + + @staticmethod + def human_date(date) -> datetime: + """ + This static method provide more readable for human date format + :param date: + :return: datetime + """ + if isinstance(date, type('str')): + return datetime.strptime(date, "%Y-%m-%d %H:%M:%S") + + return date.strftime("%a, %d %b %Y - [%H:%M:%S]") + + @staticmethod + def _delimiter() -> str: + """ + This static method provide simple delimiter between feeds entries + :return: str + """ + return ''.join('#' * 100) + + @staticmethod + def _delimiter_secondary() -> str: + """ + This static method provide second variant of simple delimiter between feeds entries + :return: str + """ + return ''.join('—' * 50) diff --git a/src/components/helper/__init__.py b/src/components/helper/__init__.py new file mode 100644 index 0000000..4f19b9c --- /dev/null +++ b/src/components/helper/__init__.py @@ -0,0 +1,2 @@ +from .singleton import Singleton +from .map import Map diff --git a/src/components/helper/map.py b/src/components/helper/map.py new file mode 100644 index 0000000..08c1ff9 --- /dev/null +++ b/src/components/helper/map.py @@ -0,0 +1,33 @@ +"""This module contain class representing map dictionaries pattern""" + + +class Map(dict): + """This class wrap dictionary to proper work with them""" + + def __init__(self, 
*args, **kwargs): + super(Map, self).__init__(*args, **kwargs) + for arg in args: + if isinstance(arg, dict): + for k, v in arg.items(): + self[k] = v + + if kwargs: + for k, v in kwargs.items(): + self[k] = v + + def __getattr__(self, attr): + return self.get(attr) + + def __setattr__(self, key, value): + self.__setitem__(key, value) + + def __setitem__(self, key, value): + super(Map, self).__setitem__(key, value) + self.__dict__.update({key: value}) + + def __delattr__(self, item): + self.__delitem__(item) + + def __delitem__(self, key): + super(Map, self).__delitem__(key) + del self.__dict__[key] diff --git a/src/components/helper/singleton.py b/src/components/helper/singleton.py new file mode 100644 index 0000000..ada2968 --- /dev/null +++ b/src/components/helper/singleton.py @@ -0,0 +1,19 @@ +"""This module contain class representing singleton pattern for further implementation""" + + +class Singleton(object): + """ + This module contain class representing singleton pattern for further implementation + + Attributes: + _instance attribute contains sole instance of class + """ + + _instance = None + + def __new__(class_, *args, **kwargs): + """rewrite __new__ for preventing creating new instances of class""" + if not isinstance(class_._instance, class_): + class_._instance = object.__new__(class_, *args, **kwargs) + + return class_._instance diff --git a/src/components/logger/__init__.py b/src/components/logger/__init__.py new file mode 100644 index 0000000..dd0a8a7 --- /dev/null +++ b/src/components/logger/__init__.py @@ -0,0 +1 @@ +from .logger import Logger \ No newline at end of file diff --git a/src/components/logger/conf.yml b/src/components/logger/conf.yml new file mode 100644 index 0000000..424ae85 --- /dev/null +++ b/src/components/logger/conf.yml @@ -0,0 +1,32 @@ +version: 1 +disable_existing_loggers: True + +loggers: + standard: + level: INFO + handlers: [console, error_file_handler] + propagate: no +formatters: + standard: + format: '%(asctime)s - 
%(message)s' + datefmt: '%H:%M:%S' + error: + format: '%(levelname)s %(name)s.%(funcName)s(): %(message)s' +handlers: + console: + class: logging.StreamHandler + level: INFO + formatter: standard + stream: ext://sys.stdout + error_file_handler: + class: logging.handlers.RotatingFileHandler + level: ERROR + formatter: error + filename: /tmp/rss-reader-errors.log + maxBytes: 10485760 + backupCount: 20 + encoding: utf8 +root: + level: NOTSET + handlers: [console, error_file_handler] + propogate: yes \ No newline at end of file diff --git a/src/components/logger/logger.py b/src/components/logger/logger.py new file mode 100644 index 0000000..ef69efe --- /dev/null +++ b/src/components/logger/logger.py @@ -0,0 +1,65 @@ +"""this module contain logger module for logging in console""" + +import logging +import logging.config +import yaml +from src.components.helper.singleton import Singleton +import coloredlogs +from pathlib import Path + + +class Logger(Singleton): + + """ + Logger class using for wrap logger and provide more convenient approach for logging + + Attributes: + logger_name logger_name containt logger settings default name + """ + + logger_name: str = 'standard' + + @classmethod + def initialize(cls, is_colorize: bool) -> None : + """ + This method initalize logger module for logging in project. Is_colorize using + for decide is color cli output. 
Also logger config store in conf.yml + :param is_colorize: bool + :return: None + """ + with open(Path(__file__).parent.joinpath('conf.yml'), 'r') as file: + config = yaml.safe_load(file.read()) + logging.config.dictConfig(config) + + cls._logger = logging.getLogger(cls.logger_name) + + if is_colorize: + coloredlogs.install( + fmt='%(asctime)s - %(message)s', + datefmt='%H:%M:%S', + field_styles={ + 'message' : dict(color='green'), + 'asctime' : dict(color='red'), + }, + level='DEBUG', logger=cls._logger + ) + + @classmethod + def log(cls, message: str) -> None: + """ + This method wrap Logger info method + :param message: str + :return: None + """ + if getattr(cls, '_logger', None) is not None: + cls._logger.info(message) + + @classmethod + def log_error(cls, message: str) -> None: + """ + This method wrap Logger error method + :param message: str + :return: None + """ + if getattr(cls, '_logger', None) is not None: + cls()._logger.error(message) diff --git a/src/components/parser/__init__.py b/src/components/parser/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/components/parser/arguments/__init__.py b/src/components/parser/arguments/__init__.py new file mode 100644 index 0000000..dd201b2 --- /dev/null +++ b/src/components/parser/arguments/__init__.py @@ -0,0 +1,12 @@ +from .arguments_abstract import ArgumentsAbstract + +from .optional.version import * +from .optional.json import * +from .optional.limit import * +from .optional.verbose import * +from .optional.colorize import * +from .optional.date import * +from .optional.to_html import * +from .optional.to_pdf import * + +from .positional.source import * diff --git a/src/components/parser/arguments/arguments_abstract.py b/src/components/parser/arguments/arguments_abstract.py new file mode 100644 index 0000000..cb049b2 --- /dev/null +++ b/src/components/parser/arguments/arguments_abstract.py @@ -0,0 +1,40 @@ +"""This module contain interface for implementation by cli utility params""" + 
+from abc import ABC, abstractmethod +from pathlib import Path +import argparse + + +class ArgumentsAbstract(ABC): + """ + This interface provided general data for implemented by argparse params + """ + def __init__(self, parser: argparse.ArgumentParser) -> None: + """ + This interface constructor init argparse parser instance for further usage in options implementations + :param parser: argparse.ArgumentParser + """ + self._parser = parser + + @abstractmethod + def add_argument(self) -> argparse: + """This abstract method should be implemented for adding represented options""" + pass + + def _validate_converter_path(self, path: str) -> Path: + """ + This method validate incoming path for converter module on + path extension and path valid + :param path: str + :return: Path + """ + if not Path(path).suffix in self._extensions: + raise argparse.ArgumentTypeError( + f'Wrong extension type. Proper extension\\s: {", ".join(self._extensions)}' + ) + + try: + return Path(path) + + except argparse.ArgumentTypeError: + raise argparse.ArgumentTypeError(f'Invalid provided path: {path}') diff --git a/src/components/parser/arguments/optional/__init__.py b/src/components/parser/arguments/optional/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/components/parser/arguments/optional/colorize.py b/src/components/parser/arguments/optional/colorize.py new file mode 100644 index 0000000..695bbf9 --- /dev/null +++ b/src/components/parser/arguments/optional/colorize.py @@ -0,0 +1,11 @@ +"""This module contain class representing cli optional argument""" + +from src.components.parser.arguments import ArgumentsAbstract + + +class Colorize(ArgumentsAbstract): + + def add_argument(self): + self._parser.add_argument( + '--colorize', default=False, action='store_true', help='Colorize console output' + ) diff --git a/src/components/parser/arguments/optional/date.py b/src/components/parser/arguments/optional/date.py new file mode 100644 index 0000000..3513f9f --- 
/dev/null +++ b/src/components/parser/arguments/optional/date.py @@ -0,0 +1,33 @@ +"""This module contain class representing cli optional argument""" + +from src.components.parser.arguments import ArgumentsAbstract +from datetime import datetime +import argparse + + +class Date(ArgumentsAbstract): + """This class representing implementation of ArgumentsAbstract + interface and init a optional Date for cache parameter""" + + def add_argument(self) -> argparse: + """ + This method is implementation of add_argument abstract method + add Date parameter from console for retrieving cache + :return: argparse + """ + self._parser.add_argument( + '--date', type=self._validate_caching_date, + help='Cached news from the specified date. YYYYMMDD is proper date format.' + ) + + def _validate_caching_date(self, date: str) -> datetime: + """ + This method validate incoming optional date parameter on + date format type + :param date: str + :return: datetime + """ + try: + return datetime.strptime(date, '%Y%m%d').date() + except ValueError: + raise argparse.ArgumentTypeError(f'Invalid date typed for caching: {date} \n Use YYYYMMDD format') diff --git a/src/components/parser/arguments/optional/json.py b/src/components/parser/arguments/optional/json.py new file mode 100644 index 0000000..5252ed7 --- /dev/null +++ b/src/components/parser/arguments/optional/json.py @@ -0,0 +1,19 @@ +"""This module contain class representing cli optional argument""" + +from src.components.parser.arguments import ArgumentsAbstract +import argparse + + +class Json(ArgumentsAbstract): + """This class representing implementation of ArgumentsAbstract + interface and init a optional Json for json output parameter""" + + def add_argument(self) -> argparse: + """ + This method is implementation of add_argument abstract method + add Json parameter from console for json output + :return: argparse + """ + self._parser.add_argument( + '--json', action='store_true', help='Print result as JSON in stdout' + ) diff 
--git a/src/components/parser/arguments/optional/limit.py b/src/components/parser/arguments/optional/limit.py new file mode 100644 index 0000000..f337d59 --- /dev/null +++ b/src/components/parser/arguments/optional/limit.py @@ -0,0 +1,34 @@ +"""This module contain class representing cli optional argument""" + +from src.components.parser.arguments import ArgumentsAbstract +import argparse + + +class Limit(ArgumentsAbstract): + """This class representing implementation of ArgumentsAbstract + interface and init a optional Json for json output parameter""" + + def add_argument(self) -> argparse: + """ + This method is implementation of add_argument abstract method + add Limit parameter from console for limit feed entries + :return: argparse + """ + self._parser.add_argument( + '--limit', type=self._validate_limit, default=3, help='Limit news topics if this parameter provided' + ) + + def _validate_limit(self, limit: int) -> int: + """ + This method validate incoming optional limit parameter on equals to zero or less + :param limit: int + :return: int + """ + try: + if not int(limit) > 0: + raise argparse.ArgumentTypeError + + return int(limit) + + except argparse.ArgumentTypeError: + raise argparse.ArgumentTypeError('Argument limit equal or less 0') diff --git a/src/components/parser/arguments/optional/to_html.py b/src/components/parser/arguments/optional/to_html.py new file mode 100644 index 0000000..4f4aaa0 --- /dev/null +++ b/src/components/parser/arguments/optional/to_html.py @@ -0,0 +1,28 @@ +"""This module contain class representing cli optional argument""" + +from src.components.parser.arguments.arguments_abstract import ArgumentsAbstract +import argparse + + +class ToHtml(ArgumentsAbstract): + """ + This class representing implementation of ArgumentsAbstract interface + and init a optional ToHtml parameter + + Attributes: + _extensions attribute contains all permitted extension for this parameter + """ + + _extensions: list=['.html', '.htm'] + + def 
add_argument(self) -> argparse: + """ + This method is implementation of add_argument abstract method + add ToHtml parameter from console for converter feeds entities into html + :return: argparse + """ + self._parser.add_argument( + '--to-html', type=self._validate_converter_path, + help='Convert to HTML format. Please provide path to file create' + ) + diff --git a/src/components/parser/arguments/optional/to_pdf.py b/src/components/parser/arguments/optional/to_pdf.py new file mode 100644 index 0000000..bdb5527 --- /dev/null +++ b/src/components/parser/arguments/optional/to_pdf.py @@ -0,0 +1,27 @@ +"""This module contain class representing cli optional argument""" + +from src.components.parser.arguments.arguments_abstract import ArgumentsAbstract +import argparse + + +class ToPdf(ArgumentsAbstract): + """ + This class representing implementation of ArgumentsAbstract interface + and init a optional Pdf parameter + + Attributes: + _extensions attribute contains all permitted extension for this parameter + """ + + _extensions: list=['.pdf'] + + def add_argument(self) -> argparse: + """ + This method is implementation of add_argument abstract method + add ToPdf parameter from console for converter feeds entities into pdf + :return: argparse + """ + self._parser.add_argument( + '--to-pdf', type=self._validate_converter_path, + help='Convert to Pdf format. 
Please provide path to file create' + ) diff --git a/src/components/parser/arguments/optional/verbose.py b/src/components/parser/arguments/optional/verbose.py new file mode 100644 index 0000000..6e041d3 --- /dev/null +++ b/src/components/parser/arguments/optional/verbose.py @@ -0,0 +1,20 @@ +"""This module contain class representing cli optional argument""" + +from src.components.parser.arguments import ArgumentsAbstract +import argparse + + +class Verbose(ArgumentsAbstract): + """This class representing implementation of ArgumentsAbstract + interface and init a optional Verbose parameter""" + + def add_argument(self) -> argparse: + """ + This method is implementation of add_argument abstract method + add Verbose parameter from console for output verbose data + :return: argparse + """ + self._parser.add_argument( + '--verbose', default=False, action='store_true', help='Outputs verbose status messages' + ) + diff --git a/src/components/parser/arguments/optional/version.py b/src/components/parser/arguments/optional/version.py new file mode 100644 index 0000000..0a88a78 --- /dev/null +++ b/src/components/parser/arguments/optional/version.py @@ -0,0 +1,20 @@ +"""This module contain class representing cli optional argument""" + +from .. 
import ArgumentsAbstract +from src import conf +import argparse + + +class Version(ArgumentsAbstract): + """This class representing implementation of ArgumentsAbstract + interface and init a optional Version parameter""" + + def add_argument(self) -> argparse: + """ + This method is implementation of add_argument abstract method + add Version parameter from console for output version of rss-reader + :return: argparse + """ + self._parser.add_argument( + '-v', '--version', action='version', version=conf.__version__, help='Print version info' + ) diff --git a/src/components/parser/arguments/positional/__init__.py b/src/components/parser/arguments/positional/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/components/parser/arguments/positional/source.py b/src/components/parser/arguments/positional/source.py new file mode 100644 index 0000000..5e4dbeb --- /dev/null +++ b/src/components/parser/arguments/positional/source.py @@ -0,0 +1,36 @@ +"""This module contain class representing cli positional argument""" + +from src.components.parser.arguments.arguments_abstract import ArgumentsAbstract +import argparse +import urllib.request as url + + +class Source(ArgumentsAbstract): + """This class representing implementation of ArgumentsAbstract + interface and init a positional Source parameter""" + + def add_argument(self) -> argparse: + """ + This method is implementation of add_argument abstract method + add Source parameter from console for load feed + :return: argparse + """ + self._parser.add_argument( + 'source', type=str, help='RSS URL' + ) + + def _validate_source(self, source: str) -> str: + """ + This method validate incoming required source parameter url checker exception + :param source: str + :return: str + """ + try: + if url.urlopen(source).getcode() is not 200: + raise argparse.ArgumentError + + return source + + except argparse.ArgumentError: + raise argparse.ArgumentError('Server answer code is not 200') + diff --git 
a/src/components/parser/parser.py b/src/components/parser/parser.py new file mode 100644 index 0000000..e7f5b0f --- /dev/null +++ b/src/components/parser/parser.py @@ -0,0 +1,62 @@ +"""This module contain class for wrap Argparse""" + +import argparse +import importlib + + +class Parser: + """ + This class represents wrap on argparse for more convenient way to parse params and validate them + + Attributes: + _arguments_list attribute contains all presenting cli options in utility + """ + + _arguments_list: tuple = ( + 'source', + 'version', + 'json', + 'verbose', + 'limit', + 'date', + 'colorize', + 'to_html', + 'to_pdf', + ) + + def __init__(self, description: str, usage: str) -> None : + """ + This constructor implements, argparse module and init param from console + :param description: str + :param usage: str + """ + self._parser = argparse.ArgumentParser(description=description, usage=usage) + self._init_arguments() + + def get_args(self) -> argparse: + """ + This method retrieve cli parameters and return them + :return: argparse + """ + return self._parser.parse_args() + + def _init_arguments(self) -> None: + """ + This method load arparse parameters classes bound with _arguments_list list + :return: None + """ + module = importlib.import_module('src.components.parser.arguments') + + for argument in self._arguments_list: + argument_class = getattr(module, self.to_camel_case(argument)) + argument_class(self._parser).add_argument() + + @staticmethod + def to_camel_case(string: str) -> str: + """ + This staticmethod help convert snake_case parameters to CamelCase for load classes + :param string: str + :return: str + """ + parts = string.split('_') + return parts[0].capitalize() + ''.join(part.title() for part in parts[1:]) diff --git a/src/conf.py b/src/conf.py new file mode 100644 index 0000000..48481ef --- /dev/null +++ b/src/conf.py @@ -0,0 +1,6 @@ +__author__ = 'Mikhan Victor' +__email__ = 'victormikhan@gmail.com' +__package__ = 'rss-reader' +__version__ = 
'1.6.0' +__description__ = 'RSS Reader' +__url__ = 'https://github.com/victormikhan/PythonHomework' diff --git a/src/rss_reader.py b/src/rss_reader.py new file mode 100644 index 0000000..367f6c7 --- /dev/null +++ b/src/rss_reader.py @@ -0,0 +1,54 @@ +"""This module contain main rss-reader class App and entry point to utility""" + +from .components.helper.singleton import Singleton +from .components.parser.parser import Parser +from .components.feed import * +from .components.logger.logger import Logger +from .components.converter.html import HtmlConverter +from .components.converter.pdf import PdfConverter +from src import conf + + +class App(Singleton): + """General class of rss-reader. Implements all classes and run utility""" + + def __init__(self) -> None: + """ + This constructor parse program arguments, + initialize all module params decide which logic to run + """ + console = Parser( + 'Pure Python command-line RSS reader.', + conf.__description__ + ) + + self._console_args = console.get_args() + + if self._console_args.verbose: + Logger.initialize(self._console_args.colorize) + + self._feed = Feed(self._console_args) + + if self._console_args.to_html: + HtmlConverter(self._console_args.to_html, self._console_args.limit).render(self._feed) + + if self._console_args.to_pdf: + PdfConverter(self._console_args.to_pdf, self._console_args.limit).render(self._feed) + + @classmethod + def start(cls) -> None: + return cls()._feed.show_feeds() + + +def main(): + """ + Entry point of rss-reader. + """ + try: + App.start() + except KeyboardInterrupt: + Logger.log_error('\nStop Rss-Reader') + + +if __name__ == "__main__": + main()