Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
88ee332
Add .idea files to gitignore
Nov 12, 2019
7838b27
init component module. add parser load class. add few arguments for p…
Nov 17, 2019
0787966
add all required optional and positional argument to parser
Nov 17, 2019
6a3c62c
add helpers classes. add feed classes for parsing rss data. fill setu…
Nov 17, 2019
6f19af3
complete default parsing without json. add feed formater for provisi…
Nov 17, 2019
150cec4
add json feature output to rss reader
Nov 17, 2019
998c60b
add cli entry_point to conf
Nov 17, 2019
ffb9b3a
add logging module to rss_reader. add logging message to rss_reader
Nov 29, 2019
6d5c011
fix setup module from package. add caching mechanism without loading …
Nov 30, 2019
dca3b86
add helper "map" for working with dicts. modifing cache mechanism for…
Nov 30, 2019
b261db6
add colorize libs to requirements. add colorize output to rss-reader …
Nov 30, 2019
e362f8c
add initialization validation to no bool rss-reader param source, limit
Nov 30, 2019
05e5b43
change to-html validator exceptions. add part of convertation module …
Nov 30, 2019
02e1a57
modifying path in project and setup file. change db cache structurtur…
Dec 1, 2019
9d92b67
complete converter pdf and improve converter by jinja templates. add …
Dec 1, 2019
ebf3f5f
fixing json output parser. Add docstrings to several modules
Dec 1, 2019
97aeefd
docstring parser component
Dec 1, 2019
f0d5996
docstring helper and converter modules
Dec 1, 2019
7267b1b
Remove the ~/test directory
Dec 1, 2019
ed06aa8
add docstring to cache module
Dec 1, 2019
bc21b97
modify Redme. add Manifest. change json structure
Dec 1, 2019
32b3b26
just change some small tip in setup.py
Dec 1, 2019
5d49282
another tip in setup.py
viktarmikhan Dec 1, 2019
9dd95d0
fix install bug
Dec 8, 2019
aa1789a
add yaml and lxml libraries to requirements to avoiding errors after …
Dec 8, 2019
e91eaa7
add feature to read cache rss data without internet connection. set …
Dec 8, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,6 @@ venv.bak/

# mypy
.mypy_cache/

#PyCharm
.idea
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include requirements.txt
55 changes: 55 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# RSS reader

Python RSS reader - command-line utility.

## [Usage]:
* positional (required) arguments:
* source .. RSS URL
* optional arguments:
* -h, --help .. Show help message and exit.
* --version .. Print version info.
* --json .. Print result as JSON in stdout.
* --verbose .. Outputs verbose status messages.
* --limit .. Limit news topics if this parameter is provided.
* --date .. Return cached news from the specified day. Format is YYYYMMDD.
* --to-html .. Convert news into html format and save a file to the specified path.
* --to-pdf .. Convert news into pdf format and save a file to the specified path.
* --colorize .. Output in colorized mode

## [Cache]
Cached RSS feeds are stored in the `cache.db` file inside the `~/.rss-reader` folder. The cache uses sqlite3 to store RSS feeds.
Every time the utility runs, the cache module stores the news parsed from the RSS feed, updating entries that already exist.

## [Converter]
News can be converted into `HTML` and `PDF` formats. If the file already exists at the specified path, it will be overwritten.


## [JSON structure]
<pre>
{
"title": "Yahoo News - Latest News & Headlines",
"url": "https://news.yahoo.com/rss/",
"image": "http://l.yimg.com/rz/d/yahoo_news_en-US_s_f_p_168x21_news.png",
"entries": [
{
"entry": {
"link": "https://news.yahoo.com/1-protesters-burn-tyres-southern-113205795.html",
"body": {
"title": "UPDATE 3-Iraq protesters burn shrine entrance in holy city, PM quitting 'not enough'",
"date": "Sat, 30 Nov 2019 - [11:32:05]",
"links": [
{
"rel": "alternate",
"type": "text/html",
"href": "https://news.yahoo.com/1-protesters-burn-tyres-southern-113205795.html"
}
],
"media": [],
"description": "Iraqi protesters set fire to the entrance of a shrine in the southern holy city of Najaf on Saturday and security forces fired tear gas to disperse them, police and a demonstrator at the scene said, risking more bloodshed after a rare day of calm. The demonstrator sent a video to Reuters of a doorway to the Hakim shrine blazing as protesters cheered and filmed it on their mobile phones. The incident took place during one of the bloodiest weeks of Iraq\u2019s anti-government unrest, which erupted last month."
}
}
}
]
}

</pre>
8 changes: 8 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
feedparser==5.2.1
bs4==0.0.1
coloredlogs==10.0.0
fabulous==0.3.0
jinja2==2.10.3
WeasyPrint==50
PyYAML==3.13
lxml==4.2.1
38 changes: 38 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import setuptools
from src import conf
from pathlib import Path

here = Path(__file__).parent


def get_install_requirements():
    """
    Read the install requirements listed in requirements.txt

    The file is looked up next to this setup script (see ``here``). Blank
    lines and ``#`` comment lines are skipped so that only real requirement
    specifiers are handed to setuptools.
    :return: list
    """
    # Explicit encoding: do not depend on the platform default when building.
    with open(here.joinpath('requirements.txt'), 'r', encoding='utf-8') as file:
        stripped_lines = (line.strip() for line in file)

        return [
            requirement
            for requirement in stripped_lines
            if requirement and not requirement.startswith('#')
        ]


# Load the full README text into `long_description` for the setup() call below.
# NOTE(review): setup() passes this value as `description=` and passes
# conf.__description__ as `long_description=`; the two look swapped - confirm.
with open(here.joinpath('README.md'), encoding='utf-8') as f:
long_description = f.read()


setuptools.setup(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

не получается установить пакет

# pip install .
Processing /PythonHomework
    ERROR: Command errored out with exit status 1:
     command: /usr/local/bin/python -c 'import sys, setuptools, tokenize; sys.argv[0] = '"'"'/tmp/pip-req-build-v64yw744/setup.py'"'"'; __file__='"'"'/tmp/pip-req-build-v64yw744/setup.py'"'"';f=getattr(tokenize, '"'"'open'"'"', open)(__file__);code=f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' egg_info --egg-base /tmp/pip-req-build-v64yw744/pip-egg-info
         cwd: /tmp/pip-req-build-v64yw744/
    Complete output (8 lines):
    running egg_info
    creating /tmp/pip-req-build-v64yw744/pip-egg-info/rss_reader.egg-info
    writing /tmp/pip-req-build-v64yw744/pip-egg-info/rss_reader.egg-info/PKG-INFO
    writing dependency_links to /tmp/pip-req-build-v64yw744/pip-egg-info/rss_reader.egg-info/dependency_links.txt
    writing entry points to /tmp/pip-req-build-v64yw744/pip-egg-info/rss_reader.egg-info/entry_points.txt
    writing top-level names to /tmp/pip-req-build-v64yw744/pip-egg-info/rss_reader.egg-info/top_level.txt
    writing manifest file '/tmp/pip-req-build-v64yw744/pip-egg-info/rss_reader.egg-info/SOURCES.txt'
    error: package directory 'rss_reader' does not exist
    ----------------------------------------
ERROR: Command errored out with exit status 1: python setup.py egg_info Check the logs for full command output.

name=conf.__package__,
version=conf.__version__,
license='MIT',
author=conf.__author__,
author_email=conf.__email__,
description=long_description,
long_description=conf.__description__,
long_description_content_type='text/markdown',
url=conf.__url__,
packages=setuptools.find_packages(),
include_package_data=True,
package_data={
'': ['*.jinja2', '*.yaml', '*.yml'],
},
install_requires=get_install_requirements(),
python_requires='>=3.6',
entry_points={
'console_scripts':
['%s = src.rss_reader:main' % conf.__package__]
}
)
1 change: 1 addition & 0 deletions src/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from . import components
5 changes: 5 additions & 0 deletions src/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from src import rss_reader


# Entry point when this module is executed as a script: delegate to rss_reader.main().
if __name__ == '__main__':
rss_reader.main()
6 changes: 6 additions & 0 deletions src/components/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# from . import cache
# from . import converter
# from . import feed
# from . import helper
# from . import logger
# from . import parser
3 changes: 3 additions & 0 deletions src/components/cache/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .cache import Cache


236 changes: 236 additions & 0 deletions src/components/cache/cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
"""This module contains the class for caching feeds."""

from src.components.cache.db.sqlite import Sqlite
from src.components.logger import Logger
from src.components.helper import Singleton
from src.components.helper import Map
from src.components.feed.feed_entry import FeedEntry
from fabulous import color

from datetime import timedelta
from datetime import datetime
from pathlib import Path
from src import conf
import html


class Cache(Singleton):
    """
    Store, update and read cached feed entries through the Sqlite db layer.

    Attributes:
        _db_name: default file name of the cache database
        cache_default: default cache date to retrieve, formatted as YYYYMMDD;
            it is computed once, when the class body is executed
    """

    _db_name = 'cache.db'

    cache_default = datetime.today().strftime('%Y%m%d')

    def __init__(self) -> None:
        """
        Make sure the cache storage exists and open the Sqlite db layer
        instance used for all further work with the database
        """
        self._cache_db_file = self._storage_initialize()
        self._db = Sqlite(str(self._cache_db_file))

    def _storage_initialize(self) -> Path:
        """
        Check that the cache directory and the cache database exist and
        create whichever of them is missing
        :return: Path to the cache database file
        """
        cache_path = Path.home().joinpath('.' + conf.__package__)

        if not cache_path.exists():
            # exist_ok guards against the directory appearing between the
            # check above and this call (e.g. two runs started together).
            cache_path.mkdir(parents=True, exist_ok=True)
            Logger.log(f'Created {conf.__package__} local dir with path: {cache_path}')

        cache_file = cache_path.joinpath(self._db_name)

        if not cache_file.exists():
            Sqlite.create_database(str(cache_file))
            Logger.log(f'Created local storage with path: {cache_file}')

        Logger.log(f'Cache local storage with path: {cache_file}')

        return cache_file

    def append_feeds(self, feed: Map, feed_entities_list: list) -> None:
        """
        Write the feed and all of its entries into the cache storage and
        close the database afterwards
        :param feed: general feed data; its ``url`` is used to find the feed
        :param feed_entities_list: feed entries to cache
        :return: None
        """
        Logger.log(f'Check on feed cache exist on url: {feed.url}')

        feed_id = self._db.find_where('feeds', 'url', feed.url, 'like')

        if not feed_id:
            feed_id = self._insert_feed_data(feed)

        Logger.log('Start caching feeds: \n')

        for feed_entry in feed_entities_list:
            # NOTE(review): only the log message differs between the two
            # branches; the entry is written either way. Whether an existing
            # row is replaced depends on Sqlite.write - confirm.
            if not self._db.find_where('feeds_entries', 'link', feed_entry.link, 'like'):
                Logger.log(f'Caching feed {color.blue(feed_entry.link)} INSERTED')
            else:
                Logger.log(f'Caching feed {color.blue(feed_entry.link)} UPDATED')

            self._insert_feed_entry_into_cache(feed_id, feed_entry)

        print("\n")
        Logger.log('Cached feeds was updated')

        # NOTE(review): the db layer is closed here; confirm that no caller
        # reads from this Cache instance after append_feeds().
        self._db.close()

    def _insert_feed_entry_into_cache(self, feed_id: int, entry: FeedEntry) -> None:
        """
        Write one feed entry into the cache storage: first its general data,
        then its links and media, tied to the entry by the cursor's lastrowid
        :param feed_id: id of the feed the entry belongs to
        :param entry: feed entry to store
        :return: None
        """
        self._write_feed_entry_general(entry, feed_id)

        # Must be read right after the general write and before any other write.
        feed_entry_id = self._db.cursor.lastrowid

        self._write_feed_entry_links(feed_entry_id, entry)
        self._write_feed_entry_media(feed_entry_id, entry)

    def _insert_feed_data(self, feed: Map) -> int:
        """
        Write general feed data (url, encoding, image) into the cache storage
        :param feed: general feed data
        :return: the cursor's lastrowid after the write
        """
        Logger.log(f'Add feed cache exist on url: {feed.url}')

        self._db.write('feeds', [
            'url',
            'encoding',
            'image'
        ], [
            feed.url,
            feed.encoding,
            feed.image
        ])

        return self._db.cursor.lastrowid

    def _write_feed_entry_general(self, entry: FeedEntry, feed_id: int) -> None:
        """
        Write feed entry general data into the cache storage. Title and
        description are html-escaped before being written
        :param entry: feed entry to store
        :param feed_id: id of the feed the entry belongs to
        :return: whatever Sqlite.write returns
        """
        return self._db.write(
            'feeds_entries',
            ['feed_id', 'title', 'description', 'link', 'published'],
            [
                feed_id,
                html.escape(entry.title),
                html.escape(entry.description),
                entry.link,
                entry.published,
            ]
        )

    def _write_feed_entry_links(self, feed_entry_id: int, entry: FeedEntry) -> None:
        """
        Write every link of the feed entry into the cache storage
        :param feed_entry_id: id of the cached feed entry owning the links
        :param entry: feed entry whose links are stored
        :return: None
        """
        # No return inside the loop: returning there stored the first link only.
        for link in entry.links:
            self._db.write(
                'feed_entry_links',
                ['feed_entry_id', 'href', 'type'],
                [feed_entry_id, link.href, link.type]
            )

    def _write_feed_entry_media(self, feed_entry_id: int, entry: FeedEntry) -> None:
        """
        Write every media item of the feed entry into the cache storage.
        The media alt text is html-escaped before being written
        :param feed_entry_id: id of the cached feed entry owning the media
        :param entry: feed entry whose media are stored
        :return: None
        """
        # No return inside the loop: returning there stored the first media only.
        for media in entry.media:
            self._db.write(
                'feed_entry_media',
                ['feed_entry_id', 'url', 'additional'],
                [feed_entry_id, media.url, html.escape(media.alt)]
            )

    def load_feeds_entries(self, url: str, date, limit: int = 100) -> list:
        """
        Load cached feed entries published within one day starting at the
        given date
        :param url: url of the feed to load entries for
        :param date: day to load; it has to be a date-like object (it is
            formatted with strftime, shifted by a timedelta and passed to
            datetime.combine), not a string
        :param limit: maximum number of entries to retrieve
        :return: list
        :raises Exception: when nothing is cached for the requested period
        """
        # Only day, month and year are printed, so the text is the same
        # before and after the date is normalized to midnight below.
        period = (
            f'{date.strftime("from %d, %b %Y")}'
            f'{(date + timedelta(days=1)).strftime(" to %d, %b %Y")}'
        )

        Logger.log(f'Load file from cache storage {period}')

        date = datetime.combine(date, datetime.min.time())

        cache_list = self._get_specify_by_date(url, date, limit)

        if not cache_list:
            raise Exception(
                f'Cache retrieve nothing. Storage for specified data is empty {period}'
            )

        # TODO: wrap into CacheEntry
        return self._db.map_data(cache_list)

    def load_feed_general(self, url: str) -> list:
        """
        Load general data of the feed with exactly this url
        NOTE(review): the first mapped row is returned, not a list of rows,
        and an empty result would fail on the [0] lookup - confirm callers
        :param url: url of the feed
        :return: first row of the mapped query result
        """
        return self._db.map_data(
            self._db.where(
                'feeds',
                ['url', '=', url],
                limit=1
            )
        )[0]

    def _get_specify_by_date(self, url: str, date, limit: int = 100) -> list:
        """
        Retrieve cached entries of the feed published between the given
        date and one day later, each with its links and media attached
        :param url: url of the feed
        :param date: start of the period; a timedelta of one day is added to it
        :param limit: maximum number of entries to retrieve
        :return: list
        """
        feed_id = self._db.find_where('feeds', 'url', url, 'like')

        cache_general_data = self._db.where(
            'feeds_entries',
            ['feed_id', '=', feed_id],
            ['published', '>=', date],
            ['published', '<=', date + timedelta(days=1)],
            limit=limit
        )

        output_cache = []

        for cache_entry in self._db.map_data(cache_general_data):
            cache_entry['links'] = self._db.map_data(
                self._db.where('feed_entry_links', ['feed_entry_id', '=', cache_entry['id']])
            )

            cache_entry['media'] = self._db.map_data(
                self._db.where('feed_entry_media', ['feed_entry_id', '=', cache_entry['id']])
            )

            output_cache.append(cache_entry)

        return output_cache
13 changes: 13 additions & 0 deletions src/components/cache/cache_entry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""This module contains the class for structuring cached feed entries."""

from src.components.feed.feed_entry import FeedEntry


class CacheEntry(FeedEntry):
    """
    Feed entry read back from the cache storage.

    It subclasses FeedEntry without adding anything of its own: cached
    entries carry similar data, and this class is the place to extend
    when they need behaviour of their own.
    """

Empty file.
Loading