diff --git a/.gitignore b/.gitignore index 894a44c..903c535 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,8 @@ venv.bak/ # mypy .mypy_cache/ +*.html +data/ +*.pdf +*.jpg +loglist.txt \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..ef18989 --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +Cached news is written to the file data\feeddata.txt and read back line by line + +HTML scheme: + +
+FEED
+IMAGES LINKS
+ + + + + + +## PDF conversion: +can't convert Cyrillic symbols +can't convert symbols from "bad" fonts +can't print images in "bad" formats +if you try to convert a feed that hits one of these exceptions, the reader simply doesn't write THAT feed. + + +# About the parser +Checked on Yahoo, NASA, BBC. +Works well on all RSS 2.0 feeds, but RSS 1.0 has a "bad" format for this reader. diff --git a/jsonStandart.md b/jsonStandart.md new file mode 100644 index 0000000..b770ed1 --- /dev/null +++ b/jsonStandart.md @@ -0,0 +1,25 @@ + {"item": + + {"link": + //URL of feed + } + + {"body": + + {"title": + //title text + } + + {"feed": + //feed text + } + + {"images": + //images links + } + + {"date": + //publication date + } + } + } \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..328d144 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +beautifulsoup4==4.8.1 +bs4==0.0.1 +fpdf==1.7.2 +logger==1.4 +lxml==4.4.1 +urllib3==1.25.7 +argparser==1.4.0 \ No newline at end of file diff --git a/rssreader/__init__.py b/rssreader/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rssreader/__main__.py b/rssreader/__main__.py new file mode 100644 index 0000000..6304bf9 --- /dev/null +++ b/rssreader/__main__.py @@ -0,0 +1,2 @@ +from rssreader.rssreader import main +main() diff --git a/rssreader/rssreader.py b/rssreader/rssreader.py new file mode 100644 index 0000000..161d25d --- /dev/null +++ b/rssreader/rssreader.py @@ -0,0 +1,291 @@ +import argparse +import logging +import urllib3 +from bs4 import BeautifulSoup +import urllib.request +import sys +import json +from fpdf import FPDF +import os + + +def argsparsing(): + """arguments creating and control""" + parser = argparse.ArgumentParser() + parser.add_argument("source", help="RSS URL", type=str) + parser.add_argument("--version", action='version', version='%(prog)s ' + 'v 2.0', help="Print version info", ) + parser.add_argument("--json", help="Print result as JSON in stdout", 
action="store_true") + parser.add_argument("--verbose", help="Outputs verbose status messages", action="store_true") + parser.add_argument("--limit", type=int, help="Limit news topics if this parameter provided") + parser.add_argument("--date", type=int, help="Read cashed news by date in next format YYMMDD") + parser.add_argument('--html', type=str, help="Convert news to html and save in .html file.Path in format smth\\") + parser.add_argument('--pdf', type=str, help="Convert news to pdf and save in .pdf file.Path in format smth\\") + return parser.parse_args() + + +def making_log(operation, message, file='loglist.log'): + """func can do 2 ops, if 1 to write if 0 to read""" + if bool(operation): + logging.basicConfig(filename=file, format='%(name)s - %(levelname)s - %(message)s-%(asctime)s', + level=logging.INFO) + logging.info(message) + else: + print(open(file, 'r').read()) + + +def spliting_items(lst, index1, tag): + """help to create pdf files""" + try: + line_list = '' + split_list = [] + split_list = str(tag+str(lst)).split(" ") + for index in range(len(split_list)): + if len(str(line_list)) < 120: + line_list = line_list + " %s" % str(split_list[index]) + else: + index1.cell(250, 10, line_list, ln=1, align="C") + line_list = '' + index1.cell(250, 10, line_list, ln=1, align="pos") + except: + making_log(1, "Cant't save feed with index=%d to index") + print("Can't save news as index ;(") + + +class NewsRss: + """Class with all parts of rss news and methods to work with its.""" + def __init__(self): + self.arguments = argsparsing() + self.title = [] + self.pubDate = [] + self.link = [] + self.desc = [] + self.links = [] + self.datalist = [] + + def feed_find(self): + """find rss news by url and save it to memory""" + try: + urllib.request.urlopen(self.arguments.source) + except: + print("Error.URL is incorrect") + exit(1) + soup = BeautifulSoup(urllib.request.urlopen(self.arguments.source), "xml") + making_log(1, "Opened URL for news reading, URL: %s" % 
self.arguments.source) + try: + list = soup.find_all("item") + except: + print("Error. Can't find
+