-
Notifications
You must be signed in to change notification settings - Fork 6
Added covers, fixed getting all torrents #39
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
bf55919
3f5212d
f112df3
8b189e8
cf1456c
de73cba
db1ad8d
19f6df2
ea65f23
c4328f6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
radaron marked this conversation as resolved.
Show resolved
Hide resolved
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -69,7 +69,7 @@ async def search(self, pattern, type=SearchParamType.ALL_OWN, where=SearchParamW | |
| sort_by=ParamSort.UPLOAD, sort_order=ParamSeq.DECREASING, number=None): | ||
| page_count = 1 | ||
| torrents = [] | ||
| while number is None or len(torrents) < number: | ||
| while number is None or number == -1 or len(torrents) < number: | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same |
||
| url = URLs.DOWNLOAD_PATTERN.value.format(page=page_count, | ||
| t_type=type.value, | ||
| sort=sort_by.value, | ||
|
|
@@ -81,9 +81,9 @@ async def search(self, pattern, type=SearchParamType.ALL_OWN, where=SearchParamW | |
| except Exception as e: | ||
| raise NcoreConnectionError(f"Error while searhing torrents. {e}") from e | ||
| new_torrents = [Torrent(**params) for params in self._page_parser.get_items(request.text)] | ||
| torrents.extend(new_torrents) | ||
| if number is None or len(new_torrents) == 0: | ||
| return torrents | ||
| torrents.extend(new_torrents) | ||
| page_count += 1 | ||
| return torrents[:number] | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,13 +9,18 @@ class TorrentsPageParser: | |
| def __init__(self): | ||
| self.type_pattern = re.compile(r'<a href=".*\/torrents\.php\?tipus=(.*?)">' | ||
| r'<img src=".*" class="categ_link" alt=".*" title=".*">') | ||
| self.id_name_pattern = re.compile(r'<a href=".*?" onclick="torrent\(([0-9]+)\); return false;" title="(.*?)">') | ||
| self.id_name_poster_pattern = re.compile( | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I recommend using a simpler regex here. As far as I can see, there is an image tag: And from it you could parse out the torrent id:
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Then you can iterate over the ids and check whether each id is in this pattern's output. If it is, add the cover; if not, set "no cover". |
||
| r'<a href=".*?" onclick="torrent\((\d+)\);.*?" title="(.*?)">.*?(?:onmouseover="mutat\(\'(https:\/\/.*?)\',.*?)', | ||
| re.DOTALL | ||
| ) | ||
| self.id_name_patter = re.compile(r'<a href=".*?" onclick="torrent\(([0-9]+)\); return false;" title="(.*?)">') | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo: pattern |
||
| self.date_pattern = re.compile(r'<div class="box_feltoltve2">(.*?)<br>(.*?)</div>') | ||
| self.size_pattern = re.compile(r'<div class="box_meret2">(.*?)</div>') | ||
| self.not_found_pattern = re.compile(r'<div class="lista_mini_error">Nincs találat!</div>') | ||
| self.seeders_pattern = re.compile(r'<div class="box_s2"><a class="torrent" href=".*">([0-9]+)</a></div>') | ||
| self.leechers_pattern = re.compile(r'<div class="box_l2"><a class="torrent" href=".*">([0-9]+)</a></div>') | ||
|
|
||
|
|
||
| @staticmethod | ||
| def get_key(data): | ||
| key_pattern = r'<link rel="alternate" href=".*?\/rss.php\?key=(?P<key>[a-z,0-9]+)" title=".*"' | ||
|
|
@@ -25,24 +30,43 @@ def get_key(data): | |
| raise NcoreParserError(f"Error while read user " | ||
| f"key with pattern: {key_pattern}") | ||
|
|
||
|
|
||
| def id_exists(self, data, search_id): | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It can be staticmethod and can start with |
||
| for i, id in enumerate(data): | ||
| if search_id in data[i]: | ||
| return True | ||
|
|
||
| return False | ||
|
|
||
|
|
||
| def get_items(self, data): | ||
| types = self.type_pattern.findall(data) | ||
| ids_names = self.id_name_pattern.findall(data) | ||
| ids_names_posters = self.id_name_poster_pattern.findall(data) | ||
| ids_names = self.id_name_patter.findall(data) | ||
| dates_times = self.date_pattern.findall(data) | ||
| sizes = self.size_pattern.findall(data) | ||
| seed = self.seeders_pattern.findall(data) | ||
| leech = self.leechers_pattern.findall(data) | ||
| if len(types) != 0 and len(types) == len(ids_names) == \ | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can create a poster dict here, where the key is the torrent id and the value is the poster url or "no cover". For this I recommend converting the re.findall result to a dictionary: |
||
|
|
||
| # Not all torrents has poster img | ||
| if (len(ids_names) != len(ids_names_posters)): | ||
| for i, id in enumerate(ids_names): | ||
| if not self.id_exists(ids_names_posters, ids_names[i][0]): | ||
| missing_torrent_data = (ids_names[i][0], ids_names[i][1], 'no cover') | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The no Cover could be a constant. Because in one place
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in |
||
| ids_names_posters.append(missing_torrent_data) | ||
|
|
||
| if len(types) != 0 and len(types) == len(ids_names_posters) == \ | ||
| len(dates_times) == len(sizes) == len(seed) == len(leech): | ||
| ids, names = zip(*ids_names) | ||
| ids, names, poster_image = zip(*ids_names_posters) | ||
| dates, times = zip(*dates_times) | ||
| key = self.get_key(data) | ||
| else: | ||
| if not self.not_found_pattern.search(data): | ||
| raise NcoreParserError(f"Error while parse download items in {self.__class__.__name__}.") | ||
| return | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This return hides an error when the lengths of the found data are not equal. |
||
| for i, id in enumerate(ids): | ||
| yield {"id": id, "title": names[i], "key": key, "date": parse_datetime(dates[i], times[i]), | ||
| "size": Size(sizes[i]), "type": SearchParamType(types[i]), "seed": seed[i], "leech": leech[i]} | ||
| "size": Size(sizes[i]), "type": SearchParamType(types[i]), "seed": seed[i], "leech": leech[i], "poster_image": poster_image[i]} | ||
|
|
||
|
|
||
| class TorrenDetailParser: | ||
|
|
@@ -57,6 +81,7 @@ def __init__(self): | |
| self.peers_pattern = re.compile(r'div class="dt">Seederek:</div>.*?<div class="dd"><a onclick=".*?">' | ||
| r'(?P<seed>[0-9]+)</a></div>.*?<div class="dt">Leecherek:</div>.*?<div ' | ||
| r'class="dd"><a onclick=".*?">(?P<leech>[0-9]+)</a></div>', re.DOTALL) | ||
| self.poster_image_pattern = re.compile(r'<td class="inforbar_img" align="center"><img\s+[^>]*src="(https://[^\"]+)', re.DOTALL) | ||
|
|
||
| def get_item(self, data): | ||
| try: | ||
|
|
@@ -69,9 +94,12 @@ def get_item(self, data): | |
| peers = self.peers_pattern.search(data) | ||
| seed = peers.group('seed') | ||
| leech = peers.group('leech') | ||
| poster_image_match = self.poster_image_pattern.search(data) | ||
| poster_image = poster_image_match.group(1) if poster_image_match else "No cover" | ||
|
|
||
| except AttributeError as e: | ||
| raise NcoreParserError(f"Error while parsing by detailed page. {e}") from e | ||
| return {"title": title, "key": key, "date": date, "size": size, "type": t_type, 'seed': seed, 'leech': leech} | ||
| return {"title": title, "key": key, "date": date, "size": size, "type": t_type, 'seed': seed, 'leech': leech, 'poster_image': poster_image} | ||
|
|
||
|
|
||
| class RssParser: | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.