Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ncoreparser/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def search(self, pattern, type=SearchParamType.ALL_OWN, where=SearchParamWhere.N
sort_by=ParamSort.UPLOAD, sort_order=ParamSeq.DECREASING, number=None):
page_count = 1
torrents = []
while number is None or len(torrents) < number:
while number is in (None, -1) or len(torrents) < number:
url = URLs.DOWNLOAD_PATTERN.value.format(page=page_count,
t_type=type.value,
sort=sort_by.value,
Expand All @@ -79,9 +79,9 @@ def search(self, pattern, type=SearchParamType.ALL_OWN, where=SearchParamWhere.N
except Exception as e:
raise NcoreConnectionError(f"Error while searhing torrents. {e}") from e
new_torrents = [Torrent(**params) for params in self._page_parser.get_items(request.text)]
torrents.extend(new_torrents)
if number is None or len(new_torrents) == 0:
return torrents
torrents.extend(new_torrents)
page_count += 1
return torrents[:number]

Expand Down
4 changes: 2 additions & 2 deletions ncoreparser/client_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ async def search(self, pattern, type=SearchParamType.ALL_OWN, where=SearchParamW
sort_by=ParamSort.UPLOAD, sort_order=ParamSeq.DECREASING, number=None):
page_count = 1
torrents = []
while number is None or len(torrents) < number:
while number is None or number == -1 or len(torrents) < number:
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

url = URLs.DOWNLOAD_PATTERN.value.format(page=page_count,
t_type=type.value,
sort=sort_by.value,
Expand All @@ -81,9 +81,9 @@ async def search(self, pattern, type=SearchParamType.ALL_OWN, where=SearchParamW
except Exception as e:
raise NcoreConnectionError(f"Error while searhing torrents. {e}") from e
new_torrents = [Torrent(**params) for params in self._page_parser.get_items(request.text)]
torrents.extend(new_torrents)
if number is None or len(new_torrents) == 0:
return torrents
torrents.extend(new_torrents)
page_count += 1
return torrents[:number]

Expand Down
40 changes: 34 additions & 6 deletions ncoreparser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,18 @@ class TorrentsPageParser:
def __init__(self):
self.type_pattern = re.compile(r'<a href=".*\/torrents\.php\?tipus=(.*?)">'
r'<img src=".*" class="categ_link" alt=".*" title=".*">')
self.id_name_pattern = re.compile(r'<a href=".*?" onclick="torrent\(([0-9]+)\); return false;" title="(.*?)">')
self.id_name_poster_pattern = re.compile(
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I recommend using a simpler regex here. As far as I can see, each row contains an image tag like this:

<img onmouseout="elrejt('borito3814785')" onmouseover="mutat('https://nc-img.cdn.l7cache.com/covers/L9_kjpbQ9UwZFlXZ?28795815', '281', 'borito3814785', this)" border="0" src="" class="infobar_ico">

From that tag you can parse out the torrent id: in elrejt('borito3814785') the id is 3814785.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then you can iterate over the ids and check whether each id appears in this pattern's output. If it does, add the poster URL; if it does not, set the value to "no cover".

r'<a href=".*?" onclick="torrent\((\d+)\);.*?" title="(.*?)">.*?(?:onmouseover="mutat\(\'(https:\/\/.*?)\',.*?)',
re.DOTALL
)
self.id_name_patter = re.compile(r'<a href=".*?" onclick="torrent\(([0-9]+)\); return false;" title="(.*?)">')
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo: id_name_patter should be id_name_pattern.

self.date_pattern = re.compile(r'<div class="box_feltoltve2">(.*?)<br>(.*?)</div>')
self.size_pattern = re.compile(r'<div class="box_meret2">(.*?)</div>')
self.not_found_pattern = re.compile(r'<div class="lista_mini_error">Nincs találat!</div>')
self.seeders_pattern = re.compile(r'<div class="box_s2"><a class="torrent" href=".*">([0-9]+)</a></div>')
self.leechers_pattern = re.compile(r'<div class="box_l2"><a class="torrent" href=".*">([0-9]+)</a></div>')


@staticmethod
def get_key(data):
key_pattern = r'<link rel="alternate" href=".*?\/rss.php\?key=(?P<key>[a-z,0-9]+)" title=".*"'
Expand All @@ -25,24 +30,43 @@ def get_key(data):
raise NcoreParserError(f"Error while read user "
f"key with pattern: {key_pattern}")


def id_exists(self, data, search_id):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be a staticmethod, and its name can start with an underscore (_).

for i, id in enumerate(data):
if search_id in data[i]:
return True

return False


def get_items(self, data):
types = self.type_pattern.findall(data)
ids_names = self.id_name_pattern.findall(data)
ids_names_posters = self.id_name_poster_pattern.findall(data)
ids_names = self.id_name_patter.findall(data)
dates_times = self.date_pattern.findall(data)
sizes = self.size_pattern.findall(data)
seed = self.seeders_pattern.findall(data)
leech = self.leechers_pattern.findall(data)
if len(types) != 0 and len(types) == len(ids_names) == \
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can create a poster dict here, where the key is the torrent id and the value is the poster URL or "no cover".
You can iterate over the ids, add each id as a key of this dict, then find the poster image and add it as the value.

For this I recommend converting the re.findall result to a dictionary:
https://www.geeksforgeeks.org/python-convert-a-list-of-tuples-into-dictionary/


# Not all torrents have a poster image
if (len(ids_names) != len(ids_names_posters)):
for i, id in enumerate(ids_names):
if not self.id_exists(ids_names_posters, ids_names[i][0]):
missing_torrent_data = (ids_names[i][0], ids_names[i][1], 'no cover')
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The "no cover" value could be a constant, because it is spelled 'no cover' in one place and "No cover" in another.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In data.py you can create a NO_COVER constant and use it here.

ids_names_posters.append(missing_torrent_data)

if len(types) != 0 and len(types) == len(ids_names_posters) == \
len(dates_times) == len(sizes) == len(seed) == len(leech):
ids, names = zip(*ids_names)
ids, names, poster_image = zip(*ids_names_posters)
dates, times = zip(*dates_times)
key = self.get_key(data)
else:
if not self.not_found_pattern.search(data):
raise NcoreParserError(f"Error while parse download items in {self.__class__.__name__}.")
return
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This return hides an error if the lengths of the found data are not equal.

for i, id in enumerate(ids):
yield {"id": id, "title": names[i], "key": key, "date": parse_datetime(dates[i], times[i]),
"size": Size(sizes[i]), "type": SearchParamType(types[i]), "seed": seed[i], "leech": leech[i]}
"size": Size(sizes[i]), "type": SearchParamType(types[i]), "seed": seed[i], "leech": leech[i], "poster_image": poster_image[i]}


class TorrenDetailParser:
Expand All @@ -57,6 +81,7 @@ def __init__(self):
self.peers_pattern = re.compile(r'div class="dt">Seederek:</div>.*?<div class="dd"><a onclick=".*?">'
r'(?P<seed>[0-9]+)</a></div>.*?<div class="dt">Leecherek:</div>.*?<div '
r'class="dd"><a onclick=".*?">(?P<leech>[0-9]+)</a></div>', re.DOTALL)
self.poster_image_pattern = re.compile(r'<td class="inforbar_img" align="center"><img\s+[^>]*src="(https://[^\"]+)', re.DOTALL)

def get_item(self, data):
try:
Expand All @@ -69,9 +94,12 @@ def get_item(self, data):
peers = self.peers_pattern.search(data)
seed = peers.group('seed')
leech = peers.group('leech')
poster_image_match = self.poster_image_pattern.search(data)
poster_image = poster_image_match.group(1) if poster_image_match else "No cover"

except AttributeError as e:
raise NcoreParserError(f"Error while parsing by detailed page. {e}") from e
return {"title": title, "key": key, "date": date, "size": size, "type": t_type, 'seed': seed, 'leech': leech}
return {"title": title, "key": key, "date": date, "size": size, "type": t_type, 'seed': seed, 'leech': leech, 'poster_image': poster_image}


class RssParser:
Expand Down
3 changes: 2 additions & 1 deletion ncoreparser/torrent.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

class Torrent:
def __init__(self, id, title, key, size, #pylint: disable=too-many-arguments
type, date, seed, leech, **params): #pylint: disable=too-many-arguments
type, date, seed, leech, poster_image, **params): #pylint: disable=too-many-arguments
self._details = {}
self._details["id"] = int(id)
self._details["title"] = title
Expand All @@ -14,6 +14,7 @@ def __init__(self, id, title, key, size, #pylint: disable=too-many-arguments
self._details["date"] = date
self._details["seed"] = seed
self._details["leech"] = leech
self._details["poster_image"] = poster_image
self._details["download"] = URLs.DOWNLOAD_LINK.value.format(id=id, key=key)
self._details.update(params)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "hatchling.build"

[project]
name = "ncoreparser"
version = "2.1.2"
version = "2.2.0"
description = "Package to download from ncore.pro"
authors = [
{ name="Aron Radics", email="aron.radics.jozsef@gmail.com" }
Expand Down
27 changes: 19 additions & 8 deletions tests/manual.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,22 @@

def print_category(msg):
print("")
print("*{:175}*".format("-" * 175))
print(f"|{msg:^175}|")
print("*{:^100}*{:^30}*{:^10}*{:^10}*{:^10}*{:^10}*".format("-" * 100, "-" * 30, "-" * 10, "-" * 10, "-" * 10, "-" * 10))
print("|{:^100}|{:^30}|{:^10}|{:^10}|{:^10}|{:^10}|".format("Title", "Type", "Size", "ID", "Seed", "Leech"))
print("*{:^100}*{:^30}*{:^10}*{:^10}*{:^10}*{:^10}*".format("-" * 100, "-" * 30, "-" * 10, "-" * 10, "-" * 10, "-" * 10))
print("*{:276}*".format("-" * 276))
print(f"|{msg:^276}|")
print("*{:^100}*{:^30}*{:^10}*{:^10}*{:^10}*{:^10}*{:^100}*".format("-" * 100, "-" * 30, "-" * 10, "-" * 10, "-" * 10, "-" * 10, "-" * 100))
print("|{:^100}|{:^30}|{:^10}|{:^10}|{:^10}|{:^10}|{:^100}|".format("Title", "Type", "Size", "ID", "Seed", "Leech", "Poster"))
print("*{:^100}*{:^30}*{:^10}*{:^10}*{:^10}*{:^10}*{:^100}*".format("-" * 100, "-" * 30, "-" * 10, "-" * 10, "-" * 10, "-" * 10, "-" * 100))


def pretty_print(torrent):
print("|{:^100}|{:^30}|{:^10}|{:^10}|{:^10}|{:^10}|".format(torrent['title'],
print("|{:^100}|{:^30}|{:^10}|{:^10}|{:^10}|{:^10}|{:^100}|".format(torrent['title'],
torrent['type'],
str(torrent['size']),
str(torrent['id']),
torrent['seed'],
torrent['leech']))
print("*{:^100}*{:^30}*{:^10}*{:^10}*{:^10}*{:^10}*".format("-" * 100, "-" * 30, "-" * 10, "-" * 10, "-" * 10, "-" * 10))
torrent['leech'],
torrent['poster_image']))
print("*{:^100}*{:^30}*{:^10}*{:^10}*{:^10}*{:^10}*{:^100}*".format("-" * 100, "-" * 30, "-" * 10, "-" * 10, "-" * 10, "-" * 10, "-" * 100))


parser = argparse.ArgumentParser()
Expand Down Expand Up @@ -65,6 +66,16 @@ def pretty_print(torrent):
for torrent in torrents:
pretty_print(torrent)

print_category("Test number=None")
torrents = client.search(pattern="")
for torrent in torrents:
pretty_print(torrent)

print_category("Test number=-1")
torrents = client.search(pattern="creed", number=-1)
for torrent in torrents:
pretty_print(torrent)

client.logout()
end = time.time()

Expand Down
27 changes: 19 additions & 8 deletions tests/manual_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,22 @@

def print_category(msg):
print("")
print("*{:175}*".format("-" * 175))
print(f"|{msg:^175}|")
print("*{:^100}*{:^30}*{:^10}*{:^10}*{:^10}*{:^10}*".format("-" * 100, "-" * 30, "-" * 10, "-" * 10, "-" * 10, "-" * 10))
print("|{:^100}|{:^30}|{:^10}|{:^10}|{:^10}|{:^10}|".format("Title", "Type", "Size", "ID", "Seed", "Leech"))
print("*{:^100}*{:^30}*{:^10}*{:^10}*{:^10}*{:^10}*".format("-" * 100, "-" * 30, "-" * 10, "-" * 10, "-" * 10, "-" * 10))
print("*{:276}*".format("-" * 276))
print(f"|{msg:^276}|")
print("*{:^100}*{:^30}*{:^10}*{:^10}*{:^10}*{:^10}*{:^100}*".format("-" * 100, "-" * 30, "-" * 10, "-" * 10, "-" * 10, "-" * 10, "-" * 100))
print("|{:^100}|{:^30}|{:^10}|{:^10}|{:^10}|{:^10}|{:^100}|".format("Title", "Type", "Size", "ID", "Seed", "Leech", "Poster"))
print("*{:^100}*{:^30}*{:^10}*{:^10}*{:^10}*{:^10}*{:^100}*".format("-" * 100, "-" * 30, "-" * 10, "-" * 10, "-" * 10, "-" * 10, "-" * 100))


def pretty_print(torrent):
print("|{:^100}|{:^30}|{:^10}|{:^10}|{:^10}|{:^10}|".format(torrent['title'],
print("|{:^100}|{:^30}|{:^10}|{:^10}|{:^10}|{:^10}|{:^100}|".format(torrent['title'],
torrent['type'],
str(torrent['size']),
str(torrent['id']),
torrent['seed'],
torrent['leech']))
print("*{:^100}*{:^30}*{:^10}*{:^10}*{:^10}*{:^10}*".format("-" * 100, "-" * 30, "-" * 10, "-" * 10, "-" * 10, "-" * 10))
torrent['leech'],
torrent['poster_image']))
print("*{:^100}*{:^30}*{:^10}*{:^10}*{:^10}*{:^10}*{:^100}*".format("-" * 100, "-" * 30, "-" * 10, "-" * 10, "-" * 10, "-" * 10, "-" * 100))


parser = argparse.ArgumentParser()
Expand Down Expand Up @@ -65,6 +66,16 @@ async def main():
for torrent in torrents:
pretty_print(torrent)

print_category("Test number=None")
torrents = await client.search(pattern="")
for torrent in torrents:
pretty_print(torrent)

print_category("Test number=-1")
torrents = await client.search(pattern="creed", number=-1)
for torrent in torrents:
pretty_print(torrent)

await client.logout()
end = time.time()

Expand Down