From 96aed2cc92395d535bb325ddbac5269e94c8cf55 Mon Sep 17 00:00:00 2001 From: Eliot Lash Date: Tue, 14 Jan 2025 23:07:15 -0800 Subject: [PATCH 1/5] Add TRANSFER_DOWNLOADED_EPISODES setting to control if downloaded episodes are copied. Add additional docs to README about this and other flags. --- README.md | 29 ++++++++++++++++++++--------- pa_to_ap.py | 3 ++- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index c1b690f..b7a391a 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,18 @@ Migrate data from Podcast Addict's to AntennaPod's database This does not use any IDs for matching feeds and episodes from one db to another, as those tend to be very unreliable. (They're supposed to stay the same, but often they don't.) Instead, we match them by their name and, in some cases, other attributes. This will work even if the name changed. For example, when using the script one episode's name changed from something like `123. Great Title` to just `Great Title`, but they were still matched. +## Configuration +There are a few optional configuration settings you can change by editing the variables at the top of `pa_to_ap.py` before running the script. + +MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE = True +* `TRANSFER_DOWNLOADED_EPISODES` controls if existing downloads in Podcast Addict are copied to AntennaPod. + Additional steps are required; see the steps below. + * Default: `True` (downloads are transferred.) +* `EPISODES_DIR_PATH` controls the directory path for transferred episodes (to which you have to manually copy/move the files). + * Default: `/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict` +* `MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE` If a name match for a given episode is not found, this setting controls if we should try to match on the episode media URL instead. + * Default: `True` (URL match is used as a fallback.) + ## Steps 0. 
Install Python 3.8 or later @@ -20,18 +32,17 @@ This does not use any IDs for matching feeds and episodes from one db to another 5. Run the [`pa_to_ap.py`](pa_to_ap.py) script (AntennaPod db file will be **modified**!) in a terminal 6. Confirm that matches are correct (if they aren't you may need to increase `min_similarity`) 7. Copy the modified db file back to your phone -8. Create `/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict` -9. Manually move (or copy) the folders **inside** -`/storage/emulated/0/Android/data/com.bambuna.podcastaddict/files/podcast/` -**to** -`/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict` - as AntennaPod cannot access the files under the other app's directory (Consider making a backup of these files.) -10. Import the modified db in AntennaPod +8. If you chose to enable `TRANSFER_DOWNLOADED_EPISODES` (this is on by default): + 1. Create `/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict` + 2. Manually move (or copy) the folders **inside** + `/storage/emulated/0/Android/data/com.bambuna.podcastaddict/files/podcast/` + **to** + `/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict` + as AntennaPod cannot access the files under the other app's directory (Consider making a backup of these files.) +9. Import the modified db in AntennaPod Enjoy! -Of course, you can change the location (to which you have to manually copy/move the files to) by modifying the `EPISODES_DIR_PATH` before running the script. - ## Warning Note that this is somewhat rough and will likely not handle a lot of edge cases. 
diff --git a/pa_to_ap.py b/pa_to_ap.py index 6ed0914..05b5c4c 100644 --- a/pa_to_ap.py +++ b/pa_to_ap.py @@ -14,6 +14,7 @@ CUR_PATH = Path() +TRANSFER_DOWNLOADED_EPISODES = True EPISODES_DIR_PATH = '/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict' MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE = True @@ -208,7 +209,7 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor, # "INSERT INTO Favorites (feeditem, feed) VALUES " "(?, ?)", (ap_ep[0], ap.id)) - if pa_ep[4]: + if pa_ep[4] and TRANSFER_DOWNLOADED_EPISODES: transfer_from_dld_ep_to_ep(antenna_pod_cur, podcast_addict_cur, # pa_ep, ap_ep, pa.folder_name) From a430ac567c7a1d536390e5fe275e22d792a0c9c7 Mon Sep 17 00:00:00 2001 From: Eliot Lash Date: Tue, 14 Jan 2025 23:47:01 -0800 Subject: [PATCH 2/5] Update .gitignore for pa_to_ap i/o files and PyCharm IDE --- .gitignore | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/.gitignore b/.gitignore index b6e4761..a8bf3cb 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,33 @@ dmypy.json # Pyre type checker .pyre/ + +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# pa_to_ap specific stuff +/podcast_addict_extracted +/*.db +/*.backup From 3771bf4ccffdf0d94f37b2ac42f7e18ae0463f64 Mon Sep 17 00:00:00 2001 From: Eliot Lash Date: Tue, 14 Jan 2025 23:47:43 -0800 Subject: [PATCH 3/5] A few major fixes 
and improvements: - Perform feed matching by feed URL instead of fuzzy matching which seems to have 100% success rate, old method wasn't working at all for me. - Ignore uninitialized PA feeds - Add support for transferring in-progress episodes - Fix bug in episode URL match fallback logic where it wasn't doing anything when a match was found, and add log message when URL match is successful - Fix error from referencing SimpleChapters.type column which doesn't exist in AP 3.6.1 - Remove some unnecessary repeated calls to print function where \n literal could be used instead. --- pa_to_ap.py | 89 +++++++++++++++++++++++++++-------------------------- 1 file changed, 46 insertions(+), 43 deletions(-) diff --git a/pa_to_ap.py b/pa_to_ap.py index 05b5c4c..3b34d76 100644 --- a/pa_to_ap.py +++ b/pa_to_ap.py @@ -26,6 +26,7 @@ class Feed: description: str author: str keep_updated: int + feed_url: str folder_name: str = '' @@ -77,43 +78,29 @@ def transfer(podcast_addict_cur: Cursor, antenna_pod_cur: Cursor): # first find match for all feeds in pa pa_feeds = [Feed(*a) for a in podcast_addict_cur.execute( 'select _id, name, description, author, ' - 'automaticRefresh, folderName from podcasts ' - 'where subscribed_status = 1 and is_virtual = 0')] + 'automaticRefresh, feed_url, folderName from podcasts ' + 'where subscribed_status = 1 and is_virtual = 0 and initialized_status = 1')] print("# Podcast addict feeds:") for feed in pa_feeds: print(feed.name) - print() - print() - - ap_feeds = [Feed(*a) for a in antenna_pod_cur.execute( - 'select id, title, description, author, keep_updated from Feeds ' - 'where downloaded = 1')] - - feed_attr_to_weight = { # - (lambda f: f.name): 0.85, # - (lambda f: f.author): 0.15, # - } - matcher = ObjectListMatcher(feed_attr_to_weight) + print("\n\n") - # should never be larger than the largest weight (otherwise is - # slightly unpredictable, as not every weight will be evaluated) - # value in range [0, 1] - matcher.minimum_similarity = 0.78 + 
ap_feeds = {a[5]: Feed(*a) for a in antenna_pod_cur.execute( + 'select id, title, description, author, keep_updated, download_url from Feeds ' + )} pa_to_ap = [] - ap_indices = matcher.get_indices(pa_feeds, ap_feeds) for n, pa in enumerate(pa_feeds): - ap_idx = ap_indices[n] - ap_name = '!!! NO MATCH !!!' - if ap_idx >= 0: - ap = ap_feeds[ap_idx] + pa_name = pa.name if pa.name else pa.feed_url + if pa.feed_url in ap_feeds: + ap = ap_feeds[pa.feed_url] ap_name = ap.name pa_to_ap.append((pa, ap)) - print(pa.name, ap_name, sep=" -> ") + print(pa_name, ap_name, sep=" -> ") print() if not confirmed("Is this correct? Can we continue?"): @@ -154,10 +141,10 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor, # pa_episodes = list(podcast_addict_cur.execute( # # 0 1 n2 n3 n4 'select _id, name, seen_status, favorite, local_file_name, ' - # n5 n6 n7 n8 - 'playbackDate, duration_ms, chapters_extracted, download_url ' + # n5 n6 n7 n8 n9 + 'playbackDate, duration_ms, chapters_extracted, download_url, position_to_resume ' 'from episodes where podcast_id = ? and ' - '(seen_status = 1 or ' + '(seen_status = 1 or position_to_resume < 0 or ' '(local_file_name != "" and local_file_name IS NOT NULL))', (pa.id,))) @@ -165,13 +152,11 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor, # 'select fi.id, fi.title, fm.download_url ' 'from FeedItems fi ' 'LEFT JOIN FeedMedia fm ON fi.id = fm.feeditem ' - 'where fi.feed = ? and fi.read = 0 ', (ap.id,))) + 'where fi.feed = ? 
and fi.read = 0 ' + , (ap.id,))) - print() combinations = len(pa_episodes) * len(ap_episodes) - print(f"Rough estimate: {combinations / 4000:.2f} seconds") - print() - print() + print(f"\nRough estimate: {combinations / 4000:.2f} seconds\n\n") pa_indices = ITEM_MATCHER.get_indices(ap_episodes, pa_episodes) seen_match_count = 0 @@ -185,13 +170,15 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor, # if MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE and ap_url is not None: ap_url = ap_url.strip() if len(ap_url) > 9: - for pa_idx, pa_ep in enumerate(pa_episodes): + for pa_idx_urlmatch, pa_ep in enumerate(pa_episodes): if not pa_ep[8]: continue pa_url = pa_ep[8].strip() if pa_url and pa_url == ap_url: + print(f"! Fallback to URL match for: {ap_ep[1]}") found = True + pa_idx = pa_idx_urlmatch break if not found: @@ -203,6 +190,10 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor, # if pa_ep[2]: transfer_from_seen_ep_to_ep(antenna_pod_cur, podcast_addict_cur, # pa_ep, ap_ep) + else: + transfer_progress_ep_to_ep(antenna_pod_cur, podcast_addict_cur, # + pa_ep, ap_ep) + if pa_ep[3]: antenna_pod_cur.execute( @@ -227,10 +218,9 @@ def transfer_chapters(antenna_pod_cur: Cursor, # for title, start in podcast_addict_cur.execute( # "select name, start from chapters " "where podcastId = ? 
and episodeId = ?", (pa_feed_id, pa_ep[0])): - # we use chapter type 2 (id3) simply because it seems most likely antenna_pod_cur.execute("INSERT INTO SimpleChapters " - "(title, start, feeditem, type) VALUES " - "(?, ?, ?, 2)", (title, start, ap_ep[0],)) + "(title, start, feeditem) VALUES " + "(?, ?, ?)", (title, start, ap_ep[0])) def transfer_from_dld_ep_to_ep(antenna_pod_cur: Cursor, # @@ -238,7 +228,7 @@ def transfer_from_dld_ep_to_ep(antenna_pod_cur: Cursor, # pa_ep: tuple, # ap_ep: tuple, # pa_folder_name: str): - pa_ep_id, _, _, _, pa_local_file_name, _, _, _, _ = pa_ep + pa_ep_id, _, _, _, pa_local_file_name, _, _, _, _, _ = pa_ep dir_path = EPISODES_DIR_PATH.rstrip("/") + "/" + pa_folder_name file_path = dir_path + "/" + pa_local_file_name @@ -253,8 +243,8 @@ def transfer_from_seen_ep_to_ep(antenna_pod_cur: Cursor, # podcast_addict_cur: Cursor, # pa_ep: tuple, # ap_ep: tuple): - print(ap_ep[1], " <> ", pa_ep[1]) - pa_ep_id, _, _, _, _, pa_playbackDate, pa_duration_ms, _, _ = pa_ep + print(ap_ep[1], " <> ", pa_ep[1]) + pa_ep_id, _, _, _, _, pa_playbackDate, pa_duration_ms, _, _, _ = pa_ep antenna_pod_cur.execute("UPDATE FeedItems SET read = 1 WHERE id = ?", (ap_ep[0],)) @@ -266,13 +256,26 @@ def transfer_from_seen_ep_to_ep(antenna_pod_cur: Cursor, # (pa_playbackDate, pa_playbackDate, pa_duration_ms, ap_ep[0],)) +def transfer_progress_ep_to_ep(antenna_pod_cur: Cursor, + podcast_addict_cur: Cursor, + pa_ep: tuple, + ap_ep: tuple): + print(ap_ep[1], " <> ", pa_ep[1]) + pa_ep_id, _, _, _, _, pa_playbackDate, pa_duration_ms, _, _, pa_position = pa_ep + + antenna_pod_cur.execute("UPDATE FeedMedia " + "SET last_played_time = ?, " + "position = ?, " + "played_duration = ? 
" + "WHERE feeditem = ?", + (pa_playbackDate, pa_position, pa_position, + ap_ep[0],)) + ap_db, pa_db = get_antenna_pod_and_podcast_addict_backup_path() -print() -print("AntennaPod .db file found:", ap_db) +print("\nAntennaPod .db file found:", ap_db) print("Podcast Addict .db file found:", pa_db) -print() -print() +print("\n") podcast_addict_con = None antenna_pod_con = None From 5f697bdee68ae6e60447c48c0a3d42c2c268f888 Mon Sep 17 00:00:00 2001 From: Eliot Lash Date: Thu, 23 Jan 2025 17:32:24 -0800 Subject: [PATCH 4/5] Add support for migrating feed tags --- .gitignore | 26 ++---------------- pa_to_ap.py | 76 ++++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 65 insertions(+), 37 deletions(-) diff --git a/.gitignore b/.gitignore index a8bf3cb..29a23a4 100644 --- a/.gitignore +++ b/.gitignore @@ -128,30 +128,8 @@ dmypy.json # Pyre type checker .pyre/ -# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider -# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 - -# User-specific stuff -.idea/**/workspace.xml -.idea/**/tasks.xml -.idea/**/usage.statistics.xml -.idea/**/dictionaries -.idea/**/shelf - -# AWS User-specific -.idea/**/aws.xml - -# Generated files -.idea/**/contentModel.xml - -# Sensitive or high-churn files -.idea/**/dataSources/ -.idea/**/dataSources.ids -.idea/**/dataSources.local.xml -.idea/**/sqlDataSources.xml -.idea/**/dynamic.xml -.idea/**/uiDesigner.xml -.idea/**/dbnavigator.xml +# PyCharm / JetBrains IDEs +/.idea # pa_to_ap specific stuff /podcast_addict_extracted diff --git a/pa_to_ap.py b/pa_to_ap.py index 3b34d76..03da3f3 100644 --- a/pa_to_ap.py +++ b/pa_to_ap.py @@ -3,7 +3,8 @@ import sys import zipfile import sqlite3 -from dataclasses import dataclass +import functools +from dataclasses import dataclass, field from operator import itemgetter from pathlib import Path from sqlite3 import Cursor @@ -18,6 +19,7 @@ EPISODES_DIR_PATH = 
'/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict' MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE = True +AP_TAG_SEPARATOR = "\u001e" # Separator character for AP tag column blob @dataclass class Feed: @@ -27,8 +29,31 @@ class Feed: author: str keep_updated: int feed_url: str - folder_name: str = '' +@dataclass +class PAFeed(Feed): + tag: int # Tag for single row from JOIN + tags: list[int] = field(default_factory=list, init=False) # For merged PAFeed rows this will contain all tags + folder_name: str + + def tag_names(self, pa_tags: dict[int, str]): + return [pa_tags[x] for x in self.tags] + +@dataclass +class APFeed(Feed): + _tags: str + + @property + def tags_str(self): + return self._tags + + @property + def tags(self): + return self._tags.split(AP_TAG_SEPARATOR) if self._tags is not None else list() + + @tags.setter + def tags(self, value: list[str]): + self._tags = AP_TAG_SEPARATOR.join(value) def error(msg): print("ERROR:", msg) @@ -75,19 +100,36 @@ def get_antenna_pod_and_podcast_addict_backup_path(): def transfer(podcast_addict_cur: Cursor, antenna_pod_cur: Cursor): - # first find match for all feeds in pa - pa_feeds = [Feed(*a) for a in podcast_addict_cur.execute( - 'select _id, name, description, author, ' - 'automaticRefresh, feed_url, folderName from podcasts ' - 'where subscribed_status = 1 and is_virtual = 0 and initialized_status = 1')] + # first find match for all feeds in pa, left join on tags relation table (so there may be multiple rows for each podcast) + pa_feeds_one_to_many_tags = [PAFeed(*a) for a in podcast_addict_cur.execute( + 'SELECT podcasts._id, podcasts.name, description, author, ' + 'automaticRefresh, feed_url, tag_relation.tag_id, folderName FROM podcasts ' + 'LEFT JOIN tag_relation ON tag_relation.podcast_id = podcasts._id ' + 'WHERE subscribed_status = 1 AND is_virtual = 0 AND initialized_status = 1')] + + # Collate multiple JOIN rows for each podcast if they had multiple tags + def 
reduce_by_tag(feeds: dict[str, PAFeed], current_feed: PAFeed): + if current_feed.id not in feeds: + if current_feed.tag is not None: + current_feed.tags.append(current_feed.tag) + feeds[current_feed.id] = current_feed + elif current_feed.tag is not None: + existing_feed: PAFeed = feeds[current_feed.id] + existing_feed.tags.append(current_feed.tag) + return feeds + + pa_feeds_dict = functools.reduce(reduce_by_tag, pa_feeds_one_to_many_tags, dict()) + pa_feeds = pa_feeds_dict.values() + + pa_tags: dict[int, str] = dict(podcast_addict_cur.execute('SELECT _id, name FROM tags')) print("# Podcast addict feeds:") for feed in pa_feeds: print(feed.name) print("\n\n") - ap_feeds = {a[5]: Feed(*a) for a in antenna_pod_cur.execute( - 'select id, title, description, author, keep_updated, download_url from Feeds ' + ap_feeds = {a[5]: APFeed(*a) for a in antenna_pod_cur.execute( + 'select id, title, description, author, keep_updated, download_url, tags from Feeds ' )} pa_to_ap = [] @@ -113,13 +155,13 @@ def transfer(podcast_addict_cur: Cursor, antenna_pod_cur: Cursor): # # FIXME: make it work if premium and non-premium share same name # if ap.name == "Name of same podcast but premium version": # transfer_from_feed_to_feed(podcast_addict_cur, - # antenna_pod_cur, pa, ap) + # antenna_pod_cur, pa, ap, pa_tags) # break # break for pa, ap in pa_to_ap: - transfer_from_feed_to_feed(podcast_addict_cur, antenna_pod_cur, pa, ap) + transfer_from_feed_to_feed(podcast_addict_cur, antenna_pod_cur, pa, ap, pa_tags) print() # break @@ -130,8 +172,9 @@ def transfer(podcast_addict_cur: Cursor, antenna_pod_cur: Cursor): def transfer_from_feed_to_feed(podcast_addict_cur: Cursor, # antenna_pod_cur: Cursor, # - pa: Feed, # - ap: Feed): + pa: PAFeed, # + ap: APFeed, + pa_tags: dict[int, str]): print(f'# Feed: {ap.name}') antenna_pod_cur.execute("UPDATE Feeds " "SET keep_updated = ? 
" @@ -160,6 +203,13 @@ def transfer_from_feed_to_feed(podcast_addict_cur: Cursor, # pa_indices = ITEM_MATCHER.get_indices(ap_episodes, pa_episodes) seen_match_count = 0 + # Transfer tags, merge any existing tags with PA tags + ap.tags = list(set(ap.tags).union(pa.tag_names(pa_tags))) + antenna_pod_cur.execute("UPDATE Feeds " + "SET tags = ? " + "WHERE id = ?", # + (ap.tags_str, ap.id,)) + for ap_ep, pa_idx in zip(ap_episodes, pa_indices): if pa_idx < 0: From fa72ac3b2a250b100a7a7ac2977f74f47e97d17b Mon Sep 17 00:00:00 2001 From: Joschua Gandert <3063858+jgandert@users.noreply.github.com> Date: Mon, 25 Aug 2025 16:21:08 +0200 Subject: [PATCH 5/5] Closes #4: Mention which file to copy --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b7a391a..976d591 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ MATCH_ON_EPISODE_URL_IF_COULD_NOT_FIND_A_MATCH_OTHERWISE = True 4. Put them in the same folder as this script 5. Run the [`pa_to_ap.py`](pa_to_ap.py) script (AntennaPod db file will be **modified**!) in a terminal 6. Confirm that matches are correct (if they aren't you may need to increase `min_similarity`) -7. Copy the modified db file back to your phone +7. Copy the modified `.db` file (the one that you put in the folder) back to your phone 8. If you chose to enable `TRANSFER_DOWNLOADED_EPISODES` (this is on by default): 1. Create `/storage/emulated/0/Android/data/de.danoeh.antennapod/files/media/from_podcast_addict` 2. Manually move (or copy) the folders **inside**