From a3d4ea680d5211389f302996e1d79df7c1488f08 Mon Sep 17 00:00:00 2001
From: IBBoard
Date: Sat, 20 Aug 2016 17:03:51 +0100
Subject: [PATCH] Only remove Twitter image URLs, not 3rd party media URLs

This is important because Flickr and Instagram images (amongst others)
may have comments and other details on the page and the URL may be
integral to the content of the tweet, whereas Twitter images are
reliably at the end of the tweet.

We also now won't load 3rd party images when Twitter images exist
because this used to lead to double-images, and we now keep the URLs
---
 src/MediaDownloader.c | 21 +++++++++++++++++++++
 src/MediaDownloader.h |  1 +
 src/TextTransform.c   |  2 +-
 src/Types.c           | 28 ++++++++++++++++++----------
 4 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/src/MediaDownloader.c b/src/MediaDownloader.c
index 7fb7ef5f5..49dc29619 100644
--- a/src/MediaDownloader.c
+++ b/src/MediaDownloader.c
@@ -432,6 +432,27 @@ is_media_candidate (const char *url)
 
 }
 
+/*
+ * Returns TRUE when @url is Twitter-hosted media rather than a
+ * third-party media link. Matching is done on the result of
+ * canonicalize_url().
+ */
+gboolean
+is_twitter_media_candidate (const char *url)
+{
+  url = canonicalize_url (url);
+
+  return
+#ifdef VIDEO
+    /* Tweet photo links end in "/photo/1"; no full URL starts with it. */
+    g_str_has_suffix (url, "/photo/1") ||
+    g_str_has_prefix (url, "video.twimg.com/ext_tw_video") ||
+#endif
+    g_str_has_prefix (url, "pbs.twimg.com/media/")
+  ;
+
+}
+
 static void
 cb_media_downloader_init (CbMediaDownloader *downloader)
 {
diff --git a/src/MediaDownloader.h b/src/MediaDownloader.h
index 030a7e750..69e4d4ea2 100644
--- a/src/MediaDownloader.h
+++ b/src/MediaDownloader.h
@@ -59,6 +59,7 @@ void cb_media_downloader_disable (CbMediaDownloader *downloader);
 
 
 gboolean is_media_candidate (const char *url);
+gboolean is_twitter_media_candidate (const char *url);
 
 G_END_DECLS
 
diff --git a/src/TextTransform.c b/src/TextTransform.c
index 0a4125f10..0420e47d3 100644
--- a/src/TextTransform.c
+++ b/src/TextTransform.c
@@ -54,7 +54,7 @@ is_media_url (const char *url,
               const char *display_text,
               gsize media_count)
 {
-  return (is_media_candidate (url != NULL ? url : display_text) && media_count == 1) ||
+  return (is_twitter_media_candidate (url != NULL ? url : display_text) && media_count == 1) ||
          g_str_has_prefix (display_text, "pic.twitter.com/");
 }
 
diff --git a/src/Types.c b/src/Types.c
index 979725b25..d1663c406 100644
--- a/src/Types.c
+++ b/src/Types.c
@@ -364,16 +364,6 @@ cb_mini_tweet_parse_entities (CbMiniTweet *t,
       JsonObject *url = json_node_get_object (json_array_get_element (urls, i));
       const char *expanded_url = json_object_get_string_member (url, "expanded_url");
       JsonArray *indices;
-
-      if (is_media_candidate (expanded_url))
-        {
-          t->medias[t->n_medias] = cb_media_new ();
-          t->medias[t->n_medias]->url = g_strdup (expanded_url);
-          t->medias[t->n_medias]->type = cb_media_type_from_url (expanded_url);
-          t->medias[t->n_medias]->target_url = g_strdup (expanded_url);
-          t->n_medias ++;
-        }
-
       indices = json_object_get_array_member (url, "indices");
       t->entities[url_index].from = json_array_get_int_element (indices, 0);
       t->entities[url_index].to = json_array_get_int_element (indices, 1);
@@ -580,6 +570,24 @@ cb_mini_tweet_parse_entities (CbMiniTweet *t,
         }
     }
 
+  if (t->n_medias == 0)
+    {
+      for (i = 0, p = json_array_get_length (urls); i < p; i ++)
+        {
+          JsonObject *url = json_node_get_object (json_array_get_element (urls, i));
+          const char *expanded_url = json_object_get_string_member (url, "expanded_url");
+
+          if (is_media_candidate (expanded_url))
+            {
+              t->medias[t->n_medias] = cb_media_new ();
+              t->medias[t->n_medias]->url = g_strdup (expanded_url);
+              t->medias[t->n_medias]->type = cb_media_type_from_url (expanded_url);
+              t->medias[t->n_medias]->target_url = g_strdup (expanded_url);
+              t->n_medias ++;
+            }
+        }
+    }
+
   t->n_entities = url_index;
 #if 0
   g_debug ("Wasted entities: %d", max_entities - t->n_entities);