/**
 * Renders the rows of one or more tables as plain text, one row per line.
 *
 * A row with a label cell and a value cell becomes `"Label: value"`. Rows that
 * do not fit that shape keep whatever text they carry, so nothing readable is
 * silently dropped from the overview.
 *
 * @param {Function} $ - Cheerio instance used to wrap each row element.
 * @param {object} $el - Cheerio selection whose descendant `tr` rows are rendered.
 * @returns {string} Row lines joined with "\n"; "" when no row yields text.
 */
function formatTable($, $el) {
  return $el
    .find("tr")
    .map((_, row) => {
      // Accept <th> as well as <td> so row-header labels are not lost.
      const cells = $(row).find("th, td");
      if (cells.length === 0) return null;

      const first = getText(cells.eq(0));
      // Single-cell rows (e.g. a note spanning the table) are kept verbatim.
      if (cells.length === 1) return first || null;

      // Strip the trailing colon together with any whitespace around it, so
      // "Director :" and "Director:" both render as "Director: value".
      const key = first.replace(/\s*:\s*$/, "");
      const value = getText(cells.eq(1));
      // A label without a value carries no information; skip it.
      if (!value) return null;
      return key ? `${key}: ${value}` : value;
    })
    .get()
    .filter(Boolean)
    .join("\n");
}
/**
 * Builds the overview text used in `matchingHints` from the `.te` content of a
 * movie page.
 *
 * Each direct child of `.te` contributes one paragraph; paragraphs are
 * separated by a blank line. A child that is a table, or that contains one, is
 * rendered through `formatTable` instead of as flat text.
 *
 * @param {string} pageData - HTML of the movie page.
 * @returns {string} Formatted overview text; "" when `pageData` is empty or the
 *   page has no `.te` element.
 */
function formatOverviewText(pageData) {
  // Nothing to parse: behave as if the page had no `.te` content.
  if (!pageData) return "";

  const $ = cheerio.load(pageData);
  const $te = $(".te");
  if (!$te.length) return "";

  return $te
    .children()
    .map((_, el) => {
      const $el = $(el);
      // `.find()` only looks at descendants, so a <table> that is itself a
      // direct child of `.te` has to be matched explicitly.
      const $tables = $el.is("table") ? $el : $el.find("table");
      // NOTE: when a child wraps a table, only the table rows are emitted;
      // other text inside that same child is not included.
      return $tables.length ? formatTable($, $tables) : getText($el);
    })
    .get()
    .filter(Boolean)
    .join("\n\n");
}
Ruprai", @@ -643,6 +644,7 @@ const knownRemovablePhrases = [ "SAFAR ", "Scared To Dance:", "Schools Screening IWD26:", + "Doc Screening & Discussion", "Screening & Social", "Screening & Q&A:", "Screening + Q&A:", @@ -715,6 +717,7 @@ const knownRemovablePhrases = [ "UFF 2025:", "UKJFF 2025:", "UKJFF:", + "Anti-Valentine's Day:", "Valentine's Day:", "Galentine's Day:", "Galentine’s Day:", @@ -724,6 +727,7 @@ const knownRemovablePhrases = [ "Visions of Ukraine:", "Visions of an Otherworld:", "Wallace & Gromit in ", + "Wallace & Gromit :", "William Shakespeare’s", "Windrush Caribbean Film Festival 2025:", "Women of Almodóvar:", @@ -830,6 +834,7 @@ const knownRemovablePhrases = [ "Preview Screenings", "preview screening", "Previews", + "[Preview]", "preview", "live action", "Sapphic", diff --git a/common/tests/test-titles.json b/common/tests/test-titles.json index 53be1e37..387bffc6 100644 --- a/common/tests/test-titles.json +++ b/common/tests/test-titles.json @@ -50774,5 +50774,181 @@ { "input": "White Men Can't Jump", "output": "white men cant jump" + }, + { + "input": "THE CONSPIRACISTS + LIVE Q&A", + "output": "conspiracists" + }, + { + "input": "Parent & Baby Screening: The Voice of Hind Rajab", + "output": "voice of hind rajab" + }, + { + "input": "EcoDocs at Castlehaven Presents: The Grab Doc Screening & Discussion", + "output": "grab" + }, + { + "input": "[Preview] A Private Life [Vie privée]", + "output": "a private life" + }, + { + "input": "Mardaani 3 (Hindi)", + "output": "mardaani 3" + }, + { + "input": "Euphoria (Telugu)", + "output": "euphoria" + }, + { + "input": "Gandhi Talks (Hindi)", + "output": "gandhi talks" + }, + { + "input": "Lockdown (Tamil)", + "output": "lockdown" + }, + { + "input": "Paatki (Gujarati)", + "output": "paatki" + }, + { + "input": "Prakambanam (Malayalam)", + "output": "prakambanam" + }, + { + "input": "Sri Chidambaram (Telugu)", + "output": "sri chidambaram" + }, + { + "input": "Valathu Vashathe Kallan (Malayalam)", + "output": 
"valathu vashathe kallan" + }, + { + "input": "Film Jam Sunday Screening and Social (Feb)", + "output": "film jam sunday screening social" + }, + { + "input": "The Testament of Ann Lee", + "output": "testament of ann lee" + }, + { + "input": "Toddler Club: Lilo & Stitch (2002)", + "output": "lilo stitch (2002)" + }, + { + "input": "Largo - cinema event", + "output": "largo" + }, + { + "input": "3rd Emerging Filmmakers Showcase", + "output": "3rd emerging filmmakers showcase" + }, + { + "input": "The Glass Essays", + "output": "glass essays" + }, + { + "input": "Mardaani 3", + "output": "mardaani 3" + }, + { + "input": "Prakambanam", + "output": "prakambanam" + }, + { + "input": "Sinners (IMAX)", + "output": "sinners" + }, + { + "input": "Valathu Vashathe Kallan", + "output": "valathu vashathe kallan" + }, + { + "input": "Secret Screening (02-02-26)", + "output": "mystery movie" + }, + { + "input": "Arco + Q&A with Natalie Portman and Ugo Bienvenu", + "output": "arco" + }, + { + "input": "Crime 101 + Bart Layton Director Q&A", + "output": "crime 101" + }, + { + "input": "Molly Vs the Machines + Recorded Q&A", + "output": "molly vs the machines" + }, + { + "input": "Drink & Dine: One Battle After Another", + "output": "one battle after another" + }, + { + "input": "Crystal Palace Friends of Palestine presents All That's Left of You + Q&A", + "output": "all thats left of you" + }, + { + "input": "Dead Man's Wire", + "output": "dead mans wire" + }, + { + "input": "Wallace & Gromit : A Matter of Loaf and Death", + "output": "a matter of loaf death" + }, + { + "input": "Category H: OBJECT LOVERS! 
Christine & Killer Sofa", + "output": "object lovers christine killer sofa" + }, + { + "input": "Molly Vs THE MACHINES + Discussion", + "output": "molly vs the machines" + }, + { + "input": "Exploding Cinema", + "output": "exploding cinema" + }, + { + "input": "Paratha Club", + "output": "paratha club" + }, + { + "input": "Peckham Filmmakers - Winter Showcase", + "output": "peckham filmmakers" + }, + { + "input": "Anti-Valentine's Day: Switchblade Romance", + "output": "switchblade romance" + }, + { + "input": "Valentine's Day: 100 Nights of Hero", + "output": "100 nights of hero" + }, + { + "input": "Valentine's Day: Pillion", + "output": "pillion" + }, + { + "input": "Valentine's Day: The History of Sound", + "output": "history of sound" + }, + { + "input": "'L'Inferno' (1911) with live original score", + "output": "linferno (1911)" + }, + { + "input": "Guest Event - Anything that Bleeds: Book Launch", + "output": "anything that bleeds" + }, + { + "input": "A Room With a View + Q&A with Keith Lodwick", + "output": "a room with a view" + }, + { + "input": "The London International Animation Festival presents Wonderful Animated Shorts for 3 -12 year-olds", + "output": "wonderful animated shorts for 3" + }, + { + "input": "LSFF x Nunhead Community Cinema: London Lives", + "output": "lsff x nunhead community cinema london lives" } ]