Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 69 additions & 3 deletions cinemas/riversidestudios.co.uk/transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,26 @@ const {
} = require("../../common/utils");
const attributes = require("./attributes");

/**
 * Renders the rows of the given table selection as plain-text lines of the
 * form "key: value", one line per row, joined with newlines.
 *
 * Rows with fewer than two `td` cells are skipped, as are rows where either
 * the key or the value text is empty. A single trailing colon on the key is
 * removed before the ": " separator is added, so keys are not double-coloned.
 *
 * @param {Function} $ - Cheerio instance used to wrap each row element.
 * @param {object} $el - Cheerio selection whose `tr` descendants are read.
 * @returns {string} The formatted lines, or "" when no row qualifies.
 */
function formatTable($, $el) {
  const lines = [];

  $el.find("tr").each((_, row) => {
    const $cells = $(row).find("td");
    if ($cells.length >= 2) {
      // Strip a trailing colon from the label; the separator re-adds one.
      const label = getText($cells.eq(0)).replace(/:$/, "");
      const content = getText($cells.eq(1));
      if (label && content) {
        lines.push(`${label}: ${content}`);
      }
    }
  });

  return lines.join("\n");
}

function getOverviewData(pageData) {
const $ = cheerio.load(pageData);
const data = {};

// First, try to get data from .event-details .event-detail-section
$(".event-details .event-detail-section").each(function () {
const key = basicNormalize(getText($(this).find("div").eq(0)));
const value = getText($(this).find("div").eq(1));
Expand All @@ -30,13 +47,62 @@ function getOverviewData(pageData) {
}
}
});

// Also try .te figure table (newer format)
$(".te figure table tr").each(function () {
const cells = $(this).find("td");
if (cells.length < 2) return;
const key = basicNormalize(getText(cells.eq(0)));
const value = getText(cells.eq(1));
switch (key) {
case "year of release:": {
data.year = value;
break;
}
case "director:": {
data.directors = value;
break;
}
case "cast:": {
data.actors = value;
break;
}
case "age rating:": {
data.classification = value;
break;
}
}
});

return data;
}

/**
 * Builds the overview text used in matchingHints from the page's `.te`
 * container.
 *
 * Each direct child of `.te` becomes one section: a child containing a table
 * is rendered through `formatTable`, any other child through `getText`.
 * Empty sections are dropped and the remainder are separated by a blank line.
 *
 * @param {string} pageData - Markup of the movie page, passed to cheerio.load.
 * @returns {string} The formatted overview, or "" when there is no `.te`.
 */
function formatOverviewText(pageData) {
  const $ = cheerio.load(pageData);
  const $container = $(".te");
  if ($container.length === 0) {
    return "";
  }

  const sections = [];
  $container.children().each((_, child) => {
    const $child = $(child);
    const $tables = $child.find("table");
    const text =
      $tables.length > 0 ? formatTable($, $tables) : getText($child);
    // Skip children that produce no text so no stray blank sections appear.
    if (text) {
      sections.push(text);
    }
  });

  return sections.join("\n\n");
}

async function transform({ movieListPage, moviePages }, sourcedEvents) {
const movies = movieListPage.map((movieData) => {
const { title, url: urlRaw } = movieData;
const { year, directors, actors } = getOverviewData(moviePages[urlRaw]);
const { year, directors, actors, classification } = getOverviewData(
moviePages[urlRaw],
);
const url = encodeURI(urlRaw);
const showingId = generateShowingId(attributes, movieData.id);

Expand All @@ -45,7 +111,7 @@ async function transform({ movieListPage, moviePages }, sourcedEvents) {
year,
directors,
actors,
classification: movieData.age_rating_class,
classification: movieData.age_rating_class || classification,
});

const performances = Object.keys(movieData.performances).flatMap(
Expand Down Expand Up @@ -102,7 +168,7 @@ async function transform({ movieListPage, moviePages }, sourcedEvents) {
overview,
performances: uniquePerformances,
matchingHints: {
overview: getText(cheerio.load(moviePages[urlRaw])(".te")),
overview: formatOverviewText(moviePages[urlRaw]),
},
};
});
Expand Down
5 changes: 5 additions & 0 deletions common/known-removable-phrases.js
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ const knownRemovablePhrases = [
" with score by Pet Shop Boys",
" with live score by Maxim Melton",
" with live music from millie turner",
" with live original score",
" with original score by Body Orchestra",
" Music by Pet Shop Boys",
" with Ranjit S. Ruprai",
Expand Down Expand Up @@ -643,6 +644,7 @@ const knownRemovablePhrases = [
"SAFAR ",
"Scared To Dance:",
"Schools Screening IWD26:",
"Doc Screening & Discussion",
"Screening & Social",
"Screening & Q&A:",
"Screening + Q&A:",
Expand Down Expand Up @@ -715,6 +717,7 @@ const knownRemovablePhrases = [
"UFF 2025:",
"UKJFF 2025:",
"UKJFF:",
"Anti-Valentine's Day:",
"Valentine's Day:",
"Galentine's Day:",
"Galentine’s Day:",
Expand All @@ -724,6 +727,7 @@ const knownRemovablePhrases = [
"Visions of Ukraine:",
"Visions of an Otherworld:",
"Wallace & Gromit in ",
"Wallace & Gromit :",
"William Shakespeare’s",
"Windrush Caribbean Film Festival 2025:",
"Women of Almodóvar:",
Expand Down Expand Up @@ -830,6 +834,7 @@ const knownRemovablePhrases = [
"Preview Screenings",
"preview screening",
"Previews",
"[Preview]",
"preview",
"live action",
"Sapphic",
Expand Down
176 changes: 176 additions & 0 deletions common/tests/test-titles.json
Original file line number Diff line number Diff line change
Expand Up @@ -50774,5 +50774,181 @@
{
"input": "White Men Can't Jump",
"output": "white men cant jump"
},
{
"input": "THE CONSPIRACISTS + LIVE Q&A",
"output": "conspiracists"
},
{
"input": "Parent & Baby Screening: The Voice of Hind Rajab",
"output": "voice of hind rajab"
},
{
"input": "EcoDocs at Castlehaven Presents: The Grab Doc Screening & Discussion",
"output": "grab"
},
{
"input": "[Preview] A Private Life [Vie privée]",
"output": "a private life"
},
{
"input": "Mardaani 3 (Hindi)",
"output": "mardaani 3"
},
{
"input": "Euphoria (Telugu)",
"output": "euphoria"
},
{
"input": "Gandhi Talks (Hindi)",
"output": "gandhi talks"
},
{
"input": "Lockdown (Tamil)",
"output": "lockdown"
},
{
"input": "Paatki (Gujarati)",
"output": "paatki"
},
{
"input": "Prakambanam (Malayalam)",
"output": "prakambanam"
},
{
"input": "Sri Chidambaram (Telugu)",
"output": "sri chidambaram"
},
{
"input": "Valathu Vashathe Kallan (Malayalam)",
"output": "valathu vashathe kallan"
},
{
"input": "Film Jam Sunday Screening and Social (Feb)",
"output": "film jam sunday screening social"
},
{
"input": "The Testament of Ann Lee",
"output": "testament of ann lee"
},
{
"input": "Toddler Club: Lilo & Stitch (2002)",
"output": "lilo stitch (2002)"
},
{
"input": "Largo - cinema event",
"output": "largo"
},
{
"input": "3rd Emerging Filmmakers Showcase",
"output": "3rd emerging filmmakers showcase"
},
{
"input": "The Glass Essays",
"output": "glass essays"
},
{
"input": "Mardaani 3",
"output": "mardaani 3"
},
{
"input": "Prakambanam",
"output": "prakambanam"
},
{
"input": "Sinners (IMAX)",
"output": "sinners"
},
{
"input": "Valathu Vashathe Kallan",
"output": "valathu vashathe kallan"
},
{
"input": "Secret Screening (02-02-26)",
"output": "mystery movie"
},
{
"input": "Arco + Q&A with Natalie Portman and Ugo Bienvenu",
"output": "arco"
},
{
"input": "Crime 101 + Bart Layton Director Q&A",
"output": "crime 101"
},
{
"input": "Molly Vs the Machines + Recorded Q&A",
"output": "molly vs the machines"
},
{
"input": "Drink & Dine: One Battle After Another",
"output": "one battle after another"
},
{
"input": "Crystal Palace Friends of Palestine presents All That's Left of You + Q&A",
"output": "all thats left of you"
},
{
"input": "Dead Man's Wire",
"output": "dead mans wire"
},
{
"input": "Wallace & Gromit : A Matter of Loaf and Death",
"output": "a matter of loaf death"
},
{
"input": "Category H: OBJECT LOVERS! Christine & Killer Sofa",
"output": "object lovers christine killer sofa"
},
{
"input": "Molly Vs THE MACHINES + Discussion",
"output": "molly vs the machines"
},
{
"input": "Exploding Cinema",
"output": "exploding cinema"
},
{
"input": "Paratha Club",
"output": "paratha club"
},
{
"input": "Peckham Filmmakers - Winter Showcase",
"output": "peckham filmmakers"
},
{
"input": "Anti-Valentine's Day: Switchblade Romance",
"output": "switchblade romance"
},
{
"input": "Valentine's Day: 100 Nights of Hero",
"output": "100 nights of hero"
},
{
"input": "Valentine's Day: Pillion",
"output": "pillion"
},
{
"input": "Valentine's Day: The History of Sound",
"output": "history of sound"
},
{
"input": "'L'Inferno' (1911) with live original score",
"output": "linferno (1911)"
},
{
"input": "Guest Event - Anything that Bleeds: Book Launch",
"output": "anything that bleeds"
},
{
"input": "A Room With a View + Q&A with Keith Lodwick",
"output": "a room with a view"
},
{
"input": "The London International Animation Festival presents Wonderful Animated Shorts for 3 -12 year-olds",
"output": "wonderful animated shorts for 3"
},
{
"input": "LSFF x Nunhead Community Cinema: London Lives",
"output": "lsff x nunhead community cinema london lives"
}
]