Skip to content
This repository was archived by the owner on Jan 4, 2026. It is now read-only.

Commit 45ed32e

Browse files
committed
Avoid getting denied/throttled when scraping Regal movies
1 parent be78865 commit 45ed32e

File tree

1 file changed

+59
-10
lines changed

1 file changed

+59
-10
lines changed

src/Theorem/Middleware/RegalMiddleware.cs

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ namespace Theorem.Middleware
2222
/// </summary>
2323
public class RegalMiddleware : IMiddleware
2424
{
25+
const int NUM_RETRIES = 7;
26+
2527
private readonly ILogger<RegalMiddleware> _logger;
2628

2729
private ConfigurationSection _configuration;
@@ -38,8 +40,6 @@ public class RegalMiddleware : IMiddleware
3840

3941
private Timer _postTimer;
4042

41-
private HttpClient _httpClient = new();
42-
4343
public RegalMiddleware(
4444
ILogger<RegalMiddleware> logger,
4545
ConfigurationSection configuration,
@@ -251,6 +251,12 @@ private async Task<FilmScheduleModel> GetMoviesForLocationsAsync(
251251
{
252252
var returnValue = new FilmScheduleModel();
253253

254+
var handler = new HttpClientHandler()
255+
{
256+
CookieContainer = new()
257+
};
258+
HttpClient httpClient = new(handler);
259+
254260
// First, we need a "build id", since it's needed in the Regal API URI.
255261
// We can extract it from any Regal front-end page.
256262
string buildId;
@@ -260,11 +266,12 @@ private async Task<FilmScheduleModel> GetMoviesForLocationsAsync(
260266
RequestUri = new Uri("https://www.regmovies.com/"),
261267
Method = HttpMethod.Get,
262268
};
263-
var result = await _httpClient.SendAsync(request);
269+
PretendToBeEdge(request.Headers);
270+
var result = await httpClient.SendAsync(request);
264271
if (!result.IsSuccessStatusCode)
265272
{
266273
throw new ApplicationException("Could not retrieve Regal homepage to read " +
267-
"build id.");
274+
"build id");
268275
}
269276
var resultContent = await result.Content.ReadAsStringAsync();
270277
var buildIdMatch = Regex.Match(resultContent,
@@ -283,7 +290,8 @@ private async Task<FilmScheduleModel> GetMoviesForLocationsAsync(
283290
RequestUri = new Uri(
284291
$"https://www.regmovies.com/_next/data/{buildId}/en/theatres.json")
285292
};
286-
var result = await _httpClient.SendAsync(request);
293+
PretendToBeEdge(request.Headers);
294+
var result = await httpClient.SendAsync(request);
287295
if (!result.IsSuccessStatusCode)
288296
{
289297
throw new ApplicationException("Could not retrieve Regal theater list.");
@@ -309,17 +317,45 @@ private async Task<FilmScheduleModel> GetMoviesForLocationsAsync(
309317
for (var currentDate = startDate; currentDate <= endDate;
310318
currentDate = currentDate.AddDays(1))
311319
{
312-
var request = new HttpRequestMessage()
320+
_logger.LogDebug("Fetching movies for location {} on {}...", locationCode,
321+
currentDate);
322+
HttpResponseMessage result;
323+
for (int i = 0; true; ++i)
324+
{
325+
var request = new HttpRequestMessage()
313326
{
314327
RequestUri = new Uri("https://www.regmovies.com/api/getShowtimes" +
315328
$"?theatres={locationCode}" +
316329
$"&date={currentDate:MM-dd-yyyy}" +
317330
"&hoCode=&ignoreCache=false&moviesOnly=false")
318331
};
319-
var result = await _httpClient.SendAsync(request);
320-
if (!result.IsSuccessStatusCode)
321-
{
322-
throw new ApplicationException("Could not retrieve Regal showtime list.");
332+
PretendToBeEdge(request.Headers);
333+
result = await httpClient.SendAsync(request);
334+
if (result.IsSuccessStatusCode)
335+
{
336+
break;
337+
}
338+
else
339+
{
340+
var resultStr = await result.Content.ReadAsStringAsync();
341+
if (i < NUM_RETRIES)
342+
{
343+
var delayMs = (int)(Math.Pow(2, i) * 1000);
344+
_logger.LogWarning("Couldn't retrieve Regal showtime list for " +
345+
"location {}. Retrying in {}ms...", locationCode, delayMs);
346+
handler = new HttpClientHandler()
347+
{
348+
CookieContainer = new()
349+
};
350+
httpClient = new(handler);
351+
await Task.Delay(delayMs);
352+
}
353+
else
354+
{
355+
throw new ApplicationException(
356+
"Could not retrieve Regal showtime list.");
357+
}
358+
}
323359
}
324360
var parsedContent = await JsonSerializer.DeserializeAsync<
325361
RegalShowtimesResponse>(await result.Content.ReadAsStreamAsync());
@@ -370,6 +406,19 @@ private async Task<FilmScheduleModel> GetMoviesForLocationsAsync(
370406
return returnValue;
371407
}
372408

409+
/// <summary>
/// Adds a browser-like set of request headers to <paramref name="headers"/> so that
/// the outgoing request resembles one sent by Microsoft Edge. This is intended to keep
/// the Regal site from denying or throttling the scraper's requests.
/// </summary>
/// <remarks>
/// Sets or adds Accept, Accept-Language, Cache-Control, Pragma, Priority and
/// User-Agent. The User-Agent value identifies as Edge 133 (Chromium-based) on
/// 64-bit Windows 10.
/// </remarks>
/// <param name="headers">
/// The header collection of the request about to be sent; it is modified in place.
/// </param>
private static void PretendToBeEdge(HttpRequestHeaders headers)
410+
{
411+
headers.Accept.ParseAdd("text/html,application/xhtml+xml,application/xml;q=0.9," +
412+
"image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7");
413+
headers.AcceptLanguage.ParseAdd("en-US,en;q=0.9");
414+
// Cache-Control: no-cache, no-store, max-age=0 (with the matching Pragma below).
headers.CacheControl = new CacheControlHeaderValue() {
415+
NoCache = true, NoStore = true, MaxAge = TimeSpan.FromMinutes(0) };
416+
headers.Pragma.ParseAdd("no-cache");
417+
// HttpRequestHeaders has no typed property for Priority here, so it is added by name.
headers.Add("Priority", "u=0, i");
418+
headers.UserAgent.ParseAdd("Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
419+
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0");
420+
}
421+
373422
/// <summary>
374423
/// Given a day of week and time of day, returns the DateTimeOffset
375424
/// of the next time this occurs.

0 commit comments

Comments
 (0)