Skip to content

Commit b0c5beb

Browse files
committed
Add option to prepend non-HTML links when downloading recursively
because that makes it more likely that temporary links are downloaded before they expire, and because it more closely resembles the browsing experience
1 parent 103cbf1 commit b0c5beb

File tree

5 files changed

+60
-10
lines changed

5 files changed

+60
-10
lines changed

doc/wget.texi

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1916,6 +1916,10 @@ case.
19161916
Turn on recursive retrieving. @xref{Recursive Download}, for more
19171917
details. The default maximum depth is 5.
19181918

1919+
@item --queue-type=@var{queuetype}
1920+
Specify the queue type (@pxref{Recursive Download}). Accepted values are @samp{fifo} (the default)
1921+
and @samp{browser}.
1922+
19191923
@item -l @var{depth}
19201924
@itemx --level=@var{depth}
19211925
Specify recursion maximum depth level @var{depth} (@pxref{Recursive
@@ -2296,6 +2300,14 @@ documents linked by them, and so on. In other words, Wget first
22962300
downloads the documents at depth 1, then those at depth 2, and so on
22972301
until the specified maximum depth.
22982302

2303+
The @dfn{queue type} is either FIFO (the default) or browser. FIFO downloads
2304+
(dequeues) the first enqueued files first. Browser downloads the last enqueued
2305+
files first. Browser can prevent links from expiring before they are downloaded,
2306+
because it downloads them directly after their parent page, that is,
2307+
directly after the parent page and its temporary links have been
2308+
generated if it is a dynamic page. Pages sometimes use temporary links
2309+
to prevent direct links to files.
2310+
22992311
The maximum @dfn{depth} to which the retrieval may descend is specified
23002312
with the @samp{-l} option. The default maximum depth is five layers.
23012313

src/init.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ CMD_DECLARE (cmd_spec_htmlify);
104104
CMD_DECLARE (cmd_spec_mirror);
105105
CMD_DECLARE (cmd_spec_prefer_family);
106106
CMD_DECLARE (cmd_spec_progress);
107+
CMD_DECLARE (cmd_spec_queue_type);
107108
CMD_DECLARE (cmd_spec_recursive);
108109
CMD_DECLARE (cmd_spec_regex_type);
109110
CMD_DECLARE (cmd_spec_restrict_file_names);
@@ -247,6 +248,7 @@ static const struct {
247248
{ "proxypasswd", &opt.proxy_passwd, cmd_string }, /* deprecated */
248249
{ "proxypassword", &opt.proxy_passwd, cmd_string },
249250
{ "proxyuser", &opt.proxy_user, cmd_string },
251+
{ "queuetype", &opt.queue_type, cmd_spec_queue_type },
250252
{ "quiet", &opt.quiet, cmd_boolean },
251253
{ "quota", &opt.quota, cmd_bytes_sum },
252254
#ifdef HAVE_SSL
@@ -403,6 +405,8 @@ defaults (void)
403405
opt.restrict_files_nonascii = false;
404406
opt.restrict_files_case = restrict_no_case_restriction;
405407

408+
opt.queue_type = queue_type_fifo;
409+
406410
opt.regex_type = regex_type_posix;
407411

408412
opt.max_redirect = 20;
@@ -1441,6 +1445,23 @@ cmd_spec_recursive (const char *com, const char *val, void *place_ignored _GL_UN
14411445
return true;
14421446
}
14431447

1448+
/* Validate --queue-type and set the choice. */
1449+
1450+
static bool
1451+
cmd_spec_queue_type (const char *com, const char *val, void *place_ignored _GL_UNUSED)
1452+
{
1453+
static const struct decode_item choices[] = {
1454+
{ "fifo", queue_type_fifo },
1455+
{ "browser", queue_type_browser },
1456+
};
1457+
int queue_type = queue_type_fifo;
1458+
int ok = decode_string (val, choices, countof (choices), &queue_type);
1459+
if (!ok)
1460+
fprintf (stderr, _("%s: %s: Invalid value %s.\n"), exec_name, com, quote (val));
1461+
opt.queue_type = queue_type;
1462+
return ok;
1463+
}
1464+
14441465
/* Validate --regex-type and set the choice. */
14451466

14461467
static bool

src/main.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ static struct cmdline_option option_data[] =
272272
{ "proxy-passwd", 0, OPT_VALUE, "proxypassword", -1 }, /* deprecated */
273273
{ "proxy-password", 0, OPT_VALUE, "proxypassword", -1 },
274274
{ "proxy-user", 0, OPT_VALUE, "proxyuser", -1 },
275+
{ "queue-type", 0, OPT_VALUE, "queuetype", -1 },
275276
{ "quiet", 'q', OPT_BOOLEAN, "quiet", -1 },
276277
{ "quota", 'Q', OPT_VALUE, "quota", -1 },
277278
{ "random-file", 0, OPT_VALUE, "randomfile", -1 },
@@ -736,6 +737,8 @@ WARC options:\n"),
736737
Recursive download:\n"),
737738
N_("\
738739
-r, --recursive specify recursive download\n"),
740+
N_("\
741+
--queue-type=TYPE queue type (fifo|browser).\n"),
739742
N_("\
740743
-l, --level=NUMBER maximum recursion depth (inf or 0 for infinite)\n"),
741744
N_("\

src/options.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ struct options
4646
bool relative_only; /* Follow only relative links. */
4747
bool no_parent; /* Restrict access to the parent
4848
directory. */
49+
enum {
50+
queue_type_fifo,
51+
queue_type_browser
52+
} queue_type; /* Recursion queue type */
4953
int reclevel; /* Maximum level of recursion */
5054
bool dirstruct; /* Do we build the directory structure
5155
as we go along? */

src/recur.c

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,8 @@ url_queue_delete (struct url_queue *queue)
8888
xfree (queue);
8989
}
9090

91-
/* Enqueue a URL in the queue. The queue is FIFO: the items will be
92-
retrieved ("dequeued") from the queue in the order they were placed
93-
into it. */
91+
/* Enqueue a URL in the queue. Non-HTML links are prepended so that
92+
they are downloaded directly after their parent page. */
9493

9594
static void
9695
url_enqueue (struct url_queue *queue, struct iri *i,
@@ -110,23 +109,34 @@ url_enqueue (struct url_queue *queue, struct iri *i,
110109
if (queue->count > queue->maxcount)
111110
queue->maxcount = queue->count;
112111

113-
DEBUGP (("Enqueuing %s at depth %d\n",
112+
DEBUGP (("%s %s at depth %d\n", html_allowed ? "Appending" : "Prepending",
114113
quotearg_n_style (0, escape_quoting_style, url), depth));
115114
DEBUGP (("Queue count %d, maxcount %d.\n", queue->count, queue->maxcount));
116115

117116
if (i)
118-
DEBUGP (("[IRI Enqueuing %s with %s\n", quote_n (0, url),
119-
i->uri_encoding ? quote_n (1, i->uri_encoding) : "None"));
117+
DEBUGP (("[IRI %s %s with %s\n", html_allowed ? "Appending" : "Prepending",
118+
quote_n (0, url), i->uri_encoding ? quote_n (1, i->uri_encoding) : "None"));
120119

121-
if (queue->tail)
122-
queue->tail->next = qel;
123-
queue->tail = qel;
120+
if (html_allowed)
121+
{
122+
if (queue->tail)
123+
queue->tail->next = qel;
124+
queue->tail = qel;
125+
}
126+
else
127+
{
128+
if (queue->head)
129+
qel->next = queue->head;
130+
queue->head = qel;
131+
}
124132

125133
if (!queue->head)
126134
queue->head = queue->tail;
135+
if (!queue->tail)
136+
queue->tail = queue->head;
127137
}
128138

129-
/* Take a URL out of the queue. Return true if this operation
139+
/* Take the head URL out of the queue. Return true if this operation
130140
succeeded, or false if the queue is empty. */
131141

132142
static bool

0 commit comments

Comments
 (0)