Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
.DS_Store
node_modules
node_modules
.idea
62 changes: 49 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,28 +1,36 @@
# node-wget
# wget-improved

A download tool, now supporting http/https resource and http/https proxy, written in nodejs.
wget-improved simplifies retrieving files from any URL

Improvements over [wuchengwei/node-wget](https://github.com/wuchengwei/node-wget)
- Handles 302 redirects (including infinite redirect loops)
- Passes URL parameters
- Better error reporting
- Does not write using append (uses w+ identical to wget)
- Handles gzip compression, allowing you to automatically gunzip the stream

## Install

# Installing
```
npm install wget
npm install wget-improved --save
```

# Usage

<a name="download" />
## download(src, output, options)

```js
var wget = require('wget');
var src = 'https://raw.github.com/Fyrd/caniuse/master/data.json';
var output = '/tmp/data.json';
var wget = require('wget-improved');
var src = 'http://nodejs.org/images/logo.svg';
var output = '/tmp/logo.svg';
var options = {
proxy: 'http://host:port'
// see options below
};
var download = wget.download(src, output, options);
download.on('error', function(err) {
console.log(err);
});
download.on('start', function(fileSize) {
console.log(fileSize);
});
download.on('end', function(output) {
console.log(output);
});
Expand All @@ -31,7 +39,6 @@ download.on('progress', function(progress) {
});
```

<a name="request" />
## request(options, callback)

```js
Expand Down Expand Up @@ -64,4 +71,33 @@ req.end();
req.on('error', function(err) {
console.log(err);
});
```
```

## options

```js

options = {}
// Set to true to have any gzip stream automatically decompressed before saving
options.gunzip = false;
options.proxy = {};
options.proxy.protocol = 'http';
options.proxy.host = 'someproxy.org';
options.proxy.port = 1337;
options.proxy.proxyAuth = '{basic auth}';
options.proxy.headers = {'User-Agent': 'Node'};
```

## CLI

```bash
# If installed globally
nwget https://raw.github.com/Fyrd/caniuse/master/data.json -O /tmp/data.json

# If not installed globally
./node_modules/.bin/nwget https://raw.github.com/Fyrd/caniuse/master/data.json -O /tmp/data.json
```

## Todo

- Enable gzip when using request method
42 changes: 42 additions & 0 deletions bin/nwget
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env node

/**

Usage:
nwget [URL] [OPTIONS]

Example:
nwget https://raw.github.com/Fyrd/caniuse/master/data.json -O /tmp/data.json

**/

var wget = require('../lib/wget');
var path = require('path');

var args = require('minimist')(process.argv);
var url = args._[2]
var output = args.O

if (args.help) return console.log(`Usage: wget [URL] [OPTIONS]

Download:
-O, --output-document=FILE write documents to FILE
`)

if (!url) return console.error('The first argument must be a URL to a downloadable resource (e.g. nwget https://raw.github.com/Fyrd/caniuse/master/data.json)')

if (!output) return console.error('The second argument must be a file path for the downloaded resource (e.g. nwget https://raw.github.com/Fyrd/caniuse/master/data.json -O /tmp/data.json)')

// console.log(url, output);

var download = wget.download(url, path.resolve(output));

download.on('error', function(err) {
console.error(err);
});
download.on('end', function(output) {
console.log(output);
});
download.on('progress', function(progress) {
console.log(progress);
});
Empty file removed bin/nwget.js
Empty file.
120 changes: 92 additions & 28 deletions lib/wget.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,34 @@ var http = require('http');
var https = require('https');
var tunnel = require('tunnel');
var url = require('url');
var util = require('util');
var zlib = require('zlib');
var fs = require('fs');
var EventEmitter = require('events').EventEmitter;

function download(src, output, options) {
var downloader = new EventEmitter(),
/**
* Downloads a file using http get and request
* @param {string} src - The http URL to download from
* @param {string} output - The filepath to save to
* @param {object} options - Options object
* @param {object} _parentEvent - Used when there is a 302 redirect and state needs to be maintained across the new request
* @param {number} redirects - The number of redirects, used to prevent infinite loops
* @returns {*|EventEmitter}
*/
function download(src, output, options, _parentEvent, redirects) {
if(typeof redirects === "undefined") {
redirects = 0;
}
var downloader = _parentEvent || new EventEmitter(),
srcUrl,
tunnelAgent,
req;

if (options) {
if (options) {
options = parseOptions('download', options);
} else {
options = {
gunzip: false
};
}
srcUrl = url.parse(src);
srcUrl.protocol = cleanProtocol(srcUrl.protocol);
Expand All @@ -24,80 +40,126 @@ function download(src, output, options) {
protocol: srcUrl.protocol,
host: srcUrl.hostname,
port: srcUrl.port,
path: srcUrl.pathname,
path: srcUrl.pathname + (srcUrl.search || ""),
proxy: options?options.proxy:undefined,
method: 'GET'
}, function(res) {
var fileSize, writeStream, downloadedSize;
var gunzip = zlib.createGunzip();

// Handle 302 redirects
if(res.statusCode === 301 || res.statusCode === 302 || res.statusCode === 307) {
redirects++;
if(redirects >= 10) {
downloader.emit('error', 'Infinite redirect loop detected');
}
download(res.headers.location, output, options, downloader, redirects);
}

if (res.statusCode === 200) {
downloadedSize = 0;
fileSize = res.headers['content-length'];
writeStream = fs.createWriteStream(output, {
flags: 'a',
flags: 'w+',
encoding: 'binary'
});

res.on('error', function(err) {
writeStream.end();
downloader.emit('error', err);
});

var encoding = "";
if(typeof res.headers['content-encoding'] === "string") {
encoding = res.headers['content-encoding'];
}

// If the user has specified to unzip, and the file is gzip encoded, pipe to gunzip
if(options.gunzip === true && encoding === "gzip") {
res.pipe(gunzip);
} else {
res.pipe(writeStream);
}

// Emit a start event so the user knows the file size they are going to receive
downloader.emit('start', fileSize);

// Data handlers
res.on('data', function(chunk) {
downloadedSize += chunk.length;
downloader.emit('progress', downloadedSize/fileSize);
});
gunzip.on('data', function(chunk) {
writeStream.write(chunk);
});
res.on('end', function() {

writeStream.on('finish', function() {
writeStream.end();
downloader.emit('end', "Finished writing to disk");
req.end('finished');
});
writeStream.on('close', function(){
downloader.emit('end', output);
});
} else {
downloader.emit('error', 'Server respond ' + res.statusCode);
} else if(res.statusCode !== 200 && res.statusCode !== 301 && res.statusCode !== 302) {
downloader.emit('error', 'Server responded with unhandled status: ' + res.statusCode);
}
});

req.end();
req.end('done');
req.on('error', function(err) {
downloader.emit('error', err);
});
// Attach request to our EventEmitter for backwards compatibility, enables actions such as
// req.abort();
downloader.req = req;

return downloader;
}

function request(options, callback) {
var newOptions = {}, newProxy = {}, key;
options = parseOptions('request', options);
if (options.protocol === 'http') {
if (options.proxy) {
for (key in options.proxy) {
if (key !== 'protocol') {
newProxy[key] = options.proxy[key];
}
}
if (options.proxy.protocol === 'http') {
delete options.proxy.protocol; // delete self-defined arg
options.agent = tunnel.httpOverHttp({proxy: options.proxy});
options.agent = tunnel.httpOverHttp({proxy: newProxy});
} else if (options.proxy.protocol === 'https') {
delete options.proxy.protocol; // delete self-defined arg
options.agent = tunnel.httpOverHttps({proxy: options.proxy});
options.agent = tunnel.httpOverHttps({proxy: newProxy});
} else {
throw options.proxy.protocol + ' proxy is not supported!';
}
}
delete options.protocol; // delete self-defined arg
delete options.proxy; // delete self-defined arg
return http.request(options, callback);
for (key in options) {
if (key !== 'protocol' && key !== 'proxy') {
newOptions[key] = options[key];
}
}
return http.request(newOptions, callback);
}
if (options.protocol === 'https') {
if (options.proxy) {
for (key in options.proxy) {
if (key !== 'protocol') {
newProxy[key] = options.proxy[key];
}
}
if (options.proxy.protocol === 'http') {
delete options.proxy.protocol; // delete self-defined arg
options.agent = tunnel.httpsOverHttp({proxy: options.proxy});
options.agent = tunnel.httpsOverHttp({proxy: newProxy});
} else if (options.proxy.protocol === 'https') {
delete options.proxy.protocol; // delete self-defined arg
options.agent = tunnel.httpsOverHttps({proxy: options.proxy});
options.agent = tunnel.httpsOverHttps({proxy: newProxy});
} else {
throw options.proxy.protocol + ' proxy is not supported!';
}
}
delete options.protocol; // delete self-defined arg
delete options.proxy; // delete self-defined arg
return https.request(options, callback);
for (key in options) {
if (key !== 'protocol' && key !== 'proxy') {
newOptions[key] = options[key];
}
}
return https.request(newOptions, callback);
}
throw 'only allow http or https request!';
}
Expand Down Expand Up @@ -135,6 +197,8 @@ function parseOptions(type, options) {
options.proxy.headers = {'User-Agent': 'Node'};
}
}

options.gunzip = options.gunzip || false;
return options;
}
}
Expand All @@ -144,4 +208,4 @@ function cleanProtocol(str) {
}

exports.download = download;
exports.request = request;
exports.request = request;
22 changes: 14 additions & 8 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
{
"name": "wget",
"version": "0.0.1",
"description": "wget in nodejs.",
"keywords": ["download", "http", "https", "ftp", "proxy"],
"author": "Chengwei Wu <meegodevelop@gmail.com>",
"name": "wget-improved",
"version": "1.3.0",
"description": "wget in nodejs, forked from wuchengwei/node-wget to add improvements and help maintain the project",
"keywords": ["download", "http", "https", "ftp", "proxy", "wget"],
"author": "Michael Barajas <michael.a.barajas@gmail.com>",
"repository":{
"type": "git",
"url": "git://github.com/wuchengwei/node-wget.git"
"url": "git://github.com/bearjaws/node-wget.git"
},
"main": "./index.js",
"contributors": [
{
"name": "Michael Barajas"
}
],
"homepage": "https://github.com/bearjaws/node-wget",
"bin": {
"nwget": "./bin/nwget.js"
"nwget": "./bin/nwget"
},
"dependencies": {
"minimist": "^1.2.0",
"tunnel": "0.0.2"
},
"engines": { "node": ">= 0.6.18" }
Expand Down
6 changes: 5 additions & 1 deletion test/test.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
var wget = require('../lib/wget');

var download = wget.download('https://raw.github.com/Fyrd/caniuse/master/data.json', '/tmp/README.md');
var download = wget.download('https://www.npmjs.com/static/images/npm-logo.svg', '/tmp/README.md');
// with a proxy:
// var download = wget.download('https://raw.github.com/Fyrd/caniuse/master/data.json', '/tmp/README.md', {proxy: 'http://proxyhost:port'});
download.on('error', function(err) {
console.log(err);
});
download.on('start', function(fileSize) {
console.log(fileSize);
});
download.on('end', function(output) {
console.log(output);
process.exit();
});
download.on('progress', function(progress) {
console.log(progress);
Expand Down