diff --git a/lib/fetch.js b/lib/fetch.js index eb6062bf..aad09e77 100644 --- a/lib/fetch.js +++ b/lib/fetch.js @@ -2,8 +2,10 @@ // SPDX-License-Identifier: MIT const axios = require('axios') +const axiosRetry = require('axios-retry') const defaultHeaders = Object.freeze({ 'User-Agent': 'clearlydefined.io crawler (clearlydefined@outlook.com)' }) +const maximumAttempts = 3 axios.defaults.headers = defaultHeaders @@ -14,12 +16,14 @@ function buildRequestOptions(request) { } else if (request.encoding === null) { responseType = 'stream' } - const validateOptions = {} if (request.simple === false) { validateOptions.validateStatus = () => true } - + if (request.gzip) { + if (!request.headers) request.headers = {} + request.headers['Accept-Encoding'] = 'gzip' + } return { method: request.method, url: request.url || request.uri, @@ -49,4 +53,59 @@ function withDefaults(opts) { return request => callFetch(request, axiosInstance) } -module.exports = { callFetch, withDefaults, defaultHeaders } +async function getStream(opt) { + if (typeof opt === 'string') { + opt = { url: opt } + } + const request = { + ...opt, + encoding: null, + method: 'GET', + headers: { ...defaultHeaders, ...(opt.headers || {}) } + } + return await callFetch(request) +} + +/** + * Calls callFetch through a dedicated axios instance wrapped with axios-retry. + * @param {string} url - address to request + * @param {object} [options] - callFetch request options; never mutated. resolveWithFullResponse defaults to true + * @param {object} [retryOptions] - axios-retry options; maxAttempts (default maximumAttempts) is passed to axios-retry as retries + * @returns {Promise} { statusCode, headers, body, request }, built from err.response when a failed request still carries one; the raw callFetch result when resolveWithFullResponse is false + * @throws rethrows any error that carries no response + */ +async function callFetchWithRetry(url, options = {}, retryOptions = {}) { + const { maxAttempts = maximumAttempts, ...otherRetryOpts } = retryOptions + const axiosInstance = axios.create() + axiosRetry(axiosInstance, { retries: maxAttempts, retryDelay: axiosRetry.exponentialDelay, ...otherRetryOpts }) + const resolveWithFullResponse = options.resolveWithFullResponse ?? true + const request = { url, ...options, resolveWithFullResponse } + try { + const response = await callFetch(request, axiosInstance) + if (!resolveWithFullResponse) return response + return { + statusCode: response.status, + headers: response.headers, + body: response.data, + request: response.request + } + } catch (err) { + if (err.response) { + return { + statusCode: err.response.status, + headers: 
err.response.headers, + body: err.response.data, + request: err.response.request + } + } + throw err + } +} + +module.exports = { + callFetch, + callFetchWithRetry, + withDefaults, + defaultHeaders, + getStream +} diff --git a/lib/sourceDiscovery.js b/lib/sourceDiscovery.js index a02d4997..1a4f9371 100644 --- a/lib/sourceDiscovery.js +++ b/lib/sourceDiscovery.js @@ -3,7 +3,6 @@ const { get, uniq, uniqWith } = require('lodash') const ghrequestor = require('ghrequestor') -const request = require('requestretry') const geit = require('geit') // TODO why not parse-github-repo-url (10x more downloads) const parseGitHubUrl = require('parse-github-url') @@ -75,7 +74,8 @@ async function discoverFromGitHubRefs(version, candidate, options) { headers, maxAttempts: 3, retryDelay: 250, - retryStrategy: request.RetryStrategies.HTTPOrNetworkError, + // This is the default strategy in axios retry. ghrequestor currently depends on requestretry, but we will replace it with axios retry soon. + // retryStrategy: request.RetryStrategies.HTTPOrNetworkError, tokenLowerBound: 10, json: true }) diff --git a/package-lock.json b/package-lock.json index a1d37e63..13b49c14 100644 --- a/package-lock.json +++ b/package-lock.json @@ -48,7 +48,6 @@ "parse-github-url": "^1.0.2", "patch-package": "^6.5.1", "qlimit": "^0.1.1", - "requestretry": "^4.0.0", "semver": "^7.6.0", "sha.js": "^2.4.11", "spdx-correct": "^3.2.0", @@ -7504,19 +7503,6 @@ "uuid": "bin/uuid" } }, - "node_modules/requestretry": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/requestretry/-/requestretry-4.1.2.tgz", - "integrity": "sha512-N1WAp+8eOy8NfsVBChcSxNCKvPY1azOpliQ4Sby4WDe0HFEhdKywlNZeROMBQ+BI3Jpc0eNOT1KVFGREawtahA==", - "dependencies": { - "extend": "^3.0.2", - "lodash": "^4.17.15", - "when": "^3.7.7" - }, - "peerDependencies": { - "request": "2.*.*" - } - }, "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", 
diff --git a/package.json b/package.json index 82a3e068..005a1710 100644 --- a/package.json +++ b/package.json @@ -72,7 +72,6 @@ "parse-github-url": "^1.0.2", "patch-package": "^6.5.1", "qlimit": "^0.1.1", - "requestretry": "^4.0.0", "semver": "^7.6.0", "sha.js": "^2.4.11", "spdx-correct": "^3.2.0", diff --git a/providers/fetch/condaFetch.js b/providers/fetch/condaFetch.js index 8d7e353a..aec86f94 100644 --- a/providers/fetch/condaFetch.js +++ b/providers/fetch/condaFetch.js @@ -5,7 +5,7 @@ const AbstractFetch = require('./abstractFetch') const { clone } = require('lodash') const fs = require('fs') const memCache = require('memory-cache') -const nodeRequest = require('request') +const nodeRequest = require('../../lib/fetch') const FetchResult = require('../../lib/fetchResult') class CondaFetch extends AbstractFetch { @@ -167,11 +167,16 @@ class CondaFetch extends AbstractFetch { return new Promise((resolve, reject) => { const options = { url: downloadUrl, headers: this.headers } nodeRequest - .get(options, (error, response) => { - if (error) return reject(error) - if (response.statusCode !== 200) return reject(new Error(`${response.statusCode} ${response.statusMessage}`)) + .getStream(options) + .then(response => { + if (response.statusCode !== 200) { + return reject(new Error(`${response.statusCode} ${response.statusMessage}`)) + } + response.pipe(fs.createWriteStream(destination)).on('finish', resolve) + }) + .catch(error => { + return reject(error) }) - .pipe(fs.createWriteStream(destination).on('finish', () => resolve())) }) } diff --git a/providers/fetch/debianFetch.js b/providers/fetch/debianFetch.js index 76e660cf..4bc59aec 100644 --- a/providers/fetch/debianFetch.js +++ b/providers/fetch/debianFetch.js @@ -8,7 +8,7 @@ const domain = require('domain') const fs = require('fs') const linebyline = require('linebyline') const memCache = require('memory-cache') -const nodeRequest = require('request') +const { getStream: nodeRequest } = 
require('../../lib/fetch') const path = require('path') const { promisify } = require('util') const { callFetch: requestPromise } = require('../../lib/fetch') @@ -104,16 +104,17 @@ class DebianFetch extends AbstractFetch { return reject(error) }) dom.run(() => { - nodeRequest - .get(packageFileMap.url) - .pipe(bz2()) - .pipe(fs.createWriteStream(this.packageMapFileLocation)) - .on('finish', () => { - this.logger.info( - `Debian: retrieved ${packageFileMap.url}. Stored map file at ${this.packageMapFileLocation}` - ) - return resolve() - }) + nodeRequest(packageFileMap.url).then(response => { + response + .pipe(bz2()) + .pipe(fs.createWriteStream(this.packageMapFileLocation)) + .on('finish', () => { + this.logger.info( + `Debian: retrieved ${packageFileMap.url}. Stored map file at ${this.packageMapFileLocation}` + ) + return resolve() + }) + }).catch(reject) }) }) } @@ -214,14 +215,16 @@ class DebianFetch extends AbstractFetch { const dom = domain.create() dom.on('error', error => reject(error)) dom.run(() => { - nodeRequest - .get(downloadUrl, (error, response) => { - if (error) return reject(error) - if (response.statusCode !== 200) - return reject(new Error(`${response.statusCode} ${response.statusMessage}`)) + nodeRequest(downloadUrl) + .then(response => { + if (response.statusCode !== 200) { + return reject(new Error(`${response.statusCode} ${response.statusMessage}`)) + } + response.pipe(fs.createWriteStream(destination)).on('finish', () => resolve()) + }) + .catch(error => { + return reject(error) }) - .pipe(fs.createWriteStream(destination)) - .on('finish', () => resolve()) }) }) } diff --git a/providers/fetch/goFetch.js b/providers/fetch/goFetch.js index 8a7431b9..253bc358 100644 --- a/providers/fetch/goFetch.js +++ b/providers/fetch/goFetch.js @@ -1,7 +1,7 @@ const { clone } = require('lodash') const { callFetch: requestPromise } = require('../../lib/fetch') const AbstractFetch = require('./abstractFetch') -const nodeRequest = require('request') +const { getStream: nodeRequest 
} = require('../../lib/fetch') const fs = require('fs') const axios = require('axios') const { default: axiosRetry, exponentialDelay, isNetworkOrIdempotentRequestError } = require('axios-retry') @@ -112,14 +112,16 @@ class GoFetch extends AbstractFetch { const url = this._buildUrl(spec) const status = await new Promise(resolve => { - nodeRequest - .get(url, (error, response) => { - if (error) this.logger.error(this._google_proxy_error_string(error)) + nodeRequest(url) + .then(response => { if (response.statusCode !== 200) return resolve(false) + response.pipe(fs.createWriteStream(destination)).on('finish', () => resolve(true)) + }) + .catch(error => { + this.logger.error(this._google_proxy_error_string(error)) + resolve(false) }) - .pipe(fs.createWriteStream(destination).on('finish', () => resolve(true))) }) - if (status) return true } diff --git a/providers/fetch/mavenBasedFetch.js b/providers/fetch/mavenBasedFetch.js index 88c59ced..b60e4b8d 100644 --- a/providers/fetch/mavenBasedFetch.js +++ b/providers/fetch/mavenBasedFetch.js @@ -2,8 +2,7 @@ // SPDX-License-Identifier: MIT const AbstractFetch = require('./abstractFetch') -const { callFetch, defaultHeaders } = require('../../lib/fetch') -const nodeRequest = require('request') +const { callFetch, getStream } = require('../../lib/fetch') const { clone, get } = require('lodash') const { promisify } = require('util') const fs = require('fs') @@ -28,7 +27,7 @@ class MavenBasedFetch extends AbstractFetch { super(options) this._providerMap = { ...providerMap } this._handleRequestPromise = options.requestPromise || callFetch - this._handleRequestStream = options.requestStream || nodeRequest.defaults({ headers: defaultHeaders }).get + this._handleRequestStream = options.requestStream || getStream } canHandle(request) { @@ -93,10 +92,15 @@ class MavenBasedFetch extends AbstractFetch { for (let extension of extensions) { const url = this._buildUrl(spec, extension) const status = await new Promise(resolve => { - 
this._handleRequestStream(url, (error, response) => { - if (error) this.logger.error(error) - if (response.statusCode !== 200) return resolve(false) - }).pipe(fs.createWriteStream(destination).on('finish', () => resolve(true))) + this._handleRequestStream(url) + .then(response => { + if (response.statusCode !== 200) return resolve(false) + response.pipe(fs.createWriteStream(destination)).on('finish', () => resolve(true)) + }) + .catch(error => { + this.logger.error(error) + resolve(false) + }) }) if (status) return true } diff --git a/providers/fetch/npmjsFetch.js b/providers/fetch/npmjsFetch.js index 394e736b..1c16d573 100644 --- a/providers/fetch/npmjsFetch.js +++ b/providers/fetch/npmjsFetch.js @@ -2,8 +2,7 @@ // SPDX-License-Identifier: MIT const AbstractFetch = require('./abstractFetch') -const nodeRequest = require('request') -const { callFetch: requestPromise } = require('../../lib/fetch') +const { callFetch: requestPromise, getStream: nodeRequest } = require('../../lib/fetch') const fs = require('fs') const { clone, get } = require('lodash') const FetchResult = require('../../lib/fetchResult') @@ -42,12 +41,14 @@ class NpmFetch extends AbstractFetch { async _getPackage(spec, destination) { return new Promise((resolve, reject) => { - nodeRequest - .get(this._buildUrl(spec), (error, response) => { - if (error) return reject(error) - if (response.statusCode !== 200) reject(new Error(`${response.statusCode} ${response.statusMessage}`)) + nodeRequest(this._buildUrl(spec)) + .then(response => { + if (response.statusCode !== 200) return reject(new Error(`${response.statusCode} ${response.statusMessage}`)) + response.pipe(fs.createWriteStream(destination)).on('finish', () => resolve(true)) + }) + .catch(error => { + return reject(error) }) - .pipe(fs.createWriteStream(destination).on('finish', () => resolve(null))) }) } diff --git a/providers/fetch/nugetFetch.js b/providers/fetch/nugetFetch.js index f9b5780f..aa9218de 100644 --- a/providers/fetch/nugetFetch.js +++ 
b/providers/fetch/nugetFetch.js @@ -7,7 +7,7 @@ const fs = require('fs') const mkdirp = require('mkdirp') const path = require('path') const { promisify } = require('util') -const requestRetry = require('requestretry').defaults({ maxAttempts: 3, fullResponse: true }) +const { callFetchWithRetry: requestRetry } = require('../../lib/fetch') const FetchResult = require('../../lib/fetchResult') const providerMap = { @@ -69,7 +69,7 @@ class NuGetFetch extends AbstractFetch { // https://docs.microsoft.com/en-us/nuget/api/registration-base-url-resource // Example: https://api.nuget.org/v3/registration5/moq/4.8.2.json and follow catalogEntry // https://api.nuget.org/v3/registration5-gz-semver2/microsoft.powershell.native/7.0.0-preview.1.json - const { body, statusCode } = await requestRetry.get( + const { body, statusCode } = await requestRetry( `${baseUrl}/v3/registration5-gz-semver2/${spec.name.toLowerCase()}/${spec.revision}.json`, { gzip: true } ) @@ -87,7 +87,7 @@ class NuGetFetch extends AbstractFetch { // https://docs.microsoft.com/en-us/nuget/api/package-base-address-resource // Example: https://api.nuget.org/v3-flatcontainer/moq/index.json const baseUrl = providerMap.nuget - const { body, statusCode } = await requestRetry.get(`${baseUrl}/v3-flatcontainer/${name}/index.json`, { + const { body, statusCode } = await requestRetry(`${baseUrl}/v3-flatcontainer/${name}/index.json`, { json: true }) // If statusCode is not 200, XML may be returned @@ -100,17 +100,18 @@ class NuGetFetch extends AbstractFetch { async _getPackage(zip, packageContentUrl) { return new Promise((resolve, reject) => { - requestRetry - .get(packageContentUrl, { json: false, encoding: null }) - .pipe(fs.createWriteStream(zip)) - .on('finish', () => resolve(null)) - .on('error', reject) + requestRetry(packageContentUrl, { json: false, encoding: null }).then(response => { + response.body + .pipe(fs.createWriteStream(zip)) + .on('finish', () => resolve(null)) + .on('error', reject) + }).catch(reject) }) } async 
_getManifest(catalogEntryUrl) { // Example: https://api.nuget.org/v3/catalog0/data/2018.10.29.04.23.22/xunit.core.2.4.1.json - const { body, statusCode } = await requestRetry.get(catalogEntryUrl) + const { body, statusCode } = await requestRetry(catalogEntryUrl) if (statusCode !== 200) return null return JSON.parse(body) } @@ -119,7 +120,7 @@ class NuGetFetch extends AbstractFetch { async _getNuspec(spec) { // https://docs.microsoft.com/en-us/nuget/api/package-base-address-resource#download-package-manifest-nuspec // Example: https://api.nuget.org/v3-flatcontainer/newtonsoft.json/11.0.1/newtonsoft.json.nuspec - const { body, statusCode } = await requestRetry.get( + const { body, statusCode } = await requestRetry( `https://api.nuget.org/v3-flatcontainer/${spec.name.toLowerCase()}/${spec.revision}/${spec.name.toLowerCase()}.nuspec` ) if (statusCode !== 200) return null @@ -146,7 +147,7 @@ class NuGetFetch extends AbstractFetch { if (licenseUrl.toLowerCase().includes('license_url_here_or_delete_this_line')) return const downloadedLicenseDirName = path.join(dirName, 'clearlydefined', 'downloaded') await promisify(mkdirp)(downloadedLicenseDirName) - const { body, statusCode } = await requestRetry.get(licenseUrl) + const { body, statusCode } = await requestRetry(licenseUrl) if (statusCode !== 200) return await promisify(fs.writeFile)(path.join(downloadedLicenseDirName, 'LICENSE'), body) } diff --git a/providers/fetch/packagistFetch.js b/providers/fetch/packagistFetch.js index 8ba76a8e..aa00683d 100644 --- a/providers/fetch/packagistFetch.js +++ b/providers/fetch/packagistFetch.js @@ -2,14 +2,12 @@ // SPDX-License-Identifier: MIT const AbstractFetch = require('./abstractFetch') -const requestRetry = require('requestretry').defaults({ maxAttempts: 3, fullResponse: true }) const fs = require('fs') const { get } = require('lodash') -const nodeRequest = require('request') +const { defaultHeaders, getStream: nodeRequest, callFetchWithRetry: requestRetry } = 
require('../../lib/fetch') const { promisify } = require('util') const readdir = promisify(fs.readdir) const FetchResult = require('../../lib/fetchResult') -const { defaultHeaders } = require('../../lib/fetch') const providerMap = { packagist: 'https://repo.packagist.org/' @@ -42,7 +40,7 @@ class PackagistFetch extends AbstractFetch { async _getRegistryData(spec) { let registryData const baseUrl = providerMap.packagist - const { body, statusCode } = await requestRetry.get(`${baseUrl}/p/${spec.namespace}/${spec.name}.json`, { + const { body, statusCode } = await requestRetry(`${baseUrl}/p/${spec.namespace}/${spec.name}.json`, { json: true }) if (statusCode !== 200 || !body) return null @@ -65,12 +63,14 @@ class PackagistFetch extends AbstractFetch { url: distUrl, headers: defaultHeaders } - nodeRequest - .get(options, (error, response) => { - if (error) return reject(error) - if (response.statusCode !== 200) reject(new Error(`${response.statusCode} ${response.statusMessage}`)) + nodeRequest(options) + .then(response => { + if (response.statusCode !== 200) return reject(new Error(`${response.statusCode} ${response.statusMessage}`)) + response.pipe(fs.createWriteStream(destination)).on('finish', () => resolve(null)) + }) + .catch(error => { + return reject(error) }) - .pipe(fs.createWriteStream(destination).on('finish', () => resolve(null))) }) } diff --git a/providers/fetch/podFetch.js b/providers/fetch/podFetch.js index d61b4f19..c89b19fa 100644 --- a/providers/fetch/podFetch.js +++ b/providers/fetch/podFetch.js @@ -3,12 +3,11 @@ const { clone, get } = require('lodash') const AbstractFetch = require('./abstractFetch') -const { callFetch: request } = require('../../lib/fetch') +const { callFetch: request, callFetchWithRetry: requestRetry } = require('../../lib/fetch') const fs = require('fs') const path = require('path') const crypto = require('crypto') const { exec } = require('child_process') -const requestRetry = require('requestretry').defaults({ maxAttempts: 3, fullResponse: 
true }) const FetchResult = require('../../lib/fetchResult') const services = { @@ -131,7 +130,7 @@ class PodFetch extends AbstractFetch { async _getVersion(spec) { // Example: https://trunk.cocoapods.org/api/v1/pods/SwiftLCS - const { body, statusCode } = await requestRetry.get(`${services.trunk}/pods/${spec.name}`, { + const { body, statusCode } = await requestRetry(`${services.trunk}/pods/${spec.name}`, { json: true }) diff --git a/providers/fetch/pypiFetch.js b/providers/fetch/pypiFetch.js index 8e5f4e5f..3565b8b6 100644 --- a/providers/fetch/pypiFetch.js +++ b/providers/fetch/pypiFetch.js @@ -3,7 +3,7 @@ const AbstractFetch = require('./abstractFetch') const requestRetry = require('./requestRetryWithDefaults') -const nodeRequest = require('request') +const nodeRequest = require('../../lib/fetch') const fs = require('fs') const spdxCorrect = require('spdx-correct') const { findLastKey, get, find, clone } = require('lodash') @@ -46,7 +46,7 @@ class PyPiFetch extends AbstractFetch { async _getRegistryData(spec) { const baseUrl = providerMap.pypi - const { body, statusCode } = await requestRetry.get(`${baseUrl}/pypi/${spec.name}/json`, { + const { body, statusCode } = await requestRetry.callFetchWithRetry(`${baseUrl}/pypi/${spec.name}/json`, { json: true }) if (statusCode !== 200 || !body) return null @@ -106,11 +106,14 @@ class PyPiFetch extends AbstractFetch { return new Promise((resolve, reject) => { nodeRequest - .get(release.url, (error, response) => { - if (error) return reject(error) - if (response.statusCode !== 200) reject(new Error(`${response.statusCode} ${response.statusMessage}`)) + .getStream(release.url) + .then(response => { + if (response.statusCode !== 200) return reject(new Error(`${response.statusCode} ${response.statusMessage}`)) + response.pipe(fs.createWriteStream(destination)).on('finish', () => resolve(true)) + }) + .catch(error => { + reject(error) }) - .pipe(fs.createWriteStream(destination).on('finish', () => resolve(true))) }) } } diff --git 
a/providers/fetch/requestRetryWithDefaults.js b/providers/fetch/requestRetryWithDefaults.js index 741cad1d..57ebc605 100644 --- a/providers/fetch/requestRetryWithDefaults.js +++ b/providers/fetch/requestRetryWithDefaults.js @@ -1,3 +1,3 @@ -const requestRetryWithDefaults = require('requestretry').defaults({ maxAttempts: 3, fullResponse: true }) +const requestRetryWithDefaults = require('../../lib/fetch') module.exports = requestRetryWithDefaults diff --git a/providers/fetch/rubyGemsFetch.js b/providers/fetch/rubyGemsFetch.js index c3838c70..3304c273 100644 --- a/providers/fetch/rubyGemsFetch.js +++ b/providers/fetch/rubyGemsFetch.js @@ -2,8 +2,8 @@ // SPDX-License-Identifier: MIT const AbstractFetch = require('./abstractFetch') -const nodeRequest = require('request') -const requestRetry = require('requestretry').defaults({ maxAttempts: 3, fullResponse: true }) +const { getStream: nodeRequest } = require('../../lib/fetch') +const requestRetry = require('../../lib/fetch').callFetchWithRetry const fs = require('fs') const zlib = require('zlib') const path = require('path') @@ -48,7 +48,7 @@ class RubyGemsFetch extends AbstractFetch { async _getRegistryData(spec) { const baseUrl = providerMap.rubyGems - const { body, statusCode } = await requestRetry.get(`${baseUrl}/api/v1/gems/${spec.name}.json`, { + const { body, statusCode } = await requestRetry(`${baseUrl}/api/v1/gems/${spec.name}.json`, { json: true }) return statusCode === 200 && body ? body : null @@ -58,12 +58,14 @@ class RubyGemsFetch extends AbstractFetch { const fullName = spec.namespace ? 
`${spec.namespace}/${spec.name}` : spec.name const gemUrl = `${providerMap.rubyGems}/gems/${fullName}-${spec.revision}.gem` return new Promise((resolve, reject) => { - nodeRequest - .get(gemUrl, (error, response) => { - if (error) return reject(error) - if (response.statusCode !== 200) reject(new Error(`${response.statusCode} ${response.statusMessage}`)) + nodeRequest(gemUrl) + .then(response => { + if (response.statusCode !== 200) return reject(new Error(`${response.statusCode} ${response.statusMessage}`)) + response.pipe(fs.createWriteStream(destination)).on('finish', () => resolve(null)) + }) + .catch(error => { + reject(error) }) - .pipe(fs.createWriteStream(destination).on('finish', () => resolve(null))) }) } diff --git a/providers/process/top.js b/providers/process/top.js index 238e8c72..1e9a601f 100644 --- a/providers/process/top.js +++ b/providers/process/top.js @@ -10,7 +10,8 @@ const ghrequestor = require('ghrequestor') const linebyline = require('linebyline') const path = require('path') const Request = require('../../ghcrawler').request -const requestRetry = require('requestretry').defaults({ json: true, maxAttempts: 3, fullResponse: false }) +const requestRetry = require('../../lib/fetch').callFetchWithRetry +const defaultOptions = { json: true, resolveWithFullResponse: false } class TopProcessor extends AbstractProcessor { canHandle(request) { @@ -89,7 +90,8 @@ class TopProcessor extends AbstractProcessor { if (!end || end - start <= 0) end = start + 1000 const initialOffset = Math.floor(start / 36) * 36 for (let offset = initialOffset; offset < end; offset += 36) { - const response = await requestRetry.get(`https://www.npmjs.com/browse/depended?offset=${offset}`, { + const response = await requestRetry(`https://www.npmjs.com/browse/depended?offset=${offset}`, { + ...defaultOptions, headers: { 'x-spiferack': 1 } }) const packages = response.packages || [] @@ -154,8 +156,9 @@ class TopProcessor extends AbstractProcessor { if (!end || end - start <= 0) end = start + 
1000 for (let offset = start; offset < end; offset += 100) { const page = offset / 100 + 1 - const response = await requestRetry.get( - `https://crates.io/api/v1/crates?page=${page}&per_page=100&sort=downloads` + const response = await requestRetry( + `https://crates.io/api/v1/crates?page=${page}&per_page=100&sort=downloads`, + defaultOptions ) const requestsPage = response.crates.map( x => new Request('package', `cd:/crate/cratesio/-/${x.name}/${x.max_version}`) @@ -316,8 +319,9 @@ class TopProcessor extends AbstractProcessor { if (!start || start < 0) start = 0 if (!end || end - start <= 0) end = start + 1000 for (let offset = start; offset < end; offset += pageSize) { - const topComponents = await requestRetry.get( - `https://api-v2v3search-0.nuget.org/query?prerelease=false&skip=${offset}&take=${pageSize}` + const topComponents = await requestRetry( + `https://api-v2v3search-0.nuget.org/query?prerelease=false&skip=${offset}&take=${pageSize}`, + defaultOptions ) const requests = topComponents.data.map(component => { return new Request('package', `cd:/nuget/nuget/-/${component.id}`) @@ -349,7 +353,8 @@ class TopProcessor extends AbstractProcessor { }) const requests = [] for (let i = 0; i < repos.length; i++) { - const commits = await requestRetry.get(`https://api.github.com/repos/${namespace}/${repos[i].name}/commits`, { + const commits = await requestRetry(`https://api.github.com/repos/${namespace}/${repos[i].name}/commits`, { + ...defaultOptions, headers }) if (commits.length > 0) { diff --git a/test/unit/lib/fetchTests.js b/test/unit/lib/fetchTests.js index 114cc3b5..197a2665 100644 --- a/test/unit/lib/fetchTests.js +++ b/test/unit/lib/fetchTests.js @@ -14,6 +14,96 @@ describe('CallFetch', () => { const mockServer = mockttp.getLocal() beforeEach(async () => await mockServer.start()) afterEach(async () => await mockServer.stop()) + it('should retry and succeed after a failure', async () => { + const path = '/registry.npmjs.com/redis/0.1.0' + const expected = 
JSON.parse(fs.readFileSync('test/fixtures/fetch/redis-0.1.0.json')) + let callCount = 0 + await mockServer.forGet(path).thenCallback(req => { + callCount++ + if (callCount === 1) return { status: 500, body: 'fail' } + else return { status: 200, body: JSON.stringify(expected) } + }) + + const { callFetchWithRetry } = require('../../../lib/fetch') + const response = await callFetchWithRetry(mockServer.urlFor(path), { json: true }, { maxAttempts: 3 }) + expect(callCount).to.equal(2) + expect(response.statusCode).to.equal(200) + expect(response.body).to.deep.equal(expected) + }) + + it('should succeed on first attempt', async () => { + const path = '/registry.npmjs.com/redis/0.1.0' + const expected = JSON.parse(fs.readFileSync('test/fixtures/fetch/redis-0.1.0.json')) + await mockServer.forGet(path).thenReply(200, JSON.stringify(expected)) + + const { callFetchWithRetry } = require('../../../lib/fetch') + const response = await callFetchWithRetry(mockServer.urlFor(path), { json: true }) + expect(response.statusCode).to.equal(200) + expect(response.body).to.deep.equal(expected) + }) + + it('should return error info after all retries fail', async () => { + const path = '/registry.npmjs.com/redis/0.1.2' + const endpointMock = await mockServer.forGet(path).thenReply(500, 'fail') + + const { callFetchWithRetry } = require('../../../lib/fetch') + const response = await callFetchWithRetry(mockServer.urlFor(path), { json: true }, { maxAttempts: 5 }) + + expect(response.statusCode).to.equal(500) + expect(response.body).to.equal('fail') + const requests = await endpointMock.getSeenRequests() + expect(requests.length).to.equal(6) + }).timeout(10000) + + 
it('should return a stream when called with a URL string', async () => { + const path = '/registry.npmjs.com/redis/0.1.0' + await mockServer.forGet(path).thenStream(200, fs.createReadStream('test/fixtures/fetch/redis-0.1.0.json')) + + const { getStream } = require('../../../lib/fetch') + const response = await getStream(mockServer.urlFor(path)) + expect(response.readable).to.be.true + // Optionally, pipe and check content + const destination = 'test/fixtures/fetch/temp-stream.json' + await new Promise(resolve => { + response.pipe(fs.createWriteStream(destination).on('finish', () => resolve(true))) + }) + const downloaded = JSON.parse(fs.readFileSync(destination)) + const expected = JSON.parse(fs.readFileSync('test/fixtures/fetch/redis-0.1.0.json')) + expect(downloaded).to.deep.equal(expected) + fs.unlinkSync(destination) + }) + + it('should return a stream when called with an options object', async () => { + const path = '/registry.npmjs.com/redis/0.1.0' + await mockServer.forGet(path).thenStream(200, fs.createReadStream('test/fixtures/fetch/redis-0.1.0.json')) + + const { 
getStream } = require('../../../lib/fetch') + const response = await getStream({ url: mockServer.urlFor(path) }) + expect(response.readable).to.be.true + // Optionally, pipe and check content + const destination = 'test/fixtures/fetch/temp-stream2.json' + await new Promise(resolve => { + response.pipe(fs.createWriteStream(destination).on('finish', () => resolve(true))) + }) + const downloaded = JSON.parse(fs.readFileSync(destination)) + const expected = JSON.parse(fs.readFileSync('test/fixtures/fetch/redis-0.1.0.json')) + expect(downloaded).to.deep.equal(expected) + fs.unlinkSync(destination) + }) + + it('should apply default headers when called', async () => { + const path = '/registry.npmjs.com/redis/0.1.0' + const endpointMock = await mockServer + .forGet(path) + .thenStream(200, fs.createReadStream('test/fixtures/fetch/redis-0.1.0.json')) + + const { getStream, defaultHeaders } = require('../../../lib/fetch') + await getStream({ url: mockServer.urlFor(path) }) + const requests = await endpointMock.getSeenRequests() + for (const [key, value] of Object.entries(defaultHeaders)) { + expect(requests[0].headers).to.have.property(key.toLowerCase()).that.equals(value) + } + }) it('checks if the response is JSON while sending GET request', async () => { const path = '/registry.npmjs.com/redis/0.1.0' diff --git a/test/unit/providers/fetch/dispatcherTests.js b/test/unit/providers/fetch/dispatcherTests.js index ec0a801c..ac9dd825 100644 --- a/test/unit/providers/fetch/dispatcherTests.js +++ b/test/unit/providers/fetch/dispatcherTests.js @@ -339,8 +339,7 @@ describe('fetchDispatcher cache fetch result', () => { beforeEach(() => { const GoFetch = proxyquire('../../../../providers/fetch/goFetch', { - request: { get: createGetStub(fileSupplier) }, - '../../lib/fetch': { callFetch: createRequestPromiseStub(fileSupplier) } + '../../lib/fetch': { callFetch: createRequestPromiseStub(fileSupplier), getStream: createGetStub(fileSupplier) } }) const fetch = GoFetch({ logger: { info: 
sinon.stub() }, http: successHttpStub }) fetchDispatcher = setupDispatcher(fetch) @@ -365,24 +364,21 @@ describe('fetchDispatcher cache fetch result', () => { const requestPromiseStub = (url, options) => { const body = fs.readFileSync(`test/fixtures/${fileSupplier(url)}`) - if (options?.json) return { body: JSON.parse(body), statusCode: 200 } + if (options?.json) return Promise.resolve({ body: JSON.parse(body), statusCode: 200 }) const response = new PassThrough() response.body = body response.write(body) response.statusCode = 200 response.end() - return response + if (options?.encoding === null) return Promise.resolve({ body: response, statusCode: 200 }) + return Promise.resolve(response) } let fetchDispatcher beforeEach(() => { const NugetFetch = proxyquire('../../../../providers/fetch/nugetFetch', { - requestretry: { - defaults: () => { - return { get: requestPromiseStub } - } - } + '../../lib/fetch': { callFetchWithRetry: requestPromiseStub } }) const fetch = NugetFetch({ logger: { info: sinon.stub() } }) fetchDispatcher = setupDispatcher(fetch) @@ -397,12 +393,10 @@ describe('fetchDispatcher cache fetch result', () => { beforeEach(() => { const PodFetch = proxyquire('../../../../providers/fetch/podFetch', { - requestretry: { - defaults: () => { - return { get: sinon.stub().resolves({ body: loadJson('pod/versions.json'), statusCode: 200 }) } - } - }, - '../../lib/fetch': { callFetch: sinon.stub().resolves(loadJson('pod/registryData.json')) } + '../../lib/fetch': { + callFetch: sinon.stub().resolves(loadJson('pod/registryData.json')), + callFetchWithRetry: sinon.stub().resolves({ body: loadJson('pod/versions.json'), statusCode: 200 }) + } }) const fetch = PodFetch({ logger: { info: sinon.stub() } }) fetch._getPackage = sinon.stub().resolves('/tmp/cd-pYKk9q/SwiftLCS-1.0') @@ -428,17 +422,17 @@ const createRequestPromiseStub = fileSupplier => { } const createGetStub = fileSupplier => { - return (url, callback) => { + return url => { const response = new 
PassThrough() const file = `test/fixtures/${fileSupplier(url)}` if (file) { response.write(fs.readFileSync(file)) - callback(null, { statusCode: 200 }) + response.statusCode = 200 } else { - callback(new Error(url.includes('error') ? 'Error' : 'Code')) + return Promise.reject(new Error(url.includes('error') ? 'Error' : 'Code')) } response.end() - return response + return Promise.resolve(response) } } diff --git a/test/unit/providers/fetch/gradlePluginFetchTests.js b/test/unit/providers/fetch/gradlePluginFetchTests.js index 50bbf1bd..a5b3999b 100644 --- a/test/unit/providers/fetch/gradlePluginFetchTests.js +++ b/test/unit/providers/fetch/gradlePluginFetchTests.js @@ -81,12 +81,12 @@ describe('Gradle plugin fetch', () => { const content = contentFromFile(options.url) return options.json ? JSON.parse(content) : content } - const getStub = (url, callback) => { + const getStub = url => { const response = new PassThrough() response.write(contentFromFile(url)) - callback(null, { statusCode: 200 }) + response.statusCode = 200 response.end() - return response + return Promise.resolve(response) } handler = GradlePluginFetch({ logger: { log: sinon.stub(), error: sinon.stub() }, @@ -150,11 +150,11 @@ describe('Gradle plugin fetch', () => { }) it('handle no sourcearchive found for plugin', async () => { - handler._handleRequestStream = (url, callback) => { + handler._handleRequestStream = () => { const response = new PassThrough() - callback(new Error('404'), { statusCode: 404 }) + response.statusCode = 404 response.end() - return response + return Promise.reject(new Error('404')) } const request = await handler.handle( new Request('test', 'cd:/sourcearchive/gradleplugin/org.eclipse/swt/3.3.0-v3344') diff --git a/test/unit/providers/fetch/mavencentralFetchTests.js b/test/unit/providers/fetch/mavencentralFetchTests.js index 1531e5b2..4088efa3 100644 --- a/test/unit/providers/fetch/mavencentralFetchTests.js +++ b/test/unit/providers/fetch/mavencentralFetchTests.js @@ -78,17 
+78,17 @@ describe('MavenCentral fetching', () => { const content = fs.readFileSync(`test/fixtures/maven/${file}`) return options.json ? JSON.parse(content) : content } - const getStub = (url, callback) => { + const getStub = url => { const response = new PassThrough() const file = `test/fixtures/maven/${pickArtifact(url)}` if (file) { response.write(fs.readFileSync(file)) - callback(null, { statusCode: 200 }) + response.statusCode = 200 } else { - callback(new Error(url.includes('error') ? 'Error' : 'Code')) + return Promise.reject(new Error(url.includes('error') ? 'Error' : 'Code')) } response.end() - return response + return Promise.resolve(response) } handler = MavenFetch({ diff --git a/test/unit/providers/fetch/mavengoogleFetchTests.js b/test/unit/providers/fetch/mavengoogleFetchTests.js index 099b4f2d..e2ad9f22 100644 --- a/test/unit/providers/fetch/mavengoogleFetchTests.js +++ b/test/unit/providers/fetch/mavengoogleFetchTests.js @@ -76,17 +76,17 @@ describe('MavenGoogle fetching', () => { const content = fs.readFileSync(`test/fixtures/maven/${file}`) return options.json ? JSON.parse(content) : content } - const getStub = (url, callback) => { + const getStub = url => { const response = new PassThrough() const file = `test/fixtures/maven/${pickArtifact(url)}` if (file) { response.write(fs.readFileSync(file)) - callback(null, { statusCode: 200 }) + response.statusCode = 200 } else { - callback(new Error(url.includes('error') ? 'Error' : 'Code')) + return Promise.reject(new Error(url.includes('error') ? 
'Error' : 'Code')) } response.end() - return response + return Promise.resolve(response) } handler = MavenGoogleFetch({ diff --git a/test/unit/providers/fetch/npmjsFetchTests.js b/test/unit/providers/fetch/npmjsFetchTests.js index 53230e65..bb675aac 100644 --- a/test/unit/providers/fetch/npmjsFetchTests.js +++ b/test/unit/providers/fetch/npmjsFetchTests.js @@ -60,20 +60,19 @@ describe('', () => { } return resultBox.result } - const getStub = (url, callback) => { + const getStub = url => { const response = new PassThrough() if (url.includes('redie')) { response.write(fs.readFileSync('test/fixtures/npm/redie-0.3.0.tgz')) - callback(null, { statusCode: 200 }) + response.statusCode = 200 } else { - callback(new Error(url.includes('error') ? 'Error' : 'Code')) + return Promise.reject(new Error(url.includes('error') ? 'Error' : 'Code')) } response.end() - return response + return Promise.resolve(response) } Fetch = proxyquire('../../../../providers/fetch/npmjsFetch', { - request: { get: getStub }, - '../../lib/fetch': { callFetch: requestPromiseStub } + '../../lib/fetch': { callFetch: requestPromiseStub, getStream: getStub } }) Fetch._resultBox = resultBox }) diff --git a/test/unit/providers/fetch/nugetFetchTests.js b/test/unit/providers/fetch/nugetFetchTests.js index 454bef99..cd18dc0b 100644 --- a/test/unit/providers/fetch/nugetFetchTests.js +++ b/test/unit/providers/fetch/nugetFetchTests.js @@ -8,6 +8,7 @@ const proxyquire = require('proxyquire') const Request = require('../../../../ghcrawler').request const PassThrough = require('stream').PassThrough const fs = require('fs') +const { callFetchWithRetry } = require('../../../../lib/fetch') describe('NuGet fetch', () => { it('should normalize version correctly', () => { @@ -55,20 +56,18 @@ describe('', () => { if (url.includes('missing')) throw { statusCode: 404 } } const body = fs.readFileSync(`test/fixtures/nuget/${pickFile(url)}`) - if (options && options.json) return { body: JSON.parse(body), statusCode: 200 } + if 
(options && options.json) return Promise.resolve({ body: JSON.parse(body), statusCode: 200 }) const response = new PassThrough() response.body = body response.write(response.body) response.end() response.statusCode = 200 + if (options?.encoding === null) return Promise.resolve({ body: response, statusCode: 200 }) return response } - const requestRetryStub = { - defaults: () => { - return { get } - } - } - Fetch = proxyquire('../../../../providers/fetch/nugetFetch', { requestretry: requestRetryStub }) + Fetch = proxyquire('../../../../providers/fetch/nugetFetch', { + '../../lib/fetch': { callFetchWithRetry: get } + }) }) afterEach(() => { diff --git a/test/unit/providers/fetch/packagistFetchTests.js b/test/unit/providers/fetch/packagistFetchTests.js index b18c07c9..b506b511 100644 --- a/test/unit/providers/fetch/packagistFetchTests.js +++ b/test/unit/providers/fetch/packagistFetchTests.js @@ -28,20 +28,19 @@ describe('packagistFetch', () => { } return resultBox.result } - const getStub = (url_hash, callback) => { + const getStub = url_hash => { const response = new PassThrough() if (url_hash.url.includes('symfony/polyfill-mbstring')) { response.write(fs.readFileSync('test/fixtures/composer/symfony-polyfill-mbstring-v1.11.0-0-gfe5e94c.zip')) - callback(null, { statusCode: 200 }) + response.statusCode = 200 } else { - callback(new Error(url_hash.includes('error') ? 'Error' : 'Code')) + return Promise.reject(new Error(url_hash.url.includes('error') ? 
'Error' : 'Code')) } response.end() - return response + return Promise.resolve(response) } Fetch = proxyquire('../../../../providers/fetch/packagistFetch', { - request: { get: getStub }, - '../../lib/fetch': { callFetch: requestPromiseStub } + '../../lib/fetch': { callFetch: requestPromiseStub, getStream: getStub } }) Fetch._resultBox = resultBox }) diff --git a/test/unit/providers/fetch/podFetchTests.js b/test/unit/providers/fetch/podFetchTests.js index db42063b..2a79196a 100644 --- a/test/unit/providers/fetch/podFetchTests.js +++ b/test/unit/providers/fetch/podFetchTests.js @@ -12,14 +12,10 @@ describe('podFetch', () => { const getVersionsStub = sinon.stub() const PodFetch = proxyquire('../../../../providers/fetch/podFetch', { - requestretry: { - defaults: () => { - return { - get: getVersionsStub - } - } - }, - '../../lib/fetch': { callFetch: sinon.stub().resolves(loadJson('registryData.json')) } + '../../lib/fetch': { + callFetch: sinon.stub().resolves(loadJson('registryData.json')), + callFetchWithRetry: getVersionsStub + } }) let fetch diff --git a/test/unit/providers/fetch/pypiFetchTests.js b/test/unit/providers/fetch/pypiFetchTests.js index 8e8dc8f7..dd86f776 100644 --- a/test/unit/providers/fetch/pypiFetchTests.js +++ b/test/unit/providers/fetch/pypiFetchTests.js @@ -5,7 +5,7 @@ const expect = require('chai').expect const fs = require('fs') const sinon = require('sinon') const PassThrough = require('stream').PassThrough -const nodeRequest = require('request') +const nodeFetch = require('../../../../lib/fetch') const PypiFetch = require('../../../../providers/fetch/pypiFetch') const requestRetryWithDefaults = require('../../../../providers/fetch/requestRetryWithDefaults') const Request = require('../../../../ghcrawler/lib/request.js') @@ -16,10 +16,12 @@ describe('pypiFetch handle function', () => { let sandbox = sinon.createSandbox() let requestGetStub let fetch + let nodeRequestStub beforeEach(function () { - requestGetStub = 
sandbox.stub(requestRetryWithDefaults, 'get') - sandbox.stub(nodeRequest, 'get').callsFake(getCompressedFile) + let nodeFetchStub = sandbox.stub(nodeFetch) + requestGetStub = nodeFetchStub.callFetchWithRetry + nodeRequestStub = nodeFetchStub.getStream fetch = PypiFetch(pypiFetchOptions) }) @@ -39,7 +41,7 @@ describe('pypiFetch handle function', () => { it('fetch success', async () => { const registryData = JSON.parse(fs.readFileSync('test/fixtures/pypi/registryData.json')) requestGetStub.resolves({ body: registryData, statusCode: 200 }) - + nodeRequestStub.resolves(getCompressedFile()) const result = await fetch.handle(new Request('pypi', 'cd:/pypi/pypi/-/backports.ssl-match-hostname/3.7.0.1')) result.fetchResult.copyTo(result) expect(result.url).to.be.equal('cd:/pypi/pypi/-/backports.ssl-match-hostname/3.7.0.1') @@ -202,11 +204,11 @@ describe('pypiFetch handle function', () => { }) }) -const getCompressedFile = (url, callback) => { +const getCompressedFile = () => { const response = new PassThrough() const file = 'test/fixtures/maven/swt-3.3.0-v3346.jar' response.write(fs.readFileSync(file)) - callback(null, { statusCode: 200 }) + response.statusCode = 200 response.end() return response }