diff --git a/.gitignore b/.gitignore index a98ecab..2f70496 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ mongodb node_modules .DS_Store +.idea +*.iml +generated-*.js diff --git a/README.markdown b/README.markdown index 474ef46..35c12a9 100644 --- a/README.markdown +++ b/README.markdown @@ -17,12 +17,18 @@ Benchmarks on my 2011 Macbook Air whilst running lots of software. The test too 1. Comes with the [standard CSV database by MaxMind](http://www.maxmind.com/app/geolite) which may require updating. -## How to use + +## Install + +Simply run + + npm install geoip-native + +## Install from source 1. git clone https://github.com/benlowry/node-geoip-native 2. cd node-geoip-native -3. node test.js - -or just ```npm install geoip-native``` +3. npm install +4. node tools/benchmark.js ## Methods @@ -30,15 +36,21 @@ Node GeoIP Native provides methods for: 1. ```lookup``` performs the lookup, takes the ip address as a parameter -## Examples +## Testing +First, you have to install nodeunit (https://github.com/caolan/nodeunit) + + $> npm install nodeunit + +Second, run the unit test: - var geoip = require("geoip-native"); - var ip = "123.123.123.123"; - geoip.lookup(ip); - console.log("country: " + ip.name + " / " + ip.code); + $> ./node_modules/nodeunit/bin/nodeunit test/unit/lookupTest.js + +## Examples - // in practice you'd want: - // ip = request.headers["x-forwarded-for"] || request.connection.remoteAddress, + var geoip = require("geoip-native"); + var ip = geoip.lookup("134.12.12.123"); + console.log("numeric ip value: " + ip.ipstart); + console.log("country: " + ip.name + " / " + ip.code); ### What's missing Be neat to expand this to include cities. 
diff --git a/generate_sources.js b/generate_sources.js new file mode 100644 index 0000000..ff5ab0b --- /dev/null +++ b/generate_sources.js @@ -0,0 +1,124 @@ +"use strict"; + +var countries = []; +var countryNamesAndCodes = []; + +function read_csv_file_and_prepare_data() { + + function load_CSV_file() { + var fs = require("fs"); + var data = fs.readFileSync(__dirname + "/GeoIPCountryWhois.csv") + var buffer = ""; + buffer += data.toString(); + return buffer; + } + + function extractPartsFromCsv(line) { + var matches = line.match(/("(?:[^"]|"")*"|[^,]*)/g); + var result = []; + for (var i = 0; i < matches.length; i++) { + var part = matches[i].replace(/"/g, "").trim(); + if (part.length > 0) { + result.push(part); + } + } + return result; + } + + var entries = load_CSV_file().split("\n"); + var offsetCounter = 0; + var countryIndex = 0; + var countrySet = {}; + var lastIpRangeEnd = 0; + + for (var i = 0; i < entries.length; i++) { + var parts = extractPartsFromCsv(entries[i]); + if (parts.length > 5) { + var countryName = parts[5].trim(); + var countryCode = parts[4]; + if (!countrySet[countryName]) { + countryIndex = offsetCounter++; + countrySet[countryName] = {index: countryIndex}; + countryNamesAndCodes.push(countryName); + countryNamesAndCodes.push(countryCode); + } else { + countryIndex = countrySet[countryName].index; + } + countries.push(createCountryInformation(parseInt(parts[2]), parseInt(parts[3]), countryCode, countryName, (countryIndex * 2))); + lastIpRangeEnd = parseInt(parts[3]); + } + } + + // add a special country, which indicates the END + countryIndex = offsetCounter; + countrySet["UNKNOWN"] = {index: countryIndex}; + countryNamesAndCodes.push("UNKNOWN"); + countryNamesAndCodes.push("N/A"); + countries.push( + createCountryInformation(lastIpRangeEnd + 1, lastIpRangeEnd + 1, "N/A", "UNKNOWN", countryIndex * 2) + ); + + countries.sort(function (a, b) { + return a.ipstart - b.ipstart; + }); + +} + +function createCountryInformation(ipStart, ipEnd, 
countryCode, countryName, index) { + return { + ipstart: ipStart, + ipend: ipEnd, + code: countryCode, + name: countryName, + index: index + }; +} + + +function write_sourceFile_countryNamesAndCodes() { + var data = ""; + data += "// AUTOGENERATED CODE - DO NOT MODIFY! " + new Date() + " \n"; + data += "var countryNamesAndCodes = [\n"; + for (var i = 0, len = countryNamesAndCodes.length >> 1; i < len; i++) { + var countryName = countryNamesAndCodes[i << 1].replace("'", "\\'"); + var countryCode = countryNamesAndCodes[(i << 1) + 1].replace("'", "\\'"); + data += "'" + countryName + "','" + countryCode + "',\n"; + } + data += "];\n"; + data += "module.exports = {\n"; + data += " countryNamesAndCodes:countryNamesAndCodes\n"; + data += "};\n"; + + var fs = require("fs"); + fs.writeFileSync('generated-namesandcodes.js', data, 'utf8'); +} + +function write_sourceFile_countries() { + var data = ""; + data += "// AUTOGENERATED CODE - DO NOT MODIFY! " + new Date() + " \n"; + data += "var countries = [\n"; + for (var i = 0, len = countries.length; i < len; i++) { + var country = countries[i]; + data += "{s:" + country.ipstart + ","; + data += "e:" + country.ipend + ","; + data += "i:" + country.index + "},"; + data += "//" + countryNamesAndCodes[country.index + 1] + "\n"; + } + data += "];\n"; + data += "module.exports = {\n"; + data += " countries:countries\n"; + data += "};\n"; + + var fs = require("fs"); + fs.writeFileSync('generated-countries.js', data, 'utf8'); +} + +/** + * Prepare the data. This uses the standard free GeoIP CSV database + * from MaxMind, you should be able to update it at any time by just + * overwriting GeoIPCountryWhois.csv with a new version. 
+ */ + +read_csv_file_and_prepare_data() +write_sourceFile_countryNamesAndCodes(); +write_sourceFile_countries(); diff --git a/geoip-native.js b/geoip-native.js new file mode 100644 index 0000000..839e44e --- /dev/null +++ b/geoip-native.js @@ -0,0 +1,65 @@ +var countryNamesAndCodes = require('./generated-namesandcodes.js').countryNamesAndCodes; +var countries = require('./generated-countries.js').countries; +var countriesLength = countries.length; + +module.exports = geoip = { + lookup: function (ip) { + return _lookup(ip); + } +}; + +/** + * @param ip the ip we're looking for + * @return {*} + * @see http://en.wikipedia.org/wiki/Binary_search_algorithm (Deferred detection of equality approach) + */ +function _lookup(ip) { + + var parts = ip.split("."); + var target_ip = parseInt(parts[3], 10) + + (parseInt(parts[2], 10) * 256) + + (parseInt(parts[1], 10) * 65536) + + (parseInt(parts[0], 10) * 16777216); + + var idxMin = 0; + var idxMiddle = 0; + var idxMax = countriesLength - 1; + + while (idxMin < idxMax) { + idxMiddle = (idxMax + idxMin) >> 1; + if (!(idxMiddle < idxMax)) { + throw "assertion error: idxMiddle is not lower then idxMax" + } + if (countries[idxMiddle].s < target_ip) { + idxMin = idxMiddle + 1; + } else { + idxMax = idxMiddle; + } + } + + var pickedCountry = countries[idxMin]; + if ((idxMax == idxMin) && (pickedCountry.s == target_ip)) { + pickedCountry = countries[idxMin]; + return createCountry(pickedCountry.s, pickedCountry.e, countryNamesAndCodes[pickedCountry.i], countryNamesAndCodes[pickedCountry.i + 1]); + } + + if ((idxMiddle > 0) && (countries[idxMiddle - 1].s < target_ip) && (target_ip < countries[idxMiddle].s)) { + pickedCountry = countries[idxMiddle - 1] + return createCountry(pickedCountry.s, pickedCountry.e, countryNamesAndCodes[pickedCountry.i], countryNamesAndCodes[pickedCountry.i + 1]); + } + + if ((idxMiddle < idxMax) && (countries[idxMiddle].s < target_ip) && (target_ip < countries[idxMiddle + 1].s)) { + pickedCountry = 
countries[idxMiddle] + return createCountry(pickedCountry.s, pickedCountry.e, countryNamesAndCodes[pickedCountry.i], countryNamesAndCodes[pickedCountry.i + 1]); + } + return createCountry(target_ip, target_ip, "UNKNOWN", "N/A"); +} + +function createCountry(ipstart, ipend, countryName, countryCode) { + return { + ipstart: ipstart, + ipend: ipend, + name: countryName, + code: countryCode + }; +} diff --git a/geoip.js b/geoip.js deleted file mode 100644 index 140e6c2..0000000 --- a/geoip.js +++ /dev/null @@ -1,122 +0,0 @@ -var countries = [], - midpoints = [], - numcountries = 0, - ready = false; - -module.exports = geoip = { - lookup: function(ip) { - if(!ready) { - console.log("geoip warming up"); - return {code: "N/A", name: "UNKNOWN"}; - } - - return find(ip); - } -}; - -/** - * A qcuick little binary search - * @param ip the ip we're looking for - * @return {*} - */ -function find(ip) { - - var mpi = 0; - var n = midpoints[0]; - var step; - var parts = ip.split("."); - var ipl = parseInt(parts[3], 10) + - (parseInt(parts[2], 10) * 256) + - (parseInt(parts[1], 10) * 65536) + - (parseInt(parts[0], 10) * 16777216); - - var current; - var next; - var prev; - var nn; - var pn; - - while(true) { - - mpi++; - step = midpoints[mpi]; - current = countries[n]; - nn = n + 1; - pn = n - 1; - - next = nn < numcountries ? countries[nn] : null; - prev = pn > -1 ? countries[pn] : null; - - // take another step? - if(step > 0) { - - if(!next || next.ipstart < ipl) { - n += step; - } else { - n -= step; - } - - continue; - } - - // we're either current, next or previous depending on which is closest to ipl - var cd = Math.abs(ipl - current.ipstart); - var nd = next && next.ipstart< ipl ? ipl - next.ipstart : 1000000000; - var pd = prev && prev.ipstart < ipl ? 
ipl - prev.ipstart : 1000000000; - - // current wins - if(cd < nd && cd < pd) { - return current; - } - - // next wins - if(nd < cd && nd < pd) { - return next; - - } - - // prev wins - return prev; - } -} - -/** -* Prepare the data. This uses the standard free GeoIP CSV database -* from MaxMind, you should be able to update it at any time by just -* overwriting GeoIPCountryWhois.csv with a new version. -*/ -(function() { - - var fs = require("fs"); - var sys = require("sys"); - var stream = fs.createReadStream(__dirname + "/GeoIPCountryWhois.csv"); - var buffer = ""; - - stream.addListener("data", function(data) { - buffer += data.toString().replace(/"/g, ""); - }); - - stream.addListener("end", function() { - - var entries = buffer.split("\n"); - - for(var i=0; i= 1) { - n = Math.floor(n / 2); - midpoints.push(n); - } - - numcountries = countries.length; - ready = true; - }); - -}()); \ No newline at end of file diff --git a/index.js b/index.js deleted file mode 100644 index 8fba889..0000000 --- a/index.js +++ /dev/null @@ -1 +0,0 @@ -module.exports = require("./geoip.js"); \ No newline at end of file diff --git a/package.json b/package.json index d7a2fb4..a62c2a3 100644 --- a/package.json +++ b/package.json @@ -11,6 +11,12 @@ "author": { "name": "Playtomic Inc" }, + "contributors": [ + { + "name" : "Martin W. 
Kirst", + "url": "https://github.com/nitram509" + } + ], "repository": { "type": "git", "url": "http://github.com/benlowry/node-geoip-native.git" @@ -18,14 +24,17 @@ "engines": { "node": ">=0.6.0" }, - "main" : "./index.js", + "main" : "./geoip-native.js", + "scripts" : { + "preinstall" : "node generate_sources.js" + }, "licenses": [ { "type": "MIT", "url": "http://opensource.org/licenses/mit-license.php" } ], - "readme": "# Node GeoIP Native\n\nThis package is a lightning-fast, native JavaScript geoip lookup built on [MaxMind](http://www.maxmind.com/)'s free country database.\n\nIt is non-blocking and operates without any IO after initially loading the data into memory.\n\nResults are 4 - 5 times faster than [geoip-lite](https://github.com/bluesmoon/node-geoip) with the caveat that it takes 2 or 3 times longer to initialize and uses 60 or 70 megabytes memory.\n\nThis is used in production at [Playtomic](https://playtomic.com/) in a [high volume API](https://success.heroku.com/playtomic) where performance matters.\n\nBenchmarks on my 2011 Macbook Air whilst running lots of software. The test took the middle 10 results from 20 iterations and averaged them. The APIs are interchangeable so tests were identical.\n\n\tgeoip-native:\taverage: 1540.3ms / million lookups\n\tgeoip-lite: \taverage: 8375.3ms / million lookups\n\n## Requires\n\n1. Comes with the [standard CSV database by MaxMind](http://www.maxmind.com/app/geolite) which may require updating.\n\n## How to use\n1. git clone https://github.com/benlowry/node-geoip-native\n2. cd node-geoip-native\n3. node test.js\n\nor just ```npm install geoip-native```\n\n## Methods\n\nNode GeoIP Native provides methods for:\n\n1. 
```lookup``` performs the lookup, takes the ip address as a parameter\n\n## Examples\n\n\tvar geoip = require(\"geoip-native\");\n\tvar ip = \"123.123.123.123\";\n\tgeoip.lookup(ip);\n\tconsole.log(\"country: \" + ip.name + \" / \" + ip.code);\n\n\t// in practice you'd want:\n\t// ip = request.headers[\"x-forwarded-for\"] || request.connection.remoteAddress,\n\n### What's missing\nBe neat to expand this to include cities.\n\n### License\nCopyright [Playtomic Inc](https://playtomic.com), 2012. Licensed under the MIT license. Certain portions may come from 3rd parties and carry their own licensing terms and are referenced where applicable.\n\nThis product includes GeoLite data created by MaxMind, available from http://www.maxmind.com\n", + "readme": "# Node GeoIP Native\n\nThis package is a lightning-fast, native JavaScript geoip lookup built on [MaxMind](http://www.maxmind.com/)'s free country database.\n\nIt is non-blocking and operates without any IO after initially loading the data into memory.\n\nResults are 4 - 5 times faster than [geoip-lite](https://github.com/bluesmoon/node-geoip) with the caveat that it takes 2 or 3 times longer to initialize and uses 60 or 70 megabytes memory.\n\nThis is used in production at [Playtomic](https://playtomic.com/) in a [high volume API](https://success.heroku.com/playtomic) where performance matters.\n\nBenchmarks on my 2011 Macbook Air whilst running lots of software. The test took the middle 10 results from 20 iterations and averaged them. The APIs are interchangeable so tests were identical.\n\n\tgeoip-native:\taverage: 1540.3ms / million lookups\n\tgeoip-lite: \taverage: 8375.3ms / million lookups\n\n## Requires\n\n1. Comes with the [standard CSV database by MaxMind](http://www.maxmind.com/app/geolite) which may require updating.\n\n## How to use\n1. git clone https://github.com/benlowry/node-geoip-native\n2. cd node-geoip-native\n3. 
node benchmark.js\n\nor just ```npm install geoip-native```\n\n## Methods\n\nNode GeoIP Native provides methods for:\n\n1. ```lookup``` performs the lookup, takes the ip address as a parameter\n\n## Examples\n\n\tvar geoip = require(\"geoip-native\");\n\tvar ip = \"123.123.123.123\";\n\tgeoip.lookup(ip);\n\tconsole.log(\"country: \" + ip.name + \" / \" + ip.code);\n\n\t// in practice you'd want:\n\t// ip = request.headers[\"x-forwarded-for\"] || request.connection.remoteAddress,\n\n### What's missing\nBe neat to expand this to include cities.\n\n### License\nCopyright [Playtomic Inc](https://playtomic.com), 2012. Licensed under the MIT license. Certain portions may come from 3rd parties and carry their own licensing terms and are referenced where applicable.\n\nThis product includes GeoLite data created by MaxMind, available from http://www.maxmind.com\n", "_id": "geoip-native@0.0.2", "_from": "geoip-native" } diff --git a/test.js b/test.js deleted file mode 100644 index 53887e2..0000000 --- a/test.js +++ /dev/null @@ -1,76 +0,0 @@ -var geoip = require("./geoip.js"); -//var geoip = require("geoip-lite"); - -var test1 = true; - -function test() { - - var total = 0; - var numtests = 20; - var numiterations = 1000000; - - console.log("starting test: " + (test1 ? 
"geoip-native" : "geoip-lite")); - - for(var t=0; t 4 && t < 15) { - total += (finish - start); - console.log("time " + (finish - start)); - } - } - - console.log("average: " + (total / 10)); - - if(!test1) { - return; - } - - geoip = require("geoip-lite"); - test1 = false; - test(); -} - -setTimeout(test, 3000); - -/* -benchmark results: - - geoip-native - time 1500 - time 1824 - time 1526 - time 1495 - time 1543 - time 1509 - time 1511 - time 1492 - time 1505 - time 1498 - average: 1540.3 - - geoip-lite - time 8339 - time 8335 - time 8314 - time 8327 - time 8631 - time 8315 - time 8512 - time 8303 - time 8416 - time 8261 - average: 8375.3*/ \ No newline at end of file diff --git a/test/unit/lookupTest.js b/test/unit/lookupTest.js new file mode 100644 index 0000000..2f2e5d4 --- /dev/null +++ b/test/unit/lookupTest.js @@ -0,0 +1,128 @@ +// use Node-Unit (https://github.com/caolan/nodeunit) to run this test + +"use strict"; + +prepare_Tests_from_data_provider(); + +module.exports.setUp = function (callback) { + this.geoip = require("../../geoip-native.js"); + callback(); +}; + +module.exports.tearDown = function (callback) { + this.geoip = null; + callback(); +}; + +function createTestFunction(ipFrom, ipTo, int32From, int32To, countryName, countryCode) { + return function (test) { + var actual = this.geoip.lookup(ipFrom); + test.equals(actual.name, countryName); + test.equals(actual.code, countryCode); + test.equals(actual.ipstart, int32From); + test.equals(actual.ipend, int32To); + + var actual = this.geoip.lookup(ipTo); + test.equals(actual.name, countryName); + test.equals(actual.code, countryCode); + test.equals(actual.ipstart, int32From); + test.equals(actual.ipend, int32To); + + test.done(); + } +} + +function inMinimizedTestRange(maxLength, index) { + var testRanges_fromIndex_toIndex = [ + [0, 99], + [maxLength / 2 - 50, maxLength / 2 + 50], + [maxLength - 100, maxLength - 1] + ]; + var inRange = false; + for (var z = 0; z < 
testRanges_fromIndex_toIndex.length; z++) { + var testRange = testRanges_fromIndex_toIndex[z]; + inRange |= (testRange[0] <= index && index <= testRange[1]); + } + return inRange; +} + +function prepare_Tests_from_data_provider() { + + var records = load_CSV_file().split("\n"); + + + for (var i = 0; i < records.length; i++) { + var record = records[i].toString().trim(); + var dataParts = extractPartsFromCsv(record); + + // warning: testing the complete Range will take more than one minute ! + var shouldContinue = inMinimizedTestRange(records.length, i); + var recordContainsComma = dataParts.length > 6; + if (!shouldContinue && !recordContainsComma) continue; + if (record.length < 1) continue; + + var ipFrom = dataParts[0].toString().replace(/"/g, ""); + var ipTo = dataParts[1].toString().replace(/"/g, ""); + var int32From = parseInt(dataParts[2].toString().replace(/"/g, ""), 10); + var int32To = parseInt(dataParts[3].toString().replace(/"/g, ""), 10); + var countryCode = dataParts[4].toString().replace(/"/g, ""); + var countryName = dataParts[5].toString().replace(/"/g, ""); + + var testMethodName = ("test: record index=" + i + " ipFrom=" + ipFrom + " name=" + countryName); + module.exports[testMethodName] = createTestFunction(ipFrom, ipTo, int32From, int32To, countryName, countryCode); + } +} + +module.exports.test_unknown_low_ip_should_give_an_UNKNONW_country = function (test) { + + var ip = "0.1.2.3"; + + var actual = this.geoip.lookup(ip); + + test.equals(actual.name, "UNKNOWN"); + test.equals(actual.code, "N/A"); + test.equals(actual.ipstart, ip2int(ip)); + + test.done(); +} + +module.exports.test_unknown_high_ip_should_give_an_UNKNONW_country = function (test) { + + var ip = "254.255.254.255"; + + var actual = this.geoip.lookup(ip); + + test.equals(actual.name, "UNKNOWN"); + test.equals(actual.code, "N/A"); + test.equals(actual.ipstart, ip2int(ip)); + + test.done(); +} + +function ip2int(ipAsString) { + var parts = ipAsString.split(/[.]/); + return 
parseInt(parts[0], 10) * (1 << 24) + + parseInt(parts[1], 10) * (1 << 16) + + parseInt(parts[2], 10) * (1 << 8) + + parseInt(parts[3], 10); +} + +function load_CSV_file() { + var fs = require("fs"); + var data = fs.readFileSync(__dirname + "/../../GeoIPCountryWhois.csv") + var buffer = ""; + buffer += data.toString(); + return buffer; +} + +function extractPartsFromCsv(line) { + var matches = line.match(/("(?:[^"]|"")*"|[^,]*)/g); + var result = []; + for (var i = 0; i < matches.length; i++) { + var part = matches[i].replace(/"/g, "").trim(); + if (part.length > 0) { + result.push(part); + } + } + return result; +} \ No newline at end of file diff --git a/tools/benchmark.js b/tools/benchmark.js new file mode 100644 index 0000000..6649ace --- /dev/null +++ b/tools/benchmark.js @@ -0,0 +1,82 @@ +/* + @param geoipLibrary: the library from which to use the lookup method + */ +function benchmark_IP_lookup(geoipLibrary) { + + var total = 0; + var numtests = 20; + var numiterations = 1000000; + console.log("----------------------------"); + for (var t = 0; t < numtests; t++) { + + var start = new Date().getTime(); + + for (var i = 0; i < numiterations; i++) { + + var o1 = 1 + Math.round(Math.random() * 254); + var o2 = 1 + Math.round(Math.random() * 254); + var o3 = 1 + Math.round(Math.random() * 254); + var o4 = 1 + Math.round(Math.random() * 254); + var ip = o1 + "." + o2 + "." + o3 + "." 
+ o4; + geoipLibrary.lookup(ip); + } + + var finish = new Date().getTime(); + + if (t > 4 && t < 15) { + total += (finish - start); + console.log("time " + (finish - start)); + } + } + console.log("average: " + (total / 10)); + console.log("----------------------------"); +} + +function run_all_benchmarks() { + + console.log("starting test: geoip-native"); + var start = new Date().getTime(); + var geoip = require("../geoip-native.js"); + var finish = new Date().getTime(); + console.log("loading geoip-native took " + (finish - start) + "ms"); + benchmark_IP_lookup(geoip); + + + console.log("starting test: geoip-lite"); + start = new Date().getTime(); + geoip = require("geoip-lite"); + finish = new Date().getTime(); + console.log("loading geoip-lite took " + (finish - start) + "ms"); + benchmark_IP_lookup(geoip); +} + +run_all_benchmarks(); + +/* + benchmark results: + + geoip-native + time 1500 + time 1824 + time 1526 + time 1495 + time 1543 + time 1509 + time 1511 + time 1492 + time 1505 + time 1498 + average: 1540.3 + + geoip-lite + time 8339 + time 8335 + time 8314 + time 8327 + time 8631 + time 8315 + time 8512 + time 8303 + time 8416 + time 8261 + average: 8375.3*/