From 9b4257be1f79887a5cec1d3154068640cf66eb7b Mon Sep 17 00:00:00 2001 From: Shan He Date: Tue, 23 May 2023 14:05:09 -0700 Subject: [PATCH] [Feat] Detect zero padded numbers as string Signed-off-by: Shan He --- src/regex-list.js | 6 +++++- src/validator-map.js | 8 ++++++-- test/basic-types-test.js | 23 +++++++++++++++++++++++ 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/src/regex-list.js b/src/regex-list.js index 939f5da..638c9b2 100644 --- a/src/regex-list.js +++ b/src/regex-list.js @@ -26,11 +26,15 @@ var RegexList = { isNumber: /^(\+|\-)?\$?[\d,]*\.?\d+((e|E)(\+|\-)\d+)?%?$/, // accepts: 12, +123, -12,234 - isInt: /^(\+|\-)?[\d,]+$/, + // rejects zero-padded values: 01, 001 + isInt: /^(?!0)(\+|\-)?[\d,]+$/, // accepts: 1.1234, -.1234, +2.34 isFloat: /^(\+|\-)?[\d,]*\.\d+?$/, + // accepts: 0124, 092342, 001203 + isZeroPaddedNumber: /^(0)[\d]+$/, + // accepts: $1 $0.12 $1.12 $1,000.12 $1,000.12 isCurrency: /(?=.)^\$(([1-9][0-9]{0,2}(,[0-9]{3})*)|0)?(\.[0-9]{1,2})?$/, diff --git a/src/validator-map.js b/src/validator-map.js index 832c3d3..55a691e 100644 --- a/src/validator-map.js +++ b/src/validator-map.js @@ -60,7 +60,7 @@ VALIDATOR_MAP[DATA_TYPES.TIME] = Utils.buildRegexCheck('isTime'); // 1, 2, 3, +40, 15,121 const intRegexCheck = Utils.buildRegexCheck('isInt'); function isInt(value) { - if (intRegexCheck(value)) { + if (intRegexCheck(value) || value == '0') { var asNum = parseInt(value.toString().replace(/(\+|,)/g, ''), 10); return asNum > Number.MIN_SAFE_INTEGER && asNum < Number.MAX_SAFE_INTEGER; } @@ -77,8 +77,12 @@ function isFloat(value) { VALIDATOR_MAP[DATA_TYPES.FLOAT] = isFloat; // 1, 2.2, 3.456789e+0 +const zeroPaddedNumCheck = Utils.buildRegexCheck('isZeroPaddedNumber'); + VALIDATOR_MAP[DATA_TYPES.NUMBER] = function isNumeric(row) { - return !isNaN(row) || isInt(row) || isFloat(row); + return ( + (!isNaN(row) && !zeroPaddedNumCheck(row)) || isInt(row) || isFloat(row) + ); }; // strings: '94101-10', 'San Francisco', 'Name' diff --git 
a/test/basic-types-test.js b/test/basic-types-test.js index af08568..91286ca 100644 --- a/test/basic-types-test.js +++ b/test/basic-types-test.js @@ -319,6 +319,29 @@ test('Analyzer: string validator', function t(assert) { assert.end(); }); +test('Analyzer: Zipcode', function t(assert) { + var arr = ['01059', '02280', '05003'].map(mapArr); + assert.equal( + Analyzer.computeColMeta(arr, [], {keepUnknowns: true})[0].type, + 'ZIPCODE', + 'Interprets 0 padded zipcode a string' + ); + + assert.end(); +}); + +test('Analyzer: FIPS', function t(assert) { + // census tract + var arr = ['01001020801', '01001020801'].map(mapArr); + assert.equal( + Analyzer.computeColMeta(arr, [], {keepUnknowns: true})[0].type, + 'STRING', + 'Interprets 0 padded fips a string' + ); + + assert.end(); +}); + test('Analyzer: handling of unknown types', function t(assert) { var arr = [];