Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/regex-list.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,15 @@ var RegexList = {
isNumber: /^(\+|\-)?\$?[\d,]*\.?\d+((e|E)(\+|\-)\d+)?%?$/,

// accepts: 12, +123, -12,234
isInt: /^(\+|\-)?[\d,]+$/,
// rejects: 01, 001
isInt: /^(?!0)(\+|\-)?[\d,]+$/,
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only scenario this doesn't handle is the actual value 0.


// accepts: 1.1234, -.1234, +2.34
isFloat: /^(\+|\-)?[\d,]*\.\d+?$/,

// accepts: 0124, 092342, 001203
isZeroPaddedNumber: /^(0)[\d]+$/,

// accepts: $1 $0.12 $1.12 $1,000.12 $1,000.12
isCurrency: /(?=.)^\$(([1-9][0-9]{0,2}(,[0-9]{3})*)|0)?(\.[0-9]{1,2})?$/,

Expand Down
8 changes: 6 additions & 2 deletions src/validator-map.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ VALIDATOR_MAP[DATA_TYPES.TIME] = Utils.buildRegexCheck('isTime');
// 1, 2, 3, +40, 15,121
const intRegexCheck = Utils.buildRegexCheck('isInt');
function isInt(value) {
if (intRegexCheck(value)) {
if (intRegexCheck(value) || value == '0') {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (intRegexCheck(value) || value == '0') {
if (intRegexCheck(value) || value === '0') {

Do we know that value is a string? If not, should we also check for value === 0?

var asNum = parseInt(value.toString().replace(/(\+|,)/g, ''), 10);
return asNum > Number.MIN_SAFE_INTEGER && asNum < Number.MAX_SAFE_INTEGER;
}
Expand All @@ -77,8 +77,12 @@ function isFloat(value) {
VALIDATOR_MAP[DATA_TYPES.FLOAT] = isFloat;

// 1, 2.2, 3.456789e+0
const zeroPaddedNumCheck = Utils.buildRegexCheck('isZeroPaddedNumber');
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Comment above should now move down?
Nit 2: It looks like this just avoids number parsing - should we explicitly output string in this case, rather than having it fall through the number check?

Copy link
Collaborator Author

@heshan0131 heshan0131 May 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but that's not how computeColMeta is currently designed. With the current implementation,
type = allValidators.find(buildValidatorFinder(data, columnName)); the only way to have it return string is to return falsy on the number check, because string is at the bottom of the validator queue.


// Validator registered for DATA_TYPES.NUMBER.
// Accepts values such as 1, 2.2, 3.456789e+0.
//
// A value counts as numeric when either:
//   - it coerces to a number AND is not matched by zeroPaddedNumCheck
//     (so zero-padded digit strings like '01059' are not accepted on the
//     strength of numeric coercion alone), or
//   - it passes isInt or isFloat.
//
// @param {*} row - the raw cell value to test
// @returns {*} truthy when the value should be treated as a number
VALIDATOR_MAP[DATA_TYPES.NUMBER] = function isNumeric(row) {
  // The global isNaN (not Number.isNaN) coerces its argument, so numeric
  // strings pass this check as well as actual numbers.
  const coercesToNumber = !isNaN(row) && !zeroPaddedNumCheck(row);
  return coercesToNumber || isInt(row) || isFloat(row);
};

// strings: '94101-10', 'San Francisco', 'Name'
Expand Down
23 changes: 23 additions & 0 deletions test/basic-types-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,29 @@ test('Analyzer: string validator', function t(assert) {
assert.end();
});

// Zero-padded, zipcode-like values are expected to be reported as ZIPCODE.
test('Analyzer: Zipcode', function t(assert) {
  const zipcodes = ['01059', '02280', '05003'];
  const rows = zipcodes.map(mapArr);
  const columnMeta = Analyzer.computeColMeta(rows, [], {keepUnknowns: true});

  assert.equal(
    columnMeta[0].type,
    'ZIPCODE',
    'Interprets 0 padded zipcode a string'
  );

  assert.end();
});

// Zero-padded FIPS codes are expected to be reported as STRING.
test('Analyzer: FIPS', function t(assert) {
  // census tract
  const censusTracts = ['01001020801', '01001020801'];
  const rows = censusTracts.map(mapArr);
  const columnMeta = Analyzer.computeColMeta(rows, [], {keepUnknowns: true});

  assert.equal(
    columnMeta[0].type,
    'STRING',
    'Interprets 0 padded fips a string'
  );

  assert.end();
});

test('Analyzer: handling of unknown types', function t(assert) {
var arr = [];

Expand Down