From 483349fbe67f5514e0ccded018c87a519389f2bf Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Mon, 29 Sep 2025 13:00:08 +0200 Subject: [PATCH] feat(patch): add patch post-processing script --- Document.js | 1 + package.json | 3 +- post/patch.js | 86 +++++++++++++++++++++++++++ test/document/post.js | 5 +- test/post/fixtures/patch/invalid.json | 1 + test/post/fixtures/patch/valid.json | 7 +++ test/post/patch.js | 51 ++++++++++++++++ test/run.js | 1 + 8 files changed, 152 insertions(+), 3 deletions(-) create mode 100644 post/patch.js create mode 100644 test/post/fixtures/patch/invalid.json create mode 100644 test/post/fixtures/patch/valid.json create mode 100644 test/post/patch.js diff --git a/Document.js b/Document.js index 938b11a..c3ba653 100644 --- a/Document.js +++ b/Document.js @@ -46,6 +46,7 @@ function Document( source, layer, source_id ){ this.addPostProcessingScript( require('./post/deduplication') ); this.addPostProcessingScript( require('./post/language_field_trimming') ); this.addPostProcessingScript( require('./post/popularity') ); + this.addPostProcessingScript( require('./post/patch') ); // mandatory properties this.setSource( source ); diff --git a/package.json b/package.json index fd9d7ad..4e40070 100644 --- a/package.json +++ b/package.json @@ -28,14 +28,15 @@ "node": ">= 10.0.0" }, "dependencies": { + "glob": "^11.0.3", "lodash": "^4.6.1", "pelias-config": "^6.0.0", "through2": "^3.0.0" }, "devDependencies": { - "stream-mock": "^2.0.3", "precommit-hook": "^3.0.0", "proxyquire": "^2.0.0", + "stream-mock": "^2.0.3", "tap-spec": "^5.0.0", "tape": "^5.0.0" }, diff --git a/post/patch.js b/post/patch.js new file mode 100644 index 0000000..a2e2de6 --- /dev/null +++ b/post/patch.js @@ -0,0 +1,86 @@ +/** + * Document patch post-processing script applies targeted modifications to + * documents based on their GID (global identifier). 
+ *
+ * Loads JSON patch definition files containing 'set' operations that modify
+ * specific document properties using lodash's _.set() method.
+ *
+ * Example patch file format:
+ * {
+ *   "geonames:county:3333219": {
+ *     "set": {
+ *       "name.default": ["Los Angeles County"]
+ *     }
+ *   }
+ * }
+ */
+const _ = require('lodash');
+const fs = require('fs');
+const { globSync } = require('glob');
+
+const setOperations = new Map(); // GID => plain object of { lodash path: value }
+
+function patch( doc ){
+  // post-processing entry point: mutates $doc in place; no-op when nothing is loaded
+  if( setOperations.size === 0 ){ return; }
+
+  // look up the 'set' operations registered for this document's GID
+  const replacements = setOperations.get(doc.getGid());
+  if (!_.isPlainObject(replacements)) { return; }
+
+  // each key is a lodash path (eg. 'name.default') assigned directly onto $doc
+  _.forEach(replacements, (value, key) => _.set(doc, key, value));
+}
+
+function load() {
+  // (re)build the GID => 'set' operations registry from the configured patch files
+  setOperations.clear();
+  const config = require('pelias-config').generate();
+  if (!_.isPlainObject(config)) { return; }
+
+  const patterns = _.get(config, 'imports.patch.files', []); // list of glob patterns
+  if (!_.isArray(patterns)) { return; }
+
+  patterns.forEach(pattern => {
+    const files = globSync(pattern, { nodir: true, absolute: true });
+    if (_.isEmpty(files)) { // globSync() yields an empty array when nothing matches
+      return console.error(`patch: pattern '${pattern}': matched zero files`);
+    }
+
+    files.forEach(filename => {
+      if (!_.isString(filename) || !filename.endsWith('.json')) {
+        return console.error(`patch: file ${filename}: invalid filename`);
+      }
+
+      let json = {};
+      try {
+        json = JSON.parse(fs.readFileSync(filename, 'utf8'));
+      } catch (e) {
+        return console.error(`patch: failed to load or parse JSON file ${filename}:`, e.message);
+      }
+
+      if (!_.isPlainObject(json)) {
+        return console.error(`patch: file ${filename}: invalid definition`);
+      }
+      _.forEach(json, (ops, gid) => {
+        if (_.has(ops, 'set')) {
+          const setOps = _.get(ops, 'set');
+          if (!_.isPlainObject(setOps)) {
+            return console.error(`patch: file ${filename}: invalid set ops definition`);
+          }
+          setOperations.set(gid, setOps); // a later definition of the same GID wins
+        }
+      });
+    });
+  });
+}
+
+// load patch definition
files +try { + load(); +} catch (e) { + console.error('patch: failed to load patch definition files'); +} + +patch.load = load; // export load() for testing +module.exports = patch; \ No newline at end of file diff --git a/test/document/post.js b/test/document/post.js index be5a53b..1664440 100644 --- a/test/document/post.js +++ b/test/document/post.js @@ -7,9 +7,10 @@ const zero_prefixed_house_numbers = require('../../post/zero_prefixed_house_numb const deduplication = require('../../post/deduplication'); const language_field_trimming = require('../../post/language_field_trimming'); const popularity = require('../../post/popularity'); +const patch = require('../../post/patch'); const DEFAULT_SCRIPTS = [ - intersections, seperable_street_names, alphanumeric_postcodes, - zero_prefixed_house_numbers, deduplication, language_field_trimming, popularity + intersections, seperable_street_names, alphanumeric_postcodes, zero_prefixed_house_numbers, + deduplication, language_field_trimming, popularity, patch ]; module.exports.tests = {}; diff --git a/test/post/fixtures/patch/invalid.json b/test/post/fixtures/patch/invalid.json new file mode 100644 index 0000000..f260a90 --- /dev/null +++ b/test/post/fixtures/patch/invalid.json @@ -0,0 +1 @@ +{invalid json \ No newline at end of file diff --git a/test/post/fixtures/patch/valid.json b/test/post/fixtures/patch/valid.json new file mode 100644 index 0000000..2875c53 --- /dev/null +++ b/test/post/fixtures/patch/valid.json @@ -0,0 +1,7 @@ +{ + "geonames:county:3333219": { + "set": { + "name.default": ["Los Angeles County"] + } + } +} \ No newline at end of file diff --git a/test/post/patch.js b/test/post/patch.js new file mode 100644 index 0000000..0409d85 --- /dev/null +++ b/test/post/patch.js @@ -0,0 +1,51 @@ +const fs = require('fs'); +const os = require('os'); +const path = require('path'); +const Document = require('../../Document'); +const patch = require('../../post/patch'); + +module.exports.tests = {}; + 
+module.exports.tests.noop = function (test) {
+  test('empty setOperations - no-op', (t) => {
+    const doc = new Document('geonames', 'county', '3333219');
+    patch.load();
+    patch(doc);
+    t.deepEquals(doc.name, {});
+    t.end();
+  });
+};
+
+module.exports.tests.foo = function (test) {
+  test('valid setOperations - update name', (t) => {
+    const doc = new Document('geonames', 'county', '3333219');
+    useFixtures(() => { // PELIAS_CONFIG targets the fixtures only inside this callback
+      patch.load();
+      patch(doc);
+      t.deepEquals(doc.name, { default: [ 'Los Angeles County' ] });
+      t.end();
+    });
+  });
+};
+
+module.exports.all = function (tape, common) {
+  function test(name, testFunction) {
+    return tape('post/patch: ' + name, testFunction);
+  }
+  for (var testCase in module.exports.tests) {
+    module.exports.tests[testCase](test, common);
+  }
+};
+
+// convenience test function loads all fixtures in the fixtures/patch directory
+function useFixtures(fn) {
+  const files = [path.join(__dirname, 'fixtures', 'patch', '*')];
+  const peliasConfig = path.join(os.tmpdir(), `pelias-${Date.now()}.json`);
+  fs.writeFileSync(peliasConfig, JSON.stringify({ imports: { patch: { files } } }));
+  process.env.PELIAS_CONFIG = peliasConfig;
+  try { fn(); } finally {
+    // always undo the env var and temp file, even when $fn throws
+    delete process.env.PELIAS_CONFIG;
+    fs.unlinkSync(peliasConfig);
+  }
+}
\ No newline at end of file
diff --git a/test/run.js b/test/run.js
index 4d41d41..f9eb663 100644
--- a/test/run.js
+++ b/test/run.js
@@ -29,6 +29,7 @@ const tests = [
   require('./post/seperable_street_names.js'),
   require('./post/language_field_trimming.js'),
   require('./post/popularity.js'),
+  require('./post/patch.js'),
   require('./DocumentMapperStream.js'),
   require('./util/transform.js'),
   require('./util/valid.js'),