Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions integration/analyzer_peliasPhrase.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,13 @@ module.exports.tests.functional = function(test, common){
assertAnalysis( 'address', '325 North 12th Street', expected2 );

// both terms should map to same tokens
var expected3 = [ '0:13509', '1:colfax', '2:ave', '2:avenue', '2:av', '3:s', '3:south', '3:see' ];
var expected3 = [ '0:13509', '1:colfax', '2:ave', '2:avenue', '2:av', '3:s', '3:south' ];
var expected4 = [ '0:13509', '1:colfax', '2:avenue', '2:ave', '2:av', '3:south', '3:s' ];
assertAnalysis( 'address', '13509 Colfax Ave S', expected3 );
assertAnalysis( 'address', '13509 Colfax Avenue South', expected4 );

// both terms should map to same tokens
var expected5 = [ '0:100', '1:s', '1:south', '1:see', '2:lake', '2:lk', '3:dr', '3:drive' ];
var expected5 = [ '0:100', '1:s', '1:south', '2:lake', '2:lk', '3:dr', '3:drive' ];
var expected6 = [ '0:100', '1:south', '1:s', '2:lake', '2:lk', '3:drive', '3:dr' ];
assertAnalysis( 'address', '100 S Lake Dr', expected5 );
assertAnalysis( 'address', '100 South Lake Drive', expected6 );
Expand Down
6 changes: 1 addition & 5 deletions integration/analyzer_peliasStreet.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
// validate analyzer is behaving as expected

var tape = require('tape'),
elastictest = require('elastictest'),
schema = require('../schema'),
punctuation = require('../punctuation');
const elastictest = require('elastictest')

module.exports.tests = {};

Expand Down
21 changes: 20 additions & 1 deletion integration/run.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ const common = {
}
return positions;
},
// the 'analyze' assertion indexes $text using the analyzer specified
// in the $analyzer var and then checks that all of the tokens in
// $expected are contained within the index.
// note: previously it asserted that $expected was deeply equal to the
// tokens in the index; now it only asserts that they intersect — the
// index may however contain additional tokens not specified in $expected.
analyze: (suite, t, analyzer, comment, text, expected) => {
suite.assert(done => {
suite.client.indices.analyze({
Expand All @@ -67,13 +73,26 @@ const common = {
}
}, (err, res) => {
if (err) { console.error(err); }
t.deepEqual(common.bucketTokens(res.tokens), common.bucketTokens(expected), comment);
t.deepEqual({}, removeIndexTokensFromExpectedTokens(
common.bucketTokens(res.tokens),
common.bucketTokens(expected)
), comment);
done();
});
});
}
};

// Remove from $expected every token that already appears in $index at the
// same position bucket.
// Mutates (and returns) $expected: position keys whose token arrays become
// empty are deleted entirely, so a return value of {} means every expected
// token was found in the index.
// @param {Object} index    - position => array of tokens produced by the analyzer
// @param {Object} expected - position => array of tokens the test asserts on
// @returns {Object} the mutated $expected object
function removeIndexTokensFromExpectedTokens(index, expected){
  // Object.keys avoids visiting inherited enumerable properties
  // (for...in would include them); natives replace the lodash helpers
  // the original used for these trivial checks.
  for (const pos of Object.keys(index)) {
    // skip positions the caller did not assert on (or non-array values)
    if (!Array.isArray(expected[pos])) { continue; }
    expected[pos] = expected[pos].filter(token => !index[pos].includes(token));
    if (expected[pos].length === 0) { delete expected[pos]; }
  }

  return expected;
}

var tests = [
require('./validate.js'),
require('./dynamic_templates.js'),
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"@hapi/joi": "^16.1.8",
"colors": "^1.1.2",
"elasticsearch": "^16.0.0",
"glob": "^7.1.6",
"lodash": "^4.17.15",
"pelias-config": "^4.5.0",
"pelias-logger": "^1.3.0",
Expand Down
89 changes: 48 additions & 41 deletions settings.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,7 @@
const _ = require('lodash');
const fs = require('fs');
const path = require('path');
const peliasConfig = require('pelias-config');
const punctuation = require('./punctuation');
const synonymParser = require('./synonyms/parser');
const synonymLinter = require('./synonyms/linter');

// load synonyms from disk
const synonyms = fs.readdirSync(path.join(__dirname, 'synonyms'))
.sort()
.filter( f => f.match(/\.txt$/) )
.reduce(( acc, cur ) => {
acc[cur.replace('.txt', '')] = synonymParser(
path.join(__dirname, 'synonyms', cur)
);
return acc;
}, {});

// emit synonym warnings
synonymLinter(synonyms);
const synonyms = require('./synonyms/loader').load();

require('./configValidation').validate(peliasConfig.generate());

Expand Down Expand Up @@ -50,9 +33,9 @@ function generate(){
"char_filter" : ["punctuation", "nfkc_normalizer"],
"filter": [
"lowercase",
"icu_folding",
"trim",
"custom_admin",
"admin_synonyms_multiplexer",
"icu_folding",
"word_delimiter",
"unique_only_same_position",
"notnull",
Expand All @@ -65,12 +48,9 @@ function generate(){
"char_filter" : ["punctuation", "nfkc_normalizer"],
"filter": [
"lowercase",
"icu_folding",
"trim",
"custom_name",
"street_suffix",
"directionals",
"ampersand",
"name_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
"removeAllZeroNumericPrefix",
"peliasOneEdgeGramFilter",
Expand All @@ -84,9 +64,9 @@ function generate(){
"tokenizer": "peliasTokenizer",
"char_filter": ["punctuation", "nfkc_normalizer"],
"filter": [
"icu_folding",
"lowercase",
"trim",
"icu_folding",
"remove_ordinals",
"removeAllZeroNumericPrefix",
"unique_only_same_position",
Expand All @@ -101,10 +81,7 @@ function generate(){
"lowercase",
"trim",
"remove_duplicate_spaces",
"ampersand",
"custom_name",
"street_suffix",
"directionals",
"name_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
"unique_only_same_position",
Expand All @@ -115,23 +92,23 @@ function generate(){
"peliasZip": {
"type": "custom",
"tokenizer":"keyword",
"char_filter" : ["alphanumeric"],
"char_filter": ["alphanumeric", "nfkc_normalizer"],
"filter": [
"lowercase",
"icu_folding",
"trim",
"icu_folding",
"unique_only_same_position",
"notnull"
]
},
"peliasUnit": {
"type": "custom",
"tokenizer":"keyword",
"char_filter" : ["alphanumeric"],
"char_filter": ["alphanumeric", "nfkc_normalizer"],
"filter": [
"lowercase",
"icu_folding",
"trim",
"icu_folding",
"unique_only_same_position",
"notnull"
]
Expand All @@ -149,9 +126,7 @@ function generate(){
"lowercase",
"trim",
"remove_duplicate_spaces",
"custom_street",
"street_suffix",
"directionals",
"street_synonyms_multiplexer",
"icu_folding",
"remove_ordinals",
"trim",
Expand All @@ -162,6 +137,37 @@ function generate(){
}
},
"filter" : {
"street_synonyms_multiplexer": {
"type": "multiplexer",
"preserve_original": false,
"filters": [
"synonyms/custom_street",
"synonyms/personal_titles",
"synonyms/streets",
"synonyms/directionals"
]
},
"name_synonyms_multiplexer": {
"type": "multiplexer",
"preserve_original": false,
"filters": [
"synonyms/custom_name",
"synonyms/personal_titles",
"synonyms/place_names",
"synonyms/streets",
"synonyms/directionals",
"synonyms/punctuation"
]
},
"admin_synonyms_multiplexer": {
"type": "multiplexer",
"preserve_original": false,
"filters": [
"synonyms/custom_admin",
"synonyms/personal_titles",
"synonyms/place_names"
]
},
"notnull" :{
"type" : "length",
"min" : 1
Expand Down Expand Up @@ -219,13 +225,14 @@ function generate(){
};

// dynamically create filters for all synonym files in the ./synonyms directory.
// each filter is given the same name as the file, minus the extension.
_.each(synonyms, (synonym, key) => {
settings.analysis.filter[key] = {
// each filter is given the same name as the file, paths separators are replaced with
// underscores and the file extension is removed.
_.each(synonyms, (synonym, name) => {
settings.analysis.filter[`synonyms/${name}`] = {
"type": "synonym",
"synonyms": !_.isEmpty(synonym) ? synonym : ['']
};
})
});

// Merge settings from pelias/config
settings = _.merge({}, settings, _.get(config, 'elasticsearch.settings', {}));
Expand Down
6 changes: 0 additions & 6 deletions synonyms/custom_admin.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,3 @@
# foo => foo bar, baz
#
# =============================================================================

saint,st
sainte,ste
fort,ft
mount,mt
mont,mt
Loading