diff --git a/integration/analyzer_peliasPhrase.js b/integration/analyzer_peliasPhrase.js index e1002255..916fe0ba 100644 --- a/integration/analyzer_peliasPhrase.js +++ b/integration/analyzer_peliasPhrase.js @@ -78,13 +78,13 @@ module.exports.tests.functional = function(test, common){ assertAnalysis( 'address', '325 North 12th Street', expected2 ); // both terms should map to same tokens - var expected3 = [ '0:13509', '1:colfax', '2:ave', '2:avenue', '2:av', '3:s', '3:south', '3:see' ]; + var expected3 = [ '0:13509', '1:colfax', '2:ave', '2:avenue', '2:av', '3:s', '3:south' ]; var expected4 = [ '0:13509', '1:colfax', '2:avenue', '2:ave', '2:av', '3:south', '3:s' ]; assertAnalysis( 'address', '13509 Colfax Ave S', expected3 ); assertAnalysis( 'address', '13509 Colfax Avenue South', expected4 ); // both terms should map to same tokens - var expected5 = [ '0:100', '1:s', '1:south', '1:see', '2:lake', '2:lk', '3:dr', '3:drive' ]; + var expected5 = [ '0:100', '1:s', '1:south', '2:lake', '2:lk', '3:dr', '3:drive' ]; var expected6 = [ '0:100', '1:south', '1:s', '2:lake', '2:lk', '3:drive', '3:dr' ]; assertAnalysis( 'address', '100 S Lake Dr', expected5 ); assertAnalysis( 'address', '100 South Lake Drive', expected6 ); diff --git a/integration/analyzer_peliasStreet.js b/integration/analyzer_peliasStreet.js index 705cb659..6ad9355e 100644 --- a/integration/analyzer_peliasStreet.js +++ b/integration/analyzer_peliasStreet.js @@ -1,9 +1,5 @@ // validate analyzer is behaving as expected - -var tape = require('tape'), - elastictest = require('elastictest'), - schema = require('../schema'), - punctuation = require('../punctuation'); +const elastictest = require('elastictest') module.exports.tests = {}; diff --git a/integration/run.js b/integration/run.js index 1daccd22..5b312aca 100644 --- a/integration/run.js +++ b/integration/run.js @@ -57,6 +57,12 @@ const common = { } return positions; }, + // the 'analyze' assertion indexes $text using the analyzer specified + // in the 
$analyzer var and then checks that all of the tokens in + // $expected are contained within the index. + // note: previously it asserted that $expected was deeply equal to the + // tokens in the index; now it only asserts that every expected token is present, the + // index may however contain additional tokens not specified in $expected. analyze: (suite, t, analyzer, comment, text, expected) => { suite.assert(done => { suite.client.indices.analyze({ @@ -67,13 +73,26 @@ const common = { } }, (err, res) => { if (err) { console.error(err); } - t.deepEqual(common.bucketTokens(res.tokens), common.bucketTokens(expected), comment); + t.deepEqual({}, removeIndexTokensFromExpectedTokens( + common.bucketTokens(res.tokens), + common.bucketTokens(expected) + ), comment); done(); }); }); } }; +function removeIndexTokensFromExpectedTokens(index, expected){ + for (var pos in index) { + if (!_.isArray(expected[pos])) { continue; } + expected[pos] = expected[pos].filter(token => !index[pos].includes(token)); + if (_.isEmpty(expected[pos])) { delete expected[pos]; } + } + + return expected; +} + var tests = [ require('./validate.js'), require('./dynamic_templates.js'), diff --git a/package.json b/package.json index cbd5d276..1a749ec5 100644 --- a/package.json +++ b/package.json @@ -33,6 +33,7 @@ "@hapi/joi": "^16.1.8", "colors": "^1.1.2", "elasticsearch": "^16.0.0", + "glob": "^7.1.6", "lodash": "^4.17.15", "pelias-config": "^4.5.0", "pelias-logger": "^1.3.0", diff --git a/settings.js b/settings.js index 43b1ce31..dca59cd3 100644 --- a/settings.js +++ b/settings.js @@ -1,24 +1,7 @@ const _ = require('lodash'); -const fs = require('fs'); -const path = require('path'); const peliasConfig = require('pelias-config'); const punctuation = require('./punctuation'); -const synonymParser = require('./synonyms/parser'); -const synonymLinter = require('./synonyms/linter'); - -// load synonyms from disk -const synonyms = fs.readdirSync(path.join(__dirname, 'synonyms')) - .sort() - .filter( f => 
f.match(/\.txt$/) ) - .reduce(( acc, cur ) => { - acc[cur.replace('.txt', '')] = synonymParser( - path.join(__dirname, 'synonyms', cur) - ); - return acc; - }, {}); - -// emit synonym warnings -synonymLinter(synonyms); +const synonyms = require('./synonyms/loader').load(); require('./configValidation').validate(peliasConfig.generate()); @@ -50,9 +33,9 @@ function generate(){ "char_filter" : ["punctuation", "nfkc_normalizer"], "filter": [ "lowercase", - "icu_folding", "trim", - "custom_admin", + "admin_synonyms_multiplexer", + "icu_folding", "word_delimiter", "unique_only_same_position", "notnull", @@ -65,12 +48,9 @@ function generate(){ "char_filter" : ["punctuation", "nfkc_normalizer"], "filter": [ "lowercase", - "icu_folding", "trim", - "custom_name", - "street_suffix", - "directionals", - "ampersand", + "name_synonyms_multiplexer", + "icu_folding", "remove_ordinals", "removeAllZeroNumericPrefix", "peliasOneEdgeGramFilter", @@ -84,9 +64,9 @@ function generate(){ "tokenizer": "peliasTokenizer", "char_filter": ["punctuation", "nfkc_normalizer"], "filter": [ - "icu_folding", "lowercase", "trim", + "icu_folding", "remove_ordinals", "removeAllZeroNumericPrefix", "unique_only_same_position", @@ -101,10 +81,7 @@ function generate(){ "lowercase", "trim", "remove_duplicate_spaces", - "ampersand", - "custom_name", - "street_suffix", - "directionals", + "name_synonyms_multiplexer", "icu_folding", "remove_ordinals", "unique_only_same_position", @@ -115,11 +92,11 @@ function generate(){ "peliasZip": { "type": "custom", "tokenizer":"keyword", - "char_filter" : ["alphanumeric"], + "char_filter": ["alphanumeric", "nfkc_normalizer"], "filter": [ "lowercase", - "icu_folding", "trim", + "icu_folding", "unique_only_same_position", "notnull" ] @@ -127,11 +104,11 @@ function generate(){ "peliasUnit": { "type": "custom", "tokenizer":"keyword", - "char_filter" : ["alphanumeric"], + "char_filter": ["alphanumeric", "nfkc_normalizer"], "filter": [ "lowercase", - "icu_folding", "trim", + 
"icu_folding", "unique_only_same_position", "notnull" ] @@ -149,9 +126,7 @@ function generate(){ "lowercase", "trim", "remove_duplicate_spaces", - "custom_street", - "street_suffix", - "directionals", + "street_synonyms_multiplexer", "icu_folding", "remove_ordinals", "trim", @@ -162,6 +137,37 @@ function generate(){ } }, "filter" : { + "street_synonyms_multiplexer": { + "type": "multiplexer", + "preserve_original": false, + "filters": [ + "synonyms/custom_street", + "synonyms/personal_titles", + "synonyms/streets", + "synonyms/directionals" + ] + }, + "name_synonyms_multiplexer": { + "type": "multiplexer", + "preserve_original": false, + "filters": [ + "synonyms/custom_name", + "synonyms/personal_titles", + "synonyms/place_names", + "synonyms/streets", + "synonyms/directionals", + "synonyms/punctuation" + ] + }, + "admin_synonyms_multiplexer": { + "type": "multiplexer", + "preserve_original": false, + "filters": [ + "synonyms/custom_admin", + "synonyms/personal_titles", + "synonyms/place_names" + ] + }, "notnull" :{ "type" : "length", "min" : 1 @@ -219,13 +225,14 @@ function generate(){ }; // dynamically create filters for all synonym files in the ./synonyms directory. - // each filter is given the same name as the file, minus the extension. - _.each(synonyms, (synonym, key) => { - settings.analysis.filter[key] = { + // each filter is given the same name as the file, paths separators are replaced with + // underscores and the file extension is removed. + _.each(synonyms, (synonym, name) => { + settings.analysis.filter[`synonyms/${name}`] = { "type": "synonym", "synonyms": !_.isEmpty(synonym) ? 
synonym : [''] }; - }) + }); // Merge settings from pelias/config settings = _.merge({}, settings, _.get(config, 'elasticsearch.settings', {})); diff --git a/synonyms/custom_admin.txt b/synonyms/custom_admin.txt index 24441522..a61474de 100644 --- a/synonyms/custom_admin.txt +++ b/synonyms/custom_admin.txt @@ -23,9 +23,3 @@ # foo => foo bar, baz # # ============================================================================= - -saint,st -sainte,ste -fort,ft -mount,mt -mont,mt diff --git a/synonyms/custom_name.txt b/synonyms/custom_name.txt index ad9fe7d3..a61474de 100644 --- a/synonyms/custom_name.txt +++ b/synonyms/custom_name.txt @@ -23,152 +23,3 @@ # foo => foo bar, baz # # ============================================================================= - -# English -brothers,bros -cape,cpe,cp -city,cty -creek,cr,crk -county,co,cty -downs,downes,dwns -flats,flts -forest,frst,fst -fort,ft -fords,frds -fork,frk -forks,frks -forge,frg -forges,frgs -glens,glns -great,grt,gt -greater,grtr,gtr -greens,grns -groves,grvs -heights,hghts,hgts,hieghts,ht,hts,hgths -international,intl -lake,lk -lakes,lks -little,ltl,lttl,littl,litl -lock,lck -locks,lcks -lower,low,lwr,lr -medical,med -memorial,mem -middle,mid,midl -military,mil -mount,mt,mnt -mountain,mtn -mountains,mtns -municipal,mun,mpal -national,natl -neck,nck -orchard,orch -paradise,pde,pdse -port,pt,prt -park,pk,prk -rear of,r / o,r o -river,riv,rvr,rivr -slope,slpe,slp -springs,spgs,sprngs -stream,strm,stm -triangle,tri -upper,up,upr,uppr -village,vlg,vlge,vilg,vilge -ville,vl -villages,vlgs -wood,wd -woods,wds - -# French -baston,bast -bourg,brg -charmille,chi -colline,coli -collines,colis -enceinte,en -fleuve,fl -grand,gd,gr,g -mont,mt,mnt -petite,p,pt -porche,pch -rivière,riviere,riv -village,vge -villages,vges - -# German -deutsch,dt -ehemalige,ehem -gebruder,gebr -haltestelle,hst -hinter,hint,ht -internationale,int -kleine,kl -kleiner,kl -kleines,kl -kogel,kg -niedere,nd -rhein,rh -see,s -spitze,sp 
-vordere,vd,vord -wiese,ws - -# Spanish -abril,abr,abl -agosto,ag,agto,agt -altura,alt -alturas,alts -arboleda,arb -arrabal,arral -bosque,bsq -brigada,brig -cabo,cbo -campo,cpo,cmpo -campos,cpos,cmpos -canal,cnl -centro,cntro,ctro -cerro,crro -corral,crral -corralillo,crrlo -diseminado,disem -enero,en,eno,ene,en o -diciembre,dic,dicbre,dice,dbre,10bre,10 bre,xbre,x bre -febrero,febo,febro,febr,feb -gobierno,gob,gobno -grande,gr -guerra,ga -independencia,indep -infantería,infanteria,infa,ynfa,ynfanta -jardín,jdin,jard,jardin -jardínes,jdins,jards,jardines -junio,jun,jn -julio,jul,jl -lago,lg -lagos,lgs -laguna,lgna -llanura,llnra -llanuras,llnras -marzo,mzo,mar -mayo,my,may -militar,milr -monte,mt,mte,mnte -montes,mts,mtes,mntes,mnts -nacional,nal,nacl -noviembre,nbre,nvre,nove,novre,novbre,9bre,9 bre -octubre,oct,octbre,octe,8bre,8 bre -portillo,ptilo,ptllo -prado,prdo -primeros,pros -privada,priv -punta,pnta -quebrada,qbda -real,rl -republica,rep -revolucion,rev -ribera,ribr -río,rio -septiembre,setbre,sepe,sepbre,7bre,7 re,7re,7 bre,sep,set -sierra,srra -valle,vlle -volcan,vlcn -voluntarios,voluntos diff --git a/synonyms/directionals.txt b/synonyms/directionals.txt deleted file mode 100644 index a427ff2e..00000000 --- a/synonyms/directionals.txt +++ /dev/null @@ -1,11 +0,0 @@ -# note: more descriptive tokens must come before less descriptive ones -# eg: 'southwest' must come before 'west' else 'southwest foo' -> 'southw foo' - -southwest, sw -southeast, se -northwest, nw -northeast, ne -north, n -south, s -east, e -west, w diff --git a/synonyms/directionals/de.txt b/synonyms/directionals/de.txt new file mode 100644 index 00000000..241d1fab --- /dev/null +++ b/synonyms/directionals/de.txt @@ -0,0 +1,33 @@ +nord, n +nördlich, nördl, nordl, nordlich, noerdlich +nördliche, nordliche, noerdliche +nördlicher, nordlicher, noerdlicher +nördliches, nordliches, noerdliches +nordost, no +nordöstlich, nordostlich, nordoestlich +nordwest, nw +ost, o +östlich, östl, ostlich, 
ostl, oestlich +östliche, ostliche, oestliche +östlicher, ostlicher, oestlicher +östliches, ostliches, oestliches +süd, s, sud, sued +süden, suden, sueden +südlich, südl, sudl, sudlich, suedlich +südliche, sudliche, suedliche +südlicher, sudlicher, suedlicher +südliches, sudliches, suedliches +südost, so, sudost, suedost +südosten, sudosten, suedosten +südöstlich, sudostlich, suedoestlich +südöstliche, sudostliche, suedoestliche +südöstlicher, sudostlicher, suedoestlicher +südöstliches, sudostliches, suedoestliches +südwest, sw, sudwest, suedwest +südwesten, sudwesten, suedwesten +südwestlich, sudwestlich, suedwestlich +südwestliche, sudwestliche, suedwestliche +südwestlicher, sudwestlicher, suedwestlicher +südwestliches, sudwestliches, suedwestliches +west, w +westlich, westl diff --git a/synonyms/directionals/en.txt b/synonyms/directionals/en.txt new file mode 100644 index 00000000..01738428 --- /dev/null +++ b/synonyms/directionals/en.txt @@ -0,0 +1,20 @@ +center, ctr +central, cn, ctrl, cntrl +centre, ctr +east, e +eastern, eastrn, estrn, estn +lower, lowr, lwr +middle, mdl, midle, mddl +north, n, nrt, nrth, nth, norh, nort, no +northeast, northe, neast, ne +northeastern, northeastrn, northestrn, northestn, neastern +northwest, northw, northwst, nwest, nw +northwestern, northwestrn, northwstrn, northwstn +south, s, so, sth +southeast, southe, seast, se +southeastern, southeastrn, southestrn, southestn, seastern +southwest, southw, southwst, swest, sw +southwestern, southwestrn, southwstrn, southwstn, swestern +upper, uppr, upr, up +west, w, wst +western, westrn, wstrn, wstn diff --git a/synonyms/directionals/es.txt b/synonyms/directionals/es.txt new file mode 100644 index 00000000..79e089e3 --- /dev/null +++ b/synonyms/directionals/es.txt @@ -0,0 +1,10 @@ +este, e +norte, n +noreste, nordeste, ne +noroeste, nw +oeste, w +oriente, ote +poniente, pte +sur, s +sureste, se +suroeste, sw diff --git a/synonyms/directionals/fr.txt b/synonyms/directionals/fr.txt new 
file mode 100644 index 00000000..736b312a --- /dev/null +++ b/synonyms/directionals/fr.txt @@ -0,0 +1,6 @@ +central, ctrl +centre, ctre, cntre +est, e +nord, n +ouest, o +sud, s diff --git a/synonyms/linter.js b/synonyms/linter.js index 930cb886..f4417899 100644 --- a/synonyms/linter.js +++ b/synonyms/linter.js @@ -40,7 +40,8 @@ function linter(synonyms) { letterCasing(line, logprefix, tokens); tokensSanityCheck(line, logprefix, tokens); - // multiWordCheck(line, logprefix, tokens); + multiWordCheck(line, logprefix, tokens); + // tokenLengthCheck(line, logprefix, tokens); }) }) } @@ -65,10 +66,18 @@ function tokensSanityCheck(line, logprefix, tokens) { } } -function multiWordCheck(line, tokens) { +function multiWordCheck(line, logprefix, tokens) { _.each(tokens, token => { if (/\s/.test(token)){ - logger.warn(`multi word synonyms may cause issues with phrase queries:`, token); + logger.warn(`${logprefix} multi word synonyms may cause issues with phrase queries:`, token); + } + }); +} + +function tokenLengthCheck(line, logprefix, tokens) { + _.each(tokens, token => { + if (token.length <= 1) { + logger.warn(`${logprefix} short token:`, token); } }); } diff --git a/synonyms/loader.js b/synonyms/loader.js new file mode 100644 index 00000000..19918a0b --- /dev/null +++ b/synonyms/loader.js @@ -0,0 +1,38 @@ +const path = require('path'); +const glob = require('glob'); +const parse = require('./parser'); +const lint = require('./linter'); + +function load() { + + // map containing all synonyms + const synonyms = {}; + + // recursively find all files ending with .txt in this directory + const basepath = __dirname; + const pattern = path.join(basepath, '**', '*.txt'); + const files = glob.sync(pattern, { realpath: true }); + + // load synonyms files and parse each + files.forEach(filepath => { + // for directories of synonyms we use the directory name as the key. + // nested directories will have their path separators normalized to '/'. 
+ let key = path.dirname(path.relative(basepath, filepath)).split(path.sep).join('/'); + + // for synonym files at the root of the synonyms dir we use the basename as the key. + if (key === '.') { + key = path.basename(filepath).replace('.txt', ''); + } + + if (!synonyms.hasOwnProperty(key)) { synonyms[key] = []; } + synonyms[key] = synonyms[key].concat(parse(filepath)); + }); + + // emit synonym warnings + lint(synonyms); + + // return all synonyms + return synonyms; +} + +module.exports.load = load; diff --git a/synonyms/parser.js b/synonyms/parser.js index 20aab483..abdc5256 100644 --- a/synonyms/parser.js +++ b/synonyms/parser.js @@ -1,4 +1,4 @@ -var fs = require('fs'); +const fs = require('fs'); // https://www.elastic.co/guide/en/elasticsearch/reference/2.4/analysis-synonym-tokenfilter.html diff --git a/synonyms/personal_titles/de.txt b/synonyms/personal_titles/de.txt new file mode 100644 index 00000000..b99b603a --- /dev/null +++ b/synonyms/personal_titles/de.txt @@ -0,0 +1,12 @@ +doktor, dr +bürgermeister, buergermeister, burgermeister, bgm +direktor, dir +frau, fr +fraulein, fri +heilige, hl +herr, hr +herrn, hrn +ingnieur, ing +oberburgermeister, ob, obgm +professor, prof +sankt, st, skt diff --git a/synonyms/personal_titles/en.txt b/synonyms/personal_titles/en.txt new file mode 100644 index 00000000..9a2f708a --- /dev/null +++ b/synonyms/personal_titles/en.txt @@ -0,0 +1,35 @@ +colonel, col +commander, cmdr +corporal, cpl +captain, capt, cpt +czar, tsar +doctor, dr, doc +doctors, drs, docs +brother, br +brothers, bros +father, fr +sister, sr +general, gen +honorable, honourable, hon +king, kg +major, maj +mr, mister +mrs, misses +ms, miss +officer, ofcr +president, pres +professor, prof +professors, profs +reverend, rev +saint, st +saints, ss +sainte, ste +santa, sta +sargeant, sgt +secretary, sec +representative, rep +representatives, reps +senator, sen +senators, sens +junior, jr, jnr +senior, sr, snr diff --git a/synonyms/personal_titles/es.txt 
b/synonyms/personal_titles/es.txt new file mode 100644 index 00000000..8ff44d80 --- /dev/null +++ b/synonyms/personal_titles/es.txt @@ -0,0 +1,105 @@ +abad, abd +abogada, abga, abgda +abogado, abg, abgdo +administrador, admor, admr +administradora, admora, admra +agrimensor, agrim +alcalde, alcde +alférez, alf, alferez, alfz +almirante, alm, almte, alte +arquitecto, arq +arquitecta, arqa +arquitectors, arqs, arqos +arzobispo, arz +barón, baron, bar +brigadier, brg, bg, brig +caballero, cab +cacique, cque +capitán, cap, capt, ctn, cptn, capitan +cardenal, card, cnal +clérigo, clg, clerigo +comandante, cmdt, cmte, comte, cte +comisario, cmro +conde, cde +condesa, cdesa +consejal, cjal +consejero, cons, consejo +contralmirante, contralmte +coronel, cnel, col, crn +diacono, diac +diacona, diaca +diputado, dip +director, dir +directora, dira +doctor, dr +doctora, dra +dragoneante, dg +duque, dq +duquesa, dqsa, dqa +embajador, emb +enfermera, enf, enfa +excelencia, exca +excelentisima, excma +excelentisimo, excmo +fray, fr +gendarme, gdme +general, gral, genl, gnal, gn +gobernadora, gdora, goba +gobernador, gdor, gob +hermana, hna +hermanas, hnas +hermano, hno +hermanos, hnos +infanta, infa, infta +infante, inf, infte +ingeniera, inga +ingeniero, ing, ingo +inspector, insp +intendente, int +jefe, jf +juez, jz +licenciada, lda, lica +licenciado, ldo, lic, lico +locutor, loc +maestro, mstro, mtro +mariscal, mscal +marques, mq, marqs +marquesa, mqa, marqsa +mayor, my, may +ministerio, min +ministro, mtro +monseñor, monsenor, mons, msnr +notario, not +obispo, ob +presidenta, pdta, presa, presida, pta +presidente, pdte, pres, presid, pte +príncipe, principe +principal, pral +procurador, proc +profesor, prof, profr +profesora, profª, profa, profrª, profra +reina, ra +reverendo, rvdo, rdo, rvd, rev +reverendo, rvdo, rdo, rvd, rev +reverendisima, rma, rvdma +reverendisimo, rmo, rvdmo +san, s +santa, stª, sta +sant, st +santo, stº, st°, sto +sargento, sarg, sgto, sargto 
+secretario, secreto, secto +secretaria, secreta, secta +senador, sen +señor, sr, senor +señora, sª, srª, sra, senora +señores, srs, sres, senores +señorita, srta, senorita +serenisima, serma +serenisimo, sermo +soldado, sold +subteniente, subte, stn +teniente, tn, tte +vizconde, vde +vizcondesa, vdesa +junior, iunior, jr diff --git a/synonyms/personal_titles/fr.txt b/synonyms/personal_titles/fr.txt new file mode 100644 index 00000000..23e36fc2 --- /dev/null +++ b/synonyms/personal_titles/fr.txt @@ -0,0 +1,39 @@ +amiral, am +capitaine, capte, capt +caporal, capl +chez, chz +colonel, col, cel +commandant, cdt +commander, cmdr, cdr +commodore, cmdre +comte, cte +comtesse, ctesse +docteur, dr +docteurs, drs +frère, frere +général, general, gén, gen, gal +lieutenant, lt +madame, mme +mesdames, mmes +mademoiselle, mlle, mle +mademoiselles, mlles, mles +maître, maitre, me +maîtres, maitres +maîtresse, maitresse +major, maj +maréchal, marechal, mal +marquis, mis +marquise, mise +messieurs, mm, mrs +monseigneur, mgr +monsieur, mr +père, pere +professeur, prof, pr +professeure, profe, pre +professeures, profes, pres +professeurs, profs, prs +révérend, rév, reverend, rev +saint, st +sainte, ste +sergente, sgt +veuve, vve diff --git a/synonyms/place_names/de.txt b/synonyms/place_names/de.txt new file mode 100644 index 00000000..fa734db8 --- /dev/null +++ b/synonyms/place_names/de.txt @@ -0,0 +1,61 @@ +abteilung, abt +altstoffsammelzentrum, asz +bücherei, bucherei, buecherei +bundesgymnasium, bg +bundesrealgymnasium, brg +büro, buro, buero +bustenhalter, bh +café, cafe +denkmal, dkm +deutsch, dt +ehemalige, ehem +fabrik, fb +fachhochschule, fh +fähranlegestelle, fahranlegestelle, faehranlegestelle +gebruder, gebr +gasthaus, gh +gaststätte, gaststatte, gaststaette +gasthof, ghf +gefängnis, gefangnis, gefaengnis +geschäft, geschaft, geschaeft +haltestelle, hst +handelsakademie, hak +handelsschule, hasch +hauptbahnhof, hbf +hinter, hint, ht +höhle, hohle +hörsäle, horsale, 
hoersaele +hütte, hutte, huette, htt +internationale, int +jagdhutte, jhtt +jungenherberge, jh +kapelle, kap, kpl +kläranlage, ka, klaranlage, klaeranlage +kleine, kl +kleiner, kl +kleines, kl +kogel, kg +konzentrationslager, kz, kl +magistratsabteilung, ma +markt, mkt +marktplatz, marktpl +nationalpark, np +naturschutzgebiet, nsg +niedere, nd +resevoir, res +rhein, rh +schloss, schl +schutzhütte, schutzhutte, schutzhuette +spitze, sp +sportplatz, sportpl +steinbruch, stb +supermarkt, supermkt +truppenubüngsplatz, tüpl, tupl, truppenubungsplatz, truppenubuengsplatz +universität, uni, universitat, universitaet +universitätsbibliothek, universitatsbibliothek, universitaetsbibliothek +volksschule, vs +vordere, vd, vord +warenhäuser, warenhauser, warenhaeuser +wasserfall, wsf, wssf +wiese, ws +wirtshaus, wh diff --git a/synonyms/place_names/en.txt b/synonyms/place_names/en.txt new file mode 100644 index 00000000..bc34e87f --- /dev/null +++ b/synonyms/place_names/en.txt @@ -0,0 +1,119 @@ +abbey, abby +airport, aprt +amphitheatre, amphitheater +annex, anex, annx, anx +apartments, apts +barbecue, barbeque, bbq +barracks, barrack, baracks +beach, bch +boardwalk, bwk, bwlk +cafe, café +cape, cpe, cp +castle, cst +church, chr +circus, crcs, crc +city, cty +club, clb +community, cmnty, cmty +cottage, cott, cottg +cottages, cotts, cottgs +county, co, cty +creek, cr, crk +dam, dm +deli, delicatessen +department, dept +detention, det, detn +distributor, dstr, distrib, dstrb +district, dist +dormitories, dorms +dormitory, dorm +downs, downes, dwns +factory, fcty, fty, fy +farm, frm +flats, flts +fords, frds +forest, frst, fst +forge, frg +forges, frgs +fork, frk +forks, frks +fort, ft +glens, glns +great, grt, gt +greater, grtr, gtr +green, grn, gn +greens, grns +groves, grvs +gym, gymnasium +heights, hghts, hgts, hieghts, ht, hts, hgths +home, hm, hme, hom +hospital, hos, hosp, hospice, hosptl, hsp, hsptl +hostel, host, hostl, hstel, hstl +hotel, hot, hotl, htel +houses, hses 
+institute, inst +international, intl +jetty, jtty, jty +lake, lk +lakes, lks +little, ltl, lttl, littl, litl +lock, lck +locks, lcks +lodge, ldge, lodg, ldg +lower, low, lwr, lr +mall, mll +manor, mnr +manors, mnrs +market, mkt, mrkt +marketplace, mktpl, mktplc +medical, med +memorial, mem +middle, mid, midl +military, mil +mission, msn +monastery, monastry +motel, mot, motl, mtel +mount, mt, mnt +mountain, mtn +mountains, mtns +municipal, mun, mpal +museum, mus +national, natl +neck, nck +office, ofc +offices, ofcs +orchard, orch +paradise, pde, pdse +park, pk, prk +pharmacy, pharm +po, postoffice +port, pt, prt +precinct, pct +reservation, res, resrv, resv, rsrv, rserv, rs +reserve, res, resrv, resv, rsrv, rserv, rserve, rsrve +reservoir, res +retreat, rtt +river, riv, rvr, rivr +rotary, rty +sanctuary, sanct +service, svc +services, svcs, svc +shop, shp +slope, slpe, slp +springs, spgs, sprngs +station, sta, stn +store, stor +stream, strm, stm +terminal, term +tower, twr +towers, twrs +triangle, tri +university, uni, univ, univers, unvrsty +upper, up, upr, uppr +villa, vll, vla +village, vil, vge, vill, villag, villg, vlg, vlge, vllg, vilg, vilge +villages, vlgs +villas, vlls, vlas +ville, vl +wood, wd +woods, wds diff --git a/synonyms/place_names/es.txt b/synonyms/place_names/es.txt new file mode 100644 index 00000000..47a22c32 --- /dev/null +++ b/synonyms/place_names/es.txt @@ -0,0 +1,58 @@ +abril, abr, abl +agosto, ag, agto, agt +altura, alt +alturas, alts +arboleda, arb +arrabal, arral +bosque, bsq +brigada, brig +cabo, cbo +campo, cpo, cmpo +campos, cpos, cmpos +canal, cnl +centro, cntro, ctro +cerro, crro +corral, crral +corralillo, crrlo +diseminado, disem +enero, en, eno, ene +diciembre, dic, dicbre, dice, dbre, 10bre, xbre +febrero, febo, febro, febr, feb +gobierno, gob, gobno +grande, gr +guerra, ga +independencia, indep +infantería, infanteria, infa, ynfa, ynfanta +jardín, jdin, jard, jardin +jardínes, jdins, jards, jardines +junio, jun, jn +julio, 
jul, jl +lago, lg +lagos, lgs +laguna, lgna +llanura, llnra +llanuras, llnras +marzo, mzo, mar +mayo, my, may +militar, milr +monte, mt, mte, mnte +montes, mts, mtes, mntes, mnts +nacional, nal, nacl +noviembre, nbre, nvre, nove, novre, novbre, 9bre +octubre, oct, octbre, octe, 8bre +portillo, ptilo, ptllo +prado, prdo +primeros, pros +privada, priv +punta, pnta +quebrada, qbda +real, rl +republica, rep +revolucion, rev +ribera, ribr +río, rio +septiembre, setbre, sepe, sepbre, 7bre, 7re, sep, set +sierra, srra +valle, vlle +volcan, vlcn +voluntarios, voluntos diff --git a/synonyms/place_names/fr.txt b/synonyms/place_names/fr.txt new file mode 100644 index 00000000..8bc060dc --- /dev/null +++ b/synonyms/place_names/fr.txt @@ -0,0 +1,76 @@ +abbaye, abe +auto-école, autoécole, autoecole +aéroport, aeroport +bastide, bstd +baston, bast +bibliothèque, bibliotheque +bourg, brg +béguinage, beguinage, begi +béguinages, beguinages, begis +café, cafe +camping, cpg +castel, cst +chapelle, chp +charmille, chi +château, chateau +cimetière, cimetiere +cinéma, cinema +colline, coli +collines, colis +cottage, cott +cottages, cott, cotts +crématorium, crematorium +darse, dars +département, dept, departement +enceinte, en +escaliers, escs +ferme, frm +fermes, frms +fleuve, fl +fontaine, fon +fort, ft +forum, form +grand, gd, gr +halle, hle +halles, hles, hls +hippodrome, hip +hôpital, hopital +hôtel, hotel +jardin, jard, jrd +jardins, jards, jrds +manoir, man +marché, marche, mar +marchés, marches, mars +mont, mt, mnt +mont, mt, mnt, montagne +moulin, mln +moulins, mlns +musée, musee, mus +médecin, medecin +mémorial, memorial +palais, pal +parc, prc +parking, pkg +pavillon, pav +pavillons, pavs +petite, pt +porche, pch +poterne, pot, potrn +préscolaire, prescolaire +péristyle, peristyle, psty +rivière, riviere, riv +résidence, rés, residence, res +résidences, residences +stade, stde +station, sta +supermarché, supermarche +théâtre, theatre +université, universite, univ, uni +villa, 
vla +village, vge +villages, vges +villas, vlas +vétérinaire, veterinaire +école, ecole +église, eglise, egl, égl +étang, etang diff --git a/synonyms/ampersand.txt b/synonyms/punctuation/ampersand.txt similarity index 97% rename from synonyms/ampersand.txt rename to synonyms/punctuation/ampersand.txt index c4466e45..d21364ed 100644 --- a/synonyms/ampersand.txt +++ b/synonyms/punctuation/ampersand.txt @@ -16,7 +16,7 @@ # Swedish: och # English -&,and +&, and # German -&,und \ No newline at end of file +&, und diff --git a/synonyms/street_suffix.txt b/synonyms/street_suffix.txt deleted file mode 100644 index 55aba03c..00000000 --- a/synonyms/street_suffix.txt +++ /dev/null @@ -1,129 +0,0 @@ -alley, aly -annex, anx -avenue, ave, av -bayou, byu -beach, bch -bend, bnd -bluff, blf -bluffs, blfs -bottom, btm -boulevard, blvd -branch, br -bridge, brg -brook, brk -bypass, byp -canyon, cyn -cape, cp -causeway, cswy -center, ctr -channel, chnnl -circle, cir -cliff, clf -close, cl -club, clb -common, cmn -commons, cmns -connector, con -corridor, cor -course, crse -court, ct -cove, cv -creek, crk -crescent, cres -crest, crst -crossing, xing -crossroad, xrd -crossroads, xrds -curve, curv -dale, dl -dam, dm -drive, dr -esplanade, esp -expressway, expy -extended, ext -falls, fls -ferry, fry -field, fld -fields, flds -flat, flt -flats, flts -ford, frd -forest, frst -forge, frg -fork, frk -forks, frks -freeway, fwy -garden, gdn -gardens, gdns -gateway, gtwy -glen, gln -glenn, gln -green, grn -grove, grv -harbor, hbr -haven, hvn -heights, hts -highway, hwy -hill, hl -hills, hls -hollow, holw -isle, is -junction, jct -key, ky -keys, kys -knoll, knl -knolls, knls -landing, lndg -lane, ln -light, lgt -lights, lgts -lock, lck -locks, lcks -manor, mnr -meadow, mdw -meadows, mdws -mill, ml -mills, mls -mountain, mnt -motorway, mtwy -neck, nck -orchard, orch -parkway, pkwy -pasage, psge -pier, pr -pine, pne -pines, pnes -place, pl -plaza, plz -ranch, rnch -ridge, rdg -ridges, rdgs -river, 
riv -road, rd -route, rte -shore, shr -shores, shrs -skyway, skwy -spring, spg -springs, spgs -square, sq -street, st -suite, ste -terrace, terr, tce -trail, trl, tr -trafficway, trfy -tunnel, tunl -turnpike, tpke -valley, vly -vista, vis -village, vlg -way, wy - -# Germanic street suffixes -straße => strasse, str -strasse, str -brücke => bruecke, brucke, br -bruecke, brucke, br -bahnhof, bhf, bf -chaussee, ch -platz, pl diff --git a/synonyms/streets/de.txt b/synonyms/streets/de.txt new file mode 100644 index 00000000..ddd3a542 --- /dev/null +++ b/synonyms/streets/de.txt @@ -0,0 +1,20 @@ +allee, al +bahnhof, bhf, bf +boulevard, bd +brücke, br, brucke, bruecke +bühl, buhl, buehl +chaussee, ch +forsthaus, fh +graben, gr +großser, grosser +große, grosse, gr +großes, grosses +obere, ob +oberer, ob +platz, pl +quelle, qu +rundwanderweg, rww +siedlung, sdlg +stiege, stg +straße, str, strasse +wiese, ws diff --git a/synonyms/streets/en.txt b/synonyms/streets/en.txt new file mode 100644 index 00000000..c211d0a3 --- /dev/null +++ b/synonyms/streets/en.txt @@ -0,0 +1,373 @@ +abbey, abby +access, accs, acc +acres, acrs +alley, aly, ally, alee, al +alleyway, alwy, allyway, allwy +amble, ambl +anchorage, ancg +annex, anx +apartments, apts +approach, app, apch, appr +arcade, arc +arterial, artl +artery, art, arty +avenue, av, ave, aven, avenu, avn, avnu, avnue +avenues, avs, aves, avens, avenus, avns, avnus, avnues +autoroute, aut +back, bk +bank, bnk +basin, basn, bsn +bay, by +bayou, byu, bayoo +beach, baech, bch, beech +belt, blt +bend, bnd +block, blk, blck +bluff, blf, bluf, bluffs, blfs +boardwalk, bwk, bwlk +boulevard, blvd, bd, bde, blv, bl, blvde, blvrd, boulavard, boul, boulv, bvd, boulevarde +bottom, bot, bottm, btm, bttm +bottoms, bttms, btms, bottms +boundary, bdy +bowl, bl +brace, br, brce +branch, br, brnch, brch +brae, br +break, brk +bridge, bdge, br, brdg, bri, brg +broadway, bdwy, bway, bwy, brdway +brook, brk +brooks, brks +brow, brw +burg, bg +burgs, brgs 
+burrow, burw +butte, btte, bte +bypass, bypa, byps, bps, byp +byway, bywy +camp, cp +cape, cpe, cp +canyon, cyn, cnyn +caravan, cvan, cvn +causeway, csway, cswy, causewy, caus, cause, cway +center, centre, cetr, cntr, ctr, cen +centers, ctrs +centreway, cnwy +chase, ch, chas +circle, cir, circel +circles, cirs +circlet, clt +circuit, crct, circ, cct, cirt, ci, circt +circus, crcs, crc +claim, clm +cliff, clf +cliffs, clfs +close, cl, cls, clse +cluster, clr, clstr +colonnade, clde, clnde +common, cmmn, comm, cmn, com, cm +commons, cmmns, cmns, comms +concord, cncd, cncrd +concession, conc +concourse, con, concs, concse, cnc +connection, cntn, cxn +connector, conr, cnctr, cntr +copse, cps +corner, cnr, crn, cor +corners, cnrs, crns, cors +corseo, cseo +corso, cso +ch, chw, cohw, ctyhw, chgwy, cohgwy, ctyhgwy, chway, cohway, ctyhway, chwy, cohwy, ctyhwy, chi, cohi, ctyhi +cr, cor, crd, cord, ctyr, ctyrd +cr, cor, crt, cort, ctyr, ctyrt, crte, corte, ctyrte +course, crse +court, ct, crt +courts, crts, cts +courtyard, cyd, ctyd +cove, cov, ce, cv +creek, cr, crk +crescent, cr, cres, crs, crecent +crest, crst, cst +crief, crf +croft, cft +cross, cs, crss +crossing, crsg, xing, csg, x-ing +crossroad, crd, xroad, x-road, xrd, x-rd +crossroads, xrds +crossway, cowy, crwy, xway, xwy, x-way +cruiseway, cuwy, crwy +cul-de-sac, culdesac, cds, cusac, csac +curve, cve, crv, crve, curv +cutting, cttg, ctg, cutt +dale, dle +deviation, devn +distributor, dstr +divide, div +diversion, divers +down, dn +downs, dns, dwns +drive, dr, drv, dv, dve +driveway, drwy, dvwy, dwy, dway, drvwy +drove, drov +easement, esmt +edge, edg +elbow, elb +entrance, ent, entr +esplanade, esp, espl +estate, est +estates, ests +expressway, exp, expwy, expway, expy, exwy +extension, ex, ext, extn, exten +extensions, exts +fairway, fawy, fy +fall, fl +falls, fls +farm, frm +farms, frms +ferry, fry, fy +field, fld, fd +fields, flds, fds +fireline, fline, flne +firetrack, ftrk +firetrail, fit, fitr +flat, fl, 
flt +flats, flts +follow, folw +footway, ftwy +ford, frd +foreshore, fshr +formation, form, fmtn +freeway, frwy, fw, fwy, fway +front, frnt +frontage, frtg, fr +gap, gp +garden, gdn, grd, grdn +gardens, gdns, grds, grdns +gate, ga, gte +gates, gtes +gateway, gwy, gway, gtwy, gtway +glade, gl, gld, glde +glen, gln +gbd, grbd, grdbd, gdbd +grange, gra +green, grn, gn, gren +greenway, grwy +ground, grnd +grounds, grnds +grove, gr, grv, grve, gro +gulch, glch +gully, gly +hanger, hngr +harbor, harbour, hbr, hrbr +harbors, hbrs +haven, hvn, havn +head, hd +heads, hds +heath, hth, heth +heights, hghts, hgts, ht, hts, hgths +highlands, hghlds, hlds, hglds +highroad, hrd, hird +highway, hgwy, hw, hway, hwy, hi, hwye, hywy +hill, hl +hills, hls, hils +hollow, hllw, holw +impasse, imp +inlet, inlt +interchange, intg, intchg +intersection, intn, intsctn +interstate, ih +island, is, id, isl, isld +islands, iss, ids, islds +junction, jct, jnc, jnct, jctn, jtn, junct +junctions, jcts +key, ky +keys, kys +knoll, knol, knl +knolls, knls +ladder, ladr +lagoon, lagn, lgn, lagon +landing, ldg, lndg, landng +lane, ln, la +laneway, lnwy +light, lgt, lt +limits, lmts +line, ln +link, lnk, lk +little, ltl, lttl, littl, litl, lit, lt +loaf, lf +lookout, lkt +loop, lp +loops, lps +lot, lt +lynne, lynn +mall, ml +manor, mnr +meadow, mdw +meadows, mdws, mead +mead, md +meander, mndr, mdr, mr +mew, mw +mews, mws +mile, mi +mill, ml +mills, mls +motorway, mway, mwy, mtwy +mount, mt +neaves, nvs +nook, nk +number, nbr, num, no, nmbr, nr +outlet, otlt +outlook, out, otlk +overbridge, ovrb +overlook, ovlk +overpass, opas +paddock, padk +palms, plms +parade, pde, prd, prde, pard +park, pk, prk +parklands, pkld, pklds, parkland +parkway, pkwy, parkwy, pky, pkway, prkwy, prkway, pkw, pwy, prkw +parkways, pkwys +part, prt +pass, ps +passage, psge, pass, pasg +path, pth +pathway, phwy, pway, pthway, pthwy, ptway, ptwy +peninsula, psla +piazza, piaz, pzza +pike, pk, pke +pine, pne, pn +pines, pns, pnes 
+place, pl, pla, plc, plac +plain, pln, pl +plains, plns, pls +plateau, plat, plt +plaza, plz, plza, pz +prarie, pr +pocket, pkt, pokt, pckt +point, piont, pnt, pt +pointe, pte, pnte +port, prt +ports, prts +prairie, pr +priors, prrs +private, pvt +promenade, prom, prm +pursuit, pur +quad, qd +quadrangle, qdgl +quadrant, qdrt, qd +quay, quy, qy +quays, quys, qys +radial, radl +ramble, ra, rmbl +ramp, rmp +ranae, ran +ranch, rnch +rapid, rpd +rapids, rpds +range, rng, rnge, rang +reach, rch +reserve, res, resrv, resv, rsrv, rserv, rserve, rsrve +rest, rst +retreat, rt, rtt +return, rtn +ridge, rdge, rdg +ridges, rdgs +ridgeway, rgwy, rdgwy +rowy, rightofway, rofw, row +rise, ri +riverway, rvwy +riviera, rvra +road, rd, ro, roa +roads, rds +roadside, rdsd +roadway, rdwy, rdw, rdy +rocks, rks +ronde, rnde +rosebowl, rsbl +rotary, rty +round, rnd +route, rt, rte +row, rw +run, rn +serviceway, swy, svwy, svcwy +shoal, shl +shoals, shls +shore, shor, shr +shores, shors, shrs +shunt, shun, shnt +siding, sdng, sdg +skyway, skwy +slope, slpe, slp +sound, snd +space, spc +spring, spg, sprng, sprn +springs, spgs, sprngs, spns +spur, spr +square, sq, sqr +squares, sqs +stairs, strs +stairway, stwy, strwy, strway +shighway, sthighway, sh, sth, shw, sthw, shwy, shgwy, sthgwy, shway, sthway, sthwy, shi, sthi, statehighway +sr, stateroad, sroad, stroad, staterd, srd, strd +sr, stateroute, sroute, stroute, statert, srt, srte, strt, strte +steps, stps +strand, stra, strnd, strd +strands, strnds, strds +stravenue, stra, strav +street, st, str, stre, stree, strt +streets, sts +strip, strp +subdivision, subdiv +subway, sbwy +summit, smt, sumt +tarn, tn +terrace, tce, ter, tr, terr, terace, terrac, terrasse, tsse +thicket, thick +thoroughfare, thor, throughfare, thfr +thoroughway, thwy +throughway, thru, thro, thruway, trwy, thwy +tollway, tlwy, twy +th, twph, tshph, thw, twphw, tshphw, thgwy, twphgwy, tshphgwy, thway, twphway, tshphway, thwy, twphwy, tshphwy, thi, twphi, tshphi +tr, 
trd, twpr, twprd, tshpr, tshprd +tr, trt, trte, twpr, twprt, twprte, tshpr, tshprt, tshprte +tower, twr +towers, twrs +townline, tline +trace, trce, trc +track, tr, trk, trak +trafficway, trfy +trail, tr, trl +trailer, trlr +tramway, tmwy +trees, trs +triangle, tri +trunkway, tkwy +tunnel, tun, tunl +turnabout, trnabt +turn, tn, trn +turnpike, tpk, tpke +underpass, upas, upass, ups +union, un +unions, uns +vale, va, vl +valley, vlly, vly, vy +valleys, vlys, vllys +viaduct, via, viad, vdct, viadct +view, vw +views, vws +villa, vla +village, vlge +villas, vlas +vista, vst, vsta, vis +walk, wlk, wk +walkway, wkwy, wky, wlkwy +waters, wtrs +way, wy +ways, wys +well, wl +wells, wls +wharf, whrf, whf +wynd, wyn +yard, yd, yrd diff --git a/synonyms/streets/es.txt b/synonyms/streets/es.txt new file mode 100644 index 00000000..f3475719 --- /dev/null +++ b/synonyms/streets/es.txt @@ -0,0 +1,96 @@ +acceso, acces +alameda, alam +alquería, alqueria, alque +andador, andad +angosta, angta +apeadero, apdro +autopista, auto, autop, aut, ap +autovía, autovia, autov +avenida, av, avd, avda +bajada, bjada +banda, bda +barranco, branc +barranquillo, bqllo +barriada, barda +boulevard, blvd, bvd +brazal, brzal +bulevar, bulev, blev, blv, bv, bl +calle, cl, cll, ca, call +calleja, cllja +callejón, callejon, callej, cjón, cjon, cllon, cllón, cj +callejuela, cjla +callizo, cllzo +calzada, czada, calz +camino, cno, cmo, cmno, cm +caminito, cmt +camping, campg +cantera, cantr +cantón, canton, cant +carrera, cra, carra, carr, cr, kra, kr +carretera, ctra, cr, ct +carreterín, carreterin, ctrin +carretil, crtil +carril, crril +cerrada, cda, cer +cinturón, cinturon, cint +circular, cq +circuito, cto +circunvalar, cv, crv, cirv +circunvalación, circunvalacion, ccvcn +corredor, crrdo +costanilla, cstan +cuesta, custa +diagonal, diag, dg +diseminado, disem +espalda, eslda +estrada, estda +explanada, expla +extensión, ext, extension +extramuros, extrm +galería, galeria, gale +glorieta, gta +hacienda, 
hda +ladera, ldera +laderas, lderas +llanura, llnra +malecón, malecon, malec +mirador, mrdor +muelle, meull +pantano, pant +paraje, praje +parque, pque, parq, pq, pqe +particular, parti +partida, ptda +pasadizo, pzo +pasaje, psaje, psj +paseo, pº, p°, po, pso, pseo, pas, ps +pasillo, psllo +peatonal, peat +periferico, perif +plaza, pl, plza, pza, pz +plazoleta, pzta, plzta, plta +plazuela, plzla +poblado, pbdo +prolongación, prolongacion, prol +puebla, pbla +pueblo, pblo +puente, pnte +rambla, rbla +rampla, rampa, rpla +retorno, ret, rt +rincón, rincon, rcon, rin, rncn, rncon +rinconada, rcda, rcnda +ronda, rda +rotonda, rtda +ruta, rta +sector, sect +sendera, sedra +sendero, send, sedro +subida, sbida +tránsito, transito, trans +transversal, trval, trvsal, tv, tr +trasera, tras +travesía, travesia, trva, trvsía, trvsia +vereda, vreda, ver +viaducto, vcto, vd +vista, vst, vsta, vist diff --git a/synonyms/streets/fr.txt b/synonyms/streets/fr.txt new file mode 100644 index 00000000..5dd33323 --- /dev/null +++ b/synonyms/streets/fr.txt @@ -0,0 +1,129 @@ +allée, allee, all +allées, alls, allees +arcade, arc +autoroute, aut +avenue, av, ave, aven, avenu, avn, avnu, avnue +avenues, avs, aves, avens, avenus, avns, avnus, avnues +barriêre, barriere, bre +barriêres, barrieres, bres +berge, ber +berges, bers +boucle, bcle +boulevard, bd, bde, blv, blvd, blvde, blvrd, boulavard, boul, boulv, bvd, boulevarde, bld +butte, but +côte, cote +côteau, coteau +campagne, cgne +carreau, cau, carru +carrefour, carf, carref +carrière, carriere, care +carrières, carrieres, cares +carré, carre, carr, car +cavée, cavee, cav +cercle, cercl +chalet, chl +chaussée, chaussee, chs, chee +chaussées, chaussees, chss, chees +chemin, ch, che +cheminement, chem +chemins, ches +château, chateau, cht +cloître, cloitre, cloi +contour, ctr +corniche, cor +corniches, cors +cours, crs +degré, degre, deg +degrés, degres, degs +descente, dsg +descentes, dsgs +digue, dig +digues, digs +échangeur, éch +écluse, 
ecluse, ecl, écl +écluses, ecluses, ecls, écls +enclave, env +enclos, enc +espace, espa +esplanade, esp +esplanades, esps +fosse, fos +fosses, fos, foss +foyer, foyr +galerie, gal +galeries, gals +garenne, garn +gbd, grbd, grdbd, gdbd +gch, grch, gdch, grdch +gden, gdens +grandrue, gr, grdr, gdr +gdsen, gdsens +grille, gri +grimpette, grim +hameau, ham +hchs, hschs +impasse, imp +impasses, imps +jetée, jetee, jte +jetées, jetees, jtes +levée, levee, leve, lve +montée, montee, mte +montées, montees, mtes +métro, metro, mét, met +parc, prc +parcs, prcs +parvis, prv +passage, pas, psg +passe, pass +passerelle, ple +passerelles, ples +patio, pat +périphérique, peripherique, peri, péri +place, pl +placis, plci +plage, plag +plages, plags +plaine, pln +plateau, plt, plat +plateaux, pltx, platx +pointe, pte, pnte +portique, porq, portq +portiques, porqs, portqs +pourtour, pour +presquîle, presquile, prq, prql +promenade, prom +peripherique, peri +quai, au +raccourci, rac, racc +raidillon, raid +rampe, rpe, rmpe, rmp +rempart, rem, remp +rocade, rocd +ronde, rnde +rdpt, rpt +roquet, roqt +rotonde, rtd, rtnd, rtde, rtnde +route, rt, rte +routes, rts, rtes +ruelle, rle +ruelles, rles +rues, rs +residence, res +residences, ress +sente, sen +sentes, sens +sentier, sent +sentiers, sents +square, sq +terrain, terr, trn +terrasse, tsse +terrasses, tsses +terte, trt +tertes, trts +traverse, tra, trvs, trvrs +vallon, val +valée, vallee, val +venelle, ven +venelles, vens +voie, voi +voies, voiss diff --git a/synonyms/streets/usps.txt b/synonyms/streets/usps.txt new file mode 100644 index 00000000..d5ca822d --- /dev/null +++ b/synonyms/streets/usps.txt @@ -0,0 +1,195 @@ +# USPS C1 Street Suffix Abbreviations +# https://pe.usps.com/text/pub28/28apc_002.htm +# https://gist.github.com/mick-io/26db11e4c7f7aee6646b07d9f858eb9c + +aly, alley, allee, ally +anx, anex, annex, annx +arc, arcade +ave, avenue, av, aven, avenu, avn, avnue +byu, bayou, bayoo +bch, beach +bnd, bend +blf, bluff, 
bluf +blfs, bluffs +btm, bottom, bot, bottm +blvd, boulevard, boul, boulv +br, branch, brnch +brg, bridge, brdge +brk, brook +brks, brooks +bg, burg +bgs, burgs +byp, bypass, bypa, bypas, byps +cp, camp, cmp +cyn, canyon, canyn, cnyn +cpe, cape +cswy, causeway, causwa +ctr, center, cen, cent, centr, centre, cnter, cntr +ctrs, centers +cir, circle, circ, circl, crcl, crcle +cirs, circles +clf, cliff +clfs, cliffs +clb, club +cmn, common +cmns, commons +cor, corner +cors, corners +crse, course +ct, court +cts, courts +cv, cove +cvs, coves +crk, creek +cres, crescent, crsent, crsnt +crst, crest +xing, crossing, crssng +xrd, crossroad +xrds, crossroads +curv, curve +dl, dale +dm, dam +dv, divide, div, dvd +dr, drive, driv, drv +drs, drives +est, estate +ests, estates +expy, expressway, exp, expr, express, expw +ext, extension, extn, extnsn +exts, extensions +fls, falls +fry, ferry, frry +fld, field +flds, fields +flt, flat +flts, flats +frd, ford +frds, fords +frst, forest, forests +frg, forge, forg +frgs, forges +frk, fork +frks, forks +ft, fort, frt +fwy, freeway, freewy, frway, frwy +gdn, garden, gardn, grden, grdn +gdns, gardens, grdns +gtwy, gateway, gatewy, gatway, gtway +gln, glen +glns, glens +grn, green +grns, greens +grv, grove, grov +grvs, groves +hbr, harbor, harb, harbr, hrbor +hbrs, harbors +hvn, haven +hts, heights, ht +hwy, highway, highwy, hiway, hiwy, hway +hl, hill +hls, hills +holw, hollow, hllw, hollows, holws +inlt, inlet +is, island, islnd +iss, islands, islnds +isle, isles +jct, junction, jction, jctn, junctn, juncton +jcts, junctions, jctns +ky, key +kys, keys +knl, knoll, knol +knls, knolls +lk, lake +lks, lakes +lndg, landing, lndng +ln, lane +lgt, light +lgts, lights +lf, loaf +lck, lock +lcks, locks +ldg, lodge, ldge, lodg +loop, loops +mnr, manor +mnrs, manors +mdw, meadow +mdws, meadows, mdw, medows +ml, mill +mls, mills +msn, mission, missn, mssn +mtwy, motorway +mt, mount, mnt +mtn, mountain, mntain, mntn, mountin, mtin +mtns, 
mountains, mntns +nck, neck +orch, orchard, orchrd +oval, ovl +opas, overpass +park, parks +pkwy, parkway, parkwy, pkway, pky, parkways, pkwys +psge, passage +path, paths +pike, pikes +pne, pine +pnes, pines +pl, place +pln, plain +plns, plains +plz, plaza, plza +pt, point +pts, points +prt, port +prts, ports +pr, prairie, prr +radl, radial, rad, radiel +rnch, ranch, ranches, rnchs +rpd, rapid +rpds, rapids +rst, rest +rdg, ridge, rdge +rdgs, ridges +riv, river, rvr, rivr +rd, road +rds, roads +rte, route +shl, shoal +shls, shoals +shr, shore, shoar +shrs, shores, shoars +skwy, skyway +spg, spring, spng, sprng +spgs, springs, spngs, sprngs +spur, spurs +sq, square, sqr, sqre, squ +sqs, squares, sqrs +sta, station, statn, stn +stra, stravenue, strav, straven, stravn, strvn, strvnue +strm, stream, streme +st, street, strt, str +sts, streets +smt, summit, sumit, sumitt +ter, terrace, terr +trwy, throughway +trce, trace, traces +trak, track, tracks, trk, trks +trfy, trafficway +trl, trail, trails, trls +trlr, trailer, trlrs +tunl, tunnel, tunel, tunls, tunnels, tunnl +tpke, turnpike, trnpk, turnpk +upas, underpass +un, union +uns, unions +vly, valley, vally, vlly +vlys, valleys +via, viaduct, vdct, viadct +vw, view +vws, views +vlg, village, vill, villag, villg, villiage +vlgs, villages +vl, ville +vis, vista, vist, vst, vsta +walk, walks +way, wy +wl, well +wls, wells diff --git a/test/fixtures/expected.json b/test/fixtures/expected.json index 3128f7a4..80ac9b0d 100644 --- a/test/fixtures/expected.json +++ b/test/fixtures/expected.json @@ -29,9 +29,9 @@ ], "filter": [ "lowercase", - "icu_folding", "trim", - "custom_admin", + "admin_synonyms_multiplexer", + "icu_folding", "word_delimiter", "unique_only_same_position", "notnull", @@ -47,12 +47,9 @@ ], "filter": [ "lowercase", - "icu_folding", "trim", - "custom_name", - "street_suffix", - "directionals", - "ampersand", + "name_synonyms_multiplexer", + "icu_folding", "remove_ordinals", "removeAllZeroNumericPrefix", 
"peliasOneEdgeGramFilter", @@ -69,9 +66,9 @@ "nfkc_normalizer" ], "filter": [ - "icu_folding", "lowercase", "trim", + "icu_folding", "remove_ordinals", "removeAllZeroNumericPrefix", "unique_only_same_position", @@ -89,10 +86,7 @@ "lowercase", "trim", "remove_duplicate_spaces", - "ampersand", - "custom_name", - "street_suffix", - "directionals", + "name_synonyms_multiplexer", "icu_folding", "remove_ordinals", "unique_only_same_position", @@ -104,12 +98,13 @@ "type": "custom", "tokenizer": "keyword", "char_filter": [ - "alphanumeric" + "alphanumeric", + "nfkc_normalizer" ], "filter": [ "lowercase", - "icu_folding", "trim", + "icu_folding", "unique_only_same_position", "notnull" ] @@ -118,12 +113,13 @@ "type": "custom", "tokenizer": "keyword", "char_filter": [ - "alphanumeric" + "alphanumeric", + "nfkc_normalizer" ], "filter": [ "lowercase", - "icu_folding", "trim", + "icu_folding", "unique_only_same_position", "notnull" ] @@ -146,9 +142,7 @@ "lowercase", "trim", "remove_duplicate_spaces", - "custom_street", - "street_suffix", - "directionals", + "street_synonyms_multiplexer", "icu_folding", "remove_ordinals", "trim", @@ -159,6 +153,37 @@ } }, "filter": { + "street_synonyms_multiplexer": { + "type": "multiplexer", + "preserve_original": false, + "filters": [ + "synonyms/custom_street", + "synonyms/personal_titles", + "synonyms/streets", + "synonyms/directionals" + ] + }, + "name_synonyms_multiplexer": { + "type": "multiplexer", + "preserve_original": false, + "filters": [ + "synonyms/custom_name", + "synonyms/personal_titles", + "synonyms/place_names", + "synonyms/streets", + "synonyms/directionals", + "synonyms/punctuation" + ] + }, + "admin_synonyms_multiplexer": { + "type": "multiplexer", + "preserve_original": false, + "filters": [ + "synonyms/custom_admin", + "synonyms/personal_titles", + "synonyms/place_names" + ] + }, "notnull": { "type": "length", "min": 1 @@ -187,109 +212,477 @@ "pattern": " +", "replacement": " " }, - "ampersand": { + 
"synonyms/custom_admin": { "type": "synonym", "synonyms": [ - "&,and", - "&,und" + "" + ] + }, + "synonyms/custom_name": { + "type": "synonym", + "synonyms": [ + "" ] }, - "custom_admin": { + "synonyms/custom_street": { "type": "synonym", "synonyms": [ + "" + ] + }, + "synonyms/directionals": { + "type": "synonym", + "synonyms": [ + "nord,n", + "nördlich,nördl,nordl,nordlich,noerdlich", + "nördliche,nordliche,noerdliche", + "nördlicher,nordlicher,noerdlicher", + "nördliches,nordliches,noerdliches", + "nordost,no", + "nordöstlich,nordostlich,nordoestlich", + "nordwest,nw", + "ost,o", + "östlich,östl,ostlich,ostl,oestlich", + "östliche,ostliche,oestliche", + "östlicher,ostlicher,oestlicher", + "östliches,ostliches,oestliches", + "süd,s,sud,sued", + "süden,suden,sueden", + "südlich,südl,sudl,sudlich,suedlich", + "südliche,sudliche,suedliche", + "südlicher,sudlicher,suedlicher", + "südliches,sudliches,suedliches", + "südost,so,sudost,suedost", + "südosten,sudosten,suedosten", + "südöstlich,sudostlich,suedoestlich", + "südöstliche,sudostliche,suedoestliche", + "südöstlicher,sudostlicher,suedoestlicher", + "südöstliches,sudostliches,suedoestliches", + "südwest,sw,sudwest,suedwest", + "südwesten,sudwesten,suedwesten", + "südwestlich,sudwestlich,suedwestlich", + "südwestliche,sudwestliche,suedwestliche", + "südwestlicher,sudwestlicher,suedwestlicher", + "südwestliches,sudwestliches,suedwestliches", + "west,w", + "westlich,westl", + "center,ctr", + "central,cn,ctrl,cntrl", + "centre,ctr", + "east,e", + "eastern,eastrn,estrn,estn", + "lower,lowr,lwr", + "middle,mdl,midle,mddl", + "north,n,nrt,nrth,nth,norh,nort,no", + "northeast,northe,neast,ne", + "northeastern,northeastrn,northestrn,northestn,neastern", + "northwest,northw,northwst,nwest,nw", + "northwestern,northwestrn,northwstrn,northwstn", + "south,s,so,sth", + "southeast,southe,seast,se", + "southeastern,southeastrn,southestrn,southestn,seastern", + "southwest,southw,southwst,swest,sw", + 
"southwestern,southwestrn,southwstrn,southwstn,swestern", + "upper,uppr,upr,up", + "west,w,wst", + "western,westrn,wstrn,wstn", + "este,e", + "norte,n", + "noreste,nordeste,ne", + "noroeste,nw", + "oeste,w", + "oriente,ote", + "poniente,pte", + "sur,s", + "sureste,se", + "suroeste,sw", + "central,ctrl", + "centre,ctre,cntre", + "est,e", + "nord,n", + "ouest,o", + "sud,s" + ] + }, + "synonyms/personal_titles": { + "type": "synonym", + "synonyms": [ + "doktor,dr", + "bürgermeister,buergermeister,burgermeister,bgm", + "direktor,dir", + "frau,fr", + "fraulein,fri", + "heilige,hl", + "herr,hr", + "herrn,hrn", + "ingnieur,ing", + "oberburgermeister,ob,obgm", + "professor,prof", + "sankt,st,skt", + "colonel,col", + "commander,cmdr", + "corporal,cpl", + "captain,capt,cpt", + "czar,tsar", + "doctor,dr,doc", + "doctors,drs,docs", + "brother,br", + "brothers,bros", + "father,fr", + "sister,sr", + "general,gen", + "honorable,honourable,hon", + "king,kg", + "major,maj", + "mr,mister", + "mrs,misses", + "ms,miss", + "officer,ofcr", + "president,pres", + "professor,prof", + "professors,profs", + "reverend,rev", "saint,st", + "saints,ss", "sainte,ste", - "fort,ft", - "mount,mt", - "mont,mt" + "santa,sta", + "sargeant,sgt", + "secretary,sec", + "representative,rep", + "representatives,reps", + "senator,sen", + "senators,sens", + "junior,jr,jnr", + "senior,sr,snr", + "abad,abd", + "abogada,abga,abgda", + "abogado,abg,abgdo", + "administrador,admor,admr", + "administradora,admora,admra", + "agrimensor,agrim", + "alcalde,alcde", + "alférez,alf,alferez,alfz", + "almirante,alm,almte,alte", + "arquitecto,arq", + "arquitecta,arqa", + "arquitectors,arqs,arqos", + "arzobispo,arz", + "barón,baron,bar", + "brigadier,brg,bg,brig", + "caballero,cab", + "cacique,cque", + "capitán,cap,capt,ctn,cptn,capitan", + "cardenal,card,cnal", + "clérigo,clg,clerigo", + "comandante,cmdt,cmte,comte,cte", + "comisario,cmro", + "conde,cde", + "condesa,cdesa", + "consejal,cjal", + "consejero,cons,consejo", + 
"contralmirante,contralmte", + "coronel,cnel,col,crn", + "diacono,diac", + "diacona,diaca", + "diputado,dip", + "director,dir", + "directora,dira", + "doctor,dr", + "doctora,dra", + "dragoneante,dg", + "duque,dq", + "duquesa,dqsa,dqa", + "embajador,emb", + "enfermera,enf,enfa", + "excelencia,exca", + "excelentisima,excma", + "excelentisimo,excmo", + "fray,fr", + "gendarme,gdme", + "general,gral,genl,gnal,gn", + "gobernadora,gdora,goba", + "gobernador,gdor,gob", + "hermana,hna", + "hermanas,hnas", + "hermano,hno", + "hermanos,hnos", + "infanta,infa,infta", + "infante,inf,infte", + "ingeniera,inga", + "ingeniero,ing,ingo", + "inspector,insp", + "intendente,int", + "jefe,jf", + "juez,jz", + "licenciada,lda,lica", + "licenciado,ldo,lic,lico", + "locutor,loc", + "maestro,mstro,mtro", + "mariscal,mscal", + "marques,mq,marqs", + "marquesa,mqa,marqsa", + "mayor,my,may", + "ministerio,min", + "ministro,mtro", + "monseñor,monsenor,mons,msnr", + "notario,not", + "obispo,ob", + "presidenta,pdta,presa,presida,pta", + "presidente,pdte,pres,presid,pte", + "príncipe,principe", + "principal,pral", + "procurador,proc", + "profesor,prof,profr", + "profesora,profª,profa,profrª,profra", + "reina,ra", + "reverendo,rvdo,rdo,rvd,rev", + "reverendo,rvdo,rdo,rvd,rev", + "reverendisima,rma,rvdma", + "reverendisimo,rmo,rvdmo", + "san,s", + "santa,stª,sta", + "sant,st", + "santo,stº,st°,sto", + "sargento,sarg,sgto,sargto", + "secretario,secreto,secto", + "secretaria,secreta,secta", + "senador,sen", + "señor,sr,senor", + "señora,sª,srª,sra,senora", + "señores,srs,sres,senores", + "señorita,srta,senorita", + "serenisima,serma", + "serenisimo,sermo", + "soldado,sold", + "subteniente,subte,stn", + "teniente,tn,tte", + "vizconde,vde", + "vizcondesa,vdesa", + "junior,iunior,jr", + "amiral,am", + "capitaine,capte,capt", + "caporal,capl", + "chez,chz", + "colonel,col,cel", + "commandant,cdt", + "commander,cmdr,cdr", + "commodore,cmdre", + "comte,cte", + "comtesse,ctesse", + "docteur,dr", + 
"docteurs,drs", + "frère,frere", + "général,general,gén,gen,gal", + "lieutenant,lt", + "madame,mme", + "mesdames,mmes", + "mademoiselle,mlle,mle", + "mademoiselles,mlles,mles", + "maître,maitre,me", + "maîtres,maitres", + "maîtresse,maitresse", + "major,maj", + "maréchal,marechal,mal", + "marquis,mis", + "marquise,mise", + "messieurs,mm,mrs", + "monseigneur,mgr", + "monsieur,mr", + "père,pere", + "professeur,prof,pr", + "professeure,profe,pre", + "professeures,profes,pres", + "professeurs,profs,prs", + "révérend,rév,reverend,rev", + "saint,st", + "sainte,ste", + "sergente,sgt", + "veuve,vve" ] }, - "custom_name": { + "synonyms/place_names": { "type": "synonym", "synonyms": [ - "brothers,bros", + "abteilung,abt", + "altstoffsammelzentrum,asz", + "bücherei,bucherei,buecherei", + "bundesgymnasium,bg", + "bundesrealgymnasium,brg", + "büro,buro,buero", + "bustenhalter,bh", + "café,cafe", + "denkmal,dkm", + "deutsch,dt", + "ehemalige,ehem", + "fabrik,fb", + "fachhochschule,fh", + "fähranlegestelle,fahranlegestelle,faehranlegestelle", + "gebruder,gebr", + "gasthaus,gh", + "gaststätte,gaststatte,gaststaette", + "gasthof,ghf", + "gefängnis,gefangnis,gefaengnis", + "geschäft,geschaft,geschaeft", + "haltestelle,hst", + "handelsakademie,hak", + "handelsschule,hasch", + "hauptbahnhof,hbf", + "hinter,hint,ht", + "höhle,hohle", + "hörsäle,horsale,hoersaele", + "hütte,hutte,huette,htt", + "internationale,int", + "jagdhutte,jhtt", + "jungenherberge,jh", + "kapelle,kap,kpl", + "kläranlage,ka,klaranlage,klaeranlage", + "kleine,kl", + "kleiner,kl", + "kleines,kl", + "kogel,kg", + "konzentrationslager,kz,kl", + "magistratsabteilung,ma", + "markt,mkt", + "marktplatz,marktpl", + "nationalpark,np", + "naturschutzgebiet,nsg", + "niedere,nd", + "resevoir,res", + "rhein,rh", + "schloss,schl", + "schutzhütte,schutzhutte,schutzhuette", + "spitze,sp", + "sportplatz,sportpl", + "steinbruch,stb", + "supermarkt,supermkt", + "truppenubüngsplatz,tüpl,tupl,truppenubungsplatz,truppenubuengsplatz", + 
"universität,uni,universitat,universitaet", + "universitätsbibliothek,universitatsbibliothek,universitaetsbibliothek", + "volksschule,vs", + "vordere,vd,vord", + "warenhäuser,warenhauser,warenhaeuser", + "wasserfall,wsf,wssf", + "wiese,ws", + "wirtshaus,wh", + "abbey,abby", + "airport,aprt", + "amphitheatre,amphitheater", + "annex,anex,annx,anx", + "apartments,apts", + "barbecue,barbeque,bbq", + "barracks,barrack,baracks", + "beach,bch", + "boardwalk,bwk,bwlk", + "cafe,café", "cape,cpe,cp", + "castle,cst", + "church,chr", + "circus,crcs,crc", "city,cty", - "creek,cr,crk", + "club,clb", + "community,cmnty,cmty", + "cottage,cott,cottg", + "cottages,cotts,cottgs", "county,co,cty", + "creek,cr,crk", + "dam,dm", + "deli,delicatessen", + "department,dept", + "detention,det,detn", + "distributor,dstr,distrib,dstrb", + "district,dist", + "dormitories,dorms", + "dormitory,dorm", "downs,downes,dwns", + "factory,fcty,fty,fy", + "farm,frm", "flats,flts", - "forest,frst,fst", - "fort,ft", "fords,frds", - "fork,frk", - "forks,frks", + "forest,frst,fst", "forge,frg", "forges,frgs", + "fork,frk", + "forks,frks", + "fort,ft", "glens,glns", "great,grt,gt", "greater,grtr,gtr", + "green,grn,gn", "greens,grns", "groves,grvs", + "gym,gymnasium", "heights,hghts,hgts,hieghts,ht,hts,hgths", + "home,hm,hme,hom", + "hospital,hos,hosp,hospice,hosptl,hsp,hsptl", + "hostel,host,hostl,hstel,hstl", + "hotel,hot,hotl,htel", + "houses,hses", + "institute,inst", "international,intl", + "jetty,jtty,jty", "lake,lk", "lakes,lks", "little,ltl,lttl,littl,litl", "lock,lck", "locks,lcks", + "lodge,ldge,lodg,ldg", "lower,low,lwr,lr", + "mall,mll", + "manor,mnr", + "manors,mnrs", + "market,mkt,mrkt", + "marketplace,mktpl,mktplc", "medical,med", "memorial,mem", "middle,mid,midl", "military,mil", + "mission,msn", + "monastery,monastry", + "motel,mot,motl,mtel", "mount,mt,mnt", "mountain,mtn", "mountains,mtns", "municipal,mun,mpal", + "museum,mus", "national,natl", "neck,nck", + "office,ofc", + "offices,ofcs", 
"orchard,orch", "paradise,pde,pdse", - "port,pt,prt", "park,pk,prk", - "rear of,r / o,r o", + "pharmacy,pharm", + "po,postoffice", + "port,pt,prt", + "precinct,pct", + "reservation,res,resrv,resv,rsrv,rserv,rs", + "reserve,res,resrv,resv,rsrv,rserv,rserve,rsrve", + "reservoir,res", + "retreat,rtt", "river,riv,rvr,rivr", + "rotary,rty", + "sanctuary,sanct", + "service,svc", + "services,svcs,svc", + "shop,shp", "slope,slpe,slp", "springs,spgs,sprngs", + "station,sta,stn", + "store,stor", "stream,strm,stm", + "terminal,term", + "tower,twr", + "towers,twrs", "triangle,tri", + "university,uni,univ,univers,unvrsty", "upper,up,upr,uppr", - "village,vlg,vlge,vilg,vilge", - "ville,vl", + "villa,vll,vla", + "village,vil,vge,vill,villag,villg,vlg,vlge,vllg,vilg,vilge", "villages,vlgs", + "villas,vlls,vlas", + "ville,vl", "wood,wd", "woods,wds", - "baston,bast", - "bourg,brg", - "charmille,chi", - "colline,coli", - "collines,colis", - "enceinte,en", - "fleuve,fl", - "grand,gd,gr,g", - "mont,mt,mnt", - "petite,p,pt", - "porche,pch", - "rivière,riviere,riv", - "village,vge", - "villages,vges", - "deutsch,dt", - "ehemalige,ehem", - "gebruder,gebr", - "haltestelle,hst", - "hinter,hint,ht", - "internationale,int", - "kleine,kl", - "kleiner,kl", - "kleines,kl", - "kogel,kg", - "niedere,nd", - "rhein,rh", - "see,s", - "spitze,sp", - "vordere,vd,vord", - "wiese,ws", "abril,abr,abl", "agosto,ag,agto,agt", "altura,alt", @@ -307,8 +700,8 @@ "corral,crral", "corralillo,crrlo", "diseminado,disem", - "enero,en,eno,ene,en o", - "diciembre,dic,dicbre,dice,dbre,10bre,10 bre,xbre,x bre", + "enero,en,eno,ene", + "diciembre,dic,dicbre,dice,dbre,10bre,xbre", "febrero,febo,febro,febr,feb", "gobierno,gob,gobno", "grande,gr", @@ -330,8 +723,8 @@ "monte,mt,mte,mnte", "montes,mts,mtes,mntes,mnts", "nacional,nal,nacl", - "noviembre,nbre,nvre,nove,novre,novbre,9bre,9 bre", - "octubre,oct,octbre,octe,8bre,8 bre", + "noviembre,nbre,nvre,nove,novre,novbre,9bre", + "octubre,oct,octbre,octe,8bre", 
"portillo,ptilo,ptllo", "prado,prdo", "primeros,pros", @@ -343,162 +736,908 @@ "revolucion,rev", "ribera,ribr", "río,rio", - "septiembre,setbre,sepe,sepbre,7bre,7 re,7re,7 bre,sep,set", + "septiembre,setbre,sepe,sepbre,7bre,7re,sep,set", "sierra,srra", "valle,vlle", "volcan,vlcn", - "voluntarios,voluntos" - ] - }, - "custom_street": { - "type": "synonym", - "synonyms": [ - "" + "voluntarios,voluntos", + "abbaye,abe", + "auto-école,autoécole,autoecole", + "aéroport,aeroport", + "bastide,bstd", + "baston,bast", + "bibliothèque,bibliotheque", + "bourg,brg", + "béguinage,beguinage,begi", + "béguinages,beguinages,begis", + "café,cafe", + "camping,cpg", + "castel,cst", + "chapelle,chp", + "charmille,chi", + "château,chateau", + "cimetière,cimetiere", + "cinéma,cinema", + "colline,coli", + "collines,colis", + "cottage,cott", + "cottages,cott,cotts", + "crématorium,crematorium", + "darse,dars", + "département,dept,departement", + "enceinte,en", + "escaliers,escs", + "ferme,frm", + "fermes,frms", + "fleuve,fl", + "fontaine,fon", + "fort,ft", + "forum,form", + "grand,gd,gr", + "halle,hle", + "halles,hles,hls", + "hippodrome,hip", + "hôpital,hopital", + "hôtel,hotel", + "jardin,jard,jrd", + "jardins,jards,jrds", + "manoir,man", + "marché,marche,mar", + "marchés,marches,mars", + "mont,mt,mnt", + "mont,mt,mnt,montagne", + "moulin,mln", + "moulins,mlns", + "musée,musee,mus", + "médecin,medecin", + "mémorial,memorial", + "palais,pal", + "parc,prc", + "parking,pkg", + "pavillon,pav", + "pavillons,pavs", + "petite,pt", + "porche,pch", + "poterne,pot,potrn", + "préscolaire,prescolaire", + "péristyle,peristyle,psty", + "rivière,riviere,riv", + "résidence,rés,residence,res", + "résidences,residences", + "stade,stde", + "station,sta", + "supermarché,supermarche", + "théâtre,theatre", + "université,universite,univ,uni", + "villa,vla", + "village,vge", + "villages,vges", + "villas,vlas", + "vétérinaire,veterinaire", + "école,ecole", + "église,eglise,egl,égl", + "étang,etang" ] }, - 
"directionals": { + "synonyms/punctuation": { "type": "synonym", "synonyms": [ - "southwest,sw", - "southeast,se", - "northwest,nw", - "northeast,ne", - "north,n", - "south,s", - "east,e", - "west,w" + "&,and", + "&,und" ] }, - "street_suffix": { + "synonyms/streets": { "type": "synonym", "synonyms": [ - "alley,aly", + "allee,al", + "bahnhof,bhf,bf", + "boulevard,bd", + "brücke,br,brucke,bruecke", + "bühl,buhl,buehl", + "chaussee,ch", + "forsthaus,fh", + "graben,gr", + "großser,grosser", + "große,grosse,gr", + "großes,grosses", + "obere,ob", + "oberer,ob", + "platz,pl", + "quelle,qu", + "rundwanderweg,rww", + "siedlung,sdlg", + "stiege,stg", + "straße,str,strasse", + "wiese,ws", + "abbey,abby", + "access,accs,acc", + "acres,acrs", + "alley,aly,ally,alee,al", + "alleyway,alwy,allyway,allwy", + "amble,ambl", + "anchorage,ancg", "annex,anx", - "avenue,ave,av", - "bayou,byu", - "beach,bch", + "apartments,apts", + "approach,app,apch,appr", + "arcade,arc", + "arterial,artl", + "artery,art,arty", + "avenue,av,ave,aven,avenu,avn,avnu,avnue", + "avenues,avs,aves,avens,avenus,avns,avnus,avnues", + "autoroute,aut", + "back,bk", + "bank,bnk", + "basin,basn,bsn", + "bay,by", + "bayou,byu,bayoo", + "beach,baech,bch,beech", + "belt,blt", "bend,bnd", - "bluff,blf", - "bluffs,blfs", - "bottom,btm", - "boulevard,blvd", - "branch,br", - "bridge,brg", + "block,blk,blck", + "bluff,blf,bluf,bluffs,blfs", + "boardwalk,bwk,bwlk", + "boulevard,blvd,bd,bde,blv,bl,blvde,blvrd,boulavard,boul,boulv,bvd,boulevarde", + "bottom,bot,bottm,btm,bttm", + "bottoms,bttms,btms,bottms", + "boundary,bdy", + "bowl,bl", + "brace,br,brce", + "branch,br,brnch,brch", + "brae,br", + "break,brk", + "bridge,bdge,br,brdg,bri,brg", + "broadway,bdwy,bway,bwy,brdway", "brook,brk", - "bypass,byp", - "canyon,cyn", - "cape,cp", - "causeway,cswy", - "center,ctr", - "channel,chnnl", - "circle,cir", + "brooks,brks", + "brow,brw", + "burg,bg", + "burgs,brgs", + "burrow,burw", + "butte,btte,bte", + 
"bypass,bypa,byps,bps,byp", + "byway,bywy", + "camp,cp", + "cape,cpe,cp", + "canyon,cyn,cnyn", + "caravan,cvan,cvn", + "causeway,csway,cswy,causewy,caus,cause,cway", + "center,centre,cetr,cntr,ctr,cen", + "centers,ctrs", + "centreway,cnwy", + "chase,ch,chas", + "circle,cir,circel", + "circles,cirs", + "circlet,clt", + "circuit,crct,circ,cct,cirt,ci,circt", + "circus,crcs,crc", + "claim,clm", "cliff,clf", - "close,cl", - "club,clb", - "common,cmn", - "commons,cmns", - "connector,con", - "corridor,cor", + "cliffs,clfs", + "close,cl,cls,clse", + "cluster,clr,clstr", + "colonnade,clde,clnde", + "common,cmmn,comm,cmn,com,cm", + "commons,cmmns,cmns,comms", + "concord,cncd,cncrd", + "concession,conc", + "concourse,con,concs,concse,cnc", + "connection,cntn,cxn", + "connector,conr,cnctr,cntr", + "copse,cps", + "corner,cnr,crn,cor", + "corners,cnrs,crns,cors", + "corseo,cseo", + "corso,cso", + "ch,chw,cohw,ctyhw,chgwy,cohgwy,ctyhgwy,chway,cohway,ctyhway,chwy,cohwy,ctyhwy,chi,cohi,ctyhi", + "cr,cor,crd,cord,ctyr,ctyrd", + "cr,cor,crt,cort,ctyr,ctyrt,crte,corte,ctyrte", "course,crse", - "court,ct", - "cove,cv", - "creek,crk", - "crescent,cres", - "crest,crst", - "crossing,xing", - "crossroad,xrd", + "court,ct,crt", + "courts,crts,cts", + "courtyard,cyd,ctyd", + "cove,cov,ce,cv", + "creek,cr,crk", + "crescent,cr,cres,crs,crecent", + "crest,crst,cst", + "crief,crf", + "croft,cft", + "cross,cs,crss", + "crossing,crsg,xing,csg,x-ing", + "crossroad,crd,xroad,x-road,xrd,x-rd", "crossroads,xrds", - "curve,curv", - "dale,dl", - "dam,dm", - "drive,dr", - "esplanade,esp", - "expressway,expy", - "extended,ext", + "crossway,cowy,crwy,xway,xwy,x-way", + "cruiseway,cuwy,crwy", + "cul-de-sac,culdesac,cds,cusac,csac", + "curve,cve,crv,crve,curv", + "cutting,cttg,ctg,cutt", + "dale,dle", + "deviation,devn", + "distributor,dstr", + "divide,div", + "diversion,divers", + "down,dn", + "downs,dns,dwns", + "drive,dr,drv,dv,dve", + "driveway,drwy,dvwy,dwy,dway,drvwy", + "drove,drov", + 
"easement,esmt", + "edge,edg", + "elbow,elb", + "entrance,ent,entr", + "esplanade,esp,espl", + "estate,est", + "estates,ests", + "expressway,exp,expwy,expway,expy,exwy", + "extension,ex,ext,extn,exten", + "extensions,exts", + "fairway,fawy,fy", + "fall,fl", "falls,fls", - "ferry,fry", - "field,fld", - "fields,flds", - "flat,flt", + "farm,frm", + "farms,frms", + "ferry,fry,fy", + "field,fld,fd", + "fields,flds,fds", + "fireline,fline,flne", + "firetrack,ftrk", + "firetrail,fit,fitr", + "flat,fl,flt", "flats,flts", + "follow,folw", + "footway,ftwy", "ford,frd", - "forest,frst", - "forge,frg", - "fork,frk", - "forks,frks", - "freeway,fwy", - "garden,gdn", - "gardens,gdns", - "gateway,gtwy", + "foreshore,fshr", + "formation,form,fmtn", + "freeway,frwy,fw,fwy,fway", + "front,frnt", + "frontage,frtg,fr", + "gap,gp", + "garden,gdn,grd,grdn", + "gardens,gdns,grds,grdns", + "gate,ga,gte", + "gates,gtes", + "gateway,gwy,gway,gtwy,gtway", + "glade,gl,gld,glde", "glen,gln", - "glenn,gln", - "green,grn", - "grove,grv", - "harbor,hbr", - "haven,hvn", - "heights,hts", - "highway,hwy", + "gbd,grbd,grdbd,gdbd", + "grange,gra", + "green,grn,gn,gren", + "greenway,grwy", + "ground,grnd", + "grounds,grnds", + "grove,gr,grv,grve,gro", + "gulch,glch", + "gully,gly", + "hanger,hngr", + "harbor,harbour,hbr,hrbr", + "harbors,hbrs", + "haven,hvn,havn", + "head,hd", + "heads,hds", + "heath,hth,heth", + "heights,hghts,hgts,ht,hts,hgths", + "highlands,hghlds,hlds,hglds", + "highroad,hrd,hird", + "highway,hgwy,hw,hway,hwy,hi,hwye,hywy", "hill,hl", - "hills,hls", - "hollow,holw", - "isle,is", - "junction,jct", + "hills,hls,hils", + "hollow,hllw,holw", + "impasse,imp", + "inlet,inlt", + "interchange,intg,intchg", + "intersection,intn,intsctn", + "interstate,ih", + "island,is,id,isl,isld", + "islands,iss,ids,islds", + "junction,jct,jnc,jnct,jctn,jtn,junct", + "junctions,jcts", "key,ky", "keys,kys", - "knoll,knl", + "knoll,knol,knl", "knolls,knls", - "landing,lndg", - "lane,ln", - "light,lgt", - 
"lights,lgts", - "lock,lck", - "locks,lcks", + "ladder,ladr", + "lagoon,lagn,lgn,lagon", + "landing,ldg,lndg,landng", + "lane,ln,la", + "laneway,lnwy", + "light,lgt,lt", + "limits,lmts", + "line,ln", + "link,lnk,lk", + "little,ltl,lttl,littl,litl,lit,lt", + "loaf,lf", + "lookout,lkt", + "loop,lp", + "loops,lps", + "lot,lt", + "lynne,lynn", + "mall,ml", "manor,mnr", "meadow,mdw", - "meadows,mdws", + "meadows,mdws,mead", + "mead,md", + "meander,mndr,mdr,mr", + "mew,mw", + "mews,mws", + "mile,mi", "mill,ml", "mills,mls", - "mountain,mnt", - "motorway,mtwy", - "neck,nck", - "orchard,orch", - "parkway,pkwy", - "pasage,psge", - "pier,pr", - "pine,pne", - "pines,pnes", - "place,pl", - "plaza,plz", + "motorway,mway,mwy,mtwy", + "mount,mt", + "neaves,nvs", + "nook,nk", + "number,nbr,num,no,nmbr,nr", + "outlet,otlt", + "outlook,out,otlk", + "overbridge,ovrb", + "overlook,ovlk", + "overpass,opas", + "paddock,padk", + "palms,plms", + "parade,pde,prd,prde,pard", + "park,pk,prk", + "parklands,pkld,pklds,parkland", + "parkway,pkwy,parkwy,pky,pkway,prkwy,prkway,pkw,pwy,prkw", + "parkways,pkwys", + "part,prt", + "pass,ps", + "passage,psge,pass,pasg", + "path,pth", + "pathway,phwy,pway,pthway,pthwy,ptway,ptwy", + "peninsula,psla", + "piazza,piaz,pzza", + "pike,pk,pke", + "pine,pne,pn", + "pines,pns,pnes", + "place,pl,pla,plc,plac", + "plain,pln,pl", + "plains,plns,pls", + "plateau,plat,plt", + "plaza,plz,plza,pz", + "prarie,pr", + "pocket,pkt,pokt,pckt", + "point,piont,pnt,pt", + "pointe,pte,pnte", + "port,prt", + "ports,prts", + "prairie,pr", + "priors,prrs", + "private,pvt", + "promenade,prom,prm", + "pursuit,pur", + "quad,qd", + "quadrangle,qdgl", + "quadrant,qdrt,qd", + "quay,quy,qy", + "quays,quys,qys", + "radial,radl", + "ramble,ra,rmbl", + "ramp,rmp", + "ranae,ran", "ranch,rnch", - "ridge,rdg", + "rapid,rpd", + "rapids,rpds", + "range,rng,rnge,rang", + "reach,rch", + "reserve,res,resrv,resv,rsrv,rserv,rserve,rsrve", + "rest,rst", + "retreat,rt,rtt", + "return,rtn", + 
"ridge,rdge,rdg", "ridges,rdgs", - "river,riv", - "road,rd", - "route,rte", - "shore,shr", - "shores,shrs", + "ridgeway,rgwy,rdgwy", + "rowy,rightofway,rofw,row", + "rise,ri", + "riverway,rvwy", + "riviera,rvra", + "road,rd,ro,roa", + "roads,rds", + "roadside,rdsd", + "roadway,rdwy,rdw,rdy", + "rocks,rks", + "ronde,rnde", + "rosebowl,rsbl", + "rotary,rty", + "round,rnd", + "route,rt,rte", + "row,rw", + "run,rn", + "serviceway,swy,svwy,svcwy", + "shoal,shl", + "shoals,shls", + "shore,shor,shr", + "shores,shors,shrs", + "shunt,shun,shnt", + "siding,sdng,sdg", "skyway,skwy", - "spring,spg", - "springs,spgs", - "square,sq", - "street,st", - "suite,ste", - "terrace,terr,tce", - "trail,trl,tr", + "slope,slpe,slp", + "sound,snd", + "space,spc", + "spring,spg,sprng,sprn", + "springs,spgs,sprngs,spns", + "spur,spr", + "square,sq,sqr", + "squares,sqs", + "stairs,strs", + "stairway,stwy,strwy,strway", + "shighway,sthighway,sh,sth,shw,sthw,shwy,shgwy,sthgwy,shway,sthway,sthwy,shi,sthi,statehighway", + "sr,stateroad,sroad,stroad,staterd,srd,strd", + "sr,stateroute,sroute,stroute,statert,srt,srte,strt,strte", + "steps,stps", + "strand,stra,strnd,strd", + "strands,strnds,strds", + "stravenue,stra,strav", + "street,st,str,stre,stree,strt", + "streets,sts", + "strip,strp", + "subdivision,subdiv", + "subway,sbwy", + "summit,smt,sumt", + "tarn,tn", + "terrace,tce,ter,tr,terr,terace,terrac,terrasse,tsse", + "thicket,thick", + "thoroughfare,thor,throughfare,thfr", + "thoroughway,thwy", + "throughway,thru,thro,thruway,trwy,thwy", + "tollway,tlwy,twy", + "th,twph,tshph,thw,twphw,tshphw,thgwy,twphgwy,tshphgwy,thway,twphway,tshphway,thwy,twphwy,tshphwy,thi,twphi,tshphi", + "tr,trd,twpr,twprd,tshpr,tshprd", + "tr,trt,trte,twpr,twprt,twprte,tshpr,tshprt,tshprte", + "tower,twr", + "towers,twrs", + "townline,tline", + "trace,trce,trc", + "track,tr,trk,trak", "trafficway,trfy", - "tunnel,tunl", - "turnpike,tpke", - "valley,vly", - "vista,vis", - "village,vlg", + "trail,tr,trl", + 
"trailer,trlr", + "tramway,tmwy", + "trees,trs", + "triangle,tri", + "trunkway,tkwy", + "tunnel,tun,tunl", + "turnabout,trnabt", + "turn,tn,trn", + "turnpike,tpk,tpke", + "underpass,upas,upass,ups", + "union,un", + "unions,uns", + "vale,va,vl", + "valley,vlly,vly,vy", + "valleys,vlys,vllys", + "viaduct,via,viad,vdct,viadct", + "view,vw", + "views,vws", + "villa,vla", + "village,vlge", + "villas,vlas", + "vista,vst,vsta,vis", + "walk,wlk,wk", + "walkway,wkwy,wky,wlkwy", + "waters,wtrs", "way,wy", - "straße => strasse,str", - "strasse,str", - "brücke => bruecke,brucke,br", - "bruecke,brucke,br", - "bahnhof,bhf,bf", - "chaussee,ch", - "platz,pl" + "ways,wys", + "well,wl", + "wells,wls", + "wharf,whrf,whf", + "wynd,wyn", + "yard,yd,yrd", + "acceso,acces", + "alameda,alam", + "alquería,alqueria,alque", + "andador,andad", + "angosta,angta", + "apeadero,apdro", + "autopista,auto,autop,aut,ap", + "autovía,autovia,autov", + "avenida,av,avd,avda", + "bajada,bjada", + "banda,bda", + "barranco,branc", + "barranquillo,bqllo", + "barriada,barda", + "boulevard,blvd,bvd", + "brazal,brzal", + "bulevar,bulev,blev,blv,bv,bl", + "calle,cl,cll,ca,call", + "calleja,cllja", + "callejón,callejon,callej,cjón,cjon,cllon,cllón,cj", + "callejuela,cjla", + "callizo,cllzo", + "calzada,czada,calz", + "camino,cno,cmo,cmno,cm", + "caminito,cmt", + "camping,campg", + "cantera,cantr", + "cantón,canton,cant", + "carrera,cra,carra,carr,cr,kra,kr", + "carretera,ctra,cr,ct", + "carreterín,carreterin,ctrin", + "carretil,crtil", + "carril,crril", + "cerrada,cda,cer", + "cinturón,cinturon,cint", + "circular,cq", + "circuito,cto", + "circunvalar,cv,crv,cirv", + "circunvalación,circunvalacion,ccvcn", + "corredor,crrdo", + "costanilla,cstan", + "cuesta,custa", + "diagonal,diag,dg", + "diseminado,disem", + "espalda,eslda", + "estrada,estda", + "explanada,expla", + "extensión,ext,extension", + "extramuros,extrm", + "galería,galeria,gale", + "glorieta,gta", + "hacienda,hda", + "ladera,ldera", + "laderas,lderas", 
+ "llanura,llnra", + "malecón,malecon,malec", + "mirador,mrdor", + "muelle,meull", + "pantano,pant", + "paraje,praje", + "parque,pque,parq,pq,pqe", + "particular,parti", + "partida,ptda", + "pasadizo,pzo", + "pasaje,psaje,psj", + "paseo,pº,p°,po,pso,pseo,pas,ps", + "pasillo,psllo", + "peatonal,peat", + "periferico,perif", + "plaza,pl,plza,pza,pz", + "plazoleta,pzta,plzta,plta", + "plazuela,plzla", + "poblado,pbdo", + "prolongación,prolongacion,prol", + "puebla,pbla", + "pueblo,pblo", + "puente,pnte", + "rambla,rbla", + "rampla,rampa,rpla", + "retorno,ret,rt", + "rincón,rincon,rcon,rin,rncn,rncon", + "rinconada,rcda,rcnda", + "ronda,rda", + "rotonda,rtda", + "ruta,rta", + "sector,sect", + "sendera,sedra", + "sendero,send,sedro", + "subida,sbida", + "tránsito,transito,trans", + "transversal,trval,trvsal,tv,tr", + "trasera,tras", + "travesía,travesia,trva,trvsía,trvsia", + "vereda,vreda,ver", + "viaducto,vcto,vd", + "vista,vst,vsta,vist", + "allée,allee,all", + "allées,alls,allees", + "arcade,arc", + "autoroute,aut", + "avenue,av,ave,aven,avenu,avn,avnu,avnue", + "avenues,avs,aves,avens,avenus,avns,avnus,avnues", + "barriêre,barriere,bre", + "barriêres,barrieres,bres", + "berge,ber", + "berges,bers", + "boucle,bcle", + "boulevard,bd,bde,blv,blvd,blvde,blvrd,boulavard,boul,boulv,bvd,boulevarde,bld", + "butte,but", + "côte,cote", + "côteau,coteau", + "campagne,cgne", + "carreau,cau,carru", + "carrefour,carf,carref", + "carrière,carriere,care", + "carrières,carrieres,cares", + "carré,carre,carr,car", + "cavée,cavee,cav", + "cercle,cercl", + "chalet,chl", + "chaussée,chaussee,chs,chee", + "chaussées,chaussees,chss,chees", + "chemin,ch,che", + "cheminement,chem", + "chemins,ches", + "château,chateau,cht", + "cloître,cloitre,cloi", + "contour,ctr", + "corniche,cor", + "corniches,cors", + "cours,crs", + "degré,degre,deg", + "degrés,degres,degs", + "descente,dsg", + "descentes,dsgs", + "digue,dig", + "digues,digs", + "échangeur,éch", + "écluse,ecluse,ecl,écl", + 
"écluses,ecluses,ecls,écls", + "enclave,env", + "enclos,enc", + "espace,espa", + "esplanade,esp", + "esplanades,esps", + "fosse,fos", + "fosses,fos,foss", + "foyer,foyr", + "galerie,gal", + "galeries,gals", + "garenne,garn", + "gbd,grbd,grdbd,gdbd", + "gch,grch,gdch,grdch", + "gden,gdens", + "grandrue,gr,grdr,gdr", + "gdsen,gdsens", + "grille,gri", + "grimpette,grim", + "hameau,ham", + "hchs,hschs", + "impasse,imp", + "impasses,imps", + "jetée,jetee,jte", + "jetées,jetees,jtes", + "levée,levee,leve,lve", + "montée,montee,mte", + "montées,montees,mtes", + "métro,metro,mét,met", + "parc,prc", + "parcs,prcs", + "parvis,prv", + "passage,pas,psg", + "passe,pass", + "passerelle,ple", + "passerelles,ples", + "patio,pat", + "périphérique,peripherique,peri,péri", + "place,pl", + "placis,plci", + "plage,plag", + "plages,plags", + "plaine,pln", + "plateau,plt,plat", + "plateaux,pltx,platx", + "pointe,pte,pnte", + "portique,porq,portq", + "portiques,porqs,portqs", + "pourtour,pour", + "presquîle,presquile,prq,prql", + "promenade,prom", + "peripherique,peri", + "quai,au", + "raccourci,rac,racc", + "raidillon,raid", + "rampe,rpe,rmpe,rmp", + "rempart,rem,remp", + "rocade,rocd", + "ronde,rnde", + "rdpt,rpt", + "roquet,roqt", + "rotonde,rtd,rtnd,rtde,rtnde", + "route,rt,rte", + "routes,rts,rtes", + "ruelle,rle", + "ruelles,rles", + "rues,rs", + "residence,res", + "residences,ress", + "sente,sen", + "sentes,sens", + "sentier,sent", + "sentiers,sents", + "square,sq", + "terrain,terr,trn", + "terrasse,tsse", + "terrasses,tsses", + "terte,trt", + "tertes,trts", + "traverse,tra,trvs,trvrs", + "vallon,val", + "valée,vallee,val", + "venelle,ven", + "venelles,vens", + "voie,voi", + "voies,voiss", + "aly,alley,allee,ally", + "anx,anex,annex,annx", + "arc,arcade", + "ave,avenue,av,aven,avenu,avn,avnue", + "byu,bayou,bayoo", + "bch,beach", + "bnd,bend", + "blf,bluff,bluf", + "blfs,bluffs", + "btm,bottom,bot,bottm", + "blvd,boulevard,boul,boulv", + "br,branch,brnch", + "brg,bridge,brdge", + 
"brk,brook", + "brks,brooks", + "bg,burg", + "bgs,burgs", + "byp,bypass,bypa,bypas,byps", + "cp,camp,cmp", + "cyn,canyon,canyn,cnyn", + "cpe,cape", + "cswy,causeway,causwa", + "ctr,center,cen,cent,centr,centre,cnter,cntr", + "ctrs,centers", + "cir,circle,circ,circl,crcl,crcle", + "cirs,circles", + "clf,cliff", + "clfs,cliffs", + "clb,club", + "cmn,common", + "cmns,commons", + "cor,corner", + "cors,corners", + "crse,course", + "ct,court", + "cts,courts", + "cv,cove", + "cvs,coves", + "crk,creek", + "cres,crescent,crsent,crsnt", + "crst,crest", + "xing,crossing,crssng", + "xrd,crossroad", + "xrds,crossroads", + "curv,curve", + "dl,dale", + "dm,dam", + "dv,divide,div,dvd", + "dr,drive,driv,drv", + "drs,drives", + "est,estate", + "ests,estates", + "expy,expressway,exp,expr,express,expw", + "ext,extension,extn,extnsn", + "exts,extensions", + "fls,falls", + "fry,ferry,frry", + "fld,field", + "flds,fields", + "flt,flat", + "flts,flats", + "frd,ford", + "frds,fords", + "frst,forest,forests", + "frg,forge,forg", + "frgs,forges", + "frk,fork", + "frks,forks", + "ft,fort,frt", + "fwy,freeway,freewy,frway,frwy", + "gdn,garden,gardn,grden,grdn", + "gdns,gardens,grdns", + "gtwy,gateway,gatewy,gatway,gtway", + "gln,glen", + "glns,glens", + "grn,green", + "grns,greens", + "grv,grove,grov", + "grvs,groves", + "hbr,harbor,harb,harbr,hrbor", + "hbrs,harbors", + "hvn,haven", + "hts,heights,ht", + "hwy,highway,highwy,hiway,hiwy,hway", + "hl,hill", + "hls,hills", + "holw,hollow,hllw,hollows,holws", + "inlt,inlet", + "is,island,islnd", + "iss,islands,islnds", + "isle,isles", + "jct,junction,jction,jctn,junctn,juncton", + "jcts,junctions,jctns", + "ky,key", + "kys,keys", + "knl,knoll,knol", + "knls,knolls", + "lk,lake", + "lks,lakes", + "lndg,landing,lndng", + "ln,lane", + "lgt,light", + "lgts,lights", + "lf,loaf", + "lck,lock", + "lcks,locks", + "ldg,lodge,ldge,lodg", + "loop,loops", + "mnr,manor", + "mnrs,manors", + "mdw,meadow", + "mdws,meadows,mdw,medows", + "ml,mill", + "mls,mills", 
+ "msn,mission,missn,mssn", + "mtwy,motorway", + "mt,mount,mnt", + "mtn,mountain,mntain,mntn,mountin,mtin", + "mtns,mountains,mntns", + "nck,neck", + "orch,orchard,orchrd", + "oval,ovl", + "opas,overpass", + "park,parks", + "pkwy,parkway,parkwy,pkway,pky,parkways,pkwys", + "psge,passage", + "path,paths", + "pike,pikes", + "pne,pine", + "pnes,pines", + "pl,place", + "pln,plain", + "plns,plains", + "plz,plaza,plza", + "pt,point", + "pts,points", + "prt,port", + "prts,ports", + "pr,prairie,prr", + "radl,radial,rad,radiel", + "rnch,ranch,ranches,rnchs", + "rpd,rapid", + "rpds,rapids", + "rst,rest", + "rdg,ridge,rdge", + "rdgs,ridges", + "riv,river,rvr,rivr", + "rd,road", + "rds,roads", + "rte,route", + "shl,shoal", + "shls,shoals", + "shr,shore,shoar", + "shrs,shores,shoars", + "skwy,skyway", + "spg,spring,spng,sprng", + "spgs,springs,spngs,sprngs", + "spur,spurs", + "sq,square,sqr,sqre,squ", + "sqs,squares,sqrs", + "sta,station,statn,stn", + "stra,stravenue,strav,straven,stravn,strvn,strvnue", + "strm,stream,streme", + "st,street,strt,str", + "sts,streets", + "smt,summit,sumit,sumitt", + "ter,terrace,terr", + "trwy,throughway", + "trce,trace,traces", + "trak,track,tracks,trk,trks", + "trfy,trafficway", + "trl,trail,trails,trls", + "trlr,trailer,trlrs", + "tunl,tunnel,tunel,tunls,tunnels,tunnl", + "tpke,turnpike,trnpk,turnpk", + "upas,underpass", + "un,union", + "uns,unions", + "vly,valley,vally,vlly", + "vlys,valleys", + "via,viaduct,vdct,viadct", + "vw,view", + "vws,views", + "vlg,village,vill,villag,villg,villiage", + "vlgs,villages", + "vl,ville", + "vis,vista,vist,vst,vsta", + "walk,walks", + "way,wy", + "wl,well", + "wls,wells" ] } }, diff --git a/test/settings.js b/test/settings.js index 496ce9ba..badb401c 100644 --- a/test/settings.js +++ b/test/settings.js @@ -50,6 +50,23 @@ module.exports.tests.analysis = function(test, common) { // -- analyzers -- +// this multiplexer filter provides all the synonyms used by the peliasAdmin analyzer +// note: the multiplexer 
ensures that we do not virally generate synonyms of synonyms. +module.exports.tests.adminSynonymsMultiplexerFilter = function (test, common) { + test('has admin_synonyms_multiplexer filter', function (t) { + var s = settings(); + t.equal(typeof s.analysis.filter.admin_synonyms_multiplexer, 'object', 'there is a admin_synonyms_multiplexer filter'); + var filter = s.analysis.filter.admin_synonyms_multiplexer; + t.equal(filter.type, 'multiplexer'); + t.deepEqual(filter.filters, [ + 'synonyms/custom_admin', + 'synonyms/personal_titles', + 'synonyms/place_names' + ]); + t.end(); + }); +}; + module.exports.tests.peliasAdminAnalyzer = function(test, common) { test('has pelias admin analyzer', function(t) { var s = settings(); @@ -57,9 +74,44 @@ module.exports.tests.peliasAdminAnalyzer = function(test, common) { var analyzer = s.analysis.analyzer.peliasAdmin; t.equal(analyzer.type, 'custom', 'custom analyzer'); t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified'); + t.deepEqual(analyzer.char_filter, ['punctuation', 'nfkc_normalizer'], 'character filters specified'); t.true(Array.isArray(analyzer.filter), 'filters specified'); t.end(); }); + test('peliasAdmin token filters', function (t) { + var analyzer = settings().analysis.analyzer.peliasAdmin; + t.deepEqual(analyzer.filter, [ + "lowercase", + "trim", + "admin_synonyms_multiplexer", + "icu_folding", + "word_delimiter", + "unique_only_same_position", + "notnull", + "flatten_graph" + ]); + t.end(); + }); +}; + +// this multiplexer filter provides all the synonyms used by the peliasPhrase and peliasIndexOneEdgeGram analyzers +// note: the multiplexer ensures that we do not virally generate synonyms of synonyms.
+module.exports.tests.nameSynonymsMultiplexerFilter = function (test, common) { + test('has name_synonyms_multiplexer filter', function (t) { + var s = settings(); + t.equal(typeof s.analysis.filter.name_synonyms_multiplexer, 'object', 'there is a name_synonyms_multiplexer filter'); + var filter = s.analysis.filter.name_synonyms_multiplexer; + t.equal(filter.type, 'multiplexer'); + t.deepEqual(filter.filters, [ + 'synonyms/custom_name', + 'synonyms/personal_titles', + 'synonyms/place_names', + 'synonyms/streets', + 'synonyms/directionals', + 'synonyms/punctuation' + ]); + t.end(); + }); }; module.exports.tests.peliasIndexOneEdgeGramAnalyzer = function(test, common) { @@ -77,12 +129,9 @@ module.exports.tests.peliasIndexOneEdgeGramAnalyzer = function(test, common) { var analyzer = settings().analysis.analyzer.peliasIndexOneEdgeGram; t.deepEqual( analyzer.filter, [ "lowercase", - "icu_folding", "trim", - "custom_name", - "street_suffix", - "directionals", - "ampersand", + "name_synonyms_multiplexer", + "icu_folding", "remove_ordinals", "removeAllZeroNumericPrefix", "peliasOneEdgeGramFilter", @@ -108,9 +157,9 @@ module.exports.tests.peliasQueryAnalyzer = function (test, common) { test('peliasQuery token filters', function (t) { var analyzer = settings().analysis.analyzer.peliasQuery; t.deepEqual(analyzer.filter, [ - 'icu_folding', 'lowercase', 'trim', + 'icu_folding', 'remove_ordinals', 'removeAllZeroNumericPrefix', 'unique_only_same_position', @@ -137,10 +186,7 @@ module.exports.tests.peliasPhraseAnalyzer = function(test, common) { "lowercase", "trim", "remove_duplicate_spaces", - "ampersand", - "custom_name", - "street_suffix", - "directionals", + "name_synonyms_multiplexer", "icu_folding", "remove_ordinals", "unique_only_same_position", @@ -158,7 +204,7 @@ module.exports.tests.peliasZipAnalyzer = function(test, common) { var analyzer = s.analysis.analyzer.peliasZip; t.equal(analyzer.type, 'custom', 'custom analyzer'); t.equal(typeof analyzer.tokenizer, 'string', 
'tokenizer specified'); - t.deepEqual(analyzer.char_filter, ["alphanumeric"], 'alphanumeric filter specified'); + t.deepEqual(analyzer.char_filter, ['alphanumeric', 'nfkc_normalizer'], 'alphanumeric filter specified'); t.true(Array.isArray(analyzer.filter), 'filters specified'); t.end(); }); @@ -166,8 +212,8 @@ module.exports.tests.peliasZipAnalyzer = function(test, common) { var analyzer = settings().analysis.analyzer.peliasZip; t.deepEqual( analyzer.filter, [ "lowercase", - "icu_folding", "trim", + "icu_folding", "unique_only_same_position", "notnull" ]); @@ -182,7 +228,7 @@ module.exports.tests.peliasUnitAnalyzer = function(test, common) { var analyzer = s.analysis.analyzer.peliasUnit; t.equal(analyzer.type, 'custom', 'custom analyzer'); t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified'); - t.deepEqual(analyzer.char_filter, ["alphanumeric"], 'alphanumeric filter specified'); + t.deepEqual(analyzer.char_filter, ['alphanumeric', 'nfkc_normalizer'], 'alphanumeric filter specified'); t.true(Array.isArray(analyzer.filter), 'filters specified'); t.end(); }); @@ -190,8 +236,8 @@ module.exports.tests.peliasUnitAnalyzer = function(test, common) { var analyzer = s.analysis.analyzer.peliasUnit; t.deepEqual( analyzer.filter, [ "lowercase", - "icu_folding", "trim", + "icu_folding", "unique_only_same_position", "notnull" ]); @@ -212,6 +258,24 @@ module.exports.tests.peliasHousenumberAnalyzer = function(test, common) { }); }; +// this multiplexer filter provides all the synonyms used by the peliasStreet analyzer +// note: the multiplexer ensures that we do not virally generate synonyms of synonyms.
+module.exports.tests.streetSynonymsMultiplexerFilter = function (test, common) { + test('has street_synonyms_multiplexer filter', function (t) { + var s = settings(); + t.equal(typeof s.analysis.filter.street_synonyms_multiplexer, 'object', 'there is a street_synonyms_multiplexer filter'); + var filter = s.analysis.filter.street_synonyms_multiplexer; + t.equal(filter.type, 'multiplexer'); + t.deepEqual(filter.filters, [ + 'synonyms/custom_street', + 'synonyms/personal_titles', + 'synonyms/streets', + 'synonyms/directionals' + ]); + t.end(); + }); +}; + module.exports.tests.peliasStreetAnalyzer = function(test, common) { test('has peliasStreet analyzer', function(t) { var s = settings(); @@ -219,7 +283,7 @@ module.exports.tests.peliasStreetAnalyzer = function(test, common) { var analyzer = s.analysis.analyzer.peliasStreet; t.equal(analyzer.type, 'custom', 'custom analyzer'); t.equal(typeof analyzer.tokenizer, 'string', 'tokenizer specified'); - t.deepEqual(analyzer.char_filter, ["punctuation","nfkc_normalizer"], 'character filters specified'); + t.deepEqual(analyzer.char_filter, ['punctuation', 'nfkc_normalizer'], 'character filters specified'); t.true(Array.isArray(analyzer.filter), 'filters specified'); t.end(); }); @@ -229,9 +293,7 @@ module.exports.tests.peliasStreetAnalyzer = function(test, common) { "lowercase", "trim", "remove_duplicate_spaces", - "custom_street", - "street_suffix", - "directionals", + "street_synonyms_multiplexer", "icu_folding", "remove_ordinals", "trim", @@ -313,11 +375,11 @@ module.exports.tests.allCharacterFiltersPresent = function(test, common) { // note: pattern/replace should not have surrounding whitespace // we convert and->& rather than &->and to save memory/disk -module.exports.tests.ampersandFilter = function(test, common) { - test('has ampersand filter', function(t) { +module.exports.tests.punctuationFilter = function(test, common) { + test('has punctuation filter', function(t) { var s = settings(); - t.equal(typeof 
s.analysis.filter.ampersand, 'object', 'there is a ampersand filter'); - var filter = s.analysis.filter.ampersand; + t.equal(typeof s.analysis.filter['synonyms/punctuation'], 'object', 'there is a punctuation filter'); + var filter = s.analysis.filter['synonyms/punctuation']; t.equal(filter.type, 'synonym'); t.deepEqual(filter.synonyms, [ "&,and", @@ -366,30 +428,58 @@ module.exports.tests.removeAllZeroNumericPrefixFilter = function(test, common) { }); }; -// this filter stems common street suffixes -// eg. road=>rd and street=>st +// this filter provides synonyms for street suffixes +// eg. road=>rd module.exports.tests.streetSynonymFilter = function(test, common) { - test('has street_suffix filter', function(t) { + test('has synonyms/streets filter', function(t) { var s = settings(); - t.equal(typeof s.analysis.filter.street_suffix, 'object', 'there is an street_suffix filter'); - var filter = s.analysis.filter.street_suffix; + t.equal(typeof s.analysis.filter['synonyms/streets'], 'object', 'there is a synonyms/streets filter'); + var filter = s.analysis.filter['synonyms/streets']; t.equal(filter.type, 'synonym'); t.true(Array.isArray(filter.synonyms)); - t.equal(filter.synonyms.length, 127); + t.equal(filter.synonyms.length, 809); t.end(); }); }; // this filter stems common directional terms // eg. 
north=>n and south=>s -module.exports.tests.directionSynonymFilter = function(test, common) { +module.exports.tests.directionalSynonymFilter = function(test, common) { test('has directionals filter', function(t) { var s = settings(); - t.equal(typeof s.analysis.filter.directionals, 'object', 'there is an directionals filter'); - var filter = s.analysis.filter.directionals; + t.equal(typeof s.analysis.filter['synonyms/directionals'], 'object', 'there is a synonyms/directionals filter'); + var filter = s.analysis.filter['synonyms/directionals']; + t.equal(filter.type, 'synonym'); + t.true(Array.isArray(filter.synonyms)); + t.equal(filter.synonyms.length, 69); + t.end(); + }); +}; + +// this filter provides common synonyms for personal titles +// eg. doctor=>dr +module.exports.tests.personalTitleSynonymFilter = function (test, common) { + test('has personal_titles filter', function (t) { + var s = settings(); + t.equal(typeof s.analysis.filter['synonyms/personal_titles'], 'object', 'there is a synonyms/personal_titles filter'); + var filter = s.analysis.filter['synonyms/personal_titles']; + t.equal(filter.type, 'synonym'); + t.true(Array.isArray(filter.synonyms)); + t.equal(filter.synonyms.length, 191); + t.end(); + }); +}; + +// this filter provides common synonyms for place names +// eg. park=>pk +module.exports.tests.placeNameSynonymFilter = function (test, common) { + test('has place_names filter', function (t) { + var s = settings(); + t.equal(typeof s.analysis.filter['synonyms/place_names'], 'object', 'there is a synonyms/place_names filter'); + var filter = s.analysis.filter['synonyms/place_names']; t.equal(filter.type, 'synonym'); t.true(Array.isArray(filter.synonyms)); - t.equal(filter.synonyms.length, 8); + t.equal(filter.synonyms.length, 314); t.end(); }); };