Skip to content

Commit 4b1f352

Browse files
committed
feat(dedupe): squash empire and country
1 parent f8987a5 commit 4b1f352

File tree

2 files changed

+187
-0
lines changed

2 files changed

+187
-0
lines changed

helper/diffPlaces.js

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,14 @@ function isParentHierarchyDifferent(item1, item2){
9595
}
9696
}
9797

98+
// special case: treat an empire and a country as the same place for deduplication purposes
99+
if (
100+
(item1.layer === 'empire' && item2.layer === 'country') ||
101+
(item1.layer === 'country' && item2.layer === 'empire')
102+
) {
103+
return false;
104+
}
105+
98106
// special handling of postal codes, which we consider to be strictly
99107
// unique within a single country/dependency regardless of the rest of
100108
// the hierarchy (ie. we ignore other parent properties)
@@ -351,6 +359,17 @@ function layerDependentNormalization(names, layer) {
351359
});
352360
}
353361

362+
// empire / country: treat 'United States of America' as a synonym of 'United States'
363+
if (layer === 'empire' || layer === 'country') {
364+
_.forEach(names, (value, lang) => {
365+
copy[lang] = field.getArrayValue(value).map(name => {
366+
return name
367+
.replace(/^(united states) of america$/i, '$1')
368+
.trim();
369+
});
370+
});
371+
}
372+
354373
// county
355374
if( layer === 'county' ){
356375
_.forEach(names, (value, lang) => {

test/unit/middleware/dedupe.js

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -821,6 +821,174 @@ module.exports.tests.priority = function(test, common) {
821821
});
822822
});
823823

824+
test('real-world test New Zealand: empire vs country', function (t) {
825+
var req = {
826+
clean: {
827+
text: 'New Zealand',
828+
size: 100
829+
}
830+
};
831+
var res = {
832+
data: [
833+
{
834+
'name': {
835+
'default': 'New Zealand'
836+
},
837+
'source': 'whosonfirst',
838+
'source_id': '136253053',
839+
'layer': 'empire',
840+
'parent': {
841+
'empire_id': 136253053
842+
},
843+
},
844+
{
845+
'name': {
846+
'default': [ 'New Zealand', 'Aotearoa' ],
847+
},
848+
'source': 'whosonfirst',
849+
'source_id': '85633345',
850+
'layer': 'country',
851+
'parent': {
852+
'continent_id': 102191583,
853+
'country_id': 85633345
854+
},
855+
}
856+
]
857+
};
858+
859+
dedupe(req, res, function () {
860+
t.equal(res.data.length, 1, 'results have fewer items than before');
861+
t.equal(res.data[0].layer, 'country', 'empire result removed');
862+
t.end();
863+
});
864+
});
865+
866+
test('real-world test New Zealand: empire vs country - inverted order', function (t) {
867+
var req = {
868+
clean: {
869+
text: 'New Zealand',
870+
size: 100
871+
}
872+
};
873+
var res = {
874+
data: [
875+
{
876+
'name': {
877+
'default': [ 'New Zealand', 'Aotearoa' ],
878+
},
879+
'source': 'whosonfirst',
880+
'source_id': '85633345',
881+
'layer': 'country',
882+
'parent': {
883+
'continent_id': 102191583,
884+
'country_id': 85633345
885+
},
886+
},
887+
{
888+
'name': {
889+
'default': 'New Zealand'
890+
},
891+
'source': 'whosonfirst',
892+
'source_id': '136253053',
893+
'layer': 'empire',
894+
'parent': {
895+
'empire_id': 136253053
896+
},
897+
}
898+
]
899+
};
900+
901+
dedupe(req, res, function () {
902+
t.equal(res.data.length, 1, 'results have fewer items than before');
903+
t.equal(res.data[0].layer, 'country', 'empire result removed');
904+
t.end();
905+
});
906+
});
907+
908+
test('real-world test United States: empire vs country', function (t) {
909+
var req = {
910+
clean: {
911+
text: 'United States',
912+
size: 100
913+
}
914+
};
915+
var res = {
916+
data: [
917+
{
918+
'name': {
919+
'default': 'United States'
920+
},
921+
'source': 'whosonfirst',
922+
'source_id': '85633793',
923+
'layer': 'country',
924+
'parent': {
925+
'empire_id': 136253057,
926+
'country_id': 85633793
927+
},
928+
},
929+
{
930+
'name': {
931+
'default': 'United States of America',
932+
},
933+
'source': 'whosonfirst',
934+
'source_id': '136253057',
935+
'layer': 'empire',
936+
'parent': {
937+
'empire_id': 136253057,
938+
},
939+
}
940+
]
941+
};
942+
943+
dedupe(req, res, function () {
944+
t.equal(res.data.length, 1, 'results have fewer items than before');
945+
t.equal(res.data[0].layer, 'country', 'empire result removed');
946+
t.end();
947+
});
948+
});
949+
950+
test('real-world test United States: empire vs country - inverted order', function (t) {
951+
var req = {
952+
clean: {
953+
text: 'United States',
954+
size: 100
955+
}
956+
};
957+
var res = {
958+
data: [
959+
{
960+
'name': {
961+
'default': 'United States of America',
962+
},
963+
'source': 'whosonfirst',
964+
'source_id': '136253057',
965+
'layer': 'empire',
966+
'parent': {
967+
'empire_id': 136253057,
968+
},
969+
},
970+
{
971+
'name': {
972+
'default': 'United States'
973+
},
974+
'source': 'whosonfirst',
975+
'source_id': '85633793',
976+
'layer': 'country',
977+
'parent': {
978+
'empire_id': 136253057,
979+
'country_id': 85633793
980+
},
981+
}
982+
]
983+
};
984+
985+
dedupe(req, res, function () {
986+
t.equal(res.data.length, 1, 'results have fewer items than before');
987+
t.equal(res.data[0].layer, 'country', 'empire result removed');
988+
t.end();
989+
});
990+
});
991+
824992
test('A->B B->C dependency graph', function (t) {
825993
var req = {
826994
clean: {

0 commit comments

Comments
 (0)