Source of tools/mkcountries-geonames
#!/usr/bin/env perl
use 5.014;
use warnings;
our $VERSION = '1.04';
# Country-code table, keyed by lowercase two-letter code.  Each value is an
# array ref with these slots:
#   [0] long description (for the preset entries below: a reservation status,
#       a colon, then the name)
#   [1] space-separated tags; "c-xx" has the same shape as the tag the input
#       loop further down builds from the column it calls $cont
#   [2] short name; used later as the starting point for the generated
#       abbreviation
#   [3] optional space-separated list of other codes
#       (presumably the codes that replaced or contain this one -- confirm)
# The meaning of the "Xr", "Xi" and "Co" tags is not defined in this file.
#
# These preset entries take precedence: the input loop stores a row with //=,
# so it never overwrites a code that is already listed here.
my %cc;
%cc = (
# Transitionally reserved codes.
an => ["transitionally reserved: Netherlands Antilles", "c-na Xr", "Netherlands Antilles", 'bq cw sx'],
bu => ["transitionally reserved: Burma", "c-as Xr", "Burma", 'mm'],
cs => ["transitionally reserved: Serbia and Montenegro", "c-eu Xr", "Serbia and Montenegro", 'rs me'],
nt => ["transitionally reserved: Neutral Zone", "c-as Xr", "N. Zone", 'iq sa'],
tp => ["transitionally reserved: East Timor", "c-oc Xr", "(East Timor)", 'tl'],
yu => ["transitionally reserved: Yugoslavia", "c-eu Xr", "Yugoslavia", 'cs ba hr mk si'],
zr => ["transitionally reserved: Zaire", "c-af Xr", "Zaire", 'cd'],
# Exceptionally reserved codes.
ac => ["exceptionally reserved: Ascension Island", "c-oc Xr", "Ascension Island", 'sh'],
cp => ["exceptionally reserved: Clipperton Island", "c-na Xr", "Clipperton Island"],
cq => ["exceptionally reserved by gb: Island of Sark", "c-eu Xr", "Island of Sark", 'gg'],
dg => ["exceptionally reserved: Diego Garcia", "c-as Xr", "Diego Garcia", 'io'],
ea => ["exceptionally reserved: Ceuta and Melilla", "c-af Xr", "Ceuta and Melilla"],
eu => ["exceptionally reserved: European Union", "c-eu Xr", "European Union"],
ez => ["exceptionally reserved: European OTC derivatives", "c-eu Xr", "Eurozone"],
fx => ["exceptionally reserved by fr: Metropolitan France", "c-eu Xr", "Metropolitan France", 'fr'],
ic => ["exceptionally reserved: Canary Islands", "c-af Xr", "Canary Islands"],
su => ["exceptionally reserved: former USSR", "c-eu Xr", "USSR"],
ta => ["exceptionally reserved: Tristan da Cunha", "c-oc Xr", "Tristan da Cunha", 'sh'],
uk => ["exceptionally reserved by gb: United Kingdom", "c-eu Xr", "(United Kingdom)", 'gb'],
un => ["exceptionally reserved: United Nations", "Xi Xr", "United Nations"],
# Indeterminately reserved codes.
dy => ["indeterminately reserved: Benin", "c-af Xr", "(Benin)", 'bj'],
ew => ["indeterminately reserved: Estonia", "c-eu Xr", "(Estonia)", 'ee'],
fl => ["indeterminately reserved: Liechtenstein", "c-eu Xr", "(Liechtenstein)", 'li'],
ja => ["indeterminately reserved: Jamaica", "c-na Xr", "(Jamaica)", 'jm'],
lf => ["indeterminately reserved: Libya Fezzan", "c-af Xr", "Fezzan", 'ly'],
pi => ["indeterminately reserved: Philippines", "c-as Xr", "(Philippines)", 'ph'],
ra => ["indeterminately reserved: Argentina", "c-sa Xr", "(Argentina)", 'ar'],
rb => ["indeterminately reserved: Botswana, Bolivia", "c-sa Xr", "(Bots...)", 'bw bo'],
rc => ["indeterminately reserved: Republic of China", "c-as Xr", "(RoC)", 'tw'],
rh => ["indeterminately reserved: Haiti", "c-na Xr", "(Haiti)", 'ht'],
ri => ["indeterminately reserved: Indonesia", "c-as Xr", "(Indonesia)", 'id'],
rl => ["indeterminately reserved: Lebanon", "c-as Xr", "(Lebanon)", 'lb'],
rm => ["indeterminately reserved: Madagascar", "c-af Xr", "(Madagascar)", 'mg'],
rn => ["indeterminately reserved: Niger", "c-af Xr", "(Niger)", 'ne'],
rp => ["indeterminately reserved: Philippines", "c-as Xr", "(Philippines)", 'ph'],
sf => ["indeterminately reserved: Finland", "c-eu Xr", "(Finland)", 'fi'],
wg => ["indeterminately reserved: Grenada", "c-na Xr", "(Grenada)", 'gd'],
wl => ["indeterminately reserved: Saint Lucia", "c-na Xr", "(Saint Luc.)", 'lc'],
wv => ["indeterminately reserved: Saint Vincent", "c-na Xr", "(Saint Vin.)", 'vc'],
yv => ["indeterminately reserved: Venezuela", "c-sa Xr", "(Venezuela)", 've'],
# Codes marked "not used" (intellectual-property organisations).
ap => ["not used: African Regional Industrial Property Organization", "Xi", "ARIPO"],
bx => ["not used: Benelux Office for Intellectual Property", "Xi", "BOIP"],
ef => ["not used: European Community Patent Convention", "Xi", "CPC"],
em => ["not used: European Trademark Office", "Xi", "OHIM"],
ep => ["not used: European Patent Organization", "Xi", "EPOrg"],
ev => ["not used: Eurasian Patent Organization", "Xi", "EAPO"],
gc => ["not used: Gulf Patent Office", "Xi", "GCCPO"],
ib => ["not used: International Bureau of WIPO", "Xi", "IB"],
oa => ["not used: African Intellectual Property Organization", "Xi", "OAPI"],
wo => ["not used: World Intellectual Property Organization", "Xi", "WIPO"],
# User-assigned codes and codes whose meaning comes from other standards.
# NOTE(review): "c-az" appears only on xa and xo; every other entry in this
# table uses af/as/eu/na/oc/sa -- confirm whether "c-as" was intended.
xa => ["user-assigned by Russia: Abkhazia", "c-az Co Xi", "Abkhazia", 'ge'],
xo => ["user-assigned by Russia: South Ossetia", "c-az Co Xi", "South Ossetia", 'ge'],
xi => ["user-assigned for UK/EU trade: Northern Ireland", "c-eu Co Xi", "Northern Ireland", 'gb'],
xn => ["user-assigned by WIPO: Nordic Patent Institute", "c-eu Co Xi", "Nordic", 'dk is no se'],
xu => ["user-assigned by WIPO: international org UPOV", "Co Xi", "UPOV"],
xv => ["user-assigned by WIPO: Visegrad Patent Institute", "c-eu Co Xi", "Visegrad", 'cz hu pl sk'],
xx => ["user-assigned by WIPO: unknown state or other entity", "Co Xi", "other"],
xz => ["UN/LOCODE semantics: international waters", "Co Xi", "international"],
qm => ["user-assigned by ISRC: United States alternative", "c-na Co Xi", "USA"],
qo => ["Unicode semantics: Outlying Oceania", "c-oc Co Xi", "Oceania"],
qu => ["Unicode semantics: European Union deprecated reserve", "c-eu Co Xi", "EU", 'eu'],
zz => ["Unicode semantics: unknown or invalid territory", "Co Xi","unknown"],
oo => ["escape to indicate additional code", "Co Xi", "escape"],
);
# Merge tab-separated input rows (files named in @ARGV, or STDIN) into %cc.
# Columns used, counting from 0 and going by the variable names:
# 0 = ISO code, 4 = country name, 8 = continent, 9 = top-level domain.
LINE:
while (my $line = <>) {
    next LINE if $line =~ /^#/;    # header/comment lines

    my ($iso, $name, $cont, $tld) = (split /\t/, $line)[0, 4, 8, 9];

    # A blank or truncated line has no column 9.  Processing it would raise
    # "uninitialized" warnings and store a bogus entry (e.g. under the key
    # "\n"), so skip it instead.
    next LINE if !defined $tld;

    my @info = ($name, "c-\L$cont");

    # Keep the TLD (without its leading dot) only when it differs from the
    # lowercased code; slot 2 is deliberately left unset here.
    $info[3] = $tld if $tld =~ s/\A\.// and $tld ne lc $iso;

    # Entries already present in %cc win over input rows.
    $cc{ lc $iso } //= \@info;
}
# Hand-picked short names that replace the input name as the starting point
# for the abbreviation step.  Each is applied only when the entry exists:
# assigning through a missing key would autovivify an entry with no
# description or tags, which would then be emitted in the output.
$cc{io}[2] = "Chagos Islands" if exists $cc{io};
$cc{um}[2] = "U.S. isl."      if exists $cc{um};
# Derive a compact label for every entry and store it in slot 2.  The label
# starts from the existing short name (slot 2) or, failing that, from the
# description (slot 0).
for my $entry (values %cc) {
    my $abbr = $entry->[2] // $entry->[0];

    # Alias $_ to the working copy so each substitution edits $abbr in place.
    # The rules are order-dependent; keep them in this sequence.
    for ($abbr) {
        s/,.*//;                # cut at the first comma
        s/(?<=.)\(.*\)\s*//;    # drop a parenthesised part unless it starts the name
        s/ republic\b//gi;
        s/ islands?\b//gi;
        s/\bthe //gi;

        # "X and Y": switch to "&", then strip every run of lowercase
        # letters and spaces after the first character.
        s/ and / & /g and s/(?<=.)[a-z ]+//g;

        s/ of / /g;
        s/\bsa?int /st /gi;
        s/Un\Kited /. /gi;      # "United " becomes "Un. "

        # Compass prefixes and "New" become short markers.
        s/South(?:ern)? /S-/g;
        s/North(?:ern)? /N-/g;
        s/West(?:ern)? /W-/g;
        s/East(?:ern)? /E-/g;
        s/New /n./g;

        # A word of two or more characters directly before a hyphen is cut
        # down to its first character.
        s/(\w)(\w+)-/$1-/g;

        # Truncate long words: after four non-space characters plus one
        # consonant, two or more remaining word characters are replaced by
        # ".".  No cut is made right after "Austr" or right before "land".
        s/(\S{4}[b-df-hj-np-tv-xz])((?<!Austr)(?!land)\w{2,})/$1./g;
    }

    # Store the label only when it differs from the description.
    $entry->[2] = $abbr if $abbr ne $entry->[0];
}
use Data::Dump qw(dd);

# First output line: a comment naming the generating script.
say "# automatically generated by $0";

# An empty indent string makes Data::Dump print nested lines without leading
# indentation.
$Data::Dump::INDENT = '';

# Dump the finished table to STDOUT as a Perl data structure.
dd(\%cc);
=head1 NAME
mkcountries-geonames - Create Perl include of country info from GeoNames data
=head1 SYNOPSIS
curl http://download.geonames.org/export/dump/countryInfo.txt |
tools/mkcountries-geonames > countries.inc.pl