-- Module:CountryNormalization
-- Appearance
-- Documentation for this module may be created at Module:CountryNormalization/doc
-- Module:CountryNormalization
-- Standardizes country names based on their ISO canonical form by mapping common variations, abbreviations, and alternative spellings to a single, consistent output, ensuring uniformity in data representation.
local p = {}

-- Each entry pairs one canonical output name with every normalized spelling
-- (lowercase, periods removed, single spaces) that should resolve to it.
-- The canonical name's own normalized form is listed first in each group.
local country_groups = {
    {
        canonical = "Brunei Darussalam",
        aliases = { "brunei darussalam", "brunei" },
    },
    {
        canonical = "Cocos (Keeling) Islands",
        aliases = { "cocos (keeling) islands", "cocos islands", "keeling islands" },
    },
    {
        canonical = "Congo",
        aliases = { "congo", "republic of the congo", "congo-brazzaville" },
    },
    {
        canonical = "Congo, Democratic Republic of the",
        aliases = {
            "congo, democratic republic of the",
            "democratic republic of the congo",
            "drc",
            "dr congo",
            "congo-kinshasa",
        },
    },
    {
        canonical = "Côte d'Ivoire",
        aliases = { "côte d'ivoire", "cote d'ivoire", "ivory coast" },
    },
    {
        canonical = "Curaçao",
        aliases = { "curaçao", "curacao" },
    },
    {
        canonical = "Czechia",
        aliases = { "czechia", "czech republic" },
    },
    {
        canonical = "Eswatini",
        aliases = { "eswatini", "swaziland" },
    },
    {
        canonical = "The Gambia",
        aliases = { "the gambia", "gambia" },
    },
    {
        canonical = "Iran (Islamic Republic of)",
        aliases = { "iran (islamic republic of)", "iran" },
    },
    {
        canonical = "Lao People's Democratic Republic",
        aliases = { "lao people's democratic republic", "lao pdr", "laos" },
    },
    {
        canonical = "Macao",
        aliases = { "macao", "macau" },
    },
    {
        canonical = "Micronesia (Federated States of)",
        aliases = {
            "micronesia (federated states of)",
            "federated states of micronesia",
            "micronesia",
        },
    },
    {
        canonical = "Myanmar",
        aliases = { "myanmar", "burma" },
    },
    {
        canonical = "Netherlands",
        aliases = { "netherlands", "holland" },
    },
    {
        canonical = "Russian Federation",
        aliases = { "russian federation", "russia" },
    },
    {
        canonical = "Saint Barthélemy",
        aliases = { "saint barthélemy", "saint barthelemy", "st barthelemy" },
    },
    {
        canonical = "Saint Kitts and Nevis",
        aliases = { "saint kitts and nevis", "st kitts and nevis" },
    },
    {
        canonical = "Saint Pierre and Miquelon",
        aliases = { "saint pierre and miquelon", "st pierre and miquelon" },
    },
    {
        canonical = "Saint Vincent and the Grenadines",
        aliases = { "saint vincent and the grenadines", "st vincent and the grenadines" },
    },
    {
        canonical = "Syrian Arab Republic",
        aliases = { "syrian arab republic", "syria" },
    },
    {
        canonical = "United Arab Emirates",
        aliases = { "united arab emirates", "uae", "u a e" },
    },
    {
        canonical = "United Kingdom of Great Britain and Northern Ireland",
        aliases = {
            "united kingdom of great britain and northern ireland",
            "united kingdom",
            "uk",
            "u k",
        },
    },
    {
        canonical = "United States of America",
        aliases = {
            "united states of america",
            "united states",
            "usa",
            "us",
            "u s a",
            "u s",
        },
    },
    {
        canonical = "Viet Nam",
        aliases = { "viet nam", "vietnam" },
    },
    {
        canonical = "Virgin Islands (U.S.)",
        aliases = {
            "virgin islands (us)",
            "virgin islands (u s)",
            "united states virgin islands",
            "us virgin islands",
        },
    },
}

-- Flatten the groups once, at module load, into a direct
-- normalized-spelling -> canonical-name lookup table.
local mapping = {}
for _, group in ipairs(country_groups) do
    for _, alias in ipairs(group.aliases) do
        mapping[alias] = group.canonical
    end
end
-- Lowercases text for use as a lookup key.
-- string.lower only folds ASCII letters, so accented capitals such as "Ô" or
-- "Ç" would survive and miss the lowercase table keys. When the Scribunto
-- `mw.ustring` library is present, its Unicode-aware lower() is used instead;
-- if it is absent or raises an error, the ASCII-only string.lower is the fallback.
local function toLowerKey(text)
    local ustring = mw and mw.ustring
    if ustring and ustring.lower then
        local ok, lowered = pcall(ustring.lower, text)
        if ok and type(lowered) == "string" then
            return lowered
        end
    end
    return text:lower()
end

--- Maps a country name to its canonical form.
-- The input is trimmed, lowercased, has typographic apostrophes and backticks
-- converted to "'", has periods removed and runs of whitespace collapsed, and
-- is then looked up in the module-level `mapping` table.
-- @param inputCountry The country name to normalize.
-- @return The canonical name when the normalized input is a key of `mapping`;
--   otherwise the input with leading/trailing whitespace trimmed. nil, the
--   empty string, and non-string values are returned unchanged.
function p.formatCountry(inputCountry)
    -- Non-strings (including nil) cannot be normalized; hand them back as-is
    -- instead of failing on a string method call.
    if type(inputCountry) ~= "string" or inputCountry == "" then
        return inputCountry
    end

    -- Trim leading/trailing whitespace. match() returns only the capture,
    -- avoiding gsub's extra replacement-count return value.
    local trimmed = inputCountry:match("^%s*(.-)%s*$")

    local key = toLowerKey(trimmed)

    -- Normalize apostrophes. The right single quotation mark is a multi-byte
    -- UTF-8 sequence and Lua patterns operate on single bytes, so it must be
    -- matched as a literal sequence: inside a [...] class each of its bytes
    -- would be replaced separately, producing "'''" and corrupting any other
    -- character that shares one of those bytes.
    key = key:gsub("’", "'")
    key = key:gsub("`", "'")

    -- Remove periods, collapse whitespace runs to one space, and trim again
    -- (dropping a period can leave whitespace at either end).
    key = key:gsub("%.", ""):gsub("%s+", " "):match("^%s*(.-)%s*$")

    -- Unknown names fall through as the trimmed input.
    return mapping[key] or trimmed
end

return p