Module:CountryData
Appearance
Documentation for this module may be created at Module:CountryData/doc
--[[
* Name: CountryData
* Author: Mark W. Datysgeld
* Description: Unified module for country data management with JSON loading, normalization, region mapping, and Semantic MediaWiki integration
* Notes: Loads from Data:CountryDataset.json; normalizes country names to canonical forms; maps countries to ICANN regions; provides extensible property access; formats country lists with region-specific emoji styling; processes countries for category assignment
]]
-- Dependencies
local DiacriticNormalization = require('Module:NormalizationDiacritic')
local NormalizationText = require('Module:NormalizationText')
local loader = require('Module:DatasetLoader')
-- Module-level memoization state. The three lookup caches start out nil and
-- are filled lazily; the two memo tables start out empty.
local dataCache, nameLookupCache, regionLookupCache
local propertyCache, functionCache = {}, {}

-- Empty dataset skeleton, intended as the fallback when JSON loading fails.
-- last_updated is the UTC time at which this module was loaded.
local DEFAULT_DATA = {
    ["schema_version"] = 1,
    ["last_updated"] = os.date('!%Y-%m-%dT%H:%M:%SZ'),
    ["countries"] = {},
    ["icann_regions"] = {},
}
--------------------------------------------------------------------------------
-- Helper Functions
--------------------------------------------------------------------------------
-- Build a cache key from a function name and its arguments.
-- @param funcName string  name of the calling function (used as key prefix)
-- @param ...      any     arguments; each one is passed through tostring()
-- @return string  funcName and the stringified arguments joined with ":"
--
-- The argument count is taken from select("#", ...) instead of #{...}: the
-- length of a table containing nils is not well defined, so a nil argument
-- could silently be dropped and two different calls could share one key.
local function createCacheKey(funcName, ...)
    local argCount = select("#", ...)
    local keyParts = { funcName }
    for i = 1, argCount do
        -- tostring(nil) already yields the string "nil"; the extra parentheses
        -- keep only the i-th argument out of everything select() returns.
        keyParts[i + 1] = tostring((select(i, ...)))
    end
    return table.concat(keyParts, ":")
end
-- Forget everything memoized at module level (useful for testing):
-- the two memo tables are replaced by fresh empty tables and the three
-- lazily built caches go back to nil.
local function resetCaches()
    dataCache, nameLookupCache, regionLookupCache = nil, nil, nil
    propertyCache, functionCache = {}, {}
end
--------------------------------------------------------------------------------
-- Data Loading and Cache Building Layer (Refactored)
--------------------------------------------------------------------------------
-- Load the country dataset through DatasetLoader and memoize the result.
-- @return table  with the fields countries, icann_regions, schema_version
--                and last_updated
--
-- When the loader returns anything other than a table, DEFAULT_DATA is used
-- in its place so the field reads below cannot fail with an
-- "attempt to index a nil value" error.
local function loadData()
    if dataCache then
        return dataCache
    end
    local raw = loader.get('CountryDataset')
    if type(raw) ~= "table" then
        raw = DEFAULT_DATA
    end
    dataCache = {
        countries = raw.countries or {},
        icann_regions = raw.icann_regions or {},
        schema_version = raw.schema_version,
        last_updated = raw.last_updated
    }
    return dataCache
end
-- Build (once) the table that maps normalized country names to dataset keys.
-- For the primary name (name, else canonical_name) and for every entry of
-- `variations`, up to two keys are registered: the text-normalized spelling
-- and, when removing diacritics changes the string, the text-normalized
-- diacritic-free spelling.
-- @param data table|nil  dataset; only data.countries is read
-- @return table  normalized name -> country key (empty without usable data)
local function buildNameLookupCache(data)
    if nameLookupCache then
        return nameLookupCache
    end

    local byName = {}
    if data and data.countries then
        -- Registers one spelling under its normalized and diacritic-free keys.
        local function register(spelling, countryCode)
            byName[NormalizationText.normalizeText(spelling)] = countryCode
            local plain = DiacriticNormalization.removeDiacritics(spelling)
            if plain ~= spelling then
                byName[NormalizationText.normalizeText(plain)] = countryCode
            end
        end

        for countryCode, entry in pairs(data.countries) do
            local primary = entry.name or entry.canonical_name
            if primary then
                register(primary, countryCode)
            end
            if entry.variations and type(entry.variations) == "table" then
                for _, alias in pairs(entry.variations) do
                    register(alias, countryCode)
                end
            end
        end
    end

    nameLookupCache = byName
    return byName
end
-- Build (once) the table that maps text-normalized region names and region
-- variations to the keys of data.icann_regions.
-- @param data table|nil  dataset; only data.icann_regions is read
-- @return table  normalized region name -> region key (empty without data)
local function buildRegionLookupCache(data)
    if regionLookupCache then
        return regionLookupCache
    end

    local byName = {}
    if data and data.icann_regions then
        for regionCode, entry in pairs(data.icann_regions) do
            if entry.name then
                byName[NormalizationText.normalizeText(entry.name)] = regionCode
            end
            local aliases = entry.variations
            if aliases and type(aliases) == "table" then
                for _, alias in pairs(aliases) do
                    byName[NormalizationText.normalizeText(alias)] = regionCode
                end
            end
        end
    end

    regionLookupCache = byName
    return byName
end
--------------------------------------------------------------------------------
-- Core API Functions (Public Interface)
--------------------------------------------------------------------------------
-- Public module table; the exported functions below are attached to it and it
-- is returned at the end of the file.
local CountryData = {}
-- Return the dataset table produced by the module-level loadData(), which
-- memoizes it after the first call.
-- @param frame  accepted but not used by this function
-- @return table  the value returned by loadData()
function CountryData.loadData(frame)
return loadData()
end
-- Clear all module-level caches by delegating to the local resetCaches()
-- (primarily for testing).
-- @return boolean  always true
function CountryData.resetCaches()
resetCaches()
return true
end
-- Fetch a country record by its dataset key (ISO code).
-- The code is upper-cased for the dataset lookup; the memo key is built from
-- the code exactly as it was passed in.
-- @param code string  country code in any letter case
-- @return table|nil   the country record, or nil when missing / empty input
function CountryData.getCountryByCode(code)
    if not code or code == "" then
        return nil
    end

    local memoKey = createCacheKey("getCountryByCode", code)
    local memoized = functionCache[memoKey]
    if memoized ~= nil then
        return memoized
    end

    local dataset = loadData()
    local upperCode = code:upper()
    local record
    if dataset and dataset.countries then
        record = dataset.countries[upperCode] or nil
    end

    functionCache[memoKey] = record
    return record
end
-- Fetch a country record by name or name variation.
-- The text-normalized input is tried first; if that misses and removing
-- diacritics changes the input, the normalized diacritic-free form is tried.
-- @param name string  country name as written by the user
-- @return table|nil   the country record, or nil when nothing matches
function CountryData.getCountryByName(name)
    if not name or name == "" then
        return nil
    end

    local memoKey = createCacheKey("getCountryByName", name)
    local memoized = functionCache[memoKey]
    if memoized ~= nil then
        return memoized
    end

    local dataset = loadData()
    local byName = buildNameLookupCache(dataset)

    local countryCode = byName[NormalizationText.normalizeText(name)]
    if not countryCode then
        local plain = DiacriticNormalization.removeDiacritics(name)
        if plain ~= name then
            countryCode = byName[NormalizationText.normalizeText(plain)]
        end
    end

    -- Misses are not memoized: storing nil in a table stores nothing.
    if not countryCode then
        return nil
    end

    local record = dataset.countries[countryCode]
    functionCache[memoKey] = record
    return record
end
-- Resolve a country name (or name variation) to its dataset key (ISO code).
-- @param name string  country name as written by the user
-- @return string|nil  the country code, or nil when the name is not recognized
--
-- The name lookup table already maps normalized names to codes, so the code is
-- read from it directly. This replaces a scan over every country that searched
-- for the record by table identity. Matching follows the same two steps as
-- getCountryByName: normalized input first, then the normalized
-- diacritic-free input when stripping changes the string.
function CountryData.getCountryCodeByName(name)
    if not name or name == "" then
        return nil
    end

    local cacheKey = createCacheKey("getCountryCodeByName", name)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local data = loadData()
    local nameLookup = buildNameLookupCache(data)

    local code = nameLookup[NormalizationText.normalizeText(name)]
    if not code then
        local stripped = DiacriticNormalization.removeDiacritics(name)
        if stripped ~= name then
            code = nameLookup[NormalizationText.normalizeText(stripped)]
        end
    end

    -- Only report a code that still resolves to a record in the dataset.
    -- Misses are not memoized (assigning nil to a table key stores nothing).
    if not code or not data.countries[code] then
        return nil
    end

    functionCache[cacheKey] = code
    return code
end
-- (The rest of the functions remain unchanged for now, but will be updated in subsequent phases)
-- Map a user-supplied country name to the canonical name from the dataset.
-- @param name string  country name or variation
-- @return string  the canonical name, or "(Unrecognized)" when the input is
--                 empty or matches no country
--
-- The canonical name is `name`, falling back to `canonical_name`. This is the
-- same rule buildNameLookupCache uses when indexing a record, so a record
-- that can be found by name is never reported as unrecognized.
function CountryData.normalizeCountryName(name)
    if not name or name == "" then
        return "(Unrecognized)"
    end

    local cacheKey = createCacheKey("normalizeCountryName", name)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local country = CountryData.getCountryByName(name)
    local result = "(Unrecognized)"
    if country then
        local canonicalName = country.name or country.canonical_name
        if canonicalName then
            result = canonicalName
        end
    end

    functionCache[cacheKey] = result
    return result
end
-- Return the icann_region field of the country matching `name`.
-- @param name string  country name or variation
-- @return any  the record's icann_region value, or the string
--              "(Unrecognized)" for empty input, an unknown country, or a
--              record without icann_region
function CountryData.getRegionByCountry(name)
    local FALLBACK = "(Unrecognized)"
    if not name or name == "" then
        return FALLBACK
    end

    local memoKey = createCacheKey("getRegionByCountry", name)
    local memoized = functionCache[memoKey]
    if memoized ~= nil then
        return memoized
    end

    local record = CountryData.getCountryByName(name)
    local regionValue = FALLBACK
    if record and record.icann_region then
        regionValue = record.icann_region
    end

    functionCache[memoKey] = regionValue
    return regionValue
end
-- List the countries whose icann_region equals the given region.
-- @param region string  region name, region variation, or a key of
--                       data.icann_regions (exact or upper-cased)
-- @return table  array of { code = <country key>, name = <country.name> },
--                sorted by code; empty when the region is unknown
--
-- Changes from the plain name lookup:
--  * A key of data.icann_regions is accepted directly. Those keys are what
--    country.icann_region is compared against below, so a value returned by
--    getRegionByCountry can be passed straight back in.
--  * The result is sorted, because pairs() visits the countries in an
--    unspecified order.
function CountryData.getCountriesByRegion(region)
    if not region or region == "" then
        return {}
    end

    local cacheKey = createCacheKey("getCountriesByRegion", region)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local data = loadData()
    local regionLookup = buildRegionLookupCache(data)
    local regionCode = regionLookup[NormalizationText.normalizeText(region)]

    -- Not a known name or variation: see whether the input is itself a key.
    if not regionCode and data.icann_regions then
        if data.icann_regions[region] then
            regionCode = region
        elseif type(region) == "string" and data.icann_regions[region:upper()] then
            regionCode = region:upper()
        end
    end

    local result = {}
    if regionCode and data.countries then
        for code, country in pairs(data.countries) do
            if country.icann_region == regionCode then
                result[#result + 1] = {
                    code = code,
                    name = country.name
                }
            end
        end
        table.sort(result, function(a, b)
            return tostring(a.code) < tostring(b.code)
        end)
    end

    functionCache[cacheKey] = result
    return result
end
-- Return every key of data.countries as an array.
-- @return table  array of country codes in ascending order
--
-- The list is sorted because pairs() iterates in an unspecified order;
-- without sorting, the same dataset could yield differently ordered lists.
-- The returned table is memoized and shared between callers.
function CountryData.getAllCountryCodes()
    local cacheKey = "getAllCountryCodes"
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local data = loadData()
    local result = {}
    if data and data.countries then
        for code in pairs(data.countries) do
            result[#result + 1] = code
        end
        -- tostring() keeps the comparison valid even for non-string keys.
        table.sort(result, function(a, b)
            return tostring(a) < tostring(b)
        end)
    end

    functionCache[cacheKey] = result
    return result
end
-- Return the canonical name of every country in the dataset.
-- @return table  array of names in ascending order
--
-- A record's name is `name`, falling back to `canonical_name` (the same rule
-- buildNameLookupCache applies); records with neither field are skipped.
-- The list is sorted because pairs() iterates in an unspecified order.
-- The returned table is memoized and shared between callers.
function CountryData.getAllCountryNames()
    local cacheKey = "getAllCountryNames"
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local data = loadData()
    local result = {}
    if data and data.countries then
        for _, country in pairs(data.countries) do
            local canonicalName = country.name or country.canonical_name
            if canonicalName then
                result[#result + 1] = canonicalName
            end
        end
        -- tostring() keeps the comparison valid even for non-string names.
        table.sort(result, function(a, b)
            return tostring(a) < tostring(b)
        end)
    end

    functionCache[cacheKey] = result
    return result
end
-- Read one field of a country record, addressed by country code.
-- @param code     string  country code (resolved via getCountryByCode)
-- @param property string  field name to read
-- @return any  the stored value, or nil when the input is empty, the country
--              is unknown, or the record has no such field
--
-- The value is read with an explicit `if` instead of
-- `country and country[property] or nil`, which would turn a stored boolean
-- `false` into nil.
function CountryData.getCountryProperty(code, property)
    if not code or code == "" or not property or property == "" then
        return nil
    end

    local cacheKey = createCacheKey("getCountryProperty", code, property)
    local cached = propertyCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local country = CountryData.getCountryByCode(code)
    local result = nil
    if country then
        result = country[property]
    end

    propertyCache[cacheKey] = result
    return result
end
-- Read one field of a country record, addressed by country name.
-- The name is resolved with getCountryCodeByName and the value is then
-- fetched through getCountryProperty.
-- @param name     string  country name or variation
-- @param property string  field name to read
-- @return any  the property value, or nil for empty input / unknown country
function CountryData.getCountryPropertyByName(name, property)
    if not name or name == "" or not property or property == "" then
        return nil
    end

    local memoKey = createCacheKey("getCountryPropertyByName", name, property)
    local memoized = propertyCache[memoKey]
    if memoized ~= nil then
        return memoized
    end

    local countryCode = CountryData.getCountryCodeByName(name)
    if not countryCode then
        -- Nothing to memoize: assigning nil to a table key stores nothing.
        return nil
    end

    local value = CountryData.getCountryProperty(countryCode, property)
    propertyCache[memoKey] = value
    return value
end
-- List the field names present on one country record.
-- @param code string  country code (resolved via getCountryByCode)
-- @return table  array of field names in pairs() order; empty for empty
--                input or an unknown code
function CountryData.getAvailableProperties(code)
    if not code or code == "" then
        return {}
    end

    local memoKey = createCacheKey("getAvailableProperties", code)
    local memoized = functionCache[memoKey]
    if memoized ~= nil then
        return memoized
    end

    local fieldNames = {}
    local record = CountryData.getCountryByCode(code)
    if record then
        for fieldName in pairs(record) do
            fieldNames[#fieldNames + 1] = fieldName
        end
    end

    functionCache[memoKey] = fieldNames
    return fieldNames
end
-- Collect the distinct field names used by any country record.
-- @return table  array of field names, each listed once, in the order they
--                are first encountered while iterating with pairs()
function CountryData.getAllPropertyNames()
    local memoKey = "getAllPropertyNames"
    local memoized = functionCache[memoKey]
    if memoized ~= nil then
        return memoized
    end

    local fieldNames = {}
    local dataset = loadData()
    if dataset and dataset.countries then
        local alreadyListed = {}
        for _, record in pairs(dataset.countries) do
            for fieldName in pairs(record) do
                if not alreadyListed[fieldName] then
                    alreadyListed[fieldName] = true
                    fieldNames[#fieldNames + 1] = fieldName
                end
            end
        end
    end

    functionCache[memoKey] = fieldNames
    return fieldNames
end
-- Turn a semicolon-separated country string into semantic property values.
-- The property names come from ConfigRepository.getSemanticPropertyName for
-- "Has country" and "Has ICANN region"; if either is missing, an empty table
-- is returned.
-- @param countryValue string  e.g. "Brazil; France"
-- @return table  property name -> array of values; recognized countries
--                contribute their normalized name and, when known, their
--                region
function CountryData.getSemanticCountryRegionProperties(countryValue)
    local collected = {}
    if not countryValue or countryValue == "" then
        return collected
    end

    local ConfigRepository = require('Module:ConfigRepository')
    local countryKey = ConfigRepository.getSemanticPropertyName("Has country")
    local regionKey = ConfigRepository.getSemanticPropertyName("Has ICANN region")
    if not (countryKey and regionKey) then
        return collected
    end

    -- Appends a value to the array stored under `key`, creating it on demand.
    local function append(key, item)
        collected[key] = collected[key] or {}
        table.insert(collected[key], item)
    end

    for piece in string.gmatch(countryValue, "[^;]+") do
        local trimmed = piece:match("^%s*(.-)%s*$")
        if trimmed and trimmed ~= "" then
            local canonical = CountryData.normalizeCountryName(trimmed)
            if canonical ~= "(Unrecognized)" then
                append(countryKey, canonical)
                local regionValue = CountryData.getRegionByCountry(canonical)
                if regionValue and regionValue ~= "(Unrecognized)" then
                    append(regionKey, regionValue)
                end
            end
        end
    end

    return collected
end
-- Serialize the loaded dataset with mw.text.jsonEncode.
-- @return string  the JSON text, or '{}' when there is no country data, the
--                 encoder is unavailable, or encoding fails
function CountryData.exportAsJson()
    local EMPTY_JSON = '{}'

    local dataset = loadData()
    if not (dataset and dataset.countries) then
        return EMPTY_JSON
    end
    if not (mw.text and mw.text.jsonEncode) then
        return EMPTY_JSON
    end

    -- pcall turns an encoder error into the empty-object fallback.
    local ok, encoded = pcall(mw.text.jsonEncode, dataset)
    if ok and encoded then
        return encoded
    end
    return EMPTY_JSON
end
-- Pick the CSS class used to style a list item for a region code.
-- @param region any  region code, nil, or the "(Unrecognized)" marker
-- @return string  "region-default" for nil/false/"(Unrecognized)",
--                 "region-americas" for "NA" and "LAC",
--                 "region-asia-pacific" for "AP",
--                 "region-europe-africa" for every other value
local function getRegionClass(region)
    if not region or region == "(Unrecognized)" then
        return "region-default"
    end
    local classByRegion = {
        NA = "region-americas",
        LAC = "region-americas",
        AP = "region-asia-pacific",
    }
    return classByRegion[region] or "region-europe-africa"
end
-- Render a country value as a bullet list via ListGeneration.createList.
-- If the whole value is itself a recognized country (this keeps names such as
-- "Trinidad and Tobago" in one piece) it is treated as a single item;
-- otherwise the value is split on semicolons only.
-- @param value string  one country or a semicolon-separated list
-- @return any  "" for empty input, otherwise whatever
--              ListGeneration.createList returns
function CountryData.formatCountryList(value)
    if not value or value == "" then
        return ""
    end

    local ListGeneration = require('Module:ListGeneration')
    local UNRECOGNIZED = "(Unrecognized)"

    local entries
    if CountryData.normalizeCountryName(value) ~= UNRECOGNIZED then
        entries = { value }
    else
        entries = {}
        for piece in string.gmatch(value, "[^;]+") do
            local trimmed = piece:match("^%s*(.-)%s*$")
            if trimmed and trimmed ~= "" then
                entries[#entries + 1] = trimmed
            end
        end
    end

    return ListGeneration.createList(entries, {
        mode = 'bullet',
        listClass = 'template-list-country',
        -- Item hook: recognized countries become { content, class };
        -- returning nil leaves unrecognized entries out of the list.
        itemHook = function(countryName)
            local canonical = CountryData.normalizeCountryName(countryName)
            if canonical == UNRECOGNIZED then
                return nil
            end
            return {
                content = canonical,
                class = getRegionClass(CountryData.getRegionByCountry(canonical))
            }
        end
    })
end
-- Alias of CountryData.formatCountryList: passes `value` through unchanged
-- and returns that function's result.
function CountryData.formatCountries(value)
return CountryData.formatCountryList(value)
end
-- Extract the recognized countries from a semicolon-separated string.
-- @param value string  e.g. "Brazil; Atlantis; France"
-- @return table  array of normalized country names in input order;
--                unrecognized and blank entries are left out
function CountryData.getCountriesForCategories(value)
    local recognized = {}
    if not value or value == "" then
        return recognized
    end

    for piece in string.gmatch(value, "[^;]+") do
        local trimmed = piece:match("^%s*(.-)%s*$")
        if trimmed and trimmed ~= "" then
            local canonical = CountryData.normalizeCountryName(trimmed)
            if canonical ~= "(Unrecognized)" then
                recognized[#recognized + 1] = canonical
            end
        end
    end

    return recognized
end
-- Build the flag file name ('Flag-xx.svg') for a country name or code.
-- Underscores in the input are replaced by spaces. The input is first
-- resolved as a country name; if that fails and the input is two characters
-- long, it is checked as a country code.
-- @param countryNameOrCode string
-- @return string|nil  the file name with the code in lower case, or nil when
--                     no two-character code could be determined
function CountryData.getFlagFileName(countryNameOrCode)
    if not countryNameOrCode or countryNameOrCode == '' then
        return nil
    end

    -- Assigning to one local keeps only gsub's first result (the new string).
    local candidate = countryNameOrCode:gsub('_', ' ')

    local isoCode = CountryData.getCountryCodeByName(candidate)
    if not isoCode and #candidate == 2 and CountryData.getCountryByCode(candidate) then
        isoCode = candidate:upper()
    end

    if isoCode and #isoCode == 2 then
        return 'Flag-' .. string.lower(isoCode) .. '.svg'
    end
    return nil
end
return CountryData