-- Jump to content

-- Module:CountryData

-- Documentation for this module may be created at Module:CountryData/doc

--[[
* Name: CountryData
* Author: Mark W. Datysgeld
* Description: Unified module for country data management with JSON loading, normalization, region mapping, and Semantic MediaWiki integration
* Notes: Loads from Data:CountryDataset.json; normalizes country names to canonical forms; maps countries to ICANN regions; provides extensible property access; formats country lists with region-specific emoji styling; processes countries for category assignment
]]

-- Dependencies
local DiacriticNormalization = require('Module:NormalizationDiacritic')
local NormalizationText = require('Module:NormalizationText')
local loader = require('Module:DatasetLoader')

-- Module-level cache tables for improved performance.
-- The three lookup caches start as nil and are filled on first use; the two
-- keyed caches start as empty tables.
local dataCache = nil          -- dataset table produced by loadData()
local nameLookupCache = nil    -- normalized country name/variation -> country code
local regionLookupCache = nil  -- normalized region name/variation -> region code
local propertyCache = {}       -- results of the getCountryProperty* functions
local functionCache = {}       -- results of the other public functions, keyed via createCacheKey

-- Default data structure intended for use if JSON loading fails.
-- It mirrors the fields loadData() copies from the dataset.
local DEFAULT_DATA = {
    schema_version = 1,
    -- Evaluated once, when this table is constructed; the leading '!' makes
    -- os.date format the timestamp in UTC.
    last_updated = os.date('!%Y-%m-%dT%H:%M:%SZ'),
    countries = {},
    icann_regions = {}
}

--------------------------------------------------------------------------------
-- Helper Functions
--------------------------------------------------------------------------------

-- Create a cache key from a function name and its arguments.
-- @param funcName  string  Name of the calling function; becomes the key prefix.
-- @param ...       any     Argument values; each one is converted with tostring().
-- @return          string  All parts joined with ":" (e.g. "fn:arg1:arg2").
local function createCacheKey(funcName, ...)
    -- select("#", ...) reports the real argument count, nil arguments included.
    -- The length of a packed {...} table is unspecified when it contains nil
    -- holes, which could silently drop arguments from the key.
    local argCount = select("#", ...)
    local keyParts = {funcName}
    for i = 1, argCount do
        -- tostring(nil) already yields "nil", so no fallback value is needed.
        -- The extra parentheses truncate select() to exactly one value.
        keyParts[i + 1] = tostring((select(i, ...)))
    end
    return table.concat(keyParts, ":")
end

-- Drop every module-level cache so the next call rebuilds it (handy in tests).
local function resetCaches()
    -- Keyed result caches get fresh, empty tables.
    propertyCache, functionCache = {}, {}
    -- Lazily built caches go back to the "not built yet" state.
    dataCache, nameLookupCache, regionLookupCache = nil, nil, nil
end

--------------------------------------------------------------------------------
-- Data Loading and Cache Building Layer (Refactored)
--------------------------------------------------------------------------------

-- Load the country dataset through DatasetLoader and memoize the result.
-- Only the fields this module uses are copied into the cached table.
-- If the loader does not hand back a table, DEFAULT_DATA is used instead so
-- that callers always receive a table with `countries` and `icann_regions`.
-- @return table  { countries, icann_regions, schema_version, last_updated }
local function loadData()
    if dataCache then
        return dataCache
    end
    local raw = loader.get('CountryDataset')
    -- Guard against a missing/invalid dataset: indexing a non-table would
    -- raise an error and take down every caller of this module.
    if type(raw) ~= 'table' then
        raw = DEFAULT_DATA
    end
    dataCache = {
        countries      = raw.countries      or {},
        icann_regions  = raw.icann_regions  or {},
        schema_version = raw.schema_version,
        last_updated   = raw.last_updated
    }
    return dataCache
end

-- Builds (once) the lookup table that maps normalized country names to codes.
-- Every canonical name and every variation is registered twice when relevant:
-- as-is, and with diacritics removed. Both forms go through text normalization.
-- @param data  table|nil  Dataset as returned by loadData().
-- @return      table      normalized name -> country code (empty if no data).
local function buildNameLookupCache(data)
    if nameLookupCache then
        return nameLookupCache
    end

    local index = {}

    -- Register one label for a code: first the label itself, then its
    -- diacritic-free form, but only when stripping actually changed it.
    local function register(label, isoCode)
        index[NormalizationText.normalizeText(label)] = isoCode

        local plain = DiacriticNormalization.removeDiacritics(label)
        if plain ~= label then
            index[NormalizationText.normalizeText(plain)] = isoCode
        end
    end

    if data and data.countries then
        for isoCode, entry in pairs(data.countries) do
            -- Canonical name first (either field may carry it).
            local primary = entry.name or entry.canonical_name
            if primary then
                register(primary, isoCode)
            end

            -- Then every alternative spelling, if the entry lists any.
            local aliases = entry.variations
            if aliases and type(aliases) == "table" then
                for _, alias in pairs(aliases) do
                    register(alias, isoCode)
                end
            end
        end
    end

    nameLookupCache = index
    return index
end

-- Builds (once) the lookup table that maps normalized region names and
-- region variations to their region code.
-- @param data  table|nil  Dataset as returned by loadData().
-- @return      table      normalized region label -> region code.
local function buildRegionLookupCache(data)
    if regionLookupCache then
        return regionLookupCache
    end

    local index = {}

    if data and data.icann_regions then
        for regionCode, entry in pairs(data.icann_regions) do
            if entry.name then
                index[NormalizationText.normalizeText(entry.name)] = regionCode
            end

            local aliases = entry.variations
            if aliases and type(aliases) == "table" then
                for _, alias in pairs(aliases) do
                    index[NormalizationText.normalizeText(alias)] = regionCode
                end
            end
        end
    end

    regionLookupCache = index
    return index
end

--------------------------------------------------------------------------------
-- Core API Functions (Public Interface)
--------------------------------------------------------------------------------

-- Table of public functions; returned at the end of the module.
local CountryData = {}

-- Load data and initialize caches.
-- Public wrapper around the local loadData(); the `frame` parameter is
-- accepted but not used.
-- @return table  The cached dataset table.
function CountryData.loadData(frame)
    return loadData()
end

-- Reset all caches (primarily for testing).
-- Public wrapper around the local resetCaches().
-- @return boolean  Always true.
function CountryData.resetCaches()
    resetCaches()
    return true
end

-- Get country data by ISO code.
-- The code is upper-cased before use, so "us" and "US" resolve to the same
-- entry and share one cache slot.
-- @param code  string     Country code in any letter case.
-- @return      table|nil  The country record, or nil if the code is empty,
--                         not a string, or unknown.
function CountryData.getCountryByCode(code)
    -- Non-string input cannot be a valid code (and has no :upper() method).
    if type(code) ~= "string" or code == "" then
        return nil
    end

    -- Standardize BEFORE building the cache key; otherwise each letter-case
    -- variant of the same code would get its own cache entry.
    code = code:upper()

    local cacheKey = createCacheKey("getCountryByCode", code)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local data = loadData()
    local result = data and data.countries and data.countries[code] or nil

    functionCache[cacheKey] = result
    return result
end

-- Get country data by name.
-- Lookup order: the text-normalized input first, then the text-normalized
-- diacritic-free form of the input (only if stripping changed anything).
-- Both hits and misses are cached; a miss is stored as `false` because
-- assigning nil to a table key stores nothing.
-- @param name  string     Country name or known variation.
-- @return      table|nil  The country record, or nil when unrecognized.
function CountryData.getCountryByName(name)
    if not name or name == "" then
        return nil
    end

    local cacheKey = createCacheKey("getCountryByName", name)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        -- `false` is the cached-miss sentinel; callers still receive nil.
        return cached or nil
    end

    local data = loadData()
    local nameLookup = buildNameLookupCache(data)

    -- Normalize the input name in one go (text normalization includes lowercasing)
    local normalized = NormalizationText.normalizeText(name)

    -- First, try a direct lookup with the normalized name
    local code = nameLookup[normalized]

    -- If not found, try looking up the diacritic-stripped version
    if not code then
        local stripped = DiacriticNormalization.removeDiacritics(name)
        if stripped ~= name then
            code = nameLookup[NormalizationText.normalizeText(stripped)]
        end
    end

    local result = nil
    if code then
        result = data.countries[code]
    end

    -- Store misses as `false` so repeated unknown names skip the work above.
    functionCache[cacheKey] = result or false
    return result
end

-- Get country code by name.
-- Resolves the code straight from the name lookup table (which already maps
-- normalized names to codes) instead of finding the country record first and
-- then scanning the whole dataset to recover its key.
-- Lookup order matches getCountryByName: normalized input first, then the
-- normalized diacritic-free form.
-- @param name  string      Country name or known variation.
-- @return      string|nil  The country code, or nil when unrecognized.
function CountryData.getCountryCodeByName(name)
    if not name or name == "" then
        return nil
    end

    local cacheKey = createCacheKey("getCountryCodeByName", name)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local data = loadData()
    local nameLookup = buildNameLookupCache(data)

    local code = nameLookup[NormalizationText.normalizeText(name)]
    if not code then
        local stripped = DiacriticNormalization.removeDiacritics(name)
        if stripped ~= name then
            code = nameLookup[NormalizationText.normalizeText(stripped)]
        end
    end

    -- Only report a code that actually resolves to a country record, which is
    -- what the previous record-based implementation guaranteed.
    if code and not data.countries[code] then
        code = nil
    end

    -- Only hits are cached: assigning nil to a table key stores nothing.
    if code then
        functionCache[cacheKey] = code
    end
    return code
end

-- NOTE: The functions below have not been refactored yet; they are to be updated in subsequent phases.

-- Map any recognized country name or variation to the record's `name` field.
-- @param name  string  Country name as entered.
-- @return      string  The canonical name, or "(Unrecognized)" when the input
--                      is empty, unknown, or the record has no `name`.
function CountryData.normalizeCountryName(name)
    if not name or name == "" then
        return "(Unrecognized)"
    end

    local key = createCacheKey("normalizeCountryName", name)
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    -- Start pessimistic; overwrite only on a usable match.
    local canonical = "(Unrecognized)"
    local record = CountryData.getCountryByName(name)
    if record and record.name then
        canonical = record.name
    end

    functionCache[key] = canonical
    return canonical
end

-- Look up the `icann_region` field of the country matching a name.
-- @param name  string  Country name or known variation.
-- @return      string  The region value, or "(Unrecognized)" when the input
--                      is empty, unknown, or the record has no region.
function CountryData.getRegionByCountry(name)
    if not name or name == "" then
        return "(Unrecognized)"
    end

    local key = createCacheKey("getRegionByCountry", name)
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    -- Start pessimistic; overwrite only on a usable match.
    local regionValue = "(Unrecognized)"
    local record = CountryData.getCountryByName(name)
    if record and record.icann_region then
        regionValue = record.icann_region
    end

    functionCache[key] = regionValue
    return regionValue
end

-- List the countries whose `icann_region` equals the code of the given region.
-- @param region  string  Region name or known variation.
-- @return        table   Array of { code = ..., name = ... } entries (order
--                        follows pairs() and is therefore unspecified);
--                        empty when the region is empty or unknown.
function CountryData.getCountriesByRegion(region)
    if not region or region == "" then
        return {}
    end

    local key = createCacheKey("getCountriesByRegion", region)
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local data = loadData()
    local regionIndex = buildRegionLookupCache(data)
    local wantedCode = regionIndex[NormalizationText.normalizeText(region)]

    local matches = {}
    if wantedCode and data.countries then
        for isoCode, entry in pairs(data.countries) do
            if entry.icann_region == wantedCode then
                matches[#matches + 1] = { code = isoCode, name = entry.name }
            end
        end
    end

    functionCache[key] = matches
    return matches
end

-- Collect every key of the dataset's `countries` table.
-- @return table  Array of country codes (order follows pairs()); cached
--                after the first call.
function CountryData.getAllCountryCodes()
    local key = "getAllCountryCodes"
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local codes = {}
    local data = loadData()
    local countries = data and data.countries
    if countries then
        for isoCode in pairs(countries) do
            codes[#codes + 1] = isoCode
        end
    end

    functionCache[key] = codes
    return codes
end

-- Collect the `name` field of every country in the dataset.
-- @return table  Array of names (order follows pairs()); cached after the
--                first call.
function CountryData.getAllCountryNames()
    local key = "getAllCountryNames"
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local names = {}
    local data = loadData()
    local countries = data and data.countries
    if countries then
        for _, entry in pairs(countries) do
            table.insert(names, entry.name)
        end
    end

    functionCache[key] = names
    return names
end

-- Read one field of a country record, addressed by country code.
-- @param code      string  Country code (resolved via getCountryByCode).
-- @param property  string  Field name to read from the record.
-- @return          any     The stored value (a stored `false` is returned as
--                          false), or nil when the code/property is empty,
--                          the country is unknown, or the field is absent.
function CountryData.getCountryProperty(code, property)
    if not code or code == "" or not property or property == "" then
        return nil
    end

    local cacheKey = createCacheKey("getCountryProperty", code, property)
    local cached = propertyCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local country = CountryData.getCountryByCode(code)

    -- Explicit branch instead of `country and country[property] or nil`:
    -- the and/or form would turn a stored `false` value into nil.
    local result = nil
    if country then
        result = country[property]
    end

    propertyCache[cacheKey] = result
    return result
end

-- Read one field of a country record, addressed by country name.
-- The name is first resolved to a code, then delegated to getCountryProperty.
-- @param name      string  Country name or known variation.
-- @param property  string  Field name to read from the record.
-- @return          any     The field value, or nil when either argument is
--                          empty or the name cannot be resolved.
function CountryData.getCountryPropertyByName(name, property)
    local missingName = not name or name == ""
    local missingProperty = not property or property == ""
    if missingName or missingProperty then
        return nil
    end

    local key = createCacheKey("getCountryPropertyByName", name, property)
    local cached = propertyCache[key]
    if cached ~= nil then
        return cached
    end

    local value = nil
    local isoCode = CountryData.getCountryCodeByName(name)
    if isoCode then
        value = CountryData.getCountryProperty(isoCode, property)
    end

    propertyCache[key] = value
    return value
end

-- List the field names present on one country record.
-- @param code  string  Country code.
-- @return      table   Array of field names (order follows pairs()); empty
--                      when the code is empty or unknown.
function CountryData.getAvailableProperties(code)
    if not code or code == "" then
        return {}
    end

    local key = createCacheKey("getAvailableProperties", code)
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local fieldNames = {}
    local record = CountryData.getCountryByCode(code)
    if record then
        for fieldName in pairs(record) do
            fieldNames[#fieldNames + 1] = fieldName
        end
    end

    functionCache[key] = fieldNames
    return fieldNames
end

-- List every distinct field name used by any country record in the dataset.
-- @return table  Array of unique field names (order follows pairs());
--                cached after the first call.
function CountryData.getAllPropertyNames()
    local key = "getAllPropertyNames"
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local fieldNames = {}
    local alreadyListed = {}  -- set of field names collected so far

    local data = loadData()
    local countries = data and data.countries
    if countries then
        for _, entry in pairs(countries) do
            for fieldName in pairs(entry) do
                if not alreadyListed[fieldName] then
                    alreadyListed[fieldName] = true
                    fieldNames[#fieldNames + 1] = fieldName
                end
            end
        end
    end

    functionCache[key] = fieldNames
    return fieldNames
end

-- Build the semantic property values for a semicolon-separated country list.
-- Property names come from ConfigRepository ("Has country" and
-- "Has ICANN region"). Unrecognized countries are skipped; a region is added
-- only when the country's region is recognized.
-- @param countryValue  string  One or more country names separated by ";".
-- @return              table   property name -> array of values; empty when
--                              the input is empty or a property name is missing.
function CountryData.getSemanticCountryRegionProperties(countryValue)
    local semanticProps = {}
    if not countryValue or countryValue == "" then
        return semanticProps
    end

    local ConfigRepository = require('Module:ConfigRepository')
    local countryKey = ConfigRepository.getSemanticPropertyName("Has country")
    local regionKey = ConfigRepository.getSemanticPropertyName("Has ICANN region")
    if not (countryKey and regionKey) then
        return semanticProps
    end

    -- Append a value under a property, creating the value list on first use.
    local function append(propertyName, item)
        local bucket = semanticProps[propertyName]
        if not bucket then
            bucket = {}
            semanticProps[propertyName] = bucket
        end
        table.insert(bucket, item)
    end

    for piece in string.gmatch(countryValue, "[^;]+") do
        local entered = piece:match("^%s*(.-)%s*$")
        if entered and entered ~= "" then
            local canonical = CountryData.normalizeCountryName(entered)
            if canonical ~= "(Unrecognized)" then
                append(countryKey, canonical)

                local regionValue = CountryData.getRegionByCountry(canonical)
                if regionValue and regionValue ~= "(Unrecognized)" then
                    append(regionKey, regionValue)
                end
            end
        end
    end

    return semanticProps
end

-- Serialize the loaded dataset with mw.text.jsonEncode.
-- @return string  The JSON text, or '{}' when there is no data, the encoder
--                 is unavailable, or encoding fails.
function CountryData.exportAsJson()
    local data = loadData()
    if not (data and data.countries) then
        return '{}'
    end

    if not (mw.text and mw.text.jsonEncode) then
        return '{}'
    end

    -- pcall keeps an encoder error from propagating to the caller.
    local ok, encoded = pcall(mw.text.jsonEncode, data)
    if ok and encoded then
        return encoded
    end
    return '{}'
end

-- Pick the CSS class used to style a list item for a given region value.
-- @param region  string|nil  Region value, or "(Unrecognized)".
-- @return        string      "region-default" for a missing/unrecognized
--                            region, a specific class for NA, LAC and AP,
--                            and "region-europe-africa" for anything else.
local function getRegionClass(region)
    if not region or region == "(Unrecognized)" then
        return "region-default"
    end

    local classByRegion = {
        NA  = "region-americas",
        LAC = "region-americas",
        AP  = "region-asia-pacific",
    }
    return classByRegion[region] or "region-europe-africa"
end

-- Render a country value as a bullet list through Module:ListGeneration.
-- The whole value is tried as a single country first (so names containing
-- "and" stay intact); only if that fails is it split on semicolons.
-- Each item is replaced by its canonical name plus a region CSS class;
-- unrecognized items are dropped by the item hook.
-- @param value  string  A country name or a ";"-separated list of names.
-- @return       The result of ListGeneration.createList, or "" for empty input.
function CountryData.formatCountryList(value)
    if not value or value == "" then
        return ""
    end

    local ListGeneration = require('Module:ListGeneration')
    local entries = {}

    if CountryData.normalizeCountryName(value) == "(Unrecognized)" then
        -- Not a single country: treat it as a list, splitting ONLY on ";"
        -- rather than letting ListGeneration guess the delimiter.
        for piece in string.gmatch(value, "[^;]+") do
            local cleaned = piece:match("^%s*(.-)%s*$")
            if cleaned and cleaned ~= "" then
                entries[#entries + 1] = cleaned
            end
        end
    else
        -- The entire string is one valid country; pass it through untouched.
        entries[1] = value
    end

    -- Item hook: canonical name + region class, or nil to exclude the item.
    local function renderCountry(countryName)
        local canonical = CountryData.normalizeCountryName(countryName)
        if canonical == "(Unrecognized)" then
            return nil
        end
        local regionValue = CountryData.getRegionByCountry(canonical)
        return {
            content = canonical,
            class = getRegionClass(regionValue)
        }
    end

    return ListGeneration.createList(entries, {
        mode = 'bullet',
        listClass = 'template-list-country',
        itemHook = renderCountry
    })
end

-- Alias of CountryData.formatCountryList: forwards `value` unchanged and
-- returns whatever formatCountryList returns.
function CountryData.formatCountries(value)
    return CountryData.formatCountryList(value)
end

-- Turn a ";"-separated country value into a list of canonical country names.
-- Blank pieces and unrecognized names are skipped.
-- @param value  string  One or more country names separated by ";".
-- @return       table   Array of canonical names, in input order.
function CountryData.getCountriesForCategories(value)
    if not value or value == "" then
        return {}
    end

    local recognized = {}
    for piece in string.gmatch(value, "[^;]+") do
        local cleaned = piece:match("^%s*(.-)%s*$")
        if cleaned and cleaned ~= "" then
            local canonical = CountryData.normalizeCountryName(cleaned)
            if canonical ~= "(Unrecognized)" then
                recognized[#recognized + 1] = canonical
            end
        end
    end

    return recognized
end

-- Build the flag file name ("Flag-xx.svg") for a country name or code.
-- Underscores in the input are treated as spaces. The input is tried as a
-- country name first; a two-character input that is a known code is accepted
-- as a fallback.
-- @param countryNameOrCode  string      Country name, variation, or code.
-- @return                   string|nil  File name with the lower-cased code,
--                                       or nil when nothing resolves to a
--                                       two-character code.
function CountryData.getFlagFileName(countryNameOrCode)
    if not countryNameOrCode or countryNameOrCode == '' then
        return nil
    end

    -- Assigning to a single local keeps only gsub's first result (the string).
    local query = countryNameOrCode:gsub('_', ' ')

    local isoCode = CountryData.getCountryCodeByName(query)

    -- Fallback: the input may already be a code; accept it only if known.
    if not isoCode and #query == 2 and CountryData.getCountryByCode(query) then
        isoCode = query:upper()
    end

    if not isoCode or #isoCode ~= 2 then
        return nil
    end

    return 'Flag-' .. string.lower(isoCode) .. '.svg'
end

return CountryData