Module:CountryData
Appearance
Documentation for this module may be created at Module:CountryData/doc
-- Module:CountryData
-- Unified module for country data management, providing a single source of truth
-- for country names, codes, regions, and variations. Also includes country display
-- functionality (formerly in MultiCountryDisplay.lua).
--
-- Features:
-- * Loads country data from JSON stored in MediaWiki
-- * Normalizes country names to canonical forms
-- * Maps countries to ICANN regions
-- * Provides extensible property access
-- * Integrates with Semantic MediaWiki
-- * Formats country lists with region-specific styling
-- * Processes countries for category assignment
--
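-- Dependencies:
-- * Module:NormalizationDiacritic - For diacritic removal
-- * Module:ConfigRepository - For semantic property configuration (required lazily
--   inside getSemanticPropertyName)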
local DiacriticNormalization = require('Module:NormalizationDiacritic')
-- Module-level memoization state; all of it is cleared by resetCaches().
--   dataCache          decoded dataset (nil until loadData succeeds once)
--   nameLookupCache    normalized country name -> country code
--   regionLookupCache  normalized region name  -> region code
--   propertyCache      memo for the property getters
--   functionCache      memo for the remaining API functions
local dataCache, nameLookupCache, regionLookupCache
local propertyCache, functionCache = {}, {}
-- Skeleton dataset used whenever the JSON source cannot be loaded or decoded.
local DEFAULT_DATA = {
    icann_regions = {},
    countries = {},
    last_updated = os.date('!%Y-%m-%dT%H:%M:%SZ'),
    schema_version = 1,
}
--------------------------------------------------------------------------------
-- Helper Functions
--------------------------------------------------------------------------------
-- Normalize free-form text into a lookup key.
--
-- Steps: lowercase; drop apostrophes (straight, backtick, and the curly
-- U+2018/U+2019 forms) and periods; turn hyphens, en/em dashes, underscores
-- and slashes into spaces; collapse whitespace runs; trim both ends.
-- Diacritics are NOT removed here; callers do that separately.
--
-- @param text string|nil  Text to normalize.
-- @return string|nil      Normalized key; nil and "" are returned unchanged.
local function normalizeText(text)
    if not text or text == "" then
        return text
    end
    local key = text:lower()
    -- Lua patterns are byte-oriented: a multi-byte character placed inside a
    -- [...] class is treated as a set of independent bytes, which would also
    -- hit unrelated characters sharing those bytes (e.g. "À" = C3 80). Multi-byte
    -- punctuation is therefore matched as whole literal byte sequences.
    key = key:gsub("\226\128\152", "'")   -- U+2018 left single quotation mark
    key = key:gsub("\226\128\153", "'")   -- U+2019 right single quotation mark
    key = key:gsub("['`]", "")            -- remove all apostrophe forms
    key = key:gsub("%.", "")              -- remove periods
    key = key:gsub("\226\128\147", " ")   -- U+2013 en dash
    key = key:gsub("\226\128\148", " ")   -- U+2014 em dash
    key = key:gsub("[-_/]", " ")          -- ASCII hyphen, underscore, slash
    key = key:gsub("%s+", " ")            -- collapse whitespace runs
    -- Trim last, so separators converted to spaces at either end are removed too.
    key = key:gsub("^ ", ""):gsub(" $", "")
    return key
end
-- Build a string cache key from a function name and its arguments.
--
-- Parts are joined with ":"; every argument is passed through tostring(), so
-- nil arguments appear as the literal text "nil". The argument count comes
-- from select("#", ...) rather than #{...}, because the length of a table
-- containing nils is unspecified and could silently drop arguments.
--
-- @param funcName string  Name used as the key prefix.
-- @param ...              Arguments to fold into the key.
-- @return string          funcName followed by ":"-separated arguments.
local function createCacheKey(funcName, ...)
    local argCount = select("#", ...)
    local keyParts = { funcName }
    for i = 1, argCount do
        -- Extra parentheses truncate select() to exactly one value.
        keyParts[i + 1] = tostring((select(i, ...)))
    end
    return table.concat(keyParts, ":")
end
-- Report whether `tbl` is a table holding a non-nil value under `property`.
-- A falsy `tbl` (nil or false) is handed back unchanged; any other non-table
-- value yields false.
local function hasProperty(tbl, property)
    if not tbl then
        return tbl
    end
    if type(tbl) ~= "table" then
        return false
    end
    return tbl[property] ~= nil
end
--------------------------------------------------------------------------------
-- Data Loading Layer
--------------------------------------------------------------------------------
-- Return the normalized-name -> country-code lookup, building it on first use.
--
-- Keys are produced by normalizeText() from each country's `name`,
-- `canonical_name`, and every entry of its `variations` array. Official names
-- are registered first and variations never overwrite an existing key, so a
-- variation of one country cannot shadow another country's official name
-- depending on iteration order.
--
-- @param data table|nil  Dataset with a `countries` table keyed by code.
-- @return table          Lookup table. When `data` is unusable an empty table
--                        is returned and NOT cached, so a later call with
--                        real data can still build the lookup.
local function getNameLookup(data)
    if nameLookupCache then
        return nameLookupCache
    end
    if type(data) ~= "table" or type(data.countries) ~= "table" then
        return {}
    end
    local lookup = {}
    -- Register one spelling; non-string or empty values are skipped rather
    -- than crashing normalizeText or producing an empty key.
    local function register(text, code, keepExisting)
        if type(text) ~= "string" or text == "" then
            return
        end
        local key = normalizeText(text)
        if key == "" or (keepExisting and lookup[key] ~= nil) then
            return
        end
        lookup[key] = code
    end
    -- Pass 1: official names.
    for code, country in pairs(data.countries) do
        if type(country) == "table" then
            register(country.name, code, false)
            register(country.canonical_name, code, false)
        end
    end
    -- Pass 2: alternate spellings, which only fill keys still unused.
    for code, country in pairs(data.countries) do
        if type(country) == "table" and type(country.variations) == "table" then
            for _, variation in ipairs(country.variations) do
                register(variation, code, true)
            end
        end
    end
    nameLookupCache = lookup
    return lookup
end
-- Return the normalized-name -> region-code lookup, building it on first use.
--
-- Keys are produced by normalizeText() from each region's `name` and every
-- entry of its `variations` array. Region names are registered first and
-- variations never overwrite an existing key.
--
-- @param data table|nil  Dataset with an `icann_regions` table keyed by code.
-- @return table          Lookup table. When `data` is unusable an empty table
--                        is returned and NOT cached, so a later call with
--                        real data can still build the lookup.
local function getRegionLookup(data)
    if regionLookupCache then
        return regionLookupCache
    end
    if type(data) ~= "table" or type(data.icann_regions) ~= "table" then
        return {}
    end
    local lookup = {}
    -- Register one spelling; non-string or empty values are skipped rather
    -- than crashing normalizeText or producing an empty key.
    local function register(text, code, keepExisting)
        if type(text) ~= "string" or text == "" then
            return
        end
        local key = normalizeText(text)
        if key == "" or (keepExisting and lookup[key] ~= nil) then
            return
        end
        lookup[key] = code
    end
    -- Pass 1: region names.
    for code, region in pairs(data.icann_regions) do
        if type(region) == "table" then
            register(region.name, code, false)
        end
    end
    -- Pass 2: alternate spellings, which only fill keys still unused.
    for code, region in pairs(data.icann_regions) do
        if type(region) == "table" and type(region.variations) == "table" then
            for _, variation in ipairs(region.variations) do
                register(variation, code, true)
            end
        end
    end
    regionLookupCache = lookup
    return lookup
end
-- Decode JSON text into a table, or return nil.
-- Tries a plain mw.text.jsonDecode first, then again with JSON_TRY_FIXING.
-- Only table results are accepted; scalars and decode errors yield nil.
local function decodeJsonText(jsonText)
    if type(jsonText) ~= "string" or jsonText == "" then
        return nil
    end
    if not (mw.text and mw.text.jsonDecode) then
        return nil
    end
    local ok, decoded = pcall(mw.text.jsonDecode, jsonText)
    if ok and type(decoded) == "table" then
        return decoded
    end
    ok, decoded = pcall(mw.text.jsonDecode, jsonText, mw.text.JSON_TRY_FIXING)
    if ok and type(decoded) == "table" then
        return decoded
    end
    return nil
end

-- Load the country dataset, trying several sources in turn, and memoize it.
--
-- Sources, in order; the first one that yields a table wins:
--   1. frame:preprocess('{{MediaWiki:CountryData.json}}'), when a frame is given
--   2. mw.loadJsonData('MediaWiki:CountryData.json'), when that API exists
--   3. raw page content via mw.title.new(...):getContent()
-- Text from (1) and (3) goes through decodeJsonText(). A source whose text
-- cannot be decoded no longer ends the search; the next source is tried.
-- If every source fails, DEFAULT_DATA is used.
--
-- @param frame table|nil  Scribunto frame; optional.
-- @return table           Dataset whose `countries` and `icann_regions`
--                         fields are always tables. Cached in dataCache.
local function loadData(frame)
    -- Use the module-level cache if we already loaded data once
    if dataCache then
        return dataCache
    end
    local success, data = pcall(function()
        -- 1. Transclusion through the frame
        if type(frame) == "table" and frame.preprocess then
            local preprocessSuccess, preprocessResult = pcall(function()
                return frame:preprocess('{{MediaWiki:CountryData.json}}')
            end)
            if preprocessSuccess then
                local decoded = decodeJsonText(preprocessResult)
                if decoded then
                    return decoded
                end
            end
        end
        -- 2. mw.loadJsonData (preferred non-frame method)
        if mw.loadJsonData then
            local loadJsonSuccess, jsonData = pcall(mw.loadJsonData, 'MediaWiki:CountryData.json')
            if loadJsonSuccess and type(jsonData) == 'table' then
                return jsonData
            end
        end
        -- 3. Raw page content
        local pageTitle = mw.title.new('MediaWiki:CountryData.json')
        if pageTitle and pageTitle.exists then
            local contentSuccess, content = pcall(function()
                return pageTitle:getContent()
            end)
            if contentSuccess and type(content) == "string" and content ~= "" then
                -- Strip leading whitespace and a UTF-8 BOM (EF BB BF), in either order
                content = content:gsub("^%s+", "")
                if content:byte(1) == 239 and content:byte(2) == 187 and content:byte(3) == 191 then
                    content = content:sub(4)
                end
                content = content:gsub("^%s+", "")
                local decoded = decodeJsonText(content)
                if decoded then
                    return decoded
                end
            end
        end
        -- As absolute last resort, use local default data
        return DEFAULT_DATA
    end)
    if not success or type(data) ~= "table" then
        data = DEFAULT_DATA
    end
    -- Ensure the minimum structure. Tables returned by mw.loadJsonData are
    -- read-only, so missing sections are added to a shallow copy instead of
    -- being assigned into `data` itself.
    if type(data.countries) ~= "table" or type(data.icann_regions) ~= "table" then
        local patched = {}
        for key, value in pairs(data) do
            patched[key] = value
        end
        if type(patched.countries) ~= "table" then
            patched.countries = {}
        end
        if type(patched.icann_regions) ~= "table" then
            patched.icann_regions = {}
        end
        data = patched
    end
    dataCache = data
    return data
end
-- Drop every memoized value so the next call reloads and rebuilds everything.
-- The three lazily-built caches go back to nil; the two memo tables are
-- replaced with fresh empty tables.
local function resetCaches()
    dataCache, nameLookupCache, regionLookupCache = nil, nil, nil
    propertyCache, functionCache = {}, {}
end
--------------------------------------------------------------------------------
-- Core API Functions
--------------------------------------------------------------------------------
-- Public API table; this is the value the module returns.
local CountryData = {}
-- Public wrapper around the module-local loadData(): returns the country
-- dataset, loading and caching it on the first call.
-- @param frame table|nil  Optional frame, forwarded unchanged to loadData().
-- @return table           The dataset returned by loadData().
function CountryData.loadData(frame)
return loadData(frame)
end
-- Public wrapper around the module-local resetCaches(), which clears the
-- loaded dataset, both lookup tables, and the property/function memo tables.
-- Primarily for testing.
-- @return boolean  Always true.
function CountryData.resetCaches()
resetCaches()
return true
end
-- Get country data by ISO code (case-insensitive).
--
-- @param code string  Country code; upper-cased before lookup.
-- @return table|nil   The country record, or nil when the code is empty,
--                     not a string, or unknown.
function CountryData.getCountryByCode(code)
    if type(code) ~= "string" or code == "" then
        return nil
    end
    -- Upper-case before building the key so "us" and "US" share one entry.
    code = code:upper()
    local cacheKey = createCacheKey("getCountryByCode", code)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        -- `false` marks a remembered miss: assigning nil to a table field
        -- stores nothing, so misses need an explicit sentinel.
        return cached or nil
    end
    local data = loadData()
    local result = nil
    if data and data.countries and data.countries[code] then
        result = data.countries[code]
    end
    functionCache[cacheKey] = result or false
    return result
end
-- Get country data by name (official name or any listed variation).
--
-- The name is normalized with normalizeText() and looked up; if that misses,
-- the lookup is retried with diacritics removed.
--
-- @param name string  Country name in any supported spelling.
-- @return table|nil   The country record, or nil when the name is empty,
--                     not a string, or unrecognized.
function CountryData.getCountryByName(name)
    if type(name) ~= "string" or name == "" then
        return nil
    end
    local cacheKey = createCacheKey("getCountryByName", name)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        -- `false` marks a remembered miss: assigning nil to a table field
        -- stores nothing, so misses need an explicit sentinel.
        return cached or nil
    end
    local data = loadData()
    local nameLookup = getNameLookup(data)
    local normalized = normalizeText(name)
    local code = nameLookup[normalized]
    local result = nil
    if code and data.countries[code] then
        result = data.countries[code]
    else
        -- Retry with diacritics removed
        local stripped = DiacriticNormalization.removeDiacritics(normalized)
        if stripped ~= normalized then
            code = nameLookup[stripped]
            if code and data.countries[code] then
                result = data.countries[code]
            end
        end
    end
    functionCache[cacheKey] = result or false
    return result
end
-- Get the country code for a name (official name or any listed variation).
--
-- The name is normalized with normalizeText() and looked up; if that misses,
-- the lookup is retried with diacritics removed.
--
-- @param name string  Country name in any supported spelling.
-- @return any|nil     The key of the matching entry in data.countries, or
--                     nil when the name is empty, not a string, or unknown.
function CountryData.getCountryCodeByName(name)
    if type(name) ~= "string" or name == "" then
        return nil
    end
    local cacheKey = createCacheKey("getCountryCodeByName", name)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        -- `false` marks a remembered miss: assigning nil to a table field
        -- stores nothing, so misses need an explicit sentinel.
        return cached or nil
    end
    local data = loadData()
    local nameLookup = getNameLookup(data)
    local normalized = normalizeText(name)
    local code = nameLookup[normalized]
    if not code then
        -- Retry with diacritics removed
        local stripped = DiacriticNormalization.removeDiacritics(normalized)
        if stripped ~= normalized then
            code = nameLookup[stripped]
        end
    end
    functionCache[cacheKey] = code or false
    return code or nil
end
-- Normalize a country name to its display form.
--
-- @param name string|nil  Country name in any supported spelling.
-- @return string|nil      The matched country's `name` (or `canonical_name`
--                         when `name` is absent); "(Unrecognized)" when there
--                         is no match or the matched record carries neither
--                         field. nil and "" inputs are returned unchanged.
function CountryData.normalizeCountryName(name)
    if not name or name == "" then
        return name
    end
    local cacheKey = createCacheKey("normalizeCountryName", name)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end
    local country = CountryData.getCountryByName(name)
    local result = nil
    if country then
        result = country.name or country.canonical_name
    end
    -- Callers compare against "(Unrecognized)" and then format or concatenate
    -- the value, so a nameless record must not leak through as nil.
    if result == nil then
        result = "(Unrecognized)"
    end
    functionCache[cacheKey] = result
    return result
end
-- Look up the `icann_region` value of the country matching `name`.
-- @param name string|nil  Country name in any supported spelling.
-- @return string|nil      The country's `icann_region`; "(Unrecognized)" when
--                         the country is unknown or has no region; nil when
--                         `name` is nil or "".
function CountryData.getRegionByCountry(name)
    if not name or name == "" then
        return nil
    end
    local key = createCacheKey("getRegionByCountry", name)
    local remembered = functionCache[key]
    if remembered ~= nil then
        return remembered
    end
    local record = CountryData.getCountryByName(name)
    local region = (record and record.icann_region) or "(Unrecognized)"
    functionCache[key] = region
    return region
end
-- Get all countries in a specific region.
--
-- @param region string  Region name or variation (matched through
--                       normalizeText), or a region code that is a key of
--                       data.icann_regions (exact or upper-cased).
-- @return table         Array of { code = ..., name = ... } records sorted by
--                       code; empty when the region is unknown. The array is
--                       cached and shared between calls.
function CountryData.getCountriesByRegion(region)
    if type(region) ~= "string" or region == "" then
        return {}
    end
    local cacheKey = createCacheKey("getCountriesByRegion", region)
    if functionCache[cacheKey] ~= nil then
        return functionCache[cacheKey]
    end
    local data = loadData()
    local regionLookup = getRegionLookup(data)
    local regionCode = regionLookup[normalizeText(region)]
    -- Also accept the region code itself, which is what country records store
    -- in `icann_region`.
    if regionCode == nil and data.icann_regions then
        if data.icann_regions[region] ~= nil then
            regionCode = region
        elseif data.icann_regions[region:upper()] ~= nil then
            regionCode = region:upper()
        end
    end
    local result = {}
    if regionCode and data.countries then
        for code, country in pairs(data.countries) do
            if type(country) == "table" and country.icann_region == regionCode then
                result[#result + 1] = {
                    code = code,
                    name = country.name or country.canonical_name
                }
            end
        end
        -- pairs() order is unspecified; sort so output is stable between runs.
        table.sort(result, function(a, b)
            return tostring(a.code) < tostring(b.code)
        end)
    end
    functionCache[cacheKey] = result
    return result
end
-- Get the list of all country codes.
--
-- @return table  Array of the keys of data.countries, sorted so the order is
--                stable between runs (pairs() order is unspecified). The
--                array is cached and shared between calls.
function CountryData.getAllCountryCodes()
    local cacheKey = "getAllCountryCodes"
    if functionCache[cacheKey] ~= nil then
        return functionCache[cacheKey]
    end
    local data = loadData()
    local result = {}
    if data and data.countries then
        for code in pairs(data.countries) do
            result[#result + 1] = code
        end
        table.sort(result, function(a, b)
            return tostring(a) < tostring(b)
        end)
    end
    functionCache[cacheKey] = result
    return result
end
-- Get the list of all country display names.
--
-- Each country contributes its `name`, or `canonical_name` when `name` is
-- absent. Records carrying neither are skipped, so the result is always a
-- proper sequence with no nil holes.
--
-- @return table  Sorted array of names; cached and shared between calls.
function CountryData.getAllCountryNames()
    local cacheKey = "getAllCountryNames"
    if functionCache[cacheKey] ~= nil then
        return functionCache[cacheKey]
    end
    local data = loadData()
    local result = {}
    if data and data.countries then
        for _, country in pairs(data.countries) do
            local name = type(country) == "table" and (country.name or country.canonical_name)
            if name then
                result[#result + 1] = name
            end
        end
        -- pairs() order is unspecified; sort so output is stable between runs.
        table.sort(result, function(a, b)
            return tostring(a) < tostring(b)
        end)
    end
    functionCache[cacheKey] = result
    return result
end
-- Read one field of a country record, addressed by country code.
-- @param code string      Country code, resolved via getCountryByCode.
-- @param property string  Field name to read from the record.
-- @return any|nil         The field value; nil when either argument is empty,
--                         the country is unknown, or the field is absent.
-- Non-nil values are memoized in propertyCache. A nil result leaves no cache
-- entry, because assigning nil to a table field stores nothing.
function CountryData.getCountryProperty(code, property)
    if not code or code == "" then
        return nil
    end
    if not property or property == "" then
        return nil
    end
    local key = createCacheKey("getCountryProperty", code, property)
    local remembered = propertyCache[key]
    if remembered ~= nil then
        return remembered
    end
    local record = CountryData.getCountryByCode(code)
    local value
    if record then
        value = record[property]
    end
    propertyCache[key] = value
    return value
end
-- Read one field of a country record, addressed by country name.
-- @param name string      Country name, resolved via getCountryCodeByName.
-- @param property string  Field name to read from the record.
-- @return any|nil         The field value; nil when either argument is empty,
--                         the name is unrecognized, or the field is absent.
-- Non-nil values are memoized in propertyCache. A nil result leaves no cache
-- entry, because assigning nil to a table field stores nothing.
function CountryData.getCountryPropertyByName(name, property)
    if not name or name == "" then
        return nil
    end
    if not property or property == "" then
        return nil
    end
    local key = createCacheKey("getCountryPropertyByName", name, property)
    local remembered = propertyCache[key]
    if remembered ~= nil then
        return remembered
    end
    local value
    local code = CountryData.getCountryCodeByName(name)
    if code then
        value = CountryData.getCountryProperty(code, property)
    end
    propertyCache[key] = value
    return value
end
-- List the field names present on one country record.
--
-- The code is tried as given and then upper-cased, so lookups agree with
-- getCountryByCode(), which upper-cases its argument.
--
-- @param code string  Country code.
-- @return table       Sorted array of field names; empty when the code is
--                     empty, not a string, or unknown. Non-empty results are
--                     cached and shared between calls.
function CountryData.getAvailableProperties(code)
    if type(code) ~= "string" or code == "" then
        return {}
    end
    local cacheKey = createCacheKey("getAvailableProperties", code)
    if functionCache[cacheKey] ~= nil then
        return functionCache[cacheKey]
    end
    local data = loadData()
    if not data or not data.countries then
        return {}
    end
    local country = data.countries[code] or data.countries[code:upper()]
    if type(country) ~= "table" then
        return {}
    end
    local properties = {}
    for property in pairs(country) do
        properties[#properties + 1] = property
    end
    -- pairs() order is unspecified; sort so output is stable between runs.
    table.sort(properties, function(a, b)
        return tostring(a) < tostring(b)
    end)
    functionCache[cacheKey] = properties
    return properties
end
-- Get every distinct field name used by any country record.
--
-- @return table  Sorted array of unique field names; empty when no data is
--                available. The array is cached and shared between calls.
function CountryData.getAllPropertyNames()
    local cacheKey = "getAllPropertyNames"
    if functionCache[cacheKey] ~= nil then
        return functionCache[cacheKey]
    end
    local data = loadData()
    if not data or not data.countries then
        return {}
    end
    local properties = {}
    local seen = {}
    -- One pass is enough: Lua tables grow on demand, so counting first and
    -- then filling gains nothing.
    for _, country in pairs(data.countries) do
        if type(country) == "table" then
            for property in pairs(country) do
                if not seen[property] then
                    seen[property] = true
                    properties[#properties + 1] = property
                end
            end
        end
    end
    -- pairs() order is unspecified; sort so output is stable between runs.
    table.sort(properties, function(a, b)
        return tostring(a) < tostring(b)
    end)
    functionCache[cacheKey] = properties
    return properties
end
-- Read a country field by country name, substituting "(Unrecognized)" for
-- anything that cannot be resolved.
-- @param name string      Country name, resolved via getCountryCodeByName.
-- @param property string  Field name, read via getCountryProperty.
-- @return any             The field value, or "(Unrecognized)" when the name
--                         has no code or the field lookup yields nil.
function CountryData.getCountrySemanticProperty(name, property)
    local value
    local code = CountryData.getCountryCodeByName(name)
    if code then
        value = CountryData.getCountryProperty(code, property)
    end
    if value == nil then
        return "(Unrecognized)"
    end
    return value
end
-- Confirm that a semantic property key is declared in Module:ConfigRepository.
-- Scans every entry of ConfigRepository.templates and checks its
-- `semantics.additionalProperties` table for the key.
-- @param propertyKey string  Property key to look for.
-- @return string|nil         `propertyKey` itself when some template declares
--                            it, otherwise nil.
function CountryData.getSemanticPropertyName(propertyKey)
    local templates = require('Module:ConfigRepository').templates
    for _, config in pairs(templates) do
        local semantics = config.semantics
        local declared = semantics and semantics.additionalProperties
        if declared and declared[propertyKey] then
            return propertyKey
        end
    end
    return nil
end
-- Attach country and region semantic properties for a country list.
--
-- `countryValue` is split on ";". Each recognized country sets the
-- "Has country" property and, when its region is recognized, the
-- "Has ICANN region" property. With mw.smw available the values are set
-- through mw.smw.set; otherwise hidden {{#set:}} fragments are appended to
-- `semanticOutput`.
--
-- @param countryValue string|nil    Semicolon-separated country names.
-- @param semanticOutput string|nil  Existing output to extend; nil is treated
--                                   as "" when fragments are appended.
-- @return string|nil                `semanticOutput`, extended in the non-SMW
--                                   case; returned unchanged when there is
--                                   nothing to add or the property names are
--                                   not declared in ConfigRepository.
function CountryData.addCountrySemanticProperties(countryValue, semanticOutput)
    if not countryValue or countryValue == "" then
        return semanticOutput
    end
    local countryPropertyName = CountryData.getSemanticPropertyName("Has country")
    local regionPropertyName = CountryData.getSemanticPropertyName("Has ICANN region")
    -- Without both property names there is nothing we can set
    if not countryPropertyName or not regionPropertyName then
        return semanticOutput
    end
    local smw = mw.smw
    -- Non-SMW case: fragments are collected here and joined once at the end
    local propertyHtml = {}
    -- Set one property through SMW, or queue the equivalent hidden fragment.
    local function emit(propertyName, value)
        if smw then
            smw.set({ [propertyName] = value })
        else
            propertyHtml[#propertyHtml + 1] = '<div style="display:none;">'
            propertyHtml[#propertyHtml + 1] = ' {{#set: ' .. propertyName .. '=' .. value .. ' }}'
            propertyHtml[#propertyHtml + 1] = '</div>'
        end
    end
    for entry in string.gmatch(countryValue, "[^;]+") do
        local country = entry:match("^%s*(.-)%s*$")
        if country and country ~= "" then
            local normalizedCountry = CountryData.normalizeCountryName(country)
            -- Only process recognized countries; the nil check keeps a
            -- nameless record from reaching the string concatenation in emit().
            if normalizedCountry and normalizedCountry ~= "(Unrecognized)" then
                emit(countryPropertyName, normalizedCountry)
                local region = CountryData.getRegionByCountry(country)
                if region and region ~= "(Unrecognized)" then
                    emit(regionPropertyName, region)
                end
            end
        end
    end
    if not smw and #propertyHtml > 0 then
        -- A nil semanticOutput would make the concatenation raise an error
        semanticOutput = (semanticOutput or "") .. "\n" .. table.concat(propertyHtml, "\n")
    end
    return semanticOutput
end
-- Serialize the loaded dataset to a JSON string.
-- @return string  Output of mw.text.jsonEncode for the dataset, or '{}' when
--                 the dataset has no `countries`, the encoder is unavailable,
--                 or encoding fails.
function CountryData.exportAsJson()
    local data = loadData()
    local encode = mw.text and mw.text.jsonEncode
    if data and data.countries and encode then
        -- pcall keeps an encoder error from propagating to the caller
        local ok, json = pcall(encode, data)
        if ok and json then
            return json
        end
    end
    return '{}'
end
--------------------------------------------------------------------------------
-- Country Display Functions (Migrated from MultiCountryDisplay)
--------------------------------------------------------------------------------
-- CSS class for each region code that has a dedicated style; any other
-- recognized region falls through to "region-europe-africa".
local REGION_CSS_CLASSES = {
    NA = "region-americas",
    LAC = "region-americas",
    AP = "region-asia-pacific",
}
-- Map a region code to the CSS class used when rendering a country.
-- @param region string|nil  Region code, or "(Unrecognized)".
-- @return string            "region-default" for a missing or unrecognized
--                           region, otherwise the class for that region.
local function getRegionClass(region)
    if not region or region == "(Unrecognized)" then
        return "region-default"
    end
    return REGION_CSS_CLASSES[region] or "region-europe-africa"
end
-- Format a semicolon-separated country string as an HTML list.
--
-- Every recognized country becomes one <li> carrying its region-specific CSS
-- class, inside a single <ul>. One country and many countries share the same
-- markup, so both go through one code path. A country that appears more than
-- once (for example under two spellings) is listed once, at its first
-- position.
--
-- @param value string|nil  Semicolon-separated country names.
-- @return string           The <ul> markup, or "" when the input is empty or
--                          contains no recognized country.
function CountryData.formatCountryList(value)
    if not value or value == "" then return "" end
    local listItems = {}
    local seen = {}
    for entry in string.gmatch(value, "[^;]+") do
        local trimmed = entry:match("^%s*(.-)%s*$")
        if trimmed and trimmed ~= "" then
            local normalized = CountryData.normalizeCountryName(trimmed)
            -- Skip unrecognized names, nameless records (nil), and repeats
            if normalized and normalized ~= "(Unrecognized)" and not seen[normalized] then
                seen[normalized] = true
                local countryRegion = CountryData.getRegionByCountry(normalized)
                local regionClass = getRegionClass(countryRegion)
                listItems[#listItems + 1] =
                    string.format("<li class=\"%s\">%s</li>", regionClass, normalized)
            end
        end
    end
    if #listItems == 0 then
        return ""
    end
    return string.format("<ul class=\"template-list template-list-country\">%s</ul>",
        table.concat(listItems, ""))
end
-- Backward-compatibility alias: forwards to CountryData.formatCountryList
-- with the same argument and returns its result unchanged.
-- @param value string|nil  Semicolon-separated country names.
-- @return string           Whatever formatCountryList returns for `value`.
function CountryData.formatCountries(value)
return CountryData.formatCountryList(value)
end
-- Turn a semicolon-separated country string into display names suitable for
-- category assignment.
-- @param value string|nil  Semicolon-separated country names.
-- @return table            Array of normalized names in input order, leaving
--                          out entries that normalize to "(Unrecognized)";
--                          empty for nil or "" input.
function CountryData.getCountriesForCategories(value)
    if not value or value == "" then return {} end
    local recognized, total = {}, 0
    for entry in string.gmatch(value, "[^;]+") do
        local candidate = entry:match("^%s*(.-)%s*$")
        if candidate and candidate ~= "" then
            local canonical = CountryData.normalizeCountryName(candidate)
            if canonical ~= "(Unrecognized)" then
                total = total + 1
                recognized[total] = canonical
            end
        end
    end
    return recognized
end
-- Return the module for use
return CountryData