-- Page: Module:CountryData
-- Documentation for this module may be created at Module:CountryData/doc
-- Module:CountryData
-- Unified module for country data management.
--
-- Features:
-- * Loads country data from JSON stored in MediaWiki:CountryData.json
-- * Normalizes country names to canonical forms
-- * Maps countries to ICANN regions
-- * Provides extensible property access
-- * Integrates with Semantic MediaWiki
-- * Formats country lists with region-specific emoji styling
-- * Processes countries for category assignment
-- Dependencies
local DiacriticNormalization = require('Module:NormalizationDiacritic')
local NormalizationText = require('Module:NormalizationText')
-- Module-level cache tables for improved performance.
-- All five are cleared together by resetCaches().
-- Decoded country data table; nil until loadData() has run once.
local dataCache = nil
-- Normalized country name / variation / ISO-3 code -> country code; built lazily by getNameLookup().
local nameLookupCache = nil
-- Normalized region name / variation -> region code; built lazily by getRegionLookup().
local regionLookupCache = nil
-- Memoized results of getCountryProperty() and getCountryPropertyByName().
local propertyCache = {}
-- Memoized results of the other public API functions.
local functionCache = {}
-- Default data structure to use if JSON loading fails.
-- Its country and region tables are empty, so every lookup simply finds nothing.
local DEFAULT_DATA = {
schema_version = 1,
-- UTC timestamp evaluated once, when this table is constructed (the leading '!' selects UTC).
last_updated = os.date('!%Y-%m-%dT%H:%M:%SZ'),
countries = {},
icann_regions = {}
}
--------------------------------------------------------------------------------
-- Helper Functions
--------------------------------------------------------------------------------
-- Create a cache key from a function name and its arguments.
--
-- funcName: name of the function being memoized (first key segment).
-- ...:      the call arguments; each is converted with tostring().
-- Returns the segments joined with ":" (e.g. "getCountryProperty:US:name").
--
-- The argument count comes from select("#", ...) rather than #{...}: the
-- length operator is unreliable when an argument is nil, which would silently
-- drop segments and let different calls share a key.
local function createCacheKey(funcName, ...)
    local argCount = select("#", ...)
    local keyParts = { funcName }
    for i = 1, argCount do
        -- tostring(nil) is "nil", so nil arguments still occupy a segment.
        keyParts[i + 1] = tostring((select(i, ...)))
    end
    return table.concat(keyParts, ":")
end
-- Safely test whether a table carries a non-nil value under a key.
--
-- tbl:      value to inspect; anything that is not a table never has properties.
-- property: key to look up.
-- Returns true/false for truthy inputs; a falsy `tbl` (nil or false) is
-- handed back unchanged, so the result is always falsy in that case.
local function hasProperty(tbl, property)
    if not tbl then
        return tbl
    end
    if type(tbl) ~= "table" then
        return false
    end
    return tbl[property] ~= nil
end
--------------------------------------------------------------------------------
-- Data Loading Layer
--------------------------------------------------------------------------------
-- Get the name lookup table, building and caching it on first use.
--
-- data: decoded country data (expects a `countries` table keyed by country code).
-- Returns a table mapping normalized country names, variations and ISO-3
-- codes to the country code they belong to. When `data` or `data.countries`
-- is missing, an empty lookup is cached and returned.
--
-- Lua tables cannot be pre-sized from a count, so the lookup is built in a
-- single pass over the data.
local function getNameLookup(data)
    if nameLookupCache then
        return nameLookupCache
    end

    local lookup = {}
    if data and data.countries then
        for code, country in pairs(data.countries) do
            -- Primary display name
            local displayName = country.name or country.canonical_name
            if displayName then
                lookup[NormalizationText.normalizeText(displayName)] = code
            end

            -- Canonical name, when it differs from the display name
            if country.canonical_name and country.canonical_name ~= country.name then
                lookup[NormalizationText.normalizeText(country.canonical_name)] = code
            end

            -- Alternative spellings
            if country.variations and type(country.variations) == "table" then
                for _, variation in ipairs(country.variations) do
                    lookup[NormalizationText.normalizeText(variation)] = code
                end
            end

            -- ISO-3 code, registered both verbatim and normalized
            if country.iso_3 then
                lookup[country.iso_3] = code
                lookup[NormalizationText.normalizeText(country.iso_3)] = code
            end
        end
    end

    nameLookupCache = lookup
    return lookup
end
-- Get the region lookup table, building and caching it on first use.
--
-- data: decoded country data (expects an `icann_regions` table keyed by region code).
-- Returns a table mapping normalized region names and variations to their
-- region code. When `data` or `data.icann_regions` is missing, an empty
-- lookup is cached and returned.
--
-- Lua tables cannot be pre-sized from a count, so the lookup is built in a
-- single pass over the data.
local function getRegionLookup(data)
    if regionLookupCache then
        return regionLookupCache
    end

    local lookup = {}
    if data and data.icann_regions then
        for code, region in pairs(data.icann_regions) do
            -- Canonical region name
            if region.name then
                lookup[NormalizationText.normalizeText(region.name)] = code
            end

            -- Alternative spellings
            if region.variations and type(region.variations) == "table" then
                for _, variation in ipairs(region.variations) do
                    lookup[NormalizationText.normalizeText(variation)] = code
                end
            end
        end
    end

    regionLookupCache = lookup
    return lookup
end
-- Main data loading function with multiple fallback methods.
--
-- frame: optional Scribunto frame; when it offers `preprocess`, the JSON page
--        is transcluded through it.
-- Returns the country data table (always with `countries` and
-- `icann_regions` tables) and caches it for later calls.
--
-- Sources are tried in this order:
--   1. frame:preprocess('{{MediaWiki:CountryData.json}}')
--   2. mw.loadJsonData('MediaWiki:CountryData.json')
--   3. raw page content via mw.title + mw.text.jsonDecode
--   4. DEFAULT_DATA
-- Anything that does not decode to a table is rejected, so a JSON document
-- consisting of a bare string or number can no longer crash the code below.
local function loadData(frame)
    -- Use the module-level cache if we already loaded data once
    if dataCache then
        return dataCache
    end

    local success, data = pcall(function()
        local jsonText

        -- Get the JSON content using frame:preprocess if available
        if frame and type(frame) == "table" and frame.preprocess then
            local preprocessSuccess, preprocessResult = pcall(function()
                return frame:preprocess('{{MediaWiki:CountryData.json}}')
            end)
            if preprocessSuccess and preprocessResult then
                jsonText = preprocessResult
            end
        end

        -- No text from the frame: fall back to direct content loading
        if not jsonText then
            -- Try using mw.loadJsonData first (preferred method)
            if mw.loadJsonData then
                local loadJsonSuccess, jsonData = pcall(function()
                    return mw.loadJsonData('MediaWiki:CountryData.json')
                end)
                if loadJsonSuccess and jsonData and type(jsonData) == 'table' then
                    return jsonData
                end
            end

            -- Direct content loading approach as fallback
            local pageTitle = mw.title.new('MediaWiki:CountryData.json')
            if not pageTitle or not pageTitle.exists then
                return DEFAULT_DATA
            end

            local contentSuccess, content = pcall(function()
                return pageTitle:getContent()
            end)
            if contentSuccess and content and content ~= "" then
                -- Remove leading whitespace and a UTF-8 BOM (EF BB BF) that
                -- might confuse the decoder
                content = content:gsub("^%s+", "")
                if content:byte(1) == 239 and content:byte(2) == 187 and content:byte(3) == 191 then
                    content = content:sub(4)
                end
                jsonText = content
            else
                return DEFAULT_DATA
            end
        end

        if jsonText and mw.text and mw.text.jsonDecode then
            -- First try a plain decode (no flags)
            local jsonDecodeSuccess, jsonData = pcall(function()
                return mw.text.jsonDecode(jsonText)
            end)
            if jsonDecodeSuccess and type(jsonData) == "table" then
                return jsonData
            end

            -- If that failed, retry with the JSON_TRY_FIXING flag
            jsonDecodeSuccess, jsonData = pcall(function()
                return mw.text.jsonDecode(jsonText, mw.text.JSON_TRY_FIXING)
            end)
            if jsonDecodeSuccess and type(jsonData) == "table" then
                return jsonData
            end
        end

        -- As absolute last resort, use local default data
        return DEFAULT_DATA
    end)

    if not success or type(data) ~= "table" then
        data = DEFAULT_DATA
    end

    -- Ensure minimum data structure. The missing sections are added to a
    -- shallow copy instead of the loaded table, because tables returned by
    -- mw.loadJsonData are read-only and assigning to them raises an error.
    if not data.countries or not data.icann_regions then
        local patched = {}
        for key, value in pairs(data) do
            patched[key] = value
        end
        patched.countries = patched.countries or {}
        patched.icann_regions = patched.icann_regions or {}
        data = patched
    end

    dataCache = data
    return data
end
-- Drop every module-level cache (useful for testing).
-- The lazily built caches go back to nil so they are rebuilt on next use;
-- the memoization tables are replaced with fresh empty tables.
local function resetCaches()
    dataCache, nameLookupCache, regionLookupCache = nil, nil, nil
    propertyCache, functionCache = {}, {}
end
--------------------------------------------------------------------------------
-- Core API Functions
--------------------------------------------------------------------------------
-- Public API table returned by this module.
local CountryData = {}

-- Public entry point for loading the country data.
-- frame: optional Scribunto frame, forwarded to the internal loader.
-- Returns the (cached) data table produced by the internal loadData().
function CountryData.loadData(frame)
    local data = loadData(frame)
    return data
end
-- Public wrapper that clears all module caches (primarily for testing).
-- Always returns true.
function CountryData.resetCaches()
    resetCaches()

    return true
end
-- Get country data by ISO code.
--
-- code: country code string; matched case-insensitively (upper-cased before lookup).
-- Returns the country record from `data.countries`, or nil when the code is
-- empty, not a string, or unknown.
--
-- The cache key is built from the upper-cased code so "us" and "US" share one
-- entry. Because a nil cannot be stored in a table, misses are remembered as
-- `false` and translated back to nil on the way out.
function CountryData.getCountryByCode(code)
    if type(code) ~= "string" or code == "" then
        return nil
    end

    -- Standardize code to uppercase for consistency
    code = code:upper()

    local cacheKey = createCacheKey("getCountryByCode", code)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached or nil
    end

    local data = loadData()
    local result = nil
    if data and data.countries and data.countries[code] then
        result = data.countries[code]
    end

    functionCache[cacheKey] = result or false
    return result
end
-- Get country data by name (including variations).
--
-- name: country name, variation or ISO-3 code as typed by an editor.
-- Returns the matching country record, or nil when nothing matches.
--
-- The name is normalized first; if that misses, a second lookup is made with
-- diacritics removed. Misses are remembered as `false` (a nil cannot be
-- stored in a table) so repeated unknown names skip the normalization work.
function CountryData.getCountryByName(name)
    if not name or name == "" then
        return nil
    end

    local cacheKey = createCacheKey("getCountryByName", name)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached or nil
    end

    local data = loadData()
    local nameLookup = getNameLookup(data)

    -- Normalize the input and look up the code
    local normalized = NormalizationText.normalizeText(name)
    local code = nameLookup[normalized]
    local result = code and data.countries[code] or nil

    if not result then
        -- Try with diacritics removed
        local stripped = DiacriticNormalization.removeDiacritics(normalized)
        if stripped ~= normalized then
            code = nameLookup[stripped]
            result = code and data.countries[code] or nil
        end
    end

    functionCache[cacheKey] = result or false
    return result
end
-- Get country code by name.
--
-- name: country name, variation or ISO-3 code as typed by an editor.
-- Returns the country code registered for that name, or nil when nothing matches.
--
-- The name is normalized first; if that misses, a second lookup is made with
-- diacritics removed. Misses are remembered as `false` (a nil cannot be
-- stored in a table) so repeated unknown names skip the normalization work.
function CountryData.getCountryCodeByName(name)
    if not name or name == "" then
        return nil
    end

    local cacheKey = createCacheKey("getCountryCodeByName", name)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached or nil
    end

    local data = loadData()
    local nameLookup = getNameLookup(data)

    -- Normalize the input and look up the code
    local normalized = NormalizationText.normalizeText(name)
    local code = nameLookup[normalized]
    if not code then
        -- Try with diacritics removed
        local stripped = DiacriticNormalization.removeDiacritics(normalized)
        if stripped ~= normalized then
            code = nameLookup[stripped]
        end
    end

    functionCache[cacheKey] = code or false
    return code
end
-- Normalize a country name to its category-friendly display form, with an
-- "(Unrecognized)" fallback.
--
-- name: country name, variation or ISO-3 code as typed by an editor.
-- Returns:
--   * `name` unchanged when it is nil or "";
--   * the record's `name` (or `canonical_name`) with commas replaced by a
--     single space and " and the " shortened to " and ";
--   * "(Unrecognized)" when no record matches or the record has no usable name.
--
-- Commas are replaced by a space rather than removed outright, so
-- "Korea, Republic of" becomes "Korea Republic of" instead of gluing the
-- words together.
function CountryData.normalizeCountryName(name)
    if not name or name == "" then
        return name
    end

    local cacheKey = createCacheKey("normalizeCountryName", name)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local country = CountryData.getCountryByName(name)
    local displayName = country and (country.name or country.canonical_name)

    local result
    if type(displayName) == "string" then
        -- Only the first return value of the gsub chain (the string) is kept.
        result = displayName
            :gsub("%s*,%s*", " ")            -- commas become a single space
            :gsub("%sand the%s+", " and ")   -- " and the " -> " and "
            :gsub("^%s+", "")                -- trim what a leading comma leaves behind
            :gsub("%s+$", "")                -- trim what a trailing comma leaves behind
    else
        -- No match, or a record without any name
        result = "(Unrecognized)"
    end

    functionCache[cacheKey] = result
    return result
end
-- Look up the ICANN region recorded for a country.
--
-- name: country name, variation or ISO-3 code.
-- Returns nil for a nil/empty name, the record's `icann_region` when the
-- country is known and has one, and "(Unrecognized)" otherwise (the same
-- marker normalizeCountryName uses).
function CountryData.getRegionByCountry(name)
    if not name or name == "" then
        return nil
    end

    local key = createCacheKey("getRegionByCountry", name)
    local cached = functionCache[key]
    if cached ~= nil then
        return cached
    end

    local record = CountryData.getCountryByName(name)
    local region = record and record.icann_region or "(Unrecognized)"

    functionCache[key] = region
    return region
end
-- Get all countries in a specific region.
--
-- region: region name or variation (matched after normalization), or a
--         region code exactly as used for the keys of `icann_regions`
--         (also tried upper-cased).
-- Returns an array of { code = <country code>, name = <display name> }
-- entries; empty when the region is nil, empty or unknown. The order follows
-- pairs() and is therefore unspecified. The returned table is cached and
-- shared between callers.
function CountryData.getCountriesByRegion(region)
    if not region or region == "" then
        return {}
    end

    local cacheKey = createCacheKey("getCountriesByRegion", region)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local data = loadData()
    local regionLookup = getRegionLookup(data)

    -- Resolve by normalized name / variation first
    local regionCode = regionLookup[NormalizationText.normalizeText(region)]

    -- Otherwise accept a region code directly, so the value returned by
    -- getRegionByCountry can be passed straight back in.
    if not regionCode and type(region) == "string" and data.icann_regions then
        if data.icann_regions[region] then
            regionCode = region
        elseif data.icann_regions[region:upper()] then
            regionCode = region:upper()
        end
    end

    local result = {}
    if regionCode and data.countries then
        for code, country in pairs(data.countries) do
            if country.icann_region == regionCode then
                result[#result + 1] = {
                    code = code,
                    name = country.name or country.canonical_name
                }
            end
        end
    end

    functionCache[cacheKey] = result
    return result
end
-- Get list of all country codes.
--
-- Returns an array holding every key of `data.countries`. The order follows
-- pairs() and is therefore unspecified. The returned table is cached and
-- shared between callers.
function CountryData.getAllCountryCodes()
    local cacheKey = "getAllCountryCodes"
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local data = loadData()
    local result = {}
    if data and data.countries then
        -- Single pass: Lua arrays grow on demand, so no counting is needed.
        for code in pairs(data.countries) do
            result[#result + 1] = code
        end
    end

    functionCache[cacheKey] = result
    return result
end
-- Get list of all country display names.
--
-- Returns an array with each record's `name` (or `canonical_name` when `name`
-- is absent). Records that have neither are skipped, so the result never
-- contains nil holes and is safe to use with # and ipairs. The order follows
-- pairs() and is therefore unspecified. The returned table is cached and
-- shared between callers.
function CountryData.getAllCountryNames()
    local cacheKey = "getAllCountryNames"
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local data = loadData()
    local result = {}
    if data and data.countries then
        for _, country in pairs(data.countries) do
            local name = country.name or country.canonical_name
            if name ~= nil then
                result[#result + 1] = name
            end
        end
    end

    functionCache[cacheKey] = result
    return result
end
-- Read a single field of a country record, addressed by country code.
--
-- code:     country code, resolved through getCountryByCode.
-- property: name of the field to read.
-- Returns the field's value, or nil when either argument is nil/empty, the
-- country is unknown, or the field is absent. Only non-nil values end up in
-- the cache, since storing nil in a table stores nothing.
function CountryData.getCountryProperty(code, property)
    local missingCode = not code or code == ""
    local missingProperty = not property or property == ""
    if missingCode or missingProperty then
        return nil
    end

    local key = createCacheKey("getCountryProperty", code, property)
    local cached = propertyCache[key]
    if cached ~= nil then
        return cached
    end

    local value
    local record = CountryData.getCountryByCode(code)
    if record then
        value = record[property]
    end

    propertyCache[key] = value
    return value
end
-- Read a single field of a country record, addressed by country name.
--
-- name:     country name, variation or ISO-3 code.
-- property: name of the field to read.
-- Returns the field's value, or nil when either argument is nil/empty, the
-- name does not resolve to a code, or the field is absent. Only non-nil
-- values end up in the cache, since storing nil in a table stores nothing.
function CountryData.getCountryPropertyByName(name, property)
    local missingName = not name or name == ""
    local missingProperty = not property or property == ""
    if missingName or missingProperty then
        return nil
    end

    local key = createCacheKey("getCountryPropertyByName", name, property)
    local cached = propertyCache[key]
    if cached ~= nil then
        return cached
    end

    local value = nil
    local code = CountryData.getCountryCodeByName(name)
    if code then
        value = CountryData.getCountryProperty(code, property)
    end

    propertyCache[key] = value
    return value
end
-- List all available properties for a country.
--
-- code: country code; tried exactly as given first and then upper-cased, so
--       lower-case input resolves the same way it does in getCountryByCode.
-- Returns an array of the field names present on that country's record;
-- empty when the code is nil, empty or unknown. The order follows pairs()
-- and is therefore unspecified. Non-empty results are cached and shared
-- between callers.
function CountryData.getAvailableProperties(code)
    if not code or code == "" then
        return {}
    end

    local cacheKey = createCacheKey("getAvailableProperties", code)
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local data = loadData()
    if not data or not data.countries then
        return {}
    end

    local country = data.countries[code]
    if not country and type(code) == "string" then
        country = data.countries[code:upper()]
    end
    if not country then
        return {}
    end

    -- Single pass: Lua arrays grow on demand, so no counting is needed.
    local properties = {}
    for property in pairs(country) do
        properties[#properties + 1] = property
    end

    functionCache[cacheKey] = properties
    return properties
end
-- Get all unique property names across all countries.
--
-- Returns an array in which every field name used by at least one country
-- record appears exactly once. The order follows pairs() and is therefore
-- unspecified. The returned table is cached and shared between callers.
function CountryData.getAllPropertyNames()
    local cacheKey = "getAllPropertyNames"
    local cached = functionCache[cacheKey]
    if cached ~= nil then
        return cached
    end

    local data = loadData()
    if not data or not data.countries then
        return {}
    end

    -- One pass is enough: `seen` filters duplicates while the array grows.
    local properties = {}
    local seen = {}
    for _, country in pairs(data.countries) do
        for property in pairs(country) do
            if not seen[property] then
                seen[property] = true
                properties[#properties + 1] = property
            end
        end
    end

    functionCache[cacheKey] = properties
    return properties
end
-- Confirm that a semantic property is declared in ConfigRepository.
--
-- propertyKey: property name to look for, e.g. "Has country".
-- Returns `propertyKey` itself when any template configuration lists it
-- under `semantics.additionalProperties`; nil otherwise.
function CountryData.getSemanticPropertyName(propertyKey)
    local ConfigRepository = require('Module:ConfigRepository')

    for _, config in pairs(ConfigRepository.templates) do
        -- Templates without a semantics section simply do not match.
        local semantics = config.semantics
        local declared = semantics and semantics.additionalProperties
        if declared and declared[propertyKey] then
            return propertyKey
        end
    end

    return nil
end
-- Get semantic properties for countries and regions.
--
-- countryValue: semicolon-separated list of country names.
-- Returns a table that maps the country property name to an array of
-- normalized country names and the region property name to an array of
-- region values, ready to be merged into the batch processing system.
-- The table is empty when the input is nil/empty, when either property is
-- not declared in ConfigRepository, or when no country is recognized.
--
-- Each value is emitted once per property: listing a country twice, or
-- several countries of the same region, does not produce duplicates.
function CountryData.getSemanticCountryRegionProperties(countryValue)
    local properties = {}
    if not countryValue or countryValue == "" then
        return properties
    end

    -- Get property names from ConfigRepository; without both we can't proceed
    local countryPropertyName = CountryData.getSemanticPropertyName("Has country")
    local regionPropertyName = CountryData.getSemanticPropertyName("Has ICANN region")
    if not countryPropertyName or not regionPropertyName then
        return properties
    end

    -- Values already emitted, tracked per property name
    local seen = { [countryPropertyName] = {}, [regionPropertyName] = {} }

    -- Append `value` to the property's array unless it is already there.
    local function addValue(propertyName, value)
        if seen[propertyName][value] then
            return
        end
        seen[propertyName][value] = true

        local values = properties[propertyName]
        if not values then
            values = {}
            properties[propertyName] = values
        end
        values[#values + 1] = value
    end

    -- Split the multi-value string and process each entry
    for entry in string.gmatch(countryValue, "[^;]+") do
        local trimmed = entry:match("^%s*(.-)%s*$")
        if trimmed and trimmed ~= "" then
            local normalizedCountry = CountryData.normalizeCountryName(trimmed)
            -- Only process recognized countries
            if normalizedCountry ~= "(Unrecognized)" then
                addValue(countryPropertyName, normalizedCountry)

                local region = CountryData.getRegionByCountry(trimmed)
                if region and region ~= "(Unrecognized)" then
                    addValue(regionPropertyName, region)
                end
            end
        end
    end

    return properties
end
-- Serialize the loaded country data to a JSON string (for JavaScript usage).
--
-- Returns the output of mw.text.jsonEncode for the whole data table, or the
-- literal '{}' when the data is unavailable, the encoder is missing, or
-- encoding fails.
function CountryData.exportAsJson()
    local data = loadData()
    local haveData = data and data.countries
    local haveEncoder = mw.text and mw.text.jsonEncode

    if haveData and haveEncoder then
        -- pcall keeps an encoder error from escaping to the caller.
        local ok, encoded = pcall(mw.text.jsonEncode, data)
        if ok and encoded then
            return encoded
        end
    end

    return '{}'
end
--------------------------------------------------------------------------------
-- Country Display Functions with contextual emoji
--------------------------------------------------------------------------------
-- Map a region code to the CSS class used when displaying a country.
--
-- region: region code as stored on a country record, or "(Unrecognized)".
-- Returns "region-default" for a missing or unrecognized region,
-- "region-americas" for "NA" and "LAC", "region-asia-pacific" for "AP",
-- and "region-europe-africa" for every other value.
local function getRegionClass(region)
    if not region or region == "(Unrecognized)" then
        return "region-default"
    end

    local classByRegion = {
        NA = "region-americas",
        LAC = "region-americas",
        AP = "region-asia-pacific"
    }
    return classByRegion[region] or "region-europe-africa"
end
-- Format a list of countries from a semicolon-separated string.
--
-- value: semicolon-separated country names as typed by an editor.
-- Returns an HTML <ul class="template-list template-list-country"> with one
-- <li> per recognized country (a single country still yields a one-item
-- list), or "" when the input is nil/empty or no country is recognized.
-- Each <li> carries the CSS class of that country's own region.
--
-- The region is resolved from the editor's original entry, not from the
-- category-friendly display name: that name has had commas and "the"
-- rewritten and may no longer match any lookup key, which would leave the
-- item with the default region class.
function CountryData.formatCountryList(value)
    if not value or value == "" then
        return ""
    end

    local listItems = {}
    for entry in string.gmatch(value, "[^;]+") do
        local trimmed = entry:match("^%s*(.-)%s*$")
        if trimmed and trimmed ~= "" then
            local displayName = CountryData.normalizeCountryName(trimmed)
            -- Only include recognized countries
            if displayName ~= "(Unrecognized)" then
                local regionClass = getRegionClass(CountryData.getRegionByCountry(trimmed))
                listItems[#listItems + 1] =
                    string.format("<li class=\"%s\">%s</li>", regionClass, displayName)
            end
        end
    end

    if #listItems == 0 then
        return ""
    end

    return string.format("<ul class=\"template-list template-list-country\">%s</ul>",
        table.concat(listItems, ""))
end
-- Backward-compatible alias: older callers use this name for formatCountryList.
-- value: semicolon-separated country names; passed through unchanged.
-- Returns whatever formatCountryList returns for `value`.
function CountryData.formatCountries(value)
    local formatted = CountryData.formatCountryList(value)
    return formatted
end
-- Collect the normalized country names to use for category assignment.
--
-- value: semicolon-separated country names.
-- Returns an array of normalized names in input order, leaving out blank
-- entries and anything normalizeCountryName reports as "(Unrecognized)".
-- Returns an empty table for nil or empty input.
function CountryData.getCountriesForCategories(value)
    if not value or value == "" then
        return {}
    end

    local recognized = {}
    for entry in string.gmatch(value, "[^;]+") do
        local trimmed = entry:match("^%s*(.-)%s*$")
        if trimmed and trimmed ~= "" then
            local normalized = CountryData.normalizeCountryName(trimmed)
            if normalized ~= "(Unrecognized)" then
                recognized[#recognized + 1] = normalized
            end
        end
    end

    return recognized
end
-- Return the module for use
-- Flag filename lookup.
--
-- countryNameOrCode: a two-letter country code or a country name / variation /
--                    ISO-3 code; underscores are treated as spaces.
-- Returns 'Flag_of_<CODE>.svg', or nil when the input is nil/empty or the
-- name does not resolve to a country code.
--
-- Any two-character input is taken to be a code and upper-cased without being
-- checked against the data. Longer input is resolved straight through
-- getCountryCodeByName, which already normalizes and honours variations;
-- going through normalizeCountryName first would hand it the
-- category-friendly display name, which may no longer match any lookup key.
function CountryData.getFlagFileName(countryNameOrCode)
    if not countryNameOrCode or countryNameOrCode == '' then
        return nil
    end

    -- Normalize input: replace underscores with spaces.
    -- gsub also returns a match count; only the string is kept.
    local input = countryNameOrCode:gsub('_', ' ')

    local code
    if #input == 2 then
        code = string.upper(input)
    else
        code = CountryData.getCountryCodeByName(input)
    end

    if not code or code == '' then
        return nil
    end
    return 'Flag_of_' .. code .. '.svg'
end
return CountryData