-- Jump to content

-- Module:RegionalMappingICANN

-- From ICANNWiki

-- Documentation for this module may be created at Module:RegionalMappingICANN/doc

-- Module:RegionalMappingICANN
-- Module for mapping countries to their respective geographic regions according to ICANN's regional structure

-- AF = Africa
-- AP = Asia/Australia/Pacific
-- EUR = Europe
-- LAC = Latin America/Caribbean
-- NA = North America

local p = {}

-- Dependencies
local countryNormalization = require('Module:CountryNormalization')

-- Lookup table from accepted region spellings to standardized region codes
-- (AF, AP, EUR, LAC, NA).
-- Keys must be written in lowercase: p.normalizeRegion trims and lowercases its
-- input before indexing this table, which is what makes matching case-insensitive.
local regionNameToCode = {
    -- Full names and partial names (each part of a combined region maps to the same code)
    ["africa"] = "AF",
    ["asia/australia/pacific"] = "AP",
    ["asia pacific"] = "AP",
    ["asia"] = "AP",
    ["australia"] = "AP",
    ["pacific"] = "AP",
    ["europe"] = "EUR",
    ["latin america/caribbean"] = "LAC",
    ["latin america"] = "LAC",
    ["caribbean"] = "LAC",
    ["north america"] = "NA",
    
    -- Codes themselves (passthrough), so a value that is already a code maps to
    -- its canonical uppercase form
    ["af"] = "AF",
    ["ap"] = "AP",
    ["eur"] = "EUR",
    ["lac"] = "LAC", 
    ["na"] = "NA"
}

-- Region mapping table based on ICANN's regional structure.
-- Keys are country/territory names; values are one of the region codes
-- AF, AP, EUR, LAC or NA.
-- At module load every key is also passed through
-- countryNormalization.formatCountry, and the result is registered as an
-- additional lookup key for the same region.
-- NOTE(review): many overseas territories are listed under the region of what
-- is presumably their administering country (e.g. Anguilla and French Polynesia
-- under EUR, Guam under NA), while some Caribbean entries are split between
-- EUR and LAC (e.g. Curaçao = EUR but Sint Maarten (Dutch part) = LAC).
-- Verify these against ICANN's published region list before changing them.
local rawCountryToRegionMap = {
    ["Afghanistan"] = "AP",
    ["Åland Islands"] = "EUR",
    ["Albania"] = "EUR",
    ["Algeria"] = "AF",
    ["American Samoa"] = "NA",
    ["Andorra"] = "EUR",
    ["Angola"] = "AF",
    ["Anguilla"] = "EUR",
    ["Antarctica"] = "AP",
    ["Antigua and Barbuda"] = "LAC",
    ["Argentina"] = "LAC",
    ["Armenia"] = "AP",
    ["Aruba"] = "EUR",
    ["Ascension Island"] = "EUR",
    ["Australia"] = "AP",
    ["Austria"] = "EUR",
    ["Azerbaijan"] = "AP",
    ["Bahamas"] = "LAC",
    ["Bahrain"] = "AP",
    ["Bangladesh"] = "AP",
    ["Barbados"] = "LAC",
    ["Belarus"] = "EUR",
    ["Belgium"] = "EUR",
    ["Belize"] = "LAC",
    ["Benin"] = "AF",
    ["Bermuda"] = "EUR",
    ["Bhutan"] = "AP",
    ["Plurinational State of Bolivia"] = "LAC",
    ["Bonaire, Sint Eustatius and Saba"] = "LAC",
    ["Bosnia and Herzegovina"] = "EUR",
    ["Botswana"] = "AF",
    ["Bouvet Island"] = "EUR",
    ["Brazil"] = "LAC",
    ["British Indian Ocean Territory"] = "EUR",
    ["Brunei Darussalam"] = "AP",
    ["Bulgaria"] = "EUR",
    ["Burkina Faso"] = "AF",
    ["Burundi"] = "AF",
    ["Cambodia"] = "AP",
    ["Cameroon"] = "AF",
    ["Canada"] = "NA",
    ["Cabo Verde"] = "AF",
    ["Cayman Islands"] = "EUR",
    ["Central African Republic"] = "AF",
    ["Republic of Chad"] = "AF",
    ["Chile"] = "LAC",
    ["China"] = "AP",
    ["Christmas Island"] = "AP",
    ["Cocos (Keeling) Islands"] = "AP",
    ["Colombia"] = "LAC",
    ["Comoros"] = "AF",
    ["Republic of Congo"] = "AF",
    ["The Democratic Republic of the Congo"] = "AF",
    ["Cook Islands"] = "AP",
    ["Costa Rica"] = "LAC",
    ["Côte d'Ivoire"] = "AF",
    ["Croatia"] = "EUR",
    ["Cuba"] = "LAC",
    ["Curaçao"] = "EUR",
    ["Cyprus"] = "AP",
    ["Czech Republic"] = "EUR",
    ["Denmark"] = "EUR",
    ["Djibouti"] = "AF",
    ["The Commonwealth of Dominica"] = "LAC",
    ["Dominican Republic"] = "LAC",
    ["Ecuador"] = "LAC",
    ["Egypt"] = "AF",
    ["El Salvador"] = "LAC",
    ["Equatorial Guinea"] = "AF",
    ["Eritrea"] = "AF",
    ["Estonia"] = "EUR",
    ["Eswatini"] = "AF",
    ["Ethiopia"] = "AF",
    ["Falkland Islands (Malvinas)"] = "EUR",
    ["Faroe Islands"] = "EUR",
    ["Fiji"] = "AP",
    ["Finland"] = "EUR",
    ["France"] = "EUR",
    ["French Guiana"] = "EUR",
    ["French Polynesia"] = "EUR",
    ["French Southern Territories"] = "EUR",
    ["Gabon"] = "AF",
    ["Republic of Gambia"] = "AF",
    ["Georgia"] = "AP",
    ["Germany"] = "EUR",
    ["Ghana"] = "AF",
    ["Gibraltar"] = "EUR",
    ["Greece"] = "EUR",
    ["Greenland"] = "EUR",
    ["Grenada"] = "LAC",
    ["Guadeloupe"] = "EUR",
    ["Guam"] = "NA",
    ["Guatemala"] = "LAC",
    ["Guernsey"] = "EUR",
    ["Republic of Guinea"] = "AF",
    ["Guinea-Bissau"] = "AF",
    ["Guyana"] = "LAC",
    ["Haiti"] = "LAC",
    ["Heard Island and McDonald Islands"] = "AP",
    ["Holy See (Vatican City State)"] = "EUR",
    ["Honduras"] = "LAC",
    ["Hong Kong, China"] = "AP",
    ["Hungary"] = "EUR",
    ["Iceland"] = "EUR",
    ["India"] = "AP",
    ["Indonesia"] = "AP",
    ["Islamic Republic of Iran"] = "AP",
    ["Iraq"] = "AP",
    ["Ireland"] = "EUR",
    ["Isle of Man"] = "EUR",
    ["Israel"] = "AP",
    ["Italy"] = "EUR",
    ["Jamaica"] = "LAC",
    ["Japan"] = "AP",
    ["Jersey"] = "EUR",
    ["Jordan"] = "AP",
    ["Kazakhstan"] = "AP",
    ["Kenya"] = "AF",
    ["Kiribati"] = "AP",
    ["Democratic People's Republic of Korea"] = "AP",
    ["Republic of Korea"] = "AP",
    ["Kuwait"] = "AP",
    ["Kyrgyz Republic"] = "AP",
    ["Lao People's Democratic Republic"] = "AP",
    ["Latvia"] = "EUR",
    ["Lebanon"] = "AP",
    ["Lesotho"] = "AF",
    ["Liberia"] = "AF",
    ["Libya"] = "AF",
    ["Liechtenstein"] = "EUR",
    ["Lithuania"] = "EUR",
    ["Luxembourg"] = "EUR",
    ["Macao, China"] = "AP",
    ["Madagascar"] = "AF",
    ["Malawi"] = "AF",
    ["Malaysia"] = "AP",
    ["Maldives"] = "AP",
    ["Mali"] = "AF",
    ["Malta"] = "EUR",
    ["Republic of the Marshall Islands"] = "AP",
    ["Martinique"] = "EUR",
    ["Mauritania"] = "AF",
    ["Mauritius"] = "AF",
    ["Mayotte"] = "EUR",
    ["Mexico"] = "LAC",
    ["Federated States of Micronesia"] = "AP",
    ["Republic of Moldova"] = "EUR",
    ["Monaco"] = "EUR",
    ["Mongolia"] = "AP",
    ["Montenegro"] = "EUR",
    ["Montserrat"] = "EUR",
    ["Morocco"] = "AF",
    ["Mozambique"] = "AF",
    ["Republic of the Union of Myanmar"] = "AP",
    ["Namibia"] = "AF",
    ["Nauru"] = "AP",
    ["Nepal"] = "AP",
    ["Netherlands"] = "EUR",
    ["New Caledonia"] = "EUR",
    ["New Zealand"] = "AP",
    ["Nicaragua"] = "LAC",
    ["Niger"] = "AF",
    ["Nigeria"] = "AF",
    ["Niue"] = "AP",
    ["Norfolk Island"] = "AP",
    ["North Macedonia"] = "EUR",
    ["Northern Mariana Islands"] = "NA",
    ["Norway"] = "EUR",
    ["Oman"] = "AP",
    ["Pakistan"] = "AP",
    ["Palau"] = "AP",
    ["Palestine"] = "AP",
    ["Republic of Panama"] = "LAC",
    ["Papua New Guinea"] = "AP",
    ["Paraguay"] = "LAC",
    ["Peru"] = "LAC",
    ["Philippines"] = "AP",
    ["Pitcairn"] = "EUR",
    ["Poland"] = "EUR",
    ["Portugal"] = "EUR",
    ["Puerto Rico"] = "NA",
    ["Qatar"] = "AP",
    ["Réunion"] = "EUR",
    ["Romania"] = "EUR",
    ["Russian Federation"] = "EUR",
    ["Rwanda"] = "AF",
    ["Saint Barthélemy"] = "LAC",
    ["Saint Helena, Ascension and Tristan da Cunha"] = "EUR",
    ["Saint Kitts and Nevis"] = "LAC",
    ["Saint Lucia"] = "LAC",
    ["Saint Martin (French part)"] = "LAC",
    ["Saint Pierre and Miquelon"] = "EUR",
    ["Saint Vincent and the Grenadines"] = "LAC",
    ["Samoa"] = "AP",
    ["San Marino"] = "EUR",
    ["Sao Tome and Principe"] = "AF",
    ["Saudi Arabia"] = "AP",
    ["Senegal"] = "AF",
    ["Serbia"] = "EUR",
    ["Seychelles"] = "AF",
    ["Sierra Leone"] = "AF",
    ["Singapore"] = "AP",
    ["Sint Maarten (Dutch part)"] = "LAC",
    ["Slovakia"] = "EUR",
    ["Slovenia"] = "EUR",
    ["Solomon Islands"] = "AP",
    ["Somalia"] = "AF",
    ["South Africa"] = "AF",
    ["South Georgia and the South Sandwich Islands"] = "EUR",
    ["South Sudan"] = "AF",
    ["Spain"] = "EUR",
    ["Sri Lanka"] = "AP",
    ["Sudan"] = "AF",
    ["Republic of Suriname"] = "LAC",
    ["Svalbard and Jan Mayen"] = "EUR",
    ["Sweden"] = "EUR",
    ["Switzerland"] = "EUR",
    ["Syrian Arab Republic"] = "AP",
    ["Chinese Taipei"] = "AP",
    ["Tajikistan"] = "AP",
    ["United Republic of Tanzania"] = "AF",
    ["Thailand"] = "AP",
    ["Timor-Leste"] = "AP",
    ["Togo"] = "AF",
    ["Tokelau"] = "AP",
    ["Tonga"] = "AP",
    ["Trinidad and Tobago"] = "LAC",
    ["Tunisia"] = "AF",
    ["Republic of Türkiye"] = "AP",
    ["Turkmenistan"] = "AP",
    ["Turks and Caicos Islands"] = "EUR",
    ["Tuvalu"] = "AP",
    ["Uganda"] = "AF",
    ["Ukraine"] = "EUR",
    ["United Arab Emirates"] = "AP",
    ["United Kingdom of Great Britain and Northern Ireland"] = "EUR",
    ["United States of America"] = "NA",
    ["United States Minor Outlying Islands"] = "NA",
    ["Uruguay"] = "LAC",
    ["Uzbekistan"] = "AP",
    ["Vanuatu"] = "AP",
    ["Bolivarian Republic of Venezuela"] = "LAC",
    ["Viet Nam"] = "AP",
    ["Virgin Islands, British"] = "EUR",
    ["Virgin Islands, U.S."] = "NA",
    ["Wallis And Futuna"] = "EUR",
    ["Western Sahara"] = "AF",
    ["Yemen"] = "AP",
    ["Zambia"] = "AF",
    ["Zimbabwe"] = "AF"
}

-- Lookup map from country name to region code. It is keyed by every name as
-- written in rawCountryToRegionMap and, additionally, by the normalized form of
-- each of those names as returned by countryNormalization.formatCountry.
local normalizedCountryToRegionMap = {}

-- Fill the lookup map on module initialization.
do
    -- Pass 1: register every raw name first, so that an explicitly listed
    -- name can never have its region replaced by another entry's alias.
    local sortedCountryNames = {}
    for country, region in pairs(rawCountryToRegionMap) do
        normalizedCountryToRegionMap[country] = region
        sortedCountryNames[#sortedCountryNames + 1] = country
    end

    -- pairs() visits keys in an unspecified order; walking a sorted list makes
    -- the outcome reproducible if two raw names normalize to the same alias.
    table.sort(sortedCountryNames)

    -- Pass 2: register normalized aliases, first come first served.
    -- NOTE(review): if formatCountry returns one shared placeholder for names it
    -- does not recognize, that placeholder would be registered here and would
    -- give unknown input a region in p.getRegion — confirm formatCountry's contract.
    for _, country in ipairs(sortedCountryNames) do
        local normalizedCountry = countryNormalization.formatCountry(country)

        -- Skip nil/non-string results (a nil table key would raise an error at
        -- load time), empty strings, and keys that are already registered
        -- (this also covers the case where the normalized name equals the raw one).
        if type(normalizedCountry) == "string"
            and normalizedCountry ~= ""
            and normalizedCountryToRegionMap[normalizedCountry] == nil then
            normalizedCountryToRegionMap[normalizedCountry] = rawCountryToRegionMap[country]
        end
    end
end

-- Normalize a region name to its standard region code.
--
-- The input is trimmed and lowercased, then looked up in regionNameToCode.
--
-- @param regionName  string or nil: a region name or code in any letter case
-- @return  the matching region code ("AF", "AP", "EUR", "LAC", "NA");
--          the trimmed input unchanged when it is not a known name or code;
--          nil when the input is nil, empty, or contains only whitespace
function p.normalizeRegion(regionName)
    if not regionName then
        return nil
    end

    -- Trim before the emptiness check so that a whitespace-only value is
    -- treated the same as an empty one instead of being passed through.
    local trimmed = regionName:match("^%s*(.-)%s*$")
    if trimmed == "" then
        return nil
    end

    -- Unknown names are passed through in trimmed form, matching the form
    -- that was used for the lookup.
    return regionNameToCode[trimmed:lower()] or trimmed
end

-- Resolve a single country name to its region code.
--
-- @param countryName  string or nil: the country name to look up
-- @return  the region code registered for the normalized country name, or nil
--          when the input is nil/empty or no region is registered for it
function p.getRegion(countryName)
    if countryName and countryName ~= "" then
        -- The lookup map is keyed by normalized names, so normalize first
        local lookupKey = countryNormalization.formatCountry(countryName)
        return normalizedCountryToRegionMap[lookupKey]
    end

    return nil
end

-- Collect the distinct regions for a semicolon-separated list of countries.
--
-- @param countries  string or nil: country names separated by ";"
-- @return  a list of region codes without duplicates, in order of first
--          appearance; countries for which p.getRegion returns nil contribute
--          nothing. An empty list is returned for nil or empty input.
function p.getRegionsForCountries(countries)
    local uniqueRegions = {}
    if not countries or countries == "" then
        return uniqueRegions
    end

    -- Set of region codes that are already in the result list
    local alreadyListed = {}

    for segment in string.gmatch(countries, "[^;]+") do
        -- Strip surrounding whitespace from each entry before the lookup
        local code = p.getRegion(segment:match("^%s*(.-)%s*$"))

        if code and not alreadyListed[code] then
            alreadyListed[code] = true
            uniqueRegions[#uniqueRegions + 1] = code
        end
    end

    return uniqueRegions
end

-- Normalize a semicolon-separated list of region names.
--
-- @param regionsString  string or nil: region names or codes separated by ";"
-- @return  a list holding the result of p.normalizeRegion for each entry, in
--          input order (duplicates are kept); entries for which
--          p.normalizeRegion returns nil are skipped. An empty list is
--          returned for nil or empty input.
function p.normalizeRegions(regionsString)
    local codes = {}

    if regionsString and regionsString ~= "" then
        for piece in string.gmatch(regionsString, "[^;]+") do
            -- Strip surrounding whitespace from each entry before normalizing
            local code = p.normalizeRegion(piece:match("^%s*(.-)%s*$"))

            if code then
                codes[#codes + 1] = code
            end
        end
    end

    return codes
end

-- Pick the primary region for a semicolon-separated list of countries.
--
-- @param countries  string or nil: country names separated by ";"
-- @return  the first region found by p.getRegionsForCountries, or nil when
--          no region was found
function p.getPrimaryRegion(countries)
    -- Indexing an empty list yields nil, which covers the "no region" case
    local firstRegion = p.getRegionsForCountries(countries)[1]
    return firstRegion
end

return p