Jump to content

Module:LanguageNormalization

From ICANNWiki

Documentation for this module may be created at Module:LanguageNormalization/doc

-- Module:LanguageNormalization
-- Maps language inputs (ISO codes, native names) to canonical English names.
--
-- Features:
--   * Maps ISO 639-1/2/3 codes to canonical names
--   * Recognizes native names (e.g., "Español" → "Spanish")
--   * Displays native forms with canonical names (toggleable)
--   * Strips diacritics for flexible matching
--   * Formats multiple languages for templates
--
-- Configuration:
--   * setShowNativeForms(true/false) - Toggle native forms display
--   * getShowNativeForms() - Check current setting
--
-- Dependencies:
--   * Module:CanonicalForms - Normalization pattern
--   * Module:DiacriticNormalization - Diacritic removal

local p = {}
local CanonicalForms = require('Module:CanonicalForms')
local DiacriticNormalization = require('Module:DiacriticNormalization')

-- Module-wide display settings
local config = {}
config.showNativeForms = true -- native forms are shown unless switched off

-- Per-lookup memo tables (noted in this file as persisting during a page render):
--   normalizeCache      raw input      -> result of p.normalize
--   isNativeFormCache   raw input      -> result of p.isNativeForm
--   getNativeFormCache  canonical name -> result of p.getNativeForm
local normalizeCache, isNativeFormCache, getNativeFormCache = {}, {}, {}

-- Language mapping table
-- Format: {canonical = "English Name", synonyms = {codes, variations}, native = {native names}}
--
--   canonical  English display name returned by the lookups in this module
--   synonyms   lowercase ISO 639 codes, English names and spelling variants
--   native     native-script names; the first one is what p.getNativeForm returns
--
-- Keep every synonym unique across the whole table: a synonym listed under two
-- languages can only ever resolve to one of them.
local languageMapping = {
    -- ========================================================================
    -- Indo-European Languages
    -- ========================================================================

    -- Germanic Branch
    {canonical = "English",
     synonyms = {"en", "eng", "english"},
     native = {"English"}},

    {canonical = "German",
     synonyms = {"de", "deu", "ger", "german"},
     native = {"Deutsch"}},

    {canonical = "Dutch",
     synonyms = {"nl", "nld", "dut", "dutch", "flemish"},
     native = {"Nederlands"}},

    {canonical = "Swedish",
     synonyms = {"sv", "swe", "swedish"},
     native = {"Svenska"}},

    {canonical = "Danish",
     synonyms = {"da", "dan", "danish"},
     native = {"Dansk"}},

    {canonical = "Norwegian",
     synonyms = {"no", "nor", "norwegian"},
     native = {"Norsk"}},

    {canonical = "Icelandic",
     synonyms = {"is", "isl", "ice", "icelandic"},
     native = {"Íslenska"}},

    {canonical = "Afrikaans",
     synonyms = {"af", "afr", "afrikaans"},
     native = {"Afrikaans"}},

    {canonical = "Luxembourgish",
     synonyms = {"lb", "ltz", "luxembourgish"},
     native = {"Lëtzebuergesch"}},

    -- Romance Branch
    -- "castellano" is the Spanish spelling; "castelhano" (Portuguese spelling) is kept
    {canonical = "Spanish",
     synonyms = {"es", "spa", "spanish", "castilian", "castellano", "castelhano"},
     native = {"Español"}},

    {canonical = "Portuguese",
     synonyms = {"pt", "por", "portuguese"},
     native = {"Português"}},

    {canonical = "French",
     synonyms = {"fr", "fra", "fre", "french"},
     native = {"Français"}},

    {canonical = "Italian",
     synonyms = {"it", "ita", "italian"},
     native = {"Italiano"}},

    {canonical = "Romanian",
     synonyms = {"ro", "ron", "rum", "romanian"},
     native = {"Română"}},

    {canonical = "Catalan",
     synonyms = {"ca", "cat", "catalan"},
     native = {"Català"}},

    {canonical = "Galician",
     synonyms = {"gl", "glg", "galician"},
     native = {"Galego"}},

    {canonical = "Occitan",
     synonyms = {"oc", "oci", "occitan"},
     native = {"Occitan"}},

    {canonical = "Sardinian",
     synonyms = {"sc", "srd", "sardinian", "sardo"},
     native = {"Sardu"}},

    {canonical = "Corsican",
     synonyms = {"co", "cos", "corsican", "corsu"},
     native = {"Corsu"}},

    -- Slavic Branch
    {canonical = "Russian",
     synonyms = {"ru", "rus", "russian", "русский", "руский"},
     native = {"Русский"}},

    {canonical = "Polish",
     synonyms = {"pl", "pol", "polish"},
     native = {"Polski"}},

    {canonical = "Ukrainian",
     synonyms = {"uk", "ukr", "ukrainian", "українська"},
     native = {"Українська"}},

    {canonical = "Czech",
     synonyms = {"cs", "ces", "cze", "czech"},
     native = {"Čeština"}},

    {canonical = "Slovak",
     synonyms = {"sk", "slk", "slo", "slovak"},
     native = {"Slovenčina"}},

    {canonical = "Bulgarian",
     synonyms = {"bg", "bul", "bulgarian", "български"},
     native = {"Български"}},

    {canonical = "Croatian",
     synonyms = {"hr", "hrv", "croatian"},
     native = {"Hrvatski"}},

    {canonical = "Serbian",
     synonyms = {"sr", "srp", "serbian", "српски"},
     native = {"Српски"}},

    {canonical = "Slovenian",
     synonyms = {"sl", "slv", "slovenian", "slovene"},
     native = {"Slovenščina"}},

    {canonical = "Belarusian",
     synonyms = {"be", "bel", "belarusian", "belorussian", "беларуская"},
     native = {"Беларуская"}},

    {canonical = "Macedonian",
     synonyms = {"mk", "mkd", "mac", "macedonian", "македонски"},
     native = {"Македонски"}},

    {canonical = "Bosnian",
     synonyms = {"bs", "bos", "bosnian"},
     native = {"Bosanski"}},

    -- Indo-Aryan Branch
    {canonical = "Hindi",
     synonyms = {"hi", "hin", "hindi", "हिन्दी", "हिंदी"},
     native = {"हिन्दी"}},

    {canonical = "Bengali",
     synonyms = {"bn", "ben", "bengali", "bangla", "বাংলা"},
     native = {"বাংলা"}},

    {canonical = "Punjabi",
     synonyms = {"pa", "pan", "punjabi", "ਪੰਜਾਬੀ"},
     native = {"ਪੰਜਾਬੀ"}},

    {canonical = "Urdu",
     synonyms = {"ur", "urd", "urdu", "اردو"},
     native = {"اردو"}},

    {canonical = "Gujarati",
     synonyms = {"gu", "guj", "gujarati", "ગુજરાતી"},
     native = {"ગુજરાતી"}},

    {canonical = "Marathi",
     synonyms = {"mr", "mar", "marathi", "मराठी"},
     native = {"मराठी"}},

    {canonical = "Nepali",
     synonyms = {"ne", "nep", "nepali", "नेपाली"},
     native = {"नेपाली"}},

    {canonical = "Sinhala",
     synonyms = {"si", "sin", "sinhala", "sinhalese", "සිංහල"},
     native = {"සිංහල"}},

    {canonical = "Odia",
     synonyms = {"or", "ori", "odia", "oriya", "ଓଡ଼ିଆ"},
     native = {"ଓଡ଼ିଆ"}},

    {canonical = "Assamese",
     synonyms = {"as", "asm", "assamese", "অসমীয়া"},
     native = {"অসমীয়া"}},

    {canonical = "Maithili",
     synonyms = {"mai", "maithili", "मैथिली"},
     native = {"मैथिली"}},

    {canonical = "Rajasthani",
     synonyms = {"raj", "rajasthani", "राजस्थानी"},
     native = {"राजस्थानी"}},

    -- Iranian Branch
    {canonical = "Persian",
     synonyms = {"fa", "fas", "per", "persian", "farsi", "فارسی"},
     native = {"فارسی"}},

    {canonical = "Kurdish",
     synonyms = {"ku", "kur", "kurdish", "كوردی"},
     native = {"كوردی"}},

    {canonical = "Pashto",
     synonyms = {"ps", "pus", "pashto", "پښتو"},
     native = {"پښتو"}},

    {canonical = "Tajik",
     synonyms = {"tg", "tgk", "tajik", "тоҷики"},
     native = {"тоҷики"}},

    -- Baltic Branch
    {canonical = "Lithuanian",
     synonyms = {"lt", "lit", "lithuanian"},
     native = {"Lietuvių"}},

    {canonical = "Latvian",
     synonyms = {"lv", "lav", "latvian"},
     native = {"Latviešu"}},

    -- Celtic Branch
    {canonical = "Irish",
     synonyms = {"ga", "gle", "irish", "irish gaelic"},
     native = {"Gaeilge"}},

    {canonical = "Welsh",
     synonyms = {"cy", "cym", "wel", "welsh"},
     native = {"Cymraeg"}},

    {canonical = "Scottish Gaelic",
     synonyms = {"gd", "gla", "scottish gaelic", "gaelic"},
     native = {"Gàidhlig"}},

    -- Other Indo-European
    {canonical = "Greek",
     synonyms = {"el", "ell", "gre", "greek", "ελληνικά"},
     native = {"Ελληνικά"}},

    {canonical = "Albanian",
     synonyms = {"sq", "sqi", "alb", "albanian"},
     native = {"Shqip"}},

    {canonical = "Armenian",
     synonyms = {"hy", "hye", "arm", "armenian", "հայերեն"},
     native = {"հայերեն"}},

    -- ========================================================================
    -- Uralic Languages
    -- ========================================================================
    {canonical = "Hungarian",
     synonyms = {"hu", "hun", "hungarian"},
     native = {"Magyar"}},

    {canonical = "Finnish",
     synonyms = {"fi", "fin", "finnish"},
     native = {"Suomi"}},

    {canonical = "Estonian",
     synonyms = {"et", "est", "estonian"},
     native = {"Eesti"}},

    -- ========================================================================
    -- Sino-Tibetan Languages
    -- ========================================================================

    -- Chinese Languages
    {canonical = "Mandarin Chinese",
     synonyms = {"zh", "zho", "cmn", "chinese", "mandarin", "中文", "汉语", "普通话"},
     native = {"中文"}},

    {canonical = "Cantonese",
     synonyms = {"yue", "cantonese", "canton", "粵語", "广东话"},
     native = {"粵語"}},

    {canonical = "Wu Chinese",
     synonyms = {"wuu", "wu", "wu chinese", "shanghainese", "吳語", "上海话"},
     native = {"吳語"}},

    {canonical = "Minnan Chinese",
     synonyms = {"nan", "minnan", "hokkien", "taiwanese", "閩南語", "台湾话"},
     native = {"閩南語"}},

    {canonical = "Hakka Chinese",
     synonyms = {"hak", "hakka", "客家話", "客家语"},
     native = {"客家話"}},

    -- Tibeto-Burman Languages
    {canonical = "Burmese",
     synonyms = {"my", "mya", "bur", "burmese", "မြန်မာဘာသာ"},
     native = {"မြန်မာဘာသာ"}},

    {canonical = "Tibetan",
     synonyms = {"bo", "bod", "tib", "tibetan", "བོད་སྐད་"},
     native = {"བོད་སྐད་"}},

    -- ========================================================================
    -- Japonic and Koreanic Languages
    -- ========================================================================
    {canonical = "Japanese",
     synonyms = {"ja", "jpn", "japanese", "日本語", "にほんご", "にっぽんご"},
     native = {"日本語"}},

    {canonical = "Korean",
     synonyms = {"ko", "kor", "korean", "한국어", "조선말"},
     native = {"한국어"}},

    -- ========================================================================
    -- Turkic Languages
    -- ========================================================================
    {canonical = "Turkish",
     synonyms = {"tr", "tur", "turkish"},
     native = {"Türkçe"}},

    {canonical = "Azerbaijani",
     synonyms = {"az", "aze", "azerbaijani", "azeri", "azərbaycan dili"},
     native = {"Azərbaycan dili"}},

    {canonical = "Uzbek",
     synonyms = {"uz", "uzb", "uzbek", "o'zbek"},
     native = {"Oʻzbek"}},

    {canonical = "Kazakh",
     synonyms = {"kk", "kaz", "kazakh", "қазақ тілі"},
     native = {"қазақ тілі"}},

    {canonical = "Kyrgyz",
     synonyms = {"ky", "kir", "kyrgyz", "кыргызча"},
     native = {"кыргызча"}},

    -- ========================================================================
    -- Austronesian Languages
    -- ========================================================================
    {canonical = "Indonesian",
     synonyms = {"id", "ind", "indonesian"},
     native = {"Bahasa Indonesia"}},

    {canonical = "Malay",
     synonyms = {"ms", "msa", "may", "malay"},
     native = {"Bahasa Melayu"}},

    {canonical = "Tagalog",
     synonyms = {"fil", "filipino", "pilipino", "tagalog", "tl", "tgl"},
     native = {"Tagalog"}},

    {canonical = "Javanese",
     synonyms = {"jv", "jav", "javanese", "basa jawa"},
     native = {"Basa Jawa"}},

    {canonical = "Sundanese",
     synonyms = {"su", "sun", "sundanese", "basa sunda"},
     native = {"Basa Sunda"}},

    {canonical = "Hawaiian",
     synonyms = {"haw", "hawaiian"},
     native = {"ʻŌlelo Hawaiʻi"}},

    {canonical = "Māori",
     synonyms = {"mi", "mao", "mri", "maori", "reo māori"},
     native = {"Te Reo Māori"}},

    {canonical = "Samoan",
     synonyms = {"sm", "smo", "samoan"},
     native = {"Gagana Samoa"}},

    {canonical = "Fijian",
     synonyms = {"fj", "fij", "fijian"},
     native = {"Vosa Vakaviti"}},

    -- ========================================================================
    -- Dravidian Languages
    -- ========================================================================
    {canonical = "Tamil",
     synonyms = {"ta", "tam", "tamil", "தமிழ்"},
     native = {"தமிழ்"}},

    {canonical = "Telugu",
     synonyms = {"te", "tel", "telugu", "తెలుగు"},
     native = {"తెలుగు"}},

    {canonical = "Kannada",
     synonyms = {"kn", "kan", "kannada", "ಕನ್ನಡ"},
     native = {"ಕನ್ನಡ"}},

    {canonical = "Malayalam",
     synonyms = {"ml", "mal", "malayalam", "മലയാളം"},
     native = {"മലയാളം"}},

    -- ========================================================================
    -- Tai-Kadai Languages
    -- ========================================================================
    {canonical = "Thai",
     synonyms = {"th", "tha", "thai", "ไทย"},
     native = {"ไทย"}},

    {canonical = "Lao",
     synonyms = {"lo", "lao", "laotian", "ລາວ"},
     native = {"ລາວ"}},

    -- ========================================================================
    -- Austro-Asiatic Languages
    -- ========================================================================
    {canonical = "Vietnamese",
     synonyms = {"vi", "vie", "vietnamese"},
     native = {"Tiếng Việt"}},

    {canonical = "Khmer",
     synonyms = {"km", "khm", "khmer", "cambodian", "ខ្មែរ"},
     native = {"ខ្មែរ"}},

    -- ========================================================================
    -- Afro-Asiatic Languages
    -- ========================================================================

    -- Semitic Branch
    {canonical = "Arabic",
     synonyms = {"ar", "ara", "arabic", "عربى", "عربي", "عربية"},
     native = {"العربية"}},

    -- "msa" is deliberately NOT listed here: it is the ISO 639 code for Malay
    -- and is already a synonym of that entry. "arb" is the ISO 639-3 code for
    -- Standard Arabic.
    {canonical = "Modern Standard Arabic",
     synonyms = {"arb", "modern standard arabic", "literary arabic", "standard arabic", "العربية الفصحى"},
     native = {"العربية الفصحى"}},

    {canonical = "Hebrew",
     synonyms = {"he", "heb", "hebrew", "עברית"},
     native = {"עברית"}},

    {canonical = "Amharic",
     synonyms = {"am", "amh", "amharic", "አማርኛ"},
     native = {"አማርኛ"}},

    {canonical = "Tigrinya",
     synonyms = {"ti", "tir", "tigrinya", "ትግርኛ"},
     native = {"ትግርኛ"}},

    {canonical = "Maltese",
     synonyms = {"mt", "mlt", "maltese"},
     native = {"Malti"}},

    -- Cushitic Branch
    {canonical = "Somali",
     synonyms = {"so", "som", "somali"},
     native = {"Soomaali"}},

    -- Chadic Branch
    {canonical = "Hausa",
     synonyms = {"ha", "hau", "hausa", "هَوُسَ"},
     native = {"هَوُسَ"}},

    -- ========================================================================
    -- Niger-Congo Languages
    -- ========================================================================

    -- Bantu Branch
    {canonical = "Swahili",
     synonyms = {"sw", "swa", "swahili"},
     native = {"Kiswahili"}},

    {canonical = "Zulu",
     synonyms = {"zu", "zul", "zulu"},
     native = {"isiZulu"}},

    {canonical = "Xhosa",
     synonyms = {"xh", "xho", "xhosa"},
     native = {"isiXhosa"}},

    {canonical = "Shona",
     synonyms = {"sn", "sna", "shona"},
     native = {"chiShona"}},

    {canonical = "Lingala",
     synonyms = {"ln", "lin", "lingala"},
     native = {"Lingála"}},

    {canonical = "Kinyarwanda",
     synonyms = {"rw", "kin", "kinyarwanda"},
     native = {"Ikinyarwanda"}},

    -- West African Branch
    {canonical = "Yoruba",
     synonyms = {"yo", "yor", "yoruba"},
     native = {"Èdè Yorùbá"}},

    {canonical = "Igbo",
     synonyms = {"ig", "ibo", "igbo"},
     native = {"Asụsụ Igbo"}},

    {canonical = "Fula",
     synonyms = {"ff", "ful", "fula", "fulfulde", "peul"},
     native = {"Fulfulde"}},

    {canonical = "Wolof",
     synonyms = {"wo", "wol", "wolof"},
     native = {"Wolof"}},

    {canonical = "Kongo",
     synonyms = {"kg", "kon", "kongo", "kikongo"},
     native = {"Kikongo"}},

    -- ========================================================================
    -- Americas Indigenous Languages
    -- ========================================================================

    -- Quechuan Languages
    {canonical = "Quechua",
     synonyms = {"qu", "que", "quechua"},
     native = {"Runa Simi"}},

    -- Tupi-Guarani Languages
    {canonical = "Guarani",
     synonyms = {"gn", "grn", "guarani"},
     native = {"Avañe'ẽ"}},

    -- Aymaran Languages
    {canonical = "Aymara",
     synonyms = {"ay", "aym", "aymara"},
     native = {"Aymar aru"}},

    -- Uto-Aztecan Languages
    {canonical = "Nahuatl",
     synonyms = {"nah", "nahuatl", "aztec"},
     native = {"Nāhuatl"}},

    -- Na-Dene Languages
    {canonical = "Navajo",
     synonyms = {"nv", "nav", "navajo"},
     native = {"Diné bizaad"}},

    -- Eskimo-Aleut Languages
    {canonical = "Inuktitut",
     synonyms = {"iu", "iku", "inuktitut", "ᐃᓄᒃᑎᑐᑦ"},
     native = {"ᐃᓄᒃᑎᑐᑦ"}},

    -- ========================================================================
    -- Creole Languages
    -- ========================================================================
    {canonical = "Haitian Creole",
     synonyms = {"ht", "hat", "haitian", "haitian creole", "kreyòl"},
     native = {"Kreyòl Ayisyen"}},

    -- ========================================================================
    -- Kartvelian Languages
    -- ========================================================================
    {canonical = "Georgian",
     synonyms = {"ka", "kat", "geo", "georgian", "ქართული"},
     native = {"ქართული"}},

    -- ========================================================================
    -- Mongolic Languages
    -- ========================================================================
    {canonical = "Mongolian",
     synonyms = {"mn", "mon", "mongolian", "монгол хэл"},
     native = {"Монгол хэл"}},

    -- ========================================================================
    -- Language Isolates
    -- ========================================================================
    {canonical = "Basque",
     synonyms = {"eu", "eus", "baq", "basque"},
     native = {"Euskara"}},

    -- ========================================================================
    -- Constructed Languages
    -- ========================================================================
    {canonical = "Esperanto",
     synonyms = {"eo", "epo", "esperanto"},
     native = {"Esperanto"}},
}

--------------------------------------------------------------------------------
-- Helper Functions
--------------------------------------------------------------------------------

-- Get native form for a canonical language name
--
-- @param canonicalName  canonical English name exactly as stored in
--                       languageMapping (e.g. "Spanish"); the comparison is
--                       exact and case-sensitive
-- @return the first native name listed for that language, or nil when the
--         name is unknown (or canonicalName itself is nil/false)
function p.getNativeForm(canonicalName)
    if not canonicalName then return nil end

    -- Check cache first. A stored `false` is a remembered miss: assigning nil
    -- to a table field just removes it, so misses need a non-nil marker or
    -- the full scan below would run again on every call.
    local cached = getNativeFormCache[canonicalName]
    if cached ~= nil then
        return cached or nil
    end

    for _, lang in ipairs(languageMapping) do
        if lang.canonical == canonicalName and lang.native and #lang.native > 0 then
            -- Cache and return the first native form (typically the most common one)
            getNativeFormCache[canonicalName] = lang.native[1]
            return lang.native[1]
        end
    end

    -- Cache negative results
    getNativeFormCache[canonicalName] = false
    return nil
end

-- Public pass-through to Module:DiacriticNormalization.
-- Hands `text` to DiacriticNormalization.removeDiacritics and returns whatever
-- that function returns, unchanged.
function p.removeDiacritics(text)
    local stripDiacritics = DiacriticNormalization.removeDiacritics
    return stripDiacritics(text)
end

-- Prepare mapping with normal and diacritic-free versions
--
-- Builds the table handed to CanonicalForms.normalize: one entry per language
-- holding its canonical name and a flat synonym list made of
--   * every synonym from languageMapping, in order, then
--   * for each native name: its lowercase form and, when different, the
--     diacritic-free version of that lowercase form.
--
-- string.lower works on bytes, so a native name that starts with a non-ASCII
-- capital (e.g. "Íslenska", "Čeština") comes back unchanged and would never
-- get a truly lowercase variant. When Scribunto's mw.ustring library is
-- available, the Unicode-lowercased form (and its diacritic-free version) is
-- therefore added as well. These are additions only; every synonym generated
-- before is still generated.
--
-- @return array of {canonical = string, synonyms = {string, ...}}
local function prepareMapping()
    -- nil outside Scribunto (e.g. when the module is exercised with plain Lua)
    local unicodeLower = mw and mw.ustring and mw.ustring.lower

    local enhancedMapping = {}

    for _, lang in ipairs(languageMapping) do
        local entry = {
            canonical = lang.canonical,
            synonyms = {}
        }

        -- Append a synonym once; repeats within one language add nothing
        local seen = {}
        local function addSynonym(value)
            if value and not seen[value] then
                seen[value] = true
                table.insert(entry.synonyms, value)
            end
        end

        -- Add a lowercase form plus its diacritic-free version
        local function addWithStripped(lowered)
            addSynonym(lowered)
            addSynonym(DiacriticNormalization.removeDiacritics(lowered))
        end

        -- Add synonyms
        for _, syn in ipairs(lang.synonyms or {}) do
            addSynonym(syn)
        end

        -- Add native names
        if lang.native then
            for _, native in ipairs(lang.native) do
                addWithStripped(native:lower())
                if unicodeLower then
                    addWithStripped(unicodeLower(native))
                end
            end
        end

        table.insert(enhancedMapping, entry)
    end

    return enhancedMapping
end

-- Generate enhanced mapping on module load
local enhancedMapping = prepareMapping()

-- Resolve a language input to its canonical English name.
--
-- Lookup order:
--   1. normalizeCache (any earlier result for the same input)
--   2. CanonicalForms.normalize on the input as given
--   3. CanonicalForms.normalize on the lowercased, diacritic-free input,
--      tried only when stripping actually changed the lowercased text
-- Inputs that match nothing are returned (and cached) unchanged; nil and ""
-- are returned as they are without touching the cache.
--
-- @param inputLanguage  code, English name or native name
-- @return canonical name, or inputLanguage itself when unresolved
function p.normalize(inputLanguage)
    if inputLanguage == "" or not inputLanguage then
        return inputLanguage
    end

    local remembered = normalizeCache[inputLanguage]
    if remembered then
        return remembered
    end

    -- Record the outcome for this input and hand it back
    local function remember(result)
        normalizeCache[inputLanguage] = result
        return result
    end

    local direct = CanonicalForms.normalize(inputLanguage, enhancedMapping)
    if direct then
        return remember(direct)
    end

    local lowered = inputLanguage:lower()
    local folded = DiacriticNormalization.removeDiacritics(lowered)
    if folded and folded ~= lowered then
        local viaFolded = CanonicalForms.normalize(folded, enhancedMapping)
        if viaFolded then
            return remember(viaFolded)
        end
    end

    return remember(inputLanguage)
end

-- Check if input is a native form and get canonical form
--
-- The input is compared, after string.lower on both sides, against every
-- native name in languageMapping; the first language with a match wins.
-- NOTE(review): string.lower is byte-oriented, so the comparison is presumably
-- case-insensitive for ASCII letters only — confirm before relying on it for
-- non-Latin scripts.
--
-- @param input  text to test (e.g. "Deutsch")
-- @return canonical English name when input is a listed native name,
--         nil otherwise (including when input is nil/false)
function p.isNativeForm(input)
    if not input then return nil end

    -- Check cache first. A stored `false` is a remembered miss: assigning nil
    -- to a table field just removes it, so misses need a non-nil marker or
    -- the full scan below would run again on every call.
    local cached = isNativeFormCache[input]
    if cached ~= nil then
        return cached or nil
    end

    -- Convert to lowercase
    local lowerInput = input:lower()

    for _, lang in ipairs(languageMapping) do
        if lang.native then
            for _, native in ipairs(lang.native) do
                if lowerInput == native:lower() then
                    isNativeFormCache[input] = lang.canonical
                    return lang.canonical
                end
            end
        end
    end

    -- Cache negative results
    isNativeFormCache[input] = false
    return nil
end

-- Render a semicolon-separated language list for template output.
--
-- Each non-empty, whitespace-trimmed item is resolved to a canonical name:
--   * an item that is itself a native name (p.isNativeForm) keeps the text as
--     typed for the native line;
--   * anything else goes through p.normalize, and the native line comes from
--     p.getNativeForm.
-- The native line is emitted only when one exists, native display is enabled
-- in config, and the language is not English.
--
-- @param inputLanguages  e.g. "en; Deutsch; fr"
-- @return "" for nil/empty input or when no items remain, the single
--         formatted item when there is exactly one, otherwise a <ul> list
function p.formatLanguages(inputLanguages)
    if inputLanguages == "" or not inputLanguages then
        return ""
    end

    local withNativeFormat = "%s<br/><span style=\"display:inline-block; width:0.1em; visibility:hidden;\">*</span><span style=\"font-size:75%%;\">%s</span>"
    local listFormat = "<ul class=\"template-list template-list-language\" style=\"margin:0; padding-left:1em;\">%s</ul>"

    local rendered = {}
    for piece in inputLanguages:gmatch("[^;]+") do
        local name = piece:match("^%s*(.-)%s*$")
        if name and name ~= "" then
            local canonical = p.isNativeForm(name)
            local nativeText
            if canonical then
                -- Typed in native form: show exactly what was entered
                nativeText = name
            else
                canonical = p.normalize(name)
                nativeText = p.getNativeForm(canonical)
            end

            -- English is the one language that never gets a native line
            local showNative = nativeText and config.showNativeForms and canonical ~= "English"
            if showNative then
                table.insert(rendered, string.format(withNativeFormat, canonical, nativeText))
            else
                table.insert(rendered, canonical)
            end
        end
    end

    local count = #rendered
    if count == 0 then
        return ""
    end
    if count == 1 then
        return rendered[1]
    end

    -- Two or more: wrap every entry in <li> and the whole set in the list markup
    local items = {}
    for index = 1, count do
        items[index] = string.format("<li>%s</li>", rendered[index])
    end
    return string.format(listFormat, table.concat(items, ""))
end

-- Enable or disable the native-form line in formatted output.
-- Only the boolean `true` enables it; every other value (false, nil, strings,
-- numbers) disables it.
function p.setShowNativeForms(value)
    if value == true then
        config.showNativeForms = true
    else
        config.showNativeForms = false
    end
end

-- Report whether native forms are currently displayed.
-- @return the current value of config.showNativeForms
function p.getShowNativeForms()
    local enabled = config.showNativeForms
    return enabled
end

return p