-- Module:Punycode
-- Appearance
-- Documentation for this module may be created at Module:Punycode/doc
-- Module:Punycode
-- Implements RFC3492 (Punycode) encoding and decoding.
-- Requires mw.ustring for proper Unicode support.
-- Table holding the functions this module exports (encode and decode).
local punycode = {}
--------------------------
-- Configuration Constants
--------------------------
-- Number of distinct digit values: 'a'..'z' map to 0..25 and '0'..'9' to 26..35.
local base = 36
-- Lower and upper clamp for the per-digit threshold t computed in encode/decode.
local tmin = 1
local tmax = 26
-- Added to delta in the denominator of the final bias formula in adapt().
local skew = 38
-- Divisor applied to delta on the first adaptation; later adaptations divide by 2.
local damp = 700
-- Bias in effect before the first call to adapt().
local initial_bias = 72
-- Starting value of n; code points below it are copied literally as basic code points.
local initial_n = 128 -- 0x80
-- Separates the literal basic code points from the encoded digits.
local delimiter = '-' -- ASCII hyphen
--------------------------
-- Helper functions for Unicode handling.
--------------------------
--- Converts a UTF-8 string to an array of Unicode code points.
-- Iterates with mw.ustring.gcodepoint, which yields the code points directly,
-- instead of pattern-matching every character and converting each
-- one-character string separately.
-- @param s UTF-8 encoded string.
-- @return Sequence (1-based table) of integer code points in string order;
--         an empty table for the empty string.
local function toCodePoints(s)
  local cps = {}
  for cp in mw.ustring.gcodepoint(s) do
    cps[#cps + 1] = cp
  end
  return cps
end
-- Builds a UTF-8 string from a sequence of Unicode code points.
-- Each code point is rendered with mw.ustring.char and the pieces are joined
-- once at the end to avoid repeated string concatenation.
local function fromCodePoints(cps)
  local pieces = {}
  for position, code_point in ipairs(cps) do
    pieces[position] = mw.ustring.char(code_point)
  end
  return table.concat(pieces)
end
--------------------------
-- Digit conversion functions
--------------------------
-- Maps a Punycode digit value to its lowercase basic code point:
-- 0..25 become 'a'..'z' and 26..35 become '0'..'9'.
local function digitToBasic(digit)
  local offset
  if digit >= 26 then
    -- Digits 26 and up are counted from '0'.
    offset = string.byte('0') - 26
  else
    offset = string.byte('a')
  end
  return string.char(digit + offset)
end
-- Maps a basic code point (byte value) to its Punycode digit value.
-- Letters are case-insensitive and give 0..25; '0'..'9' give 26..35.
-- Any other code point yields `base`, which is outside the valid digit range.
local function basicToDigit(cp)
  local byte = string.byte
  if cp >= byte('a') and cp <= byte('z') then
    return cp - byte('a')
  end
  if cp >= byte('A') and cp <= byte('Z') then
    return cp - byte('A')
  end
  if cp >= byte('0') and cp <= byte('9') then
    return 26 + (cp - byte('0'))
  end
  return base
end
--------------------------
-- Bias adaptation (RFC3492, Section 3.4)
--------------------------
-- Computes the next bias from the delta that was just encoded or decoded.
--   delta     : the delta value just processed
--   numpoints : number of code points handled so far, including this one
--   first     : truthy on the very first adaptation, which damps delta by
--               `damp`; later adaptations halve it instead
-- Returns the new integer bias.
local function adapt(delta, numpoints, first)
  local divisor = first and damp or 2
  local scaled = math.floor(delta / divisor)
  scaled = scaled + math.floor(scaled / numpoints)

  local span = base - tmin
  local limit = (span * tmax) / 2
  local k = 0
  -- Each division by span accounts for one more full digit position.
  while scaled > limit do
    scaled = math.floor(scaled / span)
    k = k + base
  end
  return k + math.floor(((span + 1) * scaled) / (scaled + skew))
end
--------------------------
-- Punycode Encoding Function
--------------------------
-- Encodes a UTF-8 string as Punycode.
--   input : UTF-8 string to encode
-- Returns the encoded string: the ASCII code points of `input` in their
-- original order, then the delimiter (only if there was at least one ASCII
-- code point), then the digits describing where every non-ASCII code point
-- has to be inserted.
function punycode.encode(input)
  local code_points = toCodePoints(input)
  local total = #code_points
  local out = {}

  -- Basic (ASCII) code points are copied through unchanged.
  for _, cp in ipairs(code_points) do
    if cp < 128 then
      out[#out + 1] = mw.ustring.char(cp)
    end
  end
  local basic_count = #out
  if basic_count > 0 then
    out[#out + 1] = delimiter
  end

  -- Threshold for the digit at position k: k - bias clamped to [tmin, tmax].
  local function threshold(k, current_bias)
    local t = k - current_bias
    if t <= 0 then
      return tmin
    end
    if t >= tmax then
      return tmax
    end
    return t
  end

  local n, delta, bias = initial_n, 0, initial_bias
  local handled = basic_count
  while handled < total do
    -- Pick the smallest code point that has not been handled yet.
    local smallest = 0x7FFFFFFF
    for _, cp in ipairs(code_points) do
      if cp >= n and cp < smallest then
        smallest = cp
      end
    end

    -- Advance the decoder state past every position skipped to reach it.
    delta = delta + (smallest - n) * (handled + 1)
    n = smallest

    for _, cp in ipairs(code_points) do
      if cp < n then
        delta = delta + 1
      elseif cp == n then
        -- Emit delta as a variable-length integer, least significant digit first.
        local q, k = delta, base
        local t = threshold(k, bias)
        while q >= t do
          local span = base - t
          out[#out + 1] = digitToBasic(t + (q - t) % span)
          q = math.floor((q - t) / span)
          k = k + base
          t = threshold(k, bias)
        end
        out[#out + 1] = digitToBasic(q)

        bias = adapt(delta, handled + 1, handled == basic_count)
        delta = 0
        handled = handled + 1
      end
    end

    delta = delta + 1
    n = n + 1
  end

  return table.concat(out)
end
--------------------------
-- Punycode Decoding Function
--------------------------
--- Decodes a Punycode string (RFC 3492, section 6.2) into a UTF-8 string.
-- Everything before the LAST delimiter is taken literally as basic (ASCII)
-- code points; the rest is a sequence of variable-length integers, each of
-- which says which code point to insert and where.
-- @param input Punycode-encoded string (letters are case-insensitive).
-- @return The decoded UTF-8 string.
-- Raises an error if the input ends in the middle of an integer, contains a
-- non-ASCII byte before the delimiter, contains a character that is not a
-- Punycode digit after it, or decodes to a value outside the Unicode range.
function punycode.decode(input)
  local max_int = 0x7FFFFFFF    -- overflow limit used by the RFC sample code
  local max_code_point = 0x10FFFF
  local input_len = #input
  local cp_array = {}

  -- Basic code points may themselves contain the delimiter (e.g. "a-b"), so
  -- the split point is the last delimiter, not the first one.
  local d = 0
  for pos = input_len, 1, -1 do
    if input:sub(pos, pos) == delimiter then
      d = pos
      break
    end
  end

  -- Copy the basic code points; with no delimiter (d == 0) there are none.
  for pos = 1, d - 1 do
    local cp = input:byte(pos)
    if cp >= 128 then
      error("Invalid input: non-basic code point before punycode delimiter")
    end
    cp_array[#cp_array + 1] = cp
  end

  local n = initial_n
  local bias = initial_bias
  local i = 0
  local index = d + 1
  while index <= input_len do
    -- Decode one variable-length integer into i (least significant digit first).
    local oldi = i
    local w = 1
    local k = base
    while true do
      if index > input_len then
        error("Invalid input: punycode decode incomplete")
      end
      local digit = basicToDigit(input:byte(index))
      if digit >= base then
        -- basicToDigit returns `base` for anything that is not a valid digit.
        error("Invalid input: invalid punycode digit")
      end
      index = index + 1
      i = i + digit * w
      if i > max_int then
        error("Invalid input: punycode overflow")
      end
      local t
      if k <= bias then
        t = tmin
      elseif k >= bias + tmax then
        t = tmax
      else
        t = k - bias
      end
      if digit < t then break end
      w = w * (base - t)
      k = k + base
    end

    local out_len = #cp_array + 1
    bias = adapt(i - oldi, out_len, oldi == 0)
    -- i encodes both the code point increment (quotient) and the insertion
    -- position (remainder) with respect to the output length.
    n = n + math.floor(i / out_len)
    if n > max_code_point then
      error("Invalid input: punycode code point out of range")
    end
    i = i % out_len
    table.insert(cp_array, i + 1, n)
    i = i + 1
  end
  return fromCodePoints(cp_array)
end
-- Export the module table so callers can use punycode.encode and punycode.decode.
return punycode