-- Module:DateNormalization
-- Appearance
-- Documentation for this module may be created at Module:DateNormalization/doc
-- Module:DateNormalization
-- Parses date inputs from various formats, normalizes month names (including abbreviations with dots), and converts them into a standardized "Month DD, YYYY" format, or returns only the year if no full date is provided.
-- Module table; the public functions are attached to it and it is returned
-- at the end of the file.
local p = {}
-- Lookup from month names and abbreviations to month numbers (1-12).
-- Keys are written with a leading capital and no trailing dot, so callers
-- must bring a token into that shape ("feb." -> "Feb") before indexing.
-- "Sept" is included because it is a common abbreviation for September.
local months = {
    Jan = 1, January = 1,
    Feb = 2, February = 2,
    Mar = 3, March = 3,
    Apr = 4, April = 4,
    May = 5,
    Jun = 6, June = 6,
    Jul = 7, July = 7,
    Aug = 8, August = 8,
    Sep = 9, Sept = 9, September = 9,
    Oct = 10, October = 10,
    Nov = 11, November = 11,
    Dec = 12, December = 12,
}
-- Memo of already processed inputs, keyed by the input string. It is a
-- module-level local, so it lives exactly as long as this module instance.
local dateCache = {}
-- Grouped by type
-- Looks up the month number (1-12) for a captured month token such as
-- "march", "Feb." or "SEPTEMBER". One trailing dot is dropped and the
-- capitalisation is normalised to the "Xxxx" form used by the keys of
-- `months`. Returns nil when the token is not a known month name.
local function monthNumber(token)
    local bare = token:gsub("%.$", "")
    return months[bare:sub(1, 1):upper() .. bare:sub(2):lower()]
end

-- Capture handlers. Each one receives the captures of its pattern in the
-- order they appear in the pattern and returns year, month, day as numbers.
-- For the month-name handlers the month is nil when the name is unknown.
local function dayMonthNameYear(d, monthName, y)
    return tonumber(y), monthNumber(monthName), tonumber(d)
end

local function yearMonthNameDay(y, monthName, d)
    return tonumber(y), monthNumber(monthName), tonumber(d)
end

local function monthNameDayYear(monthName, d, y)
    return tonumber(y), monthNumber(monthName), tonumber(d)
end

local function yearMonthDay(y, m, d)
    return tonumber(y), tonumber(m), tonumber(d)
end

local function dayMonthYear(d, m, y)
    return tonumber(y), tonumber(m), tonumber(d)
end

-- NOTE: a two-digit year is always placed in the 2000s ("99" -> 2099).
local function dayMonthShortYear(d, m, y)
    return tonumber("20" .. y), tonumber(m), tonumber(d)
end

local function yearAlone(y)
    return tonumber(y), nil, nil
end

-- Recognised input formats, grouped by type. Every entry is a pair
-- { luaPattern, handler }; entries of a group are tried in order.
local patterns = {
    -- Text-based formats (with month names)
    textFormats = {
        -- "DD Month YYYY" e.g. "12 March 1980"
        { "^(%d?%d)%s+(%a+%.?)%s+(%d%d%d%d)$", dayMonthNameYear },
        -- "YYYY Month DD" e.g. "2000 July 12"
        { "^(%d%d%d%d)%s+(%a+%.?)%s+(%d?%d)$", yearMonthNameDay },
        -- "DDth Month YYYY" e.g. "12th July 2000"
        { "^(%d?%d)[a-zA-Z]+%s+(%a+%.?)%s+(%d%d%d%d)$", dayMonthNameYear },
        -- "Month DDth, YYYY" e.g. "July 12th, 2000"
        { "^(%a+%.?)%s+(%d?%d)[a-zA-Z]*[,]?%s*(%d%d%d%d)$", monthNameDayYear },
        -- "27 Feb. 2014", "7-Feb.-2014" (may be embedded in longer text)
        { "(%d?%d)[%- ](%a+%.?)[%- ](%d%d%d%d)", dayMonthNameYear },
        -- "Feb. 7, 2014" (may be embedded in longer text). The separators
        -- are "[, ]+" so that the two-character ", " between day and year
        -- is accepted.
        { "(%a+%.?)[, ]+(%d?%d)[, ]+(%d%d%d%d)", monthNameDayYear },
    },
    -- Numeric formats (without month names)
    numericFormats = {
        -- Compact form: "20000712" (YYYYMMDD)
        { "^(%d%d%d%d)(%d%d)(%d%d)$", yearMonthDay },
        -- YYYY-MM-DD or YYYY/MM/DD
        { "(%d%d%d%d)[%-/](%d?%d)[%-/](%d?%d)", yearMonthDay },
        -- DD-MM-YYYY or DD/MM/YYYY
        { "(%d?%d)[%-/](%d?%d)[%-/](%d%d%d%d)", dayMonthYear },
        -- DD-MM-YY or DD/MM/YY. The trailing frontier %f[%D] requires the
        -- two year digits to be followed by a non-digit or the end of the
        -- string, so the prefix of a longer number ("12-07-200") is not
        -- mistaken for a two-digit year.
        { "(%d?%d)[%-/](%d?%d)[%-/](%d%d)%f[%D]", dayMonthShortYear },
    },
    -- Year-only format
    yearOnly = {
        -- YYYY only
        { "^(%d%d%d%d)$", yearAlone },
    },
}
-- English month names indexed by month number. Used to render the result
-- directly instead of going through os.date, so the output does not depend
-- on the host locale or on the range of dates os.time can represent.
local MONTH_NAMES = {
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
}

-- Number of days in each month of a non-leap year.
local MONTH_LENGTHS = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }

-- Returns true when year/month/day are all present and name a real day of
-- the Gregorian calendar (month 1-12, day 1..length of that month).
local function isValidDate(y, m, d)
    if not (y and m and d) then
        return false
    end
    if m < 1 or m > 12 or d < 1 then
        return false
    end
    local limit = MONTH_LENGTHS[m]
    if m == 2 and y % 4 == 0 and (y % 100 ~= 0 or y % 400 == 0) then
        limit = 29
    end
    return d <= limit
end

-- Renders a validated date as "Month DD, YYYY" (day zero-padded to two
-- digits, e.g. "July 05, 2000").
local function renderDate(y, m, d)
    return string.format("%s %02d, %d", MONTH_NAMES[m], d, y)
end

-- Tries the { pattern, handler } entries of one pattern group in order and
-- returns year, month, day from the first entry whose handler produced all
-- three components. Returns nil when no entry did.
local function firstFullMatch(group, text)
    for _, entry in ipairs(group) do
        local captures = { string.match(text, entry[1]) }
        if #captures > 0 then
            local y, m, d = entry[2](captures[1], captures[2], captures[3])
            if y and m and d then
                return y, m, d
            end
        end
    end
    return nil
end

-- Converts an already trimmed, single-spaced string. Returns the formatted
-- date, the year alone, or `text` itself when nothing usable was found.
local function normalizeText(text)
    -- Year-only input is the simplest case, so it is checked first.
    for _, entry in ipairs(patterns.yearOnly) do
        local captures = { string.match(text, entry[1]) }
        if #captures > 0 then
            local y = entry[2](captures[1])
            if y then
                return tostring(y)
            end
        end
    end

    -- Structured formats: month-name formats first, then numeric ones.
    local y, m, d = firstFullMatch(patterns.textFormats, text)
    if not y then
        y, m, d = firstFullMatch(patterns.numericFormats, text)
        -- A numeric "month" above 12 paired with a "day" of at most 12 can
        -- only be read with the two fields swapped (e.g. US "07/25/2000").
        if y and m > 12 and d <= 12 then
            m, d = d, m
        end
    end
    if isValidDate(y, m, d) then
        return renderDate(y, m, d)
    end
    -- A structured match that is not a real calendar day (e.g. 31 February)
    -- is never "corrected" into a different date; the token heuristic below
    -- gets a chance and otherwise the text is returned unchanged.

    -- Fallback: classify alphanumeric tokens one by one.
    local year, month, day
    for token in text:gmatch("%w+") do
        if token:match("^%d+$") then
            -- Only pure digit runs count as numbers; tonumber alone would
            -- also accept tokens such as "1e3" or "0x7D0".
            local num = tonumber(token)
            if num >= 1000 and num <= 3000 then
                year = num
            elseif num >= 1 and num <= 31 and not day then
                day = num
            elseif num >= 1 and num <= 12 and not month then
                month = num
            end
        else
            -- Tokens consist of %w characters only, so there is no trailing
            -- dot to strip here; just normalise the capitalisation.
            local mVal = months[token:sub(1, 1):upper() .. token:sub(2):lower()]
            if mVal then
                month = mVal
            end
        end
        -- Stop as soon as the components collected so far form a real date.
        if isValidDate(year, month, day) then
            return renderDate(year, month, day)
        end
    end

    -- A year without a complete month/day pair is reported on its own.
    if year and not (month and day) then
        return tostring(year)
    end
    return text
end

-- Normalizes a date string.
--
-- inputDate: the date text in one of the formats listed in `patterns`, or
--            free text containing a year, a month and a day. A number is
--            converted with tostring first.
--
-- Returns "Month DD, YYYY" when a valid full date was recognised, the year
-- as a string when only a year was found, and otherwise the trimmed,
-- single-spaced input. nil, "" and any other non-string value are returned
-- as they are.
function p.formatDate(inputDate)
    if type(inputDate) == "number" then
        inputDate = tostring(inputDate)
    end
    if type(inputDate) ~= "string" or inputDate == "" then
        return inputDate
    end

    local cached = dateCache[inputDate]
    if cached then
        return cached
    end

    -- Trim both ends and collapse every whitespace run to a single space.
    local text = inputDate:gsub("^%s*(.-)%s*$", "%1"):gsub("%s+", " ")

    local result = dateCache[text]
    if not result then
        result = normalizeText(text)
        dateCache[text] = result
    end
    -- Also remember the result under the raw spelling; the lookup above uses
    -- the raw input, so otherwise padded inputs would never hit the cache.
    dateCache[inputDate] = result
    return result
end
return p