Jump to content

Module:NormalizationDate

From ICANNWiki

Documentation for this module may be created at Module:NormalizationDate/doc

-- Module:NormalizationDate
-- Parses and normalizes dates to "Month DD, YYYY" format (or year only if no full date).
-- Toggle between full names ("January") and abbreviations ("Jan.") with setUseShortMonthNames().
--
-- Usage: local dn = require('Module:NormalizationDate')
-- dn.formatDate("2023-01-15")              -- "Jan. 15, 2023" (abbreviations are the default)
-- dn.setUseShortMonthNames(false)          -- Switch to full month names
-- dn.formatDate("2023-02-20")              -- "February 20, 2023"

local p = {}

-- Module configuration
-- true  -> abbreviated month names from shortMonthNames ("Jan. 15, 2023")
-- false -> full month names produced by os.date("%B")
local useShortMonthNames = true -- TOGGLE FORMAT

-- Lookup from a month name or abbreviation to its month number (1-12).
-- Keys are capitalized ("Jan", "January"); code that indexes this table
-- upper-cases the first letter and lower-cases the rest of the input first,
-- so matching is effectively case-insensitive.
-- "Sept" is included because it is a widely used four-letter abbreviation
-- alongside "Sep".
local months = {
  Jan = 1, January = 1,
  Feb = 2, February = 2,
  Mar = 3, March = 3,
  Apr = 4, April = 4,
  May = 5,
  Jun = 6, June = 6,
  Jul = 7, July = 7,
  Aug = 8, August = 8,
  Sep = 9, Sept = 9, September = 9,
  Oct = 10, October = 10,
  Nov = 11, November = 11,
  Dec = 12, December = 12
}

-- Month number -> abbreviated display name. Abbreviations carry a trailing
-- period; "May" is already a full name and therefore has none.
local shortMonthNames = {
  [1] = "Jan.", [2] = "Feb.", [3] = "Mar.", [4] = "Apr.", [5] = "May",
  [6] = "Jun.", [7] = "Jul.", [8] = "Aug.", [9] = "Sep.",
  [10] = "Oct.", [11] = "Nov.", [12] = "Dec."
}

-- Cache for previously processed dates (persists during a single page render)
local dateCache = {}

-- Resolves a month name or abbreviation (any capitalization, optional
-- trailing period) to its month number via the `months` lookup table.
-- Returns nil when the name is not a known month.
local function monthNumberFromName(name)
  -- gsub returns (string, count); the assignment keeps only the string.
  name = name:gsub("%.$", "")
  return months[name:sub(1, 1):upper() .. name:sub(2):lower()]
end

-- Capture-order adapters. Each receives the captures of one pattern and
-- returns year, month, day as numbers (month is nil for an unknown name).
local function fromDayNameYear(d, monthName, y)
  return tonumber(y), monthNumberFromName(monthName), tonumber(d)
end

local function fromYearNameDay(y, monthName, d)
  return tonumber(y), monthNumberFromName(monthName), tonumber(d)
end

local function fromNameDayYear(monthName, d, y)
  return tonumber(y), monthNumberFromName(monthName), tonumber(d)
end

local function fromYearMonthDay(y, m, d)
  return tonumber(y), tonumber(m), tonumber(d)
end

local function fromDayMonthYear(d, m, y)
  return tonumber(y), tonumber(m), tonumber(d)
end

-- Date patterns grouped by type. Every entry is a pair
-- { luaPattern, extractor }: the extractor is called with the pattern's
-- captures and returns year, month, day. Entries are tried in order, so the
-- anchored (strict) patterns come before the unanchored (substring) ones.
local patterns = {
  -- Text-based formats (with month names)
  textFormats = {
    -- "DD Month YYYY" e.g. "12 March 1980"
    { "^(%d?%d)%s+(%a+%.?)%s+(%d%d%d%d)$", fromDayNameYear },
    -- "YYYY Month DD" e.g. "2000 July 12"
    { "^(%d%d%d%d)%s+(%a+%.?)%s+(%d?%d)$", fromYearNameDay },
    -- "DDth Month YYYY" e.g. "12th July 2000"
    { "^(%d?%d)[a-zA-Z]+%s+(%a+%.?)%s+(%d%d%d%d)$", fromDayNameYear },
    -- "Month DDth, YYYY" e.g. "July 12th, 2000"
    { "^(%a+%.?)%s+(%d?%d)[a-zA-Z]*[,]?%s*(%d%d%d%d)$", fromNameDayYear },
    -- "27 Feb. 2014", "7-Feb.-2014" (unanchored: may appear inside longer text)
    { "(%d?%d)[%- ](%a+%.?)[%- ](%d%d%d%d)", fromDayNameYear },
    -- "Feb. 7, 2014" (unanchored). The separators use "+" because the usual
    -- written form puts BOTH a comma and a space between day and year; a
    -- single-character class would never match that form.
    { "(%a+%.?)[, ]+(%d?%d)[, ]+(%d%d%d%d)", fromNameDayYear }
  },

  -- Numeric formats (without month names)
  numericFormats = {
    -- Compact form: "20000712" (YYYYMMDD)
    { "^(%d%d%d%d)(%d%d)(%d%d)$", fromYearMonthDay },
    -- YYYY-MM-DD or YYYY/MM/DD
    { "(%d%d%d%d)[%-/](%d?%d)[%-/](%d?%d)", fromYearMonthDay },
    -- DD-MM-YYYY or DD/MM/YYYY (day first; month-first input is not detected)
    { "(%d?%d)[%-/](%d?%d)[%-/](%d%d%d%d)", fromDayMonthYear },
    -- DD-MM-YY or DD/MM/YY; a two-digit year is always read as 20YY
    { "(%d?%d)[%-/](%d?%d)[%-/](%d%d)", function(d, m, y)
        return tonumber("20"..y), tonumber(m), tonumber(d)
      end }
  },

  -- Year-only format
  yearOnly = {
    -- YYYY only
    { "^(%d%d%d%d)$", function(y)
        return tonumber(y), nil, nil
      end }
  }
}

-- Builds the dateCache key for `text` under the current month-name mode.
-- The mode is part of the key so a result cached in one mode is never
-- returned after p.setUseShortMonthNames() has switched to the other.
local function cacheKeyFor(text)
  return (useShortMonthNames and "short:" or "long:") .. text
end

-- Formats a numeric year/month/day triple in the active month-name style.
-- Returns nil when the triple is not a real calendar date or os.time cannot
-- represent it.
local function formatDateParts(y, m, d)
  if m < 1 or m > 12 or d < 1 or d > 31 then
    return nil
  end
  local timestamp = os.time{year = y, month = m, day = d}
  if not timestamp then
    return nil
  end
  local year = tonumber(os.date("%Y", timestamp))
  local month = tonumber(os.date("%m", timestamp))
  local day = tonumber(os.date("%d", timestamp))
  -- os.time normalizes overflowing fields (e.g. Feb 30 becomes Mar 2), so a
  -- round-trip mismatch means the requested date does not exist.
  if year ~= y or month ~= m or day ~= d then
    return nil
  end
  if useShortMonthNames then
    -- Abbreviated form, day without leading zero: "Jan. 5, 2023"
    return shortMonthNames[month] .. " " .. day .. ", " .. year
  end
  -- Full month name as produced by os.date (note: %d zero-pads the day)
  return os.date("%B %d, %Y", timestamp)
end

-- Tries every { pattern, extractor } pair of `group` in order and returns the
-- formatted date for the first pair that yields a valid year/month/day, or
-- nil when none does.
local function matchPatternGroup(text, group)
  for _, pattern in ipairs(group) do
    local match = { string.match(text, pattern[1]) }
    if #match > 0 then
      local y, m, d = pattern[2](match[1], match[2], match[3])
      if y and m and d then
        local result = formatDateParts(y, m, d)
        if result then
          return result
        end
      end
    end
  end
  return nil
end

-- Parses a date written in one of several common formats and returns it
-- normalized.
--
-- @param inputDate string (or number) to parse; nil and "" are returned as is,
--        as is any value that is neither a string nor a number.
-- @return "Mon. D, YYYY" when short month names are enabled, the
--         os.date("%B %d, %Y") form otherwise; just the year (as a string)
--         when only a year can be identified; or the whitespace-normalized
--         input when nothing can be parsed or the date does not exist.
function p.formatDate(inputDate)
  if not inputDate or inputDate == "" then
    return inputDate
  end
  if type(inputDate) == "number" then
    -- e.g. a bare year passed as a number; string methods are used below
    inputDate = tostring(inputDate)
  elseif type(inputDate) ~= "string" then
    return inputDate
  end

  -- Check cache first for previously processed dates
  local rawKey = cacheKeyFor(inputDate)
  local cached = dateCache[rawKey]
  if cached then
    return cached
  end

  -- Normalize input: trim both ends and collapse whitespace runs to one space
  local text = inputDate:gsub("^%s*(.-)%s*$", "%1"):gsub("%s+", " ")

  -- The same date may already be cached under its normalized spelling
  local normKey = cacheKeyFor(text)
  cached = dateCache[normKey]
  if cached then
    dateCache[rawKey] = cached
    return cached
  end

  -- Stores `result` under both the raw and the normalized key, so either
  -- spelling hits the cache next time, then hands it back.
  local function remember(result)
    dateCache[rawKey] = result
    dateCache[normKey] = result
    return result
  end

  -- Quick check for year-only format first (simplest case)
  for _, pattern in ipairs(patterns.yearOnly) do
    local captured = string.match(text, pattern[1])
    if captured then
      local y = pattern[2](captured)
      if y then
        return remember(tostring(y))
      end
    end
  end

  -- Text-based formats (with month names) first, then numeric formats
  local formatted = matchPatternGroup(text, patterns.textFormats)
    or matchPatternGroup(text, patterns.numericFormats)
  if formatted then
    return remember(formatted)
  end

  -- Fallback: tokenize and heuristically identify components. Tokens are
  -- runs of alphanumerics, so they never carry punctuation.
  local year, month, day
  for token in text:gmatch("%w+") do
    local num = tonumber(token)
    if num then
      if num >= 1000 and num <= 3000 then
        year = num
      elseif num <= 31 and not day then
        -- the first small number is taken as the day ...
        day = num
      elseif num <= 12 and not month then
        -- ... and a later one, if it fits, as the month
        month = num
      end
    else
      local mVal = months[token:sub(1, 1):upper() .. token:sub(2):lower()]
      if mVal then
        month = mVal
      end
    end

    -- Early return as soon as all components are known and form a real date
    if year and month and day then
      local result = formatDateParts(year, month, day)
      if result then
        return remember(result)
      end
    end
  end

  -- If we only found a year but not month and day
  if year and not (month and day) then
    return remember(tostring(year))
  end

  -- No formatting was possible: cache and return the normalized input
  return remember(text)
end

-- Switches month-name abbreviation on or off for subsequently formatted dates.
-- Any truthy `value` enables abbreviations; nil or false disables them.
-- Returns the boolean that was stored.
function p.setUseShortMonthNames(value)
  if value then
    useShortMonthNames = true
  else
    useShortMonthNames = false
  end
  return useShortMonthNames
end

-- Reports the module's current useShortMonthNames setting.
function p.getUseShortMonthNames()
  local shortNamesEnabled = useShortMonthNames
  return shortNamesEnabled
end

return p