Module:SemanticCategoryHelpers
Appearance
Documentation for this module may be created at Module:SemanticCategoryHelpers/doc
-- Module:SemanticCategoryHelpers
-- Provides utilities for semantic property and category handling in templates.
-- Extracted from TemplateHelpers to improve modularity and focus.
--
-- This module combines semantic property and category utilities that are
-- frequently used together in templates. It provides functions for:
-- * Splitting multi-value strings (e.g., "value1; value2 and value3")
-- * Building category tags from category names
-- * Adding categories based on canonical mappings
-- * Processing multi-value semantic properties with a unified approach
-- * Generating semantic properties based on configuration
-- * Retrieving property descriptions from property pages
local p = {}
-- Dependencies
local CanonicalForms = require('Module:CanonicalForms')
local SemanticAnnotations = require('Module:SemanticAnnotations')
local TemplateHelpers = require('Module:TemplateHelpers')
--------------------------------------------------------------------------------
-- Property Description Cache
--------------------------------------------------------------------------------
-- Module-level cache for property descriptions
local propertyDescriptionCache = {}
-- Get the description of a semantic property from its "Property:" page.
--
-- The page content is searched first for a
-- [[Has property description::...]] annotation (a trailing "@en" language tag
-- is stripped), then for a "'''Canonical description''': ..." line, of which
-- the text up to the first ".", "?" or "!" is used.
--
-- Results are memoised in propertyDescriptionCache. Lookups that find nothing
-- are stored as `false`, because assigning nil to a table field stores nothing
-- and would cause the page to be fetched again on every call.
--
-- @param propertyName string The name of the property (e.g., "Has interview format")
-- @return string|nil The property description or nil if not found
function p.getPropertyDescription(propertyName)
    if propertyName == nil or propertyName == "" then
        return nil
    end

    -- Check cache first; `false` marks a previous unsuccessful lookup
    local cached = propertyDescriptionCache[propertyName]
    if cached ~= nil then
        return cached or nil
    end

    local description
    local propertyPage = mw.title.new("Property:" .. propertyName)
    local content = propertyPage and propertyPage.exists and propertyPage:getContent()

    if content then
        -- Capture everything up to the closing brackets and strip the
        -- language tag afterwards. Folding an optional "@en" into the pattern
        -- would let it swallow a trailing "e"/"n" of an untagged description.
        description = content:match("%[%[Has property description::(.-)%]%]")
        if description then
            description = (description:gsub("@en$", ""))
        else
            -- Try canonical description as fallback
            description = content:match("'''Canonical description''': (.-)[%.%?!]")
        end
    end

    propertyDescriptionCache[propertyName] = description or false
    return description
end
--------------------------------------------------------------------------------
-- Property Type Registry
--------------------------------------------------------------------------------
-- Registry of the built-in property types, keyed by type name.
-- Each entry provides:
--   * getPropertyName: function returning the semantic property name, read
--     from Module:ConfigRepository when called (the module is required lazily)
--   * processor (optional): function that receives one raw value and returns
--     the value to store, or nil to reject it
local propertyTypes = {}

propertyTypes.country = {
    getPropertyName = function()
        local ConfigRepository = require('Module:ConfigRepository')
        return ConfigRepository.semanticProperties.country
    end,
    -- Normalises the country name; unrecognised countries are rejected
    processor = function(value)
        local countryName = require('Module:CountryData').normalizeCountryName(value)
        if countryName ~= "(Unrecognized)" then
            return countryName
        end
        return nil
    end
}

propertyTypes.region = {
    getPropertyName = function()
        local ConfigRepository = require('Module:ConfigRepository')
        return ConfigRepository.semanticProperties.region
    end,
    -- Rejects the "(Unrecognized)" marker, otherwise trims surrounding whitespace
    processor = function(value)
        if value ~= "(Unrecognized)" then
            return value:match("^%s*(.-)%s*$")
        end
        return nil
    end
}

propertyTypes.language = {
    getPropertyName = function()
        local ConfigRepository = require('Module:ConfigRepository')
        return ConfigRepository.semanticProperties.language
    end,
    -- Delegates to Module:NormalizationLanguage
    processor = function(value)
        local NormalizationLanguage = require('Module:NormalizationLanguage')
        return NormalizationLanguage.normalize(value)
    end
}

-- No processor: person values are stored as given
propertyTypes.person = {
    getPropertyName = function()
        local ConfigRepository = require('Module:ConfigRepository')
        return ConfigRepository.semanticProperties.person
    end
}
--------------------------------------------------------------------------------
-- Core Utilities
--------------------------------------------------------------------------------
-- Semicolon-only delimiter configuration: a list holding a single rule whose
-- Lua pattern ";%s*" (a semicolon followed by any whitespace) is paired with
-- the replacement ";".
-- Kept for backward compatibility with splitSemicolonValues and exposed on the
-- module table as a constant for use by other modules.
-- NOTE(review): presumably passed to splitMultiValueString as its delimiter
-- list -- confirm against Module:TemplateHelpers.
p.SEMICOLON_PATTERN = {{pattern = ";%s*", replacement = ";"}}
-- Splits a multi-value string into its individual values.
-- Thin pass-through: every argument is forwarded unchanged to
-- splitMultiValueString in Module:TemplateHelpers and all of its results are
-- returned.
function p.splitMultiValueString(...)
    local helpers = require('Module:TemplateHelpers')
    return helpers.splitMultiValueString(...)
end
-- Reports whether a field value appears to hold several values, i.e. whether
-- it contains a semicolon or the word "and" surrounded by whitespace.
-- Always returns a real boolean, and non-string input yields false instead of
-- raising an error.
-- @param value The raw field value
-- @return boolean true when a multi-value delimiter is present
function p.isMultiValueField(value)
    if type(value) ~= "string" or value == "" then
        return false
    end
    -- Plain (non-pattern) search for the semicolon delimiter
    if value:find(";", 1, true) then
        return true
    end
    return value:find("%s+and%s+") ~= nil
end
--------------------------------------------------------------------------------
-- Category Utilities
--------------------------------------------------------------------------------
-- Wraps a category name in MediaWiki category-link syntax.
-- Accepts three input shapes:
--   "[[Category:Name]]" -> returned unchanged
--   "Category:Name"     -> wrapped in double square brackets
--   "Name"              -> prefixed with "Category:" and wrapped
-- A nil/false or empty name produces an empty string.
function p.formatCategoryName(categoryName)
    if not categoryName or categoryName == "" then
        return ""
    end

    local alreadyWrapped = categoryName:match("^%[%[Category:[^%]]+%]%]$")
    if alreadyWrapped then
        return categoryName
    end

    local template = "[[Category:%s]]"
    if categoryName:match("^Category:") then
        -- The namespace prefix is present, only the brackets are missing
        template = "[[%s]]"
    end
    return string.format(template, categoryName)
end
-- Builds the category wikitext for a list of category names.
-- Each name is passed through p.formatCategoryName and the resulting tags are
-- joined with newlines. A nil or empty list produces an empty string.
function p.buildCategories(categories)
    if not categories or #categories == 0 then
        return ""
    end

    local tags = {}
    for position, name in ipairs(categories) do
        tags[position] = p.formatCategoryName(name)
    end
    return table.concat(tags, "\n")
end
-- Looks up the category associated with a value through a canonical mapping.
-- The value is normalised with CanonicalForms.normalize; the first mapping
-- group whose `canonical` equals the normalised form and which defines a
-- `category` contributes that category.
-- Returns a list holding at most one category name; the list is empty when
-- the value is empty, the mapping is missing, or nothing matches.
function p.addMappingCategories(value, mapping)
    local categories = {}
    if not value or value == "" or not mapping then
        return categories
    end

    -- Parentheses keep only the first result of normalize
    local canonical = (CanonicalForms.normalize(value, mapping))
    if not canonical then
        return categories
    end

    for _, group in ipairs(mapping) do
        if group.canonical == canonical and group.category then
            categories[#categories + 1] = group.category
            break
        end
    end
    return categories
end
-- Generic helper that adds the categories derived from a multi-value field.
--
-- The field value is split into items (with options.valueGetter when it is a
-- function, otherwise with p.splitMultiValueString), each item is optionally
-- passed through `processor`, and every non-empty result is appended to
-- `categories`. The combined list is then passed through
-- TemplateHelpers.removeDuplicates and that result is returned.
--
-- @param value The raw field value; when nil or empty, `categories` is
--        returned untouched
-- @param processor Optional function mapping an item to a category name;
--        items for which it returns nil/false or "" are dropped
-- @param categories Existing list of categories to extend (appended to in
--        place); a new list is started when it is nil
-- @param options Optional table; options.valueGetter(value) may supply the
--        list of items instead of the default split
-- @return The de-duplicated category list
function p.addMultiValueCategories(value, processor, categories, options)
    if not value or value == "" then return categories end

    -- A missing list must not abort the template
    categories = categories or {}
    options = options or {}

    -- Get the values to process
    local items
    if type(options.valueGetter) == "function" then
        items = options.valueGetter(value)
    else
        items = p.splitMultiValueString(value)
    end

    -- Process each item and append the valid ones
    local hasProcessor = type(processor) == "function"
    for _, item in ipairs(items or {}) do
        local processedItem = item
        if hasProcessor then
            processedItem = processor(item)
        end
        if processedItem and processedItem ~= "" then
            table.insert(categories, processedItem)
        end
    end

    return TemplateHelpers.removeDuplicates(categories)
end
-- Splits a region string that may contain "and" conjunctions into individual
-- region names.
-- Kept for backward compatibility: the value is forwarded as the only argument
-- to p.splitMultiValueString and that call's result is returned unchanged.
-- @param regionValue The raw region field value
-- @return Whatever p.splitMultiValueString returns for regionValue
function p.splitRegionCategories(regionValue)
return p.splitMultiValueString(regionValue)
end
--------------------------------------------------------------------------------
-- Semantic Property Helpers
--------------------------------------------------------------------------------
-- Unified function to add semantic properties for any property type.
--
-- The property type is resolved in this order:
--   1. a built-in entry of the propertyTypes registry (name and processor
--      come from the registry),
--   2. a key of ConfigRepository.semanticProperties (the configured name is
--      used, with options.processor as processor),
--   3. otherwise propertyType itself is used as the property name (again with
--      options.processor).
--
-- The value is split into items, each item is run through the processor (if
-- any), empty results are dropped and the remainder is passed through
-- TemplateHelpers.removeDuplicates. When mw.smw is available the values are
-- set in one SemanticAnnotations.setSemanticProperties call; otherwise one
-- hidden <div> containing a {{#set:}} call is emitted per value.
--
-- @param propertyType - The type of property (e.g., "country", "region", "language")
-- @param value - The value to process; when nil or empty nothing happens
-- @param semanticOutput - The current semantic output to append to; nil is
--        treated as an empty string when something has to be appended
-- @param options - Additional options: processor (function), valueGetter (function)
-- @return The updated semantic output
function p.addSemanticProperties(propertyType, value, semanticOutput, options)
    if not value or value == "" then return semanticOutput end
    options = options or {}

    -- Get configuration for this property type
    local config = propertyTypes[propertyType]
    if not config then
        -- Not a built-in type: use the configured property name when there is
        -- one, otherwise treat propertyType as a direct property name
        local ConfigRepository = require('Module:ConfigRepository')
        local resolvedName = ConfigRepository.semanticProperties[propertyType] or propertyType
        config = {
            getPropertyName = function() return resolvedName end,
            processor = options.processor
        }
    end
    local propertyName = config.getPropertyName()

    -- Get the values to process
    local items
    if type(options.valueGetter) == "function" then
        items = options.valueGetter(value)
    else
        items = p.splitMultiValueString(value)
    end

    -- Process each item and keep the valid results
    local processor = config.processor
    local hasProcessor = type(processor) == "function"
    local validValues = {}
    for _, item in ipairs(items or {}) do
        local processedItem = item
        if hasProcessor then
            processedItem = processor(item)
        end
        if processedItem and processedItem ~= "" then
            table.insert(validValues, processedItem)
        end
    end

    validValues = TemplateHelpers.removeDuplicates(validValues)
    if #validValues == 0 then
        return semanticOutput
    end

    if mw.smw then
        -- A single value is passed as a scalar, several values as a list
        local propertyValues = {}
        if #validValues == 1 then
            propertyValues[propertyName] = validValues[1]
        else
            propertyValues[propertyName] = validValues
        end

        -- No template args and no transforms: the values are already final
        local additionalOutput = SemanticAnnotations.setSemanticProperties(
            {},
            propertyValues,
            {transform = nil}
        )
        if additionalOutput and additionalOutput ~= "" then
            semanticOutput = (semanticOutput or "") .. additionalOutput
        end
    else
        -- Non-SMW fallback: collect the fragments and concatenate them once
        local propertyHtml = {}
        for _, processedItem in ipairs(validValues) do
            table.insert(propertyHtml, '<div style="display:none;">')
            table.insert(propertyHtml, ' {{#set: ' .. propertyName .. '=' .. processedItem .. ' }}')
            table.insert(propertyHtml, '</div>')
        end
        if #propertyHtml > 0 then
            semanticOutput = (semanticOutput or "") .. "\n" .. table.concat(propertyHtml, "\n")
        end
    end

    return semanticOutput
end
-- Helper function to process additional properties with multi-value support.
-- This standardizes how additional properties are handled across templates.
--
-- For every property in semanticConfig.additionalProperties that is not
-- listed in skipProperties, the values of its source fields are read with
-- TemplateHelpers.getFieldValue, split with p.splitMultiValueString, run
-- through semanticConfig.transforms[property] when that is a function, and
-- collected. The collected values of each property are passed through
-- TemplateHelpers.removeDuplicates and then set in a single batch: through
-- SemanticAnnotations.setSemanticProperties when mw.smw is available,
-- otherwise as hidden <div> elements containing {{#set:}} calls.
--
-- @param args - Template parameters
-- @param semanticConfig - Config with additionalProperties (property -> list
--        of source field names) and optional transforms (property -> function)
-- @param semanticOutput - The current semantic output to append to; nil is
--        treated as an empty string when something has to be appended
-- @param skipProperties - Optional set (property -> true) of properties to ignore
-- @return The updated semantic output
function p.processAdditionalProperties(args, semanticConfig, semanticOutput, skipProperties)
    if not semanticConfig or not semanticConfig.additionalProperties then
        return semanticOutput
    end
    skipProperties = skipProperties or {}
    local transforms = semanticConfig.transforms

    -- Map of property -> list of collected values, for batch processing
    local allBatchProperties = {}

    for property, sourceFields in pairs(semanticConfig.additionalProperties) do
        -- Skip properties that are handled separately
        if not skipProperties[property] then
            local transform = transforms and transforms[property]
            local hasTransform = type(transform) == "function"

            for _, fieldName in ipairs(sourceFields) do
                local _, value = TemplateHelpers.getFieldValue(args, { key = fieldName })
                if value and value ~= "" then
                    for _, singleValue in ipairs(p.splitMultiValueString(value)) do
                        local transformedValue = singleValue
                        if hasTransform then
                            transformedValue = transform(singleValue)
                        end
                        if transformedValue and transformedValue ~= "" then
                            -- Create the per-property list on first use
                            local bucket = allBatchProperties[property]
                            if not bucket then
                                bucket = {}
                                allBatchProperties[property] = bucket
                            end
                            bucket[#bucket + 1] = transformedValue
                        end
                    end
                end
            end
        end
    end

    -- Nothing collected: leave the output untouched
    if next(allBatchProperties) == nil then
        return semanticOutput
    end

    -- Deduplicate all property values before they are set
    -- (only existing keys are reassigned, which is allowed during traversal)
    for prop, values in pairs(allBatchProperties) do
        allBatchProperties[prop] = TemplateHelpers.removeDuplicates(values)
    end

    if mw.smw then
        -- No template args and no transforms: the values are already final
        local additionalOutput = SemanticAnnotations.setSemanticProperties(
            {},
            allBatchProperties,
            {transform = nil}
        )
        if additionalOutput and additionalOutput ~= "" then
            semanticOutput = (semanticOutput or "") .. additionalOutput
        end
    else
        -- Fallback to HTML generation for the non-SMW case
        local propertyHtml = {}
        for property, values in pairs(allBatchProperties) do
            for _, value in ipairs(values) do
                table.insert(propertyHtml, '<div style="display:none;">')
                table.insert(propertyHtml, ' {{#set: ' .. property .. '=' .. value .. ' }}')
                table.insert(propertyHtml, '</div>')
            end
        end
        -- Concatenate all property HTML fragments at once
        if #propertyHtml > 0 then
            semanticOutput = (semanticOutput or "") .. "\n" .. table.concat(propertyHtml, "\n")
        end
    end

    return semanticOutput
end
-- Generates semantic properties based on configuration.
--
-- All properties are gathered into one mapping and handed to
-- SemanticAnnotations.setSemanticProperties in a single call:
--   * entries of semanticConfig.properties are copied as they are;
--   * for each entry of semanticConfig.additionalProperties (unless listed in
--     options.skipProperties) the non-empty args values of its source fields
--     are collected. When semanticConfig.transforms[property] is a function
--     the value is split with p.splitMultiValueString and every item is
--     transformed; otherwise the raw value is added as is, without splitting.
-- Collected values are appended to an already present entry of the same
-- property. The entry is copied first so that the tables of the caller's
-- semanticConfig are never modified. Table-valued entries are passed through
-- TemplateHelpers.removeDuplicates before the call.
--
-- @param args - Template parameters
-- @param semanticConfig - Config with properties, transforms, additionalProperties
-- @param options - Options: transform (functions, used only when
--        semanticConfig.transforms is absent), skipProperties (to exclude)
-- @return Wikitext with semantic annotations ("" when args or semanticConfig
--         is missing)
function p.generateSemanticProperties(args, semanticConfig, options)
    if not args or not semanticConfig then return "" end
    options = options or {}

    local transforms = semanticConfig.transforms
    local semanticOptions = {
        transform = transforms or options.transform
    }
    local skipProperties = options.skipProperties or {}

    -- Collect all properties in a single batch for complete deduplication
    local allProperties = {}
    if semanticConfig.properties then
        for property, param in pairs(semanticConfig.properties) do
            allProperties[property] = param
        end
    end

    if semanticConfig.additionalProperties then
        for property, sourceFields in pairs(semanticConfig.additionalProperties) do
            -- Skip properties that are handled separately
            if not skipProperties[property] then
                local transform = transforms and transforms[property]
                local hasTransform = type(transform) == "function"

                -- Collect all values for this property
                local allValues = {}
                for _, fieldName in ipairs(sourceFields) do
                    local value = args[fieldName]
                    if value and value ~= "" then
                        if hasTransform then
                            for _, item in ipairs(p.splitMultiValueString(value)) do
                                local transformed = transform(item)
                                if transformed and transformed ~= "" then
                                    allValues[#allValues + 1] = transformed
                                end
                            end
                        else
                            -- No transform, add as is
                            allValues[#allValues + 1] = value
                        end
                    end
                end

                -- Only add if we have values
                if #allValues > 0 then
                    local existing = allProperties[property]
                    if existing then
                        -- Merge into a fresh table: `existing` may be the very
                        -- table stored in semanticConfig.properties
                        local merged = {}
                        if type(existing) == "table" then
                            for key, entry in pairs(existing) do
                                merged[key] = entry
                            end
                        else
                            merged[1] = existing
                        end
                        for _, val in ipairs(allValues) do
                            table.insert(merged, val)
                        end
                        allProperties[property] = merged
                    elseif #allValues == 1 then
                        -- If only one value, add directly
                        allProperties[property] = allValues[1]
                    else
                        allProperties[property] = allValues
                    end
                end
            end
        end
    end

    -- Deduplicate all property values before sending to SemanticAnnotations
    -- (only existing keys are reassigned, which is allowed during traversal)
    for prop, values in pairs(allProperties) do
        if type(values) == 'table' then
            allProperties[prop] = TemplateHelpers.removeDuplicates(values)
        end
    end

    -- Now process all collected properties in one batch
    local semanticOutput = SemanticAnnotations.setSemanticProperties(
        args,
        allProperties,
        semanticOptions
    )
    return semanticOutput
end
return p