Module:Citation/CS1/Date validation and Module:Citation/CS1/Date validation/sandbox: Difference between pages
Appearance
(Difference between pages)
Content deleted Content added
julian date fix; |
No edit summary |
||
Line 1: | Line 1: | ||
--[[ |
|||
History of changes since last sync: 2024-03-23 |
|||
2024-07-12: maint cat for |year= value more precise than a year; see Help_talk:Citation_Style_1#year_parameter |
|||
]] |
|||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- |
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- |
||
]] |
]] |
||
Line 384: | Line 392: | ||
--[[--------------------------< P A T T E R N S > |
--[[--------------------------< P A T T E R N S >---------------------------------------------------------- |
||
this is the list of patterns for date formats that this module recognizes. Approximately the first half of these |
this is the list of patterns for date formats that this module recognizes. Approximately the first half of these |
||
Line 396: | Line 404: | ||
]] |
]] |
||
local |
local patterns_t = { |
||
-- year-initial numerical year-month-day |
-- year-initial numerical year-month-day |
||
['ymd'] = {'^(%d%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'}, |
['ymd'] = {'^(%d%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'}, |
||
Line 445: | Line 453: | ||
local function is_valid_embargo_date (v) |
local function is_valid_embargo_date (v) |
||
if v:match ( |
if v:match (patterns_t['ymd'][1]) or -- ymd |
||
v:match ( |
v:match (patterns_t['Mdy'][1]) or -- mdy |
||
v:match ( |
v:match (patterns_t['dMy'][1]) then -- dmy |
||
return true, v; |
return true, v; |
||
end |
end |
||
Line 486: | Line 494: | ||
local coins_date; |
local coins_date; |
||
if date_string:match ( |
if date_string:match (patterns_t['ymd'][1]) then -- year-initial numerical year month day format |
||
year, month, day = date_string:match ( |
year, month, day = date_string:match (patterns_t['ymd'][1]); |
||
if 12 < tonumber(month) or 1 > tonumber(month) or 1582 > tonumber(year) or 0 == tonumber(day) then return false; end -- month or day number not valid or not Gregorian calendar |
if 12 < tonumber(month) or 1 > tonumber(month) or 1582 > tonumber(year) or 0 == tonumber(day) then return false; end -- month or day number not valid or not Gregorian calendar |
||
anchor_year = year; |
anchor_year = year; |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['Mdy'][1]) then -- month-initial: month day, year |
||
month, day, anchor_year, year = mw.ustring.match(date_string, |
month, day, anchor_year, year = mw.ustring.match(date_string, patterns_t['Mdy'][1]); |
||
month = get_month_number (month); |
month = get_month_number (month); |
||
if 0 == month then return false; end -- return false if month text isn't one of the twelve months |
if 0 == month then return false; end -- return false if month text isn't one of the twelve months |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['Md-dy'][1]) then -- month-initial day range: month day–day, year; days are separated by endash |
||
month, day, day2, anchor_year, year = mw.ustring.match(date_string, |
month, day, day2, anchor_year, year = mw.ustring.match(date_string, patterns_t['Md-dy'][1]); |
||
if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same; |
if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same; |
||
month = get_month_number (month); |
month = get_month_number (month); |
||
Line 504: | Line 512: | ||
year2 = year; |
year2 = year; |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['dMy'][1]) then -- day-initial: day month year |
||
day, month, anchor_year, year = mw.ustring.match(date_string, |
day, month, anchor_year, year = mw.ustring.match(date_string, patterns_t['dMy'][1]); |
||
month = get_month_number (month); |
month = get_month_number (month); |
||
if 0 == month then return false; end -- return false if month text isn't one of the twelve months |
if 0 == month then return false; end -- return false if month text isn't one of the twelve months |
||
--[[ NOT supported at en.wiki |
--[[ NOT supported at en.wiki |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['yMd'][1]) then -- year-initial: year month day; day: 1 or 2 two digits, leading zero allowed |
||
anchor_year, year, month, day = mw.ustring.match(date_string, |
anchor_year, year, month, day = mw.ustring.match(date_string, patterns_t['yMd'][1]); |
||
month = get_month_number (month); |
month = get_month_number (month); |
||
if 0 == month then return false; end -- return false if month text isn't one of the twelve months |
if 0 == month then return false; end -- return false if month text isn't one of the twelve months |
||
-- end NOT supported at en.wiki ]] |
-- end NOT supported at en.wiki ]] |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['d-dMy'][1]) then -- day-range-initial: day–day month year; days are separated by endash |
||
day, day2, month, anchor_year, year = mw.ustring.match(date_string, |
day, day2, month, anchor_year, year = mw.ustring.match(date_string, patterns_t['d-dMy'][1]); |
||
if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same; |
if tonumber(day) >= tonumber(day2) then return false; end -- date range order is left to right: earlier to later; dates may not be the same; |
||
month = get_month_number (month); |
month = get_month_number (month); |
||
Line 524: | Line 532: | ||
year2 = year; |
year2 = year; |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['dM-dMy'][1]) then -- day initial month-day-range: day month - day month year; uses spaced endash |
||
day, month, day2, month2, anchor_year, year = mw.ustring.match(date_string, |
day, month, day2, month2, anchor_year, year = mw.ustring.match(date_string, patterns_t['dM-dMy'][1]); |
||
if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end -- date range order is left to right: earlier to later; |
if (not is_valid_month_season_range(month, month2)) or not is_valid_year(year) then return false; end -- date range order is left to right: earlier to later; |
||
month = get_month_number (month); -- for metadata |
month = get_month_number (month); -- for metadata |
||
Line 531: | Line 539: | ||
year2 = year; |
year2 = year; |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['Md-Mdy'][1]) then -- month initial month-day-range: month day – month day, year; uses spaced endash |
||
month, day, month2, day2, anchor_year, year = mw.ustring.match(date_string, |
month, day, month2, day2, anchor_year, year = mw.ustring.match(date_string, patterns_t['Md-Mdy'][1]); |
||
if (not is_valid_month_season_range(month, month2, param)) or not is_valid_year(year) then return false; end |
if (not is_valid_month_season_range(month, month2, param)) or not is_valid_year(year) then return false; end |
||
month = get_month_number (month); -- for metadata |
month = get_month_number (month); -- for metadata |
||
Line 538: | Line 546: | ||
year2 = year; |
year2 = year; |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['dMy-dMy'][1]) then -- day initial month-day-year-range: day month year - day month year; uses spaced endash |
||
day, month, year, day2, month2, anchor_year, year2 = mw.ustring.match(date_string, |
day, month, year, day2, month2, anchor_year, year2 = mw.ustring.match(date_string, patterns_t['dMy-dMy'][1]); |
||
if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later |
if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later |
||
if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style |
if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style |
||
Line 546: | Line 554: | ||
if 0 == month or 0 == month2 then return false; end -- both must be valid |
if 0 == month or 0 == month2 then return false; end -- both must be valid |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['Mdy-Mdy'][1]) then -- month initial month-day-year-range: month day, year – month day, year; uses spaced endash |
||
month, day, year, month2, day2, anchor_year, year2 = mw.ustring.match(date_string, |
month, day, year, month2, day2, anchor_year, year2 = mw.ustring.match(date_string, patterns_t['Mdy-Mdy'][1]); |
||
if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later |
if tonumber(year2) <= tonumber(year) then return false; end -- must be sequential years, left to right, earlier to later |
||
if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style |
if not is_valid_year(year2) or not is_valid_month_range_style(month, month2) then return false; end -- year2 no more than one year in the future; months same style |
||
Line 554: | Line 562: | ||
if 0 == month or 0 == month2 then return false; end -- both must be valid |
if 0 == month or 0 == month2 then return false; end -- both must be valid |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['Sy4-y2'][1]) then -- special case Winter/Summer year-year (YYYY-YY); year separated with unspaced endash |
||
local century; |
local century; |
||
month, year, century, anchor_year, year2 = mw.ustring.match(date_string, |
month, year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns_t['Sy4-y2'][1]); |
||
if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer |
if 'Winter' ~= month and 'Summer' ~= month then return false end; -- 'month' can only be Winter or Summer |
||
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years |
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years |
||
Line 564: | Line 572: | ||
month = get_season_number(month, param); |
month = get_season_number(month, param); |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['Sy-y'][1]) then -- special case Winter/Summer year-year; year separated with unspaced endash |
||
month, year, anchor_year, year2 = mw.ustring.match(date_string, |
month, year, anchor_year, year2 = mw.ustring.match(date_string, patterns_t['Sy-y'][1]); |
||
month = get_season_number (month, param); -- <month> can only be winter or summer; also for metadata |
month = get_season_number (month, param); -- <month> can only be winter or summer; also for metadata |
||
if (month ~= cfg.date_names['en'].season['Winter']) and (month ~= cfg.date_names['en'].season['Summer']) then |
if (month ~= cfg.date_names['en'].season['Winter']) and (month ~= cfg.date_names['en'].season['Summer']) then |
||
Line 574: | Line 582: | ||
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year |
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['My-My'][1]) then -- month/season year - month/season year; separated by spaced endash |
||
month, year, month2, anchor_year, year2 = mw.ustring.match(date_string, |
month, year, month2, anchor_year, year2 = mw.ustring.match(date_string, patterns_t['My-My'][1]); |
||
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years |
anchor_year = year .. '–' .. anchor_year; -- assemble anchor_year from both years |
||
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same |
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same |
||
Line 589: | Line 597: | ||
end |
end |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['M-My'][1]) then -- month/season range year; months separated by endash |
||
month, month2, anchor_year, year = mw.ustring.match(date_string, |
month, month2, anchor_year, year = mw.ustring.match(date_string, patterns_t['M-My'][1]); |
||
if (not is_valid_month_season_range(month, month2, param)) or (not is_valid_year(year)) then return false; end |
if (not is_valid_month_season_range(month, month2, param)) or (not is_valid_year(year)) then return false; end |
||
if 0 ~= get_month_number(month) then -- determined to be a valid range so just check this one to know if month or season |
if 0 ~= get_month_number(month) then -- determined to be a valid range so just check this one to know if month or season |
||
Line 602: | Line 610: | ||
year2 = year; |
year2 = year; |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['My'][1]) then -- month/season/quarter/proper-name year |
||
month, anchor_year, year = mw.ustring.match(date_string, |
month, anchor_year, year = mw.ustring.match(date_string, patterns_t['My'][1]); |
||
if not is_valid_year(year) then return false; end |
if not is_valid_year(year) then return false; end |
||
month = get_element_number(month, param); -- get month season quarter proper-name number or nil |
month = get_element_number(month, param); -- get month season quarter proper-name number or nil |
||
if not month then return false; end -- not valid whatever it is |
if not month then return false; end -- not valid whatever it is |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['y-y'][1]) then -- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999 |
||
year, anchor_year, year2 = mw.ustring.match(date_string, |
year, anchor_year, year2 = mw.ustring.match(date_string, patterns_t['y-y'][1]); |
||
anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years |
anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years |
||
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same |
if tonumber(year) >= tonumber(year2) then return false; end -- left to right, earlier to later, not the same |
||
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year |
if not is_valid_year(year2) then return false; end -- no year farther in the future than next year |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['y4-y2'][1]) then -- Year range: YYYY–YY; separated by unspaced endash |
||
local century; |
local century; |
||
year, century, anchor_year, year2 = mw.ustring.match(date_string, |
year, century, anchor_year, year2 = mw.ustring.match(date_string, patterns_t['y4-y2'][1]); |
||
anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years |
anchor_year = year .. '–' .. anchor_year; -- assemble anchor year from both years |
||
Line 628: | Line 636: | ||
end |
end |
||
elseif mw.ustring.match(date_string, |
elseif mw.ustring.match(date_string, patterns_t['y'][1]) then -- year; here accept either YYY or YYYY |
||
anchor_year, year = mw.ustring.match(date_string, |
anchor_year, year = mw.ustring.match(date_string, patterns_t['y'][1]); |
||
if false == is_valid_year(year) then |
if false == is_valid_year(year) then |
||
return false; |
return false; |
||
Line 743: | Line 751: | ||
end |
end |
||
return anchor_year, embargo_date; -- and done |
return anchor_year, embargo_date; -- and done |
||
end |
|||
--[[--------------------------< Y E A R _ C H E C K >---------------------------------------------------------- |
|||
Temporary function to test |year= for acceptable values: |
|||
YYY, YYYY, year-only ranges, their circa forms, with or without CITEREF disambiguators. |
|||
When |year= holds some form of date that is not one of these year-only dates, emit a maintenance message. |
|||
This function necessary because many non-cs1|2 templates have a |year= parameter so cirrus searches are more-or- |
|||
less useless |
|||
]] |
|||
local function year_check (year) |
|||
year = year:gsub ('c%. *', ''); -- remove circa annotation (if present) before testing <year> |
|||
for _, index in ipairs ({'y-y', 'y4-y2', 'y'}) do -- spin through these indexes into patterns_t |
|||
if mw.ustring.match (year, patterns_t[index][1]) then |
|||
return; -- if a match then |year= holds a valid 'year' |
|||
end |
|||
end |
|||
set_message ('maint_year'); -- if here, |year= value is not an accepted value; add a maint cat |
|||
end |
end |
||
Line 816: | Line 849: | ||
format string used by string.format() |
format string used by string.format() |
||
identifier letters ('d', 'm', 'y', 'd2', 'm2', 'y2') that serve as indexes into a table t{} that holds captures |
identifier letters ('d', 'm', 'y', 'd2', 'm2', 'y2') that serve as indexes into a table t{} that holds captures |
||
from mw.ustring.match() for the various date parts specified by |
from mw.ustring.match() for the various date parts specified by patterns_t[pattern_idx][1] |
||
Items in |
Items in patterns_t{} have the general form: |
||
['ymd'] = {'^(%d%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'}, where: |
['ymd'] = {'^(%d%d%d%d)%-(%d%d)%-(%d%d)$', 'y', 'm', 'd'}, where: |
||
['ymd'] is pattern_idx |
['ymd'] is pattern_idx |
||
['ymd'][1] is the match pattern with captures for mw.ustring.match() |
|||
['ymd'][2] is an indicator letter identifying the content of the first capture |
|||
['ymd'][3] ... the second capture etc. |
|||
when a pattern matches a date, the captures are loaded into table t{} in capture order using the identifier |
when a pattern matches a date, the captures are loaded into table t{} in capture order using the identifier |
||
Line 832: | Line 865: | ||
format_param set to the desired format. This function loads table t{} as described and then calls string.format() |
format_param set to the desired format. This function loads table t{} as described and then calls string.format() |
||
with the format string specified by re_format[pattern_idx][format_param][1] using values taken from t{} according |
with the format string specified by re_format[pattern_idx][format_param][1] using values taken from t{} according |
||
to the capture identifier letters specified by |
to the capture identifier letters specified by patterns_t[pattern_idx][format_param][n] where n is 2.. |
||
]] |
]] |
||
Line 897: | Line 930: | ||
local function reformatter (date, pattern_idx, format_param, mon_len) |
local function reformatter (date, pattern_idx, format_param, mon_len) |
||
if not in_array (pattern_idx, {'ymd', 'Mdy', 'Md-dy', 'dMy', 'yMd', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy', 'My-My', 'M-My', 'My'}) then |
if not in_array (pattern_idx, {'ymd', 'Mdy', 'Md-dy', 'dMy', 'yMd', 'd-dMy', 'dM-dMy', 'Md-Mdy', 'dMy-dMy', 'Mdy-Mdy', 'My-My', 'M-My', 'My'}) then |
||
return; -- not in this set of date format |
return; -- not in this set of date format then not a reformattable date |
||
end |
end |
||
Line 914: | Line 947: | ||
end |
end |
||
local c1, c2, c3, c4, c5, c6, c7; -- these hold the captures specified in |
local c1, c2, c3, c4, c5, c6, c7; -- these hold the captures specified in patterns_t[pattern_idx][1] |
||
c1, c2, c3, c4, c5, c6, c7 = mw.ustring.match (date, |
c1, c2, c3, c4, c5, c6, c7 = mw.ustring.match (date, patterns_t[pattern_idx][1]); -- get the captures |
||
local t = { -- table that holds k/v pairs of date parts from the captures and |
local t = { -- table that holds k/v pairs of date parts from the captures and patterns_t[pattern_idx][2..] |
||
[ |
[patterns_t[pattern_idx][2]] = c1; -- at minimum there is always one capture with a matching indicator letter |
||
[ |
[patterns_t[pattern_idx][3] or 'x'] = c2; -- can have a variable number of captures; each capture requires an indicator letter; |
||
[ |
[patterns_t[pattern_idx][4] or 'x'] = c3; -- where there is no capture, there is no indicator letter so n in patterns_t[pattern_idx][n] will be nil; |
||
[ |
[patterns_t[pattern_idx][5] or 'x'] = c4; -- the 'x' here spoofs an indicator letter to prevent 'table index is nil' error |
||
[ |
[patterns_t[pattern_idx][6] or 'x'] = c5; |
||
[ |
[patterns_t[pattern_idx][7] or 'x'] = c6; |
||
[ |
[patterns_t[pattern_idx][8] or 'x'] = c7; |
||
}; |
}; |
||
Line 1,024: | Line 1,057: | ||
if is_set (param_val.val) then -- if the parameter has a value |
if is_set (param_val.val) then -- if the parameter has a value |
||
if not (not all and in_array (param_name, {'access-date', 'archive-date'})) then -- skip access- or archive-date unless format is xxx-all; yeah, ugly; TODO: find a better way |
if not (not all and in_array (param_name, {'access-date', 'archive-date'})) then -- skip access- or archive-date unless format is xxx-all; yeah, ugly; TODO: find a better way |
||
for pattern_idx, pattern in pairs ( |
for pattern_idx, pattern in pairs (patterns_t) do |
||
if mw.ustring.match (param_val.val, pattern[1]) then |
if mw.ustring.match (param_val.val, pattern[1]) then |
||
if all and in_array (param_name, {'access-date', 'archive-date'}) then -- if this date is an access- or archive-date |
if all and in_array (param_name, {'access-date', 'archive-date'}) then -- if this date is an access- or archive-date |
||
Line 1,060: | Line 1,093: | ||
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list |
for param_name, param_val in pairs(date_parameters_list) do -- for each date-holding parameter in the list |
||
if is_set (param_val.val) and |
if is_set (param_val.val) and |
||
not mw.ustring.match (param_val.val, |
not mw.ustring.match (param_val.val, patterns_t.ymd[1]) then -- for those that are not ymd dates (ustring because here digits may not be Western) |
||
param_val.val, n = param_val.val:gsub ('%-', '–'); -- replace any hyphen with ndash |
param_val.val, n = param_val.val:gsub ('%-', '–'); -- replace any hyphen with ndash |
||
if 0 ~= n then |
if 0 ~= n then |
||
Line 1,170: | Line 1,203: | ||
for _, v_t in ipairs ({{'dMy', 'dmy-all'}, {'Mdy', 'mdy-all'}}) do -- is |archive-date= format dmy or mdy? |
for _, v_t in ipairs ({{'dMy', 'dmy-all'}, {'Mdy', 'mdy-all'}}) do -- is |archive-date= format dmy or mdy? |
||
if archive_date:match ( |
if archive_date:match (patterns_t[v_t[1]][1]) then -- does the pattern match? |
||
archive_date_format = cfg.keywords_xlate[v_t[2]]; -- get appropriate |df= supported keyword from the i18n translator table |
archive_date_format = cfg.keywords_xlate[v_t[2]]; -- get appropriate |df= supported keyword from the i18n translator table |
||
break; |
break; |
||
Line 1,202: | Line 1,235: | ||
reformat_dates = reformat_dates, |
reformat_dates = reformat_dates, |
||
set_selected_modules = set_selected_modules, |
set_selected_modules = set_selected_modules, |
||
year_check = year_check, |
|||
year_date_check = year_date_check, |
year_date_check = year_date_check, |
||
} |
} |