Модуль:Wikifier: различия между версиями
Перейти к навигации
Перейти к поиску
м (typo) |
(several URLs) |
||
(не показано 14 промежуточных версий этого же участника) | |||
Строка 5: | Строка 5: | ||
-- Dependencies: | -- Dependencies: | ||
− | local concat = table.concat | + | local concat, sort = table.concat, table.sort |
local wrap, yield = coroutine.wrap, coroutine.yield | local wrap, yield = coroutine.wrap, coroutine.yield | ||
local string = mw.ustring or string | local string = mw.ustring or string | ||
Строка 12: | Строка 12: | ||
local lpeg, rex = lpeg, rex_pcre | local lpeg, rex = lpeg, rex_pcre | ||
local P, C, Cc, Cp, S, V = lpeg.P, lpeg.C, lpeg.Cc, lpeg.Cp, lpeg.S, lpeg.V | local P, C, Cc, Cp, S, V = lpeg.P, lpeg.C, lpeg.Cc, lpeg.Cp, lpeg.S, lpeg.V | ||
− | local any, never, spaces, escape, slash = P(1), P( | + | local any, never, spaces, escape, slash = P(1), P(false), lpeg.locale ().space ^ 0, P'\\', P'/' |
local open, close, comma, equals = P'(', P')', P',', P'=' | local open, close, comma, equals = P'(', P')', P',', P'=' | ||
local ask = mw.smw.ask | local ask = mw.smw.ask | ||
+ | local radically_wikify = require 'Module:RadicalWikifier'.run | ||
-- Локаль: | -- Локаль: | ||
Строка 48: | Строка 49: | ||
return P (quote) * ((any - quote + escape * quote) ^ 0 / function (str) | return P (quote) * ((any - quote + escape * quote) ^ 0 / function (str) | ||
local sanitised = gsub (str, '\\x([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?)', function (code) | local sanitised = gsub (str, '\\x([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?)', function (code) | ||
− | return | + | return char (tonumber ('0x' .. code)) |
end) | end) | ||
sanitised = gsub (sanitised, '\\n', '\n') | sanitised = gsub (sanitised, '\\n', '\n') | ||
Строка 55: | Строка 56: | ||
end) * quote | end) * quote | ||
end -- local function quoted (quote) | end -- local function quoted (quote) | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
-- These function is used in both partial parsing JavaScript and applying wikifier transformations: | -- These function is used in both partial parsing JavaScript and applying wikifier transformations: | ||
Строка 70: | Строка 65: | ||
-- Generate two functions allowed in wikifier with pseudo-static variables (hidden upvalues): | -- Generate two functions allowed in wikifier with pseudo-static variables (hidden upvalues): | ||
local hide, restore = (function () | local hide, restore = (function () | ||
+ | local open, close = char (0x01), char (0x02) | ||
+ | local function strip_marker (no) | ||
+ | return open .. tostring (no) .. close | ||
+ | end | ||
local hidden = {} | local hidden = {} | ||
+ | -- hide (): | ||
return function (string, pattern, flags) | return function (string, pattern, flags) | ||
return preg_replace (string, pattern, flags, function (found) | return preg_replace (string, pattern, flags, function (found) | ||
local no = #hidden + 1 | local no = #hidden + 1 | ||
hidden [no] = found | hidden [no] = found | ||
− | return | + | return strip_marker (no) |
end) | end) | ||
+ | -- restore (): | ||
end, function (text) | end, function (text) | ||
for i = #hidden, 1, -1 do | for i = #hidden, 1, -1 do | ||
− | text = gsub (text, | + | if type (hidden [i]) == 'string' then |
+ | text = gsub (text, strip_marker (i), hidden [i]) | ||
+ | end | ||
end | end | ||
hidden = {} | hidden = {} | ||
return text; | return text; | ||
+ | end | ||
+ | end) () | ||
+ | |||
+ | -- Converting external links to internal: | ||
+ | local collect_link, resolve_links, internalise_link = (function () | ||
+ | local property = 'URL источника' | ||
+ | local urls = {} | ||
+ | -- collect_link (): | ||
+ | return function (url, alias) | ||
+ | urls [url] = true | ||
+ | -- return as is: | ||
+ | return '[' .. url .. ' ' .. (alias or '') .. ']' | ||
+ | -- resolve_links (): | ||
+ | end, function (text) | ||
+ | local list = {} | ||
+ | for url, _ in pairs (urls) do | ||
+ | list [#list + 1] = url | ||
+ | end | ||
+ | if #list > 0 then | ||
+ | local pages = ask { | ||
+ | '[[' .. property .. '::' .. concat (list, '||') ..']]', | ||
+ | '?#-', | ||
+ | '?' .. property .. '#-', | ||
+ | limit = #list | ||
+ | } | ||
+ | if pages then | ||
+ | for _, row in ipairs (pages) do | ||
+ | local page, url = row [1], row [property] | ||
+ | if type (url) ~= 'table' then | ||
+ | url = { url } | ||
+ | end | ||
+ | for __, value in ipairs (url) do | ||
+ | urls [value] = page | ||
+ | end | ||
+ | end | ||
+ | end | ||
+ | end | ||
+ | return text -- as is | ||
+ | -- internalise_link: | ||
+ | end, function (url, alias) | ||
+ | local page = urls [url] | ||
+ | if page ~= true then | ||
+ | -- successfully internalised: | ||
+ | return '[[' .. urls [url] .. (alias and '|' .. alias) .. ']]' | ||
+ | else | ||
+ | -- not internalised: | ||
+ | return '[' .. url .. (alias and ' ' .. alias) .. ']' | ||
+ | end | ||
end | end | ||
end) () | end) () | ||
-- These JavaScript functions are allowed in wikifying rules and are reimplemented in Lua: | -- These JavaScript functions are allowed in wikifying rules and are reimplemented in Lua: | ||
− | local | + | local allowed = { |
r = preg_replace, | r = preg_replace, | ||
hide = hide, | hide = hide, | ||
Строка 99: | Строка 150: | ||
return '[[' .. decode (page) .. '|' .. alias .. ']]'; | return '[[' .. decode (page) .. '|' .. alias .. ']]'; | ||
end, | end, | ||
− | + | collect_link = collect_link, | |
− | + | resolve_links = resolve_links, | |
− | + | internalise_link = internalise_link, | |
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
char = function (_, s) | char = function (_, s) | ||
return char (tonumber (sub (s, -4), 16)) | return char (tonumber (sub (s, -4), 16)) | ||
Строка 115: | Строка 158: | ||
} | } | ||
− | local s | + | local s = P's' |
− | local transform = P { ' | + | local transform = P { 'trans', |
− | + | trans = s * spaces * equals * spaces * V'call' * spaces * ';' * Cp (), | |
− | * ( spaces * | + | call = V'func' * spaces * open |
+ | * ( spaces * V'arg' * ( spaces * comma * spaces * V'arg' ) ^ 0 ) | ||
* spaces * close / function (func, ...) | * spaces * close / function (func, ...) | ||
local args = {...} | local args = {...} | ||
return function (string) | return function (string) | ||
− | return func ( | + | for i, arg in ipairs (args) do |
+ | args [i] = arg == 's' and string or arg | ||
+ | end | ||
+ | return func (unpack (args)) | ||
end | end | ||
end, | end, | ||
func = (function() | func = (function() | ||
− | -- Only allowed functions: | + | -- Only allowed functions. Their names must be ordered from longest to shortest: |
+ | local names = {} | ||
+ | for name, _ in pairs (allowed) do | ||
+ | names [#names + 1] = name | ||
+ | end | ||
+ | sort (names, function (a, b) | ||
+ | return #a > #b | ||
+ | end) | ||
local choice = never | local choice = never | ||
− | for name | + | for _, name in ipairs (names) do |
− | choice = choice + P (name) * Cc ( | + | choice = choice + P (name) * Cc (allowed [name]) |
end | end | ||
return choice | return choice | ||
− | end)(), | + | end) (), |
− | arg = V'call' + | + | arg = V's' + V'call' + V'string' + V'regex' + V'func', |
+ | s = C (s), | ||
+ | string = quoted'"' + quoted"'", | ||
+ | regex = slash * ((escape * slash + any - slash) ^ 1 / function (pattern) | ||
+ | local sanitised = rex.gsub (pattern, '\\\\u([0-9A-F]{2,4})', '\\x{%1}', nil, convert_flags'ig') | ||
+ | return sanitised | ||
+ | end) * slash * C (S'gim' ^ 0 + '') | ||
} | } | ||
− | local transform_anywhere = P{ transform | + | local transform_anywhere = P{ transform + 1 * V(1) } |
+ | local wikifier = 'Gadget-wikifier.js' | ||
+ | local code = tostring (mw.message.new (wikifier)) | ||
-- Iterate over wikifier code yielding processing functions: | -- Iterate over wikifier code yielding processing functions: | ||
− | local function | + | local transforms = (function (code) |
-- Remove comments: | -- Remove comments: | ||
local code = preg_replace (code, '//.*$', 'mg', '') | local code = preg_replace (code, '//.*$', 'mg', '') | ||
Строка 152: | Строка 214: | ||
end | end | ||
end) | end) | ||
− | end -- local function | + | end) (code) --local transforms = (function (code) |
− | + | local function wikify (text, page, start, finish, radical) | |
− | + | if radical then | |
− | + | text = radically_wikify (text) | |
− | local function wikify (text | + | end |
local counter = 1 | local counter = 1 | ||
− | for func in transforms | + | for func in transforms do |
if (not start or counter >= start) and (not finish or counter <= finish) then | if (not start or counter >= start) and (not finish or counter <= finish) then | ||
text = func (text, page) | text = func (text, page) | ||
Строка 198: | Строка 260: | ||
return wikify ( | return wikify ( | ||
frame.args [1] or frame.args.text, | frame.args [1] or frame.args.text, | ||
− | |||
frame:callParserFunction ('FULLPAGENAME', ''), | frame:callParserFunction ('FULLPAGENAME', ''), | ||
tonumber (frame.args.start), | tonumber (frame.args.start), | ||
− | tonumber (frame.args.finish) | + | tonumber (frame.args.finish), |
+ | (frame.args.radical or '') ~= '' | ||
) | ) | ||
end, | end, | ||
Строка 207: | Строка 269: | ||
return wikify ( | return wikify ( | ||
test, | test, | ||
− | |||
frame:callParserFunction ('FULLPAGENAME', ''), | frame:callParserFunction ('FULLPAGENAME', ''), | ||
tonumber (frame.args.start), | tonumber (frame.args.start), | ||
− | tonumber (frame.args.finish) | + | tonumber (frame.args.finish), |
+ | (frame.args.radical or '') ~= '' | ||
) | ) | ||
− | end | + | end, |
+ | test2 = function () | ||
+ | return wikify (test, '') | ||
+ | end, | ||
+ | test3 = function (arg) | ||
+ | return transform:match (arg) | ||
+ | end, allowed = allowed | ||
} -- return | } -- return |
Текущая версия от 08:45, 8 сентября 2023
Страница тестирования викификатора.
Откройте страницу в режиме редактирования и нажмите кнопку викификации. Не сохраняйте викифицированный текст.
Экранирование шаблонов
{{nobr|}}
Обработка невикифицированного HTML
Часть 1
Текст со сноской{{тчк}}<ref name="ref1" /> Ещё текст с другой<ref name="ref2" /> сноской.
Абзац <p>, не закрытый надлежащим образом и содержащий текст в кавычках, и даже
. Текст в во вложенных кавычках
лапках
.
И ещё один абзац, на сей раз закрытый.
Интернализация ссылок на источники
Примечания
<references><ref name="ref1">Текст первой сноски.</ref> <ref name="ref2">Текст второй сноски.</ref></references>
--[[
Модуль для викификации текста на стороне сервера.
Использует правила, определённые в MediaWiki:Gadget-wikifier.js.
--]]
-- Dependencies:
local concat, sort = table.concat, table.sort
local wrap, yield = coroutine.wrap, coroutine.yield
local string = mw.ustring or string
local sub, gsub, match, char = string.sub, string.gsub, string.match, string.char
local decode = mw.uri.decode
local lpeg, rex = lpeg, rex_pcre
local P, C, Cc, Cp, S, V = lpeg.P, lpeg.C, lpeg.Cc, lpeg.Cp, lpeg.S, lpeg.V
local any, never, spaces, escape, slash = P(1), P(false), lpeg.locale ().space ^ 0, P'\\', P'/'
local open, close, comma, equals = P'(', P')', P',', P'='
local ask = mw.smw.ask
local radically_wikify = require 'Module:RadicalWikifier'.run
--- Convert a string of regex flag letters into a numeric option bitmask.
-- The values are the ones listed below (taken from pcre.h according to the
-- original comment). The `u` option is always switched on. Letters without an
-- entry in the table (for instance JavaScript's `g`) are ignored.
-- @param flags string of flag letters; nil is treated as "no flags"
-- @return number sum of the values of all requested options
local function convert_flags (flags)
	-- nil-safe: the original concatenation raised an error when no flags were passed.
	local requested = (flags or '') .. 'u'
	-- pcre.h:
	local values = {
		  i = 0x0001
		, m = 0x0002
		, s = 0x0004
		, x = 0x0008
		, A = 0x0010
		, D = 0x0020
		, X = 0x0040
	--  , ? = 0x0080 PCRENOTBOL
	--  , ? = 0x0100 PCRENOTEOL
		, U = 0x0200
		, u = 0x0800
	}
	local converted = 0
	for flag, value in pairs (values) do
		-- Plain (non-pattern) search: a flag letter is a literal character.
		if string.find (requested, flag, 1, true) then
			converted = converted + value
		end
	end
	return converted
end -- local function convert_flags (flags)
--- Build an LPeg pattern that matches a JavaScript string literal delimited by
-- `quote` and captures its content converted for use on the Lua side.
-- Conversions applied to the captured content, in order:
--   * `\xHH` .. `\xHHHH` becomes the character with that hexadecimal code;
--   * `\n` becomes a newline;
--   * an escaped delimiter (backslash + quote) becomes the bare delimiter;
--   * `$N` becomes `%N`.
-- @param quote the delimiter character, `"` or `'`
-- @return LPeg pattern producing one string capture
local function quoted (quote)
	-- The escape alternative must come first: otherwise `any - quote` swallows
	-- the backslash and an escaped quote terminates the literal prematurely.
	-- The `regex` rule of the grammar orders its alternatives the same way.
	local content = (escape * any + (any - quote)) ^ 0
	return P (quote) * (content / function (str)
		local sanitised = gsub (str, '\\x([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?)', function (code)
			return char (tonumber (code, 16))
		end)
		sanitised = gsub (sanitised, '\\n', '\n')
		-- Neither a backslash nor a quote is magic in a Lua pattern:
		sanitised = gsub (sanitised, '\\' .. quote, quote)
		sanitised = gsub (sanitised, '$(%d+)', '%%%1')
		return sanitised
	end) * quote
end -- local function quoted (quote)
--- Regex replacement used both for partially parsing the JavaScript source and
-- for applying the wikifier transformations.
-- @param string the subject text
-- @param pattern regular expression, passed to rex.gsub unchanged
-- @param flags flag letters, translated by convert_flags ()
-- @param replacement replacement string or any other replacement value
--        accepted by rex.gsub (e.g. a function); only strings are rewritten
-- @return whatever rex.gsub returns
local function preg_replace (string, pattern, flags, replacement)
	if type (replacement) == 'string' then
		-- Turn `$N` into `%N`. The original replacement '%%1' produced the
		-- literal text "%1" for every N, so `$2`, `$3`, ... all became `%1`.
		replacement = gsub (replacement, '$(%d)', '%%%1')
	end
	return rex.gsub (string, pattern, replacement, nil, convert_flags (flags))
end -- local function preg_replace (string, pattern, flags, replacement)
-- Generate two functions allowed in wikifier with pseudo-static variables (hidden upvalues).
-- hide () replaces every match of a pattern with a numbered marker and remembers
-- what was replaced; restore () puts the remembered fragments back and forgets them.
local hide, restore = (function ()
	-- Control characters delimiting a marker:
	local open, close = char (0x01), char (0x02)
	-- Marker standing in for hidden fragment number `no`:
	local function strip_marker (no)
		return open .. tostring (no) .. close
	end
	-- Fragments hidden so far, indexed by marker number:
	local hidden = {}
	-- hide ():
	return function (string, pattern, flags)
		-- NOTE(review): `found` is the first argument rex.gsub passes to a
		-- replacement function; for patterns with capture groups this may be the
		-- first capture rather than the whole match -- confirm against lrexlib.
		return preg_replace (string, pattern, flags, function (found)
			local no = #hidden + 1
			hidden [no] = found
			return strip_marker (no)
		end)
	-- restore ():
	end, function (text)
		for i = #hidden, 1, -1 do
			local fragment = hidden [i]
			if type (fragment) == 'string' then
				-- A function replacement is inserted verbatim. Passing the fragment
				-- as a replacement string would make gsub interpret any `%` in it
				-- as a capture reference, corrupting the text or raising an error.
				text = gsub (text, strip_marker (i), function ()
					return fragment
				end)
			end
		end
		hidden = {}
		return text;
	end
end) ()
-- Converting external links to internal.
-- The three functions share the `urls` table: collect_link () registers a URL,
-- resolve_links () asks Semantic MediaWiki which pages carry those URLs in the
-- property below, internalise_link () renders a link according to the answer.
local collect_link, resolve_links, internalise_link = (function ()
	local property = 'URL источника'
	-- url -> true (collected, page unknown) or the page value returned by the query:
	local urls = {}
	-- collect_link (): remember the URL and return the external link unchanged.
	return function (url, alias)
		urls [url] = true
		-- return as is:
		return '[' .. url .. ' ' .. (alias or '') .. ']'
	-- resolve_links (): one query for all collected URLs; the text passes through.
	end, function (text)
		local list = {}
		for url, _ in pairs (urls) do
			list [#list + 1] = url
		end
		if #list > 0 then
			local pages = ask {
				'[[' .. property .. '::' .. concat (list, '||') ..']]',
				'?#-',
				'?' .. property .. '#-',
				limit = #list
			}
			if pages then
				for _, row in ipairs (pages) do
					local page, url = row [1], row [property]
					if type (url) ~= 'table' then
						url = { url }
					end
					-- Without a page value there is nothing to link to; assigning
					-- nil would also erase the "collected" mark of the URL.
					if page ~= nil then
						for __, value in ipairs (url) do
							urls [value] = page
						end
					end
				end
			end
		end
		return text -- as is
	-- internalise_link (): internal link if a page was found, external otherwise.
	end, function (url, alias)
		local page = urls [url]
		if page ~= nil and page ~= true then
			-- successfully internalised:
			return '[[' .. page .. (alias and ('|' .. alias) or '') .. ']]'
		else
			-- not internalised (page not found, or the URL was never collected):
			return '[' .. url .. (alias and (' ' .. alias) or '') .. ']'
		end
	end
end) ()
-- Lua counterparts of the JavaScript functions that wikifying rules may call.
-- Keys are the names as they appear in the rules; values are the implementations.
local allowed = {}

allowed.r = preg_replace
allowed.hide = hide
allowed.restore = restore
allowed.collect_link = collect_link
allowed.resolve_links = resolve_links
allowed.internalise_link = internalise_link

-- Hide whole elements for every listed tag name.
function allowed.hideTags (text, ...)
	local alternatives = concat ({...}, '|')
	return hide (text, '<(' .. alternatives .. ')( [^>]+)?>[\\s\\S]+?<\\/\\1>', 'giu')
end

-- Build a piped internal link from the third and fourth arguments;
-- the page name is passed through mw.uri.decode first.
function allowed.wikifyInternalLinks (_, __, page, alias)
	local target = decode (page)
	return '[[' .. target .. '|' .. alias .. ']]';
end

-- Character whose hexadecimal code is given by the last four characters of `code`.
allowed.char = function (_, code)
	local hex = sub (code, -4)
	return char (tonumber (hex, 16))
end
-- The JavaScript variable holding the text being wikified:
local s = P's'
-- Unique placeholder captured wherever the variable `s` is used as an argument.
-- A table cannot be confused with a quoted string literal 's', unlike the
-- plain string capture used previously.
local subject = {}
-- Grammar for one wikifier rule of the form `s = func (arg, ...);`.
-- A successful match yields a function (text) applying the rule, followed by
-- the position just after the terminating semicolon.
local transform = P { 'trans',
	trans	= s * spaces * equals * spaces * V'call' * spaces * ';' * Cp (),
	call	= V'func' * spaces * open
			* ( spaces * V'arg' * ( spaces * comma * spaces * V'arg' ) ^ 0 )
			* spaces * close / function (func, ...)
				local count = select ('#', ...)
				local args = {...}
				return function (string)
					-- Build the actual argument list afresh on every call. The
					-- captured list must stay untouched: overwriting it would
					-- make later calls reuse the text of the first one.
					local actual = {}
					for i = 1, count do
						local arg = args [i]
						if arg == subject then
							actual [i] = string
						else
							actual [i] = arg
						end
					end
					return func (unpack (actual, 1, count))
				end
			end,
	func	= (function()
		-- Only allowed functions. Their names must be ordered from longest to shortest:
		local names = {}
		for name, _ in pairs (allowed) do
			names [#names + 1] = name
		end
		sort (names, function (a, b)
			return #a > #b
		end)
		local choice = never
		for _, name in ipairs (names) do
			choice = choice + P (name) * Cc (allowed [name])
		end
		return choice
	end) (),
	-- An argument: the text variable, a nested call, a string, a regex or a bare function name.
	arg		= V's' + V'call' + V'string' + V'regex' + V'func',
	s		= s * Cc (subject),
	string	= quoted'"' + quoted"'",
	-- A regex literal yields two captures: the pattern (with \uXXXX rewritten
	-- to \x{XXXX}) and the flag letters.
	regex	= slash * ((escape * slash + any - slash) ^ 1 / function (pattern)
		local sanitised = rex.gsub (pattern, '\\\\u([0-9A-F]{2,4})', '\\x{%1}', nil, convert_flags'ig')
		return sanitised
	end) * slash * C (S'gim' ^ 0 + '')
}
-- Matches the first rule found at or after the starting position:
-- either a rule right here, or skip one character and try again.
local transform_anywhere = P{ transform + 1 * V(1) }
-- Name of the interface message holding the wikifier source:
local wikifier = 'Gadget-wikifier.js'
local code = tostring (mw.message.new (wikifier))
-- Iterate over wikifier code yielding processing functions.
-- `transforms` is an iterator function for a generic `for`: each call returns
-- the next rule function, or nil when no further rule is found. After returning
-- nil it rewinds, so the rules can be iterated again; the previous one-shot
-- coroutine raised an error on any second pass.
local transforms = (function (code)
	-- Remove comments:
	local code = preg_replace (code, '//.*$', 'mg', '')
	-- The `s` flag lets `.` cross line breaks, so that multi-line block
	-- comments are removed as well:
	code = preg_replace (code, '/\\*.*?\\*/', 'gs', '')
	local pos = 1
	return function ()
		if pos and pos <= #code then
			local func
			func, pos = transform_anywhere:match (code, pos)
			if func then
				return func
			end
		end
		-- Exhausted: rewind for the next iteration and signal the end.
		pos = 1
		return nil
	end
end) (code) --local transforms = (function (code)
--- Apply the wikifier rules to a text.
-- @param text the text to process
-- @param page passed to every rule as a second argument
-- @param start number of the first rule to apply (1-based), or nil for no lower bound
-- @param finish number of the last rule to apply, or nil for no upper bound
-- @param radical when truthy, the text is first run through radically_wikify ()
-- @return the processed text, or an error <span> naming the rule after which
--         the text became empty
local function wikify (text, page, start, finish, radical)
	if radical then
		text = radically_wikify (text)
	end
	local rule_no = 0
	for rule in transforms do
		rule_no = rule_no + 1
		local too_early = start and rule_no < start
		local too_late = finish and rule_no > finish
		if not (too_early or too_late) then
			text = rule (text, page)
			if #text == 0 then
				return '<span class="error">Text became empty at rule #' .. tostring (rule_no) .. '</span>\n'
			end
		end
	end
	return text
end -- local function wikify (text, page, start, finish, radical)
-- Sample wikitext passed to wikify () by the `test` and `test2` entry points
-- of the export table. It is a runtime string: its content is kept verbatim.
local test = [==[
Страница тестирования [[MediaWiki:Gadget-wikifier.js|викификатора]].
Окройте страницу в режиме редактирования и нажмите кнопку викификации. Не сохраняйте викифицированный текст.
== Экранирование шаблонов ==
{{nobr|[[Category:All]]}}
== Обработка невикифицированного HTML ==
<h3>Часть 1</h3>
Текст со сноской [1]. Ещё текст с другой[2] сноской.
<p>Абзац <p>, не закрытый надлежащим образом и содержащий текст "в кавычках, и даже "во вложенных кавычках"". Текст в «лапках».
<p>И ещё один абзац, на сей раз закрытый.</p>
== Интернализация ссылок на источники ==
* [http://www.hist.msu.ru/ER/Etext/apr1906.htm должна интернализоваться],
* [http://www.hist.msu.ru/ER/Etext/apr1906.html не должна интернализоваться],
<h2>Примечания</h2>
[1] Текст первой сноски.
[2] Текст второй сноски.
]==]
-- Run wikify () on `text` with the options taken from an #invoke frame:
-- the full page name, the numeric `start` / `finish` rule bounds and the
-- `radical` switch (on when the argument is present and non-empty).
local function wikify_for_frame (frame, text)
	local page = frame:callParserFunction ('FULLPAGENAME', '')
	local first = tonumber (frame.args.start)
	local last = tonumber (frame.args.finish)
	local radical = (frame.args.radical or '') ~= ''
	return wikify (text, page, first, last, radical)
end

-- Module interface:
local export = {}

-- Wikify the first unnamed argument or, failing that, the `text` argument.
function export.run (frame)
	return wikify_for_frame (frame, frame.args [1] or frame.args.text)
end

-- Wikify the built-in sample text with the options from the frame.
function export.test (frame)
	return wikify_for_frame (frame, test)
end

-- Wikify the built-in sample text with an empty page name and all rules.
function export.test2 ()
	return wikify (test, '')
end

-- Match the rule grammar directly against `arg`.
function export.test3 (arg)
	return transform:match (arg)
end

export.allowed = allowed

return export -- return