Модуль:Wikifier: различия между версиями

Материал из in.wiki
Перейти к навигации Перейти к поиску
м (typo)
(several URLs)
 
(не показано 14 промежуточных версий этого же участника)
Строка 5: Строка 5:
  
 
-- Dependencies:
 
-- Dependencies:
local concat = table.concat
+
local concat, sort = table.concat, table.sort
 
local wrap, yield = coroutine.wrap, coroutine.yield
 
local wrap, yield = coroutine.wrap, coroutine.yield
 
local string = mw.ustring or string
 
local string = mw.ustring or string
Строка 12: Строка 12:
 
local lpeg, rex = lpeg, rex_pcre
 
local lpeg, rex = lpeg, rex_pcre
 
local P, C, Cc, Cp, S, V = lpeg.P, lpeg.C, lpeg.Cc, lpeg.Cp, lpeg.S, lpeg.V
 
local P, C, Cc, Cp, S, V = lpeg.P, lpeg.C, lpeg.Cc, lpeg.Cp, lpeg.S, lpeg.V
local any, never, spaces, escape, slash = P(1), P(0), lpeg.locale ().space ^ 0, P'\\', P'/'
+
local any, never, spaces, escape, slash = P(1), P(false), lpeg.locale ().space ^ 0, P'\\', P'/'
 
local open, close, comma, equals = P'(', P')', P',', P'='
 
local open, close, comma, equals = P'(', P')', P',', P'='
 
local ask = mw.smw.ask
 
local ask = mw.smw.ask
 +
local radically_wikify = require 'Module:RadicalWikifier'.run
  
 
-- Локаль:
 
-- Локаль:
Строка 48: Строка 49:
 
return P (quote) * ((any - quote + escape * quote) ^ 0 / function (str)
 
return P (quote) * ((any - quote + escape * quote) ^ 0 / function (str)
 
local sanitised =  gsub (str, '\\x([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?)', function (code)
 
local sanitised =  gsub (str, '\\x([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?)', function (code)
return string.char (tonumber ('0x' .. code))
+
return char (tonumber ('0x' .. code))
 
end)
 
end)
 
sanitised = gsub (sanitised, '\\n', '\n')
 
sanitised = gsub (sanitised, '\\n', '\n')
Строка 55: Строка 56:
 
end) * quote
 
end) * quote
 
end -- local function quoted (quote)
 
end -- local function quoted (quote)
 
-- This pattern matches a JavaScript regular expression (//) and cinverts it to a PREG:
 
local regex = slash * ((escape * slash + any - slash) ^ 1 / function (pattern)
 
local sanitised = rex.gsub (pattern, '\\\\u([0-9A-F]{2,4})', '\\x{%1}', nil, convert_flags'ig')
 
return sanitised
 
end) * slash * C (S'gim' ^ 0)
 
  
 
-- These function is used in both partial parsing JavaScript and applying wikifier transformations:
 
-- These function is used in both partial parsing JavaScript and applying wikifier transformations:
Строка 70: Строка 65:
 
-- Generate two functions allowed in wikifier with pseudo-static variables (hidden upvalues):
 
-- Generate two functions allowed in wikifier with pseudo-static variables (hidden upvalues):
 
local hide, restore = (function ()
 
local hide, restore = (function ()
 +
local open, close = char (0x01), char (0x02)
 +
local function strip_marker (no)
 +
return open .. tostring (no) .. close
 +
end
 
local hidden = {}
 
local hidden = {}
 +
-- hide ():
 
return function (string, pattern, flags)
 
return function (string, pattern, flags)
 
return preg_replace (string, pattern, flags, function (found)
 
return preg_replace (string, pattern, flags, function (found)
 
local no = #hidden + 1
 
local no = #hidden + 1
 
hidden [no] = found
 
hidden [no] = found
return '\x01' .. tostring (no) .. '\x02'
+
return strip_marker (no)
 
end)
 
end)
 +
-- restore ():
 
end, function (text)
 
end, function (text)
 
for i = #hidden, 1, -1 do
 
for i = #hidden, 1, -1 do
text = gsub (text, '\x01' .. tostring (i) .. '\x02', hidden [i])
+
if type (hidden [i]) == 'string' then
 +
text = gsub (text, strip_marker (i), hidden [i])
 +
end
 
end
 
end
 
hidden = {}
 
hidden = {}
 
return text;
 
return text;
 +
end
 +
end) ()
 +
 +
-- Converting external links to internal:
 +
local collect_link, resolve_links, internalise_link = (function ()
 +
local property = 'URL источника'
 +
local urls = {}
 +
-- collect_link ():
 +
return function (url, alias)
 +
urls [url] = true
 +
-- return as is:
 +
return '[' .. url .. ' ' .. (alias or '') .. ']'
 +
-- resolve_links ():
 +
end, function (text)
 +
local list = {}
 +
for url, _ in pairs (urls) do
 +
list [#list + 1] = url
 +
end
 +
if #list > 0 then
 +
local pages = ask {
 +
'[[' .. property .. '::' .. concat (list, '||') ..']]',
 +
'?#-',
 +
'?' .. property .. '#-',
 +
limit = #list
 +
}
 +
if pages then
 +
for _, row in ipairs (pages) do
 +
local page, url = row [1], row [property]
 +
if type (url) ~= 'table' then
 +
url = { url }
 +
end
 +
for __, value in ipairs (url) do
 +
urls [value] = page
 +
end
 +
end
 +
end
 +
end
 +
return text -- as is
 +
-- internalise_link:
 +
end, function (url, alias)
 +
local page = urls [url]
 +
if page ~= true then
 +
-- successfully internalised:
 +
return '[[' .. urls [url] .. (alias and '|' .. alias) .. ']]'
 +
else
 +
-- not internalised:
 +
return '[' .. url .. (alias and ' ' .. alias) .. ']'
 +
end
 
end
 
end
 
end) ()
 
end) ()
  
 
-- These JavaScript functions are allowed in wikifying rules and are reimplemented in Lua:
 
-- These JavaScript functions are allowed in wikifying rules and are reimplemented in Lua:
local functions = {
+
local allowed = {
 
r = preg_replace,
 
r = preg_replace,
 
hide = hide,
 
hide = hide,
Строка 99: Строка 150:
 
return '[[' .. decode (page) .. '|' .. alias .. ']]';
 
return '[[' .. decode (page) .. '|' .. alias .. ']]';
 
end,
 
end,
internalise = function (url, alias)
+
collect_link = collect_link,
local property = 'URL источника'
+
resolve_links = resolve_links,
local pages = ask { '[[' .. property .. '::' .. url ..']]', '?#-', limit = 1 }
+
internalise_link = internalise_link,
if pages then
 
local page = pages [1] [1]
 
if not (context and page == context) then
 
return '[[' .. pages [1] [1] .. '|' .. alias .. ']]'
 
end
 
end
 
return '[' .. url .. ' ' .. alias .. ']'
 
end,
 
 
char = function (_, s)
 
char = function (_, s)
 
return char (tonumber (sub (s, -4), 16))
 
return char (tonumber (sub (s, -4), 16))
Строка 115: Строка 158:
 
}
 
}
  
local s, str = P's', quoted'"' + quoted"'"
+
local s = P's'
  
local transform = P { 'call',
+
local transform = P { 'trans',
call = s * spaces * equals * spaces * V'func' * spaces * open
+
trans = s * spaces * equals * spaces * V'call' * spaces * ';' * Cp (),
* ( spaces * s * ( spaces * comma * spaces * V'arg' ) ^ 0 )
+
call = V'func' * spaces * open
 +
* ( spaces * V'arg' * ( spaces * comma * spaces * V'arg' ) ^ 0 )
 
* spaces * close / function (func, ...)
 
* spaces * close / function (func, ...)
 
local args = {...}
 
local args = {...}
 
return function (string)
 
return function (string)
return func (string, unpack (args))
+
for i, arg in ipairs (args) do
 +
args [i] = arg == 's' and string or arg
 +
end
 +
return func (unpack (args))
 
end
 
end
 
end,
 
end,
 
func = (function()
 
func = (function()
-- Only allowed functions:
+
-- Only allowed functions. Their names must be ordered from longest to shortest:
 +
local names = {}
 +
for name, _ in pairs (allowed) do
 +
names [#names + 1] = name
 +
end
 +
sort (names, function (a, b)
 +
return #a > #b
 +
end)
 
local choice = never
 
local choice = never
for name, func in pairs (functions) do
+
for _, name in ipairs (names) do
choice = choice + P (name) * Cc (func)
+
choice = choice + P (name) * Cc (allowed [name])
 
end
 
end
 
return choice
 
return choice
end)(),
+
end) (),
arg = V'call' + str + regex + V'func'
+
arg = V's' + V'call' + V'string' + V'regex' + V'func',
 +
s = C (s),
 +
string = quoted'"' + quoted"'",
 +
regex = slash * ((escape * slash + any - slash) ^ 1 / function (pattern)
 +
local sanitised = rex.gsub (pattern, '\\\\u([0-9A-F]{2,4})', '\\x{%1}', nil, convert_flags'ig')
 +
return sanitised
 +
end) * slash * C (S'gim' ^ 0 + '')
 
}
 
}
  
local transform_anywhere = P{ transform * Cp () + 1 * V(1) }
+
local transform_anywhere = P{ transform + 1 * V(1) }
 +
local wikifier = 'Gadget-wikifier.js'
 +
local code = tostring (mw.message.new (wikifier))
  
 
-- Iterate over wikifier code yielding processing functions:
 
-- Iterate over wikifier code yielding processing functions:
local function transforms (code)
+
local transforms = (function (code)
 
-- Remove comments:
 
-- Remove comments:
 
local code = preg_replace (code, '//.*$', 'mg', '')
 
local code = preg_replace (code, '//.*$', 'mg', '')
Строка 152: Строка 214:
 
end
 
end
 
end)
 
end)
end -- local function transforms (code)
+
end) (code) --local transforms = (function (code)
  
local wikifier = 'Gadget-wikifier.js'
+
local function wikify (text, page, start, finish, radical)
local code = tostring (mw.message.new (wikifier))
+
if radical then
 
+
text = radically_wikify (text)
local function wikify (text, code, page, start, finish)
+
end
 
local counter = 1
 
local counter = 1
for func in transforms (code) do
+
for func in transforms do
 
if (not start or counter >= start) and (not finish or counter <= finish) then
 
if (not start or counter >= start) and (not finish or counter <= finish) then
 
text = func (text, page)
 
text = func (text, page)
Строка 198: Строка 260:
 
return wikify (
 
return wikify (
 
frame.args [1] or frame.args.text,
 
frame.args [1] or frame.args.text,
code,
 
 
frame:callParserFunction ('FULLPAGENAME', ''),
 
frame:callParserFunction ('FULLPAGENAME', ''),
 
tonumber (frame.args.start),
 
tonumber (frame.args.start),
tonumber (frame.args.finish)
+
tonumber (frame.args.finish),
 +
(frame.args.radical or '') ~= ''
 
)
 
)
 
end,
 
end,
Строка 207: Строка 269:
 
return wikify (
 
return wikify (
 
test,
 
test,
code,
 
 
frame:callParserFunction ('FULLPAGENAME', ''),
 
frame:callParserFunction ('FULLPAGENAME', ''),
 
tonumber (frame.args.start),
 
tonumber (frame.args.start),
tonumber (frame.args.finish)
+
tonumber (frame.args.finish),
 +
(frame.args.radical or '') ~= ''
 
)
 
)
   end
+
end,
 +
  test2 = function ()
 +
return wikify (test, '')
 +
  end,
 +
  test3 = function (arg)
 +
  return transform:match (arg)
 +
   end, allowed = allowed
 
} -- return
 
} -- return

Текущая версия от 08:45, 8 сентября 2023

Страница тестирования викификатора.

Откройте страницу в режиме редактирования и нажмите кнопку викификации. Не сохраняйте викифицированный текст.

Экранирование шаблонов

{{nobr|}}

Обработка невикифицированного HTML

Часть 1

Текст со сноской{{тчк}}<ref name="ref1" /> Ещё текст с другой<ref name="ref2" /> сноской.

Абзац <p>, не закрытый надлежащим образом и содержащий текст в кавычках, и даже во вложенных кавычках. Текст в лапках.

И ещё один абзац, на сей раз закрытый.

Интернализация ссылок на источники

Примечания

<references><ref name="ref1">Текст первой сноски.</ref> <ref name="ref2">Текст второй сноски.</ref></references>


--[[
	Модуль для викификации текста на стороне сервера.
	Использует правила, определённые в MediaWiki:Gadget-wikifier.js.
--]]

-- Dependencies:
local concat, sort = table.concat, table.sort
local wrap, yield = coroutine.wrap, coroutine.yield
local string = mw.ustring or string
local sub, gsub, match, char = string.sub, string.gsub, string.match, string.char
local decode = mw.uri.decode
local lpeg, rex = lpeg, rex_pcre
local P, C, Cc, Cp, S, V = lpeg.P, lpeg.C, lpeg.Cc, lpeg.Cp, lpeg.S, lpeg.V
local any, never, spaces, escape, slash = P(1), P(false), lpeg.locale ().space ^ 0, P'\\', P'/'
local open, close, comma, equals = P'(', P')', P',', P'='
local ask = mw.smw.ask
local radically_wikify = require 'Module:RadicalWikifier'.run

-- Convert a string of regex flag letters (e.g. the 'gim' of a JavaScript
-- regex) into the numeric compile-flag sum passed to rex.gsub.
--   flags: string of flag letters, or nil for "no flags".
--   returns: sum of the values below for every known letter present.
-- The 'u' flag is always added; letters missing from the table (such as
-- JavaScript's 'g') contribute nothing.
local function convert_flags (flags)
	-- Tolerate a missing flag string instead of failing on concatenation:
	local flags = (flags or '') .. 'u'
	-- pcre.h:
	local values = {
		i = 0x0001
	  , m = 0x0002
	  , s = 0x0004
	  , x = 0x0008
	  , A = 0x0010
	  , D = 0x0020
	  , X = 0x0040
--	  , ? = 0x0080 PCRENOTBOL
--	  , ? = 0x0100 PCRENOTEOL
	  , U = 0x0200
	  , u = 0x0800
	}
	local converted = 0
	for flag, value in pairs (values) do
		-- Plain (non-pattern) search: each letter counts once, however often it occurs.
		if flags:find (flag, 1, true) then
			converted = converted + value
		end
	end
	return converted
end	-- local function convert_flags (flags)

-- Build an LPeg pattern that matches a JavaScript string literal delimited by
-- `quote` and captures its contents converted for use on the Lua side:
--   * \xHH .. \xHHHH  -> the character with that hexadecimal code,
--   * \n              -> a newline,
--   * \<quote>        -> the bare delimiter,
--   * $N              -> %N (replacement back-reference syntax).
--   quote: the delimiter, '"' or "'".
local function quoted (quote)
	-- The escaped-character alternative must come first: otherwise the backslash
	-- is consumed on its own and an escaped delimiter terminates the literal early.
	local body = (escape * any + (any - quote)) ^ 0
	return P (quote) * (body / function (str)
		local sanitised =  gsub (str, '\\x([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?)', function (code)
			return char (tonumber ('0x' .. code))
		end)
		sanitised = gsub (sanitised, '\\n', '\n')
		-- An escaped delimiter inside the literal stands for the delimiter itself:
		sanitised = gsub (sanitised, '\\' .. quote, quote)
		sanitised = gsub (sanitised, '$(%d+)', '%%%1')
		return sanitised
	end) * quote
end	-- local function quoted (quote)

-- This function is used both for partially parsing the JavaScript and for
-- applying wikifier transformations.
--   string:      subject text.
--   pattern:     PCRE pattern.
--   flags:       flag letters, converted with convert_flags ().
--   replacement: replacement string (JavaScript-style $N references are
--                rewritten to %N) or any other value, passed through unchanged.
--   returns:     whatever rex.gsub returns.
local function preg_replace (string, pattern, flags, replacement)
	if type (replacement) == 'string' then
		-- Keep the captured digit: $2 must become %2, not a constant %1.
		replacement = gsub (replacement, '$(%d)', '%%%1')
	end
	return rex.gsub (string, pattern, replacement, nil, convert_flags (flags))
end	-- local function preg_replace (string, pattern, flags, replacement)

-- Generate two functions allowed in wikifier with pseudo-static variables (hidden upvalues):
--   hide (string, pattern, flags): replaces every match of `pattern` with a
--       numbered marker (0x01 .. number .. 0x02) and remembers what was replaced;
--   restore (text): puts the remembered fragments back in place of their
--       markers, newest first, and forgets them.
local hide, restore = (function ()
	local open, close = char (0x01), char (0x02)
	local function strip_marker (no)
		return open .. tostring (no) .. close
	end
	local hidden = {}
	-- hide ():
	return function (string, pattern, flags)
		-- NOTE(review): `found` is whatever rex.gsub hands to a replacement
		-- function; for patterns with capture groups this is presumably the first
		-- capture rather than the whole match -- confirm against lrexlib.
		return preg_replace (string, pattern, flags, function (found)
			local no = #hidden + 1
			hidden [no] = found
			return strip_marker (no)
		end)
	-- restore ():
	end, function (text)
		for i = #hidden, 1, -1 do
			local fragment = hidden [i]
			if type (fragment) == 'string' then
				-- A function replacement inserts the fragment verbatim; a string
				-- replacement would treat every '%' in it as an escape character.
				text = gsub (text, strip_marker (i), function ()
					return fragment
				end)
			end
		end
		hidden = {}
		return text
	end
end) ()

-- Converting external links to internal.
-- Three functions sharing one hidden table `urls` (URL -> true while
-- unresolved, URL -> page once a page carrying that URL has been found):
--   collect_link (url, alias): remembers the URL, returns the external link unchanged;
--   resolve_links (text): asks for the pages whose `property` equals any of
--       the remembered URLs and records them; returns `text` unchanged;
--   internalise_link (url, alias): returns an internal link when a page was
--       found for the URL, otherwise the external link.
local collect_link, resolve_links, internalise_link = (function ()
	local property = 'URL источника'
	local urls = {}
	-- collect_link ():
	return function (url, alias)
		urls [url] = true
		-- return as is:
		return '[' .. url .. ' ' .. (alias or '') .. ']'
	-- resolve_links ():
	end, function (text)
		local list = {}
		for url, _ in pairs (urls) do
			list [#list + 1] = url
		end
		if #list > 0 then
			-- One query for all collected URLs, joined with '||':
			local pages = ask {
				'[[' .. property .. '::' .. concat (list, '||') ..']]',
				'?#-',
				'?' .. property .. '#-',
				limit = #list
			}
			if pages then
				for _, row in ipairs (pages) do
					local page, url = row [1], row [property]
					-- The property may hold one value or a list of values:
					if type (url) ~= 'table' then
						url = { url }
					end
					for __, value in ipairs (url) do
						urls [value] = page
					end
				end
			end
		end
		return text -- as is
	-- internalise_link:
	end, function (url, alias)
		local page = urls [url]
		if page ~= nil and page ~= true then
			-- successfully internalised:
			return '[[' .. page .. (alias and '|' .. alias or '') .. ']]'
		else
			-- not internalised (unresolved, or never collected at all):
			return '[' .. url .. (alias and ' ' .. alias or '') .. ']'
		end
	end
end) ()

-- Whitelist of the JavaScript functions that wikifying rules may call, keyed
-- by their JavaScript names and reimplemented in Lua:
local allowed = {}

-- Functions defined earlier in this module, exposed as they are:
allowed.r					= preg_replace
allowed.hide				= hide
allowed.restore				= restore
allowed.collect_link		= collect_link
allowed.resolve_links		= resolve_links
allowed.internalise_link	= internalise_link

-- Hide whole elements with any of the given tag names:
function allowed.hideTags (string, ...)
	local alternatives = concat ({...}, '|')
	return hide (string, '<(' .. alternatives .. ')( [^>]+)?>[\\s\\S]+?<\\/\\1>', 'giu')
end

-- Build a piped internal link; the first two arguments are ignored:
function allowed.wikifyInternalLinks (_, __, page, alias)
	return '[[' .. decode (page) .. '|' .. alias .. ']]'
end

-- Character whose code is given by the last four symbols of `s`, read as hexadecimal:
function allowed.char (_, s)
	local code = tonumber (sub (s, -4), 16)
	return char (code)
end

local s = P's'

-- Grammar for one wikifier rule of the form `s = func (arg, ...);`.
-- A successful match produces two values: a function that applies the rule
-- to a text, and the position right after the terminating semicolon.
local transform = (function ()
	-- Unique stand-in for the JavaScript variable `s` among captured arguments.
	-- A table cannot be confused with a quoted 's' literal, which is a string.
	local subject = {}
	return P { 'trans',
		trans	= s * spaces * equals * spaces * V'call' * spaces * ';' * Cp (),
		call	= V'func' * spaces * open
				* ( spaces * V'arg' * ( spaces * comma * spaces * V'arg' ) ^ 0 )
				* spaces * close / function (func, ...)
					local count, args = select ('#', ...), {...}
					return function (string)
						-- Build the actual argument list afresh on every call, so
						-- that the captured list keeps its placeholders and the
						-- rule can be applied to any number of texts:
						local actual = {}
						for i = 1, count do
							local arg = args [i]
							if arg == subject then
								actual [i] = string
							else
								actual [i] = arg
							end
						end
						return func (unpack (actual, 1, count))
					end
				end,
		func	= (function()
			-- Only allowed functions. Their names must be ordered from longest to
			-- shortest, so that a name is never shadowed by its own prefix:
			local names = {}
			for name, _ in pairs (allowed) do
				names [#names + 1] = name
			end
			sort (names, function (a, b)
				return #a > #b
			end)
			local choice = never
			for _, name in ipairs (names) do
				choice = choice + P (name) * Cc (allowed [name])
			end
			return choice
		end) (),
		arg		= V's' + V'call' + V'string' + V'regex' + V'func',
		s		= s * Cc (subject),
		string	= quoted'"' + quoted"'",
		-- A JavaScript regex literal: \uXXXX escapes are rewritten to \x{XXXX},
		-- the flag letters are captured as a separate string:
		regex	= slash * ((escape * slash + any - slash) ^ 1 / function (pattern)
			local sanitised = rex.gsub (pattern, '\\\\u([0-9A-F]{2,4})', '\\x{%1}', nil, convert_flags'ig')
			return sanitised
		end)	* slash * C (S'gim' ^ 0 + '')
	}
end) ()

-- The same rule, found at the current position or anywhere after it:
local transform_anywhere = P{ transform + 1 * V(1) }
local wikifier = 'Gadget-wikifier.js'
local code = tostring (mw.message.new (wikifier))

-- Iterate over wikifier code yielding processing functions.
-- `transforms` is an iterator function: each call returns the next rule, and
-- nil after the last one. After returning nil it starts over, so the rules
-- can be walked through again by a later loop.
local transforms = (function (code)
	-- Remove comments:
	local code = preg_replace (code, '//.*$', 'mg', '')
	-- NOTE(review): without the 's' flag the dot does not cross line breaks, so
	-- only single-line /* */ comments are removed here -- confirm this is intended.
	code = preg_replace (code, '/\\*.*?\\*/', 'g', '')
	-- Parse every rule once:
	local rules, pos = {}, 1
	while pos <= #code do
		local func, next_pos = transform_anywhere:match (code, pos)
		if not func then
			-- No more rules in the rest of the code.
			break
		end
		rules [#rules + 1] = func
		pos = next_pos
	end
	local index = 0
	return function ()
		index = index + 1
		local rule = rules [index]
		if rule == nil then
			-- Exhausted: rewind for the next loop.
			index = 0
		end
		return rule
	end
end) (code) --local transforms = (function (code)

-- Apply the wikifier rules to `text`, one after another.
--   text:    wikitext to process.
--   page:    passed to every rule as its second argument.
--   start:   number of the first rule to apply (optional).
--   finish:  number of the last rule to apply (optional).
--   radical: when truthy, `text` is first passed through radically_wikify ().
-- Returns the processed text, or an error <span> naming the rule after which
-- the text became empty.
local function wikify (text, page, start, finish, radical)
	if radical then
		text = radically_wikify (text)
	end
	local rule_no = 0
	for rule in transforms do
		rule_no = rule_no + 1
		local reached_start = not start or rule_no >= start
		local within_finish = not finish or rule_no <= finish
		if reached_start and within_finish then
			text = rule (text, page)
			if #text == 0 then
				local failed_at = tostring (rule_no)
				return '<span class="error">Text became empty at rule #' .. failed_at .. '</span>\n'
			end
		end
	end
	return text
end	-- local function wikify (text, page, start, finish, radical)

-- Sample wikitext (in Russian) that the `test` and `test2` entry points below
-- pass to wikify (). It is a runtime string; the long-bracket level [==[ ]==]
-- lets it contain ordinary wiki links in double square brackets.
local test = [==[
Страница тестирования [[MediaWiki:Gadget-wikifier.js|викификатора]].

Окройте страницу в режиме редактирования и нажмите кнопку викификации. Не сохраняйте викифицированный текст.

== Экранирование шаблонов ==
{{nobr|[[Category:All]]}}

== Обработка невикифицированного HTML ==
<h3>Часть 1</h3>
Текст со сноской [1]. Ещё текст с другой[2] сноской.
<p>Абзац &lt;p&gt;, не закрытый надлежащим образом и содержащий текст "в кавычках, и даже "во вложенных кавычках"". Текст в «лапках».
<p>И ещё один абзац, на сей раз закрытый.</p>

== Интернализация ссылок на источники ==
* [http://www.hist.msu.ru/ER/Etext/apr1906.htm должна интернализоваться],
* [http://www.hist.msu.ru/ER/Etext/apr1906.html не должна интернализоваться],

<h2>Примечания</h2>
[1] Текст первой сноски.
[2] Текст второй сноски.
]==]

-- Shared by `run` and `test`: wikify `text` using the page name and the
-- `start`, `finish` and `radical` arguments taken from the frame.
local function wikify_with_frame (frame, text)
	local page = frame:callParserFunction ('FULLPAGENAME', '')
	local first = tonumber (frame.args.start)
	local last = tonumber (frame.args.finish)
	-- Any non-empty value of `radical` switches radical wikification on:
	local radical = (frame.args.radical or '') ~= ''
	return wikify (text, page, first, last, radical)
end

-- Module interface:
--   run    wikifies the first unnamed argument, or else the `text` argument;
--   test   wikifies the built-in sample text with the frame's options;
--   test2  wikifies the built-in sample text with an empty page name;
--   test3  matches its argument against the rule grammar;
--   allowed  the table of functions that rules may call.
return {
	run = function (frame)
		return wikify_with_frame (frame, frame.args [1] or frame.args.text)
	end,
	test = function (frame)
		return wikify_with_frame (frame, test)
	end,
	test2 = function ()
		return wikify (test, '')
	end,
	test3 = function (arg)
		return transform:match (arg)
	end,
	allowed = allowed
}	-- return