Модуль:Wikifier

Материал из in.wiki
Перейти к навигации Перейти к поиску

Страница тестирования викификатора.

Откройте страницу в режиме редактирования и нажмите кнопку викификации. Не сохраняйте викифицированный текст.

Экранирование шаблонов

{{nobr|}}

Обработка невикифицированного HTML

Часть 1

Текст со сноской{{тчк}}<ref name="ref1" /> Ещё текст с другой<ref name="ref2" /> сноской.

Абзац <p>, не закрытый надлежащим образом и содержащий текст в кавычках, и даже во вложенных кавычках. Текст в лапках.

И ещё один абзац, на сей раз закрытый.

Интернализация ссылок на источники

Примечания

<references><ref name="ref1">Текст первой сноски.</ref> <ref name="ref2">Текст второй сноски.</ref></references>


--[[
	Модуль для викификации текста на стороне сервера.
	Использует правила, определённые в MediaWiki:Gadget-wikifier.js.
--]]

-- Dependencies:
local concat, sort = table.concat, table.sort
local wrap, yield = coroutine.wrap, coroutine.yield
local string = mw.ustring or string
local sub, gsub, match, char = string.sub, string.gsub, string.match, string.char
local decode = mw.uri.decode
local lpeg, rex = lpeg, rex_pcre
local P, C, Cc, Cp, S, V = lpeg.P, lpeg.C, lpeg.Cc, lpeg.Cp, lpeg.S, lpeg.V
local any, never, spaces, escape, slash = P(1), P(false), lpeg.locale ().space ^ 0, P'\\', P'/'
local open, close, comma, equals = P'(', P')', P',', P'='
local ask = mw.smw.ask
local radically_wikify = require 'Module:RadicalWikifier'.run

--- Convert regex flag letters into a PCRE compile-option bitmask.
-- The UTF-8 option (`u`) is always switched on. Letters that have no entry in
-- the table below (for example JavaScript's `g`) are silently ignored.
-- @param flags string of flag letters; nil is treated as an empty string
-- @return number sum of the option bits of every recognised letter
local function convert_flags (flags)
	-- Tolerate a missing flag string: concatenating nil used to raise an error
	-- before the (unreachable) nil check could run.
	local requested = (flags or '') .. 'u'
	-- pcre.h:
	local values = {
		i = 0x0001
	  , m = 0x0002
	  , s = 0x0004
	  , x = 0x0008
	  , A = 0x0010
	  , D = 0x0020
	  , X = 0x0040
--	  , ? = 0x0080 PCRENOTBOL
--	  , ? = 0x0100 PCRENOTEOL
	  , U = 0x0200
	  , u = 0x0800
	}
	local converted = 0
	for flag, value in pairs (values) do
		-- Plain (non-pattern) search: each flag is a single literal letter, and
		-- a repeated letter still contributes its bit only once.
		if requested:find (flag, 1, true) then
			converted = converted + value
		end
	end
	return converted
end	-- local function convert_flags (flags)

--- Build an LPeg pattern that matches a JavaScript string literal delimited by
-- `quote` and captures its contents converted for use on the Lua side.
-- Conversions applied to the captured text, in order:
--   * `\xHH` .. `\xHHHH` (2-4 hex digits) becomes the character with that code;
--   * the two-character sequence `\n` becomes a newline;
--   * an escaped delimiter (`\"` or `\'`) becomes the bare delimiter;
--   * `$N` references become `%N`.
-- @param quote the delimiter character, `"` or `'`
-- @return LPeg pattern producing one string capture
local function quoted (quote)
	-- A backslash always takes the following character with it. With the
	-- alternatives the other way round the backslash was consumed on its own
	-- and an escaped delimiter then terminated the literal prematurely.
	local contents = (escape * any + (any - quote)) ^ 0
	return P (quote) * (contents / function (str)
		local sanitised = gsub (str, '\\x([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]?[0-9a-fA-F]?)', function (code)
			return char (tonumber (code, 16))
		end)
		sanitised = gsub (sanitised, '\\n', '\n')
		-- Now that escaped delimiters are accepted, strip their backslash:
		sanitised = gsub (sanitised, '\\' .. quote, quote)
		sanitised = gsub (sanitised, '$(%d+)', '%%%1')
		return sanitised
	end) * quote
end	-- local function quoted (quote)

--- PCRE-based replace. This function is used both while partially parsing the
-- JavaScript source and while applying the wikifier transformations.
-- @param string the subject text
-- @param pattern PCRE pattern
-- @param flags flag letters, translated by convert_flags
-- @param replacement replacement passed on to rex.gsub; when it is a string,
--        `$N` references (single digit) are rewritten to `%N` first
-- @return the values returned by rex.gsub
local function preg_replace (string, pattern, flags, replacement)
	if type (replacement) == 'string' then
		-- '%%%1' is a literal percent sign followed by the captured digit.
		-- The former '%%1' turned every reference, whatever its number, into `%1`.
		replacement = gsub (replacement, '$(%d)', '%%%1')
	end
	return rex.gsub (string, pattern, replacement, nil, convert_flags (flags))
end	-- local function preg_replace (string, pattern, flags, replacement)

-- Generate two functions allowed in wikifier with pseudo-static variables (hidden upvalues).
-- hide (string, pattern, flags) replaces every match of `pattern` with a numbered
-- marker (0x01, the number, 0x02) and remembers what it replaced;
-- restore (text) puts the remembered fragments back and forgets them.
local hide, restore = (function ()
	local open, close = char (0x01), char (0x02)
	-- Marker that stands in for hidden fragment number `no`:
	local function strip_marker (no)
		return open .. tostring (no) .. close
	end
	local hidden = {}
	-- hide ():
	return function (string, pattern, flags)
		-- NOTE(review): `found` is whatever rex.gsub hands to a replacement
		-- function; for a pattern with capture groups that may be the first
		-- capture rather than the whole match -- confirm against Lrexlib.
		return preg_replace (string, pattern, flags, function (found)
			local no = #hidden + 1
			hidden [no] = found
			return strip_marker (no)
		end)
	-- restore ():
	end, function (text)
		-- Newest first, so that markers contained in later-hidden fragments
		-- are expanded by the following iterations.
		for i = #hidden, 1, -1 do
			local original = hidden [i]
			if type (original) == 'string' then
				-- A function replacement inserts the fragment verbatim. Passing
				-- it as a replacement string made every `%` in the hidden text
				-- act as a capture reference, which corrupts it or raises.
				text = gsub (text, strip_marker (i), function ()
					return original
				end)
			end
		end
		hidden = {}
		return text
	end
end) ()

-- Converting external links to internal.
-- Three closures share the `urls` table:
--   collect_link (url, alias)     remembers the URL and returns the link unchanged;
--   resolve_links (text)          asks for pages whose property holds one of the
--                                 remembered URLs and records url -> page;
--   internalise_link (url, alias) returns an internal link when a page is known
--                                 for the URL, otherwise the external link.
local collect_link, resolve_links, internalise_link = (function ()
	local property = 'URL источника'
	-- url -> true (seen, no page known) or the page value found for it:
	local urls = {}
	-- collect_link ():
	return function (url, alias)
		urls [url] = true
		-- return as is:
		return '[' .. url .. ' ' .. (alias or '') .. ']'
	-- resolve_links ():
	end, function (text)
		local list = {}
		for url, _ in pairs (urls) do
			list [#list + 1] = url
		end
		if #list > 0 then
			-- One query for all URLs, joined with '||' into a single condition:
			local pages = ask {
				'[[' .. property .. '::' .. concat (list, '||') ..']]',
				'?#-',
				'?' .. property .. '#-',
				limit = #list
			}
			if pages then
				for _, row in ipairs (pages) do
					local page, url = row [1], row [property]
					-- The property may come back as one value or as a list:
					if type (url) ~= 'table' then
						url = { url }
					end
					for __, value in ipairs (url) do
						urls [value] = page
					end
				end
			end
		end
		return text -- as is
	-- internalise_link:
	end, function (url, alias)
		local page = urls [url]
		-- `page` is nil for a URL that was never collected and true for one
		-- that was collected but not resolved; both mean "keep it external".
		-- A missing alias contributes nothing instead of raising on nil.
		if page and page ~= true then
			-- successfully internalised:
			return '[[' .. page .. (alias and '|' .. alias or '') .. ']]'
		else
			-- not internalised:
			return '[' .. url .. (alias and ' ' .. alias or '') .. ']'
		end
	end
end) ()

-- Whitelist of the JavaScript functions that wikifying rules may call, each
-- mapped to its Lua reimplementation. The keys are the names as they appear
-- in the JavaScript source.
local allowed = {}

-- Plain aliases of functions defined above:
allowed.r					= preg_replace
allowed.hide				= hide
allowed.restore				= restore
allowed.collect_link		= collect_link
allowed.resolve_links		= resolve_links
allowed.internalise_link	= internalise_link

-- Hide whole elements of any of the given tag names (opening tag, content and
-- matching closing tag):
function allowed.hideTags (string, ...)
	local alternatives = concat ({...}, '|')
	return hide (string, '<(' .. alternatives .. ')( [^>]+)?>[\\s\\S]+?<\\/\\1>', 'giu')
end

-- Replacement callback: builds a piped internal link from the third and fourth
-- arguments, URL-decoding the page part.
function allowed.wikifyInternalLinks (_, __, page, alias)
	return '[[' .. decode (page) .. '|' .. alias .. ']]'
end

-- Replacement callback: reads the last four characters of the second argument
-- as a hexadecimal code and returns the corresponding character.
function allowed.char (_, s)
	local hex = sub (s, -4)
	return char (tonumber (hex, 16))
end

local s = P's'

-- Unique placeholder captured wherever the JavaScript variable `s` is passed as
-- an argument. Being a table it can never be equal to a string literal taken
-- from the JavaScript source (the former placeholder was the string 's', which
-- a literal 's' argument was indistinguishable from).
local subject = {}

-- Grammar for one wikifier statement of the form `s = func (arg, ...);`.
-- A successful match returns a function (text) -> result of the call, followed
-- by the position just after the terminating semicolon.
local transform = P { 'trans',
	trans	= s * spaces * equals * spaces * V'call' * spaces * ';' * Cp (),
	-- A call captures the Lua implementation of the function and its arguments
	-- and folds them into a closure awaiting the text to process:
	call	= V'func' * spaces * open
			* ( spaces * V'arg' * ( spaces * comma * spaces * V'arg' ) ^ 0 )
			* spaces * close / function (func, ...)
				local args, count = {...}, select ('#', ...)
				return function (string)
					-- Build a fresh argument list on every call. Substituting
					-- in place destroyed the placeholders, so a second call
					-- would silently reuse the text of the first one.
					local actual = {}
					for i = 1, count do
						if args [i] == subject then
							actual [i] = string
						else
							actual [i] = args [i]
						end
					end
					return func (unpack (actual, 1, count))
				end
			end,
	func	= (function()
		-- Only allowed functions. Their names must be ordered from longest to shortest:
		local names = {}
		for name, _ in pairs (allowed) do
			names [#names + 1] = name
		end
		sort (names, function (a, b)
			return #a > #b
		end)
		local choice = never
		for _, name in ipairs (names) do
			choice = choice + P (name) * Cc (allowed [name])
		end
		return choice
	end) (),
	arg		= V's' + V'call' + V'string' + V'regex' + V'func',
	-- The variable `s` yields the placeholder instead of its own text:
	s		= s * Cc (subject),
	string	= quoted'"' + quoted"'",
	-- A regex literal yields two captures: the pattern, with `\uXXXX` rewritten
	-- to `\x{XXXX}`, and the (possibly empty) flag letters.
	regex	= slash * ((escape * slash + any - slash) ^ 1 / function (pattern)
		local sanitised = rex.gsub (pattern, '\\\\u([0-9A-F]{2,4})', '\\x{%1}', nil, convert_flags'ig')
		return sanitised
	end)	* slash * C (S'gim' ^ 0 + '')
}

-- Name of the interface message holding the wikifier rules, and its text:
local wikifier = 'Gadget-wikifier.js'
local code = tostring (mw.message.new (wikifier))
-- Searches forward for the next statement `transform` can parse: try it here,
-- otherwise skip one character and try again.
local transform_anywhere = P { transform + any * V (1) }

-- Iterate over wikifier code yielding processing functions.
-- `transforms` is a coroutine-backed iterator: each call returns the next
-- function parsed from the JavaScript source, and nothing once no further
-- statement can be parsed.
local transforms = (function (code)
	-- Remove comments:
	local source = preg_replace (code, '//.*$', 'mg', '')
	-- [\s\S] instead of `.` so that block comments spanning several lines are
	-- removed as well (`.` does not match a newline without the dot-all option):
	source = preg_replace (source, '/\\*[\\s\\S]*?\\*/', 'g', '')
	return wrap (function ()
		local pos = 1
		while pos <= #source do
			local func, next_pos = transform_anywhere:match (source, pos)
			if not func then
				-- Nothing parseable is left: finish instead of carrying on
				-- with a nil position.
				break
			end
			pos = next_pos
			yield (func)
		end
	end)
end) (code) --local transforms = (function (code)

--- Apply the wikifier rules to `text`.
-- Rules are numbered from 1 in the order the `transforms` iterator yields them.
-- @param text the text to wikify; nil or an empty string yields an empty string
-- @param page page name, handed to every rule as a second argument
-- @param start number of the first rule to apply (optional)
-- @param finish number of the last rule to apply (optional)
-- @param radical when true, the text first goes through radically_wikify
-- @return the processed text, or an error <span> naming the rule after which
--         the text became empty
local function wikify (text, page, start, finish, radical)
	-- Nothing to do for missing or empty input. Without this guard empty input
	-- was reported as "became empty at rule #N", and nil input reached the rules.
	if text == nil or text == '' then
		return ''
	end
	if radical then
		text = radically_wikify (text)
	end
	local counter = 1
	for func in transforms do
		if (not start or counter >= start) and (not finish or counter <= finish) then
			text = func (text, page)
			if #text == 0 then
				return '<span class="error">Text became empty at rule #' .. tostring (counter) .. '</span>\n'
			end
		end
		counter = counter + 1
	end
	return text
end	-- local function wikify (text, page, start, finish, radical)

-- Built-in sample wikitext used by the `test` and `test2` entry points below.
-- Its sections contain a template wrapping a category link, raw HTML headings
-- and paragraphs (one of them unclosed), straight and nested quotation marks,
-- bracketed footnote numbers, and two external links.
local test = [==[
Страница тестирования [[MediaWiki:Gadget-wikifier.js|викификатора]].

Окройте страницу в режиме редактирования и нажмите кнопку викификации. Не сохраняйте викифицированный текст.

== Экранирование шаблонов ==
{{nobr|[[Category:All]]}}

== Обработка невикифицированного HTML ==
<h3>Часть 1</h3>
Текст со сноской [1]. Ещё текст с другой[2] сноской.
<p>Абзац &lt;p&gt;, не закрытый надлежащим образом и содержащий текст "в кавычках, и даже "во вложенных кавычках"". Текст в «лапках».
<p>И ещё один абзац, на сей раз закрытый.</p>

== Интернализация ссылок на источники ==
* [http://www.hist.msu.ru/ER/Etext/apr1906.htm должна интернализоваться],
* [http://www.hist.msu.ru/ER/Etext/apr1906.html не должна интернализоваться],

<h2>Примечания</h2>
[1] Текст первой сноски.
[2] Текст второй сноски.
]==]

-- Shared driver for the frame-based entry points: wikifies `text` using the
-- current full page name and the `start`, `finish` and `radical` frame
-- arguments (`radical` counts as set when it is a non-empty string).
local function wikify_for_frame (frame, text)
	return wikify (
		text,
		frame:callParserFunction ('FULLPAGENAME', ''),
		tonumber (frame.args.start),
		tonumber (frame.args.finish),
		(frame.args.radical or '') ~= ''
	)
end

-- Module interface:
--   run     wikify the first positional argument, or the `text` argument;
--   test    wikify the built-in sample text with the same frame options;
--   test2   wikify the built-in sample text with an empty page name;
--   test3   match a single statement against the `transform` grammar;
--   allowed the table of functions available to wikifier rules.
return {
	run = function (frame)
		return wikify_for_frame (frame, frame.args [1] or frame.args.text)
	end,
	test = function (frame)
		return wikify_for_frame (frame, test)
	end,
	test2 = function ()
		return wikify (test, '')
	end,
	test3 = function (arg)
		return transform:match (arg)
	end,
	allowed = allowed
}	-- return