Строка 1: |
Строка 1: |
− | local sort, concat = table.sort, table.concat | + | local sort, concat, clone = table.sort, table.concat, mw.clone |
| + | local match = mw.ustring.match |
| local lpeg = lpeg | | local lpeg = lpeg |
| local P, S, V, C, Cg, Cb, Ct, Cf, Cmt = lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cg, lpeg.Cb, lpeg.Ct, lpeg.Cf, lpeg.Cmt | | local P, S, V, C, Cg, Cb, Ct, Cf, Cmt = lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cg, lpeg.Cb, lpeg.Ct, lpeg.Cf, lpeg.Cmt |
Строка 7: |
Строка 8: |
| local colon, hyphen, equals = P':', P':', P'=' | | local colon, hyphen, equals = P':', P':', P'=' |
| | | |
− | local possible = (function (list) | + | -- Service functions: |
| + | local function ordered_choice (list) |
| local choice = never | | local choice = never |
| sort (list, function (a, b) | | sort (list, function (a, b) |
Строка 16: |
Строка 18: |
| end | | end |
| return choice | | return choice |
− | end) { | + | end |
| + | |
| + | local function to_set (list) |
| + | local set = {} |
| + | for _, tag in ipairs (list) do |
| + | set [tag] = true |
| + | end |
| + | return set |
| + | end |
| + | |
| + | -- Expected HTML tags: |
| + | local possible = ordered_choice { |
| 'a', 'span', 'b', 'i', 'strong', 'em', | | 'a', 'span', 'b', 'i', 'strong', 'em', |
| 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', | | 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', |
Строка 23: |
Строка 36: |
| } | | } |
| | | |
− | local table_tags = (function (list)
| + | -- Only these tags can be legitimately empty: |
− | local set = {}
| + | local empty = to_set { 'th', 'td', 'br', 'hr' } |
− | for _, tag in ipairs (list) do
| |
− | set [tag] = true
| |
− | end
| |
− | return set
| |
− | end) { 'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td' }
| |
| | | |
− | local function quoted (quote) | + | -- Table tags are unmergeable: |
− | return P (quote) * Cg ((any - quote + backslash * quote / quote) ^ 0, 'value') * P (quote)
| + | local table_set = to_set { 'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td' } |
− | end
| |
| | | |
| -- Return true, if node1 and node2 are tags of the same type with exactly the same attributes: | | -- Return true, if node1 and node2 are tags of the same type with exactly the same attributes: |
Строка 40: |
Строка 47: |
| return false | | return false |
| end | | end |
− | if table_tags [node1.__name] or table_tags [node2.__name] then | + | if table_set [node1.__name] or table_set [node2.__name] then |
| + | -- Table elements should not be merged: |
| return false | | return false |
| end | | end |
Строка 69: |
Строка 77: |
| -- Unmergeable: | | -- Unmergeable: |
| return node1, node2 | | return node1, node2 |
| + | end |
| + | |
| + | -- If there is only one child node, and it can be safely assimilated into the parent node, merge it into the parent: |
| + | local function assimilate (node) |
| + | if #node == 0 or #node > 1 then |
| + | return node |
| + | end |
| + | local child = clone (node [1]) |
| + | if type (child) == 'string' or table_set [node.__name] and not table_set [child.__name] or table_set [child.__name] then |
| + | return node |
| + | end |
| + | --if node.__name == child.__name then |
| + | for key, value in pairs (child) do |
| + | if key == 'style' and node [key] then |
| + | node [key] = node [key] .. '; ' .. value |
| + | else |
| + | node [key] = value |
| + | end |
| + | end |
| + | node [1] = child [1] |
| + | --end |
| + | return node |
| end | | end |
| | | |
| + | local function quoted (quote) |
| + | return P (quote) * Cg ((any - quote + backslash * quote / quote) ^ 0, 'value') * P (quote) |
| + | end |
| | | |
| local grammar = P { V'fragment' * -1, | | local grammar = P { V'fragment' * -1, |
Строка 82: |
Строка 115: |
| tag [#tag], tag [#tag + 1] = merge_nodes (tag [#tag], node) | | tag [#tag], tag [#tag + 1] = merge_nodes (tag [#tag], node) |
| end | | end |
− | return tag | + | if type (tag [1]) == 'string' and match (tag [1], '^%s*$') and not empty [tag.__name] then |
| + | -- An empty tag of this type can be safely dropped: |
| + | return nil |
| + | end |
| + | return assimilate (tag) -- try to assimilate the only child. |
| end, | | end, |
| open = lt * spaces * Cg (possible, '__name') * V'attributes' * spaces * gt, | | open = lt * spaces * Cg (possible, '__name') * V'attributes' * spaces * gt, |
Строка 116: |
Строка 153: |
| test = function (frame) | | test = function (frame) |
| return mw.dumpObject {grammar:match (test)} | | return mw.dumpObject {grammar:match (test)} |
− | end, same = same_tag_and_attributes | + | end |
| } | | } |