Строка 7: |
Строка 7: |
| local lt, gt, slash, backslash = P'<', P'>', P'/', P'\\' | | local lt, gt, slash, backslash = P'<', P'>', P'/', P'\\' |
| local colon, hyphen, equals = P':', P':', P'=' | | local colon, hyphen, equals = P':', P':', P'=' |
| + | |
-- Wrap func so that repeated calls with the same argument list reuse the
-- results of the first call instead of invoking func again.
--
-- Arguments are compared by raw equality, one position at a time: strings and
-- numbers match by value, tables and functions match by identity.  Calls with
-- a different number of arguments are cached separately.  All return values of
-- func (including nil and false) are stored and returned on a cache hit.
--
-- NOTE(review): the cache holds strong references to every argument and result
-- for the lifetime of the returned function, and a table argument mutated
-- after the first call will still yield the originally cached result --
-- confirm that this is acceptable for each function wrapped with memoize.
--
-- @param func  the function to wrap.
-- @return      a function with the same call signature as func.
local function memoize (func)
	local unpack = table.unpack or unpack -- Lua 5.2+ or Lua 5.1
	-- Private sentinels: nil and NaN cannot be used as table keys, and RESULT
	-- can never collide with a caller-supplied argument.
	local NIL, NAN, RESULT = {}, {}, {}
	-- cache [argument count] is the root of a trie keyed by successive arguments.
	local cache = {}

	-- Capture every return value together with the true count, so that
	-- trailing nils survive the round trip through the cache.
	local function pack (...)
		return { n = select ('#', ...), ... }
	end

	return function (...)
		local count = select ('#', ...)
		local node = cache [count]
		if not node then
			node = {}
			cache [count] = node
		end
		-- Walk (and extend, where needed) the trie along the argument list.
		for i = 1, count do
			local key = (select (i, ...))
			if key == nil then
				key = NIL
			elseif key ~= key then
				key = NAN
			end
			local child = node [key]
			if not child then
				child = {}
				node [key] = child
			end
			node = child
		end
		local result = node [RESULT]
		if not result then
			-- If func raises an error, nothing is cached and the error propagates.
			result = pack (func (...))
			node [RESULT] = result
		end
		return unpack (result, 1, result.n)
	end
end
| | | |
| local max_parsed = 4 * 1024 * 1024 | | local max_parsed = 4 * 1024 * 1024 |
Строка 77: |
Строка 88: |
| | | |
| -- Return true, if node1 and node2 are tags of the same type with exactly the same attributes: | | -- Return true, if node1 and node2 are tags of the same type with exactly the same attributes: |
− | local function same_tag_and_attributes (node1, node2) | + | local same_tag_and_attributes = memoize (function (node1, node2) |
| if type (node1) ~= 'table' or type (node2) ~= 'table' then | | if type (node1) ~= 'table' or type (node2) ~= 'table' then |
| return false | | return false |
Строка 96: |
Строка 107: |
| end | | end |
| return subset (node1, node2) and subset (node2, node1) | | return subset (node1, node2) and subset (node2, node1) |
− | end | + | end) |
| | | |
− | local function merge_nodes (node1, node2) | + | local merge_nodes = memoize (function (node1, node2) |
| if type (node1) == 'string' and type (node2) == 'string' then | | if type (node1) == 'string' and type (node2) == 'string' then |
| return node1 .. ' ' .. node2 | | return node1 .. ' ' .. node2 |
Строка 111: |
Строка 122: |
| -- block: | | -- block: |
| return node1, node2 | | return node1, node2 |
− | end | + | end) |
| | | |
| -- Allowed CSS attributes: | | -- Allowed CSS attributes: |
Строка 121: |
Строка 132: |
| single * C ((any - single + backslash * single / "'") ^ 1) * single | | single * C ((any - single + backslash * single / "'") ^ 1) * single |
| + double * C ((any - double + backslash * double / '"') ^ 1) * double | | + double * C ((any - double + backslash * double / '"') ^ 1) * double |
− | )) / function (css) | + | )) / memoize (function (css) |
| local sanitised = {} | | local sanitised = {} |
| for attr, value in gmatch (css, '(%w+)%s*:%s*([^;]+)') do | | for attr, value in gmatch (css, '(%w+)%s*:%s*([^;]+)') do |
Строка 132: |
Строка 143: |
| serialised [#serialised + 1] = attr .. ': ' .. value | | serialised [#serialised + 1] = attr .. ': ' .. value |
| end | | end |
− | return concat (serialised, '; ') | + | local new_css = concat (serialised, '; ') |
− | end / function (css)
| + | if new_css == '' then |
− | if css == '' then | |
| return nil | | return nil |
| else | | else |
− | return { name = 'style', value = css } | + | return { name = 'style', value = new_css } |
| end | | end |
− | end | + | end) |
| | | |
| -- These tags should be removed and their contents moved to the parent tag: | | -- These tags should be removed and their contents moved to the parent tag: |
Строка 145: |
Строка 155: |
| | | |
| -- If there is only one child node, and it can be safely assimilated into the parent node: | | -- If there is only one child node, and it can be safely assimilated into the parent node: |
− | local function assimilate (node) | + | local assimilate = memoize (function (node) |
| if #node ~= 1 or node.__name == 'a' then | | if #node ~= 1 or node.__name == 'a' then |
| return node | | return node |
Строка 167: |
Строка 177: |
| end | | end |
| return node | | return node |
− | end | + | end) |
| | | |
| local function quoted (quote) | | local function quoted (quote) |