Initial commit

This commit is contained in:
kdusek
2025-12-09 12:13:01 +01:00
commit 8e654ed209
13332 changed files with 2695056 additions and 0 deletions

View File

@@ -0,0 +1,39 @@
"""Regexps to match html elements"""
import re
# --- Attribute pieces -------------------------------------------------------
# Name: starts with a letter, "_" or ":", followed by letters, digits or ":._-".
attr_name = r"[a-zA-Z_:][a-zA-Z0-9:._-]*"

# A value is either bare (no quotes, "=", "<", ">", backtick, or any character
# in the range \x00-\x20), or wrapped in single or double quotes.
unquoted = r"""[^"'=<>`\x00-\x20]+"""
single_quoted = r"'[^']*'"
double_quoted = r'"[^"]*"'
attr_value = f"(?:{unquoted}|{single_quoted}|{double_quoted})"

# One attribute: mandatory leading whitespace, a name, and an optional
# "= value" part.
attribute = rf"(?:\s+{attr_name}(?:\s*=\s*{attr_value})?)"

# --- Element pieces ---------------------------------------------------------
# Opening tag: tag name, any number of attributes, optional "/" before ">".
open_tag = rf"<[A-Za-z][A-Za-z0-9\-]*{attribute}*\s*\/?>"
# Closing tag: "</", tag name, optional whitespace, ">".
close_tag = r"<\/[A-Za-z][A-Za-z0-9\-]*\s*>"
# Comment: the short forms "<!-->" / "<!--->", or "<!--" ... "-->".
comment = r"<!---?>|<!--(?:[^-]|-[^-]|--[^>])*-->"
# Processing instruction: "<?" ... "?>" (non-greedy, may span newlines).
processing = r"<[?][\s\S]*?[?]>"
# Declaration: "<!" followed by a letter, up to the next ">".
declaration = r"<![A-Za-z][^>]*>"
# CDATA section: "<![CDATA[" ... "]]>" (non-greedy, may span newlines).
cdata = r"<!\[CDATA\[[\s\S]*?\]\]>"

# --- Compiled patterns ------------------------------------------------------
# Any of the constructs above, anchored at the start of the input.
HTML_TAG_RE = re.compile(
    "^(?:"
    + "|".join((open_tag, close_tag, comment, processing, declaration, cdata))
    + ")"
)

# Only opening or closing tags, anchored at the start of the input; exposed
# both as the pattern source string and as a compiled regex.
HTML_OPEN_CLOSE_TAG_STR = f"^(?:{open_tag}|{close_tag})"
HTML_OPEN_CLOSE_TAG_RE = re.compile(HTML_OPEN_CLOSE_TAG_STR)