Skip to content

Commit 7568519

Browse files
views: better implement xml escaping with invalid chars
1 parent 6c30092 commit 7568519

1 file changed

Lines changed: 37 additions & 1 deletion

File tree

Evtx/Views.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,45 @@ def __init__(self, msg):
2929
super(UnexpectedElementException, self).__init__(msg)
3030

3131

32+
import re

# Pick an HTML/XML markup escaper that exists on the running interpreter.
# `six` does not provide a shim for this, hence the manual fallback.
try:
    # Python 3
    from html import escape as html_escape
except ImportError:
    # Python 2
    from cgi import escape as html_escape


# Code points of the whitespace control characters referenced by this module.
CHAR_TAB = 0x9
CHAR_NL = 0xA
CHAR_CR = 0xD

# NOTE: these are integer code points, not one-character strings.
VALID_WHITESPACE = (CHAR_TAB, CHAR_NL, CHAR_CR)


# Matches control characters that should not appear literally in XML output.
# TAB (0x09), LF (0x0A), CR (0x0D) and NEL (0x85) are deliberately excluded
# from the class, as is NUL (0x00).
# ref: https://www.w3.org/TR/xml11/#charsets
RESTRICTED_CHARS = re.compile('[\x01-\x08\x0B\x0C\x0E-\x1F\x7F-\x84\x86-\x9F]')
50+
51+
52+
def escape(s):
    """
    Escape a text string so it can be embedded in an XML document.

    Three steps are applied, in order:

      1. Markup characters (``&``, ``<``, ``>`` and quotes) are replaced
         with entity references by ``html_escape``.
      2. Every non-ASCII character is replaced with a numeric character
         reference (``&#NNN;``), so the result is pure ASCII.
      3. Control characters matched by ``RESTRICTED_CHARS`` are removed.

    Args:
      s: the text to escape. Because the first step escapes ``&``, passing
        text that already contains entity references escapes them again.

    Returns:
      The escaped, ASCII-only text.
    """
    # `html.escape` (py3) escapes quotes by default while `cgi.escape` (py2)
    # does not; request it explicitly so both interpreters agree on `"`.
    # (py3 additionally escapes `'`.)
    esc = html_escape(s, quote=True)

    # Non-ASCII characters become `&#NNN;` references here, *before* the
    # restricted-character filter runs. As a result only the ASCII part of
    # RESTRICTED_CHARS (0x01-0x1F minus whitespace, and 0x7F) can still match
    # below; characters in 0x80-0x9F have already been turned into references.
    esc = esc.encode('ascii', 'xmlcharrefreplace').decode('ascii')

    # An earlier per-character loop that followed the return statement was
    # unreachable and has been dropped.
    return RESTRICTED_CHARS.sub('', esc)
66+
67+
3268
def to_xml_string(s):
    """
    Render a text string so it is safe to place in generated XML.

    Markup characters are escaped exactly once (``&``, ``<``, ``>`` and
    ``"``), non-ASCII characters are emitted as numeric character
    references, and control characters matched by ``RESTRICTED_CHARS`` are
    removed.

    Args:
      s: the text to render.

    Returns:
      The escaped, ASCII-only text.
    """
    s = xml.sax.saxutils.escape(s, {'"': '&quot;'})

    # Do NOT pass the result through `escape()`: it runs the markup escaper a
    # second time and would turn the `&amp;` / `&lt;` produced above into
    # `&amp;amp;` / `&amp;lt;`. Apply only the non-markup steps here.
    s = s.encode('ascii', 'xmlcharrefreplace').decode('ascii')
    return RESTRICTED_CHARS.sub('', s)
3571

3672

3773
def render_root_node(root_node, subs):

0 commit comments

Comments
 (0)