1515# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1616# See the License for the specific language governing permissions and
1717# limitations under the License.
18+ import re
19+ import xml .sax .saxutils
20+
1821import six
1922
2023import Evtx .Nodes as e_nodes
21- import xml .sax .saxutils
2224
2325
2426XML_HEADER = "<?xml version=\" 1.1\" encoding=\" utf-8\" standalone=\" yes\" ?>\n "
@@ -29,45 +31,66 @@ def __init__(self, msg):
2931 super (UnexpectedElementException , self ).__init__ (msg )
3032
3133
32- try :
33- # unfortunately no support yet in six.
34- # py3
35- from html import escape as html_escape
36- except ImportError :
37- # py2
38- from cgi import escape as html_escape
34+ # ref: https://www.w3.org/TR/xml11/#charsets
35+ RESTRICTED_CHARS = re .compile ('[\x01 -\x08 \x0B \x0C \x0E -\x1F \x7F -\x84 \x86 -\x9F ]' )
3936
4037
41- CHAR_TAB = 0x9
42- CHAR_NL = 0xA
43- CHAR_CR = 0xD
def escape_attr(s):
    '''
    render the given string as a complete, quoted XML attribute value.

    note that the result already carries its own surrounding quote characters
     (chosen by `xml.sax.saxutils.quoteattr`), so it is meant to be emitted like:

        <foo bar=$value>

    Args:
      s (str): the string to escape.

    Returns:
      str: the quoted and escaped string, containing only ASCII characters.
    '''
    # quoteattr escapes the markup characters and wraps the value in quotes.
    quoted = xml.sax.saxutils.quoteattr(s)
    # anything outside ASCII is rewritten as a numeric character reference.
    ascii_only = quoted.encode('ascii', 'xmlcharrefreplace').decode('ascii')
    # finally, drop every character that RESTRICTED_CHARS matches.
    return RESTRICTED_CHARS.sub('', ascii_only)
54+
55+
def escape_value(s):
    '''
    render the given string so that it can be used as XML character data, like:

        <foo>
          $value
        </foo>

    Args:
      s (str): the string to escape.

    Returns:
      str: the escaped string, containing only ASCII characters.
    '''
    # escape the characters that would otherwise be read as markup.
    escaped = xml.sax.saxutils.escape(s)
    # anything outside ASCII is rewritten as a numeric character reference.
    ascii_only = escaped.encode('ascii', 'xmlcharrefreplace').decode('ascii')
    # finally, drop every character that RESTRICTED_CHARS matches.
    return RESTRICTED_CHARS.sub('', ascii_only)
5774
58- out = []
59- for c in s :
60- # ref: http://www.asciitable.com/index/asciifull.gif
61- if ord (c ) < 0x20 and c not in VALID_WHITESPACE :
62- c = '&#x%04x;' % (ord (c ))
63- out .append (c )
6475
65- return '' .join (out )
# ref: https://www.w3.org/TR/xml/#NT-NameStartChar
# but we are going to require an even stricter, ASCII-only subset.
#
# the pattern is anchored with \Z so that the *entire* name must conform;
#  without the anchor, `.match()` accepts any string that merely starts with
#  a valid name (such as 'foo bar' or 'a><b').
# digits, '.' and ':' are allowed after the first character so that names
#  such as 'Param1' or 'xmlns:auto-ns3' remain valid.
NAME_PATTERN = re.compile(r'[a-zA-Z_:][a-zA-Z0-9_:.\-]*\Z')


def validate_name(s):
    '''
    ensure the given name can be used as an XML name, such as tag or attribute name.

    the whole string must match `NAME_PATTERN`; a valid prefix is not enough.

    Args:
      s (str): the string to validate.

    Returns:
      str: the given string, unchanged, when it is a valid name.

    Raises:
      RuntimeError: if the string is not suitable to be an XML name.
    '''
    if NAME_PATTERN.match(s) is None:
        raise RuntimeError('invalid xml name: %s' % (s))
    return s
7194
7295
7396def render_root_node_with_subs (root_node , subs ):
@@ -90,16 +113,17 @@ def rec(node, acc):
90113 for child in node .children ():
91114 if isinstance (child , e_nodes .AttributeNode ):
92115 acc .append (" " )
93- acc .append (to_xml_string (child .attribute_name ().string ()))
116+ acc .append (validate_name (child .attribute_name ().string ()))
94117 acc .append ("=\" " )
95118 # TODO: should use xml.sax.saxutils.quoteattr here
119+ # but to do so, we'd need to ensure we're not double-quoting this value.
96120 rec (child .attribute_value (), acc )
97121 acc .append ("\" " )
98122 acc .append (">" )
99123 for child in node .children ():
100124 rec (child , acc )
101125 acc .append ("</" )
102- acc .append (to_xml_string (node .tag_name ()))
126+ acc .append (validate_name (node .tag_name ()))
103127 acc .append (">\n " )
104128 elif isinstance (node , e_nodes .CloseStartElementNode ):
105129 pass # intended
@@ -108,19 +132,20 @@ def rec(node, acc):
108132 elif isinstance (node , e_nodes .CloseElementNode ):
109133 pass # intended
110134 elif isinstance (node , e_nodes .ValueNode ):
111- acc .append (to_xml_string (node .children ()[0 ].string ()))
135+ acc .append (escape_value (node .children ()[0 ].string ()))
112136 elif isinstance (node , e_nodes .AttributeNode ):
113137 pass # intended
114138 elif isinstance (node , e_nodes .CDataSectionNode ):
115139 acc .append ("<![CDATA[" )
116- acc .append (to_xml_string (node .cdata ()))
140+ # TODO: is this correct escaping???
141+ acc .append (escape_value (node .cdata ()))
117142 acc .append ("]]>" )
118143 elif isinstance (node , e_nodes .EntityReferenceNode ):
119- acc .append (to_xml_string (node .entity_reference ()))
144+ acc .append (escape_value (node .entity_reference ()))
120145 elif isinstance (node , e_nodes .ProcessingInstructionTargetNode ):
121- acc .append (to_xml_string (node .processing_instruction_target ()))
146+ acc .append (escape_value (node .processing_instruction_target ()))
122147 elif isinstance (node , e_nodes .ProcessingInstructionDataNode ):
123- acc .append (to_xml_string (node .string ()))
148+ acc .append (escape_value (node .string ()))
124149 elif isinstance (node , e_nodes .TemplateInstanceNode ):
125150 raise UnexpectedElementException ("TemplateInstanceNode" )
126151 elif isinstance (node , e_nodes .NormalSubstitutionNode ):
@@ -129,7 +154,7 @@ def rec(node, acc):
129154 if isinstance (sub , e_nodes .BXmlTypeNode ):
130155 sub = render_root_node (sub .root ())
131156 else :
132- sub = to_xml_string (sub .string ())
157+ sub = escape_value (sub .string ())
133158
134159 acc .append (sub )
135160 elif isinstance (node , e_nodes .ConditionalSubstitutionNode ):
@@ -138,7 +163,7 @@ def rec(node, acc):
138163 if isinstance (sub , e_nodes .BXmlTypeNode ):
139164 sub = render_root_node (sub .root ())
140165 else :
141- sub = to_xml_string (sub .string ())
166+ sub = escape_value (sub .string ())
142167
143168 acc .append (sub )
144169 elif isinstance (node , e_nodes .StreamStartNode ):
0 commit comments