26
27
from bzrlib.errors import BzrError
30
_unicode_to_escaped_map = {}
35
"'":"'", # FIXME: overkill
42
def _ensure_utf8_re():
43
"""Make sure the _utf8_re regex has been compiled"""
45
if _utf8_re is not None:
47
_utf8_re = re.compile(u'[&<>\'\"\u0080-\uffff]')
50
def _utf8_escape_replace(match, _map=_utf8_escape_map):
51
"""Replace a string of non-ascii, non XML safe characters with their escape
53
This will escape both Standard XML escapes, like <>"', etc.
54
As well as escaping non ascii characters, because ElementTree did.
55
This helps us remain compatible to older versions of bzr. We may change
56
our policy in the future, though.
58
# TODO: jam 20060816 Benchmark this, is it better to use try/except or
59
# to use _map.get() and check for None.
60
# Or still further, it might be better to pre-generate all
61
# possible conversions. However, the occurrence of unicode
62
# characters is quite low, so an initial guess is that this
63
# is the most efficient method
64
# Also need to benchmark whether it is better to have a regex
65
# which matches multiple characters, or if it is better to
66
# only match a single character and call this function multiple
67
# times. The chance that we actually need multiple escapes
68
# is probably very low for our expected usage
70
return _map[match.group()]
72
return "&#%d;" % ord(match.group())
75
def _encode_and_escape(unicode_str, _map=_unicode_to_escaped_map):
76
"""Encode the string into utf8, and escape invalid XML characters"""
78
return _map[unicode_str]
80
# The alternative policy is to do a regular UTF8 encoding
81
# and then escape only XML meta characters. This could take
82
# advantage of cache_utf8 since a lot of the revision ids
83
# and file ids would already be cached.
84
text = str(_utf8_re.sub(_utf8_escape_replace, unicode_str))
85
_map[unicode_str] = text
29
89
class Serializer_v5(Serializer):
30
90
"""Version 5 serializer
32
92
Packs objects into XML and vice versa.
35
__slots__ = ['_utf8_re']
40
97
def write_inventory_to_string(self, inv):
98
"""Just call write_inventory with a StringIO and return the value"""
41
99
sio = cStringIO.StringIO()
42
100
self.write_inventory(inv, sio)
43
101
return sio.getvalue()
48
106
:param inv: the inventory to write.
49
107
:param f: the file to write.
52
111
self._append_inventory_root(output, inv)
53
112
entries = inv.iter_entries()
54
114
root_path, root_ie = entries.next()
55
115
for path, ie in entries:
56
116
self._append_entry(output, ie)
57
f.write(''.join(output))
58
# elt = self._pack_inventory(inv)
59
# for child in elt.getchildren():
60
# if isinstance(child, inventory.InventoryDirectory):
63
# ElementTree(child).write(f, 'utf-8')
64
f.write('</inventory>\n')
117
output.append('</inventory>\n')
66
120
def _append_inventory_root(self, output, inv):
67
121
"""Append the inventory root to output."""
111
165
def _append_utf8_escaped(self, output, a_string):
112
166
"""Append a_string to output as utf8."""
113
if self._utf8_re is None:
115
self._utf8_re = re.compile("[&'\"<>]")
116
# escape attribute value
117
text = a_string.encode('utf8')
118
output.append(self._utf8_re.sub(self._utf8_escape_replace, text))
167
#output.append(_encode_and_escape(a_string))
168
text = str(_utf8_re.sub(_utf8_escape_replace, a_string))
119
170
output.append('"')
123
"'":"'", # FIXME: overkill
128
def _utf8_escape_replace(self, match, map=_utf8_escape_map):
129
return map[match.group()]
131
172
def _pack_inventory(self, inv):
132
173
"""Convert to XML Element"""
133
174
entries = inv.iter_entries()