58
58
# in overall time. But if you miss frequently, then if None is much
59
59
# faster. For our use case, we *rarely* have a revision id, file id
60
60
# or path name that is unicode. So use try/KeyError.
61
return _map[match.group()]
62
return _map[match.group()]
64
return "&#%d;" % ord(match.group())
64
67
_unicode_to_escaped_map = {}
66
def _encode_and_escape(unicode_str, _map=_unicode_to_escaped_map,
67
_encode=cache_utf8.encode):
69
def _encode_and_escape(unicode_str, _map=_unicode_to_escaped_map):
68
70
"""Encode the string into utf8, and escape invalid XML characters"""
69
71
# We frequently get entities we have not seen before, so it is better
70
72
# to check if None, rather than try/KeyError
71
73
text = _map.get(unicode_str)
73
75
# The alternative policy is to do a regular UTF8 encoding
74
# and then escape only XML meta characters. This could take
75
# advantage of cache_utf8 since a lot of the revision ids
76
# and file ids would already be cached.
77
text = _utf8_re.sub(_utf8_escape_replace, _encode(unicode_str)) + '"'
76
# and then escape only XML meta characters.
77
# Performance is equivalent once you use cache_utf8. *However*
78
# this makes the serialized texts incompatible with old versions
79
# of bzr. So no net gain. (Perhaps the read code would handle utf8
80
# better than entity escapes, but cElementTree seems to do just fine
82
text = str(_utf8_re.sub(_utf8_escape_replace, unicode_str)) + '"'
78
83
_map[unicode_str] = text