22
22
# importing this module is fairly slow because it has to load several
27
from bzrlib.serializer import Serializer
28
from bzrlib.trace import mutter
25
from bzrlib.trace import mutter, warning
32
# it's in this package in python2.5
33
from xml.etree.cElementTree import (ElementTree, SubElement, Element,
34
XMLTreeBuilder, fromstring, tostring)
35
import xml.etree as elementtree
36
# Also import ElementTree module so monkey-patching below always works
37
import xml.etree.ElementTree
39
from cElementTree import (ElementTree, SubElement, Element,
40
XMLTreeBuilder, fromstring, tostring)
41
import elementtree.ElementTree
42
ParseError = SyntaxError
28
from cElementTree import (ElementTree, SubElement, Element,
29
XMLTreeBuilder, fromstring, tostring)
43
31
except ImportError:
44
32
mutter('WARNING: using slower ElementTree; consider installing cElementTree'
45
33
" and make sure it's on your PYTHONPATH")
46
# this copy is shipped with bzr
47
34
from util.elementtree.ElementTree import (ElementTree, SubElement,
48
35
Element, XMLTreeBuilder,
49
36
fromstring, tostring)
50
37
import util.elementtree as elementtree
51
from xml.parsers.expat import ExpatError as ParseError
53
39
from bzrlib import errors
56
class XMLSerializer(Serializer):
57
"""Abstract XML object serialize/deserialize"""
59
squashes_xml_invalid_characters = True
61
def read_inventory_from_string(self, xml_string, revision_id=None,
62
entry_cache=None, return_from_cache=False):
63
"""Read xml_string into an inventory object.
65
:param xml_string: The xml to read.
66
:param revision_id: If not-None, the expected revision id of the
67
inventory. Some serialisers use this to set the results' root
68
revision. This should be supplied for deserialising all
69
from-repository inventories so that xml5 inventories that were
70
serialised without a revision identifier can be given the right
71
revision id (but not for working tree inventories where users can
72
edit the data without triggering checksum errors or anything).
73
:param entry_cache: An optional cache of InventoryEntry objects. If
74
supplied we will look up entries via (file_id, revision_id) which
75
should map to a valid InventoryEntry (File/Directory/etc) object.
76
:param return_from_cache: Return entries directly from the cache,
77
rather than copying them first. This is only safe if the caller
78
promises not to mutate the returned inventory entries, but it can
79
make some operations significantly faster.
42
class Serializer(object):
43
"""Abstract object serialize/deserialize"""
44
def write_inventory(self, inv, f):
    """Pack an inventory and write the resulting element to f.

    :param inv: inventory object handed to self._pack_inventory.
    :param f: destination passed through unchanged to self._write_element.
    """
    packed = self._pack_inventory(inv)
    self._write_element(packed, f)
49
def write_inventory_to_string(self, inv):
    """Return tostring() of the packed inventory with a newline appended.

    :param inv: inventory object handed to self._pack_inventory.
    """
    packed = self._pack_inventory(inv)
    serialised = tostring(packed)
    return serialised + '\n'
52
def read_inventory_from_string(self, xml_string):
82
return self._unpack_inventory(fromstring(xml_string), revision_id,
83
entry_cache=entry_cache,
84
return_from_cache=return_from_cache)
54
return self._unpack_inventory(fromstring(xml_string))
55
except SyntaxError, e:
86
56
raise errors.UnexpectedInventoryFormat(e)
88
def read_inventory(self, f, revision_id=None):
58
def read_inventory(self, f):
91
return self._unpack_inventory(self._read_element(f),
60
return self._unpack_inventory(self._read_element(f))
61
except SyntaxError, e:
96
62
raise errors.UnexpectedInventoryFormat(e)
98
64
def write_revision(self, rev, f):
115
81
return ElementTree().parse(f)
118
def escape_invalid_chars(message):
119
"""Escape the XML-invalid characters in a commit message.
121
:param message: Commit message to escape
122
:return: tuple with escaped message and number of characters escaped
126
# Python strings can include characters that can't be
127
# represented in well-formed XML; escape characters that
128
# aren't listed in the XML specification
129
# (http://www.w3.org/TR/REC-xml/#NT-Char).
130
return re.subn(u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
131
lambda match: match.group(0).encode('unicode_escape'),
84
# performance tuning for elementtree's serialiser. This should be
85
# sent upstream - RBC 20060523.
86
# the functions here are patched into elementtree at runtime.
88
escape_re = re.compile("[&'\"<>]")
91
"'":"'", # FIXME: overkill
96
def _escape_replace(match, map=escape_map):
    """Look up the replacement for the text matched by escape_re.

    Used as the substitution callback for escape_re.sub().

    :param match: regex match object; its full matched text is the key.
    :param map: mapping from matched text to replacement text; defaults to
        escape_map.
    """
    matched_text = match.group()
    return map[matched_text]
99
def _escape_attrib(text, encoding=None, replace=None):
100
# escape attribute value
104
text = elementtree.ElementTree._encode(text, encoding)
106
return elementtree.ElementTree._encode_entity(text)
108
return escape_re.sub(_escape_replace, text)
110
text = replace(text, "&", "&")
111
text = replace(text, "'", "'") # FIXME: overkill
112
text = replace(text, "\"", """)
113
text = replace(text, "<", "<")
114
text = replace(text, ">", ">")
116
except (TypeError, AttributeError):
117
elementtree.ElementTree._raise_serialization_error(text)
119
elementtree.ElementTree._escape_attrib = _escape_attrib
121
escape_cdata_re = re.compile("[&<>]")
127
def _escape_cdata_replace(match, map=escape_cdata_map):
    """Look up the replacement for the text matched by escape_cdata_re.

    Used as the substitution callback for escape_cdata_re.sub().

    :param match: regex match object; its full matched text is the key.
    :param map: mapping from matched text to replacement text; defaults to
        escape_cdata_map.
    """
    matched_text = match.group()
    return map[matched_text]
130
def _escape_cdata(text, encoding=None, replace=None):
131
# escape character data
135
text = elementtree.ElementTree._encode(text, encoding)
137
return elementtree.ElementTree._encode_entity(text)
139
return escape_cdata_re.sub(_escape_cdata_replace, text)
141
text = replace(text, "&", "&")
142
text = replace(text, "<", "<")
143
text = replace(text, ">", ">")
145
except (TypeError, AttributeError):
146
elementtree.ElementTree._raise_serialization_error(text)
148
elementtree.ElementTree._escape_cdata = _escape_cdata