~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/xml_serializer.py

  • Committer: Robert Collins
  • Date: 2009-07-07 04:32:13 UTC
  • mto: This revision was merged to the branch mainline in revision 4524.
  • Revision ID: robertc@robertcollins.net-20090707043213-4hjjhgr40iq7gk2d
More informative assertions in xml serialisation.

Show diffs side-by-side

added

removed

Lines of Context:
12
12
#
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
"""XML externalization support."""
18
18
 
22
22
# importing this module is fairly slow because it has to load several
23
23
# ElementTree bits
24
24
 
 
25
from bzrlib.serializer import Serializer
25
26
from bzrlib.trace import mutter, warning
26
27
 
27
28
try:
33
34
    except ImportError:
34
35
        from cElementTree import (ElementTree, SubElement, Element,
35
36
                                  XMLTreeBuilder, fromstring, tostring)
36
 
        import elementtree
 
37
        import elementtree.ElementTree
37
38
    ParseError = SyntaxError
38
39
except ImportError:
39
40
    mutter('WARNING: using slower ElementTree; consider installing cElementTree'
48
49
from bzrlib import errors
49
50
 
50
51
 
51
 
class Serializer(object):
52
 
    """Abstract object serialize/deserialize"""
53
 
    def write_inventory(self, inv, f):
54
 
        """Write inventory to a file"""
55
 
        elt = self._pack_inventory(inv)
56
 
        self._write_element(elt, f)
57
 
 
58
 
    def write_inventory_to_string(self, inv):
59
 
        return tostring(self._pack_inventory(inv)) + '\n'
60
 
 
61
 
    def read_inventory_from_string(self, xml_string):
 
52
class XMLSerializer(Serializer):
 
53
    """Abstract XML object serialize/deserialize"""
 
54
 
 
55
    squashes_xml_invalid_characters = True
 
56
 
 
57
    def read_inventory_from_string(self, xml_string, revision_id=None,
 
58
                                   entry_cache=None):
 
59
        """Read xml_string into an inventory object.
 
60
 
 
61
        :param xml_string: The xml to read.
 
62
        :param revision_id: If not-None, the expected revision id of the
 
63
            inventory. Some serialisers use this to set the results' root
 
64
            revision. This should be supplied for deserialising all
 
65
            from-repository inventories so that xml5 inventories that were
 
66
            serialised without a revision identifier can be given the right
 
67
            revision id (but not for working tree inventories where users can
 
68
            edit the data without triggering checksum errors or anything).
 
69
        :param entry_cache: An optional cache of InventoryEntry objects. If
 
70
            supplied we will look up entries via (file_id, revision_id) which
 
71
            should map to a valid InventoryEntry (File/Directory/etc) object.
 
72
        """
62
73
        try:
63
 
            return self._unpack_inventory(fromstring(xml_string))
 
74
            return self._unpack_inventory(fromstring(xml_string), revision_id,
 
75
                                          entry_cache=entry_cache)
64
76
        except ParseError, e:
65
77
            raise errors.UnexpectedInventoryFormat(e)
66
78
 
67
 
    def read_inventory(self, f):
 
79
    def read_inventory(self, f, revision_id=None):
68
80
        try:
69
 
            return self._unpack_inventory(self._read_element(f))
 
81
            return self._unpack_inventory(self._read_element(f),
 
82
                revision_id=None)
70
83
        except ParseError, e:
71
84
            raise errors.UnexpectedInventoryFormat(e)
72
85
 
104
117
    }
105
118
def _escape_replace(match, map=escape_map):
106
119
    return map[match.group()]
107
 
 
 
120
 
108
121
def _escape_attrib(text, encoding=None, replace=None):
109
122
    # escape attribute value
110
123
    try:
135
148
    }
136
149
def _escape_cdata_replace(match, map=escape_cdata_map):
137
150
    return map[match.group()]
138
 
 
 
151
 
139
152
def _escape_cdata(text, encoding=None, replace=None):
140
153
    # escape character data
141
154
    try:
155
168
        elementtree.ElementTree._raise_serialization_error(text)
156
169
 
157
170
elementtree.ElementTree._escape_cdata = _escape_cdata
 
171
 
 
172
 
 
173
def escape_invalid_chars(message):
 
174
    """Escape the XML-invalid characters in a commit message.
 
175
 
 
176
    :param message: Commit message to escape
 
177
    :return: tuple with escaped message and number of characters escaped
 
178
    """
 
179
    if message is None:
 
180
        return None, 0
 
181
    # Python strings can include characters that can't be
 
182
    # represented in well-formed XML; escape characters that
 
183
    # aren't listed in the XML specification
 
184
    # (http://www.w3.org/TR/REC-xml/#NT-Char).
 
185
    return re.subn(u'[^\x09\x0A\x0D\u0020-\uD7FF\uE000-\uFFFD]+',
 
186
            lambda match: match.group(0).encode('unicode_escape'),
 
187
            message)