4763.2.4
by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry. |
1 |
# Copyright (C) 2005-2010 Canonical Ltd
|
1773.4.1
by Martin Pool
Add pyflakes makefile target; fix many warnings |
2 |
#
|
1189
by Martin Pool
- BROKEN: partial support for commit into weave |
3 |
# This program is free software; you can redistribute it and/or modify
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
1887.1.1
by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines, |
7 |
#
|
1189
by Martin Pool
- BROKEN: partial support for commit into weave |
8 |
# This program is distributed in the hope that it will be useful,
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
1887.1.1
by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines, |
12 |
#
|
1189
by Martin Pool
- BROKEN: partial support for commit into weave |
13 |
# You should have received a copy of the GNU General Public License
|
14 |
# along with this program; if not, write to the Free Software
|
|
4183.7.1
by Sabin Iacob
update FSF mailing address |
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
1189
by Martin Pool
- BROKEN: partial support for commit into weave |
16 |
|
6379.6.3
by Jelmer Vernooij
Use absolute_import. |
17 |
from __future__ import absolute_import |
18 |
||
1934.1.3
by John Arbash Meinel
[merge] robert's custom XML serializer, and cleanup for benchmarks and iter_entries() differences |
19 |
import cStringIO |
1189
by Martin Pool
- BROKEN: partial support for commit into weave |
20 |
|
1911.2.6
by John Arbash Meinel
Cache revision ids and file ids as part of xml processing. A custom xml parser could just call decode/encode directly. |
21 |
from bzrlib import ( |
22 |
cache_utf8, |
|
5671.2.3
by Jelmer Vernooij
Move Repository._find_text_key_references_from_xml_inventory_lines onto the serializer. |
23 |
lazy_regex, |
2598.5.2
by Aaron Bentley
Got all tests passing with Branch returning 'null:' for null revision |
24 |
revision as _mod_revision, |
3882.6.3
by John Arbash Meinel
If we are going to thrash the inventory entry cache, increase its size. |
25 |
trace, |
1911.2.6
by John Arbash Meinel
Cache revision ids and file ids as part of xml processing. A custom xml parser could just call decode/encode directly. |
26 |
)
|
4237.3.1
by Jelmer Vernooij
Add new module with generic serializer information; keep XML-specific bits in |
27 |
from bzrlib.xml_serializer import ( |
28 |
Element, |
|
29 |
SubElement, |
|
30 |
XMLSerializer, |
|
6355.1.1
by Jelmer Vernooij
Move some utility functions to xml_serializer. |
31 |
encode_and_escape, |
4416.5.1
by Jelmer Vernooij
Move squashing of XML-invalid characters to XMLSerializer. |
32 |
escape_invalid_chars, |
6355.1.1
by Jelmer Vernooij
Move some utility functions to xml_serializer. |
33 |
get_utf8_or_ascii, |
6355.1.6
by Jelmer Vernooij
Move core inventory code to xml_serializer. |
34 |
serialize_inventory_flat, |
35 |
unpack_inventory_entry, |
|
36 |
unpack_inventory_flat, |
|
4237.3.1
by Jelmer Vernooij
Add new module with generic serializer information; keep XML-specific bits in |
37 |
)
|
1773.4.1
by Martin Pool
Add pyflakes makefile target; fix many warnings |
38 |
from bzrlib.revision import Revision |
1189
by Martin Pool
- BROKEN: partial support for commit into weave |
39 |
from bzrlib.errors import BzrError |
40 |
||
41 |
||
5671.2.3
by Jelmer Vernooij
Move Repository._find_text_key_references_from_xml_inventory_lines onto the serializer. |
42 |
_xml_unescape_map = { |
43 |
'apos':"'", |
|
44 |
'quot':'"', |
|
45 |
'amp':'&', |
|
46 |
'lt':'<', |
|
47 |
'gt':'>' |
|
48 |
}
|
|
49 |
||
50 |
||
51 |
def _unescaper(match, _map=_xml_unescape_map):
    """Return the replacement text for a single ``&...;`` reference.

    Intended as the substitution callback for ``_unescape_re.sub``.

    :param match: A regex match whose group 1 is the text found between the
        ``&`` and the ``;`` (for example ``amp``, ``#233`` or ``#xE9``).
    :param _map: Mapping of entity names to replacement characters. Defaults
        to the module-level ``_xml_unescape_map``.
    :return: The mapped character for a named entity, or the utf8 encoding of
        the referenced code point for a numeric character reference.
    :raises KeyError: If the reference is not in ``_map`` and is not a
        numeric (``#``-prefixed) reference.
    :raises ValueError: If a numeric reference does not contain a valid
        number.
    """
    code = match.group(1)
    try:
        return _map[code]
    except KeyError:
        if not code.startswith('#'):
            # Unknown named entity: let the KeyError propagate to the caller.
            raise
    # Numeric character reference. XML allows both the decimal form '#NNN'
    # and the hexadecimal form '#xHHH' (lowercase 'x' only).
    if code[1:2] == 'x':
        codepoint = int(code[2:], 16)
    else:
        codepoint = int(code[1:])
    return unichr(codepoint).encode('utf8')
|
59 |
||
60 |
||
6355.1.1
by Jelmer Vernooij
Move some utility functions to xml_serializer. |
61 |
# Matches one '&...;' reference and captures the text between '&' and ';'.
# Written as a raw string so the backslash is passed to the regex engine
# explicitly instead of relying on Python leaving the unrecognised '\&'
# escape sequence untouched; the compiled pattern itself is unchanged.
_unescape_re = lazy_regex.lazy_compile(r'\&([^;]*);')
5671.2.3
by Jelmer Vernooij
Move Repository._find_text_key_references_from_xml_inventory_lines onto the serializer. |
62 |
|
63 |
def _unescape_xml(data):
    """Return *data* with every ``&...;`` reference replaced.

    Each reference matched by ``_unescape_re`` is handed to ``_unescaper``,
    which supplies the replacement text.

    :param data: The string to unescape.
    :return: The unescaped string.
    """
    unescaped = _unescape_re.sub(_unescaper, data)
    return unescaped
|
66 |
||
1934.1.4
by John Arbash Meinel
rewrite escaper to use xml numerical entities, rather than using encode('utf8') |
67 |
|
4237.3.1
by Jelmer Vernooij
Add new module with generic serializer information; keep XML-specific bits in |
68 |
class Serializer_v8(XMLSerializer):
    """This serialiser adds rich roots.

    Its revision format number matches its inventory number.
    """

    __slots__ = []

    # Passed to serialize_inventory_flat as the root id when writing.
    root_id = None

    # This format supports the altered-by hack that reads file ids directly out
    # of the versionedfile, without doing XML parsing.
    support_altered_by_hack = True

    # Entry kinds handed to serialize_inventory_flat when writing.
    supported_kinds = set(['file', 'directory', 'symlink'])

    # Format number written to, and expected on, the XML documents.
    format_num = '8'

    # When not None, overrides format_num for revision documents only.
    revision_format_num = None

    # The search regex used by xml based repositories to determine what things
    # were changed in a single commit.
    _file_ids_altered_regex = lazy_regex.lazy_compile(
        r'file_id="(?P<file_id>[^"]+)"'
        r'.* revision="(?P<revision_id>[^"]+)"'
        )

    def _check_revisions(self, inv):
        """Extension point for subclasses to check during serialisation.

        :param inv: An inventory about to be serialised, to be checked.
        :raises: AssertionError if an error has occurred.
        """
        if inv.revision_id is None:
            raise AssertionError("inv.revision_id is None")
        if inv.root.revision is None:
            raise AssertionError("inv.root.revision is None")

    def _check_cache_size(self, inv_size, entry_cache):
        """Check that the entry_cache is large enough.

        We want the cache to be ~2x the size of an inventory. The reason is
        because we use a FIFO cache, and how Inventory records are likely to
        change. In general, you have a small number of records which change
        often, and a lot of records which do not change at all. So when the
        cache gets full, you actually flush out a lot of the records you are
        interested in, which means you need to recreate all of those records.
        An LRU Cache would be better, but the overhead negates the cache
        coherency benefit.

        One way to look at it, only the size of the cache > len(inv) is your
        'working' set. And in general, it shouldn't be a problem to hold 2
        inventories in memory anyway.

        :param inv_size: The number of entries in an inventory.
        :param entry_cache: The cache to check, or None, in which case
            nothing is done. It is resized to ``inv_size * 2`` when its
            current ``cache_size()`` is below ``inv_size * 1.5``.
        """
        if entry_cache is None:
            return
        # 1.5 times might also be reasonable.
        recommended_min_cache_size = inv_size * 1.5
        if entry_cache.cache_size() < recommended_min_cache_size:
            recommended_cache_size = inv_size * 2
            trace.mutter('Resizing the inventory entry cache from %d to %d',
                         entry_cache.cache_size(), recommended_cache_size)
            entry_cache.resize(recommended_cache_size)

    def write_inventory_to_lines(self, inv):
        """Return a list of lines with the encoded inventory."""
        return self.write_inventory(inv, None)

    def write_inventory_to_string(self, inv, working=False):
        """Just call write_inventory with a StringIO and return the value.

        :param inv: the inventory to write.
        :param working: If True skip history data - text_sha1, text_size,
            reference_revision, symlink_target.
        :return: The serialised inventory as a single string.
        """
        sio = cStringIO.StringIO()
        self.write_inventory(inv, sio, working)
        return sio.getvalue()

    def write_inventory(self, inv, f, working=False):
        """Write inventory to a file.

        :param inv: the inventory to write.
        :param f: the file to write. (May be None if the lines are the desired
            output).
        :param working: If True skip history data - text_sha1, text_size,
            reference_revision, symlink_target.
        :return: The inventory as a list of lines.
        """
        output = []
        # Hand the bound append method to the helpers rather than the list.
        append = output.append
        self._append_inventory_root(append, inv)
        serialize_inventory_flat(inv, append,
            self.root_id, self.supported_kinds, working)
        if f is not None:
            f.writelines(output)
        # Just to keep the cache from growing without bounds
        # but we may actually not want to do clear the cache
        #_clear_cache()
        return output

    def _append_inventory_root(self, append, inv):
        """Append the inventory root to output.

        Emits the opening ``<inventory>`` tag (with a ``revision_id``
        attribute only when the inventory has one) followed by the root
        ``<directory>`` element.

        :param append: Callable taking one string; called once per line.
        :param inv: The inventory whose root is being written.
        """
        if inv.revision_id is not None:
            revid1 = ' revision_id="'
            revid2 = encode_and_escape(inv.revision_id)
        else:
            revid1 = ""
            revid2 = ""
        append('<inventory format="%s"%s%s>\n' % (
            self.format_num, revid1, revid2))
        # NOTE(review): no closing '"' follows the %s placeholders below (or
        # revid2 above); presumably encode_and_escape returns the escaped
        # value with the closing quote already attached - confirm against
        # bzrlib.xml_serializer before touching these format strings.
        append('<directory file_id="%s name="%s revision="%s />\n' % (
            encode_and_escape(inv.root.file_id),
            encode_and_escape(inv.root.name),
            encode_and_escape(inv.root.revision)))

    def _pack_revision(self, rev):
        """Revision object -> xml tree

        :param rev: The revision to serialise.
        :return: The root ``revision`` element, with ``message`` and, when
            present, ``parents`` and ``properties`` children.
        """
        # For the XML format, we need to write them as Unicode rather than as
        # utf-8 strings. So that cElementTree can handle properly escaping
        # them.
        decode_utf8 = cache_utf8.decode
        revision_id = rev.revision_id
        if isinstance(revision_id, str):
            revision_id = decode_utf8(revision_id)
        format_num = self.format_num
        if self.revision_format_num is not None:
            format_num = self.revision_format_num
        root = Element('revision',
                       committer=rev.committer,
                       timestamp='%.3f' % rev.timestamp,
                       revision_id=revision_id,
                       inventory_sha1=rev.inventory_sha1,
                       format=format_num,
                       )
        if rev.timezone is not None:
            root.set('timezone', str(rev.timezone))
        root.text = '\n'
        msg = SubElement(root, 'message')
        msg.text = escape_invalid_chars(rev.message)[0]
        msg.tail = '\n'
        if rev.parent_ids:
            pelts = SubElement(root, 'parents')
            pelts.tail = pelts.text = '\n'
            for parent_id in rev.parent_ids:
                _mod_revision.check_not_reserved_id(parent_id)
                p = SubElement(pelts, 'revision_ref')
                p.tail = '\n'
                if isinstance(parent_id, str):
                    parent_id = decode_utf8(parent_id)
                p.set('revision_id', parent_id)
        if rev.properties:
            self._pack_revision_properties(rev, root)
        return root

    def _pack_revision_properties(self, rev, under_element):
        """Add a ``properties`` element for rev under under_element.

        One ``property`` child is written per entry of ``rev.properties``,
        in sorted order, with the name as an attribute and the value as the
        element text.

        :param rev: The revision whose properties are written.
        :param under_element: The element to attach ``properties`` to.
        """
        top_elt = SubElement(under_element, 'properties')
        for prop_name, prop_value in sorted(rev.properties.items()):
            prop_elt = SubElement(top_elt, 'property')
            prop_elt.set('name', prop_name)
            prop_elt.text = prop_value
            prop_elt.tail = '\n'
        top_elt.tail = '\n'

    def _unpack_entry(self, elt, entry_cache=None, return_from_cache=False):
        """Unpack one inventory entry element via unpack_inventory_entry.

        :param elt: The XML element for the entry.
        :param entry_cache: Passed through to unpack_inventory_entry.
        :param return_from_cache: Passed through to unpack_inventory_entry.
        :return: Whatever unpack_inventory_entry returns.
        """
        # This is here because it's overridden by xml7
        return unpack_inventory_entry(elt, entry_cache,
                                      return_from_cache)

    def _unpack_inventory(self, elt, revision_id=None, entry_cache=None,
                          return_from_cache=False):
        """Construct from XML Element

        :param elt: The root XML element of the inventory.
        :param revision_id: Accepted for interface compatibility; not used
            by this implementation.
        :param entry_cache: Optional entry cache, passed to
            unpack_inventory_flat and then checked for size.
        :param return_from_cache: Passed through to unpack_inventory_flat.
        :return: The unpacked inventory.
        """
        inv = unpack_inventory_flat(elt, self.format_num, self._unpack_entry,
                                    entry_cache, return_from_cache)
        self._check_cache_size(len(inv), entry_cache)
        return inv

    def _unpack_revision(self, elt):
        """XML Element -> Revision object

        :param elt: The ``revision`` XML element.
        :return: The Revision built from the element.
        :raises BzrError: If the element carries a ``format`` attribute that
            differs from the expected revision format number.
        """
        found_format = elt.get('format')
        format_num = self.format_num
        if self.revision_format_num is not None:
            format_num = self.revision_format_num
        # A missing format attribute is accepted; a wrong one is not.
        if found_format is not None:
            if found_format != format_num:
                raise BzrError("invalid format version %r on revision"
                               % found_format)
        get_cached = get_utf8_or_ascii
        rev = Revision(committer=elt.get('committer'),
                       timestamp=float(elt.get('timestamp')),
                       revision_id=get_cached(elt.get('revision_id')),
                       inventory_sha1=elt.get('inventory_sha1')
                       )
        # Compare against None explicitly: the truth value of an Element
        # depends on whether it has children, and testing it is deprecated.
        parents = elt.find('parents')
        if parents is not None:
            for p in parents:
                rev.parent_ids.append(get_cached(p.get('revision_id')))
        self._unpack_revision_properties(elt, rev)
        v = elt.get('timezone')
        if v is None:
            rev.timezone = 0
        else:
            rev.timezone = int(v)
        rev.message = elt.findtext('message')  # text of <message>
        return rev

    def _unpack_revision_properties(self, elt, rev):
        """Unpack properties onto a revision.

        :param elt: The ``revision`` XML element.
        :param rev: The revision whose ``properties`` dict is filled in.
        :raises AssertionError: If a child of ``properties`` is not a
            ``property`` element, or a property name is repeated.
        """
        props_elt = elt.find('properties')
        # Explicit None check; an existing but childless element simply
        # yields no iterations below.
        if props_elt is None:
            return
        for prop_elt in props_elt:
            if prop_elt.tag != 'property':
                raise AssertionError(
                    "bad tag under properties list: %r" % prop_elt.tag)
            name = prop_elt.get('name')
            value = prop_elt.text
            # If a property had an empty value ('') cElementTree reads
            # that back as None, convert it back to '', so that all
            # properties have string values
            if value is None:
                value = ''
            if name in rev.properties:
                raise AssertionError("repeated property %r" % name)
            rev.properties[name] = value

    def _find_text_key_references(self, line_iterator):
        """Core routine for extracting references to texts from inventories.

        This performs the translation of xml lines to revision ids.

        :param line_iterator: An iterator of (line, line_key) pairs. The last
            element of line_key is compared with the revision id found in
            the line.
        :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
            to whether they were referred to by the inventory of the
            revision_id that they contain. Note that if that revision_id was
            not part of the line_iterator's output then False will be given -
            even though it may actually refer to that key.
        :raises AssertionError: If this serializer does not support the
            altered-by hack.
        """
        if not self.support_altered_by_hack:
            raise AssertionError(
                "_find_text_key_references only "
                "supported for branches which store inventory as unnested xml"
                ", not on %r" % self)
        result = {}

        # this code needs to read every new line in every inventory for the
        # inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
        # not present in one of those inventories is unnecessary but not
        # harmful because we are filtering by the revision id marker in the
        # inventory lines : we only select file ids altered in one of those
        # revisions. We don't need to see all lines in the inventory because
        # only those added in an inventory in rev X can contain a revision=X
        # line.
        unescape_revid_cache = {}
        unescape_fileid_cache = {}

        # jam 20061218 In a big fetch, this handles hundreds of thousands
        # of lines, so it has had a lot of inlining and optimizing done.
        # Sorry that it is a little bit messy.
        # Move several functions to be local variables, since this is a long
        # running loop.
        search = self._file_ids_altered_regex.search
        unescape = _unescape_xml
        setdefault = result.setdefault
        for line, line_key in line_iterator:
            match = search(line)
            if match is None:
                continue
            # One call to match.group() returning multiple items is quite a
            # bit faster than 2 calls to match.group() each returning 1
            file_id, revision_id = match.group('file_id', 'revision_id')

            # Inlining the cache lookups helps a lot when you make 170,000
            # lines and 350k ids, versus 8.4 unique ids.
            # Using a cache helps in 2 ways:
            #   1) Avoids unnecessary decoding calls
            #   2) Re-uses cached strings, which helps in future set and
            #      equality checks.
            # (2) is enough that removing encoding entirely along with
            # the cache (so we are using plain strings) results in no
            # performance improvement.
            try:
                revision_id = unescape_revid_cache[revision_id]
            except KeyError:
                unescaped = unescape(revision_id)
                unescape_revid_cache[revision_id] = unescaped
                revision_id = unescaped

            # Note that unconditionally unescaping means that we deserialise
            # every fileid, which for general 'pull' is not great, but we don't
            # really want to have so many fulltexts that this matters anyway.
            # RBC 20071114.
            try:
                file_id = unescape_fileid_cache[file_id]
            except KeyError:
                unescaped = unescape(file_id)
                unescape_fileid_cache[file_id] = unescaped
                file_id = unescaped

            key = (file_id, revision_id)
            # Record the key as seen; only upgrade it to True when the line
            # came from the inventory of the revision it names.
            setdefault(key, False)
            if revision_id == line_key[-1]:
                result[key] = True
        return result
|
369 |
||
1185.16.37
by Martin Pool
- properties are retrieved when revisions are loaded |
370 |
|
3311.3.4
by Aaron Bentley
Have xml5 inherit from xml6 from xml8 |
371 |
# Module-level instance of the format 8 serializer.
serializer_v8 = Serializer_v8()