25
24
revision as _mod_revision,
28
from bzrlib.xml_serializer import (
34
from bzrlib.inventory import InventoryEntry
27
from bzrlib.xml_serializer import SubElement, Element, Serializer
28
from bzrlib.inventory import ROOT_ID, Inventory, InventoryEntry
35
29
from bzrlib.revision import Revision
36
30
from bzrlib.errors import BzrError
192
157
revision_format_num = None
194
# The search regex used by xml based repositories to determine what things
195
# were changed in a single commit.
196
_file_ids_altered_regex = lazy_regex.lazy_compile(
197
r'file_id="(?P<file_id>[^"]+)"'
198
r'.* revision="(?P<revision_id>[^"]+)"'
201
159
def _check_revisions(self, inv):
202
160
"""Extension point for subclasses to check during serialisation.
204
162
:param inv: An inventory about to be serialised, to be checked.
205
:raises: AssertionError if an error has occurred.
163
:raises: AssertionError if an error has occurred.
207
165
if inv.revision_id is None:
208
raise AssertionError("inv.revision_id is None")
166
raise AssertionError()
209
167
if inv.root.revision is None:
210
raise AssertionError("inv.root.revision is None")
168
raise AssertionError()
212
170
def _check_cache_size(self, inv_size, entry_cache):
213
171
"""Check that the entry_cache is large enough.
408
366
prop_elt.tail = '\n'
409
367
top_elt.tail = '\n'
411
def _unpack_inventory(self, elt, revision_id=None, entry_cache=None,
412
return_from_cache=False):
369
def _unpack_inventory(self, elt, revision_id=None, entry_cache=None):
413
370
"""Construct from XML Element"""
414
371
if elt.tag != 'inventory':
415
372
raise errors.UnexpectedInventoryFormat('Root tag is %r' % elt.tag)
422
379
revision_id = cache_utf8.encode(revision_id)
423
380
inv = inventory.Inventory(root_id=None, revision_id=revision_id)
425
ie = self._unpack_entry(e, entry_cache=entry_cache,
426
return_from_cache=return_from_cache)
382
ie = self._unpack_entry(e, entry_cache=entry_cache)
428
384
self._check_cache_size(len(inv), entry_cache)
431
def _unpack_entry(self, elt, entry_cache=None, return_from_cache=False):
387
def _unpack_entry(self, elt, entry_cache=None):
432
388
elt_get = elt.get
433
389
file_id = elt_get('file_id')
434
390
revision = elt_get('revision')
466
422
if entry_cache is not None and revision is not None:
467
423
key = (file_id, revision)
469
# We copy it, because some operations may mutate it
425
# We copy it, because some operations may mutate it
470
426
cached_ie = entry_cache[key]
474
430
# Only copying directory entries drops us 2.85s => 2.35s
475
if return_from_cache:
476
if cached_ie.kind == 'directory':
477
return cached_ie.copy()
431
# if cached_ie.kind == 'directory':
432
# return cached_ie.copy()
479
434
return cached_ie.copy()
569
524
raise AssertionError("repeated property %r" % name)
570
525
rev.properties[name] = value
572
def _find_text_key_references(self, line_iterator):
573
"""Core routine for extracting references to texts from inventories.
575
This performs the translation of xml lines to revision ids.
577
:param line_iterator: An iterator of lines, origin_version_id
578
:return: A dictionary mapping text keys ((fileid, revision_id) tuples)
579
to whether they were referred to by the inventory of the
580
revision_id that they contain. Note that if that revision_id was
581
not part of the line_iterator's output then False will be given -
582
even though it may actually refer to that key.
584
if not self.support_altered_by_hack:
585
raise AssertionError(
586
"_find_text_key_references only "
587
"supported for branches which store inventory as unnested xml"
588
", not on %r" % self)
591
# this code needs to read every new line in every inventory for the
592
# inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
593
# not present in one of those inventories is unnecessary but not
594
# harmful because we are filtering by the revision id marker in the
595
# inventory lines : we only select file ids altered in one of those
596
# revisions. We don't need to see all lines in the inventory because
597
# only those added in an inventory in rev X can contain a revision=X
599
unescape_revid_cache = {}
600
unescape_fileid_cache = {}
602
# jam 20061218 In a big fetch, this handles hundreds of thousands
603
# of lines, so it has had a lot of inlining and optimizing done.
604
# Sorry that it is a little bit messy.
605
# Move several functions to be local variables, since this is a long
607
search = self._file_ids_altered_regex.search
608
unescape = _unescape_xml
609
setdefault = result.setdefault
610
for line, line_key in line_iterator:
614
# One call to match.group() returning multiple items is quite a
615
# bit faster than 2 calls to match.group() each returning 1
616
file_id, revision_id = match.group('file_id', 'revision_id')
618
# Inlining the cache lookups helps a lot when you make 170,000
619
# lines and 350k ids, versus 8.4 unique ids.
620
# Using a cache helps in 2 ways:
621
# 1) Avoids unnecessary decoding calls
622
# 2) Re-uses cached strings, which helps in future set and
624
# (2) is enough that removing encoding entirely along with
625
# the cache (so we are using plain strings) results in no
626
# performance improvement.
628
revision_id = unescape_revid_cache[revision_id]
630
unescaped = unescape(revision_id)
631
unescape_revid_cache[revision_id] = unescaped
632
revision_id = unescaped
634
# Note that unconditionally unescaping means that we deserialise
635
# every fileid, which for general 'pull' is not great, but we don't
636
# really want to have so many fulltexts that this matters anyway.
639
file_id = unescape_fileid_cache[file_id]
641
unescaped = unescape(file_id)
642
unescape_fileid_cache[file_id] = unescaped
645
key = (file_id, revision_id)
646
setdefault(key, False)
647
if revision_id == line_key[-1]:
652
528
# Module-level Serializer_v8 instance, created once at import time.
serializer_v8 = Serializer_v8()