~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/repository.py

Committer: Jelmer Vernooij
Date: 2011-02-21 23:43:10 UTC
mfrom: (5675.2.1 repository-format-deprecation)
mto: (5582.12.2 weave-plugin)
mto: This revision was merged to the branch mainline in revision 5718.
Revision ID: jelmer@samba.org-20110221234310-tpda0135m9dibmyh

Merge repository-format-deprecation branch.

files modified:
bzrlib/plugins/weave_fmt/repository.py

bzrlib/remote.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_xml.py

bzrlib/xml8.py

Show diffs side-by-side

added added

removed removed

bzrlib/repository.py

graph,

inventory,

inventory_delta,

lazy_regex,

lockable_files,

lockdir,

lru_cache,

988

987

# in a Repository class subclass rather than to override

989

988

# get_commit_builder.

990

989

_commit_builder_class = CommitBuilder

991

# The search regex used by xml based repositories to determine what things

992

# were changed in a single commit.

993

_file_ids_altered_regex = lazy_regex.lazy_compile(

994

r'file_id="(?P<file_id>[^"]+)"'

995

r'.* revision="(?P<revision_id>[^"]+)"'

996

)

997

990

998

991

def abort_write_group(self, suppress_errors=False):

999

992

"""Commit the contents accrued within the current write group.

2063

2056

w = self.inventories

2064

2057

pb = ui.ui_factory.nested_progress_bar()

2065

2058

try:

2066

return self._find_text_key_references_from_xml_inventory_lines(

2059

return self._serializer._find_text_key_references(

2067

2060

w.iter_lines_added_or_present_in_keys(revision_keys, pb=pb))

2068

2061

finally:

2069

2062

pb.finished()

2070

2063

2071

def _find_text_key_references_from_xml_inventory_lines(self,
    line_iterator):
    """Core routine for extracting references to texts from inventories.

    This performs the translation of xml lines to revision ids.

    :param line_iterator: An iterable of (line, line_key) pairs. Each line
        is searched with self._file_ids_altered_regex; the last element of
        line_key is compared against the revision id found in the line.
    :return: A dictionary mapping text keys ((fileid, revision_id) tuples)
        to whether they were referred to by the inventory of the
        revision_id that they contain. Note that if that revision_id was
        not part of the line_iterator's output then False will be given -
        even though it may actually refer to that key.
    :raises AssertionError: If self._serializer.support_altered_by_hack is
        false.
    """
    if not self._serializer.support_altered_by_hack:
        raise AssertionError(
            "_find_text_key_references_from_xml_inventory_lines only "
            "supported for branches which store inventory as unnested xml"
            ", not on %r" % self)
    result = {}

    # this code needs to read every new line in every inventory for the
    # inventories [revision_ids]. Seeing a line twice is ok. Seeing a line
    # not present in one of those inventories is unnecessary but not
    # harmful because we are filtering by the revision id marker in the
    # inventory lines : we only select file ids altered in one of those
    # revisions. We don't need to see all lines in the inventory because
    # only those added in an inventory in rev X can contain a revision=X
    # line.
    # Caches mapping the escaped text found in a line to its unescaped form.
    unescape_revid_cache = {}
    unescape_fileid_cache = {}

    # jam 20061218 In a big fetch, this handles hundreds of thousands
    # of lines, so it has had a lot of inlining and optimizing done.
    # Sorry that it is a little bit messy.
    # Move several functions to be local variables, since this is a long
    # running loop.
    search = self._file_ids_altered_regex.search
    unescape = _unescape_xml
    setdefault = result.setdefault
    for line, line_key in line_iterator:
        match = search(line)
        if match is None:
            # Line carries no file_id/revision pair; nothing to record.
            continue
        # One call to match.group() returning multiple items is quite a
        # bit faster than 2 calls to match.group() each returning 1
        file_id, revision_id = match.group('file_id', 'revision_id')

        # Inlining the cache lookups helps a lot when you make 170,000
        # lines and 350k ids, versus 8.4 unique ids.
        # Using a cache helps in 2 ways:
        #   1) Avoids unnecessary decoding calls
        #   2) Re-uses cached strings, which helps in future set and
        #      equality checks.
        # (2) is enough that removing encoding entirely along with
        # the cache (so we are using plain strings) results in no
        # performance improvement.
        try:
            revision_id = unescape_revid_cache[revision_id]
        except KeyError:
            unescaped = unescape(revision_id)
            unescape_revid_cache[revision_id] = unescaped
            revision_id = unescaped

        # Note that unconditionally unescaping means that we deserialise
        # every fileid, which for general 'pull' is not great, but we don't
        # really want to have so many fulltexts that this matters anyway.
        # RBC 20071114.
        try:
            file_id = unescape_fileid_cache[file_id]
        except KeyError:
            unescaped = unescape(file_id)
            unescape_fileid_cache[file_id] = unescaped
            file_id = unescaped

        key = (file_id, revision_id)
        # Record the key as seen without overwriting an earlier True.
        setdefault(key, False)
        if revision_id == line_key[-1]:
            # The line came from the inventory of the revision it names.
            result[key] = True
    return result

2150

2151

2064

def _inventory_xml_lines_for_keys(self, keys):

2152

2065

"""Get a line iterator of the sort needed for findind references.

2153

2066

2183

2096

revision_ids. Each altered file-ids has the exact revision_ids that

2184

2097

altered it listed explicitly.

2185

2098

"""

2186

seen = set(self._find_text_key_references_from_xml_inventory_lines(

2099

seen = set(self._serializer._find_text_key_references(

2187

2100

line_iterator).iterkeys())

2188

2101

parent_keys = self._find_parent_keys_of_revisions(revision_keys)

2189

parent_seen = set(self._find_text_key_references_from_xml_inventory_lines(

2102

parent_seen = set(self._serializer._find_text_key_references(

2190

2103

self._inventory_xml_lines_for_keys(parent_keys)))

2191

2104

new_keys = seen - parent_seen

2192

2105

result = {}

2820

2733

return result

2821

2734

2822

2735

def _warn_if_deprecated(self, branch=None):

2736

if not self._format.is_deprecated():

2737

return

2823

2738

global _deprecation_warning_done

2824

2739

if _deprecation_warning_done:

2825

2740

return

3252

3167

"""

3253

3168

return True

3254

3169

3170

def is_deprecated(self):
    """Report whether this format is deprecated.

    A deprecated format may trigger a user-visible warning that recommends
    upgrading; such formats nevertheless remain fully supported.

    :return: False.
    """
    return False

3177

3255

3178

def network_name(self):

3256

3179

"""A simple byte string uniquely identifying this format for RPC calls.

3257

3180

4051

3974

pb.finished()

4052

3975

4053

3976

4054

_unescape_map = {

4055

'apos':"'",

4056

'quot':'"',

4057

'amp':'&',

4058

'lt':'<',

4059

'gt':'>'

4060

}

4061

4062

4063

def _unescaper(match, _map=_unescape_map):
    """Return the replacement text for one matched XML entity reference.

    Used as the replacement callback for a regex substitution whose first
    group captures the text between '&' and ';'.

    :param match: A regex match object; group(1) is the entity body, for
        example 'amp', '#38' or '#x26'.
    :param _map: Mapping of predefined entity names to replacement text.
    :return: The replacement string. Numeric character references (decimal
        '#NNN' or hexadecimal '#xHHH') are returned UTF-8 encoded.
    :raises KeyError: If the entity is neither in _map nor a numeric
        character reference.
    :raises ValueError: If a numeric character reference has invalid digits.
    """
    code = match.group(1)
    try:
        return _map[code]
    except KeyError:
        if not code.startswith('#'):
            # Unknown named entity: let the KeyError propagate.
            raise
        digits = code[1:]
        if digits.startswith('x'):
            # Hexadecimal character reference, e.g. '&#x26;'.
            codepoint = int(digits[1:], 16)
        else:
            # Decimal character reference, e.g. '&#38;'.
            codepoint = int(digits)
        return unichr(codepoint).encode('utf8')

4071

4072

4073

# Compiled pattern matching '&...;' entity references; built lazily by
# _unescape_xml() on first use.
_unescape_re = None


def _unescape_xml(data):
    """Unescape predefined XML entities in a string of data.

    Every '&...;' sequence in data is replaced by the result of calling
    _unescaper on it.

    :param data: The escaped string.
    :return: data with each entity reference substituted.
    """
    global _unescape_re
    if _unescape_re is None:
        # Raw string with a bare '&': the ampersand is not special in a
        # regex, and '\&' is not a valid string escape.
        _unescape_re = re.compile(r'&([^;]*);')
    return _unescape_re.sub(_unescaper, data)

4082

4083

4084

3977

class _VersionedFileChecker(object):

4085

3978

4086

3979

def __init__(self, repository, text_key_references=None, ancestors=None):

Older »