~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tree.py

Committer: Andrew Bennetts
Date: 2008-08-07 00:25:38 UTC
mfrom: (3612 +trunk)
mto: This revision was merged to the branch mainline in revision 3613.
Revision ID: andrew.bennetts@canonical.com-20080807002538-mtl1fcgy2fdabha4

Merge from bzr.dev.

files added:
bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_upgrade_stacked.py

files modified:
NEWS

bzrlib/__init__.py

bzrlib/_dirstate_helpers_py.py

bzrlib/_patiencediff_py.py

bzrlib/branch.py

bzrlib/branchbuilder.py

bzrlib/builtins.py

bzrlib/bzrdir.py

bzrlib/check.py

bzrlib/commit.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/fetch.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/merge_directive.py

bzrlib/osutils.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repofmt/weaverepo.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/smart/branch.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/branch_implementations/test_hooks.py

bzrlib/tests/branch_implementations/test_stacking.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_branchbuilder.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_options.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_tree.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/transform.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/upgrade.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

doc/developers/HACKING.txt

tools/doc_generate/autodoc_rstx.py

Show diffs side-by-side

added added

removed removed

bzrlib/tree.py

374

return vf.plan_lca_merge(last_revision_a, last_revision_b,

375

last_revision_base)

376

377

def _iter_parent_trees(self):

378

"""Iterate through parent trees, defaulting to Tree.revision_tree."""

379

for revision_id in self.get_parent_ids():

380

try:

381

yield self.revision_tree(revision_id)

382

except errors.NoSuchRevisionInTree:

383

yield self.repository.revision_tree(revision_id)

384

385

@staticmethod

386

def _file_revision(revision_tree, file_id):

387

"""Determine the revision associated with a file in a given tree."""

388

revision_tree.lock_read()

389

try:

390

return revision_tree.inventory[file_id].revision

391

finally:

392

revision_tree.unlock()

393

377

394

def _get_file_revision(self, file_id, vf, tree_revision):

378

395

"""Ensure that file_id, tree_revision is in vf to plan the merge."""

379

def file_revision(revision_tree):

380

revision_tree.lock_read()

381

try:

382

return revision_tree.inventory[file_id].revision

383

finally:

384

revision_tree.unlock()

385

386

def iter_parent_trees():

387

for revision_id in self.get_parent_ids():

388

try:

389

yield self.revision_tree(revision_id)

390

except:

391

yield self.repository.revision_tree(revision_id)

392

396

393

397

if getattr(self, '_repository', None) is None:

394

398

last_revision = tree_revision

395

parent_keys = [(file_id, file_revision(t)) for t in

396

iter_parent_trees()]

399

parent_keys = [(file_id, self._file_revision(t, file_id)) for t in

400

self._iter_parent_trees()]

397

401

vf.add_lines((file_id, last_revision), parent_keys,

398

402

self.get_file(file_id).readlines())

399

403

repo = self.branch.repository

400

404

base_vf = repo.texts

401

405

else:

402

last_revision = file_revision(self)

406

last_revision = self._file_revision(self, file_id)

403

407

base_vf = self._repository.texts

404

408

if base_vf not in vf.fallback_versionedfiles:

405

409

vf.fallback_versionedfiles.append(base_vf)

953

957

# the parent's path is necessarily known at this point.

954

958

yield(file_id, (path, to_path), changed_content, versioned, parent,

955

959

name, kind, executable)

960

961

962

class MultiWalker(object):

963

"""Walk multiple trees simultaneously, getting combined results."""

964

965

# Note: This could be written to not assume you can do out-of-order

966

# lookups. Instead any nodes that don't match in all trees could be

967

# marked as 'deferred', and then returned in the final cleanup loop.

968

# For now, I think it is "nicer" to return things as close to the

969

# "master_tree" order as we can.

970

971

def __init__(self, master_tree, other_trees):

972

"""Create a new MultiWalker.

973

974

All trees being walked must implement "iter_entries_by_dir()", such

975

that they yield (path, object) tuples, where that object will have a

976

'.file_id' member, that can be used to check equality.

977

978

:param master_tree: All trees will be 'slaved' to the master_tree such

979

that nodes in master_tree will be used as 'first-pass' sync points.

980

Any nodes that aren't in master_tree will be merged in a second

981

pass.

982

:param other_trees: A list of other trees to walk simultaneously.

983

"""

984

self._master_tree = master_tree

985

self._other_trees = other_trees

986

987

# Keep track of any nodes that were properly processed just out of

988

# order, that way we don't return them at the end, we don't have to

989

# track *all* processed file_ids, just the out-of-order ones

990

self._out_of_order_processed = set()

991

992

@staticmethod

993

def _step_one(iterator):

994

"""Step an iter_entries_by_dir iterator.

995

996

:return: (has_more, path, ie)

997

If has_more is False, path and ie will be None.

998

"""

999

try:

1000

path, ie = iterator.next()

1001

except StopIteration:

1002

return False, None, None

1003

else:

1004

return True, path, ie

1005

1006

@staticmethod

1007

def _cmp_path_by_dirblock(path1, path2):

1008

"""Compare two paths based on what directory they are in.

1009

1010

This generates a sort order, such that all children of a directory are

1011

sorted together, and grandchildren are in the same order as the

1012

children appear. But all grandchildren come after all children.

1013

1014

:param path1: first path

1015

:param path2: the second path

1016

:return: negative number if ``path1`` comes first,

1017

0 if paths are equal

1018

and a positive number if ``path2`` sorts first

1019

"""

1020

# Shortcut this special case

1021

if path1 == path2:

1022

return 0

1023

# This is stolen from _dirstate_helpers_py.py, only switching it to

1024

# Unicode objects. Consider using encode_utf8() and then using the

1025

# optimized versions, or maybe writing optimized unicode versions.

1026

if not isinstance(path1, unicode):

1027

raise TypeError("'path1' must be a unicode string, not %s: %r"

1028

% (type(path1), path1))

1029

if not isinstance(path2, unicode):

1030

raise TypeError("'path2' must be a unicode string, not %s: %r"

1031

% (type(path2), path2))

1032

return cmp(MultiWalker._path_to_key(path1),

1033

MultiWalker._path_to_key(path2))

1034

1035

@staticmethod

1036

def _path_to_key(path):

1037

dirname, basename = osutils.split(path)

1038

return (dirname.split(u'/'), basename)

1039

1040

def _lookup_by_file_id(self, extra_entries, other_tree, file_id):

1041

"""Lookup an inventory entry by file_id.

1042

1043

This is called when an entry is missing in the normal order.

1044

Generally this is because a file was either renamed, or it was

1045

deleted/added. If the entry was found in the inventory and not in

1046

extra_entries, it will be added to self._out_of_order_processed

1047

1048

:param extra_entries: A dictionary of {file_id: (path, ie)}. This

1049

should be filled with entries that were found before they were

1050

used. If file_id is present, it will be removed from the

1051

dictionary.

1052

:param other_tree: The Tree to search, in case we didn't find the entry

1053

yet.

1054

:param file_id: The file_id to look for

1055

:return: (path, ie) if found or (None, None) if not present.

1056

"""

1057

if file_id in extra_entries:

1058

return extra_entries.pop(file_id)

1059

# TODO: Is id2path better as the first call, or is

1060

# inventory[file_id] better as a first check?

1061

try:

1062

cur_path = other_tree.id2path(file_id)

1063

except errors.NoSuchId:

1064

cur_path = None

1065

if cur_path is None:

1066

return (None, None)

1067

else:

1068

self._out_of_order_processed.add(file_id)

1069

cur_ie = other_tree.inventory[file_id]

1070

return (cur_path, cur_ie)

1071

1072

def iter_all(self):

1073

"""Match up the values in the different trees."""

1074

for result in self._walk_master_tree():

1075

yield result

1076

self._finish_others()

1077

for result in self._walk_others():

1078

yield result

1079

1080

def _walk_master_tree(self):

1081

"""First pass, walk all trees in lock-step.

1082

1083

When we are done, all nodes in the master_tree will have been

1084

processed. _other_walkers, _other_entries, and _others_extra will be

1085

set on 'self' for future processing.

1086

"""

1087

# This iterator has the most "inlining" done, because it tends to touch

1088

# every file in the tree, while the others only hit nodes that don't

1089

# match.

1090

master_iterator = self._master_tree.iter_entries_by_dir()

1091

1092

other_walkers = [other.iter_entries_by_dir()

1093

for other in self._other_trees]

1094

other_entries = [self._step_one(walker) for walker in other_walkers]

1095

# Track extra nodes in the other trees

1096

others_extra = [{} for i in xrange(len(self._other_trees))]

1097

1098

master_has_more = True

1099

step_one = self._step_one

1100

lookup_by_file_id = self._lookup_by_file_id

1101

out_of_order_processed = self._out_of_order_processed

1102

1103

while master_has_more:

1104

(master_has_more, path, master_ie) = step_one(master_iterator)

1105

if not master_has_more:

1106

break

1107

1108

file_id = master_ie.file_id

1109

other_values = []

1110

other_values_append = other_values.append

1111

next_other_entries = []

1112

next_other_entries_append = next_other_entries.append

1113

for idx, (other_has_more, other_path, other_ie) in enumerate(other_entries):

1114

if not other_has_more:

1115

other_values_append(lookup_by_file_id(

1116

others_extra[idx], self._other_trees[idx], file_id))

1117

next_other_entries_append((False, None, None))

1118

elif file_id == other_ie.file_id:

1119

# This is the critical code path, as most of the entries

1120

# should match between most trees.

1121

other_values_append((other_path, other_ie))

1122

next_other_entries_append(step_one(other_walkers[idx]))

1123

else:

1124

# This walker did not match, step it until it either

1125

# matches, or we know we are past the current walker.

1126

other_walker = other_walkers[idx]

1127

other_extra = others_extra[idx]

1128

while (other_has_more and

1129

self._cmp_path_by_dirblock(other_path, path) < 0):

1130

other_file_id = other_ie.file_id

1131

if other_file_id not in out_of_order_processed:

1132

other_extra[other_file_id] = (other_path, other_ie)

1133

other_has_more, other_path, other_ie = \

1134

step_one(other_walker)

1135

if other_has_more and other_ie.file_id == file_id:

1136

# We ended up walking to this point, match and step

1137

# again

1138

other_values_append((other_path, other_ie))

1139

other_has_more, other_path, other_ie = \

1140

step_one(other_walker)

1141

else:

1142

# This record isn't in the normal order, see if it

1143

# exists at all.

1144

other_values_append(lookup_by_file_id(

1145

other_extra, self._other_trees[idx], file_id))

1146

next_other_entries_append((other_has_more, other_path,

1147

other_ie))

1148

other_entries = next_other_entries

1149

1150

# We've matched all the walkers, yield this datapoint

1151

yield path, file_id, master_ie, other_values

1152

self._other_walkers = other_walkers

1153

self._other_entries = other_entries

1154

self._others_extra = others_extra

1155

1156

def _finish_others(self):

1157

"""Finish walking the other iterators, so we get all entries."""

1158

for idx, info in enumerate(self._other_entries):

1159

other_extra = self._others_extra[idx]

1160

(other_has_more, other_path, other_ie) = info

1161

while other_has_more:

1162

other_file_id = other_ie.file_id

1163

if other_file_id not in self._out_of_order_processed:

1164

other_extra[other_file_id] = (other_path, other_ie)

1165

other_has_more, other_path, other_ie = \

1166

self._step_one(self._other_walkers[idx])

1167

del self._other_entries

1168

1169

def _walk_others(self):

1170

"""Finish up by walking all the 'deferred' nodes."""

1171

# TODO: One alternative would be to grab all possible unprocessed

1172

# file_ids, and then sort by path, and then yield them. That

1173

# might ensure better ordering, in case a caller strictly

1174

# requires parents before children.

1175

for idx, other_extra in enumerate(self._others_extra):

1176

others = sorted(other_extra.itervalues(),

1177

key=lambda x: self._path_to_key(x[0]))

1178

for other_path, other_ie in others:

1179

file_id = other_ie.file_id

1180

# We don't need to check out_of_order_processed here, because

1181

# the lookup_by_file_id will be removing anything processed

1182

# from the extras cache

1183

other_extra.pop(file_id)

1184

other_values = [(None, None) for i in xrange(idx)]

1185

other_values.append((other_path, other_ie))

1186

for alt_idx, alt_extra in enumerate(self._others_extra[idx+1:]):

1187

alt_idx = alt_idx + idx + 1

1188

alt_extra = self._others_extra[alt_idx]

1189

alt_tree = self._other_trees[alt_idx]

1190

other_values.append(self._lookup_by_file_id(

1191

alt_extra, alt_tree, file_id))

1192

yield other_path, file_id, None, other_values

Older »