    def update_by_delta(self, delta):
        """Apply an inventory delta to the dirstate for tree 0

        :param delta: An inventory delta. See Inventory.apply_delta for
            details.
        """
        self._read_dirblocks_if_needed()
        insertions = {}
        removals = {}
        for old_path, new_path, file_id, inv_entry in sorted(delta, reverse=True):
            if (file_id in insertions) or (file_id in removals):
                raise AssertionError("repeated file id in delta %r" % (file_id,))
            if old_path is not None:
                old_path = old_path.encode('utf-8')
                removals[file_id] = old_path
            if new_path is not None:
                new_path = new_path.encode('utf-8')
                dirname, basename = osutils.split(new_path)
                key = (dirname, basename, file_id)
                minikind = DirState._kind_to_minikind[inv_entry.kind]
                if minikind == 't':
                    fingerprint = inv_entry.reference_revision
                else:
                    fingerprint = ''
                insertions[file_id] = (key, minikind, inv_entry.executable,
                                       fingerprint, new_path)
            # Transform moves into delete+add pairs
            if None not in (old_path, new_path):
                for child in self._iter_child_entries(0, old_path):
                    if child[0][2] in insertions or child[0][2] in removals:
                        continue
                    child_dirname = child[0][0]
                    child_basename = child[0][1]
                    minikind = child[1][0][0]
                    fingerprint = child[1][0][4]
                    executable = child[1][0][3]
                    old_child_path = osutils.pathjoin(child[0][0],
                                                      child[0][1])
                    removals[child[0][2]] = old_child_path
                    child_suffix = child_dirname[len(old_path):]
                    new_child_dirname = (new_path + child_suffix)
                    key = (new_child_dirname, child_basename, child[0][2])
                    new_child_path = os.path.join(new_child_dirname,
                                                  child_basename)
                    insertions[child[0][2]] = (key, minikind, executable,
                                               fingerprint, new_child_path)
        self._apply_removals(removals.values())
        self._apply_insertions(insertions.values())
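        # Illustrative sketch (hypothetical values, not from the original
        # source): a delta is a list of
        #   (old_path, new_path, file_id, inventory_entry) tuples, e.g.
        #   [(None, u'dir/new-file', 'new-file-id', <InventoryFile>),   # add
        #    (u'old-name', u'new-name', 'a-file-id', <InventoryFile>),  # rename
        #    (u'gone', None, 'gone-id', None)]                          # delete
        # A directory rename is decomposed above into removals of the old
        # child paths plus insertions at the corresponding new child paths.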

    def _apply_removals(self, removals):
        for path in sorted(removals, reverse=True):
            dirname, basename = osutils.split(path)
            block_i, entry_i, d_present, f_present = \
                self._get_block_entry_index(dirname, basename, 0)
            entry = self._dirblocks[block_i][1][entry_i]
            self._make_absent(entry)
            # See if we have a malformed delta: deleting a directory must not
            # leave crud behind. This increases the number of bisects needed
            # substantially, but deletion or renames of large numbers of paths
            # is rare enough it shouldn't be an issue (famous last words?) RBC
            block_i, entry_i, d_present, f_present = \
                self._get_block_entry_index(path, '', 0)
            if d_present:
                # The dir block is still present in the dirstate; this could
                # be due to it being in a parent tree, or a corrupt delta.
                for child_entry in self._dirblocks[block_i][1]:
                    if child_entry[1][0][0] not in ('r', 'a'):
                        raise errors.InconsistentDelta(path, entry[0][2],
                            "The file id was deleted but its children were "
                            "not deleted.")

    def _apply_insertions(self, adds):
        for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
            self.update_minimal(key, minikind, executable, fingerprint,
                                path_utf8=path_utf8)

    def update_basis_by_delta(self, delta, new_revid):
        """Update the parents of this tree after a commit.

        This gives the tree one parent, with revision id new_revid. The
        inventory delta is applied to the current basis tree to generate the
        inventory for the parent new_revid, and all other parent trees are
        discarded.

        Note that an exception during the operation of this method will leave
        the dirstate in a corrupt state where it should not be saved.

        Finally, we expect all changes to be synchronising the basis tree with
        the working tree.

        :param new_revid: The new revision id for the tree's parent.
        :param delta: An inventory delta (see apply_inventory_delta) describing
            the changes from the current left most parent revision to new_revid.
        """
        self._read_dirblocks_if_needed()
        self._discard_merge_parents()
        if self._ghosts != []:
            raise NotImplementedError(self.update_basis_by_delta)
        if len(self._parents) == 0:
            # setup a blank tree, the most simple way.
            empty_parent = DirState.NULL_PARENT_DETAILS
            for entry in self._iter_entries():
                entry[1].append(empty_parent)
            self._parents.append(new_revid)
        else:
            self._parents[0] = new_revid

        delta = sorted(delta, reverse=True)
        adds = []
        changes = []
        deletes = []
        # The paths this function accepts are unicode and must be encoded as we
        # go.
        encode = cache_utf8.encode
        inv_to_entry = self._inv_entry_to_details
        # delta is now (deletes, changes), (adds) in reverse lexicographical
        # order.
        # deletes in reverse lexicographic order are safe to process in situ.
        # renames are not, as a rename from any path could go to a path
        # lexicographically lower, so we transform renames into delete, add
        # pairs, expanding them recursively as needed.
        # At the same time, to reduce interface friction we convert the input
        # inventory entries to dirstate.
        root_only = ('', '')
        for old_path, new_path, file_id, inv_entry in delta:
            if old_path is None:
                adds.append((None, encode(new_path), file_id,
                    inv_to_entry(inv_entry), True))
            elif new_path is None:
                deletes.append((encode(old_path), None, file_id, None, True))
            elif (old_path, new_path) != root_only:
                # Because renames must preserve their children we must have
                # processed all relocations and removes beforehand. The sort
                # order ensures we've examined the child paths, but we also
                # have to execute the removals, or the split to an add/delete
                # pair will result in the deleted item being reinserted, or
                # renamed items being reinserted twice - and possibly at the
                # wrong place. Splitting into a delete/add pair also simplifies
                # the handling of entries with ('f', ...), ('r' ...) because
                # the target of the 'r' is old_path here, and we add that to
                # deletes, meaning that the add handler does not need to check
                # for 'r' items on every pass.
                self._update_basis_apply_deletes(deletes)
                deletes = []
                new_path_utf8 = encode(new_path)
                # Split into an add/delete pair recursively.
                adds.append((None, new_path_utf8, file_id,
                    inv_to_entry(inv_entry), False))
                # Expunge deletes that we've seen so that deleted/renamed
                # children of a rename directory are handled correctly.
                new_deletes = reversed(list(self._iter_child_entries(1,
                    encode(old_path))))
                # Remove the current contents of the tree at orig_path, and
                # reinsert at the correct new path.
                for entry in new_deletes:
                    if entry[0][0]:
                        source_path = entry[0][0] + '/' + entry[0][1]
                    else:
                        source_path = entry[0][1]
                    if new_path_utf8:
                        target_path = new_path_utf8 + source_path[len(old_path):]
                    else:
                        if old_path == '':
                            raise AssertionError("cannot rename directory to"
                                " itself")
                        target_path = source_path[len(old_path) + 1:]
                    adds.append((None, target_path, entry[0][2], entry[1][1], False))
                    deletes.append(
                        (source_path, target_path, entry[0][2], None, False))
                deletes.append(
                    (encode(old_path), new_path, file_id, None, False))
            else:
                # changes to just the root should not require remove/insertion
                # of everything.
                changes.append((encode(old_path), encode(new_path), file_id,
                    inv_to_entry(inv_entry)))

        # Finish expunging deletes/first half of renames.
        self._update_basis_apply_deletes(deletes)
        # Reinstate second half of renames and new paths.
        self._update_basis_apply_adds(adds)
        # Apply in-situ changes.
        self._update_basis_apply_changes(changes)

        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
        self._header_state = DirState.IN_MEMORY_MODIFIED
        self._id_index = None
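    # Illustrative sketch (hypothetical data, not from the original source):
    # a basis delta renaming 'a' to 'b', where 'a' contains 'a/c', is bucketed
    # above roughly as
    #   deletes: [('a/c', 'b/c', 'c-id', None, False), ('a', 'b', 'a-id', None, False)]
    #   adds:    [(None, 'b', 'a-id', details, False), (None, 'b/c', 'c-id', details, False)]
    # so the delete pass clears the old paths before the add pass re-creates
    # them under the new directory name.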

    def _update_basis_apply_adds(self, adds):
        """Apply a sequence of adds to tree 1 during update_basis_by_delta.

        They may be adds, or renames that have been split into add/delete
        pairs.

        :param adds: A sequence of adds. Each add is a tuple:
            (None, new_path_utf8, file_id, (entry_details), real_add). real_add
            is False when the add is the second half of a remove-and-reinsert
            pair created to handle renames and deletes.
        """
        # Adds are accumulated partly from renames, so can be in any input
        # order - sort it.
        adds.sort()
        # adds is now in lexicographic order, which places all parents before
        # their children, so we can process it linearly.
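        # For example (hypothetical paths): sorting the utf8 paths
        # lexicographically yields 'dir' before 'dir/file' before 'dir2', so
        # update_minimal always sees a parent directory before any entry
        # inside it.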
        absent = 'ar'
        for old_path, new_path, file_id, new_details, real_add in adds:
            # the entry for this file_id must be in tree 0.
            entry = self._get_entry(0, file_id, new_path)
            if entry[0] is None or entry[0][2] != file_id:
                self._changes_aborted = True
                raise errors.InconsistentDelta(new_path, file_id,
                    'working tree does not contain new entry')
            if real_add and entry[1][1][0] not in absent:
                self._changes_aborted = True
                raise errors.InconsistentDelta(new_path, file_id,
                    'The entry was considered to be a genuinely new record,'
                    ' but there was already an old record for it.')
            # We don't need to update the target of an 'r' because the handling
            # of renames turns all 'r' situations into a delete at the original
            # location.
            entry[1][1] = new_details

    def _update_basis_apply_changes(self, changes):
        """Apply a sequence of changes to tree 1 during update_basis_by_delta.

        :param changes: A sequence of changes. Each change is a tuple:
            (path_utf8, path_utf8, file_id, (entry_details))
        """
        absent = 'ar'
        for old_path, new_path, file_id, new_details in changes:
            # the entry for this file_id must be in tree 0.
            entry = self._get_entry(0, file_id, new_path)
            if entry[0] is None or entry[0][2] != file_id:
                self._changes_aborted = True
                raise errors.InconsistentDelta(new_path, file_id,
                    'working tree does not contain new entry')
            if (entry[1][0][0] in absent or
                entry[1][1][0] in absent):
                self._changes_aborted = True
                raise errors.InconsistentDelta(new_path, file_id,
                    'changed considered absent')
            entry[1][1] = new_details

    def _update_basis_apply_deletes(self, deletes):
        """Apply a sequence of deletes to tree 1 during update_basis_by_delta.

        They may be deletes, or renames that have been split into add/delete
        pairs.

        :param deletes: A sequence of deletes. Each delete is a tuple:
            (old_path_utf8, new_path_utf8, file_id, None, real_delete).
            real_delete is True when the desired outcome is an actual deletion
            rather than the rename handling logic temporarily deleting a path
            during the replacement of a parent.
        """
        null = DirState.NULL_PARENT_DETAILS
        for old_path, new_path, file_id, _, real_delete in deletes:
            if real_delete != (new_path is None):
                raise AssertionError("bad delete delta")
            # the entry for this file_id must be in tree 1.
            dirname, basename = osutils.split(old_path)
            block_index, entry_index, dir_present, file_present = \
                self._get_block_entry_index(dirname, basename, 1)
            if not file_present:
                self._changes_aborted = True
                raise errors.InconsistentDelta(old_path, file_id,
                    'basis tree does not contain removed entry')
            entry = self._dirblocks[block_index][1][entry_index]
            if entry[0][2] != file_id:
                self._changes_aborted = True
                raise errors.InconsistentDelta(old_path, file_id,
                    'mismatched file_id in tree 1')
            if real_delete:
                if entry[1][0][0] != 'a':
                    self._changes_aborted = True
                    raise errors.InconsistentDelta(old_path, file_id,
                        'This was marked as a real delete, but the WT state'
                        ' claims that it still exists and is versioned.')
                del self._dirblocks[block_index][1][entry_index]
            else:
                if entry[1][0][0] == 'a':
                    self._changes_aborted = True
                    raise errors.InconsistentDelta(old_path, file_id,
                        'The entry was considered a rename, but the source path'
                        ' is marked as absent.')
                    # For whatever reason, we were asked to rename an entry
                    # that was originally marked as deleted. This could be
                    # because we are renaming the parent directory, and the WT
                    # current state has the file marked as deleted.
                elif entry[1][0][0] == 'r':
                    # implement the rename
                    del self._dirblocks[block_index][1][entry_index]
                else:
                    # it is being resurrected here, so blank it out temporarily.
                    self._dirblocks[block_index][1][entry_index][1][1] = null
def _observed_sha1(self, entry, sha1, stat_value,
1577
_stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
1578
"""Note the sha1 of a file.
1580
:param entry: The entry the sha1 is for.
1581
:param sha1: The observed sha1.
1582
:param stat_value: The os.lstat for the file.
1062
def update_entry(self, entry, abspath, stat_value=None):
1063
"""Update the entry based on what is actually on disk.
1065
:param entry: This is the dirblock entry for the file in question.
1066
:param abspath: The path on disk for this file.
1067
:param stat_value: (optional) if we already have done a stat on the
1069
:return: The sha1 hexdigest of the file (40 bytes) or link target of a
1072
# This code assumes that the entry passed in is directly held in one of
1073
# the internal _dirblocks. So the dirblock state must have already been
1075
assert self._dirblock_state != DirState.NOT_IN_MEMORY
1076
if stat_value is None:
1078
# We could inline os.lstat but the common case is that
1079
# stat_value will be passed in, not read here.
1080
stat_value = self._lstat(abspath, entry)
1081
except (OSError, IOError), e:
1082
if e.errno in (errno.ENOENT, errno.EACCES,
1084
# The entry is missing, consider it gone
1088
kind = osutils.file_kind_from_stat_mode(stat_value.st_mode)
1585
minikind = _stat_to_minikind[stat_value.st_mode & 0170000]
1090
minikind = DirState._kind_to_minikind[kind]
1091
except KeyError: # Unknown kind
1589
packed_stat = _pack_stat(stat_value)
1093
packed_stat = pack_stat(stat_value)
1094
(saved_minikind, saved_link_or_sha1, saved_file_size,
1095
saved_executable, saved_packed_stat) = entry[1][0]
1097
if (minikind == saved_minikind
1098
and packed_stat == saved_packed_stat
1099
# size should also be in packed_stat
1100
and saved_file_size == stat_value.st_size):
1101
# The stat hasn't changed since we saved, so we can potentially
1102
# re-use the saved sha hash.
1591
1106
if self._cutoff_time is None:
1592
1107
self._sha_cutoff_time()
1593
1109
if (stat_value.st_mtime < self._cutoff_time
1594
1110
and stat_value.st_ctime < self._cutoff_time):
1595
entry[1][0] = ('f', sha1, entry[1][0][2], entry[1][0][3],
1597
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1111
# Return the existing fingerprint
1112
return saved_link_or_sha1
1114
# If we have gotten this far, that means that we need to actually
1115
# process this entry.
1118
link_or_sha1 = self._sha1_file(abspath, entry)
1119
executable = self._is_executable(stat_value.st_mode,
1121
entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
1122
executable, packed_stat)
1123
elif minikind == 'd':
1125
entry[1][0] = ('d', '', 0, False, packed_stat)
1126
if saved_minikind != 'd':
1127
# This changed from something into a directory. Make sure we
1128
# have a directory block for it. This doesn't happen very
1129
# often, so this doesn't have to be super fast.
1130
block_index, entry_index, dir_present, file_present = \
1131
self._get_block_entry_index(entry[0][0], entry[0][1], 0)
1132
self._ensure_block(block_index, entry_index,
1133
osutils.pathjoin(entry[0][0], entry[0][1]))
1134
elif minikind == 'l':
1135
link_or_sha1 = self._read_link(abspath, saved_link_or_sha1)
1136
entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
1138
self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1599
1141
def _sha_cutoff_time(self):
1600
1142
"""Return cutoff time.
        self._split_path_cache = {}

    def _requires_lock(self):
        """Check that a lock is currently held by someone on the dirstate."""
        if not self._lock_token:
            raise errors.ObjectNotLocked(self)

def py_update_entry(state, entry, abspath, stat_value,
                    _stat_to_minikind=DirState._stat_to_minikind,
                    _pack_stat=pack_stat):
    """Update the entry based on what is actually on disk.

    This function only calculates the sha if it needs to - if the entry is
    uncachable, or clearly different to the first parent's entry, no sha
    is calculated, and None is returned.

    :param state: The dirstate this entry is in.
    :param entry: This is the dirblock entry for the file in question.
    :param abspath: The path on disk for this file.
    :param stat_value: The stat value done on the path.
    :return: None, or the sha1 hexdigest of the file (40 bytes) or link
        target of a symlink.
    """
    try:
        minikind = _stat_to_minikind[stat_value.st_mode & 0170000]
    except KeyError:
        # Unhandled kind
        return None
    packed_stat = _pack_stat(stat_value)
    (saved_minikind, saved_link_or_sha1, saved_file_size,
     saved_executable, saved_packed_stat) = entry[1][0]

    if minikind == 'd' and saved_minikind == 't':
        minikind = 't'
    if (minikind == saved_minikind
        and packed_stat == saved_packed_stat):
        # The stat hasn't changed since we saved, so we can re-use the
        # saved sha hash.
        # size should also be in packed_stat
        if saved_file_size == stat_value.st_size:
            return saved_link_or_sha1

    # If we have gotten this far, that means that we need to actually
    # process this entry.
    link_or_sha1 = None
    if minikind == 'f':
        executable = state._is_executable(stat_value.st_mode,
                                          saved_executable)
        if state._cutoff_time is None:
            state._sha_cutoff_time()
        if (stat_value.st_mtime < state._cutoff_time
            and stat_value.st_ctime < state._cutoff_time
            and len(entry[1]) > 1
            and entry[1][1][0] != 'a'):
            # Could check for size changes for further optimised
            # avoidance of sha1's. However the most prominent case of
            # over-shaing is during initial add, which this catches.
            # Besides, if content filtering happens, size and sha
            # are calculated at the same time, so checking just the size
            # gains nothing w.r.t. performance.
            link_or_sha1 = state._sha1_file(abspath)
            entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
                           executable, packed_stat)
        else:
            entry[1][0] = ('f', '', stat_value.st_size,
                           executable, DirState.NULLSTAT)
    elif minikind == 'd':
        entry[1][0] = ('d', '', 0, False, packed_stat)
        if saved_minikind != 'd':
            # This changed from something into a directory. Make sure we
            # have a directory block for it. This doesn't happen very
            # often, so this doesn't have to be super fast.
            block_index, entry_index, dir_present, file_present = \
                state._get_block_entry_index(entry[0][0], entry[0][1], 0)
            state._ensure_block(block_index, entry_index,
                                osutils.pathjoin(entry[0][0], entry[0][1]))
    elif minikind == 'l':
        link_or_sha1 = state._read_link(abspath, saved_link_or_sha1)
        if state._cutoff_time is None:
            state._sha_cutoff_time()
        if (stat_value.st_mtime < state._cutoff_time
            and stat_value.st_ctime < state._cutoff_time):
            entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
                           False, packed_stat)
        else:
            entry[1][0] = ('l', '', stat_value.st_size,
                           False, DirState.NULLSTAT)
    state._dirblock_state = DirState.IN_MEMORY_MODIFIED
    return link_or_sha1


update_entry = py_update_entry
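# Illustrative note (hypothetical values, not from the original source): a
# dirstate entry as passed to update_entry/py_update_entry looks like
#   (('dir', 'name', 'file-id'),                      # key: dirname, basename, file id
#    [('f', sha1, size, executable, packed_stat),     # tree 0 (working tree) details
#     ('f', sha1, size, executable, revision_id)])    # tree 1 (basis) details
# so entry[1][0] above is always the working-tree details slot.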


class ProcessEntryPython(object):

    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
        "last_source_parent", "last_target_parent", "include_unchanged",
        "use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
        "search_specific_files", "state", "source_index", "target_index",
        "want_unversioned", "tree"]

    def __init__(self, include_unchanged, use_filesystem_for_exec,
        search_specific_files, state, source_index, target_index,
        want_unversioned, tree):
        self.old_dirname_to_file_id = {}
        self.new_dirname_to_file_id = {}
        # Just a sentry, so that _process_entry can say that this
        # record is handled, but isn't interesting to process (unchanged)
        self.uninteresting = object()
        # Using a list so that we can access the values and change them in
        # nested scope. Each one is [path, file_id, entry]
        self.last_source_parent = [None, None]
        self.last_target_parent = [None, None]
        self.include_unchanged = include_unchanged
        self.use_filesystem_for_exec = use_filesystem_for_exec
        self.utf8_decode = cache_utf8._utf8_decode
        # for all search_indexes in each path at or under each element of
        # search_specific_files, if the detail is relocated: add the id, and add the
        # relocated path as one to search if it's not searched already. If the
        # detail is not relocated, add the id.
        self.searched_specific_files = set()
        self.search_specific_files = search_specific_files
        self.state = state
        self.source_index = source_index
        self.target_index = target_index
        self.want_unversioned = want_unversioned
        self.tree = tree
def _process_entry(self, entry, path_info, pathjoin=osutils.pathjoin):
2980
"""Compare an entry and real disk to generate delta information.
2982
:param path_info: top_relpath, basename, kind, lstat, abspath for
2983
the path of entry. If None, then the path is considered absent.
2984
(Perhaps we should pass in a concrete entry for this ?)
2985
Basename is returned as a utf8 string because we expect this
2986
tuple will be ignored, and don't want to take the time to
2988
:return: None if these don't match
2989
A tuple of information about the change, or
2990
the object 'uninteresting' if these match, but are
2991
basically identical.
2993
if self.source_index is None:
2994
source_details = DirState.NULL_PARENT_DETAILS
2996
source_details = entry[1][self.source_index]
2997
target_details = entry[1][self.target_index]
2998
target_minikind = target_details[0]
2999
if path_info is not None and target_minikind in 'fdlt':
3000
if not (self.target_index == 0):
3001
raise AssertionError()
3002
link_or_sha1 = update_entry(self.state, entry,
3003
abspath=path_info[4], stat_value=path_info[3])
3004
# The entry may have been modified by update_entry
3005
target_details = entry[1][self.target_index]
3006
target_minikind = target_details[0]
3009
file_id = entry[0][2]
3010
source_minikind = source_details[0]
3011
if source_minikind in 'fdltr' and target_minikind in 'fdlt':
3012
# claimed content in both: diff
3013
# r | fdlt | | add source to search, add id path move and perform
3014
# | | | diff check on source-target
3015
# r | fdlt | a | dangling file that was present in the basis.
3017
if source_minikind in 'r':
3018
# add the source to the search path to find any children it
3019
# has. TODO ? : only add if it is a container ?
3020
if not osutils.is_inside_any(self.searched_specific_files,
3022
self.search_specific_files.add(source_details[1])
3023
# generate the old path; this is needed for stating later
3025
old_path = source_details[1]
3026
old_dirname, old_basename = os.path.split(old_path)
3027
path = pathjoin(entry[0][0], entry[0][1])
3028
old_entry = self.state._get_entry(self.source_index,
3030
# update the source details variable to be the real
3032
if old_entry == (None, None):
3033
raise errors.CorruptDirstate(self.state._filename,
3034
"entry '%s/%s' is considered renamed from %r"
3035
" but source does not exist\n"
3036
"entry: %s" % (entry[0][0], entry[0][1], old_path, entry))
3037
source_details = old_entry[1][self.source_index]
3038
source_minikind = source_details[0]
3040
old_dirname = entry[0][0]
3041
old_basename = entry[0][1]
3042
old_path = path = None
3043
if path_info is None:
3044
# the file is missing on disk, show as removed.
3045
content_change = True
3049
# source and target are both versioned and disk file is present.
3050
target_kind = path_info[2]
3051
if target_kind == 'directory':
3053
old_path = path = pathjoin(old_dirname, old_basename)
3054
self.new_dirname_to_file_id[path] = file_id
3055
if source_minikind != 'd':
3056
content_change = True
3058
# directories have no fingerprint
3059
content_change = False
3061
elif target_kind == 'file':
3062
if source_minikind != 'f':
3063
content_change = True
3065
# If the size is the same, check the sha:
3066
if target_details[2] == source_details[2]:
3067
if link_or_sha1 is None:
3069
statvalue, link_or_sha1 = \
3070
self.state._sha1_provider.stat_and_sha1(
3072
self.state._observed_sha1(entry, link_or_sha1,
3074
content_change = (link_or_sha1 != source_details[1])
3076
# Size changed, so must be different
3077
content_change = True
3078
# Target details is updated at update_entry time
3079
if self.use_filesystem_for_exec:
3080
# We don't need S_ISREG here, because we are sure
3081
# we are dealing with a file.
3082
target_exec = bool(stat.S_IEXEC & path_info[3].st_mode)
3084
target_exec = target_details[3]
3085
elif target_kind == 'symlink':
3086
if source_minikind != 'l':
3087
content_change = True
3089
content_change = (link_or_sha1 != source_details[1])
3091
elif target_kind == 'tree-reference':
3092
if source_minikind != 't':
3093
content_change = True
3095
content_change = False
3098
raise Exception, "unknown kind %s" % path_info[2]
3099
if source_minikind == 'd':
3101
old_path = path = pathjoin(old_dirname, old_basename)
3102
self.old_dirname_to_file_id[old_path] = file_id
3103
# parent id is the entry for the path in the target tree
3104
if old_dirname == self.last_source_parent[0]:
3105
source_parent_id = self.last_source_parent[1]
3108
source_parent_id = self.old_dirname_to_file_id[old_dirname]
3110
source_parent_entry = self.state._get_entry(self.source_index,
3111
path_utf8=old_dirname)
3112
source_parent_id = source_parent_entry[0][2]
3113
if source_parent_id == entry[0][2]:
3114
# This is the root, so the parent is None
3115
source_parent_id = None
3117
self.last_source_parent[0] = old_dirname
3118
self.last_source_parent[1] = source_parent_id
3119
new_dirname = entry[0][0]
3120
if new_dirname == self.last_target_parent[0]:
3121
target_parent_id = self.last_target_parent[1]
3124
target_parent_id = self.new_dirname_to_file_id[new_dirname]
3126
# TODO: We don't always need to do the lookup, because the
3127
# parent entry will be the same as the source entry.
3128
target_parent_entry = self.state._get_entry(self.target_index,
3129
path_utf8=new_dirname)
3130
if target_parent_entry == (None, None):
3131
raise AssertionError(
3132
"Could not find target parent in wt: %s\nparent of: %s"
3133
% (new_dirname, entry))
3134
target_parent_id = target_parent_entry[0][2]
3135
if target_parent_id == entry[0][2]:
3136
# This is the root, so the parent is None
3137
target_parent_id = None
3139
self.last_target_parent[0] = new_dirname
3140
self.last_target_parent[1] = target_parent_id
3142
source_exec = source_details[3]
3143
if (self.include_unchanged
3145
or source_parent_id != target_parent_id
3146
or old_basename != entry[0][1]
3147
or source_exec != target_exec
3149
if old_path is None:
3150
old_path = path = pathjoin(old_dirname, old_basename)
3151
old_path_u = self.utf8_decode(old_path)[0]
3154
old_path_u = self.utf8_decode(old_path)[0]
3155
if old_path == path:
3158
path_u = self.utf8_decode(path)[0]
3159
source_kind = DirState._minikind_to_kind[source_minikind]
3160
return (entry[0][2],
3161
(old_path_u, path_u),
3164
(source_parent_id, target_parent_id),
3165
(self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
3166
(source_kind, target_kind),
3167
(source_exec, target_exec))
3169
return self.uninteresting
3170
elif source_minikind in 'a' and target_minikind in 'fdlt':
3171
# looks like a new file
3172
path = pathjoin(entry[0][0], entry[0][1])
3173
# parent id is the entry for the path in the target tree
3174
# TODO: these are the same for an entire directory: cache em.
3175
parent_id = self.state._get_entry(self.target_index,
3176
path_utf8=entry[0][0])[0][2]
3177
if parent_id == entry[0][2]:
3179
if path_info is not None:
3181
if self.use_filesystem_for_exec:
3182
# We need S_ISREG here, because we aren't sure if this
3185
stat.S_ISREG(path_info[3].st_mode)
3186
and stat.S_IEXEC & path_info[3].st_mode)
3188
target_exec = target_details[3]
3189
return (entry[0][2],
3190
(None, self.utf8_decode(path)[0]),
3194
(None, self.utf8_decode(entry[0][1])[0]),
3195
(None, path_info[2]),
3196
(None, target_exec))
3198
# Its a missing file, report it as such.
3199
return (entry[0][2],
3200
(None, self.utf8_decode(path)[0]),
3204
(None, self.utf8_decode(entry[0][1])[0]),
3207
elif source_minikind in 'fdlt' and target_minikind in 'a':
3208
# unversioned, possibly, or possibly not deleted: we dont care.
3209
# if its still on disk, *and* theres no other entry at this
3210
# path [we dont know this in this routine at the moment -
3211
# perhaps we should change this - then it would be an unknown.
3212
old_path = pathjoin(entry[0][0], entry[0][1])
3213
# parent id is the entry for the path in the target tree
3214
parent_id = self.state._get_entry(self.source_index, path_utf8=entry[0][0])[0][2]
3215
if parent_id == entry[0][2]:
3217
return (entry[0][2],
3218
(self.utf8_decode(old_path)[0], None),
3222
(self.utf8_decode(entry[0][1])[0], None),
3223
(DirState._minikind_to_kind[source_minikind], None),
3224
(source_details[3], None))
3225
elif source_minikind in 'fdlt' and target_minikind in 'r':
3226
# a rename; could be a true rename, or a rename inherited from
3227
# a renamed parent. TODO: handle this efficiently. Its not
3228
# common case to rename dirs though, so a correct but slow
3229
# implementation will do.
3230
if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
3231
self.search_specific_files.add(target_details[1])
3232
elif source_minikind in 'ra' and target_minikind in 'ra':
3233
# neither of the selected trees contain this file,
3234
# so skip over it. This is not currently directly tested, but
3235
# is indirectly via test_too_much.TestCommands.test_conflicts.
3238
raise AssertionError("don't know how to compare "
3239
"source_minikind=%r, target_minikind=%r"
3240
% (source_minikind, target_minikind))
3241
## import pdb;pdb.set_trace()

    def iter_changes(self):
        """Iterate over the changes."""
        utf8_decode = cache_utf8._utf8_decode
        _cmp_by_dirs = cmp_by_dirs
        _process_entry = self._process_entry
        uninteresting = self.uninteresting
        search_specific_files = self.search_specific_files
        searched_specific_files = self.searched_specific_files
        splitpath = osutils.splitpath

        # compare source_index and target_index at or under each element of search_specific_files.
        # follow the following comparison table. Note that we only want to do diff operations when
        # the target is fdl because that's when the walkdirs logic will have exposed the pathinfo
        # for the target.
        #
        # Source | Target | disk | action
        #   r    | fdlt   |      | add source to search, add id path move and perform
        #        |        |      | diff check on source-target
        #   r    | fdlt   |  a   | dangling file that was present in the basis.
        #   r    |  a     |      | add source to search
        #   r    |  r     |      | this path is present in a non-examined tree, skip.
        #   r    |  r     |  a   | this path is present in a non-examined tree, skip.
        #   a    | fdlt   |      | add new id
        #   a    | fdlt   |  a   | dangling locally added file, skip
        #   a    |  a     |      | not present in either tree, skip
        #   a    |  a     |  a   | not present in any tree, skip
        #   a    |  r     |      | not present in either tree at this path, skip as it
        #        |        |      | may not be selected by the user's list of paths.
        #   a    |  r     |  a   | not present in either tree at this path, skip as it
        #        |        |      | may not be selected by the user's list of paths.
        #  fdlt  | fdlt   |      | content in both: diff them
        #  fdlt  | fdlt   |  a   | deleted locally, but not unversioned - show as deleted ?
        #  fdlt  |  a     |      | unversioned: output deleted id for now
        #  fdlt  |  a     |  a   | unversioned and deleted: output deleted id
        #  fdlt  |  r     |      | relocated in this tree, so add target to search.
        #        |        |      | Don't diff, we will see an r,fd; pair when we reach
        #        |        |      | this id at the other path.
        #  fdlt  |  r     |  a   | relocated in this tree, so add target to search.
        #        |        |      | Don't diff, we will see an r,fd; pair when we reach
        #        |        |      | this id at the other path.

        # TODO: jam 20070516 - Avoid the _get_entry lookup overhead by
        #       keeping a cache of directories that we have seen.

        while search_specific_files:
            # TODO: the pending list should be lexically sorted?  the
            # interface doesn't require it.
            current_root = search_specific_files.pop()
            current_root_unicode = current_root.decode('utf8')
            searched_specific_files.add(current_root)
            # process the entries for this containing directory: the rest will be
            # found by their parents recursively.
            root_entries = self.state._entries_for_path(current_root)
            root_abspath = self.tree.abspath(current_root_unicode)
            try:
                root_stat = os.lstat(root_abspath)
            except OSError, e:
                if e.errno == errno.ENOENT:
                    # the path does not exist: let _process_entry know that.
                    root_dir_info = None
                else:
                    # some other random error: hand it up.
                    raise
            else:
                root_dir_info = ('', current_root,
                    osutils.file_kind_from_stat_mode(root_stat.st_mode), root_stat,
                    root_abspath)
                if root_dir_info[2] == 'directory':
                    if self.tree._directory_is_tree_reference(
                        current_root.decode('utf8')):
                        root_dir_info = root_dir_info[:2] + \
                            ('tree-reference',) + root_dir_info[3:]

            if not root_entries and not root_dir_info:
                # this specified path is not present at all, skip it.
                continue
            path_handled = False
            for entry in root_entries:
                result = _process_entry(entry, root_dir_info)
                if result is not None:
                    path_handled = True
                    if result is not uninteresting:
                        yield result
            if self.want_unversioned and not path_handled and root_dir_info:
                new_executable = bool(
                    stat.S_ISREG(root_dir_info[3].st_mode)
                    and stat.S_IEXEC & root_dir_info[3].st_mode)
                yield (None,
                       (None, current_root_unicode),
                       True,
                       (False, False),
                       (None, None),
                       (None, splitpath(current_root_unicode)[-1]),
                       (None, root_dir_info[2]),
                       (None, new_executable)
                      )
            initial_key = (current_root, '', '')
            block_index, _ = self.state._find_block_index_from_key(initial_key)
            if block_index == 0:
                # we have processed the total root already, but because the
                # initial key matched it we should skip it here.
                block_index += 1
            if root_dir_info and root_dir_info[2] == 'tree-reference':
                current_dir_info = None
            else:
                dir_iterator = osutils._walkdirs_utf8(root_abspath, prefix=current_root)
                try:
                    current_dir_info = dir_iterator.next()
                except OSError, e:
                    # on win32, python2.4 has e.errno == ERROR_DIRECTORY, but
                    # python 2.5 has e.errno == EINVAL,
                    #            and e.winerror == ERROR_DIRECTORY
                    e_winerror = getattr(e, 'winerror', None)
                    win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND)
                    # there may be directories in the inventory even though
                    # this path is not a file on disk: so mark it as end of
                    # iterator.
                    if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
                        current_dir_info = None
                    elif (sys.platform == 'win32'
                          and (e.errno in win_errors
                               or e_winerror in win_errors)):
                        current_dir_info = None
                    else:
                        raise
                else:
                    if current_dir_info[0][0] == '':
                        # remove .bzr from iteration
                        bzr_index = bisect.bisect_left(current_dir_info[1], ('.bzr',))
                        if current_dir_info[1][bzr_index][0] != '.bzr':
                            raise AssertionError()
                        del current_dir_info[1][bzr_index]
            # walk until both the directory listing and the versioned metadata
            # are exhausted.
            if (block_index < len(self.state._dirblocks) and
                osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):
                current_block = self.state._dirblocks[block_index]
            else:
                current_block = None
            while (current_dir_info is not None or
                   current_block is not None):
                if (current_dir_info and current_block
                    and current_dir_info[0][0] != current_block[0]):
                    if _cmp_by_dirs(current_dir_info[0][0], current_block[0]) < 0:
                        # filesystem data refers to paths not covered by the dirblock.
                        # this has two possibilities:
                        # A) it is versioned but empty, so there is no block for it
                        # B) it is not versioned.

                        # if (A) then we need to recurse into it to check for
                        # new unknown files or directories.
                        # if (B) then we should ignore it, because we don't
                        # recurse into unknown directories.
                        path_index = 0
                        while path_index < len(current_dir_info[1]):
                            current_path_info = current_dir_info[1][path_index]
                            if self.want_unversioned:
                                if current_path_info[2] == 'directory':
                                    if self.tree._directory_is_tree_reference(
                                        current_path_info[0].decode('utf8')):
                                        current_path_info = current_path_info[:2] + \
                                            ('tree-reference',) + current_path_info[3:]
                                new_executable = bool(
                                    stat.S_ISREG(current_path_info[3].st_mode)
                                    and stat.S_IEXEC & current_path_info[3].st_mode)
                                yield (None,
                                       (None, utf8_decode(current_path_info[0])[0]),
                                       True,
                                       (False, False),
                                       (None, None),
                                       (None, utf8_decode(current_path_info[1])[0]),
                                       (None, current_path_info[2]),
                                       (None, new_executable))
                            # don't descend into this unversioned path if it is
                            # a dir
                            if current_path_info[2] in ('directory',
                                                        'tree-reference'):
                                del current_dir_info[1][path_index]
                                path_index -= 1
                            path_index += 1

                        # This dir info has been handled, go to the next
                        try:
                            current_dir_info = dir_iterator.next()
                        except StopIteration:
                            current_dir_info = None
                    else:
                        # We have a dirblock entry for this location, but there
                        # is no filesystem path for this. This is most likely
                        # because a directory was removed from the disk.
                        # We don't have to report the missing directory,
                        # because that should have already been handled, but we
                        # need to handle all of the files that are contained
                        # within.
                        for current_entry in current_block[1]:
                            # entry referring to file not present on disk.
                            # advance the entry only, after processing.
                            result = _process_entry(current_entry, None)
                            if result is not None:
                                if result is not uninteresting:
                                    yield result
                        block_index += 1
                        if (block_index < len(self.state._dirblocks) and
                            osutils.is_inside(current_root,
                                              self.state._dirblocks[block_index][0])):
                            current_block = self.state._dirblocks[block_index]
                        else:
                            current_block = None
                    continue
                entry_index = 0
                if current_block and entry_index < len(current_block[1]):
                    current_entry = current_block[1][entry_index]
                else:
                    current_entry = None
                advance_entry = True
                path_index = 0
                if current_dir_info and path_index < len(current_dir_info[1]):
                    current_path_info = current_dir_info[1][path_index]
                    if current_path_info[2] == 'directory':
                        if self.tree._directory_is_tree_reference(
                            current_path_info[0].decode('utf8')):
                            current_path_info = current_path_info[:2] + \
                                ('tree-reference',) + current_path_info[3:]
                else:
                    current_path_info = None
                advance_path = True
                path_handled = False
                while (current_entry is not None or
                       current_path_info is not None):
                    if current_entry is None:
                        # the check for path_handled when the path is advanced
                        # will yield this path if needed.
                        pass
                    elif current_path_info is None:
                        # no path is fine: the per entry code will handle it.
                        result = _process_entry(current_entry, current_path_info)
                        if result is not None:
                            if result is not uninteresting:
                                yield result
                    elif (current_entry[0][1] != current_path_info[1]
                          or current_entry[1][self.target_index][0] in 'ar'):
                        # The current path on disk doesn't match the dirblock
                        # record. Either the dirblock is marked as absent, or
                        # the file on disk is not present at all in the
                        # dirblock. Either way, report about the dirblock
                        # entry, and let other code handle the filesystem one.

                        # Compare the basename for these files to determine
                        # who goes first.
                        if current_path_info[1] < current_entry[0][1]:
                            # extra file on disk: pass for now, but only
                            # increment the path, not the entry
                            advance_entry = False
                        else:
                            # entry referring to file not present on disk.
                            # advance the entry only, after processing.
                            result = _process_entry(current_entry, None)
                            if result is not None:
                                if result is not uninteresting:
                                    yield result
                            advance_path = False
                    else:
                        result = _process_entry(current_entry, current_path_info)
                        if result is not None:
                            path_handled = True
                            if result is not uninteresting:
                                yield result
                    if advance_entry and current_entry is not None:
                        entry_index += 1
                        if entry_index < len(current_block[1]):
                            current_entry = current_block[1][entry_index]
                        else:
                            current_entry = None
                    else:
                        advance_entry = True  # reset the advance flag
                    if advance_path and current_path_info is not None:
                        if not path_handled:
                            # unversioned in all regards
                            if self.want_unversioned:
                                new_executable = bool(
                                    stat.S_ISREG(current_path_info[3].st_mode)
                                    and stat.S_IEXEC & current_path_info[3].st_mode)
                                try:
                                    relpath_unicode = utf8_decode(current_path_info[0])[0]
                                except UnicodeDecodeError:
                                    raise errors.BadFilenameEncoding(
                                        current_path_info[0], osutils._fs_enc)
                                yield (None,
                                       (None, relpath_unicode),
                                       True,
                                       (False, False),
                                       (None, None),
                                       (None, utf8_decode(current_path_info[1])[0]),
                                       (None, current_path_info[2]),
                                       (None, new_executable))
                            # don't descend into this unversioned path if it is
                            # a dir
                            if current_path_info[2] in ('directory'):
                                del current_dir_info[1][path_index]
                                path_index -= 1
                        # don't descend the disk iterator into any tree
                        # paths.
                        if current_path_info[2] == 'tree-reference':
                            del current_dir_info[1][path_index]
                            path_index -= 1
                        path_index += 1
                        if path_index < len(current_dir_info[1]):
                            current_path_info = current_dir_info[1][path_index]
                            if current_path_info[2] == 'directory':
                                if self.tree._directory_is_tree_reference(
                                    current_path_info[0].decode('utf8')):
                                    current_path_info = current_path_info[:2] + \
                                        ('tree-reference',) + current_path_info[3:]
                        else:
                            current_path_info = None
                        path_handled = False
                    else:
                        advance_path = True  # reset the advance flag.
                if current_block is not None:
                    block_index += 1
                    if (block_index < len(self.state._dirblocks) and
                        osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):
                        current_block = self.state._dirblocks[block_index]
                    else:
                        current_block = None
                if current_dir_info is not None:
                    try:
                        current_dir_info = dir_iterator.next()
                    except StopIteration:
                        current_dir_info = None

_process_entry = ProcessEntryPython


# Try to load the compiled form if possible
try:
    from bzrlib._dirstate_helpers_c import (
        _read_dirblocks_c as _read_dirblocks,
        bisect_dirblock_c as bisect_dirblock,
        _bisect_path_left_c as _bisect_path_left,
        _bisect_path_right_c as _bisect_path_right,
        cmp_by_dirs_c as cmp_by_dirs,
        ProcessEntryC as _process_entry,
        update_entry as update_entry,
        )
except ImportError:
    from bzrlib._dirstate_helpers_py import (
        _read_dirblocks_py as _read_dirblocks,
        bisect_dirblock_py as bisect_dirblock,
        _bisect_path_left_py as _bisect_path_left,
        _bisect_path_right_py as _bisect_path_right,
        cmp_by_dirs_py as cmp_by_dirs,
        )

def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}):
    """Return the index where to insert dirname into the dirblocks.

    The return value idx is such that all directory blocks in dirblock[:idx]
    have names < dirname, and all blocks in dirblock[idx:] have names >=
    dirname.

    Optional args lo (default 0) and hi (default len(dirblocks)) bound the
    slice of dirblocks to be searched.
    """
    if hi is None:
        hi = len(dirblocks)
    try:
        dirname_split = cache[dirname]
    except KeyError:
        dirname_split = dirname.split('/')
        cache[dirname] = dirname_split
    while lo < hi:
        mid = (lo + hi) // 2
        # Grab the dirname for the current dirblock
        cur = dirblocks[mid][0]
        try:
            cur_split = cache[cur]
        except KeyError:
            cur_split = cur.split('/')
            cache[cur] = cur_split
        if cur_split < dirname_split: lo = mid + 1
        else: hi = mid
    return lo
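# Illustrative usage (hypothetical data, not from the original source):
# dirblocks are kept sorted by split directory path, so
#   blocks = [('', [...]), ('a', [...]), ('a/b', [...]), ('c', [...])]
#   bisect_dirblock(blocks, 'a/b')   # -> 2, the index of the 'a/b' block
# lets callers locate or insert a directory's block without a linear scan.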

def pack_stat(st, _encode=base64.encodestring, _pack=struct.pack):
    """Convert stat values into a packed representation."""
    # jam 20060614 it isn't really worth removing more entries if we
    # are going to leave it in packed form.
    # With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
    # With all entries filesize is 5.9M and read time is maybe 280ms
    # well within the noise margin

    # base64.encode always adds a final newline, so strip it off
    return _encode(_pack('>LLLLLL'
        , st.st_size, int(st.st_mtime), int(st.st_ctime)
        , st.st_dev, st.st_ino & 0xFFFFFFFF, st.st_mode))[:-1]
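# Illustrative note (hypothetical values): packing six 32-bit fields gives a
# 24-byte struct, which base64-encodes to a 32-character fingerprint, e.g.
#   pack_stat(os.lstat('somefile'))   # -> something like 'AAACgEk...'
# The dirstate stores this per entry so it can tell when a cached sha1 is
# still valid without re-reading the file.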