    def _check_delta_is_valid(self, delta):
        return list(inventory._check_delta_unique_ids(
                    inventory._check_delta_unique_old_paths(
                    inventory._check_delta_unique_new_paths(
                    inventory._check_delta_ids_match_entry(
                    inventory._check_delta_ids_are_valid(
                    inventory._check_delta_new_path_entry_both_or_None(delta)))))))
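    # Illustrative note (not part of the original module): each
    # inventory._check_delta_* helper above is a generator that validates and
    # yields the delta items it receives, so wrapping the chain in list()
    # forces every check to run before any change is applied. For example, a
    # hypothetical delta that reuses one file_id for two additions would be
    # rejected here rather than part-way through an apply:
    #
    #   bad = [(None, u'a', 'dup-id', entry_a), (None, u'b', 'dup-id', entry_b)]
    #   state._check_delta_is_valid(bad)   # raises errors.InconsistentDelta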

    def update_by_delta(self, delta):
        """Apply an inventory delta to the dirstate for tree 0

        This is the workhorse for apply_inventory_delta in dirstate based
        trees.

        :param delta: An inventory delta. See Inventory.apply_delta for
            details.
        """
        self._read_dirblocks_if_needed()
        encode = cache_utf8.encode
        insertions = {}
        removals = {}
        # Accumulate parent references (path_utf8, id), to check for parentless
        # items or items placed under files/links/tree-references. We get
        # references from every item in the delta that is not a deletion and
        # is not itself the root.
        parents = set()
        # Added ids must not be in the dirstate already. This set holds those
        # ids.
        new_ids = set()
        # This loop transforms the delta to single atomic operations that can
        # be executed and validated.
        delta = sorted(self._check_delta_is_valid(delta), reverse=True)
        for old_path, new_path, file_id, inv_entry in delta:
            if (file_id in insertions) or (file_id in removals):
                self._raise_invalid(old_path or new_path, file_id,
                    "repeated file_id")
            if old_path is not None:
                old_path = old_path.encode('utf-8')
                removals[file_id] = old_path
            else:
                new_ids.add(file_id)
            if new_path is not None:
                if inv_entry is None:
                    self._raise_invalid(new_path, file_id,
                        "new_path with no entry")
                new_path = new_path.encode('utf-8')
                dirname_utf8, basename = osutils.split(new_path)
                if basename:
                    parents.add((dirname_utf8, inv_entry.parent_id))
                key = (dirname_utf8, basename, file_id)
                minikind = DirState._kind_to_minikind[inv_entry.kind]
                if minikind == 't':
                    fingerprint = inv_entry.reference_revision or ''
                else:
                    fingerprint = ''
                insertions[file_id] = (key, minikind, inv_entry.executable,
                                       fingerprint, new_path)
            # Transform moves into delete+add pairs
            if None not in (old_path, new_path):
                for child in self._iter_child_entries(0, old_path):
                    if child[0][2] in insertions or child[0][2] in removals:
                        continue
                    child_dirname = child[0][0]
                    child_basename = child[0][1]
                    minikind = child[1][0][0]
                    fingerprint = child[1][0][4]
                    executable = child[1][0][3]
                    old_child_path = osutils.pathjoin(child_dirname,
                                                      child_basename)
                    removals[child[0][2]] = old_child_path
                    child_suffix = child_dirname[len(old_path):]
                    new_child_dirname = (new_path + child_suffix)
                    key = (new_child_dirname, child_basename, child[0][2])
                    new_child_path = osutils.pathjoin(new_child_dirname,
                                                      child_basename)
                    insertions[child[0][2]] = (key, minikind, executable,
                                               fingerprint, new_child_path)
        self._check_delta_ids_absent(new_ids, delta, 0)
        try:
            self._apply_removals(removals.iteritems())
            self._apply_insertions(insertions.values())
            # Validate parents
            self._after_delta_check_parents(parents, 0)
        except errors.BzrError, e:
            self._changes_aborted = True
            if 'integrity error' not in str(e):
                raise
            # _get_entry raises BzrError when a request is inconsistent; we
            # want such errors to be shown as InconsistentDelta - and that
            # fits the behaviour we trigger.
            raise errors.InconsistentDeltaDelta(delta,
                "error from _get_entry. %s" % (e,))

    def _apply_removals(self, removals):
        for file_id, path in sorted(removals, reverse=True,
            key=operator.itemgetter(1)):
            dirname, basename = osutils.split(path)
            block_i, entry_i, d_present, f_present = \
                self._get_block_entry_index(dirname, basename, 0)
            try:
                entry = self._dirblocks[block_i][1][entry_i]
            except IndexError:
                self._raise_invalid(path, file_id,
                    "Wrong path for old path.")
            if not f_present or entry[1][0][0] in 'ar':
                self._raise_invalid(path, file_id,
                    "Wrong path for old path.")
            if file_id != entry[0][2]:
                self._raise_invalid(path, file_id,
                    "Attempt to remove path has wrong id - found %r."
                    % entry[0][2])
            self._make_absent(entry)
            # See if we have a malformed delta: deleting a directory must not
            # leave crud behind. This increases the number of bisects needed
            # substantially, but deletion or renames of large numbers of paths
            # is rare enough it shouldn't be an issue (famous last words?) RBC
            # 20080730.
            block_i, entry_i, d_present, f_present = \
                self._get_block_entry_index(path, '', 0)
            if d_present:
                # The dir block is still present in the dirstate; this could
                # be due to it being in a parent tree, or a corrupt delta.
                for child_entry in self._dirblocks[block_i][1]:
                    if child_entry[1][0][0] not in ('r', 'a'):
                        self._raise_invalid(path, entry[0][2],
                            "The file id was deleted but its children were "
                            "not deleted.")

    def _apply_insertions(self, adds):
        try:
            for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
                self.update_minimal(key, minikind, executable, fingerprint,
                                    path_utf8=path_utf8)
        except errors.NotVersionedError:
            self._raise_invalid(path_utf8.decode('utf8'), key[2],
                "Missing parent")
def update_basis_by_delta(self, delta, new_revid):
1445
"""Update the parents of this tree after a commit.
1447
This gives the tree one parent, with revision id new_revid. The
1448
inventory delta is applied to the current basis tree to generate the
1449
inventory for the parent new_revid, and all other parent trees are
1452
Note that an exception during the operation of this method will leave
1453
the dirstate in a corrupt state where it should not be saved.
1455
:param new_revid: The new revision id for the trees parent.
1456
:param delta: An inventory delta (see apply_inventory_delta) describing
1457
the changes from the current left most parent revision to new_revid.
1459
self._read_dirblocks_if_needed()
1460
self._discard_merge_parents()
1461
if self._ghosts != []:
1462
raise NotImplementedError(self.update_basis_by_delta)
1463
if len(self._parents) == 0:
1464
# setup a blank tree, the most simple way.
1465
empty_parent = DirState.NULL_PARENT_DETAILS
1466
for entry in self._iter_entries():
1467
entry[1].append(empty_parent)
1468
self._parents.append(new_revid)
1470
self._parents[0] = new_revid
1472
delta = sorted(self._check_delta_is_valid(delta), reverse=True)
1476
# The paths this function accepts are unicode and must be encoded as we
1478
encode = cache_utf8.encode
1479
inv_to_entry = self._inv_entry_to_details
1480
# delta is now (deletes, changes), (adds) in reverse lexographical
1482
# deletes in reverse lexographic order are safe to process in situ.
1483
# renames are not, as a rename from any path could go to a path
1484
# lexographically lower, so we transform renames into delete, add pairs,
1485
# expanding them recursively as needed.
1486
# At the same time, to reduce interface friction we convert the input
1487
# inventory entries to dirstate.
1488
root_only = ('', '')
1489
# Accumulate parent references (path_utf8, id), to check for parentless
1490
# items or items placed under files/links/tree-references. We get
1491
# references from every item in the delta that is not a deletion and
1492
# is not itself the root.
1494
# Added ids must not be in the dirstate already. This set holds those
1497
for old_path, new_path, file_id, inv_entry in delta:
1498
if inv_entry is not None and file_id != inv_entry.file_id:
1499
self._raise_invalid(new_path, file_id,
1500
"mismatched entry file_id %r" % inv_entry)
1501
if new_path is None:
1502
new_path_utf8 = None
1504
if inv_entry is None:
1505
self._raise_invalid(new_path, file_id,
1506
"new_path with no entry")
1507
new_path_utf8 = encode(new_path)
1508
# note the parent for validation
1509
dirname_utf8, basename_utf8 = osutils.split(new_path_utf8)
1511
parents.add((dirname_utf8, inv_entry.parent_id))
1512
if old_path is None:
1513
old_path_utf8 = None
1515
old_path_utf8 = encode(old_path)
1516
if old_path is None:
1517
adds.append((None, new_path_utf8, file_id,
1518
inv_to_entry(inv_entry), True))
1519
new_ids.add(file_id)
1520
elif new_path is None:
1521
deletes.append((old_path_utf8, None, file_id, None, True))
1522
elif (old_path, new_path) == root_only:
1523
# change things in-place
1524
# Note: the case of a parent directory changing its file_id
1525
# tends to break optimizations here, because officially
1526
# the file has actually been moved, it just happens to
1527
# end up at the same path. If we can figure out how to
1528
# handle that case, we can avoid a lot of add+delete
1529
# pairs for objects that stay put.
1530
# elif old_path == new_path:
1531
changes.append((old_path_utf8, new_path_utf8, file_id,
1532
inv_to_entry(inv_entry)))
1535
# Because renames must preserve their children we must have
1536
# processed all relocations and removes before hand. The sort
1537
# order ensures we've examined the child paths, but we also
1538
# have to execute the removals, or the split to an add/delete
1539
# pair will result in the deleted item being reinserted, or
1540
# renamed items being reinserted twice - and possibly at the
1541
# wrong place. Splitting into a delete/add pair also simplifies
1542
# the handling of entries with ('f', ...), ('r' ...) because
1543
# the target of the 'r' is old_path here, and we add that to
1544
# deletes, meaning that the add handler does not need to check
1545
# for 'r' items on every pass.
1546
self._update_basis_apply_deletes(deletes)
1548
# Split into an add/delete pair recursively.
1549
adds.append((old_path_utf8, new_path_utf8, file_id,
1550
inv_to_entry(inv_entry), False))
1551
# Expunge deletes that we've seen so that deleted/renamed
1552
# children of a rename directory are handled correctly.
1553
new_deletes = reversed(list(
1554
self._iter_child_entries(1, old_path_utf8)))
1555
# Remove the current contents of the tree at orig_path, and
1556
# reinsert at the correct new path.
1557
for entry in new_deletes:
1558
child_dirname, child_basename, child_file_id = entry[0]
1560
source_path = child_dirname + '/' + child_basename
1562
source_path = child_basename
1565
new_path_utf8 + source_path[len(old_path_utf8):]
1567
if old_path_utf8 == '':
1568
raise AssertionError("cannot rename directory to"
1570
target_path = source_path[len(old_path_utf8) + 1:]
1571
adds.append((None, target_path, entry[0][2], entry[1][1], False))
1573
(source_path, target_path, entry[0][2], None, False))
1575
(old_path_utf8, new_path_utf8, file_id, None, False))
1577
self._check_delta_ids_absent(new_ids, delta, 1)
1579
# Finish expunging deletes/first half of renames.
1580
self._update_basis_apply_deletes(deletes)
1581
# Reinstate second half of renames and new paths.
1582
self._update_basis_apply_adds(adds)
1583
# Apply in-situ changes.
1584
self._update_basis_apply_changes(changes)
1586
self._after_delta_check_parents(parents, 1)
1587
except errors.BzrError, e:
1588
self._changes_aborted = True
1589
if 'integrity error' not in str(e):
1591
# _get_entry raises BzrError when a request is inconsistent; we
1592
# want such errors to be shown as InconsistentDelta - and that
1593
# fits the behaviour we trigger.
1594
raise errors.InconsistentDeltaDelta(delta,
1595
"error from _get_entry. %s" % (e,))
1597
self._mark_modified(header_modified=True)
1598
self._id_index = None
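
    # Illustrative sketch (assumption about callers, not original code):
    # after a commit the working-tree layer calls this roughly as
    #
    #   state.update_basis_by_delta(commit_delta, new_revid)
    #
    # turning tree 1 (the basis) into the inventory of new_revid while tree 0
    # (the working tree) is left untouched; renames are decomposed into the
    # adds/deletes lists above precisely so children are never reinserted at
    # a stale path.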

    def _check_delta_ids_absent(self, new_ids, delta, tree_index):
        """Check that none of the file_ids in new_ids are present in a tree."""
        if not new_ids:
            return
        id_index = self._get_id_index()
        for file_id in new_ids:
            for key in id_index.get(file_id, ()):
                block_i, entry_i, d_present, f_present = \
                    self._get_block_entry_index(key[0], key[1], tree_index)
                if not f_present:
                    # In a different tree
                    continue
                entry = self._dirblocks[block_i][1][entry_i]
                if entry[0][2] != file_id:
                    # Different file_id, so not what we want.
                    continue
                self._raise_invalid(("%s/%s" % key[0:2]).decode('utf8'), file_id,
                    "This file_id is new in the delta but already present in "
                    "the target")

    def _raise_invalid(self, path, file_id, reason):
        self._changes_aborted = True
        raise errors.InconsistentDelta(path, file_id, reason)

    def _update_basis_apply_adds(self, adds):
        """Apply a sequence of adds to tree 1 during update_basis_by_delta.

        They may be adds, or renames that have been split into add/delete
        pairs.

        :param adds: A sequence of adds. Each add is a tuple:
            (None, new_path_utf8, file_id, (entry_details), real_add). real_add
            is False when the add is the second half of a remove-and-reinsert
            pair created to handle renames and deletes.
        """
        # Adds are accumulated partly from renames, so can be in any input
        # order - sort it.
        # TODO: we may want to sort in dirblocks order. That way each entry
        #       will end up in the same directory, allowing the _get_entry
        #       fast-path for looking up 2 items in the same dir work.
        adds.sort(key=lambda x: x[1])
        # adds is now in lexographic order, which places all parents before
        # their children, so we can process it linearly.
        st = static_tuple.StaticTuple
        for old_path, new_path, file_id, new_details, real_add in adds:
            dirname, basename = osutils.split(new_path)
            entry_key = st(dirname, basename, file_id)
            block_index, present = self._find_block_index_from_key(entry_key)
            if not present:
                # The block where we want to put the file is not present.
                # However, it might have just been an empty directory. Look for
                # the parent in the basis-so-far before throwing an error.
                parent_dir, parent_base = osutils.split(dirname)
                parent_block_idx, parent_entry_idx, _, parent_present = \
                    self._get_block_entry_index(parent_dir, parent_base, 1)
                if not parent_present:
                    self._raise_invalid(new_path, file_id,
                        "Unable to find block for this record."
                        " Was the parent added?")
                self._ensure_block(parent_block_idx, parent_entry_idx, dirname)

            block = self._dirblocks[block_index][1]
            entry_index, present = self._find_entry_index(entry_key, block)
            if real_add:
                if old_path is not None:
                    self._raise_invalid(new_path, file_id,
                        'considered a real add but still had old_path at %s'
                        % (old_path,))
            if present:
                entry = block[entry_index]
                basis_kind = entry[1][1][0]
                if basis_kind == 'a':
                    entry[1][1] = new_details
                elif basis_kind == 'r':
                    raise NotImplementedError()
                else:
                    self._raise_invalid(new_path, file_id,
                        "An entry was marked as a new add"
                        " but the basis target already existed")
            else:
                # The exact key was not found in the block. However, we need to
                # check if there is a key next to us that would have matched.
                # We only need to check 2 locations, because there are only 2
                # places that an entry can be moved.
                for maybe_index in range(entry_index-1, entry_index+1):
                    if maybe_index < 0 or maybe_index >= len(block):
                        continue
                    maybe_entry = block[maybe_index]
                    if maybe_entry[0][:2] != (dirname, basename):
                        # Just a random neighbor
                        continue
                    if maybe_entry[0][2] == file_id:
                        raise AssertionError(
                            '_find_entry_index didnt find a key match'
                            ' but walking the data did, for %s'
                            % (entry_key,))
                    basis_kind = maybe_entry[1][1][0]
                    if basis_kind not in 'ar':
                        self._raise_invalid(new_path, file_id,
                            "we have an add record for path, but the path"
                            " is already present with another file_id %s"
                            % (maybe_entry[0][2],))

                entry = (entry_key, [DirState.NULL_PARENT_DETAILS,
                                     new_details])
                block.insert(entry_index, entry)

            active_kind = entry[1][0][0]
            if active_kind == 'a':
                # The active record shows up as absent, this could be genuine,
                # or it could be present at some other location. We need to
                # verify.
                id_index = self._get_id_index()
                # The id_index may not be perfectly accurate for tree1, because
                # we haven't been keeping it updated. However, it should be
                # fine for tree0, and that gives us enough info for what we
                # need.
                keys = id_index.get(file_id, ())
                for key in keys:
                    block_i, entry_i, d_present, f_present = \
                        self._get_block_entry_index(key[0], key[1], 0)
                    if not f_present:
                        continue
                    active_entry = self._dirblocks[block_i][1][entry_i]
                    if (active_entry[0][2] != file_id):
                        # Some other file is at this path, we don't need to
                        # link it.
                        continue
                    real_active_kind = active_entry[1][0][0]
                    if real_active_kind in 'ar':
                        # We found a record, which was not *this* record,
                        # which matches the file_id, but is not actually
                        # present. Something seems *really* wrong.
                        self._raise_invalid(new_path, file_id,
                            "We found a tree0 entry that doesnt make sense")
                    # Now, we've found a tree0 entry which matches the file_id
                    # but is at a different location. So update them to be
                    # rename records.
                    active_dir, active_name = active_entry[0][:2]
                    if active_dir:
                        active_path = active_dir + '/' + active_name
                    else:
                        active_path = active_name
                    active_entry[1][1] = st('r', new_path, 0, False, '')
                    entry[1][0] = st('r', active_path, 0, False, '')
            elif active_kind == 'r':
                raise NotImplementedError()

            new_kind = new_details[0]
            if new_kind == 'd':
                self._ensure_block(block_index, entry_index, new_path)
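
    # Illustrative note (not part of the original file): a dirstate entry is
    # (key, details) where key is (dirname_utf8, basename_utf8, file_id) and
    # details holds one (minikind, fingerprint, size, executable, tree_data)
    # tuple per tree, so entry[1][0] is the active tree and entry[1][1] the
    # basis. A hypothetical basis row for 'dir/file.txt' might look like:
    #
    #   (('dir', 'file.txt', 'file-id'),
    #    [('f', sha1, size, False, packed_stat),   # tree 0 (working tree)
    #     ('f', sha1, size, False, revision_id)])  # tree 1 (basis)
    #
    # The 'a' and 'r' minikinds tested above mean absent and relocated.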

    def _update_basis_apply_changes(self, changes):
        """Apply a sequence of changes to tree 1 during update_basis_by_delta.

        :param changes: A sequence of changes. Each change is a tuple:
            (path_utf8, path_utf8, file_id, (entry_details))
        """
        for old_path, new_path, file_id, new_details in changes:
            # the entry for this file_id must be in tree 0.
            entry = self._get_entry(1, file_id, new_path)
            if entry[0] is None or entry[1][1][0] in 'ar':
                self._raise_invalid(new_path, file_id,
                    'changed entry considered not present')
            entry[1][1] = new_details

    def _update_basis_apply_deletes(self, deletes):
        """Apply a sequence of deletes to tree 1 during update_basis_by_delta.

        They may be deletes, or renames that have been split into add/delete
        pairs.

        :param deletes: A sequence of deletes. Each delete is a tuple:
            (old_path_utf8, new_path_utf8, file_id, None, real_delete).
            real_delete is True when the desired outcome is an actual deletion
            rather than the rename handling logic temporarily deleting a path
            during the replacement of a parent.
        """
        null = DirState.NULL_PARENT_DETAILS
        for old_path, new_path, file_id, _, real_delete in deletes:
            if real_delete != (new_path is None):
                self._raise_invalid(old_path, file_id, "bad delete delta")
            # the entry for this file_id must be in tree 1.
            dirname, basename = osutils.split(old_path)
            block_index, entry_index, dir_present, file_present = \
                self._get_block_entry_index(dirname, basename, 1)
            if not file_present:
                self._raise_invalid(old_path, file_id,
                    'basis tree does not contain removed entry')
            entry = self._dirblocks[block_index][1][entry_index]
            # The state of the entry in the 'active' WT
            active_kind = entry[1][0][0]
            if entry[0][2] != file_id:
                self._raise_invalid(old_path, file_id,
                    'mismatched file_id in tree 1')
            dir_block = ()
            old_kind = entry[1][1][0]
            if active_kind in 'ar':
                # The active tree doesn't have this file_id.
                # The basis tree is changing this record. If this is a
                # rename, then we don't want the record here at all
                # anymore. If it is just an in-place change, we want the
                # record here, but we'll add it if we need to. So we just
                # delete it
                if active_kind == 'r':
                    active_path = entry[1][0][1]
                    active_entry = self._get_entry(0, file_id, active_path)
                    if active_entry[1][1][0] != 'r':
                        self._raise_invalid(old_path, file_id,
                            "Dirstate did not have matching rename entries")
                    elif active_entry[1][0][0] in 'ar':
                        self._raise_invalid(old_path, file_id,
                            "Dirstate had a rename pointing at an inactive"
                            " tree0")
                    active_entry[1][1] = null
                del self._dirblocks[block_index][1][entry_index]
                if old_kind == 'd':
                    # This was a directory, and the active tree says it
                    # doesn't exist, and now the basis tree says it doesn't
                    # exist. Remove its dirblock if present
                    (dir_block_index,
                     present) = self._find_block_index_from_key(
                        (old_path, '', ''))
                    if present:
                        dir_block = self._dirblocks[dir_block_index][1]
                        if not dir_block:
                            # This entry is empty, go ahead and just remove it
                            del self._dirblocks[dir_block_index]
            else:
                # There is still an active record, so just mark this
                # removed.
                entry[1][1] = null
                block_i, entry_i, d_present, f_present = \
                    self._get_block_entry_index(old_path, '', 1)
                if d_present:
                    dir_block = self._dirblocks[block_i][1]
            for child_entry in dir_block:
                child_basis_kind = child_entry[1][1][0]
                if child_basis_kind not in 'ar':
                    self._raise_invalid(old_path, file_id,
                        "The file id was deleted but its children were "
                        "not deleted.")

    def _after_delta_check_parents(self, parents, index):
        """Check that parents required by the delta are all intact.

        :param parents: An iterable of (path_utf8, file_id) tuples which are
            required to be present in tree 'index' at path_utf8 with id file_id
            and be a directory.
        :param index: The column in the dirstate to check for parents in.
        """
        for dirname_utf8, file_id in parents:
            # Get the entry - this ensures that file_id, dirname_utf8 exists
            # and has the right file id.
            entry = self._get_entry(index, file_id, dirname_utf8)
            if entry[1] is None:
                self._raise_invalid(dirname_utf8.decode('utf8'),
                    file_id, "This parent is not present.")
            # Parents of things must be directories
            if entry[1][index][0] != 'd':
                self._raise_invalid(dirname_utf8.decode('utf8'),
                    file_id, "This parent is not a directory.")

    def _observed_sha1(self, entry, sha1, stat_value,
        _stat_to_minikind=_stat_to_minikind):
        """Note the sha1 of a file.

        :param entry: The entry the sha1 is for.
        :param sha1: The observed sha1.
        :param stat_value: The os.lstat for the file.
        """
        try:
            minikind = _stat_to_minikind[stat_value.st_mode & 0170000]
        except KeyError:
            # Unhandled kind
            return None
        if minikind == 'f':
            if self._cutoff_time is None:
                self._sha_cutoff_time()
            if (stat_value.st_mtime < self._cutoff_time
                and stat_value.st_ctime < self._cutoff_time):
                entry[1][0] = ('f', sha1, stat_value.st_size, entry[1][0][3],
                               pack_stat(stat_value))
                self._mark_modified([entry])

    def _sha_cutoff_time(self):
        """Return cutoff time.

        Files modified more recently than this time are at risk of being
        undetectably modified and so can't be cached.
        """
        # Cache the cutoff time as long as we hold a lock.
        # time.time() isn't super expensive (approx 3.38us), but
        # when you call it 50,000 times it adds up.
        # For comparison, os.lstat() costs 7.2us if it is hot.
        self._cutoff_time = int(time.time()) - 3
        return self._cutoff_time
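
    # Worked example (assumption about the constant's intent): with
    # time.time() == 1000003.7 the cutoff becomes int(1000003.7) - 3 ==
    # 1000000, so any file whose mtime or ctime falls within roughly the last
    # three seconds is treated as too fresh to cache - its content could
    # still change without the stat value changing at one-second resolution.

    def _lstat(self, abspath, entry):
        """Return the os.lstat value for this path."""
        return os.lstat(abspath)

    def _sha1_file_and_mutter(self, abspath):
        # when -Dhashcache is turned on, this is monkey-patched in to log
        # file reads
        trace.mutter("dirstate sha1 " + abspath)
        return self._sha1_provider.sha1(abspath)

    def _is_executable(self, mode, old_executable):
        """Is this file executable?"""
        return bool(S_IEXEC & mode)

    def _is_executable_win32(self, mode, old_executable):
        """On win32 the executable bit is stored in the dirstate."""
        return old_executable

    if sys.platform == 'win32':
        _is_executable = _is_executable_win32

    def _read_link(self, abspath, old_link):
        """Read the target of a symlink"""
        # TODO: jam 200700301 On Win32, this could just return the value
        #       already in memory. However, this really needs to be done at a
        #       higher level, because there either won't be anything on disk,
        #       or the thing on disk will be a file.
        fs_encoding = osutils._fs_enc
        if isinstance(abspath, unicode):
            # abspath is defined as the path to pass to lstat. readlink is
            # buggy in python < 2.6 (it doesn't encode unicode path into FS
            # encoding), so we need to encode ourselves knowing that unicode
            # paths are produced by UnicodeDirReader on purpose.
            abspath = abspath.encode(fs_encoding)
        target = os.readlink(abspath)
        if fs_encoding not in ('utf-8', 'ascii'):
            # Change encoding if needed
            target = target.decode(fs_encoding).encode('UTF-8')
        return target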

    def get_ghosts(self):
        """Return a list of the parent tree revision ids that are ghosts."""
        self._read_header_if_needed()
        return self._ghosts

        if not present:
            self._dirblocks.insert(block_index, (subdir_key[0], []))
        self._mark_modified()

    def _maybe_remove_row(self, block, index, id_index):
        """Remove index if it is absent or relocated across the row.

        id_index is updated accordingly.
        :return: True if we removed the row, False otherwise
        """
        present_in_row = False
        entry = block[index]
        for column in entry[1]:
            if column[0] not in 'ar':
                present_in_row = True
                break
        if not present_in_row:
            block.pop(index)
            self._remove_from_id_index(id_index, entry[0])
            return True
        return False
def _validate(self):
3149
"""Check that invariants on the dirblock are correct.
3151
This can be useful in debugging; it shouldn't be necessary in
3154
This must be called with a lock held.
3156
# NOTE: This must always raise AssertionError not just assert,
3157
# otherwise it may not behave properly under python -O
3159
# TODO: All entries must have some content that's not 'a' or 'r',
3160
# otherwise it could just be removed.
3162
# TODO: All relocations must point directly to a real entry.
3164
# TODO: No repeated keys.
3167
from pprint import pformat
3168
self._read_dirblocks_if_needed()
3169
if len(self._dirblocks) > 0:
3170
if not self._dirblocks[0][0] == '':
3171
raise AssertionError(
3172
"dirblocks don't start with root block:\n" + \
3173
pformat(self._dirblocks))
3174
if len(self._dirblocks) > 1:
3175
if not self._dirblocks[1][0] == '':
3176
raise AssertionError(
3177
"dirblocks missing root directory:\n" + \
3178
pformat(self._dirblocks))
3179
# the dirblocks are sorted by their path components, name, and dir id
3180
dir_names = [d[0].split('/')
3181
for d in self._dirblocks[1:]]
3182
if dir_names != sorted(dir_names):
3183
raise AssertionError(
3184
"dir names are not in sorted order:\n" + \
3185
pformat(self._dirblocks) + \
3188
for dirblock in self._dirblocks:
3189
# within each dirblock, the entries are sorted by filename and
3191
for entry in dirblock[1]:
3192
if dirblock[0] != entry[0][0]:
3193
raise AssertionError(
3195
"doesn't match directory name in\n%r" %
3196
(entry, pformat(dirblock)))
3197
if dirblock[1] != sorted(dirblock[1]):
3198
raise AssertionError(
3199
"dirblock for %r is not sorted:\n%s" % \
3200
(dirblock[0], pformat(dirblock)))
3202
def check_valid_parent():
3203
"""Check that the current entry has a valid parent.
3205
This makes sure that the parent has a record,
3206
and that the parent isn't marked as "absent" in the
3207
current tree. (It is invalid to have a non-absent file in an absent
3210
if entry[0][0:2] == ('', ''):
3211
# There should be no parent for the root row
3213
parent_entry = self._get_entry(tree_index, path_utf8=entry[0][0])
3214
if parent_entry == (None, None):
3215
raise AssertionError(
3216
"no parent entry for: %s in tree %s"
3217
% (this_path, tree_index))
3218
if parent_entry[1][tree_index][0] != 'd':
3219
raise AssertionError(
3220
"Parent entry for %s is not marked as a valid"
3221
" directory. %s" % (this_path, parent_entry,))
3223
# For each file id, for each tree: either
3224
# the file id is not present at all; all rows with that id in the
3225
# key have it marked as 'absent'
3226
# OR the file id is present under exactly one name; any other entries
3227
# that mention that id point to the correct name.
3229
# We check this with a dict per tree pointing either to the present
3230
# name, or None if absent.
3231
tree_count = self._num_present_parents() + 1
3232
id_path_maps = [dict() for i in range(tree_count)]
3233
# Make sure that all renamed entries point to the correct location.
3234
for entry in self._iter_entries():
3235
file_id = entry[0][2]
3236
this_path = osutils.pathjoin(entry[0][0], entry[0][1])
3237
if len(entry[1]) != tree_count:
3238
raise AssertionError(
3239
"wrong number of entry details for row\n%s" \
3240
",\nexpected %d" % \
3241
(pformat(entry), tree_count))
3242
absent_positions = 0
3243
for tree_index, tree_state in enumerate(entry[1]):
3244
this_tree_map = id_path_maps[tree_index]
3245
minikind = tree_state[0]
3246
if minikind in 'ar':
3247
absent_positions += 1
3248
# have we seen this id before in this column?
3249
if file_id in this_tree_map:
3250
previous_path, previous_loc = this_tree_map[file_id]
3251
# any later mention of this file must be consistent with
3252
# what was said before
3254
if previous_path is not None:
3255
raise AssertionError(
3256
"file %s is absent in row %r but also present " \
3258
(file_id, entry, previous_path))
3259
elif minikind == 'r':
3260
target_location = tree_state[1]
3261
if previous_path != target_location:
3262
raise AssertionError(
3263
"file %s relocation in row %r but also at %r" \
3264
% (file_id, entry, previous_path))
3266
# a file, directory, etc - may have been previously
3267
# pointed to by a relocation, which must point here
3268
if previous_path != this_path:
3269
raise AssertionError(
3270
"entry %r inconsistent with previous path %r "
3272
(entry, previous_path, previous_loc))
3273
check_valid_parent()
3276
# absent; should not occur anywhere else
3277
this_tree_map[file_id] = None, this_path
3278
elif minikind == 'r':
3279
# relocation, must occur at expected location
3280
this_tree_map[file_id] = tree_state[1], this_path
3282
this_tree_map[file_id] = this_path, this_path
3283
check_valid_parent()
3284
if absent_positions == tree_count:
3285
raise AssertionError(
3286
"entry %r has no data for any tree." % (entry,))
3287
if self._id_index is not None:
3288
for file_id, entry_keys in self._id_index.iteritems():
3289
for entry_key in entry_keys:
3290
# Check that the entry in the map is pointing to the same
3292
if entry_key[2] != file_id:
3293
raise AssertionError(
3294
'file_id %r did not match entry key %s'
3295
% (file_id, entry_key))
3296
# And that from this entry key, we can look up the original
3298
block_index, present = self._find_block_index_from_key(entry_key)
3300
raise AssertionError('missing block for entry key: %r', entry_key)
3301
entry_index, present = self._find_entry_index(entry_key, self._dirblocks[block_index][1])
3303
raise AssertionError('missing entry for key: %r', entry_key)
3304
if len(entry_keys) != len(set(entry_keys)):
3305
raise AssertionError(
3306
'id_index contained non-unique data for %s'
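
    # Illustrative usage (not part of the original file): tests and debugging
    # sessions can call this directly after mutating the blocks in memory,
    # for example:
    #
    #   state.lock_read()
    #   try:
    #       state._validate()   # raises AssertionError on a broken invariant
    #   finally:
    #       state.unlock()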

    def _wipe_state(self):
        """Forget all state information about the dirstate."""
        self._header_state = DirState.NOT_IN_MEMORY
        self._dirblock_state = DirState.NOT_IN_MEMORY
        self._changes_aborted = False
        self._parents = []
        self._ghosts = []
        self._dirblocks = []
        self._id_index = None
        self._packed_stat_index = None
        self._end_of_header = None
        self._cutoff_time = None
        self._split_path_cache = {}

    def lock_read(self):
        """Acquire a read lock on the dirstate."""
        if self._lock_token is not None:
            raise errors.LockContention(self._lock_token)
        # TODO: jam 20070301 Rather than wiping completely, if the blocks are
        #       already in memory, we could read just the header and check for
        #       any modification. If not modified, we can just leave things
        #       alone
        self._lock_token = lock.ReadLock(self._filename)
        self._lock_state = 'r'
        self._state_file = self._lock_token.f
        self._wipe_state()

    def lock_write(self):
        """Acquire a write lock on the dirstate."""
        if self._lock_token is not None:
            raise errors.LockContention(self._lock_token)
        # TODO: jam 20070301 Rather than wiping completely, if the blocks are
        #       already in memory, we could read just the header and check for
        #       any modification. If not modified, we can just leave things
        #       alone
        self._lock_token = lock.WriteLock(self._filename)
        self._lock_state = 'w'
        self._state_file = self._lock_token.f
        self._wipe_state()

    def unlock(self):
        """Drop any locks held on the dirstate."""
        if self._lock_token is None:
            raise errors.LockNotHeld(self)
        # TODO: jam 20070301 Rather than wiping completely, if the blocks are
        #       already in memory, we could read just the header and check for
        #       any modification. If not modified, we can just leave things
        #       alone
        self._state_file = None
        self._lock_state = None
        self._lock_token.unlock()
        self._lock_token = None
        self._split_path_cache = {}

    def _requires_lock(self):
        """Check that a lock is currently held by someone on the dirstate."""
        if not self._lock_token:
            raise errors.ObjectNotLocked(self)

def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}):
    """Return the index where to insert dirname into the dirblocks.

    The return value idx is such that all directories blocks in dirblock[:idx]
    have names < dirname, and all blocks in dirblock[idx:] have names >=
    dirname.

    Optional args lo (default 0) and hi (default len(dirblocks)) bound the
    slice of a to be searched.
    """
    if hi is None:
        hi = len(dirblocks)
    try:
        dirname_split = cache[dirname]
    except KeyError:
        dirname_split = dirname.split('/')
        cache[dirname] = dirname_split
    while lo < hi:
        mid = (lo + hi) // 2
        # Grab the dirname for the current dirblock
        cur = dirblocks[mid][0]
        try:
            cur_split = cache[cur]
        except KeyError:
            cur_split = cur.split('/')
            cache[cur] = cur_split
        if cur_split < dirname_split:
            lo = mid + 1
        else:
            hi = mid
    return lo


def py_update_entry(state, entry, abspath, stat_value,
                    _stat_to_minikind=DirState._stat_to_minikind):
    """Update the entry based on what is actually on disk.

    This function only calculates the sha if it needs to - if the entry is
    uncachable, or clearly different to the first parent's entry, no sha
    is calculated, and None is returned.

    :param state: The dirstate this entry is in.
    :param entry: This is the dirblock entry for the file in question.
    :param abspath: The path on disk for this file.
    :param stat_value: The stat value done on the path.
    :return: None, or The sha1 hexdigest of the file (40 bytes) or link
        target of a symlink.
    """
    try:
        minikind = _stat_to_minikind[stat_value.st_mode & 0170000]
    except KeyError:
        # Unhandled kind
        return None
    packed_stat = pack_stat(stat_value)
    (saved_minikind, saved_link_or_sha1, saved_file_size,
     saved_executable, saved_packed_stat) = entry[1][0]

    if minikind == 'd' and saved_minikind == 't':
        minikind = 't'
    if (minikind == saved_minikind
        and packed_stat == saved_packed_stat):
        # The stat hasn't changed since we saved, so we can re-use the
        # saved sha hash.
        if minikind == 'd':
            return None
        # size should also be in packed_stat
        if saved_file_size == stat_value.st_size:
            return saved_link_or_sha1

    # If we have gotten this far, that means that we need to actually
    # process this entry.
    link_or_sha1 = None
    worth_saving = True
    if minikind == 'f':
        executable = state._is_executable(stat_value.st_mode,
                                          saved_executable)
        if state._cutoff_time is None:
            state._sha_cutoff_time()
        if (stat_value.st_mtime < state._cutoff_time
            and stat_value.st_ctime < state._cutoff_time
            and len(entry[1]) > 1
            and entry[1][1][0] != 'a'):
            # Could check for size changes for further optimised
            # avoidance of sha1's. However the most prominent case of
            # over-shaing is during initial add, which this catches.
            # Besides, if content filtering happens, size and sha
            # are calculated at the same time, so checking just the size
            # gains nothing w.r.t. performance.
            link_or_sha1 = state._sha1_file(abspath)
            entry[1][0] = ('f', link_or_sha1, stat_value.st_size,
                           executable, packed_stat)
        else:
            entry[1][0] = ('f', '', stat_value.st_size,
                           executable, DirState.NULLSTAT)
            worth_saving = False
    elif minikind == 'd':
        link_or_sha1 = None
        entry[1][0] = ('d', '', 0, False, packed_stat)
        if saved_minikind != 'd':
            # This changed from something into a directory. Make sure we
            # have a directory block for it. This doesn't happen very
            # often, so this doesn't have to be super fast.
            block_index, entry_index, dir_present, file_present = \
                state._get_block_entry_index(entry[0][0], entry[0][1], 0)
            state._ensure_block(block_index, entry_index,
                                osutils.pathjoin(entry[0][0], entry[0][1]))
        else:
            worth_saving = False
    elif minikind == 'l':
        if saved_minikind == 'l':
            worth_saving = False
        link_or_sha1 = state._read_link(abspath, saved_link_or_sha1)
        if state._cutoff_time is None:
            state._sha_cutoff_time()
        if (stat_value.st_mtime < state._cutoff_time
            and stat_value.st_ctime < state._cutoff_time):
            entry[1][0] = ('l', link_or_sha1, stat_value.st_size,
                           False, packed_stat)
        else:
            entry[1][0] = ('l', '', stat_value.st_size,
                           False, DirState.NULLSTAT)
    if worth_saving:
        state._mark_modified([entry])
    return link_or_sha1
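
# Illustrative note (assumption, mirroring the docstring above): a returned
# sha1 means the cached ('f', sha1, size, exec, packed_stat) row was either
# reused or refreshed, while a None return means the entry was uncachable
# (modified too recently) or not a regular file or symlink; callers such as
# _process_entry treat None as "recompute or ignore the fingerprint".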

class ProcessEntryPython(object):

    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id",
        "last_source_parent", "last_target_parent", "include_unchanged",
        "partial", "use_filesystem_for_exec", "utf8_decode",
        "searched_specific_files", "search_specific_files",
        "searched_exact_paths", "search_specific_file_parents", "seen_ids",
        "state", "source_index", "target_index", "want_unversioned", "tree"]

    def __init__(self, include_unchanged, use_filesystem_for_exec,
        search_specific_files, state, source_index, target_index,
        want_unversioned, tree):
        self.old_dirname_to_file_id = {}
        self.new_dirname_to_file_id = {}
        # Are we doing a partial iter_changes?
        self.partial = search_specific_files != set([''])
        # Using a list so that we can access the values and change them in
        # nested scope. Each one is [path, file_id, entry]
        self.last_source_parent = [None, None]
        self.last_target_parent = [None, None]
        self.include_unchanged = include_unchanged
        self.use_filesystem_for_exec = use_filesystem_for_exec
        self.utf8_decode = cache_utf8._utf8_decode
        # for all search_indexs in each path at or under each element of
        # search_specific_files, if the detail is relocated: add the id, and
        # add the relocated path as one to search if its not searched already.
        # If the detail is not relocated, add the id.
        self.searched_specific_files = set()
        # When we search exact paths without expanding downwards, we record
        # that here.
        self.searched_exact_paths = set()
        self.search_specific_files = search_specific_files
        # The parents up to the root of the paths we are searching.
        # After all normal paths are returned, these specific items are returned.
        self.search_specific_file_parents = set()
        # The ids we've sent out in the delta.
        self.seen_ids = set()
        self.state = state
        self.source_index = source_index
        self.target_index = target_index
        if target_index != 0:
            # A lot of code in here depends on target_index == 0
            raise errors.BzrError('unsupported target index')
        self.want_unversioned = want_unversioned
        self.tree = tree
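
    # Illustrative sketch (hypothetical arguments, not original code): a full
    # comparison of the working tree against the first basis is driven by
    # something like
    #
    #   ProcessEntryPython(include_unchanged=False,
    #                      use_filesystem_for_exec=True,
    #                      search_specific_files=set(['']),
    #                      state=state, source_index=1, target_index=0,
    #                      want_unversioned=False, tree=tree).iter_changes()
    #
    # where source_index selects the parent-tree column and target_index must
    # be 0 (the working tree), as enforced above.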

    def _process_entry(self, entry, path_info, pathjoin=osutils.pathjoin):
        """Compare an entry and real disk to generate delta information.

        :param path_info: top_relpath, basename, kind, lstat, abspath for
            the path of entry. If None, then the path is considered absent in
            the target (Perhaps we should pass in a concrete entry for this ?)
            Basename is returned as a utf8 string because we expect this
            tuple will be ignored, and don't want to take the time to
            decode.
        :return: (iter_changes_result, changed). If the entry has not been
            handled then changed is None. Otherwise it is False if no content
            or metadata changes have occurred, and True if any content or
            metadata change has occurred. If self.include_unchanged is True then
            if changed is not None, iter_changes_result will always be a result
            tuple. Otherwise, iter_changes_result is None unless changed is
            True.
        """
        if self.source_index is None:
            source_details = DirState.NULL_PARENT_DETAILS
        else:
            source_details = entry[1][self.source_index]
        target_details = entry[1][self.target_index]
        target_minikind = target_details[0]
        if path_info is not None and target_minikind in 'fdlt':
            if not (self.target_index == 0):
                raise AssertionError()
            link_or_sha1 = update_entry(self.state, entry,
                abspath=path_info[4], stat_value=path_info[3])
            # The entry may have been modified by update_entry
            target_details = entry[1][self.target_index]
            target_minikind = target_details[0]
        else:
            link_or_sha1 = None
        file_id = entry[0][2]
        source_minikind = source_details[0]
        if source_minikind in 'fdltr' and target_minikind in 'fdlt':
            # claimed content in both: diff
            #   r    | fdlt   |      | add source to search, add id path move and perform
            #        |        |      | diff check on source-target
            #   r    | fdlt   |  a   | dangling file that was present in the basis.
            #        |        |      | ???
            if source_minikind in 'r':
                # add the source to the search path to find any children it
                # has.  TODO ? : only add if it is a container ?
                if not osutils.is_inside_any(self.searched_specific_files,
                                             source_details[1]):
                    self.search_specific_files.add(source_details[1])
                # generate the old path; this is needed for stating later
                # as well.
                old_path = source_details[1]
                old_dirname, old_basename = os.path.split(old_path)
                path = pathjoin(entry[0][0], entry[0][1])
                old_entry = self.state._get_entry(self.source_index,
                                                  path_utf8=old_path)
                # update the source details variable to be the real
                # location.
                if old_entry == (None, None):
                    raise errors.CorruptDirstate(self.state._filename,
                        "entry '%s/%s' is considered renamed from %r"
                        " but source does not exist\n"
                        "entry: %s" % (entry[0][0], entry[0][1], old_path, entry))
                source_details = old_entry[1][self.source_index]
                source_minikind = source_details[0]
            else:
                old_dirname = entry[0][0]
                old_basename = entry[0][1]
                old_path = path = None
            if path_info is None:
                # the file is missing on disk, show as removed.
                content_change = True
                target_kind = None
                target_exec = False
            else:
                # source and target are both versioned and disk file is present.
                target_kind = path_info[2]
                if target_kind == 'directory':
                    if path is None:
                        old_path = path = pathjoin(old_dirname, old_basename)
                    self.new_dirname_to_file_id[path] = file_id
                    if source_minikind != 'd':
                        content_change = True
                    else:
                        # directories have no fingerprint
                        content_change = False
                    target_exec = False
                elif target_kind == 'file':
                    if source_minikind != 'f':
                        content_change = True
                    else:
                        # Check the sha. We can't just rely on the size as
                        # content filtering may mean differ sizes actually
                        # map to the same content
                        if link_or_sha1 is None:
                            # Stat cache miss:
                            statvalue, link_or_sha1 = \
                                self.state._sha1_provider.stat_and_sha1(
                                path_info[4])
                            self.state._observed_sha1(entry, link_or_sha1,
                                statvalue)
                        content_change = (link_or_sha1 != source_details[1])
                    # Target details is updated at update_entry time
                    if self.use_filesystem_for_exec:
                        # We don't need S_ISREG here, because we are sure
                        # we are dealing with a file.
                        target_exec = bool(stat.S_IEXEC & path_info[3].st_mode)
                    else:
                        target_exec = target_details[3]
                elif target_kind == 'symlink':
                    if source_minikind != 'l':
                        content_change = True
                    else:
                        content_change = (link_or_sha1 != source_details[1])
                    target_exec = False
                elif target_kind == 'tree-reference':
                    if source_minikind != 't':
                        content_change = True
                    else:
                        content_change = False
                    target_exec = False
                else:
                    if path is None:
                        path = pathjoin(old_dirname, old_basename)
                    raise errors.BadFileKindError(path, path_info[2])
            if source_minikind == 'd':
                if path is None:
                    old_path = path = pathjoin(old_dirname, old_basename)
                self.old_dirname_to_file_id[old_path] = file_id
            # parent id is the entry for the path in the target tree
            if old_basename and old_dirname == self.last_source_parent[0]:
                source_parent_id = self.last_source_parent[1]
            else:
                try:
                    source_parent_id = self.old_dirname_to_file_id[old_dirname]
                except KeyError:
                    source_parent_entry = self.state._get_entry(self.source_index,
                                                           path_utf8=old_dirname)
                    source_parent_id = source_parent_entry[0][2]
                if source_parent_id == entry[0][2]:
                    # This is the root, so the parent is None
                    source_parent_id = None
                else:
                    self.last_source_parent[0] = old_dirname
                    self.last_source_parent[1] = source_parent_id
            new_dirname = entry[0][0]
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
                target_parent_id = self.last_target_parent[1]
            else:
                try:
                    target_parent_id = self.new_dirname_to_file_id[new_dirname]
                except KeyError:
                    # TODO: We don't always need to do the lookup, because the
                    #       parent entry will be the same as the source entry.
                    target_parent_entry = self.state._get_entry(self.target_index,
                                                           path_utf8=new_dirname)
                    if target_parent_entry == (None, None):
                        raise AssertionError(
                            "Could not find target parent in wt: %s\nparent of: %s"
                            % (new_dirname, entry))
                    target_parent_id = target_parent_entry[0][2]
                if target_parent_id == entry[0][2]:
                    # This is the root, so the parent is None
                    target_parent_id = None
                else:
                    self.last_target_parent[0] = new_dirname
                    self.last_target_parent[1] = target_parent_id

            source_exec = source_details[3]
            changed = (content_change
                or source_parent_id != target_parent_id
                or old_basename != entry[0][1]
                or source_exec != target_exec
                )
            if not changed and not self.include_unchanged:
                return None, False
            else:
                if old_path is None:
                    old_path = path = pathjoin(old_dirname, old_basename)
                    old_path_u = self.utf8_decode(old_path)[0]
                    path_u = old_path_u
                else:
                    old_path_u = self.utf8_decode(old_path)[0]
                    if old_path == path:
                        path_u = old_path_u
                    else:
                        path_u = self.utf8_decode(path)[0]
                source_kind = DirState._minikind_to_kind[source_minikind]
                return (entry[0][2],
                       (old_path_u, path_u),
                       content_change,
                       (True, True),
                       (source_parent_id, target_parent_id),
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
                       (source_kind, target_kind),
                       (source_exec, target_exec)), changed
        elif source_minikind in 'a' and target_minikind in 'fdlt':
            # looks like a new file
            path = pathjoin(entry[0][0], entry[0][1])
            # parent id is the entry for the path in the target tree
            # TODO: these are the same for an entire directory: cache em.
            parent_id = self.state._get_entry(self.target_index,
                                              path_utf8=entry[0][0])[0][2]
            if parent_id == entry[0][2]:
                parent_id = None
            if path_info is not None:
                # Present on disk:
                if self.use_filesystem_for_exec:
                    # We need S_ISREG here, because we aren't sure if this
                    # is a file or not.
                    target_exec = bool(
                        stat.S_ISREG(path_info[3].st_mode)
                        and stat.S_IEXEC & path_info[3].st_mode)
                else:
                    target_exec = target_details[3]
                return (entry[0][2],
                       (None, self.utf8_decode(path)[0]),
                       True,
                       (False, True),
                       (None, parent_id),
                       (None, self.utf8_decode(entry[0][1])[0]),
                       (None, path_info[2]),
                       (None, target_exec)), True
            else:
                # Its a missing file, report it as such.
                return (entry[0][2],
                       (None, self.utf8_decode(path)[0]),
                       False,
                       (False, True),
                       (None, parent_id),
                       (None, self.utf8_decode(entry[0][1])[0]),
                       (None, None),
                       (None, False)), True
        elif source_minikind in 'fdlt' and target_minikind in 'a':
            # unversioned, possibly, or possibly not deleted: we dont care.
            # if its still on disk, *and* theres no other entry at this
            # path [we dont know this in this routine at the moment -
            # perhaps we should change this - then it would be an unknown.
            old_path = pathjoin(entry[0][0], entry[0][1])
            # parent id is the entry for the path in the target tree
            parent_id = self.state._get_entry(self.source_index, path_utf8=entry[0][0])[0][2]
            if parent_id == entry[0][2]:
                parent_id = None
            return (entry[0][2],
                   (self.utf8_decode(old_path)[0], None),
                   True,
                   (True, False),
                   (parent_id, None),
                   (self.utf8_decode(entry[0][1])[0], None),
                   (DirState._minikind_to_kind[source_minikind], None),
                   (source_details[3], None)), True
        elif source_minikind in 'fdlt' and target_minikind in 'r':
            # a rename; could be a true rename, or a rename inherited from
            # a renamed parent. TODO: handle this efficiently. Its not
            # common case to rename dirs though, so a correct but slow
            # implementation will do.
            if not osutils.is_inside_any(self.searched_specific_files, target_details[1]):
                self.search_specific_files.add(target_details[1])
        elif source_minikind in 'ra' and target_minikind in 'ra':
            # neither of the selected trees contain this file,
            # so skip over it. This is not currently directly tested, but
            # is indirectly via test_too_much.TestCommands.test_conflicts.
            pass
        else:
            raise AssertionError("don't know how to compare "
                "source_minikind=%r, target_minikind=%r"
                % (source_minikind, target_minikind))
        return None, None
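
    # Illustrative note (not part of the original file): the result tuples
    # built above follow the iter_changes convention
    #
    #   (file_id, (old_path, new_path), content_changed,
    #    (old_versioned, new_versioned), (old_parent_id, new_parent_id),
    #    (old_name, new_name), (old_kind, new_kind), (old_exec, new_exec))
    #
    # so a brand-new file reports (False, True) for the versioned pair and
    # None for every "old" slot, while a deletion is the mirror image.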

    def _gather_result_for_consistency(self, result):
        """Check a result we will yield to make sure we are consistent later.

        This gathers result's parents into a set to output later.

        :param result: A result tuple.
        """
        if not self.partial or not result[0]:
            return
        self.seen_ids.add(result[0])
        new_path = result[1][1]
        if new_path:
            # Not the root and not a delete: queue up the parents of the path.
            self.search_specific_file_parents.update(
                osutils.parent_directories(new_path.encode('utf8')))
            # Add the root directory which parent_directories does not
            # provide.
            self.search_specific_file_parents.add('')
3798
def iter_changes(self):
3799
"""Iterate over the changes."""
3800
utf8_decode = cache_utf8._utf8_decode
3801
_cmp_by_dirs = cmp_by_dirs
3802
_process_entry = self._process_entry
3803
search_specific_files = self.search_specific_files
3804
searched_specific_files = self.searched_specific_files
3805
splitpath = osutils.splitpath
3807
# compare source_index and target_index at or under each element of search_specific_files.
3808
# follow the following comparison table. Note that we only want to do diff operations when
3809
# the target is fdl because thats when the walkdirs logic will have exposed the pathinfo
3813
# Source | Target | disk | action
3814
# r | fdlt | | add source to search, add id path move and perform
3815
# | | | diff check on source-target
3816
# r | fdlt | a | dangling file that was present in the basis.
3818
# r | a | | add source to search
3820
# r | r | | this path is present in a non-examined tree, skip.
3821
# r | r | a | this path is present in a non-examined tree, skip.
3822
# a | fdlt | | add new id
3823
# a | fdlt | a | dangling locally added file, skip
3824
# a | a | | not present in either tree, skip
3825
# a | a | a | not present in any tree, skip
3826
# a | r | | not present in either tree at this path, skip as it
3827
# | | | may not be selected by the users list of paths.
3828
# a | r | a | not present in either tree at this path, skip as it
3829
# | | | may not be selected by the users list of paths.
3830
# fdlt | fdlt | | content in both: diff them
3831
# fdlt | fdlt | a | deleted locally, but not unversioned - show as deleted ?
3832
# fdlt | a | | unversioned: output deleted id for now
3833
# fdlt | a | a | unversioned and deleted: output deleted id
3834
# fdlt | r | | relocated in this tree, so add target to search.
3835
# | | | Dont diff, we will see an r,fd; pair when we reach
3836
# | | | this id at the other path.
3837
# fdlt | r | a | relocated in this tree, so add target to search.
3838
# | | | Dont diff, we will see an r,fd; pair when we reach
3839
# | | | this id at the other path.
3841
# TODO: jam 20070516 - Avoid the _get_entry lookup overhead by
3842
# keeping a cache of directories that we have seen.
3844
while search_specific_files:
3845
# TODO: the pending list should be lexically sorted? the
3846
# interface doesn't require it.
3847
current_root = search_specific_files.pop()
3848
current_root_unicode = current_root.decode('utf8')
3849
searched_specific_files.add(current_root)
3850
# process the entries for this containing directory: the rest will be
3851
# found by their parents recursively.
3852
root_entries = self.state._entries_for_path(current_root)
3853
root_abspath = self.tree.abspath(current_root_unicode)
3855
root_stat = os.lstat(root_abspath)
3857
if e.errno == errno.ENOENT:
3858
# the path does not exist: let _process_entry know that.
3859
root_dir_info = None
3861
# some other random error: hand it up.
3864
root_dir_info = ('', current_root,
3865
osutils.file_kind_from_stat_mode(root_stat.st_mode), root_stat,
3867
if root_dir_info[2] == 'directory':
3868
if self.tree._directory_is_tree_reference(
3869
current_root.decode('utf8')):
3870
root_dir_info = root_dir_info[:2] + \
3871
('tree-reference',) + root_dir_info[3:]
3873
if not root_entries and not root_dir_info:
3874
# this specified path is not present at all, skip it.
3876
path_handled = False
3877
for entry in root_entries:
3878
result, changed = _process_entry(entry, root_dir_info)
3879
if changed is not None:
3882
self._gather_result_for_consistency(result)
3883
if changed or self.include_unchanged:
3885
if self.want_unversioned and not path_handled and root_dir_info:
3886
new_executable = bool(
3887
stat.S_ISREG(root_dir_info[3].st_mode)
3888
and stat.S_IEXEC & root_dir_info[3].st_mode)
3890
(None, current_root_unicode),
3894
(None, splitpath(current_root_unicode)[-1]),
3895
(None, root_dir_info[2]),
3896
(None, new_executable)
3898
initial_key = (current_root, '', '')
3899
block_index, _ = self.state._find_block_index_from_key(initial_key)
3900
if block_index == 0:
3901
# we have processed the total root already, but because the
3902
# initial key matched it we should skip it here.
3904
if root_dir_info and root_dir_info[2] == 'tree-reference':
3905
current_dir_info = None
3907
dir_iterator = osutils._walkdirs_utf8(root_abspath, prefix=current_root)
3909
current_dir_info = dir_iterator.next()
3911
# on win32, python2.4 has e.errno == ERROR_DIRECTORY, but
3912
# python 2.5 has e.errno == EINVAL,
3913
# and e.winerror == ERROR_DIRECTORY
3914
e_winerror = getattr(e, 'winerror', None)
3915
win_errors = (ERROR_DIRECTORY, ERROR_PATH_NOT_FOUND)
3916
# there may be directories in the inventory even though
3917
# this path is not a file on disk: so mark it as end of
3919
if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
3920
current_dir_info = None
3921
elif (sys.platform == 'win32'
3922
and (e.errno in win_errors
3923
or e_winerror in win_errors)):
3924
current_dir_info = None
3928
if current_dir_info[0][0] == '':
3929
# remove .bzr from iteration
3930
bzr_index = bisect.bisect_left(current_dir_info[1], ('.bzr',))
3931
if current_dir_info[1][bzr_index][0] != '.bzr':
3932
raise AssertionError()
3933
del current_dir_info[1][bzr_index]
3934
# walk until both the directory listing and the versioned metadata
3936
if (block_index < len(self.state._dirblocks) and
3937
osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):
3938
current_block = self.state._dirblocks[block_index]
3940
current_block = None
3941
while (current_dir_info is not None or
3942
current_block is not None):
3943
if (current_dir_info and current_block
3944
and current_dir_info[0][0] != current_block[0]):
3945
if _cmp_by_dirs(current_dir_info[0][0], current_block[0]) < 0:
3946
# filesystem data refers to paths not covered by the dirblock.
3947
# this has two possibilities:
3948
# A) it is versioned but empty, so there is no block for it
3949
# B) it is not versioned.
3951
# if (A) then we need to recurse into it to check for
3952
# new unknown files or directories.
3953
# if (B) then we should ignore it, because we don't
3954
# recurse into unknown directories.
3956
while path_index < len(current_dir_info[1]):
3957
current_path_info = current_dir_info[1][path_index]
3958
if self.want_unversioned:
3959
if current_path_info[2] == 'directory':
3960
if self.tree._directory_is_tree_reference(
3961
current_path_info[0].decode('utf8')):
3962
current_path_info = current_path_info[:2] + \
3963
('tree-reference',) + current_path_info[3:]
3964
new_executable = bool(
3965
stat.S_ISREG(current_path_info[3].st_mode)
3966
and stat.S_IEXEC & current_path_info[3].st_mode)
3968
(None, utf8_decode(current_path_info[0])[0]),
3972
(None, utf8_decode(current_path_info[1])[0]),
3973
(None, current_path_info[2]),
3974
(None, new_executable))
3975
# dont descend into this unversioned path if it is
3977
if current_path_info[2] in ('directory',
3979
del current_dir_info[1][path_index]
3983
# This dir info has been handled, go to the next
3985
current_dir_info = dir_iterator.next()
3986
except StopIteration:
3987
current_dir_info = None
                    else:
                        # We have a dirblock entry for this location, but there
                        # is no filesystem path for this. This is most likely
                        # because a directory was removed from the disk.
                        # We don't have to report the missing directory,
                        # because that should have already been handled, but we
                        # need to handle all of the files that are contained
                        # within.
                        for current_entry in current_block[1]:
                            # entry referring to file not present on disk.
                            # advance the entry only, after processing.
                            result, changed = _process_entry(current_entry, None)
                            if changed is not None:
                                if changed:
                                    self._gather_result_for_consistency(result)
                                if changed or self.include_unchanged:
                                    yield result
                        block_index += 1
                        if (block_index < len(self.state._dirblocks) and
                            osutils.is_inside(current_root,
                                self.state._dirblocks[block_index][0])):
                            current_block = self.state._dirblocks[block_index]
                        else:
                            current_block = None
                entry_index = 0
                if current_block and entry_index < len(current_block[1]):
                    current_entry = current_block[1][entry_index]
                else:
                    current_entry = None
                advance_entry = True
                path_index = 0
                if current_dir_info and path_index < len(current_dir_info[1]):
                    current_path_info = current_dir_info[1][path_index]
                    if current_path_info[2] == 'directory':
                        if self.tree._directory_is_tree_reference(
                            current_path_info[0].decode('utf8')):
                            current_path_info = current_path_info[:2] + \
                                ('tree-reference',) + current_path_info[3:]
                else:
                    current_path_info = None
                advance_path = True
                path_handled = False
                while (current_entry is not None or
                       current_path_info is not None):
                    if current_entry is None:
                        # the check for path_handled when the path is advanced
                        # will yield this path if needed.
                        pass
                    elif current_path_info is None:
                        # no path is fine: the per entry code will handle it.
                        result, changed = _process_entry(current_entry, current_path_info)
                        if changed is not None:
                            if changed:
                                self._gather_result_for_consistency(result)
                            if changed or self.include_unchanged:
                                yield result
                    elif (current_entry[0][1] != current_path_info[1]
                          or current_entry[1][self.target_index][0] in 'ar'):
                        # The current path on disk doesn't match the dirblock
                        # record. Either the dirblock is marked as absent, or
                        # the file on disk is not present at all in the
                        # dirblock. Either way, report about the dirblock
                        # entry, and let other code handle the filesystem one.

                        # Compare the basename for these files to determine
                        # who goes first.
                        if current_path_info[1] < current_entry[0][1]:
                            # extra file on disk: pass for now, but only
                            # increment the path, not the entry
                            advance_entry = False
                        else:
                            # entry referring to file not present on disk.
                            # advance the entry only, after processing.
                            result, changed = _process_entry(current_entry, None)
                            if changed is not None:
                                if changed:
                                    self._gather_result_for_consistency(result)
                                if changed or self.include_unchanged:
                                    yield result
                            advance_path = False
                    else:
                        result, changed = _process_entry(current_entry, current_path_info)
                        if changed is not None:
                            path_handled = True
                            if changed:
                                self._gather_result_for_consistency(result)
                            if changed or self.include_unchanged:
                                yield result
                    if advance_entry and current_entry is not None:
                        entry_index += 1
                        if entry_index < len(current_block[1]):
                            current_entry = current_block[1][entry_index]
                        else:
                            current_entry = None
                    else:
                        advance_entry = True # reset the advance flag
                    if advance_path and current_path_info is not None:
                        if not path_handled:
                            # unversioned in all regards
                            if self.want_unversioned:
                                new_executable = bool(
                                    stat.S_ISREG(current_path_info[3].st_mode)
                                    and stat.S_IEXEC & current_path_info[3].st_mode)
                                try:
                                    relpath_unicode = utf8_decode(current_path_info[0])[0]
                                except UnicodeDecodeError:
                                    raise errors.BadFilenameEncoding(
                                        current_path_info[0], osutils._fs_enc)
                                yield (None,
                                    (None, relpath_unicode),
                                    # ...
                                    (None, utf8_decode(current_path_info[1])[0]),
                                    (None, current_path_info[2]),
                                    (None, new_executable))
                            # don't descend into this unversioned path if it is
                            # a dir
                            if current_path_info[2] in ('directory',):
                                del current_dir_info[1][path_index]
                                path_index -= 1
                        # don't descend the disk iterator into any tree
                        # paths.
                        if current_path_info[2] == 'tree-reference':
                            del current_dir_info[1][path_index]
                            path_index -= 1
                        path_index += 1
                        if path_index < len(current_dir_info[1]):
                            current_path_info = current_dir_info[1][path_index]
                            if current_path_info[2] == 'directory':
                                if self.tree._directory_is_tree_reference(
                                    current_path_info[0].decode('utf8')):
                                    current_path_info = current_path_info[:2] + \
                                        ('tree-reference',) + current_path_info[3:]
                        else:
                            current_path_info = None
                        path_handled = False
                    else:
                        advance_path = True # reset the advance flag.
                if current_block is not None:
                    block_index += 1
                    if (block_index < len(self.state._dirblocks) and
                        osutils.is_inside(current_root, self.state._dirblocks[block_index][0])):
                        current_block = self.state._dirblocks[block_index]
                    else:
                        current_block = None
                if current_dir_info is not None:
                    try:
                        current_dir_info = dir_iterator.next()
                    except StopIteration:
                        current_dir_info = None
        for result in self._iter_specific_file_parents():
            yield result
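
    # Note: the change tuples yielded above (the `result` values produced by
    # _process_entry) follow bzrlib's standard iter_changes result shape:
    #
    #   (file_id,
    #    (path_in_source, path_in_target),
    #    changed_content,
    #    (versioned_in_source, versioned_in_target),
    #    (parent_id_in_source, parent_id_in_target),
    #    (name_in_source, name_in_target),
    #    (kind_in_source, kind_in_target),
    #    (executable_in_source, executable_in_target))
    #
    # which is why unversioned paths are reported with None for the file id
    # and source-side values, and why result[6] is read as the
    # (old kind, new kind) pair in _iter_specific_file_parents below.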
    def _iter_specific_file_parents(self):
        """Iterate over the specific file parents."""
        while self.search_specific_file_parents:
            # Process the parent directories for the paths we were iterating.
            # Even in extremely large trees this should be modest, so currently
            # no attempt is made to optimise.
            path_utf8 = self.search_specific_file_parents.pop()
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
                # We've examined this path.
                continue
            if path_utf8 in self.searched_exact_paths:
                # We've examined this path.
                continue
            path_entries = self.state._entries_for_path(path_utf8)
            # We need either one or two entries. If the path in
            # self.target_index has moved (so the entry in source_index is in
            # 'ar') then we need to also look for the entry for this path in
            # self.source_index, to output the appropriate delete-or-rename.
            selected_entries = []
            found_item = False
            for candidate_entry in path_entries:
                # Find entries present in target at this path:
                if candidate_entry[1][self.target_index][0] not in 'ar':
                    found_item = True
                    selected_entries.append(candidate_entry)
                # Find entries present in source at this path:
                elif (self.source_index is not None and
                      candidate_entry[1][self.source_index][0] not in 'ar'):
                    found_item = True
                    if candidate_entry[1][self.target_index][0] == 'a':
                        # Deleted, emit it here.
                        selected_entries.append(candidate_entry)
                    else:
                        # renamed, emit it when we process the directory it
                        # ended up at.
                        self.search_specific_file_parents.add(
                            candidate_entry[1][self.target_index][1])
            if not found_item:
                raise AssertionError(
                    "Missing entry for specific path parent %r, %r" % (
                    path_utf8, path_entries))
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
            for entry in selected_entries:
                if entry[0][2] in self.seen_ids:
                    continue
                result, changed = self._process_entry(entry, path_info)
                if changed is None:
                    raise AssertionError(
                        "Got entry<->path mismatch for specific path "
                        "%r entry %r path_info %r " % (
                        path_utf8, entry, path_info))
                # Only include changes - we're outside the user's requested
                # expansion.
                if changed:
                    self._gather_result_for_consistency(result)
                    if (result[6][0] == 'directory' and
                        result[6][1] != 'directory'):
                        # This stopped being a directory, the old children have
                        # to be included.
                        if entry[1][self.source_index][0] == 'r':
                            # renamed, take the source path
                            entry_path_utf8 = entry[1][self.source_index][1]
                        else:
                            entry_path_utf8 = path_utf8
                        initial_key = (entry_path_utf8, '', '')
                        block_index, _ = self.state._find_block_index_from_key(
                            initial_key)
                        if block_index == 0:
                            # The children of the root are in block index 1.
                            block_index += 1
                        current_block = None
                        if block_index < len(self.state._dirblocks):
                            current_block = self.state._dirblocks[block_index]
                            if not osutils.is_inside(
                                entry_path_utf8, current_block[0]):
                                # No entries for this directory at all.
                                current_block = None
                        if current_block is not None:
                            for entry in current_block[1]:
                                if entry[1][self.source_index][0] in 'ar':
                                    # Not in the source tree, so doesn't have to be
                                    # included.
                                    continue
                                # Path of the entry itself.
                                self.search_specific_file_parents.add(
                                    osutils.pathjoin(*entry[0][:2]))
                if changed or self.include_unchanged:
                    yield result
            self.searched_exact_paths.add(path_utf8)
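
    # A note on the frequent "in 'ar'" tests above: the first character of a
    # dirstate entry's per-tree details is its minikind.  'a' marks a path
    # that is absent in that tree and 'r' marks one that has been relocated
    # (renamed elsewhere), so "minikind in 'ar'" asks "is there no real item
    # at this path in that tree?".  The remaining minikinds are 'f' (file),
    # 'd' (directory), 'l' (symlink) and 't' (tree-reference).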
    def _path_info(self, utf8_path, unicode_path):
        """Generate path_info for unicode_path.

        :return: None if unicode_path does not exist, or a path_info tuple.
        """
        abspath = self.tree.abspath(unicode_path)
def pack_stat(st, _encode=base64.encodestring, _pack=struct.pack):
    """Convert stat values into a packed representation."""
    # jam 20060614 it isn't really worth removing more entries if we
    # are going to leave it in packed form.
    # With only st_mtime and st_mode filesize is 5.5M and read time is 275ms
    # With all entries filesize is 5.9M and read time is maybe 280ms
    # well within the noise margin
    # base64.encode always adds a final newline, so strip it off
    return _encode(_pack('>llllll'
        , st.st_size, st.st_mtime, st.st_ctime
        , st.st_dev, st.st_ino, st.st_mode))[:-1]
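
# Illustrative usage sketch, not part of the original module: pack_stat turns
# the six fields packed above from an os.lstat() result into a short base64
# token that the dirstate file can store and compare cheaply on later runs.
#
#   import os
#   st = os.lstat('README')
#   packed = pack_stat(st)
#   # 'packed' is base64 text with the trailing newline stripped; unchanged
#   # stat fields always reproduce the same token, so a differing token means
#   # the file must be re-examined.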
        try:
            stat = os.lstat(abspath)
        except OSError, e:
            if e.errno == errno.ENOENT:
                # the path does not exist.
                return None
            else:
                raise
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
        dir_info = (utf8_path, utf8_basename,
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
            abspath)
        if dir_info[2] == 'directory':
            if self.tree._directory_is_tree_reference(
                unicode_path):
                self.root_dir_info = self.root_dir_info[:2] + \
                    ('tree-reference',) + self.root_dir_info[3:]
        return dir_info
# Try to load the compiled form if possible
try:
    from bzrlib._dirstate_helpers_pyx import (
        # ...
        ProcessEntryC as _process_entry,
        update_entry as update_entry,
        )
except ImportError, e:
    osutils.failed_to_load_extension(e)
    from bzrlib._dirstate_helpers_py import (
        # ...
        )
    # FIXME: It would be nice to be able to track moved lines so that the
    # corresponding python code can be moved to the _dirstate_helpers_py
    # module. I don't want to break the history for this important piece of
    # code so I left the code here -- vila 20090622
    update_entry = py_update_entry
    _process_entry = ProcessEntryPython
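
# Whichever import succeeds above, the same two names end up bound, so code
# that uses this module sees one interface regardless of whether the compiled
# extension is available.  A minimal sketch of the same fallback pattern, with
# hypothetical module names:
#
#   try:
#       from _fast_helpers import do_work        # optional compiled extension
#   except ImportError, e:
#       osutils.failed_to_load_extension(e)      # report it, then fall back
#       from _slow_helpers import do_work        # pure-Python equivalent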