~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

  • Committer: Joe Julian
  • Date: 2010-01-10 02:25:31 UTC
  • mto: (4634.119.7 2.0)
  • mto: This revision was merged to the branch mainline in revision 4959.
  • Revision ID: joe@julianfamily.org-20100110022531-wqk61rsagz8xsiga
Added MANIFEST.in to allow bdist_rpm to have all the required include files and tools. bdist_rpm will still fail to build correctly on some distributions due to a disttools bug http://bugs.python.org/issue644744

Show diffs side-by-side

added added

removed removed

Lines of Context:
204
204
import bisect
205
205
import binascii
206
206
import errno
 
207
import operator
207
208
import os
208
209
from stat import S_IEXEC
209
210
import stat
1277
1278
    def update_by_delta(self, delta):
1278
1279
        """Apply an inventory delta to the dirstate for tree 0
1279
1280
 
 
1281
        This is the workhorse for apply_inventory_delta in dirstate based
 
1282
        trees.
 
1283
 
1280
1284
        :param delta: An inventory delta.  See Inventory.apply_delta for
1281
1285
            details.
1282
1286
        """
1283
1287
        self._read_dirblocks_if_needed()
 
1288
        encode = cache_utf8.encode
1284
1289
        insertions = {}
1285
1290
        removals = {}
1286
 
        for old_path, new_path, file_id, inv_entry in sorted(delta, reverse=True):
 
1291
        # Accumulate parent references (path_utf8, id), to check for parentless
 
1292
        # items or items placed under files/links/tree-references. We get
 
1293
        # references from every item in the delta that is not a deletion and
 
1294
        # is not itself the root.
 
1295
        parents = set()
 
1296
        # Added ids must not be in the dirstate already. This set holds those
 
1297
        # ids.
 
1298
        new_ids = set()
 
1299
        # This loop transforms the delta to single atomic operations that can
 
1300
        # be executed and validated.
 
1301
        for old_path, new_path, file_id, inv_entry in sorted(
 
1302
            inventory._check_delta_unique_old_paths(
 
1303
            inventory._check_delta_unique_new_paths(
 
1304
            inventory._check_delta_ids_match_entry(
 
1305
            inventory._check_delta_ids_are_valid(
 
1306
            inventory._check_delta_new_path_entry_both_or_None(delta))))),
 
1307
            reverse=True):
1287
1308
            if (file_id in insertions) or (file_id in removals):
1288
 
                raise AssertionError("repeated file id in delta %r" % (file_id,))
 
1309
                raise errors.InconsistentDelta(old_path or new_path, file_id,
 
1310
                    "repeated file_id")
1289
1311
            if old_path is not None:
1290
1312
                old_path = old_path.encode('utf-8')
1291
1313
                removals[file_id] = old_path
 
1314
            else:
 
1315
                new_ids.add(file_id)
1292
1316
            if new_path is not None:
 
1317
                if inv_entry is None:
 
1318
                    raise errors.InconsistentDelta(new_path, file_id,
 
1319
                        "new_path with no entry")
1293
1320
                new_path = new_path.encode('utf-8')
1294
 
                dirname, basename = osutils.split(new_path)
1295
 
                key = (dirname, basename, file_id)
 
1321
                dirname_utf8, basename = osutils.split(new_path)
 
1322
                if basename:
 
1323
                    parents.add((dirname_utf8, inv_entry.parent_id))
 
1324
                key = (dirname_utf8, basename, file_id)
1296
1325
                minikind = DirState._kind_to_minikind[inv_entry.kind]
1297
1326
                if minikind == 't':
1298
 
                    fingerprint = inv_entry.reference_revision
 
1327
                    fingerprint = inv_entry.reference_revision or ''
1299
1328
                else:
1300
1329
                    fingerprint = ''
1301
1330
                insertions[file_id] = (key, minikind, inv_entry.executable,
1320
1349
                                                  child_basename)
1321
1350
                    insertions[child[0][2]] = (key, minikind, executable,
1322
1351
                                               fingerprint, new_child_path)
1323
 
        self._apply_removals(removals.values())
1324
 
        self._apply_insertions(insertions.values())
 
1352
        self._check_delta_ids_absent(new_ids, delta, 0)
 
1353
        try:
 
1354
            self._apply_removals(removals.iteritems())
 
1355
            self._apply_insertions(insertions.values())
 
1356
            # Validate parents
 
1357
            self._after_delta_check_parents(parents, 0)
 
1358
        except errors.BzrError, e:
 
1359
            self._changes_aborted = True
 
1360
            if 'integrity error' not in str(e):
 
1361
                raise
 
1362
            # _get_entry raises BzrError when a request is inconsistent; we
 
1363
            # want such errors to be shown as InconsistentDelta - and that 
 
1364
            # fits the behaviour we trigger.
 
1365
            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
1325
1366
 
1326
1367
    def _apply_removals(self, removals):
1327
 
        for path in sorted(removals, reverse=True):
 
1368
        for file_id, path in sorted(removals, reverse=True,
 
1369
            key=operator.itemgetter(1)):
1328
1370
            dirname, basename = osutils.split(path)
1329
1371
            block_i, entry_i, d_present, f_present = \
1330
1372
                self._get_block_entry_index(dirname, basename, 0)
1331
 
            entry = self._dirblocks[block_i][1][entry_i]
 
1373
            try:
 
1374
                entry = self._dirblocks[block_i][1][entry_i]
 
1375
            except IndexError:
 
1376
                self._changes_aborted = True
 
1377
                raise errors.InconsistentDelta(path, file_id,
 
1378
                    "Wrong path for old path.")
 
1379
            if not f_present or entry[1][0][0] in 'ar':
 
1380
                self._changes_aborted = True
 
1381
                raise errors.InconsistentDelta(path, file_id,
 
1382
                    "Wrong path for old path.")
 
1383
            if file_id != entry[0][2]:
 
1384
                self._changes_aborted = True
 
1385
                raise errors.InconsistentDelta(path, file_id,
 
1386
                    "Attempt to remove path has wrong id - found %r."
 
1387
                    % entry[0][2])
1332
1388
            self._make_absent(entry)
1333
1389
            # See if we have a malformed delta: deleting a directory must not
1334
1390
            # leave crud behind. This increases the number of bisects needed
1342
1398
                # be due to it being in a parent tree, or a corrupt delta.
1343
1399
                for child_entry in self._dirblocks[block_i][1]:
1344
1400
                    if child_entry[1][0][0] not in ('r', 'a'):
 
1401
                        self._changes_aborted = True
1345
1402
                        raise errors.InconsistentDelta(path, entry[0][2],
1346
1403
                            "The file id was deleted but its children were "
1347
1404
                            "not deleted.")
1348
1405
 
1349
1406
    def _apply_insertions(self, adds):
1350
 
        for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
1351
 
            self.update_minimal(key, minikind, executable, fingerprint,
1352
 
                                path_utf8=path_utf8)
 
1407
        try:
 
1408
            for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
 
1409
                self.update_minimal(key, minikind, executable, fingerprint,
 
1410
                                    path_utf8=path_utf8)
 
1411
        except errors.NotVersionedError:
 
1412
            self._changes_aborted = True
 
1413
            raise errors.InconsistentDelta(path_utf8.decode('utf8'), key[2],
 
1414
                "Missing parent")
1353
1415
 
1354
1416
    def update_basis_by_delta(self, delta, new_revid):
1355
1417
        """Update the parents of this tree after a commit.
1399
1461
        # At the same time, to reduce interface friction we convert the input
1400
1462
        # inventory entries to dirstate.
1401
1463
        root_only = ('', '')
 
1464
        # Accumulate parent references (path_utf8, id), to check for parentless
 
1465
        # items or items placed under files/links/tree-references. We get
 
1466
        # references from every item in the delta that is not a deletion and
 
1467
        # is not itself the root.
 
1468
        parents = set()
 
1469
        # Added ids must not be in the dirstate already. This set holds those
 
1470
        # ids.
 
1471
        new_ids = set()
1402
1472
        for old_path, new_path, file_id, inv_entry in delta:
 
1473
            if inv_entry is not None and file_id != inv_entry.file_id:
 
1474
                raise errors.InconsistentDelta(new_path, file_id,
 
1475
                    "mismatched entry file_id %r" % inv_entry)
 
1476
            if new_path is not None:
 
1477
                if inv_entry is None:
 
1478
                    raise errors.InconsistentDelta(new_path, file_id,
 
1479
                        "new_path with no entry")
 
1480
                new_path_utf8 = encode(new_path)
 
1481
                # note the parent for validation
 
1482
                dirname_utf8, basename_utf8 = osutils.split(new_path_utf8)
 
1483
                if basename_utf8:
 
1484
                    parents.add((dirname_utf8, inv_entry.parent_id))
1403
1485
            if old_path is None:
1404
1486
                adds.append((None, encode(new_path), file_id,
1405
1487
                    inv_to_entry(inv_entry), True))
 
1488
                new_ids.add(file_id)
1406
1489
            elif new_path is None:
1407
1490
                deletes.append((encode(old_path), None, file_id, None, True))
1408
1491
            elif (old_path, new_path) != root_only:
1420
1503
                # for 'r' items on every pass.
1421
1504
                self._update_basis_apply_deletes(deletes)
1422
1505
                deletes = []
1423
 
                new_path_utf8 = encode(new_path)
1424
1506
                # Split into an add/delete pair recursively.
1425
1507
                adds.append((None, new_path_utf8, file_id,
1426
1508
                    inv_to_entry(inv_entry), False))
1452
1534
                # of everything.
1453
1535
                changes.append((encode(old_path), encode(new_path), file_id,
1454
1536
                    inv_to_entry(inv_entry)))
1455
 
 
1456
 
        # Finish expunging deletes/first half of renames.
1457
 
        self._update_basis_apply_deletes(deletes)
1458
 
        # Reinstate second half of renames and new paths.
1459
 
        self._update_basis_apply_adds(adds)
1460
 
        # Apply in-situ changes.
1461
 
        self._update_basis_apply_changes(changes)
 
1537
        self._check_delta_ids_absent(new_ids, delta, 1)
 
1538
        try:
 
1539
            # Finish expunging deletes/first half of renames.
 
1540
            self._update_basis_apply_deletes(deletes)
 
1541
            # Reinstate second half of renames and new paths.
 
1542
            self._update_basis_apply_adds(adds)
 
1543
            # Apply in-situ changes.
 
1544
            self._update_basis_apply_changes(changes)
 
1545
            # Validate parents
 
1546
            self._after_delta_check_parents(parents, 1)
 
1547
        except errors.BzrError, e:
 
1548
            self._changes_aborted = True
 
1549
            if 'integrity error' not in str(e):
 
1550
                raise
 
1551
            # _get_entry raises BzrError when a request is inconsistent; we
 
1552
            # want such errors to be shown as InconsistentDelta - and that 
 
1553
            # fits the behaviour we trigger. Partof this is driven by dirstate
 
1554
            # only supporting deltas that turn the basis into a closer fit to
 
1555
            # the active tree.
 
1556
            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
1462
1557
 
1463
1558
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1464
1559
        self._header_state = DirState.IN_MEMORY_MODIFIED
1465
1560
        self._id_index = None
1466
1561
        return
1467
1562
 
 
1563
    def _check_delta_ids_absent(self, new_ids, delta, tree_index):
 
1564
        """Check that none of the file_ids in new_ids are present in a tree."""
 
1565
        if not new_ids:
 
1566
            return
 
1567
        id_index = self._get_id_index()
 
1568
        for file_id in new_ids:
 
1569
            for key in id_index.get(file_id, []):
 
1570
                block_i, entry_i, d_present, f_present = \
 
1571
                    self._get_block_entry_index(key[0], key[1], tree_index)
 
1572
                if not f_present:
 
1573
                    # In a different tree
 
1574
                    continue
 
1575
                entry = self._dirblocks[block_i][1][entry_i]
 
1576
                if entry[0][2] != file_id:
 
1577
                    # Different file_id, so not what we want.
 
1578
                    continue
 
1579
                # NB: No changes made before this helper is called, so no need
 
1580
                # to set the _changes_aborted flag.
 
1581
                raise errors.InconsistentDelta(
 
1582
                    ("%s/%s" % key[0:2]).decode('utf8'), file_id,
 
1583
                    "This file_id is new in the delta but already present in "
 
1584
                    "the target")
 
1585
 
1468
1586
    def _update_basis_apply_adds(self, adds):
1469
1587
        """Apply a sequence of adds to tree 1 during update_basis_by_delta.
1470
1588
 
1535
1653
        null = DirState.NULL_PARENT_DETAILS
1536
1654
        for old_path, new_path, file_id, _, real_delete in deletes:
1537
1655
            if real_delete != (new_path is None):
 
1656
                self._changes_aborted = True
1538
1657
                raise AssertionError("bad delete delta")
1539
1658
            # the entry for this file_id must be in tree 1.
1540
1659
            dirname, basename = osutils.split(old_path)
1573
1692
                    # it is being resurrected here, so blank it out temporarily.
1574
1693
                    self._dirblocks[block_index][1][entry_index][1][1] = null
1575
1694
 
 
1695
    def _after_delta_check_parents(self, parents, index):
 
1696
        """Check that parents required by the delta are all intact.
 
1697
        
 
1698
        :param parents: An iterable of (path_utf8, file_id) tuples which are
 
1699
            required to be present in tree 'index' at path_utf8 with id file_id
 
1700
            and be a directory.
 
1701
        :param index: The column in the dirstate to check for parents in.
 
1702
        """
 
1703
        for dirname_utf8, file_id in parents:
 
1704
            # Get the entry - the ensures that file_id, dirname_utf8 exists and
 
1705
            # has the right file id.
 
1706
            entry = self._get_entry(index, file_id, dirname_utf8)
 
1707
            if entry[1] is None:
 
1708
                self._changes_aborted = True
 
1709
                raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
 
1710
                    file_id, "This parent is not present.")
 
1711
            # Parents of things must be directories
 
1712
            if entry[1][index][0] != 'd':
 
1713
                self._changes_aborted = True
 
1714
                raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
 
1715
                    file_id, "This parent is not a directory.")
 
1716
 
1576
1717
    def _observed_sha1(self, entry, sha1, stat_value,
1577
1718
        _stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
1578
1719
        """Note the sha1 of a file.
1821
1962
        self._read_dirblocks_if_needed()
1822
1963
        if path_utf8 is not None:
1823
1964
            if type(path_utf8) is not str:
1824
 
                raise AssertionError('path_utf8 is not a str: %s %s'
 
1965
                raise errors.BzrError('path_utf8 is not a str: %s %r'
1825
1966
                    % (type(path_utf8), path_utf8))
1826
1967
            # path lookups are faster
1827
1968
            dirname, basename = osutils.split(path_utf8)
2379
2520
        if 'evil' in debug.debug_flags:
2380
2521
            trace.mutter_callsite(1,
2381
2522
                "set_state_from_inventory called; please mutate the tree instead")
 
2523
        tracing = 'dirstate' in debug.debug_flags
 
2524
        if tracing:
 
2525
            trace.mutter("set_state_from_inventory trace:")
2382
2526
        self._read_dirblocks_if_needed()
2383
2527
        # sketch:
2384
2528
        # Two iterators: current data and new data, both in dirblock order.
2393
2537
        new_iterator = new_inv.iter_entries_by_dir()
2394
2538
        # we will be modifying the dirstate, so we need a stable iterator. In
2395
2539
        # future we might write one, for now we just clone the state into a
2396
 
        # list - which is a shallow copy.
 
2540
        # list using a copy so that we see every original item and don't have
 
2541
        # to adjust the position when items are inserted or deleted in the
 
2542
        # underlying dirstate.
2397
2543
        old_iterator = iter(list(self._iter_entries()))
2398
2544
        # both must have roots so this is safe:
2399
2545
        current_new = new_iterator.next()
2433
2579
            # we make both end conditions explicit
2434
2580
            if not current_old:
2435
2581
                # old is finished: insert current_new into the state.
 
2582
                if tracing:
 
2583
                    trace.mutter("Appending from new '%s'.",
 
2584
                        new_path_utf8.decode('utf8'))
2436
2585
                self.update_minimal(new_entry_key, current_new_minikind,
2437
2586
                    executable=current_new[1].executable,
2438
 
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
 
2587
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2588
                    fullscan=True)
2439
2589
                current_new = advance(new_iterator)
2440
2590
            elif not current_new:
2441
2591
                # new is finished
 
2592
                if tracing:
 
2593
                    trace.mutter("Truncating from old '%s/%s'.",
 
2594
                        current_old[0][0].decode('utf8'),
 
2595
                        current_old[0][1].decode('utf8'))
2442
2596
                self._make_absent(current_old)
2443
2597
                current_old = advance(old_iterator)
2444
2598
            elif new_entry_key == current_old[0]:
2451
2605
                # kind has changed.
2452
2606
                if (current_old[1][0][3] != current_new[1].executable or
2453
2607
                    current_old[1][0][0] != current_new_minikind):
 
2608
                    if tracing:
 
2609
                        trace.mutter("Updating in-place change '%s'.",
 
2610
                            new_path_utf8.decode('utf8'))
2454
2611
                    self.update_minimal(current_old[0], current_new_minikind,
2455
2612
                        executable=current_new[1].executable,
2456
 
                        path_utf8=new_path_utf8, fingerprint=fingerprint)
 
2613
                        path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2614
                        fullscan=True)
2457
2615
                # both sides are dealt with, move on
2458
2616
                current_old = advance(old_iterator)
2459
2617
                current_new = advance(new_iterator)
2462
2620
                      and new_entry_key[1:] < current_old[0][1:])):
2463
2621
                # new comes before:
2464
2622
                # add a entry for this and advance new
 
2623
                if tracing:
 
2624
                    trace.mutter("Inserting from new '%s'.",
 
2625
                        new_path_utf8.decode('utf8'))
2465
2626
                self.update_minimal(new_entry_key, current_new_minikind,
2466
2627
                    executable=current_new[1].executable,
2467
 
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
 
2628
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2629
                    fullscan=True)
2468
2630
                current_new = advance(new_iterator)
2469
2631
            else:
2470
2632
                # we've advanced past the place where the old key would be,
2471
2633
                # without seeing it in the new list.  so it must be gone.
 
2634
                if tracing:
 
2635
                    trace.mutter("Deleting from old '%s/%s'.",
 
2636
                        current_old[0][0].decode('utf8'),
 
2637
                        current_old[0][1].decode('utf8'))
2472
2638
                self._make_absent(current_old)
2473
2639
                current_old = advance(old_iterator)
2474
2640
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2475
2641
        self._id_index = None
2476
2642
        self._packed_stat_index = None
 
2643
        if tracing:
 
2644
            trace.mutter("set_state_from_inventory complete.")
2477
2645
 
2478
2646
    def _make_absent(self, current_old):
2479
2647
        """Mark current_old - an entry - as absent for tree 0.
2528
2696
        return last_reference
2529
2697
 
2530
2698
    def update_minimal(self, key, minikind, executable=False, fingerprint='',
2531
 
                       packed_stat=None, size=0, path_utf8=None):
 
2699
        packed_stat=None, size=0, path_utf8=None, fullscan=False):
2532
2700
        """Update an entry to the state in tree 0.
2533
2701
 
2534
2702
        This will either create a new entry at 'key' or update an existing one.
2545
2713
        :param size: Size information for new entry
2546
2714
        :param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing
2547
2715
                extra computation.
 
2716
        :param fullscan: If True then a complete scan of the dirstate is being
 
2717
            done and checking for duplicate rows should not be done. This
 
2718
            should only be set by set_state_from_inventory and similar methods.
2548
2719
 
2549
2720
        If packed_stat and fingerprint are not given, they're invalidated in
2550
2721
        the entry.
2559
2730
        new_details = (minikind, fingerprint, size, executable, packed_stat)
2560
2731
        id_index = self._get_id_index()
2561
2732
        if not present:
 
2733
            # New record. Check there isn't a entry at this path already.
 
2734
            if not fullscan:
 
2735
                low_index, _ = self._find_entry_index(key[0:2] + ('',), block)
 
2736
                while low_index < len(block):
 
2737
                    entry = block[low_index]
 
2738
                    if entry[0][0:2] == key[0:2]:
 
2739
                        if entry[1][0][0] not in 'ar':
 
2740
                            # This entry has the same path (but a different id) as
 
2741
                            # the new entry we're adding, and is present in ths
 
2742
                            # tree.
 
2743
                            raise errors.InconsistentDelta(
 
2744
                                ("%s/%s" % key[0:2]).decode('utf8'), key[2],
 
2745
                                "Attempt to add item at path already occupied by "
 
2746
                                "id %r" % entry[0][2])
 
2747
                        low_index += 1
 
2748
                    else:
 
2749
                        break
2562
2750
            # new entry, synthesis cross reference here,
2563
2751
            existing_keys = id_index.setdefault(key[2], set())
2564
2752
            if not existing_keys:
2569
2757
                # grab one of them and use it to generate parent
2570
2758
                # relocation/absent entries.
2571
2759
                new_entry = key, [new_details]
2572
 
                for other_key in existing_keys:
 
2760
                # existing_keys can be changed as we iterate.
 
2761
                for other_key in tuple(existing_keys):
2573
2762
                    # change the record at other to be a pointer to this new
2574
2763
                    # record. The loop looks similar to the change to
2575
2764
                    # relocations when updating an existing record but its not:
2576
2765
                    # the test for existing kinds is different: this can be
2577
2766
                    # factored out to a helper though.
2578
 
                    other_block_index, present = self._find_block_index_from_key(other_key)
2579
 
                    if not present:
2580
 
                        raise AssertionError('could not find block for %s' % (other_key,))
2581
 
                    other_entry_index, present = self._find_entry_index(other_key,
2582
 
                                            self._dirblocks[other_block_index][1])
2583
 
                    if not present:
2584
 
                        raise AssertionError('could not find entry for %s' % (other_key,))
 
2767
                    other_block_index, present = self._find_block_index_from_key(
 
2768
                        other_key)
 
2769
                    if not present:
 
2770
                        raise AssertionError('could not find block for %s' % (
 
2771
                            other_key,))
 
2772
                    other_block = self._dirblocks[other_block_index][1]
 
2773
                    other_entry_index, present = self._find_entry_index(
 
2774
                        other_key, other_block)
 
2775
                    if not present:
 
2776
                        raise AssertionError(
 
2777
                            'update_minimal: could not find other entry for %s'
 
2778
                            % (other_key,))
2585
2779
                    if path_utf8 is None:
2586
2780
                        raise AssertionError('no path')
2587
 
                    self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
2588
 
                        ('r', path_utf8, 0, False, '')
 
2781
                    # Turn this other location into a reference to the new
 
2782
                    # location. This also updates the aliased iterator
 
2783
                    # (current_old in set_state_from_inventory) so that the old
 
2784
                    # entry, if not already examined, is skipped over by that
 
2785
                    # loop.
 
2786
                    other_entry = other_block[other_entry_index]
 
2787
                    other_entry[1][0] = ('r', path_utf8, 0, False, '')
 
2788
                    self._maybe_remove_row(other_block, other_entry_index,
 
2789
                        id_index)
2589
2790
 
 
2791
                # This loop:
 
2792
                # adds a tuple to the new details for each column
 
2793
                #  - either by copying an existing relocation pointer inside that column
 
2794
                #  - or by creating a new pointer to the right row inside that column
2590
2795
                num_present_parents = self._num_present_parents()
 
2796
                if num_present_parents:
 
2797
                    other_key = list(existing_keys)[0]
2591
2798
                for lookup_index in xrange(1, num_present_parents + 1):
2592
2799
                    # grab any one entry, use it to find the right path.
2593
2800
                    # TODO: optimise this to reduce memory use in highly
2600
2807
                    update_entry_index, present = \
2601
2808
                        self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
2602
2809
                    if not present:
2603
 
                        raise AssertionError('could not find entry for %s' % (other_key,))
 
2810
                        raise AssertionError('update_minimal: could not find entry for %s' % (other_key,))
2604
2811
                    update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
2605
2812
                    if update_details[0] in 'ar': # relocated, absent
2606
2813
                        # its a pointer or absent in lookup_index's tree, use
2652
2859
 
2653
2860
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2654
2861
 
 
2862
    def _maybe_remove_row(self, block, index, id_index):
 
2863
        """Remove index if it is absent or relocated across the row.
 
2864
        
 
2865
        id_index is updated accordingly.
 
2866
        """
 
2867
        present_in_row = False
 
2868
        entry = block[index]
 
2869
        for column in entry[1]:
 
2870
            if column[0] not in 'ar':
 
2871
                present_in_row = True
 
2872
                break
 
2873
        if not present_in_row:
 
2874
            block.pop(index)
 
2875
            id_index[entry[0][2]].remove(entry[0])
 
2876
 
2655
2877
    def _validate(self):
2656
2878
        """Check that invariants on the dirblock are correct.
2657
2879
 
2938
3160
                           False, DirState.NULLSTAT)
2939
3161
    state._dirblock_state = DirState.IN_MEMORY_MODIFIED
2940
3162
    return link_or_sha1
2941
 
update_entry = py_update_entry
2942
3163
 
2943
3164
 
2944
3165
class ProcessEntryPython(object):
2945
3166
 
2946
 
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
 
3167
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id",
2947
3168
        "last_source_parent", "last_target_parent", "include_unchanged",
2948
 
        "use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
2949
 
        "search_specific_files", "state", "source_index", "target_index",
2950
 
        "want_unversioned", "tree"]
 
3169
        "partial", "use_filesystem_for_exec", "utf8_decode",
 
3170
        "searched_specific_files", "search_specific_files",
 
3171
        "searched_exact_paths", "search_specific_file_parents", "seen_ids",
 
3172
        "state", "source_index", "target_index", "want_unversioned", "tree"]
2951
3173
 
2952
3174
    def __init__(self, include_unchanged, use_filesystem_for_exec,
2953
3175
        search_specific_files, state, source_index, target_index,
2954
3176
        want_unversioned, tree):
2955
3177
        self.old_dirname_to_file_id = {}
2956
3178
        self.new_dirname_to_file_id = {}
2957
 
        # Just a sentry, so that _process_entry can say that this
2958
 
        # record is handled, but isn't interesting to process (unchanged)
2959
 
        self.uninteresting = object()
 
3179
        # Are we doing a partial iter_changes?
 
3180
        self.partial = search_specific_files != set([''])
2960
3181
        # Using a list so that we can access the values and change them in
2961
3182
        # nested scope. Each one is [path, file_id, entry]
2962
3183
        self.last_source_parent = [None, None]
2965
3186
        self.use_filesystem_for_exec = use_filesystem_for_exec
2966
3187
        self.utf8_decode = cache_utf8._utf8_decode
2967
3188
        # for all search_indexs in each path at or under each element of
2968
 
        # search_specific_files, if the detail is relocated: add the id, and add the
2969
 
        # relocated path as one to search if its not searched already. If the
2970
 
        # detail is not relocated, add the id.
 
3189
        # search_specific_files, if the detail is relocated: add the id, and
 
3190
        # add the relocated path as one to search if its not searched already.
 
3191
        # If the detail is not relocated, add the id.
2971
3192
        self.searched_specific_files = set()
 
3193
        # When we search exact paths without expanding downwards, we record
 
3194
        # that here.
 
3195
        self.searched_exact_paths = set()
2972
3196
        self.search_specific_files = search_specific_files
 
3197
        # The parents up to the root of the paths we are searching.
 
3198
        # After all normal paths are returned, these specific items are returned.
 
3199
        self.search_specific_file_parents = set()
 
3200
        # The ids we've sent out in the delta.
 
3201
        self.seen_ids = set()
2973
3202
        self.state = state
2974
3203
        self.source_index = source_index
2975
3204
        self.target_index = target_index
 
3205
        if target_index != 0:
 
3206
            # A lot of code in here depends on target_index == 0
 
3207
            raise errors.BzrError('unsupported target index')
2976
3208
        self.want_unversioned = want_unversioned
2977
3209
        self.tree = tree
2978
3210
 
2980
3212
        """Compare an entry and real disk to generate delta information.
2981
3213
 
2982
3214
        :param path_info: top_relpath, basename, kind, lstat, abspath for
2983
 
            the path of entry. If None, then the path is considered absent.
2984
 
            (Perhaps we should pass in a concrete entry for this ?)
 
3215
            the path of entry. If None, then the path is considered absent in 
 
3216
            the target (Perhaps we should pass in a concrete entry for this ?)
2985
3217
            Basename is returned as a utf8 string because we expect this
2986
3218
            tuple will be ignored, and don't want to take the time to
2987
3219
            decode.
2988
 
        :return: None if these don't match
2989
 
                 A tuple of information about the change, or
2990
 
                 the object 'uninteresting' if these match, but are
2991
 
                 basically identical.
 
3220
        :return: (iter_changes_result, changed). If the entry has not been
 
3221
            handled then changed is None. Otherwise it is False if no content
 
3222
            or metadata changes have occurred, and True if any content or
 
3223
            metadata change has occurred. If self.include_unchanged is True then
 
3224
            if changed is not None, iter_changes_result will always be a result
 
3225
            tuple. Otherwise, iter_changes_result is None unless changed is
 
3226
            True.
2992
3227
        """
2993
3228
        if self.source_index is None:
2994
3229
            source_details = DirState.NULL_PARENT_DETAILS
3062
3297
                    if source_minikind != 'f':
3063
3298
                        content_change = True
3064
3299
                    else:
3065
 
                        # If the size is the same, check the sha:
3066
 
                        if target_details[2] == source_details[2]:
3067
 
                            if link_or_sha1 is None:
3068
 
                                # Stat cache miss:
3069
 
                                statvalue, link_or_sha1 = \
3070
 
                                    self.state._sha1_provider.stat_and_sha1(
3071
 
                                    path_info[4])
3072
 
                                self.state._observed_sha1(entry, link_or_sha1,
3073
 
                                    statvalue)
3074
 
                            content_change = (link_or_sha1 != source_details[1])
3075
 
                        else:
3076
 
                            # Size changed, so must be different
3077
 
                            content_change = True
 
3300
                        # Check the sha. We can't just rely on the size as
 
3301
                        # content filtering may mean differ sizes actually
 
3302
                        # map to the same content
 
3303
                        if link_or_sha1 is None:
 
3304
                            # Stat cache miss:
 
3305
                            statvalue, link_or_sha1 = \
 
3306
                                self.state._sha1_provider.stat_and_sha1(
 
3307
                                path_info[4])
 
3308
                            self.state._observed_sha1(entry, link_or_sha1,
 
3309
                                statvalue)
 
3310
                        content_change = (link_or_sha1 != source_details[1])
3078
3311
                    # Target details is updated at update_entry time
3079
3312
                    if self.use_filesystem_for_exec:
3080
3313
                        # We don't need S_ISREG here, because we are sure
3095
3328
                        content_change = False
3096
3329
                    target_exec = False
3097
3330
                else:
3098
 
                    raise Exception, "unknown kind %s" % path_info[2]
 
3331
                    if path is None:
 
3332
                        path = pathjoin(old_dirname, old_basename)
 
3333
                    raise errors.BadFileKindError(path, path_info[2])
3099
3334
            if source_minikind == 'd':
3100
3335
                if path is None:
3101
3336
                    old_path = path = pathjoin(old_dirname, old_basename)
3102
3337
                self.old_dirname_to_file_id[old_path] = file_id
3103
3338
            # parent id is the entry for the path in the target tree
3104
 
            if old_dirname == self.last_source_parent[0]:
 
3339
            if old_basename and old_dirname == self.last_source_parent[0]:
3105
3340
                source_parent_id = self.last_source_parent[1]
3106
3341
            else:
3107
3342
                try:
3117
3352
                    self.last_source_parent[0] = old_dirname
3118
3353
                    self.last_source_parent[1] = source_parent_id
3119
3354
            new_dirname = entry[0][0]
3120
 
            if new_dirname == self.last_target_parent[0]:
 
3355
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
3121
3356
                target_parent_id = self.last_target_parent[1]
3122
3357
            else:
3123
3358
                try:
3140
3375
                    self.last_target_parent[1] = target_parent_id
3141
3376
 
3142
3377
            source_exec = source_details[3]
3143
 
            if (self.include_unchanged
3144
 
                or content_change
 
3378
            changed = (content_change
3145
3379
                or source_parent_id != target_parent_id
3146
3380
                or old_basename != entry[0][1]
3147
3381
                or source_exec != target_exec
3148
 
                ):
 
3382
                )
 
3383
            if not changed and not self.include_unchanged:
 
3384
                return None, False
 
3385
            else:
3149
3386
                if old_path is None:
3150
3387
                    old_path = path = pathjoin(old_dirname, old_basename)
3151
3388
                    old_path_u = self.utf8_decode(old_path)[0]
3164
3401
                       (source_parent_id, target_parent_id),
3165
3402
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
3166
3403
                       (source_kind, target_kind),
3167
 
                       (source_exec, target_exec))
3168
 
            else:
3169
 
                return self.uninteresting
 
3404
                       (source_exec, target_exec)), changed
3170
3405
        elif source_minikind in 'a' and target_minikind in 'fdlt':
3171
3406
            # looks like a new file
3172
3407
            path = pathjoin(entry[0][0], entry[0][1])
3193
3428
                       (None, parent_id),
3194
3429
                       (None, self.utf8_decode(entry[0][1])[0]),
3195
3430
                       (None, path_info[2]),
3196
 
                       (None, target_exec))
 
3431
                       (None, target_exec)), True
3197
3432
            else:
3198
3433
                # Its a missing file, report it as such.
3199
3434
                return (entry[0][2],
3203
3438
                       (None, parent_id),
3204
3439
                       (None, self.utf8_decode(entry[0][1])[0]),
3205
3440
                       (None, None),
3206
 
                       (None, False))
 
3441
                       (None, False)), True
3207
3442
        elif source_minikind in 'fdlt' and target_minikind in 'a':
3208
3443
            # unversioned, possibly, or possibly not deleted: we dont care.
3209
3444
            # if its still on disk, *and* theres no other entry at this
3221
3456
                   (parent_id, None),
3222
3457
                   (self.utf8_decode(entry[0][1])[0], None),
3223
3458
                   (DirState._minikind_to_kind[source_minikind], None),
3224
 
                   (source_details[3], None))
 
3459
                   (source_details[3], None)), True
3225
3460
        elif source_minikind in 'fdlt' and target_minikind in 'r':
3226
3461
            # a rename; could be a true rename, or a rename inherited from
3227
3462
            # a renamed parent. TODO: handle this efficiently. Its not
3239
3474
                "source_minikind=%r, target_minikind=%r"
3240
3475
                % (source_minikind, target_minikind))
3241
3476
            ## import pdb;pdb.set_trace()
3242
 
        return None
 
3477
        return None, None
3243
3478
 
3244
3479
    def __iter__(self):
3245
3480
        return self
3246
3481
 
 
3482
    def _gather_result_for_consistency(self, result):
 
3483
        """Check a result we will yield to make sure we are consistent later.
 
3484
        
 
3485
        This gathers result's parents into a set to output later.
 
3486
 
 
3487
        :param result: A result tuple.
 
3488
        """
 
3489
        if not self.partial or not result[0]:
 
3490
            return
 
3491
        self.seen_ids.add(result[0])
 
3492
        new_path = result[1][1]
 
3493
        if new_path:
 
3494
            # Not the root and not a delete: queue up the parents of the path.
 
3495
            self.search_specific_file_parents.update(
 
3496
                osutils.parent_directories(new_path.encode('utf8')))
 
3497
            # Add the root directory which parent_directories does not
 
3498
            # provide.
 
3499
            self.search_specific_file_parents.add('')
 
3500
 
3247
3501
    def iter_changes(self):
3248
3502
        """Iterate over the changes."""
3249
3503
        utf8_decode = cache_utf8._utf8_decode
3250
3504
        _cmp_by_dirs = cmp_by_dirs
3251
3505
        _process_entry = self._process_entry
3252
 
        uninteresting = self.uninteresting
3253
3506
        search_specific_files = self.search_specific_files
3254
3507
        searched_specific_files = self.searched_specific_files
3255
3508
        splitpath = osutils.splitpath
3325
3578
                continue
3326
3579
            path_handled = False
3327
3580
            for entry in root_entries:
3328
 
                result = _process_entry(entry, root_dir_info)
3329
 
                if result is not None:
 
3581
                result, changed = _process_entry(entry, root_dir_info)
 
3582
                if changed is not None:
3330
3583
                    path_handled = True
3331
 
                    if result is not uninteresting:
 
3584
                    if changed:
 
3585
                        self._gather_result_for_consistency(result)
 
3586
                    if changed or self.include_unchanged:
3332
3587
                        yield result
3333
3588
            if self.want_unversioned and not path_handled and root_dir_info:
3334
3589
                new_executable = bool(
3444
3699
                        for current_entry in current_block[1]:
3445
3700
                            # entry referring to file not present on disk.
3446
3701
                            # advance the entry only, after processing.
3447
 
                            result = _process_entry(current_entry, None)
3448
 
                            if result is not None:
3449
 
                                if result is not uninteresting:
 
3702
                            result, changed = _process_entry(current_entry, None)
 
3703
                            if changed is not None:
 
3704
                                if changed:
 
3705
                                    self._gather_result_for_consistency(result)
 
3706
                                if changed or self.include_unchanged:
3450
3707
                                    yield result
3451
3708
                        block_index +=1
3452
3709
                        if (block_index < len(self.state._dirblocks) and
3482
3739
                        pass
3483
3740
                    elif current_path_info is None:
3484
3741
                        # no path is fine: the per entry code will handle it.
3485
 
                        result = _process_entry(current_entry, current_path_info)
3486
 
                        if result is not None:
3487
 
                            if result is not uninteresting:
 
3742
                        result, changed = _process_entry(current_entry, current_path_info)
 
3743
                        if changed is not None:
 
3744
                            if changed:
 
3745
                                self._gather_result_for_consistency(result)
 
3746
                            if changed or self.include_unchanged:
3488
3747
                                yield result
3489
3748
                    elif (current_entry[0][1] != current_path_info[1]
3490
3749
                          or current_entry[1][self.target_index][0] in 'ar'):
3503
3762
                        else:
3504
3763
                            # entry referring to file not present on disk.
3505
3764
                            # advance the entry only, after processing.
3506
 
                            result = _process_entry(current_entry, None)
3507
 
                            if result is not None:
3508
 
                                if result is not uninteresting:
 
3765
                            result, changed = _process_entry(current_entry, None)
 
3766
                            if changed is not None:
 
3767
                                if changed:
 
3768
                                    self._gather_result_for_consistency(result)
 
3769
                                if changed or self.include_unchanged:
3509
3770
                                    yield result
3510
3771
                            advance_path = False
3511
3772
                    else:
3512
 
                        result = _process_entry(current_entry, current_path_info)
3513
 
                        if result is not None:
 
3773
                        result, changed = _process_entry(current_entry, current_path_info)
 
3774
                        if changed is not None:
3514
3775
                            path_handled = True
3515
 
                            if result is not uninteresting:
 
3776
                            if changed:
 
3777
                                self._gather_result_for_consistency(result)
 
3778
                            if changed or self.include_unchanged:
3516
3779
                                yield result
3517
3780
                    if advance_entry and current_entry is not None:
3518
3781
                        entry_index += 1
3577
3840
                        current_dir_info = dir_iterator.next()
3578
3841
                    except StopIteration:
3579
3842
                        current_dir_info = None
3580
 
_process_entry = ProcessEntryPython
 
3843
        for result in self._iter_specific_file_parents():
 
3844
            yield result
 
3845
 
 
3846
    def _iter_specific_file_parents(self):
 
3847
        """Iter over the specific file parents."""
 
3848
        while self.search_specific_file_parents:
 
3849
            # Process the parent directories for the paths we were iterating.
 
3850
            # Even in extremely large trees this should be modest, so currently
 
3851
            # no attempt is made to optimise.
 
3852
            path_utf8 = self.search_specific_file_parents.pop()
 
3853
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
 
3854
                # We've examined this path.
 
3855
                continue
 
3856
            if path_utf8 in self.searched_exact_paths:
 
3857
                # We've examined this path.
 
3858
                continue
 
3859
            path_entries = self.state._entries_for_path(path_utf8)
 
3860
            # We need either one or two entries. If the path in
 
3861
            # self.target_index has moved (so the entry in source_index is in
 
3862
            # 'ar') then we need to also look for the entry for this path in
 
3863
            # self.source_index, to output the appropriate delete-or-rename.
 
3864
            selected_entries = []
 
3865
            found_item = False
 
3866
            for candidate_entry in path_entries:
 
3867
                # Find entries present in target at this path:
 
3868
                if candidate_entry[1][self.target_index][0] not in 'ar':
 
3869
                    found_item = True
 
3870
                    selected_entries.append(candidate_entry)
 
3871
                # Find entries present in source at this path:
 
3872
                elif (self.source_index is not None and
 
3873
                    candidate_entry[1][self.source_index][0] not in 'ar'):
 
3874
                    found_item = True
 
3875
                    if candidate_entry[1][self.target_index][0] == 'a':
 
3876
                        # Deleted, emit it here.
 
3877
                        selected_entries.append(candidate_entry)
 
3878
                    else:
 
3879
                        # renamed, emit it when we process the directory it
 
3880
                        # ended up at.
 
3881
                        self.search_specific_file_parents.add(
 
3882
                            candidate_entry[1][self.target_index][1])
 
3883
            if not found_item:
 
3884
                raise AssertionError(
 
3885
                    "Missing entry for specific path parent %r, %r" % (
 
3886
                    path_utf8, path_entries))
 
3887
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
 
3888
            for entry in selected_entries:
 
3889
                if entry[0][2] in self.seen_ids:
 
3890
                    continue
 
3891
                result, changed = self._process_entry(entry, path_info)
 
3892
                if changed is None:
 
3893
                    raise AssertionError(
 
3894
                        "Got entry<->path mismatch for specific path "
 
3895
                        "%r entry %r path_info %r " % (
 
3896
                        path_utf8, entry, path_info))
 
3897
                # Only include changes - we're outside the users requested
 
3898
                # expansion.
 
3899
                if changed:
 
3900
                    self._gather_result_for_consistency(result)
 
3901
                    if (result[6][0] == 'directory' and
 
3902
                        result[6][1] != 'directory'):
 
3903
                        # This stopped being a directory, the old children have
 
3904
                        # to be included.
 
3905
                        if entry[1][self.source_index][0] == 'r':
 
3906
                            # renamed, take the source path
 
3907
                            entry_path_utf8 = entry[1][self.source_index][1]
 
3908
                        else:
 
3909
                            entry_path_utf8 = path_utf8
 
3910
                        initial_key = (entry_path_utf8, '', '')
 
3911
                        block_index, _ = self.state._find_block_index_from_key(
 
3912
                            initial_key)
 
3913
                        if block_index == 0:
 
3914
                            # The children of the root are in block index 1.
 
3915
                            block_index +=1
 
3916
                        current_block = None
 
3917
                        if block_index < len(self.state._dirblocks):
 
3918
                            current_block = self.state._dirblocks[block_index]
 
3919
                            if not osutils.is_inside(
 
3920
                                entry_path_utf8, current_block[0]):
 
3921
                                # No entries for this directory at all.
 
3922
                                current_block = None
 
3923
                        if current_block is not None:
 
3924
                            for entry in current_block[1]:
 
3925
                                if entry[1][self.source_index][0] in 'ar':
 
3926
                                    # Not in the source tree, so doesn't have to be
 
3927
                                    # included.
 
3928
                                    continue
 
3929
                                # Path of the entry itself.
 
3930
 
 
3931
                                self.search_specific_file_parents.add(
 
3932
                                    osutils.pathjoin(*entry[0][:2]))
 
3933
                if changed or self.include_unchanged:
 
3934
                    yield result
 
3935
            self.searched_exact_paths.add(path_utf8)
 
3936
 
 
3937
    def _path_info(self, utf8_path, unicode_path):
 
3938
        """Generate path_info for unicode_path.
 
3939
 
 
3940
        :return: None if unicode_path does not exist, or a path_info tuple.
 
3941
        """
 
3942
        abspath = self.tree.abspath(unicode_path)
 
3943
        try:
 
3944
            stat = os.lstat(abspath)
 
3945
        except OSError, e:
 
3946
            if e.errno == errno.ENOENT:
 
3947
                # the path does not exist.
 
3948
                return None
 
3949
            else:
 
3950
                raise
 
3951
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
 
3952
        dir_info = (utf8_path, utf8_basename,
 
3953
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
 
3954
            abspath)
 
3955
        if dir_info[2] == 'directory':
 
3956
            if self.tree._directory_is_tree_reference(
 
3957
                unicode_path):
 
3958
                self.root_dir_info = self.root_dir_info[:2] + \
 
3959
                    ('tree-reference',) + self.root_dir_info[3:]
 
3960
        return dir_info
3581
3961
 
3582
3962
 
3583
3963
# Try to load the compiled form if possible
3584
3964
try:
3585
 
    from bzrlib._dirstate_helpers_c import (
3586
 
        _read_dirblocks_c as _read_dirblocks,
3587
 
        bisect_dirblock_c as bisect_dirblock,
3588
 
        _bisect_path_left_c as _bisect_path_left,
3589
 
        _bisect_path_right_c as _bisect_path_right,
3590
 
        cmp_by_dirs_c as cmp_by_dirs,
 
3965
    from bzrlib._dirstate_helpers_pyx import (
 
3966
        _read_dirblocks,
 
3967
        bisect_dirblock,
 
3968
        _bisect_path_left,
 
3969
        _bisect_path_right,
 
3970
        cmp_by_dirs,
3591
3971
        ProcessEntryC as _process_entry,
3592
3972
        update_entry as update_entry,
3593
3973
        )
3594
3974
except ImportError:
3595
3975
    from bzrlib._dirstate_helpers_py import (
3596
 
        _read_dirblocks_py as _read_dirblocks,
3597
 
        bisect_dirblock_py as bisect_dirblock,
3598
 
        _bisect_path_left_py as _bisect_path_left,
3599
 
        _bisect_path_right_py as _bisect_path_right,
3600
 
        cmp_by_dirs_py as cmp_by_dirs,
 
3976
        _read_dirblocks,
 
3977
        bisect_dirblock,
 
3978
        _bisect_path_left,
 
3979
        _bisect_path_right,
 
3980
        cmp_by_dirs,
3601
3981
        )
 
3982
    # FIXME: It would be nice to be able to track moved lines so that the
 
3983
    # corresponding python code can be moved to the _dirstate_helpers_py
 
3984
    # module. I don't want to break the history for this important piece of
 
3985
    # code so I left the code here -- vila 20090622
 
3986
    update_entry = py_update_entry
 
3987
    _process_entry = ProcessEntryPython