~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

merge 2.0 branch rev 4647

Show diffs side-by-side

added added

removed removed

Lines of Context:
204
204
import bisect
205
205
import binascii
206
206
import errno
 
207
import operator
207
208
import os
208
209
from stat import S_IEXEC
209
210
import stat
1277
1278
    def update_by_delta(self, delta):
1278
1279
        """Apply an inventory delta to the dirstate for tree 0
1279
1280
 
 
1281
        This is the workhorse for apply_inventory_delta in dirstate based
 
1282
        trees.
 
1283
 
1280
1284
        :param delta: An inventory delta.  See Inventory.apply_delta for
1281
1285
            details.
1282
1286
        """
1283
1287
        self._read_dirblocks_if_needed()
 
1288
        encode = cache_utf8.encode
1284
1289
        insertions = {}
1285
1290
        removals = {}
1286
 
        for old_path, new_path, file_id, inv_entry in sorted(delta, reverse=True):
 
1291
        # Accumulate parent references (path_utf8, id), to check for parentless
 
1292
        # items or items placed under files/links/tree-references. We get
 
1293
        # references from every item in the delta that is not a deletion and
 
1294
        # is not itself the root.
 
1295
        parents = set()
 
1296
        # Added ids must not be in the dirstate already. This set holds those
 
1297
        # ids.
 
1298
        new_ids = set()
 
1299
        # This loop transforms the delta to single atomic operations that can
 
1300
        # be executed and validated.
 
1301
        for old_path, new_path, file_id, inv_entry in sorted(
 
1302
            inventory._check_delta_unique_old_paths(
 
1303
            inventory._check_delta_unique_new_paths(
 
1304
            inventory._check_delta_ids_match_entry(
 
1305
            inventory._check_delta_ids_are_valid(
 
1306
            inventory._check_delta_new_path_entry_both_or_None(delta))))),
 
1307
            reverse=True):
1287
1308
            if (file_id in insertions) or (file_id in removals):
1288
 
                raise AssertionError("repeated file id in delta %r" % (file_id,))
 
1309
                raise errors.InconsistentDelta(old_path or new_path, file_id,
 
1310
                    "repeated file_id")
1289
1311
            if old_path is not None:
1290
1312
                old_path = old_path.encode('utf-8')
1291
1313
                removals[file_id] = old_path
 
1314
            else:
 
1315
                new_ids.add(file_id)
1292
1316
            if new_path is not None:
 
1317
                if inv_entry is None:
 
1318
                    raise errors.InconsistentDelta(new_path, file_id,
 
1319
                        "new_path with no entry")
1293
1320
                new_path = new_path.encode('utf-8')
1294
 
                dirname, basename = osutils.split(new_path)
1295
 
                key = (dirname, basename, file_id)
 
1321
                dirname_utf8, basename = osutils.split(new_path)
 
1322
                if basename:
 
1323
                    parents.add((dirname_utf8, inv_entry.parent_id))
 
1324
                key = (dirname_utf8, basename, file_id)
1296
1325
                minikind = DirState._kind_to_minikind[inv_entry.kind]
1297
1326
                if minikind == 't':
1298
1327
                    fingerprint = inv_entry.reference_revision
1320
1349
                                                  child_basename)
1321
1350
                    insertions[child[0][2]] = (key, minikind, executable,
1322
1351
                                               fingerprint, new_child_path)
1323
 
        self._apply_removals(removals.values())
1324
 
        self._apply_insertions(insertions.values())
 
1352
        self._check_delta_ids_absent(new_ids, delta, 0)
 
1353
        try:
 
1354
            self._apply_removals(removals.iteritems())
 
1355
            self._apply_insertions(insertions.values())
 
1356
            # Validate parents
 
1357
            self._after_delta_check_parents(parents, 0)
 
1358
        except errors.BzrError, e:
 
1359
            self._changes_aborted = True
 
1360
            if 'integrity error' not in str(e):
 
1361
                raise
 
1362
            # _get_entry raises BzrError when a request is inconsistent; we
 
1363
            # want such errors to be shown as InconsistentDelta - and that 
 
1364
            # fits the behaviour we trigger.
 
1365
            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
1325
1366
 
1326
1367
    def _apply_removals(self, removals):
1327
 
        for path in sorted(removals, reverse=True):
 
1368
        for file_id, path in sorted(removals, reverse=True,
 
1369
            key=operator.itemgetter(1)):
1328
1370
            dirname, basename = osutils.split(path)
1329
1371
            block_i, entry_i, d_present, f_present = \
1330
1372
                self._get_block_entry_index(dirname, basename, 0)
1331
 
            entry = self._dirblocks[block_i][1][entry_i]
 
1373
            try:
 
1374
                entry = self._dirblocks[block_i][1][entry_i]
 
1375
            except IndexError:
 
1376
                self._changes_aborted = True
 
1377
                raise errors.InconsistentDelta(path, file_id,
 
1378
                    "Wrong path for old path.")
 
1379
            if not f_present or entry[1][0][0] in 'ar':
 
1380
                self._changes_aborted = True
 
1381
                raise errors.InconsistentDelta(path, file_id,
 
1382
                    "Wrong path for old path.")
 
1383
            if file_id != entry[0][2]:
 
1384
                self._changes_aborted = True
 
1385
                raise errors.InconsistentDelta(path, file_id,
 
1386
                    "Attempt to remove path has wrong id - found %r."
 
1387
                    % entry[0][2])
1332
1388
            self._make_absent(entry)
1333
1389
            # See if we have a malformed delta: deleting a directory must not
1334
1390
            # leave crud behind. This increases the number of bisects needed
1342
1398
                # be due to it being in a parent tree, or a corrupt delta.
1343
1399
                for child_entry in self._dirblocks[block_i][1]:
1344
1400
                    if child_entry[1][0][0] not in ('r', 'a'):
 
1401
                        self._changes_aborted = True
1345
1402
                        raise errors.InconsistentDelta(path, entry[0][2],
1346
1403
                            "The file id was deleted but its children were "
1347
1404
                            "not deleted.")
1348
1405
 
1349
1406
    def _apply_insertions(self, adds):
1350
 
        for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
1351
 
            self.update_minimal(key, minikind, executable, fingerprint,
1352
 
                                path_utf8=path_utf8)
 
1407
        try:
 
1408
            for key, minikind, executable, fingerprint, path_utf8 in sorted(adds):
 
1409
                self.update_minimal(key, minikind, executable, fingerprint,
 
1410
                                    path_utf8=path_utf8)
 
1411
        except errors.NotVersionedError:
 
1412
            self._changes_aborted = True
 
1413
            raise errors.InconsistentDelta(path_utf8.decode('utf8'), key[2],
 
1414
                "Missing parent")
1353
1415
 
1354
1416
    def update_basis_by_delta(self, delta, new_revid):
1355
1417
        """Update the parents of this tree after a commit.
1399
1461
        # At the same time, to reduce interface friction we convert the input
1400
1462
        # inventory entries to dirstate.
1401
1463
        root_only = ('', '')
 
1464
        # Accumulate parent references (path_utf8, id), to check for parentless
 
1465
        # items or items placed under files/links/tree-references. We get
 
1466
        # references from every item in the delta that is not a deletion and
 
1467
        # is not itself the root.
 
1468
        parents = set()
 
1469
        # Added ids must not be in the dirstate already. This set holds those
 
1470
        # ids.
 
1471
        new_ids = set()
1402
1472
        for old_path, new_path, file_id, inv_entry in delta:
 
1473
            if inv_entry is not None and file_id != inv_entry.file_id:
 
1474
                raise errors.InconsistentDelta(new_path, file_id,
 
1475
                    "mismatched entry file_id %r" % inv_entry)
 
1476
            if new_path is not None:
 
1477
                if inv_entry is None:
 
1478
                    raise errors.InconsistentDelta(new_path, file_id,
 
1479
                        "new_path with no entry")
 
1480
                new_path_utf8 = encode(new_path)
 
1481
                # note the parent for validation
 
1482
                dirname_utf8, basename_utf8 = osutils.split(new_path_utf8)
 
1483
                if basename_utf8:
 
1484
                    parents.add((dirname_utf8, inv_entry.parent_id))
1403
1485
            if old_path is None:
1404
1486
                adds.append((None, encode(new_path), file_id,
1405
1487
                    inv_to_entry(inv_entry), True))
 
1488
                new_ids.add(file_id)
1406
1489
            elif new_path is None:
1407
1490
                deletes.append((encode(old_path), None, file_id, None, True))
1408
1491
            elif (old_path, new_path) != root_only:
1420
1503
                # for 'r' items on every pass.
1421
1504
                self._update_basis_apply_deletes(deletes)
1422
1505
                deletes = []
1423
 
                new_path_utf8 = encode(new_path)
1424
1506
                # Split into an add/delete pair recursively.
1425
1507
                adds.append((None, new_path_utf8, file_id,
1426
1508
                    inv_to_entry(inv_entry), False))
1452
1534
                # of everything.
1453
1535
                changes.append((encode(old_path), encode(new_path), file_id,
1454
1536
                    inv_to_entry(inv_entry)))
1455
 
 
1456
 
        # Finish expunging deletes/first half of renames.
1457
 
        self._update_basis_apply_deletes(deletes)
1458
 
        # Reinstate second half of renames and new paths.
1459
 
        self._update_basis_apply_adds(adds)
1460
 
        # Apply in-situ changes.
1461
 
        self._update_basis_apply_changes(changes)
 
1537
        self._check_delta_ids_absent(new_ids, delta, 1)
 
1538
        try:
 
1539
            # Finish expunging deletes/first half of renames.
 
1540
            self._update_basis_apply_deletes(deletes)
 
1541
            # Reinstate second half of renames and new paths.
 
1542
            self._update_basis_apply_adds(adds)
 
1543
            # Apply in-situ changes.
 
1544
            self._update_basis_apply_changes(changes)
 
1545
            # Validate parents
 
1546
            self._after_delta_check_parents(parents, 1)
 
1547
        except errors.BzrError, e:
 
1548
            self._changes_aborted = True
 
1549
            if 'integrity error' not in str(e):
 
1550
                raise
 
1551
            # _get_entry raises BzrError when a request is inconsistent; we
 
1552
            # want such errors to be shown as InconsistentDelta - and that 
 
1553
            # fits the behaviour we trigger. Partof this is driven by dirstate
 
1554
            # only supporting deltas that turn the basis into a closer fit to
 
1555
            # the active tree.
 
1556
            raise errors.InconsistentDeltaDelta(delta, "error from _get_entry.")
1462
1557
 
1463
1558
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
1464
1559
        self._header_state = DirState.IN_MEMORY_MODIFIED
1465
1560
        self._id_index = None
1466
1561
        return
1467
1562
 
 
1563
    def _check_delta_ids_absent(self, new_ids, delta, tree_index):
 
1564
        """Check that none of the file_ids in new_ids are present in a tree."""
 
1565
        if not new_ids:
 
1566
            return
 
1567
        id_index = self._get_id_index()
 
1568
        for file_id in new_ids:
 
1569
            for key in id_index.get(file_id, []):
 
1570
                block_i, entry_i, d_present, f_present = \
 
1571
                    self._get_block_entry_index(key[0], key[1], tree_index)
 
1572
                if not f_present:
 
1573
                    # In a different tree
 
1574
                    continue
 
1575
                entry = self._dirblocks[block_i][1][entry_i]
 
1576
                if entry[0][2] != file_id:
 
1577
                    # Different file_id, so not what we want.
 
1578
                    continue
 
1579
                # NB: No changes made before this helper is called, so no need
 
1580
                # to set the _changes_aborted flag.
 
1581
                raise errors.InconsistentDelta(
 
1582
                    ("%s/%s" % key[0:2]).decode('utf8'), file_id,
 
1583
                    "This file_id is new in the delta but already present in "
 
1584
                    "the target")
 
1585
 
1468
1586
    def _update_basis_apply_adds(self, adds):
1469
1587
        """Apply a sequence of adds to tree 1 during update_basis_by_delta.
1470
1588
 
1535
1653
        null = DirState.NULL_PARENT_DETAILS
1536
1654
        for old_path, new_path, file_id, _, real_delete in deletes:
1537
1655
            if real_delete != (new_path is None):
 
1656
                self._changes_aborted = True
1538
1657
                raise AssertionError("bad delete delta")
1539
1658
            # the entry for this file_id must be in tree 1.
1540
1659
            dirname, basename = osutils.split(old_path)
1573
1692
                    # it is being resurrected here, so blank it out temporarily.
1574
1693
                    self._dirblocks[block_index][1][entry_index][1][1] = null
1575
1694
 
 
1695
    def _after_delta_check_parents(self, parents, index):
 
1696
        """Check that parents required by the delta are all intact.
 
1697
        
 
1698
        :param parents: An iterable of (path_utf8, file_id) tuples which are
 
1699
            required to be present in tree 'index' at path_utf8 with id file_id
 
1700
            and be a directory.
 
1701
        :param index: The column in the dirstate to check for parents in.
 
1702
        """
 
1703
        for dirname_utf8, file_id in parents:
 
1704
            # Get the entry - the ensures that file_id, dirname_utf8 exists and
 
1705
            # has the right file id.
 
1706
            entry = self._get_entry(index, file_id, dirname_utf8)
 
1707
            if entry[1] is None:
 
1708
                self._changes_aborted = True
 
1709
                raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
 
1710
                    file_id, "This parent is not present.")
 
1711
            # Parents of things must be directories
 
1712
            if entry[1][index][0] != 'd':
 
1713
                self._changes_aborted = True
 
1714
                raise errors.InconsistentDelta(dirname_utf8.decode('utf8'),
 
1715
                    file_id, "This parent is not a directory.")
 
1716
 
1576
1717
    def _observed_sha1(self, entry, sha1, stat_value,
1577
1718
        _stat_to_minikind=_stat_to_minikind, _pack_stat=pack_stat):
1578
1719
        """Note the sha1 of a file.
1821
1962
        self._read_dirblocks_if_needed()
1822
1963
        if path_utf8 is not None:
1823
1964
            if type(path_utf8) is not str:
1824
 
                raise AssertionError('path_utf8 is not a str: %s %s'
 
1965
                raise errors.BzrError('path_utf8 is not a str: %s %r'
1825
1966
                    % (type(path_utf8), path_utf8))
1826
1967
            # path lookups are faster
1827
1968
            dirname, basename = osutils.split(path_utf8)
2379
2520
        if 'evil' in debug.debug_flags:
2380
2521
            trace.mutter_callsite(1,
2381
2522
                "set_state_from_inventory called; please mutate the tree instead")
 
2523
        tracing = 'dirstate' in debug.debug_flags
 
2524
        if tracing:
 
2525
            trace.mutter("set_state_from_inventory trace:")
2382
2526
        self._read_dirblocks_if_needed()
2383
2527
        # sketch:
2384
2528
        # Two iterators: current data and new data, both in dirblock order.
2393
2537
        new_iterator = new_inv.iter_entries_by_dir()
2394
2538
        # we will be modifying the dirstate, so we need a stable iterator. In
2395
2539
        # future we might write one, for now we just clone the state into a
2396
 
        # list - which is a shallow copy.
 
2540
        # list using a copy so that we see every original item and don't have
 
2541
        # to adjust the position when items are inserted or deleted in the
 
2542
        # underlying dirstate.
2397
2543
        old_iterator = iter(list(self._iter_entries()))
2398
2544
        # both must have roots so this is safe:
2399
2545
        current_new = new_iterator.next()
2433
2579
            # we make both end conditions explicit
2434
2580
            if not current_old:
2435
2581
                # old is finished: insert current_new into the state.
 
2582
                if tracing:
 
2583
                    trace.mutter("Appending from new '%s'.",
 
2584
                        new_path_utf8.decode('utf8'))
2436
2585
                self.update_minimal(new_entry_key, current_new_minikind,
2437
2586
                    executable=current_new[1].executable,
2438
 
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
 
2587
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2588
                    fullscan=True)
2439
2589
                current_new = advance(new_iterator)
2440
2590
            elif not current_new:
2441
2591
                # new is finished
 
2592
                if tracing:
 
2593
                    trace.mutter("Truncating from old '%s/%s'.",
 
2594
                        current_old[0][0].decode('utf8'),
 
2595
                        current_old[0][1].decode('utf8'))
2442
2596
                self._make_absent(current_old)
2443
2597
                current_old = advance(old_iterator)
2444
2598
            elif new_entry_key == current_old[0]:
2451
2605
                # kind has changed.
2452
2606
                if (current_old[1][0][3] != current_new[1].executable or
2453
2607
                    current_old[1][0][0] != current_new_minikind):
 
2608
                    if tracing:
 
2609
                        trace.mutter("Updating in-place change '%s'.",
 
2610
                            new_path_utf8.decode('utf8'))
2454
2611
                    self.update_minimal(current_old[0], current_new_minikind,
2455
2612
                        executable=current_new[1].executable,
2456
 
                        path_utf8=new_path_utf8, fingerprint=fingerprint)
 
2613
                        path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2614
                        fullscan=True)
2457
2615
                # both sides are dealt with, move on
2458
2616
                current_old = advance(old_iterator)
2459
2617
                current_new = advance(new_iterator)
2462
2620
                      and new_entry_key[1:] < current_old[0][1:])):
2463
2621
                # new comes before:
2464
2622
                # add a entry for this and advance new
 
2623
                if tracing:
 
2624
                    trace.mutter("Inserting from new '%s'.",
 
2625
                        new_path_utf8.decode('utf8'))
2465
2626
                self.update_minimal(new_entry_key, current_new_minikind,
2466
2627
                    executable=current_new[1].executable,
2467
 
                    path_utf8=new_path_utf8, fingerprint=fingerprint)
 
2628
                    path_utf8=new_path_utf8, fingerprint=fingerprint,
 
2629
                    fullscan=True)
2468
2630
                current_new = advance(new_iterator)
2469
2631
            else:
2470
2632
                # we've advanced past the place where the old key would be,
2471
2633
                # without seeing it in the new list.  so it must be gone.
 
2634
                if tracing:
 
2635
                    trace.mutter("Deleting from old '%s/%s'.",
 
2636
                        current_old[0][0].decode('utf8'),
 
2637
                        current_old[0][1].decode('utf8'))
2472
2638
                self._make_absent(current_old)
2473
2639
                current_old = advance(old_iterator)
2474
2640
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2475
2641
        self._id_index = None
2476
2642
        self._packed_stat_index = None
 
2643
        if tracing:
 
2644
            trace.mutter("set_state_from_inventory complete.")
2477
2645
 
2478
2646
    def _make_absent(self, current_old):
2479
2647
        """Mark current_old - an entry - as absent for tree 0.
2528
2696
        return last_reference
2529
2697
 
2530
2698
    def update_minimal(self, key, minikind, executable=False, fingerprint='',
2531
 
                       packed_stat=None, size=0, path_utf8=None):
 
2699
        packed_stat=None, size=0, path_utf8=None, fullscan=False):
2532
2700
        """Update an entry to the state in tree 0.
2533
2701
 
2534
2702
        This will either create a new entry at 'key' or update an existing one.
2545
2713
        :param size: Size information for new entry
2546
2714
        :param path_utf8: key[0] + '/' + key[1], just passed in to avoid doing
2547
2715
                extra computation.
 
2716
        :param fullscan: If True then a complete scan of the dirstate is being
 
2717
            done and checking for duplicate rows should not be done. This
 
2718
            should only be set by set_state_from_inventory and similar methods.
2548
2719
 
2549
2720
        If packed_stat and fingerprint are not given, they're invalidated in
2550
2721
        the entry.
2559
2730
        new_details = (minikind, fingerprint, size, executable, packed_stat)
2560
2731
        id_index = self._get_id_index()
2561
2732
        if not present:
 
2733
            # New record. Check there isn't a entry at this path already.
 
2734
            if not fullscan:
 
2735
                low_index, _ = self._find_entry_index(key[0:2] + ('',), block)
 
2736
                while low_index < len(block):
 
2737
                    entry = block[low_index]
 
2738
                    if entry[0][0:2] == key[0:2]:
 
2739
                        if entry[1][0][0] not in 'ar':
 
2740
                            # This entry has the same path (but a different id) as
 
2741
                            # the new entry we're adding, and is present in ths
 
2742
                            # tree.
 
2743
                            raise errors.InconsistentDelta(
 
2744
                                ("%s/%s" % key[0:2]).decode('utf8'), key[2],
 
2745
                                "Attempt to add item at path already occupied by "
 
2746
                                "id %r" % entry[0][2])
 
2747
                        low_index += 1
 
2748
                    else:
 
2749
                        break
2562
2750
            # new entry, synthesis cross reference here,
2563
2751
            existing_keys = id_index.setdefault(key[2], set())
2564
2752
            if not existing_keys:
2569
2757
                # grab one of them and use it to generate parent
2570
2758
                # relocation/absent entries.
2571
2759
                new_entry = key, [new_details]
2572
 
                for other_key in existing_keys:
 
2760
                # existing_keys can be changed as we iterate.
 
2761
                for other_key in tuple(existing_keys):
2573
2762
                    # change the record at other to be a pointer to this new
2574
2763
                    # record. The loop looks similar to the change to
2575
2764
                    # relocations when updating an existing record but its not:
2576
2765
                    # the test for existing kinds is different: this can be
2577
2766
                    # factored out to a helper though.
2578
 
                    other_block_index, present = self._find_block_index_from_key(other_key)
2579
 
                    if not present:
2580
 
                        raise AssertionError('could not find block for %s' % (other_key,))
2581
 
                    other_entry_index, present = self._find_entry_index(other_key,
2582
 
                                            self._dirblocks[other_block_index][1])
2583
 
                    if not present:
2584
 
                        raise AssertionError('could not find entry for %s' % (other_key,))
 
2767
                    other_block_index, present = self._find_block_index_from_key(
 
2768
                        other_key)
 
2769
                    if not present:
 
2770
                        raise AssertionError('could not find block for %s' % (
 
2771
                            other_key,))
 
2772
                    other_block = self._dirblocks[other_block_index][1]
 
2773
                    other_entry_index, present = self._find_entry_index(
 
2774
                        other_key, other_block)
 
2775
                    if not present:
 
2776
                        raise AssertionError(
 
2777
                            'update_minimal: could not find other entry for %s'
 
2778
                            % (other_key,))
2585
2779
                    if path_utf8 is None:
2586
2780
                        raise AssertionError('no path')
2587
 
                    self._dirblocks[other_block_index][1][other_entry_index][1][0] = \
2588
 
                        ('r', path_utf8, 0, False, '')
 
2781
                    # Turn this other location into a reference to the new
 
2782
                    # location. This also updates the aliased iterator
 
2783
                    # (current_old in set_state_from_inventory) so that the old
 
2784
                    # entry, if not already examined, is skipped over by that
 
2785
                    # loop.
 
2786
                    other_entry = other_block[other_entry_index]
 
2787
                    other_entry[1][0] = ('r', path_utf8, 0, False, '')
 
2788
                    self._maybe_remove_row(other_block, other_entry_index,
 
2789
                        id_index)
2589
2790
 
 
2791
                # This loop:
 
2792
                # adds a tuple to the new details for each column
 
2793
                #  - either by copying an existing relocation pointer inside that column
 
2794
                #  - or by creating a new pointer to the right row inside that column
2590
2795
                num_present_parents = self._num_present_parents()
 
2796
                if num_present_parents:
 
2797
                    other_key = list(existing_keys)[0]
2591
2798
                for lookup_index in xrange(1, num_present_parents + 1):
2592
2799
                    # grab any one entry, use it to find the right path.
2593
2800
                    # TODO: optimise this to reduce memory use in highly
2600
2807
                    update_entry_index, present = \
2601
2808
                        self._find_entry_index(other_key, self._dirblocks[update_block_index][1])
2602
2809
                    if not present:
2603
 
                        raise AssertionError('could not find entry for %s' % (other_key,))
 
2810
                        raise AssertionError('update_minimal: could not find entry for %s' % (other_key,))
2604
2811
                    update_details = self._dirblocks[update_block_index][1][update_entry_index][1][lookup_index]
2605
2812
                    if update_details[0] in 'ar': # relocated, absent
2606
2813
                        # its a pointer or absent in lookup_index's tree, use
2652
2859
 
2653
2860
        self._dirblock_state = DirState.IN_MEMORY_MODIFIED
2654
2861
 
 
2862
    def _maybe_remove_row(self, block, index, id_index):
 
2863
        """Remove index if it is absent or relocated across the row.
 
2864
        
 
2865
        id_index is updated accordingly.
 
2866
        """
 
2867
        present_in_row = False
 
2868
        entry = block[index]
 
2869
        for column in entry[1]:
 
2870
            if column[0] not in 'ar':
 
2871
                present_in_row = True
 
2872
                break
 
2873
        if not present_in_row:
 
2874
            block.pop(index)
 
2875
            id_index[entry[0][2]].remove(entry[0])
 
2876
 
2655
2877
    def _validate(self):
2656
2878
        """Check that invariants on the dirblock are correct.
2657
2879
 
2938
3160
                           False, DirState.NULLSTAT)
2939
3161
    state._dirblock_state = DirState.IN_MEMORY_MODIFIED
2940
3162
    return link_or_sha1
2941
 
update_entry = py_update_entry
2942
3163
 
2943
3164
 
2944
3165
class ProcessEntryPython(object):
2945
3166
 
2946
 
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id", "uninteresting",
 
3167
    __slots__ = ["old_dirname_to_file_id", "new_dirname_to_file_id",
2947
3168
        "last_source_parent", "last_target_parent", "include_unchanged",
2948
 
        "use_filesystem_for_exec", "utf8_decode", "searched_specific_files",
2949
 
        "search_specific_files", "state", "source_index", "target_index",
2950
 
        "want_unversioned", "tree"]
 
3169
        "partial", "use_filesystem_for_exec", "utf8_decode",
 
3170
        "searched_specific_files", "search_specific_files",
 
3171
        "searched_exact_paths", "search_specific_file_parents", "seen_ids",
 
3172
        "state", "source_index", "target_index", "want_unversioned", "tree"]
2951
3173
 
2952
3174
    def __init__(self, include_unchanged, use_filesystem_for_exec,
2953
3175
        search_specific_files, state, source_index, target_index,
2954
3176
        want_unversioned, tree):
2955
3177
        self.old_dirname_to_file_id = {}
2956
3178
        self.new_dirname_to_file_id = {}
2957
 
        # Just a sentry, so that _process_entry can say that this
2958
 
        # record is handled, but isn't interesting to process (unchanged)
2959
 
        self.uninteresting = object()
 
3179
        # Are we doing a partial iter_changes?
 
3180
        self.partial = search_specific_files != set([''])
2960
3181
        # Using a list so that we can access the values and change them in
2961
3182
        # nested scope. Each one is [path, file_id, entry]
2962
3183
        self.last_source_parent = [None, None]
2965
3186
        self.use_filesystem_for_exec = use_filesystem_for_exec
2966
3187
        self.utf8_decode = cache_utf8._utf8_decode
2967
3188
        # for all search_indexs in each path at or under each element of
2968
 
        # search_specific_files, if the detail is relocated: add the id, and add the
2969
 
        # relocated path as one to search if its not searched already. If the
2970
 
        # detail is not relocated, add the id.
 
3189
        # search_specific_files, if the detail is relocated: add the id, and
 
3190
        # add the relocated path as one to search if its not searched already.
 
3191
        # If the detail is not relocated, add the id.
2971
3192
        self.searched_specific_files = set()
 
3193
        # When we search exact paths without expanding downwards, we record
 
3194
        # that here.
 
3195
        self.searched_exact_paths = set()
2972
3196
        self.search_specific_files = search_specific_files
 
3197
        # The parents up to the root of the paths we are searching.
 
3198
        # After all normal paths are returned, these specific items are returned.
 
3199
        self.search_specific_file_parents = set()
 
3200
        # The ids we've sent out in the delta.
 
3201
        self.seen_ids = set()
2973
3202
        self.state = state
2974
3203
        self.source_index = source_index
2975
3204
        self.target_index = target_index
 
3205
        if target_index != 0:
 
3206
            # A lot of code in here depends on target_index == 0
 
3207
            raise errors.BzrError('unsupported target index')
2976
3208
        self.want_unversioned = want_unversioned
2977
3209
        self.tree = tree
2978
3210
 
2980
3212
        """Compare an entry and real disk to generate delta information.
2981
3213
 
2982
3214
        :param path_info: top_relpath, basename, kind, lstat, abspath for
2983
 
            the path of entry. If None, then the path is considered absent.
2984
 
            (Perhaps we should pass in a concrete entry for this ?)
 
3215
            the path of entry. If None, then the path is considered absent in 
 
3216
            the target (Perhaps we should pass in a concrete entry for this ?)
2985
3217
            Basename is returned as a utf8 string because we expect this
2986
3218
            tuple will be ignored, and don't want to take the time to
2987
3219
            decode.
2988
 
        :return: None if these don't match
2989
 
                 A tuple of information about the change, or
2990
 
                 the object 'uninteresting' if these match, but are
2991
 
                 basically identical.
 
3220
        :return: (iter_changes_result, changed). If the entry has not been
 
3221
            handled then changed is None. Otherwise it is False if no content
 
3222
            or metadata changes have occurred, and True if any content or
 
3223
            metadata change has occurred. If self.include_unchanged is True then
 
3224
            if changed is not None, iter_changes_result will always be a result
 
3225
            tuple. Otherwise, iter_changes_result is None unless changed is
 
3226
            True.
2992
3227
        """
2993
3228
        if self.source_index is None:
2994
3229
            source_details = DirState.NULL_PARENT_DETAILS
3099
3334
                    old_path = path = pathjoin(old_dirname, old_basename)
3100
3335
                self.old_dirname_to_file_id[old_path] = file_id
3101
3336
            # parent id is the entry for the path in the target tree
3102
 
            if old_dirname == self.last_source_parent[0]:
 
3337
            if old_basename and old_dirname == self.last_source_parent[0]:
3103
3338
                source_parent_id = self.last_source_parent[1]
3104
3339
            else:
3105
3340
                try:
3115
3350
                    self.last_source_parent[0] = old_dirname
3116
3351
                    self.last_source_parent[1] = source_parent_id
3117
3352
            new_dirname = entry[0][0]
3118
 
            if new_dirname == self.last_target_parent[0]:
 
3353
            if entry[0][1] and new_dirname == self.last_target_parent[0]:
3119
3354
                target_parent_id = self.last_target_parent[1]
3120
3355
            else:
3121
3356
                try:
3138
3373
                    self.last_target_parent[1] = target_parent_id
3139
3374
 
3140
3375
            source_exec = source_details[3]
3141
 
            if (self.include_unchanged
3142
 
                or content_change
 
3376
            changed = (content_change
3143
3377
                or source_parent_id != target_parent_id
3144
3378
                or old_basename != entry[0][1]
3145
3379
                or source_exec != target_exec
3146
 
                ):
 
3380
                )
 
3381
            if not changed and not self.include_unchanged:
 
3382
                return None, False
 
3383
            else:
3147
3384
                if old_path is None:
3148
3385
                    old_path = path = pathjoin(old_dirname, old_basename)
3149
3386
                    old_path_u = self.utf8_decode(old_path)[0]
3162
3399
                       (source_parent_id, target_parent_id),
3163
3400
                       (self.utf8_decode(old_basename)[0], self.utf8_decode(entry[0][1])[0]),
3164
3401
                       (source_kind, target_kind),
3165
 
                       (source_exec, target_exec))
3166
 
            else:
3167
 
                return self.uninteresting
 
3402
                       (source_exec, target_exec)), changed
3168
3403
        elif source_minikind in 'a' and target_minikind in 'fdlt':
3169
3404
            # looks like a new file
3170
3405
            path = pathjoin(entry[0][0], entry[0][1])
3191
3426
                       (None, parent_id),
3192
3427
                       (None, self.utf8_decode(entry[0][1])[0]),
3193
3428
                       (None, path_info[2]),
3194
 
                       (None, target_exec))
 
3429
                       (None, target_exec)), True
3195
3430
            else:
3196
3431
                # Its a missing file, report it as such.
3197
3432
                return (entry[0][2],
3201
3436
                       (None, parent_id),
3202
3437
                       (None, self.utf8_decode(entry[0][1])[0]),
3203
3438
                       (None, None),
3204
 
                       (None, False))
 
3439
                       (None, False)), True
3205
3440
        elif source_minikind in 'fdlt' and target_minikind in 'a':
3206
3441
            # unversioned, possibly, or possibly not deleted: we dont care.
3207
3442
            # if its still on disk, *and* theres no other entry at this
3219
3454
                   (parent_id, None),
3220
3455
                   (self.utf8_decode(entry[0][1])[0], None),
3221
3456
                   (DirState._minikind_to_kind[source_minikind], None),
3222
 
                   (source_details[3], None))
 
3457
                   (source_details[3], None)), True
3223
3458
        elif source_minikind in 'fdlt' and target_minikind in 'r':
3224
3459
            # a rename; could be a true rename, or a rename inherited from
3225
3460
            # a renamed parent. TODO: handle this efficiently. Its not
3237
3472
                "source_minikind=%r, target_minikind=%r"
3238
3473
                % (source_minikind, target_minikind))
3239
3474
            ## import pdb;pdb.set_trace()
3240
 
        return None
 
3475
        return None, None
3241
3476
 
3242
3477
    def __iter__(self):
3243
3478
        return self
3244
3479
 
 
3480
    def _gather_result_for_consistency(self, result):
 
3481
        """Check a result we will yield to make sure we are consistent later.
 
3482
        
 
3483
        This gathers result's parents into a set to output later.
 
3484
 
 
3485
        :param result: A result tuple.
 
3486
        """
 
3487
        if not self.partial or not result[0]:
 
3488
            return
 
3489
        self.seen_ids.add(result[0])
 
3490
        new_path = result[1][1]
 
3491
        if new_path:
 
3492
            # Not the root and not a delete: queue up the parents of the path.
 
3493
            self.search_specific_file_parents.update(
 
3494
                osutils.parent_directories(new_path.encode('utf8')))
 
3495
            # Add the root directory which parent_directories does not
 
3496
            # provide.
 
3497
            self.search_specific_file_parents.add('')
 
3498
 
3245
3499
    def iter_changes(self):
3246
3500
        """Iterate over the changes."""
3247
3501
        utf8_decode = cache_utf8._utf8_decode
3248
3502
        _cmp_by_dirs = cmp_by_dirs
3249
3503
        _process_entry = self._process_entry
3250
 
        uninteresting = self.uninteresting
3251
3504
        search_specific_files = self.search_specific_files
3252
3505
        searched_specific_files = self.searched_specific_files
3253
3506
        splitpath = osutils.splitpath
3323
3576
                continue
3324
3577
            path_handled = False
3325
3578
            for entry in root_entries:
3326
 
                result = _process_entry(entry, root_dir_info)
3327
 
                if result is not None:
 
3579
                result, changed = _process_entry(entry, root_dir_info)
 
3580
                if changed is not None:
3328
3581
                    path_handled = True
3329
 
                    if result is not uninteresting:
 
3582
                    if changed:
 
3583
                        self._gather_result_for_consistency(result)
 
3584
                    if changed or self.include_unchanged:
3330
3585
                        yield result
3331
3586
            if self.want_unversioned and not path_handled and root_dir_info:
3332
3587
                new_executable = bool(
3442
3697
                        for current_entry in current_block[1]:
3443
3698
                            # entry referring to file not present on disk.
3444
3699
                            # advance the entry only, after processing.
3445
 
                            result = _process_entry(current_entry, None)
3446
 
                            if result is not None:
3447
 
                                if result is not uninteresting:
 
3700
                            result, changed = _process_entry(current_entry, None)
 
3701
                            if changed is not None:
 
3702
                                if changed:
 
3703
                                    self._gather_result_for_consistency(result)
 
3704
                                if changed or self.include_unchanged:
3448
3705
                                    yield result
3449
3706
                        block_index +=1
3450
3707
                        if (block_index < len(self.state._dirblocks) and
3480
3737
                        pass
3481
3738
                    elif current_path_info is None:
3482
3739
                        # no path is fine: the per entry code will handle it.
3483
 
                        result = _process_entry(current_entry, current_path_info)
3484
 
                        if result is not None:
3485
 
                            if result is not uninteresting:
 
3740
                        result, changed = _process_entry(current_entry, current_path_info)
 
3741
                        if changed is not None:
 
3742
                            if changed:
 
3743
                                self._gather_result_for_consistency(result)
 
3744
                            if changed or self.include_unchanged:
3486
3745
                                yield result
3487
3746
                    elif (current_entry[0][1] != current_path_info[1]
3488
3747
                          or current_entry[1][self.target_index][0] in 'ar'):
3501
3760
                        else:
3502
3761
                            # entry referring to file not present on disk.
3503
3762
                            # advance the entry only, after processing.
3504
 
                            result = _process_entry(current_entry, None)
3505
 
                            if result is not None:
3506
 
                                if result is not uninteresting:
 
3763
                            result, changed = _process_entry(current_entry, None)
 
3764
                            if changed is not None:
 
3765
                                if changed:
 
3766
                                    self._gather_result_for_consistency(result)
 
3767
                                if changed or self.include_unchanged:
3507
3768
                                    yield result
3508
3769
                            advance_path = False
3509
3770
                    else:
3510
 
                        result = _process_entry(current_entry, current_path_info)
3511
 
                        if result is not None:
 
3771
                        result, changed = _process_entry(current_entry, current_path_info)
 
3772
                        if changed is not None:
3512
3773
                            path_handled = True
3513
 
                            if result is not uninteresting:
 
3774
                            if changed:
 
3775
                                self._gather_result_for_consistency(result)
 
3776
                            if changed or self.include_unchanged:
3514
3777
                                yield result
3515
3778
                    if advance_entry and current_entry is not None:
3516
3779
                        entry_index += 1
3575
3838
                        current_dir_info = dir_iterator.next()
3576
3839
                    except StopIteration:
3577
3840
                        current_dir_info = None
3578
 
_process_entry = ProcessEntryPython
 
3841
        for result in self._iter_specific_file_parents():
 
3842
            yield result
 
3843
 
 
3844
    def _iter_specific_file_parents(self):
 
3845
        """Iter over the specific file parents."""
 
3846
        while self.search_specific_file_parents:
 
3847
            # Process the parent directories for the paths we were iterating.
 
3848
            # Even in extremely large trees this should be modest, so currently
 
3849
            # no attempt is made to optimise.
 
3850
            path_utf8 = self.search_specific_file_parents.pop()
 
3851
            if osutils.is_inside_any(self.searched_specific_files, path_utf8):
 
3852
                # We've examined this path.
 
3853
                continue
 
3854
            if path_utf8 in self.searched_exact_paths:
 
3855
                # We've examined this path.
 
3856
                continue
 
3857
            path_entries = self.state._entries_for_path(path_utf8)
 
3858
            # We need either one or two entries. If the path in
 
3859
            # self.target_index has moved (so the entry in source_index is in
 
3860
            # 'ar') then we need to also look for the entry for this path in
 
3861
            # self.source_index, to output the appropriate delete-or-rename.
 
3862
            selected_entries = []
 
3863
            found_item = False
 
3864
            for candidate_entry in path_entries:
 
3865
                # Find entries present in target at this path:
 
3866
                if candidate_entry[1][self.target_index][0] not in 'ar':
 
3867
                    found_item = True
 
3868
                    selected_entries.append(candidate_entry)
 
3869
                # Find entries present in source at this path:
 
3870
                elif (self.source_index is not None and
 
3871
                    candidate_entry[1][self.source_index][0] not in 'ar'):
 
3872
                    found_item = True
 
3873
                    if candidate_entry[1][self.target_index][0] == 'a':
 
3874
                        # Deleted, emit it here.
 
3875
                        selected_entries.append(candidate_entry)
 
3876
                    else:
 
3877
                        # renamed, emit it when we process the directory it
 
3878
                        # ended up at.
 
3879
                        self.search_specific_file_parents.add(
 
3880
                            candidate_entry[1][self.target_index][1])
 
3881
            if not found_item:
 
3882
                raise AssertionError(
 
3883
                    "Missing entry for specific path parent %r, %r" % (
 
3884
                    path_utf8, path_entries))
 
3885
            path_info = self._path_info(path_utf8, path_utf8.decode('utf8'))
 
3886
            for entry in selected_entries:
 
3887
                if entry[0][2] in self.seen_ids:
 
3888
                    continue
 
3889
                result, changed = self._process_entry(entry, path_info)
 
3890
                if changed is None:
 
3891
                    raise AssertionError(
 
3892
                        "Got entry<->path mismatch for specific path "
 
3893
                        "%r entry %r path_info %r " % (
 
3894
                        path_utf8, entry, path_info))
 
3895
                # Only include changes - we're outside the users requested
 
3896
                # expansion.
 
3897
                if changed:
 
3898
                    self._gather_result_for_consistency(result)
 
3899
                    if (result[6][0] == 'directory' and
 
3900
                        result[6][1] != 'directory'):
 
3901
                        # This stopped being a directory, the old children have
 
3902
                        # to be included.
 
3903
                        if entry[1][self.source_index][0] == 'r':
 
3904
                            # renamed, take the source path
 
3905
                            entry_path_utf8 = entry[1][self.source_index][1]
 
3906
                        else:
 
3907
                            entry_path_utf8 = path_utf8
 
3908
                        initial_key = (entry_path_utf8, '', '')
 
3909
                        block_index, _ = self.state._find_block_index_from_key(
 
3910
                            initial_key)
 
3911
                        if block_index == 0:
 
3912
                            # The children of the root are in block index 1.
 
3913
                            block_index +=1
 
3914
                        current_block = None
 
3915
                        if block_index < len(self.state._dirblocks):
 
3916
                            current_block = self.state._dirblocks[block_index]
 
3917
                            if not osutils.is_inside(
 
3918
                                entry_path_utf8, current_block[0]):
 
3919
                                # No entries for this directory at all.
 
3920
                                current_block = None
 
3921
                        if current_block is not None:
 
3922
                            for entry in current_block[1]:
 
3923
                                if entry[1][self.source_index][0] in 'ar':
 
3924
                                    # Not in the source tree, so doesn't have to be
 
3925
                                    # included.
 
3926
                                    continue
 
3927
                                # Path of the entry itself.
 
3928
 
 
3929
                                self.search_specific_file_parents.add(
 
3930
                                    osutils.pathjoin(*entry[0][:2]))
 
3931
                if changed or self.include_unchanged:
 
3932
                    yield result
 
3933
            self.searched_exact_paths.add(path_utf8)
 
3934
 
 
3935
    def _path_info(self, utf8_path, unicode_path):
 
3936
        """Generate path_info for unicode_path.
 
3937
 
 
3938
        :return: None if unicode_path does not exist, or a path_info tuple.
 
3939
        """
 
3940
        abspath = self.tree.abspath(unicode_path)
 
3941
        try:
 
3942
            stat = os.lstat(abspath)
 
3943
        except OSError, e:
 
3944
            if e.errno == errno.ENOENT:
 
3945
                # the path does not exist.
 
3946
                return None
 
3947
            else:
 
3948
                raise
 
3949
        utf8_basename = utf8_path.rsplit('/', 1)[-1]
 
3950
        dir_info = (utf8_path, utf8_basename,
 
3951
            osutils.file_kind_from_stat_mode(stat.st_mode), stat,
 
3952
            abspath)
 
3953
        if dir_info[2] == 'directory':
 
3954
            if self.tree._directory_is_tree_reference(
 
3955
                unicode_path):
 
3956
                self.root_dir_info = self.root_dir_info[:2] + \
 
3957
                    ('tree-reference',) + self.root_dir_info[3:]
 
3958
        return dir_info
3579
3959
 
3580
3960
 
3581
3961
# Try to load the compiled form if possible
3582
3962
try:
3583
 
    from bzrlib._dirstate_helpers_c import (
3584
 
        _read_dirblocks_c as _read_dirblocks,
3585
 
        bisect_dirblock_c as bisect_dirblock,
3586
 
        _bisect_path_left_c as _bisect_path_left,
3587
 
        _bisect_path_right_c as _bisect_path_right,
3588
 
        cmp_by_dirs_c as cmp_by_dirs,
 
3963
    from bzrlib._dirstate_helpers_pyx import (
 
3964
        _read_dirblocks,
 
3965
        bisect_dirblock,
 
3966
        _bisect_path_left,
 
3967
        _bisect_path_right,
 
3968
        cmp_by_dirs,
3589
3969
        ProcessEntryC as _process_entry,
3590
3970
        update_entry as update_entry,
3591
3971
        )
3592
3972
except ImportError:
3593
3973
    from bzrlib._dirstate_helpers_py import (
3594
 
        _read_dirblocks_py as _read_dirblocks,
3595
 
        bisect_dirblock_py as bisect_dirblock,
3596
 
        _bisect_path_left_py as _bisect_path_left,
3597
 
        _bisect_path_right_py as _bisect_path_right,
3598
 
        cmp_by_dirs_py as cmp_by_dirs,
 
3974
        _read_dirblocks,
 
3975
        bisect_dirblock,
 
3976
        _bisect_path_left,
 
3977
        _bisect_path_right,
 
3978
        cmp_by_dirs,
3599
3979
        )
 
3980
    # FIXME: It would be nice to be able to track moved lines so that the
 
3981
    # corresponding python code can be moved to the _dirstate_helpers_py
 
3982
    # module. I don't want to break the history for this important piece of
 
3983
    # code so I left the code here -- vila 20090622
 
3984
    update_entry = py_update_entry
 
3985
    _process_entry = ProcessEntryPython