~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/knit.py

  • Committer: Mark Hammond
  • Date: 2008-12-28 05:21:23 UTC
  • mfrom: (3920 +trunk)
  • mto: (3932.1.1 prepare-1.11)
  • mto: This revision was merged to the branch mainline in revision 3937.
  • Revision ID: mhammond@skippinet.com.au-20081228052123-f78xs5sbdkotshwf
Commit message: merge trunk

@@ -110,7 +110,7 @@
     adapter_registry,
     ConstantMapper,
     ContentFactory,
-    FulltextContentFactory,
+    ChunkedContentFactory,
     VersionedFile,
     VersionedFiles,
     )
@@ -196,7 +196,8 @@
             [compression_parent], 'unordered', True).next()
         if basis_entry.storage_kind == 'absent':
             raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
-        basis_lines = split_lines(basis_entry.get_bytes_as('fulltext'))
+        basis_chunks = basis_entry.get_bytes_as('chunked')
+        basis_lines = osutils.chunks_to_lines(basis_chunks)
         # Manually apply the delta because we have one annotated content and
         # one plain.
         basis_content = PlainKnitContent(basis_lines, compression_parent)
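
The pattern repeated throughout this diff is the switch from the 'fulltext' representation (one big string, re-split with split_lines) to the 'chunked' representation (a list of byte chunks, flattened with osutils.chunks_to_lines), which avoids building a large intermediate string. A rough sketch of what chunks_to_lines has to guarantee; this is an illustrative re-implementation, not bzrlib's optimized one (which splits strictly on '\n'):

    def chunks_to_lines_sketch(chunks):
        # Fast path: each chunk is already exactly one newline-terminated
        # line, so the list can be reused as-is.
        if all(chunk.endswith('\n') and chunk.count('\n') == 1
               for chunk in chunks):
            return list(chunks)
        # Slow path: flatten and re-split, keeping the terminators.
        return ''.join(chunks).splitlines(True)

    assert chunks_to_lines_sketch(['foo\n', 'bar\n']) == ['foo\n', 'bar\n']
    assert chunks_to_lines_sketch(['foo\nba', 'r\n']) == ['foo\n', 'bar\n']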
@@ -229,7 +230,8 @@
             [compression_parent], 'unordered', True).next()
         if basis_entry.storage_kind == 'absent':
             raise errors.RevisionNotPresent(compression_parent, self._basis_vf)
-        basis_lines = split_lines(basis_entry.get_bytes_as('fulltext'))
+        basis_chunks = basis_entry.get_bytes_as('chunked')
+        basis_lines = osutils.chunks_to_lines(basis_chunks)
         basis_content = PlainKnitContent(basis_lines, compression_parent)
         # Manually apply the delta because we have one annotated content and
         # one plain.
@@ -276,11 +278,13 @@
     def get_bytes_as(self, storage_kind):
         if storage_kind == self.storage_kind:
             return self._raw_record
-        if storage_kind == 'fulltext' and self._knit is not None:
-            return self._knit.get_text(self.key[0])
-        else:
-            raise errors.UnavailableRepresentation(self.key, storage_kind,
-                self.storage_kind)
+        if self._knit is not None:
+            if storage_kind == 'chunked':
+                return self._knit.get_lines(self.key[0])
+            elif storage_kind == 'fulltext':
+                return self._knit.get_text(self.key[0])
+        raise errors.UnavailableRepresentation(self.key, storage_kind,
+            self.storage_kind)
 
 
 class KnitContent(object):
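
get_bytes_as now tries 'chunked' before falling through to the shared UnavailableRepresentation raise, so a record backed by a live knit can serve both kinds. A hypothetical caller that prefers chunks but copes with fulltext-only records (record_as_lines is our name, not bzrlib's):

    from bzrlib import errors, osutils

    def record_as_lines(record):
        # Prefer the cheap list-of-chunks form; fall back to splitting
        # a fulltext if the factory cannot provide chunks.
        try:
            return osutils.chunks_to_lines(record.get_bytes_as('chunked'))
        except errors.UnavailableRepresentation:
            return osutils.split_lines(record.get_bytes_as('fulltext'))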
@@ -1020,7 +1024,7 @@
                 if record.storage_kind == 'absent':
                     continue
                 missing_keys.remove(record.key)
-                lines = split_lines(record.get_bytes_as('fulltext'))
+                lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
                 text_map[record.key] = lines
                 content_map[record.key] = PlainKnitContent(lines, record.key)
                 if record.key in keys:
@@ -1262,6 +1266,13 @@
                 for key in parent_map:
                     present_keys.append(key)
                     source_keys[-1][1].append(key)
+            # We have been requested to return these records in an order that
+            # suits us. So we ask the index to give us an optimally sorted
+            # order.
+            for source, sub_keys in source_keys:
+                if source is parent_maps[0]:
+                    # Only sort the keys for this VF
+                    self._index._sort_keys_by_io(sub_keys, positions)
         absent_keys = keys - set(global_map)
         for key in absent_keys:
             yield AbsentContentFactory(key)
@@ -1281,9 +1292,8 @@
                 text_map, _ = self._get_content_maps(keys, non_local)
                 for key in keys:
                     lines = text_map.pop(key)
-                    text = ''.join(lines)
-                    yield FulltextContentFactory(key, global_map[key], None,
-                                                 text)
+                    yield ChunkedContentFactory(key, global_map[key], None,
+                                                lines)
         else:
            for source, keys in source_keys:
                 if source is parent_maps[0]:
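
Yielding ChunkedContentFactory instead of FulltextContentFactory pushes the ''.join(lines) cost onto consumers that genuinely need a single string. A sketch of such a consumer, using only the get_record_stream/get_bytes_as API visible in this file:

    for record in vf.get_record_stream(keys, 'unordered', True):
        if record.storage_kind == 'absent':
            continue
        chunks = record.get_bytes_as('chunked')
        # Join only when a single string is really required; writing the
        # chunks out one by one avoids the copy entirely.
        text = ''.join(chunks)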
@@ -1436,6 +1446,9 @@
                         buffered = True
                 if not buffered:
                     self._index.add_records([index_entry])
+            elif record.storage_kind == 'chunked':
+                self.add_lines(record.key, parents,
+                    osutils.chunks_to_lines(record.get_bytes_as('chunked')))
             elif record.storage_kind == 'fulltext':
                 self.add_lines(record.key, parents,
                     split_lines(record.get_bytes_as('fulltext')))
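
With the new branch, insert_record_stream accepts 'chunked' records from a source that produces them (such as the ChunkedContentFactory above) without the source having to materialize fulltexts first. A minimal sketch of a cross-VF copy under that assumption (source_vf and target_vf are hypothetical VersionedFiles instances):

    stream = source_vf.get_record_stream(keys, 'unordered', False)
    target_vf.insert_record_stream(stream)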
@@ -2129,6 +2142,26 @@
         else:
             self._mode = 'r'
 
+    def _sort_keys_by_io(self, keys, positions):
+        """Figure out an optimal order to read the records for the given keys.
+
+        Sort keys, grouped by index and sorted by position.
+
+        :param keys: A list of keys whose records we want to read. This will be
+            sorted 'in-place'.
+        :param positions: A dict, such as the one returned by
+            _get_components_positions()
+        :return: None
+        """
+        def get_sort_key(key):
+            index_memo = positions[key][1]
+            # Group by prefix and position. index_memo[0] is the key, so it is
+            # (file_id, revision_id) and we don't want to sort on revision_id,
+            # index_memo[1] is the position, and index_memo[2] is the size,
+            # which doesn't matter for the sort
+            return index_memo[0][:-1], index_memo[1]
+        return keys.sort(key=get_sort_key)
+
     def _split_key(self, key):
         """Split key into a prefix and suffix."""
         return key[:-1], key[-1]
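
The sort key here is (key prefix, byte position), so keys are grouped per .knit file and read in on-disk order within each file. A toy illustration with a hand-built positions dict (only slot [1], the index_memo, matters for the sort; the other slots are elided with None):

    positions = {
        ('file-a', 'rev-2'): (None, (('file-a', 'rev-2'), 200, 10)),
        ('file-a', 'rev-1'): (None, (('file-a', 'rev-1'), 100, 10)),
        ('file-b', 'rev-1'): (None, (('file-b', 'rev-1'), 50, 10)),
    }
    keys = list(positions)
    keys.sort(key=lambda k: (positions[k][1][0][:-1], positions[k][1][1]))
    # keys is now file-a@100, file-a@200, file-b@50: grouped by prefix,
    # then sorted by position within each group.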
@@ -2388,6 +2421,26 @@
         bits = node[2][1:].split(' ')
         return node[0], int(bits[0]), int(bits[1])
 
+    def _sort_keys_by_io(self, keys, positions):
+        """Figure out an optimal order to read the records for the given keys.
+
+        Sort keys, grouped by index and sorted by position.
+
+        :param keys: A list of keys whose records we want to read. This will be
+            sorted 'in-place'.
+        :param positions: A dict, such as the one returned by
+            _get_components_positions()
+        :return: None
+        """
+        def get_index_memo(key):
+            # index_memo is at offset [1]. It is made up of (GraphIndex,
+            # position, size). GI is an object, which will be unique for each
+            # pack file. This causes us to group by pack file, then sort by
+            # position. Size doesn't matter, but it isn't worth breaking up the
+            # tuple.
+            return positions[key][1]
+        return keys.sort(key=get_index_memo)
+
 
 class _KnitKeyAccess(object):
     """Access to records in .knit files."""
@@ -2541,7 +2594,8 @@
                     # If we don't have a _reload_func there is nothing that can
                     # be done
                     raise
-                raise errors.RetryWithNewPacks(reload_occurred=True,
+                raise errors.RetryWithNewPacks(index,
+                                               reload_occurred=True,
                                                exc_info=sys.exc_info())
             try:
                 reader = pack.make_readv_reader(transport, path, offsets)
@@ -2552,7 +2606,8 @@
                 # missing on disk, we need to trigger a reload, and start over.
                 if self._reload_func is None:
                     raise
-                raise errors.RetryWithNewPacks(reload_occurred=False,
+                raise errors.RetryWithNewPacks(transport.abspath(path),
+                                               reload_occurred=False,
                                                exc_info=sys.exc_info())
 
     def set_writer(self, writer, index, transport_packname):
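
Both RetryWithNewPacks raise sites above now pass a first positional argument naming what was being read (the index object in the first hunk, transport.abspath(path) in the second), which makes a retry failure traceable to a specific pack. A hedged sketch of the caller shape this implies; only the reload_occurred flag and the reload-function convention come from this file, the loop itself is ours:

    from bzrlib import errors

    def read_with_retry(do_read, reload_func):
        tried_reload = False
        while True:
            try:
                return do_read()
            except errors.RetryWithNewPacks:
                # The pack list changed under us: reload it and retry once.
                # The raising side only gets here when a reload function is
                # available, so a second failure means the data is really gone.
                if tried_reload:
                    raise
                reload_func()
                tried_reload = True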
@@ -2905,7 +2960,7 @@
         reannotate = annotate.reannotate
         for record in self._knit.get_record_stream(keys, 'topological', True):
             key = record.key
-            fulltext = split_lines(record.get_bytes_as('fulltext'))
+            fulltext = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
             parents = parent_map[key]
             if parents is not None:
                 parent_lines = [parent_cache[parent] for parent in parent_map[key]]