         where, size = self._data.add_record(version_id, digest, store_lines)
         self._index.add_version(version_id, options, where, size, parents)
 
+    def _add_raw_records(self, records, data):
+        """Add all the records 'records' with data pre-joined in 'data'.
+
+        :param records: A list of tuples(version_id, options, parents, size).
+        :param data: The data for the records. When it is written, the records
+                     are adjusted to have pos pointing into data by the sum of
+                     the preceding records sizes.
+        """
+        pos = self._data.add_raw_record(data)
+        index_entries = []
+        for (version_id, options, parents, size) in records:
+            index_entries.append((version_id, options, pos, size, parents))
+            pos += size
+        self._index.add_versions(index_entries)
+
     def clear_cache(self):
         """Clear the data cache only."""
         self._data.clear_cache()
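
The batching above works because the data is pre-joined: only the starting offset comes back from the transport, and each record's index position is the running sum of the sizes before it. A rough, self-contained sketch of that bookkeeping (plain Python, illustrative names, not bzrlib API):

def _positions_for(records, start_pos):
    """Yield (version_id, options, pos, size, parents) index entries.

    records is an iterable of (version_id, options, parents, size) tuples;
    start_pos is the offset at which the joined data was appended.
    """
    pos = start_pos
    for (version_id, options, parents, size) in records:
        yield (version_id, options, pos, size, parents)
        pos += size

# Two records of 10 and 20 bytes appended at offset 100 get index positions
# 100 and 110 respectively.
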
     def add_version(self, version_id, options, pos, size, parents):
         """Add a version record to the index."""
-        self._cache_version(version_id, options, pos, size, parents)
-        content = "\n%s %s %s %s %s :" % (version_id.encode('utf-8'),
-                                          ','.join(options),
-                                          pos,
-                                          size,
-                                          self._version_list_to_index(parents))
-        assert isinstance(content, str), 'content must be utf-8 encoded'
-        self._transport.append(self._filename, StringIO(content))
+        self.add_versions(((version_id, options, pos, size, parents),))
+
+    def add_versions(self, versions):
+        """Add multiple versions to the index.
+
+        :param versions: a list of tuples:
+                         (version_id, options, pos, size, parents).
+        """
+        lines = []
+        for version_id, options, pos, size, parents in versions:
+            line = "\n%s %s %s %s %s :" % (version_id.encode('utf-8'),
+                                           ','.join(options),
+                                           pos,
+                                           size,
+                                           self._version_list_to_index(parents))
+            assert isinstance(line, str), \
+                'content must be utf-8 encoded: %r' % (line,)
+            lines.append(line)
+        self._transport.append(self._filename, StringIO(''.join(lines)))
+        # cache after writing, so that a failed write leads to missing cache
+        # entries not extra ones. XXX TODO: RBC 20060502 in the event of a
+        # failure, reload the index or flush it or some such, to prevent
+        # writing records that did complete twice.
+        for version_id, options, pos, size, parents in versions:
+            self._cache_version(version_id, options, pos, size, parents)
 
     def has_version(self, version_id):
         """True if the version is in the index."""
         return self._cache.has_key(version_id)
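
To make the on-disk format concrete: each entry appended by add_versions() is one text line of the shape "\n<version_id> <options> <pos> <size> <parents> :", and batching simply concatenates those lines into a single transport append. A rough sketch of the serialisation only (illustrative, not the bzrlib implementation; the real code maps parents through _version_list_to_index rather than writing them literally):

def example_index_lines(versions):
    """versions: iterable of (version_id, options, pos, size, parents)."""
    lines = []
    for version_id, options, pos, size, parents in versions:
        lines.append("\n%s %s %s %s %s :" % (
            version_id, ','.join(options), pos, size, ' '.join(parents)))
    return ''.join(lines)

# example_index_lines([('v-2', ['line-delta'], 100, 42, ['v-1'])])
# -> '\nv-2 line-delta 100 42 v-1 :'
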
         return length, sio
 
     def add_raw_record(self, raw_data):
-        """Append a prepared record to the data file."""
+        """Append a prepared record to the data file.
+
+        :return: the offset in the data file at which raw_data was written.
+        """
         assert isinstance(raw_data, str), 'data must be plain bytes'
-        start_pos = self._transport.append(self._filename, StringIO(raw_data))
-        return start_pos, len(raw_data)
+        return self._transport.append(self._filename, StringIO(raw_data))
 
     def add_record(self, version_id, digest, lines):
         """Write new text record to disk. Returns the position in the

             # data suck the join:
             count = 0
             total = len(version_list)
-            # we want the raw gzip for bulk copying, but the record validated
-            # just enough to be sure it's the right one.
-            # TODO: consider writev or write combining to reduce
-            # death of a thousand cuts feeling.
+            raw_datum = []
+            raw_records = []
             for (version_id, raw_data), \
                 (version_id2, options, parents) in \
                 izip(self.source._data.read_records_iter_raw(copy_queue_records),
                 assert version_id == version_id2, 'logic error, inconsistent results'
                 count = count + 1
                 pb.update("Joining knit", count, total)
-                pos, size = self.target._data.add_raw_record(raw_data)
-                self.target._index.add_version(version_id, options, pos, size, parents)
+                raw_records.append((version_id, options, parents, len(raw_data)))
+                raw_datum.append(raw_data)
+            self.target._add_raw_records(raw_records, ''.join(raw_datum))
 
             for version in mismatched_versions:
                 # FIXME RBC 20060309 is this needed?

 InterVersionedFile.register_optimiser(InterKnit)
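
The point of the change in the copy loop is to collapse per-version round trips into two appends: one for the joined data and one for the index. A rough before/after sketch of the call shape, assuming the copy queue has already been flattened into (version_id, options, parents, raw_data) tuples (helper names are illustrative, not bzrlib API):

def _join_one_at_a_time(target, queue):
    # Old shape: one data append and one index append per copied version.
    for version_id, options, parents, raw_data in queue:
        pos, size = target._data.add_raw_record(raw_data)  # old API returned (pos, size)
        target._index.add_version(version_id, options, pos, size, parents)

def _join_batched(target, queue):
    # New shape: accumulate, then a single data append and a single index append.
    records = []
    raw_datum = []
    for version_id, options, parents, raw_data in queue:
        records.append((version_id, options, parents, len(raw_data)))
        raw_datum.append(raw_data)
    target._add_raw_records(records, ''.join(raw_datum))
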
+class WeaveToKnit(InterVersionedFile):
+    """Optimised code paths for weave to knit operations."""
+
+    _matching_file_from_factory = bzrlib.weave.WeaveFile
+    _matching_file_to_factory = KnitVersionedFile
+
+    @staticmethod
+    def is_compatible(source, target):
+        """Be compatible with weaves to knits."""
+        try:
+            return (isinstance(source, bzrlib.weave.Weave) and
+                    isinstance(target, KnitVersionedFile))
+        except AttributeError:
+            return False
+
+    def join(self, pb=None, msg=None, version_ids=None, ignore_missing=False):
+        """See InterVersionedFile.join."""
+        assert isinstance(self.source, bzrlib.weave.Weave)
+        assert isinstance(self.target, KnitVersionedFile)
+
+        version_ids = self._get_source_version_ids(version_ids, ignore_missing)
+
+        pb = bzrlib.ui.ui_factory.nested_progress_bar()
+
+        version_ids = list(version_ids)
+
+        self.source_ancestry = set(self.source.get_ancestry(version_ids))
+        this_versions = set(self.target._index.get_versions())
+        needed_versions = self.source_ancestry - this_versions
+        cross_check_versions = self.source_ancestry.intersection(this_versions)
+        mismatched_versions = set()
+        for version in cross_check_versions:
+            # scan to include needed parents.
+            n1 = set(self.target.get_parents_with_ghosts(version))
+            n2 = set(self.source.get_parents(version))
+            # if all of n2's parents are in n1, then it's fine.
+            if n2.difference(n1):
+                # FIXME TEST this check for cycles being introduced works
+                # the logic is we have a cycle if in our graph we are an
+                # ancestor of any of the n2 revisions.
+                for parent in n2:
+                    parent_ancestors = self.source.get_ancestry(parent)
+                    if version in parent_ancestors:
+                        raise errors.GraphCycleError([parent, version])
+                # ensure this parent will be available later.
+                new_parents = n2.difference(n1)
+                needed_versions.update(new_parents.difference(this_versions))
+                mismatched_versions.add(version)
+
+        if not needed_versions and not mismatched_versions:
+            return 0
+        full_list = topo_sort(self.source.get_graph())
+
+        version_list = [i for i in full_list if (not self.target.has_version(i)
+                        and i in needed_versions)]
+
+        count = 0
+        total = len(version_list)
+        for version_id in version_list:
+            pb.update("Converting to knit", count, total)
+            parents = self.source.get_parents(version_id)
+            # check that it will be a consistent copy:
+            for parent in parents:
+                # if source has the parent, we must already have it
+                assert (self.target.has_version(parent))
+            self.target.add_lines(
+                version_id, parents, self.source.get_lines(version_id))
+            count = count + 1
+
+        for version in mismatched_versions:
+            # FIXME RBC 20060309 is this needed?
+            n1 = set(self.target.get_parents_with_ghosts(version))
+            n2 = set(self.source.get_parents(version))
+            # write a combined record to our history preserving the current
+            # parents as first in the list
+            new_parents = self.target.get_parents_with_ghosts(version) + list(n2.difference(n1))
+            self.target.fix_parents(version, new_parents)

+InterVersionedFile.register_optimiser(WeaveToKnit)
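
For the mismatched-version fix-up in join(), the combined parent list keeps the target's existing (possibly ghost) parents first and then appends whatever parents only the source knew about. A minimal sketch of that merge (plain Python, illustrative only; the original uses a set difference, so the order of the appended extras is unspecified):

def combined_parents(target_parents, source_parents):
    extra = set(source_parents).difference(target_parents)
    return list(target_parents) + list(extra)

# combined_parents(['a', 'b'], ['b', 'c']) -> ['a', 'b', 'c']
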
 class SequenceMatcher(difflib.SequenceMatcher):
     """Knit tuned sequence matcher.