    def _version_ids_to_keys(self, version_ids):
        return set((version_id, ) for version_id in version_ids)


class _KnitAccess(object):
    """Access to knit records in a .knit file."""

    def __init__(self, transport, filename, _file_mode, _dir_mode,
        _need_to_create, _create_parent_dir):
        """Create a _KnitAccess for accessing and inserting data.

        :param transport: The transport the .knit is located on.
        :param filename: The filename of the .knit.
        """
        self._transport = transport
        self._filename = filename
        self._file_mode = _file_mode
        self._dir_mode = _dir_mode
        self._need_to_create = _need_to_create
        self._create_parent_dir = _create_parent_dir

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to wherever the access method is storing data.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is always None
            for the .knit access method.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        if not self._need_to_create:
            base = self._transport.append_bytes(self._filename, raw_data)
        else:
            self._transport.put_bytes_non_atomic(self._filename, raw_data,
                create_parent_dir=self._create_parent_dir,
                mode=self._file_mode,
                dir_mode=self._dir_mode)
            self._need_to_create = False
            # The file was just created, so the new data starts at offset 0.
            base = 0
        result = []
        for size in sizes:
            result.append((None, base, size))
            base += size
        return result
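
    # Example of the memo layout (illustrative values): appending two records
    # of sizes [10, 20] at base offset 0 yields memos
    # [(None, 0, 10), (None, 10, 20)]; the None index field marks the plain
    # .knit access method.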

    def create(self):
        """IFF this data access has its own storage area, initialise it.

        :return: None.
        """
        self._transport.put_bytes_non_atomic(self._filename, '',
            mode=self._file_mode)

    def open_file(self):
        """IFF this data access can be represented as a single file, open it.

        For knits that are not mapped to a single file on disk this will
        always return None.

        :return: None or a file handle.
        """
        return self._transport.get(self._filename)

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for the records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The .knit method ignores
            the index as there is always only a single file.
        :return: An iterator over the bytes of the records.
        """
        read_vector = [(pos, size) for (index, pos, size) in memos_for_retrieval]
        for pos, data in self._transport.readv(self._filename, read_vector):
            yield data
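
# Usage sketch (assumes a bzrlib transport object, e.g. from
# bzrlib.transport.get_transport('.'); the filename 'example.knit' is
# arbitrary):
#
#   access = _KnitAccess(transport, 'example.knit', None, None, True, False)
#   memos = access.add_raw_records([5, 3], 'hellofoo')
#   # memos == [(None, 0, 5), (None, 5, 3)]
#   chunks = list(access.get_raw_records(memos))
#   # chunks == ['hello', 'foo']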


class _PackAccess(object):
    """Access to knit records via a collection of packs."""

    def __init__(self, index_to_packs, writer=None):
        """Create a _PackAccess object.

        :param index_to_packs: A dict mapping index objects to the transport
            and file names for obtaining data.
        :param writer: A tuple (pack.ContainerWriter, write_index) which
            contains the pack to write, and the index that reads from it
            will be associated with.
        """
        if writer:
            self.container_writer = writer[0]
            self.write_index = writer[1]
        else:
            self.container_writer = None
            self.write_index = None
        self.indices = index_to_packs

    def add_raw_records(self, sizes, raw_data):
        """Add raw knit bytes to a storage area.

        The data is spooled to the container writer in one bytes record per
        raw data item.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: A list of memos to retrieve the record later. Each memo is a
            tuple - (index, pos, length), where the index field is the
            write_index object supplied to the PackAccess object.
        """
        assert type(raw_data) == str, \
            'data must be plain bytes was %s' % type(raw_data)
        result = []
        offset = 0
        for size in sizes:
            p_offset, p_length = self.container_writer.add_bytes_record(
                raw_data[offset:offset+size], [])
            offset += size
            result.append((self.write_index, p_offset, p_length))
        return result
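
    # As with _KnitAccess.add_raw_records, one memo is returned per size, but
    # here each memo names the write_index together with the (offset, length)
    # that ContainerWriter.add_bytes_record reported for the record inside
    # the pack being written.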

    def create(self):
        """Pack based knits do not get individually created."""

    def get_raw_records(self, memos_for_retrieval):
        """Get the raw bytes for the records.

        :param memos_for_retrieval: An iterable containing the (index, pos,
            length) memo for retrieving the bytes. The Pack access method
            looks up the pack to use for a given record in its index_to_pack
            map.
        :return: An iterator over the bytes of the records.
        """
        # first pass, group into same-index requests
        request_lists = []
        current_index = None
        for (index, offset, length) in memos_for_retrieval:
            if current_index == index:
                current_list.append((offset, length))
            else:
                if current_index is not None:
                    request_lists.append((current_index, current_list))
                current_index = index
                current_list = [(offset, length)]
        # handle the last entry
        if current_index is not None:
            request_lists.append((current_index, current_list))
        for index, offsets in request_lists:
            transport, path = self.indices[index]
            reader = pack.make_readv_reader(transport, path, offsets)
            for names, read_func in reader.iter_records():
                yield read_func(None)
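
    # Illustrative grouping: for memos
    #   [(idx_a, 0, 10), (idx_a, 10, 5), (idx_b, 0, 7)]
    # the first pass builds
    #   [(idx_a, [(0, 10), (10, 5)]), (idx_b, [(0, 7)])]
    # so each pack is read with a single readv-style request.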

    def open_file(self):
        """Pack based knits have no single file."""
        return None

    def set_writer(self, writer, index, (transport, packname)):
        """Set a writer to use for adding data."""
        self.indices[index] = (transport, packname)
        self.container_writer = writer
        self.write_index = index
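
# Usage sketch (the index and pack names here are hypothetical): a _PackAccess
# is typically created over the existing packs and then pointed at the pack
# currently being written:
#
#   access = _PackAccess(index_to_pack_map)
#   access.set_writer(container_writer, new_index,
#                     (pack_transport, 'new.pack'))
#   memos = access.add_raw_records([len(data)], data)
#   # each memo is (new_index, offset_in_pack, length)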


class _KnitData(object):
    """Manage extraction of data from a KnitAccess, caching and decompressing.

    The KnitData class provides the logic for parsing and using knit records,
    making use of an access method for the low level read and write operations.
    """

    def __init__(self, access):
        """Create a KnitData object.

        :param access: The access method to use. Access methods such as
            _KnitAccess manage the insertion of raw records and the subsequent
            retrieval of the same.
        """
        self._access = access
        self._checked = False
        # TODO: jam 20060713 conceptually, this could spill to disk
        #       if the cached size gets larger than a certain amount
        self._cache = {}
        self._do_cache = False

    def _record_to_data(self, version_id, digest, lines):
        """Convert version_id, digest, lines into a raw data block.

        :return: (len, a StringIO instance with the raw data ready to read.)
        """
        # ... (serialisation of the record elided in this excerpt) ...
        return length, sio

    def add_raw_records(self, sizes, raw_data):
        """Append a prepared record to the data file.

        :param sizes: An iterable containing the size of each raw data segment.
        :param raw_data: A bytestring containing the data.
        :return: a list of index data for the way the data was stored.
            See the access method add_raw_records documentation for more
            details.
        """
        return self._access.add_raw_records(sizes, raw_data)

    def add_record(self, version_id, digest, lines):
        """Write new text record to disk.

        Returns index data for retrieving it later, as per add_raw_records.
        """
        size, sio = self._record_to_data(version_id, digest, lines)
        result = self.add_raw_records([size], sio.getvalue())
        if self._do_cache:
            self._cache[version_id] = sio.getvalue()
        return result[0]
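
    # Note: the memo returned by add_record is whatever the access method
    # produced: (None, pos, size) for a _KnitAccess, or (write_index, pos,
    # size) for a _PackAccess. Callers should treat it as opaque index data
    # rather than a raw file offset.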

    def _parse_record_header(self, version_id, raw_data):
        """Parse a record header for consistency.