~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/test_knit.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2007-11-30 08:33:01 UTC
  • mfrom: (3052.2.6 knit.datastreamjoin)
  • Revision ID: pqm@pqm.ubuntu.com-20071130083301-5zq7705t6xa7yikn
Allow insert_data_stream to insert differently annotated stream.
        (#165304, Robert Collins, Andrew Bennetts)

Show diffs side-by-side

added added

removed removed

Lines of Context:
47
47
    _KnitIndex,
48
48
    _PackAccess,
49
49
    PlainKnitContent,
 
50
    _StreamAccess,
 
51
    _StreamIndex,
50
52
    WeaveToKnit,
51
53
    KnitSequenceMatcher,
52
54
    )
59
61
    )
60
62
from bzrlib.transport import get_transport
61
63
from bzrlib.transport.memory import MemoryTransport
 
64
from bzrlib.tuned_gzip import GzipFile
62
65
from bzrlib.util import bencode
63
66
from bzrlib.weave import Weave
64
67
 
1132
1135
        self.assertTrue(k.has_version('text-1'))
1133
1136
        self.assertEqualDiff(''.join(k.get_lines('text-1')), TEXT_1)
1134
1137
 
 
1138
    def test_newline_empty_lines(self):
 
1139
        # ensure that ["\n"] round trips ok.
 
1140
        knit = self.make_test_knit()
 
1141
        knit.add_lines('a', [], ["\n"])
 
1142
        knit.add_lines_with_ghosts('b', [], ["\n"])
 
1143
        self.assertEqual(["\n"], knit.get_lines('a'))
 
1144
        self.assertEqual(["\n"], knit.get_lines('b'))
 
1145
        self.assertEqual(['fulltext'], knit._index.get_options('a'))
 
1146
        self.assertEqual(['fulltext'], knit._index.get_options('b'))
 
1147
        knit.add_lines('c', ['a'], ["\n"])
 
1148
        knit.add_lines_with_ghosts('d', ['b'], ["\n"])
 
1149
        self.assertEqual(["\n"], knit.get_lines('c'))
 
1150
        self.assertEqual(["\n"], knit.get_lines('d'))
 
1151
        self.assertEqual(['line-delta'], knit._index.get_options('c'))
 
1152
        self.assertEqual(['line-delta'], knit._index.get_options('d'))
 
1153
 
 
1154
    def test_empty_lines(self):
 
1155
        # bizarrely, [] is not listed as having no-eol. 
 
1156
        knit = self.make_test_knit()
 
1157
        knit.add_lines('a', [], [])
 
1158
        knit.add_lines_with_ghosts('b', [], [])
 
1159
        self.assertEqual([], knit.get_lines('a'))
 
1160
        self.assertEqual([], knit.get_lines('b'))
 
1161
        self.assertEqual(['fulltext'], knit._index.get_options('a'))
 
1162
        self.assertEqual(['fulltext'], knit._index.get_options('b'))
 
1163
        knit.add_lines('c', ['a'], [])
 
1164
        knit.add_lines_with_ghosts('d', ['b'], [])
 
1165
        self.assertEqual([], knit.get_lines('c'))
 
1166
        self.assertEqual([], knit.get_lines('d'))
 
1167
        self.assertEqual(['line-delta'], knit._index.get_options('c'))
 
1168
        self.assertEqual(['line-delta'], knit._index.get_options('d'))
 
1169
 
1135
1170
    def test_knit_reload(self):
1136
1171
        # test that the content in a reloaded knit is correct
1137
1172
        k = self.make_test_knit()
1748
1783
            knit1.transport.get_bytes(knit1._index._filename),
1749
1784
            knit2.transport.get_bytes(knit2._index._filename))
1750
1785
 
 
1786
    def assertKnitValuesEqual(self, left, right):
 
1787
        """Assert that the texts, annotations and graph of left and right are
 
1788
        the same.
 
1789
        """
 
1790
        self.assertEqual(set(left.versions()), set(right.versions()))
 
1791
        for version in left.versions():
 
1792
            self.assertEqual(left.get_parents_with_ghosts(version),
 
1793
                right.get_parents_with_ghosts(version))
 
1794
            self.assertEqual(left.get_lines(version),
 
1795
                right.get_lines(version))
 
1796
            self.assertEqual(left.annotate(version),
 
1797
                right.annotate(version))
 
1798
 
1751
1799
    def test_insert_data_stream_empty(self):
1752
1800
        """Inserting a data stream with no records should not put any data into
1753
1801
        the knit.
1768
1816
        source = self.make_test_knit(name='source')
1769
1817
        source.add_lines('text-a', [], split_lines(TEXT_1))
1770
1818
        data_stream = source.get_data_stream(['text-a'])
1771
 
        
1772
1819
        target = self.make_test_knit(name='target')
1773
1820
        target.insert_data_stream(data_stream)
1774
 
        
1775
1821
        self.assertKnitFilesEqual(source, target)
1776
1822
 
 
1823
    def test_insert_data_stream_annotated_unannotated(self):
 
1824
        """Inserting an annotated datastream to an unannotated knit works."""
 
1825
        # case one - full texts.
 
1826
        source = self.make_test_knit(name='source', annotate=True)
 
1827
        target = self.make_test_knit(name='target', annotate=False)
 
1828
        source.add_lines('text-a', [], split_lines(TEXT_1))
 
1829
        target.insert_data_stream(source.get_data_stream(['text-a']))
 
1830
        self.assertKnitValuesEqual(source, target)
 
1831
        # case two - deltas.
 
1832
        source.add_lines('text-b', ['text-a'], split_lines(TEXT_2))
 
1833
        target.insert_data_stream(source.get_data_stream(['text-b']))
 
1834
        self.assertKnitValuesEqual(source, target)
 
1835
 
 
1836
    def test_insert_data_stream_unannotated_annotated(self):
 
1837
        """Inserting an unannotated datastream to an annotated knit works."""
 
1838
        # case one - full texts.
 
1839
        source = self.make_test_knit(name='source', annotate=False)
 
1840
        target = self.make_test_knit(name='target', annotate=True)
 
1841
        source.add_lines('text-a', [], split_lines(TEXT_1))
 
1842
        target.insert_data_stream(source.get_data_stream(['text-a']))
 
1843
        self.assertKnitValuesEqual(source, target)
 
1844
        # case two - deltas.
 
1845
        source.add_lines('text-b', ['text-a'], split_lines(TEXT_2))
 
1846
        target.insert_data_stream(source.get_data_stream(['text-b']))
 
1847
        self.assertKnitValuesEqual(source, target)
 
1848
 
1777
1849
    def test_insert_data_stream_records_already_present(self):
1778
1850
        """Insert a data stream where some records are already present in the
1779
1851
        target, and some not.  Only the new records are inserted.
1855
1927
        self.assertRaises(
1856
1928
            errors.KnitCorrupt, target.insert_data_stream, data_stream)
1857
1929
 
1858
 
    def test_insert_data_stream_incompatible_format(self):
 
1930
    def test_insert_data_stream_unknown_format(self):
1859
1931
        """A data stream in a different format to the target knit cannot be
1860
1932
        inserted.
1861
1933
 
1862
 
        It will raise KnitDataStreamIncompatible.
 
1934
        It will raise KnitDataStreamUnknown because the fallback code will fail
 
1935
        to make a knit. In future we may need KnitDataStreamIncompatible again,
 
1936
        for more exotic cases.
1863
1937
        """
1864
1938
        data_stream = ('fake-format-signature', [], lambda _: '')
1865
1939
        target = self.make_test_knit(name='target')
1866
1940
        self.assertRaises(
1867
 
            errors.KnitDataStreamIncompatible,
 
1941
            errors.KnitDataStreamUnknown,
1868
1942
            target.insert_data_stream, data_stream)
1869
1943
 
1870
1944
    #  * test that a stream of "already present version, then new version"
1871
1945
    #    inserts correctly.
1872
1946
 
 
1947
 
 
1948
    def assertMadeStreamKnit(self, source_knit, versions, target_knit):
 
1949
        """Assert that a knit made from a stream is as expected."""
 
1950
        a_stream = source_knit.get_data_stream(versions)
 
1951
        expected_data = a_stream[2](None)
 
1952
        a_stream = source_knit.get_data_stream(versions)
 
1953
        a_knit = target_knit._knit_from_datastream(a_stream)
 
1954
        self.assertEqual(source_knit.factory.__class__,
 
1955
            a_knit.factory.__class__)
 
1956
        self.assertIsInstance(a_knit._data._access, _StreamAccess)
 
1957
        self.assertIsInstance(a_knit._index, _StreamIndex)
 
1958
        self.assertEqual(a_knit._index.data_list, a_stream[1])
 
1959
        self.assertEqual(a_knit._data._access.data, expected_data)
 
1960
        self.assertEqual(a_knit.filename, target_knit.filename)
 
1961
        self.assertEqual(a_knit.transport, target_knit.transport)
 
1962
        self.assertEqual(a_knit._index, a_knit._data._access.stream_index)
 
1963
        self.assertEqual(target_knit, a_knit._data._access.backing_knit)
 
1964
        self.assertIsInstance(a_knit._data._access.orig_factory,
 
1965
            source_knit.factory.__class__)
 
1966
 
 
1967
    def test__knit_from_data_stream_empty(self):
 
1968
        """Create a knit object from a datastream."""
 
1969
        annotated = self.make_test_knit(name='source', annotate=True)
 
1970
        plain = self.make_test_knit(name='target', annotate=False)
 
1971
        # case 1: annotated source
 
1972
        self.assertMadeStreamKnit(annotated, [], annotated)
 
1973
        self.assertMadeStreamKnit(annotated, [], plain)
 
1974
        # case 2: plain source
 
1975
        self.assertMadeStreamKnit(plain, [], annotated)
 
1976
        self.assertMadeStreamKnit(plain, [], plain)
 
1977
 
 
1978
    def test__knit_from_data_stream_unknown_format(self):
 
1979
        annotated = self.make_test_knit(name='source', annotate=True)
 
1980
        self.assertRaises(errors.KnitDataStreamUnknown,
 
1981
            annotated._knit_from_datastream, ("unknown", None, None))
 
1982
 
 
1983
 
1873
1984
TEXT_1 = """\
1874
1985
Banana cup cakes:
1875
1986
 
2719
2830
        # will fail and we'll adjust it to handle that case correctly, rather
2720
2831
        # than allowing an over-read that is bogus.
2721
2832
        self.assertEqual(expected_length, len(stream[2](-1)))
 
2833
 
 
2834
 
 
2835
class Test_StreamIndex(KnitTests):
 
2836
 
 
2837
    def get_index(self, knit, stream):
 
2838
        """Get a _StreamIndex from knit and stream."""
 
2839
        return knit._knit_from_datastream(stream)._index
 
2840
 
 
2841
    def assertIndexVersions(self, knit, versions):
 
2842
        """Check that the _StreamIndex versions are those of the stream."""
 
2843
        index = self.get_index(knit, knit.get_data_stream(versions))
 
2844
        self.assertEqual(set(index.get_versions()), set(versions))
 
2845
        # check we didn't get duplicates
 
2846
        self.assertEqual(len(index.get_versions()), len(versions))
 
2847
 
 
2848
    def assertIndexAncestry(self, knit, ancestry_versions, versions, result):
 
2849
        """Check the result of a get_ancestry call on knit."""
 
2850
        index = self.get_index(knit, knit.get_data_stream(versions))
 
2851
        self.assertEqual(
 
2852
            set(result),
 
2853
            set(index.get_ancestry(ancestry_versions, False)))
 
2854
 
 
2855
    def assertIterParents(self, knit, versions, parent_versions, result):
 
2856
        """Check the result of an iter_parents call on knit."""
 
2857
        index = self.get_index(knit, knit.get_data_stream(versions))
 
2858
        self.assertEqual(result, index.iter_parents(parent_versions))
 
2859
 
 
2860
    def assertGetMethod(self, knit, versions, version, result):
 
2861
        index = self.get_index(knit, knit.get_data_stream(versions))
 
2862
        self.assertEqual(result, index.get_method(version))
 
2863
 
 
2864
    def assertGetOptions(self, knit, version, options):
 
2865
        index = self.get_index(knit, knit.get_data_stream(version))
 
2866
        self.assertEqual(options, index.get_options(version))
 
2867
 
 
2868
    def assertGetPosition(self, knit, versions, version, result):
 
2869
        index = self.get_index(knit, knit.get_data_stream(versions))
 
2870
        if result[1] is None:
 
2871
            result = (result[0], index, result[2], result[3])
 
2872
        self.assertEqual(result, index.get_position(version))
 
2873
 
 
2874
    def assertGetParentsWithGhosts(self, knit, versions, version, parents):
 
2875
        index = self.get_index(knit, knit.get_data_stream(versions))
 
2876
        self.assertEqual(parents, index.get_parents_with_ghosts(version))
 
2877
 
 
2878
    def make_knit_with_4_versions_2_dags(self):
 
2879
        knit = self.make_test_knit()
 
2880
        knit.add_lines('a', [], ["foo"])
 
2881
        knit.add_lines('b', [], [])
 
2882
        knit.add_lines('c', ['b', 'a'], [])
 
2883
        knit.add_lines_with_ghosts('d', ['e', 'f'], [])
 
2884
        return knit
 
2885
 
 
2886
    def test_versions(self):
 
2887
        """The versions of a StreamIndex are those of the datastream."""
 
2888
        knit = self.make_knit_with_4_versions_2_dags()
 
2889
        # ask for most permutations, which catches bugs like falling back to the
 
2890
        # target knit, or showing ghosts, etc.
 
2891
        self.assertIndexVersions(knit, [])
 
2892
        self.assertIndexVersions(knit, ['a'])
 
2893
        self.assertIndexVersions(knit, ['b'])
 
2894
        self.assertIndexVersions(knit, ['c'])
 
2895
        self.assertIndexVersions(knit, ['d'])
 
2896
        self.assertIndexVersions(knit, ['a', 'b'])
 
2897
        self.assertIndexVersions(knit, ['b', 'c'])
 
2898
        self.assertIndexVersions(knit, ['a', 'c'])
 
2899
        self.assertIndexVersions(knit, ['a', 'b', 'c'])
 
2900
        self.assertIndexVersions(knit, ['a', 'b', 'c', 'd'])
 
2901
 
 
2902
    def test_construct(self):
 
2903
        """Constructing a StreamIndex generates index data."""
 
2904
        data_list = [('text-a', ['fulltext'], 127, []),
 
2905
            ('text-b', ['option'], 128, ['text-c'])]
 
2906
        index = _StreamIndex(data_list)
 
2907
        self.assertEqual({'text-a':(['fulltext'], (0, 127), []),
 
2908
            'text-b':(['option'], (127, 127 + 128), ['text-c'])},
 
2909
            index._by_version)
 
2910
 
 
2911
    def test_get_ancestry(self):
 
2912
        knit = self.make_knit_with_4_versions_2_dags()
 
2913
        self.assertIndexAncestry(knit, ['a'], ['a'], ['a'])
 
2914
        self.assertIndexAncestry(knit, ['b'], ['b'], ['b'])
 
2915
        self.assertIndexAncestry(knit, ['c'], ['c'], ['c'])
 
2916
        self.assertIndexAncestry(knit, ['c'], ['a', 'b', 'c'],
 
2917
            set(['a', 'b', 'c']))
 
2918
        self.assertIndexAncestry(knit, ['c', 'd'], ['a', 'b', 'c', 'd'],
 
2919
            set(['a', 'b', 'c', 'd']))
 
2920
 
 
2921
    def test_get_method(self):
 
2922
        knit = self.make_knit_with_4_versions_2_dags()
 
2923
        self.assertGetMethod(knit, ['a'], 'a', 'fulltext')
 
2924
        self.assertGetMethod(knit, ['c'], 'c', 'line-delta')
 
2925
        # get_method on a basis that is not in the datastream (but in the
 
2926
        # backing knit) returns 'fulltext', because that's what we'll create as
 
2927
        # we thunk across.
 
2928
        self.assertGetMethod(knit, ['c'], 'b', 'fulltext')
 
2929
 
 
2930
    def test_iter_parents(self):
 
2931
        knit = self.make_knit_with_4_versions_2_dags()
 
2932
        self.assertIterParents(knit, ['a'], ['a'], [('a', [])])
 
2933
        self.assertIterParents(knit, ['a', 'b'], ['a', 'b'],
 
2934
            [('a', []), ('b', [])])
 
2935
        self.assertIterParents(knit, ['a', 'b', 'c'], ['a', 'b', 'c'],
 
2936
            [('a', []), ('b', []), ('c', ['b', 'a'])])
 
2937
        self.assertIterParents(knit, ['a', 'b', 'c', 'd'],
 
2938
            ['a', 'b', 'c', 'd'],
 
2939
            [('a', []), ('b', []), ('c', ['b', 'a']), ('d', ['e', 'f'])])
 
2940
        self.assertIterParents(knit, ['c'], ['a', 'b', 'c'],
 
2941
            [('c', ['b', 'a'])])
 
2942
 
 
2943
    def test_get_options(self):
 
2944
        knit = self.make_knit_with_4_versions_2_dags()
 
2945
        self.assertGetOptions(knit, 'a', ['no-eol', 'fulltext'])
 
2946
        self.assertGetOptions(knit, 'c', ['line-delta'])
 
2947
 
 
2948
    def test_get_parents_with_ghosts(self):
 
2949
        knit = self.make_knit_with_4_versions_2_dags()
 
2950
        self.assertGetParentsWithGhosts(knit, ['a'], 'a', [])
 
2951
        self.assertGetParentsWithGhosts(knit, ['c'], 'c', ['b', 'a'])
 
2952
        self.assertGetParentsWithGhosts(knit, ['d'], 'd', ['e', 'f'])
 
2953
 
 
2954
    def test_get_position(self):
 
2955
        knit = self.make_knit_with_4_versions_2_dags()
 
2956
        # get_position returns (thunk_flag, index(can be None), start, end) for
 
2957
        # _StreamAccess to use.
 
2958
        self.assertGetPosition(knit, ['a'], 'a', (False, None, 0, 78))
 
2959
        self.assertGetPosition(knit, ['a', 'c'], 'c', (False, None, 78, 156))
 
2960
        # get_position on a text that is not in the datastream (but in the
 
2961
        # backing knit) returns (True, 'versionid', None, None) - and then the
 
2962
        # access object can construct the relevant data as needed.
 
2963
        self.assertGetPosition(knit, ['a', 'c'], 'b', (True, 'b', None, None))
 
2964
 
 
2965
 
 
2966
class Test_StreamAccess(KnitTests):
 
2967
 
 
2968
    def get_index_access(self, knit, stream):
 
2969
        """Get a _StreamAccess from knit and stream."""
 
2970
        knit =  knit._knit_from_datastream(stream)
 
2971
        return knit._index, knit._data._access
 
2972
 
 
2973
    def assertGetRawRecords(self, knit, versions):
 
2974
        index, access = self.get_index_access(knit,
 
2975
            knit.get_data_stream(versions))
 
2976
        # check that every version asked for can be obtained from the resulting
 
2977
        # access object.
 
2978
        # batch
 
2979
        memos = []
 
2980
        for version in versions:
 
2981
            memos.append(knit._index.get_position(version))
 
2982
        original = {}
 
2983
        for version, data in zip(
 
2984
            versions, knit._data._access.get_raw_records(memos)):
 
2985
            original[version] = data
 
2986
        memos = []
 
2987
        for version in versions:
 
2988
            memos.append(index.get_position(version))
 
2989
        streamed = {}
 
2990
        for version, data in zip(versions, access.get_raw_records(memos)):
 
2991
            streamed[version] = data
 
2992
        self.assertEqual(original, streamed)
 
2993
        # individually
 
2994
        for version in versions:
 
2995
            data = list(access.get_raw_records(
 
2996
                [index.get_position(version)]))[0]
 
2997
            self.assertEqual(original[version], data)
 
2998
 
 
2999
    def make_knit_with_two_versions(self):
 
3000
        knit = self.make_test_knit()
 
3001
        knit.add_lines('a', [], ["foo"])
 
3002
        knit.add_lines('b', [], ["bar"])
 
3003
        return knit
 
3004
 
 
3005
    def test_get_raw_records(self):
 
3006
        knit = self.make_knit_with_two_versions()
 
3007
        self.assertGetRawRecords(knit, ['a', 'b'])
 
3008
        self.assertGetRawRecords(knit, ['a'])
 
3009
        self.assertGetRawRecords(knit, ['b'])
 
3010
    
 
3011
    def test_get_raw_record_from_backing_knit(self):
 
3012
        # the thunk layer should create an artificial A on-demand when needed.
 
3013
        source_knit = self.make_test_knit(name='plain', annotate=False)
 
3014
        target_knit = self.make_test_knit(name='annotated', annotate=True)
 
3015
        source_knit.add_lines("A", [], ["Foo\n"])
 
3016
        # Give the target A, so we can try to thunk across to it.
 
3017
        target_knit.join(source_knit)
 
3018
        index, access = self.get_index_access(target_knit,
 
3019
            source_knit.get_data_stream([]))
 
3020
        raw_data = list(access.get_raw_records([(True, "A", None, None)]))[0]
 
3021
        df = GzipFile(mode='rb', fileobj=StringIO(raw_data))
 
3022
        self.assertEqual(
 
3023
            'version A 1 5d36b88bb697a2d778f024048bafabd443d74503\n'
 
3024
            'Foo\nend A\n',
 
3025
            df.read())
 
3026
 
 
3027
    def test_asking_for_thunk_stream_is_not_plain_errors(self):
 
3028
        knit = self.make_test_knit(name='annotated', annotate=True)
 
3029
        knit.add_lines("A", [], ["Foo\n"])
 
3030
        index, access = self.get_index_access(knit,
 
3031
            knit.get_data_stream([]))
 
3032
        self.assertRaises(errors.KnitCorrupt,
 
3033
            list, access.get_raw_records([(True, "A", None, None)]))