~bzr-pqm/bzr/bzr.dev : contents of bzrlib/tests/test_generate

~bzr-pqm/bzr/bzr.dev : (revision 2391)

# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

"""Tests for bzrlib/generate_ids.py"""

import re

from bzrlib import (
    generate_ids,
    tests,
    )


class TestFileIds(tests.TestCase):
    """Checks for the file-id helpers provided by generate_ids."""

    def assertGenFileId(self, regex, filename):
        """Generate a file id for filename and verify its shape.

        The generated id has to match regex anchored at both ends, be a
        plain 8-bit str (not unicode), and decode cleanly as ascii.
        """
        generated = generate_ids.gen_file_id(filename)
        anchored = '^%s$' % (regex,)
        self.assertContainsRe(generated, anchored)
        # utf8 byte string expected, never a unicode object
        self.assertIsInstance(generated, str)
        # decoding raises if any non-ascii byte slipped through
        generated.decode('ascii')

    def test_gen_file_id(self):
        """File ids start with a cleaned-up, length-limited filename."""
        # Each pair is (filename handed to gen_file_id, expected id prefix).
        prefix_cases = [
            # the filename is used when possible
            ('bar', 'bar-'),
            # capitalization is squashed and non word characters dropped
            ('Mwoo oof\t m', 'mwoooofm-'),
            # leading '.' characters are dropped, so ids are never hidden
            ('..gam.py', 'gam.py-'),
            ('..Mwoo oof\t m', 'mwoooofm-'),
            # unicode characters are dropped too, still without leaving
            # a hidden file id behind
            (u'\xe5\xb5.txt', 'txt-'),
            ]
        for filename, prefix in prefix_cases:
            self.assertStartsWith(generate_ids.gen_file_id(filename), prefix)

        # The unique part appended to the filename is 33 characters plus a
        # serial number (log10(N) characters). The filename portion is
        # restricted to <= 20 characters, so the whole id should stay
        # under roughly 60 characters.
        bounded_cases = [
            # case squashing combined with the length restriction
            ('A'*50 + '.txt', 'a'*20 + '-'),
            # truncation happens after the other clean-ups, so as much of
            # the name as possible is preserved
            ('\xe5\xb5..aBcd\tefGhijKLMnop\tqrstuvwxyz',
             'abcdefghijklmnopqrst-'),
            ]
        for filename, prefix in bounded_cases:
            fid = generate_ids.gen_file_id(filename)
            self.assertStartsWith(fid, prefix)
            self.failUnless(len(fid) < 60)

    def test_file_ids_are_ascii(self):
        """str and unicode filenames alike give ascii 8-bit file ids."""
        tail = r'-\d{14}-[a-z0-9]{16}-\d+'
        # (expected leading part of the id, filename)
        ascii_cases = [
            ('foo', 'foo'),
            ('foo', u'foo'),
            ('bar', u'bar'),
            ('br', u'b\xe5r'),
            ]
        for stem, filename in ascii_cases:
            self.assertGenFileId(stem + tail, filename)

    def test__next_id_suffix_sets_suffix(self):
        """_next_id_suffix fills in the module-level suffix when unset."""
        generate_ids._gen_file_id_suffix = None
        generate_ids._next_id_suffix()
        self.assertNotEqual(None, generate_ids._gen_file_id_suffix)

    def test__next_id_suffix_increments(self):
        """Successive calls append an increasing serial to the suffix."""
        generate_ids._gen_file_id_suffix = "foo-"
        generate_ids._gen_file_id_serial = 1
        try:
            # Starting from serial 1, nine calls should yield 2 through 10.
            for serial in range(2, 11):
                self.assertEqual("foo-%d" % serial,
                                 generate_ids._next_id_suffix())
        finally:
            # Put the module state back so ids generated by the rest of
            # the test suite don't end in 'foo-XXX'
            generate_ids._gen_file_id_suffix = None
            generate_ids._gen_file_id_serial = 0

    def test_gen_root_id(self):
        """gen_root_id exists and returns a 'tree_root-' prefixed id."""
        self.assertStartsWith(generate_ids.gen_root_id(), 'tree_root-')


class TestGenRevisionId(tests.TestCase):
    """Test generating revision ids"""

    def assertMatchesRe(self, regex, text):
        """Fail unless regex matches text.

        The check uses re.match, so the pattern is applied from the start
        of text; nothing is required of whatever follows the match.

        :param regex: Regular expression pattern to apply.
        :param text: The string to test.
        """
        if re.match(regex, text) is None:
            self.fail('Pattern %s did not match text %s' % (regex, text))

    def assertGenRevisionId(self, regex, username, timestamp=None):
        """gen_revision_id should create a revision id matching the regex.

        :param regex: Pattern the generated revision id must match (see
            assertMatchesRe for the matching rules).
        :param username: Username passed through to gen_revision_id.
        :param timestamp: Optional timestamp passed through to
            gen_revision_id; None is forwarded unchanged.
        """
        revision_id = generate_ids.gen_revision_id(username, timestamp)
        self.assertMatchesRe(regex, revision_id)
        # It should be a utf8 revision_id, not a unicode one
        self.assertIsInstance(revision_id, str)
        # gen_revision_id should always return ascii revision ids;
        # decode() raises if a non-ascii byte is present.
        revision_id.decode('ascii')

    def test_timestamp(self):
        """passing a timestamp should cause it to be used"""
        # Without a timestamp only the general shape can be checked.
        self.assertGenRevisionId(r'user@host-\d{14}-[a-z0-9]{16}', 'user@host')
        # With an explicit timestamp the 14-digit date portion is fixed.
        self.assertGenRevisionId(r'user@host-20061102205056-[a-z0-9]{16}',
                                 'user@host', 1162500656.688)
        self.assertGenRevisionId(r'user@host-20061102205024-[a-z0-9]{16}',
                                 'user@host', 1162500624.000)

    def test_gen_revision_id_email(self):
        """gen_revision_id uses email address if present"""
        regex = r'user\+joe_bar@foo-bar\.com-\d{14}-[a-z0-9]{16}'
        self.assertGenRevisionId(regex, 'user+joe_bar@foo-bar.com')
        self.assertGenRevisionId(regex, '<user+joe_bar@foo-bar.com>')
        self.assertGenRevisionId(regex, 'Joe Bar <user+joe_bar@foo-bar.com>')
        self.assertGenRevisionId(regex, 'Joe Bar <user+Joe_Bar@Foo-Bar.com>')
        self.assertGenRevisionId(regex, u'Joe B\xe5r <user+Joe_Bar@Foo-Bar.com>')

    def test_gen_revision_id_user(self):
        """If there is no email, fall back to the whole username"""
        tail = r'-\d{14}-[a-z0-9]{16}'
        self.assertGenRevisionId('joe_bar' + tail, 'Joe Bar')
        self.assertGenRevisionId('joebar' + tail, 'joebar')
        self.assertGenRevisionId('joe_br' + tail, u'Joe B\xe5r')
        # The '.' is escaped so that only a literal dot is accepted in
        # 'foo-bar.com', rather than any single character.
        self.assertGenRevisionId(r'joe_br_user\+joe_bar_foo-bar\.com' + tail,
                                 u'Joe B\xe5r <user+Joe_Bar_Foo-Bar.com>')

    def test_revision_ids_are_ascii(self):
        """gen_revision_id should always return an ascii revision id."""
        tail = r'-\d{14}-[a-z0-9]{16}'
        self.assertGenRevisionId('joe_bar' + tail, 'Joe Bar')
        self.assertGenRevisionId('joe_bar' + tail, u'Joe Bar')
        self.assertGenRevisionId('joe@foo' + tail, u'Joe Bar <joe@foo>')
        # We cheat a little with this one, because email-addresses shouldn't
        # contain non-ascii characters, but generate_ids should strip them
        # anyway.
        self.assertGenRevisionId('joe@f' + tail, u'Joe Bar <joe@f\xb6>')

2116.4.1 by John Arbash Meinel Update file and revision id generators.	1	# Copyright (C) 2006 Canonical Ltd
	2	#
	3	# This program is free software; you can redistribute it and/or modify
	4	# it under the terms of the GNU General Public License as published by
	5	# the Free Software Foundation; either version 2 of the License, or
	6	# (at your option) any later version.
	7	#
	8	# This program is distributed in the hope that it will be useful,
	9	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	10	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	11	# GNU General Public License for more details.
	12	#
	13	# You should have received a copy of the GNU General Public License
	14	# along with this program; if not, write to the Free Software
	15	# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
	16
	17	"""Tests for bzrlib/generate_ids.py"""
	18
	19	import re
	20
	21	from bzrlib import (
	22	generate_ids,
	23	tests,
	24	)
	25
	26
	27	class TestFileIds(tests.TestCase):
	28	"""Test functions which generate file ids"""
2294.1.10 by John Arbash Meinel Switch all apis over to utf8 file ids. All tests pass	29
	30	def assertGenFileId(self, regex, filename):
	31	"""gen_file_id should create a file id matching the regex.
	32
	33	The file id should be ascii, and should be an 8-bit string
	34	"""
	35	file_id = generate_ids.gen_file_id(filename)
	36	self.assertContainsRe(file_id, '^'+regex+'$')
	37	# It should be a utf8 file_id, not a unicode one
	38	self.assertIsInstance(file_id, str)
	39	# gen_file_id should always return ascii file ids.
	40	file_id.decode('ascii')
	41
2116.4.1 by John Arbash Meinel Update file and revision id generators.	42	def test_gen_file_id(self):
	43	gen_file_id = generate_ids.gen_file_id
	44
	45	# We try to use the filename if possible
	46	self.assertStartsWith(gen_file_id('bar'), 'bar-')
	47
	48	# but we squash capitalization, and remove non word characters
	49	self.assertStartsWith(gen_file_id('Mwoo oof\t m'), 'mwoooofm-')
	50
	51	# We also remove leading '.' characters to prevent hidden file-ids
	52	self.assertStartsWith(gen_file_id('..gam.py'), 'gam.py-')
	53	self.assertStartsWith(gen_file_id('..Mwoo oof\t m'), 'mwoooofm-')
	54
	55	# we remove unicode characters, and still don't end up with a
	56	# hidden file id
	57	self.assertStartsWith(gen_file_id(u'\xe5\xb5.txt'), 'txt-')
2294.1.10 by John Arbash Meinel Switch all apis over to utf8 file ids. All tests pass	58
2116.4.1 by John Arbash Meinel Update file and revision id generators.	59	# Our current method of generating unique ids adds 33 characters
	60	# plus an serial number (log10(N) characters)
	61	# to the end of the filename. We now restrict the filename portion to
	62	# be <= 20 characters, so the maximum length should now be approx < 60
	63
	64	# Test both case squashing and length restriction
	65	fid = gen_file_id('A'*50 + '.txt')
	66	self.assertStartsWith(fid, 'a'*20 + '-')
	67	self.failUnless(len(fid) < 60)
	68
	69	# restricting length happens after the other actions, so
	70	# we preserve as much as possible
	71	fid = gen_file_id('\xe5\xb5..aBcd\tefGhijKLMnop\tqrstuvwxyz')
	72	self.assertStartsWith(fid, 'abcdefghijklmnopqrst-')
	73	self.failUnless(len(fid) < 60)
	74
2294.1.10 by John Arbash Meinel Switch all apis over to utf8 file ids. All tests pass	75	def test_file_ids_are_ascii(self):
	76	tail = r'-\d{14}-[a-z0-9]{16}-\d+'
	77	self.assertGenFileId('foo' + tail, 'foo')
	78	self.assertGenFileId('foo' + tail, u'foo')
	79	self.assertGenFileId('bar' + tail, u'bar')
	80	self.assertGenFileId('br' + tail, u'b\xe5r')
	81
2116.4.1 by John Arbash Meinel Update file and revision id generators.	82	def test__next_id_suffix_sets_suffix(self):
	83	generate_ids._gen_file_id_suffix = None
	84	generate_ids._next_id_suffix()
	85	self.assertNotEqual(None, generate_ids._gen_file_id_suffix)
	86
	87	def test__next_id_suffix_increments(self):
	88	generate_ids._gen_file_id_suffix = "foo-"
	89	generate_ids._gen_file_id_serial = 1
	90	try:
	91	self.assertEqual("foo-2", generate_ids._next_id_suffix())
	92	self.assertEqual("foo-3", generate_ids._next_id_suffix())
	93	self.assertEqual("foo-4", generate_ids._next_id_suffix())
	94	self.assertEqual("foo-5", generate_ids._next_id_suffix())
	95	self.assertEqual("foo-6", generate_ids._next_id_suffix())
	96	self.assertEqual("foo-7", generate_ids._next_id_suffix())
	97	self.assertEqual("foo-8", generate_ids._next_id_suffix())
	98	self.assertEqual("foo-9", generate_ids._next_id_suffix())
	99	self.assertEqual("foo-10", generate_ids._next_id_suffix())
	100	finally:
	101	# Reset so that all future ids generated in the test suite
	102	# don't end in 'foo-XXX'
	103	generate_ids._gen_file_id_suffix = None
	104	generate_ids._gen_file_id_serial = 0
	105
	106	def test_gen_root_id(self):
	107	# Mostly just make sure gen_root_id() exists
	108	root_id = generate_ids.gen_root_id()
	109	self.assertStartsWith(root_id, 'tree_root-')
	110
	111
	112	class TestGenRevisionId(tests.TestCase):
	113	"""Test generating revision ids"""
	114
	115	def assertMatchesRe(self, regex, text):
	116	"""Make sure text is matched by the regex given"""
	117	if re.match(regex, text) is None:
	118	self.fail('Pattern %s did not match text %s' % (regex, text))
	119
	120	def assertGenRevisionId(self, regex, username, timestamp=None):
	121	"""gen_revision_id should create a revision id matching the regex"""
	122	revision_id = generate_ids.gen_revision_id(username, timestamp)
	123	self.assertMatchesRe(regex, revision_id)
2249.5.14 by John Arbash Meinel Add some tests that generate_ids.get_revision_id() generates ascii revision ids	124	# It should be a utf8 revision_id, not a unicode one
	125	self.assertIsInstance(revision_id, str)
	126	# gen_revision_id should always return ascii revision ids.
	127	revision_id.decode('ascii')
2116.4.1 by John Arbash Meinel Update file and revision id generators.	128
	129	def test_timestamp(self):
	130	"""passing a timestamp should cause it to be used"""
	131	self.assertGenRevisionId(r'user@host-\d{14}-[a-z0-9]{16}', 'user@host')
	132	self.assertGenRevisionId('user@host-20061102205056-[a-z0-9]{16}',
	133	'user@host', 1162500656.688)
	134	self.assertGenRevisionId(r'user@host-20061102205024-[a-z0-9]{16}',
	135	'user@host', 1162500624.000)
	136
	137	def test_gen_revision_id_email(self):
	138	"""gen_revision_id uses email address if present"""
	139	regex = r'user\+joe_bar@foo-bar\.com-\d{14}-[a-z0-9]{16}'
	140	self.assertGenRevisionId(regex,'user+joe_bar@foo-bar.com')
	141	self.assertGenRevisionId(regex, '<user+joe_bar@foo-bar.com>')
	142	self.assertGenRevisionId(regex, 'Joe Bar <user+joe_bar@foo-bar.com>')
	143	self.assertGenRevisionId(regex, 'Joe Bar <user+Joe_Bar@Foo-Bar.com>')
	144	self.assertGenRevisionId(regex, u'Joe B\xe5r <user+Joe_Bar@Foo-Bar.com>')
	145
	146	def test_gen_revision_id_user(self):
	147	"""If there is no email, fall back to the whole username"""
	148	tail = r'-\d{14}-[a-z0-9]{16}'
2249.5.14 by John Arbash Meinel Add some tests that generate_ids.get_revision_id() generates ascii revision ids	149	self.assertGenRevisionId('joe_bar' + tail, 'Joe Bar')
2116.4.1 by John Arbash Meinel Update file and revision id generators.	150	self.assertGenRevisionId('joebar' + tail, 'joebar')
	151	self.assertGenRevisionId('joe_br' + tail, u'Joe B\xe5r')
	152	self.assertGenRevisionId(r'joe_br_user\+joe_bar_foo-bar.com' + tail,
	153	u'Joe B\xe5r <user+Joe_Bar_Foo-Bar.com>')
2249.5.14 by John Arbash Meinel Add some tests that generate_ids.get_revision_id() generates ascii revision ids	154
	155	def test_revision_ids_are_ascii(self):
	156	"""gen_revision_id should always return an ascii revision id."""
	157	tail = r'-\d{14}-[a-z0-9]{16}'
	158	self.assertGenRevisionId('joe_bar' + tail, 'Joe Bar')
	159	self.assertGenRevisionId('joe_bar' + tail, u'Joe Bar')
	160	self.assertGenRevisionId('joe@foo' + tail, u'Joe Bar <joe@foo>')
	161	# We cheat a little with this one, because email-addresses shouldn't
	162	# contain non-ascii characters, but generate_ids should strip them
	163	# anyway.
	164	self.assertGenRevisionId('joe@f' + tail, u'Joe Bar <joe@f\xb6>')