~bzr-pqm/bzr/bzr.dev

2052.3.1 by John Arbash Meinel
Add tests to cleanup the copyright of all source files
1
# Copyright (C) 2006 Canonical Ltd
1911.2.3 by John Arbash Meinel
Moving everything into a new location so that we can cache more than just revision ids
2
#
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
12
#
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
# TODO: Some kind of command-line display of revision properties: 
18
# perhaps show them in log -v and allow them as options to the commit command.
19
20
"""Some functions to enable caching the conversion between unicode to utf8"""
21
2155.1.1 by John Arbash Meinel
(Dmitry Vasiliev) pre-lookup encoders to improve performance
22
import codecs
23
24
25
_utf8_encode = codecs.getencoder("utf-8")
26
_utf8_decode = codecs.getdecoder("utf-8")
1911.2.3 by John Arbash Meinel
Moving everything into a new location so that we can cache more than just revision ids
27
28
# Map revisions from and to utf8 encoding
29
# Whenever we do an encode/decode operation, we save the result, so that
30
# we don't have to do it again.
31
_unicode_to_utf8_map = {}
32
_utf8_to_unicode_map = {}
33
34
35
def encode(unicode_str,
           _uni_to_utf8=_unicode_to_utf8_map,
           _utf8_to_uni=_utf8_to_unicode_map,
           _utf8_encode=_utf8_encode):
    """Take this unicode revision id, and get a utf8 version.

    The result is cached: the first call for a given string encodes it and
    records the pair in both the unicode->utf8 and utf8->unicode maps, so a
    later decode() of the result is a plain dictionary lookup.

    :param unicode_str: The unicode string to encode.
    :return: The utf8-encoded form of unicode_str.

    The underscore-prefixed parameters bind the module-level caches and the
    pre-looked-up encoder as defaults; callers are not expected to pass
    them.
    """
    # If the key is in the cache try/KeyError is 50% faster than
    # val = dict.get(key), if val is None:
    # On jam's machine the difference is
    # try/KeyError:  900ms
    #      if None: 1250ms
    # Since these are primarily used when iterating over a knit entry
    # *most* of the time the key will already be in the cache, so use the
    # fast path
    try:
        return _uni_to_utf8[unicode_str]
    except KeyError:
        # The encoder returns (encoded, length); only the encoded part is
        # wanted. Record the mapping in both directions.
        utf8_str = _utf8_encode(unicode_str)[0]
        _uni_to_utf8[unicode_str] = utf8_str
        _utf8_to_uni[utf8_str] = unicode_str
        return utf8_str
54
55
56
def decode(utf8_str,
           _uni_to_utf8=_unicode_to_utf8_map,
           _utf8_to_uni=_utf8_to_unicode_map,
           _utf8_decode=_utf8_decode):
    """Take a utf8 revision id, and decode it, but cache the result.

    The first call for a given string decodes it and records the pair in
    both the utf8->unicode and unicode->utf8 maps, so a later encode() of
    the result is a plain dictionary lookup.

    :param utf8_str: The utf8-encoded string to decode.
    :return: The unicode form of utf8_str.

    The underscore-prefixed parameters bind the module-level caches and the
    pre-looked-up decoder as defaults; callers are not expected to pass
    them.
    """
    # Same fast path as encode(): try the cache first and only do the real
    # conversion on a miss.
    try:
        return _utf8_to_uni[utf8_str]
    except KeyError:
        # The decoder returns (decoded, length); only the decoded part is
        # wanted. Record the mapping in both directions.
        unicode_str = _utf8_decode(utf8_str)[0]
        _utf8_to_uni[utf8_str] = unicode_str
        _uni_to_utf8[unicode_str] = utf8_str
        return unicode_str
67
68
1911.2.5 by John Arbash Meinel
Update cache tests, add a function to do something like intern() only for unicode objects
69
def get_cached_unicode(unicode_str):
    """Return a cached version of the unicode string.

    This has a similar idea to that of intern() in that it tries
    to return a singleton string. Only it works for unicode strings.

    :param unicode_str: The unicode string to look up.
    :return: The unicode object held in the cache for this value. That is
        unicode_str itself if it was the first one seen, otherwise the
        equal object that was cached earlier.
    """
    # This might return the same object, or it might return the cached one
    # the decode() should just be a hash lookup, because the encode() side
    # should add the entry to the maps
    return decode(encode(unicode_str))
79
80
1911.2.3 by John Arbash Meinel
Moving everything into a new location so that we can cache more than just revision ids
81
def clear_encoding_cache():
    """Clear the encoding and decoding caches.

    The dictionaries are emptied in place rather than rebound, because
    encode() and decode() hold references to these same objects through
    their default arguments.
    """
    _unicode_to_utf8_map.clear()
    _utf8_to_unicode_map.clear()