~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/chk_serializer.py

  • Committer: John Arbash Meinel
  • Date: 2009-06-03 19:51:50 UTC
  • mto: This revision was merged to the branch mainline in revision 4410.
  • Revision ID: john@arbash-meinel.com-20090603195150-69dnt7zs996nwdp9
It seems that codecs.utf_8_decode is quite a bit faster than codecs.getdecoder('utf-8').

Show diffs side-by-side

added

removed

Lines of Context:
30
30
    xml6,
31
31
    )
32
32
 
33
 
_decode_utf8 = cache_utf8.decode
34
 
 
35
 
 
36
 
def _validate_properties(props, _decode=_decode_utf8):
 
33
 
 
34
def _validate_properties(props, _decode=cache_utf8._utf8_decode):
37
35
    # TODO: we really want an 'isascii' check for key
38
 
    unicode_props = dict([(key, _decode(value))
 
36
    unicode_props = dict([(key, _decode(value)[0])
39
37
                          for key, value in props.iteritems()])
40
38
    return unicode_props
41
39
 
58
56
    # TODO: add a 'validate_utf8' for things like revision_id and file_id
59
57
    #       and a validator for parent-ids
60
58
    _schema = {'format': (None, int, _is_format_10),
61
 
               'committer': ('committer', str, _decode_utf8),
 
59
               'committer': ('committer', str, cache_utf8.decode),
62
60
               'timezone': ('timezone', int, None),
63
61
               'timestamp': ('timestamp', str, float),
64
62
               'revision-id': ('revision_id', str, None),
65
63
               'parent-ids': ('parent_ids', list, tuple),
66
64
               'inventory-sha1': ('inventory_sha1', str, None),
67
 
               'message': ('message', str, _decode_utf8),
 
65
               'message': ('message', str, cache_utf8.decode),
68
66
               'properties': ('properties', dict, _validate_properties),
69
67
    }
70
68
 
71
69
    def write_revision_to_string(self, rev):
72
 
        encode_utf8 = cache_utf8.encode
 
70
        encode_utf8 = cache_utf8._utf8_encode
73
71
        # Use a list of tuples rather than a dict
74
72
        # This lets us control the ordering, so that we are able to create
75
73
        # smaller deltas
76
74
        ret = [
77
75
            ("format", 10),
78
 
            ("committer", encode_utf8(rev.committer)),
 
76
            ("committer", encode_utf8(rev.committer)[0]),
79
77
        ]
80
78
        if rev.timezone is not None:
81
79
            ret.append(("timezone", rev.timezone))
83
81
        # which changes infrequently.
84
82
        revprops = {}
85
83
        for key, value in rev.properties.iteritems():
86
 
            revprops[key] = encode_utf8(value)
 
84
            revprops[key] = encode_utf8(value)[0]
87
85
        ret.append(('properties', revprops))
88
86
        ret.extend([
89
87
            ("timestamp", "%.3f" % rev.timestamp),
90
88
            ("revision-id", rev.revision_id),
91
89
            ("parent-ids", rev.parent_ids),
92
90
            ("inventory-sha1", rev.inventory_sha1),
93
 
            ("message", encode_utf8(rev.message)),
 
91
            ("message", encode_utf8(rev.message)[0]),
94
92
        ])
95
93
        return bencode.bencode(ret)
96
94