~bzr-pqm/bzr/bzr.dev

1934.1.1 by John Arbash Meinel
Write a benchmark for the XML serializer
# Copyright (C) 2006 by Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as published by
# the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
15
16
"""Tests for bzr xml serialization performance."""
17
18
from bzrlib import (
1934.1.10 by John Arbash Meinel
Using real utf8 and cache_utf8 has similar performance, 272ms, and 363ms
19
    cache_utf8,
1934.1.1 by John Arbash Meinel
Write a benchmark for the XML serializer
20
    xml5,
21
    )
22
from bzrlib.benchmarks import Benchmark
23
24
25
class BenchXMLSerializer(Benchmark):
    """Benchmarks for writing and reading inventories with xml5.serializer_v5.

    Every benchmark obtains its inventory from
    self.make_kernel_like_inventory() and measures exactly one serializer
    call by passing it to self.time().
    """

    def test_write_to_string_kernel_like_inventory(self):
        """Time write_inventory_to_string right after clearing the xml5 cache."""
        # On jam's machine, ElementTree serializer took: 2161ms/13487ms
        #                      with Robert's serializer:  631ms/10770ms
        #                      with Entity escaper:       487ms/11636ms
        #           caching Entity escaper, empty cache:  448ms/ 9489ms
        #           caching Entity escaper, full cache:   375ms/ 9489ms
        #                      passing around function:   406ms/ 8942ms
        #              cached, passing around function:   328ms/11248ms
        #                      removing extra function:   354ms/ 8942ms
        #              cached, removing extra function:   275ms/11248ms
        #                          no cache, real utf8:   363ms/11697ms
        #                            cached, real utf8:   272ms/12827ms
        # Really all we want is a real inventory
        inv = self.make_kernel_like_inventory()

        xml5._clear_cache()
        # We want a real tree with lots of file ids and sha strings, etc.
        self.time(xml5.serializer_v5.write_inventory_to_string, inv)

    def test_write_kernel_like_inventory(self):
        """Time write_inventory to a real file after clearing the xml5 cache.

        The output goes to 'kernel-like-inventory', opened in binary mode
        relative to the current working directory.
        """
        # Really all we want is a real inventory
        inv = self.make_kernel_like_inventory()

        xml5._clear_cache()
        f = open('kernel-like-inventory', 'wb')
        try:
            # We want a real tree with lots of file ids and sha strings, etc.
            self.time(xml5.serializer_v5.write_inventory, inv, f)
        finally:
            # Close the file even if the timed write raises.
            f.close()

    def test_write_to_string_cached_kernel_like_inventory(self):
        """Time write_inventory_to_string after one untimed write of the same inventory."""
        inv = self.make_kernel_like_inventory()

        xml5._clear_cache()
        # We want a real tree with lots of file ids and sha strings, etc.
        # Untimed first pass; presumably this is what fills the xml5 cache so
        # that the timed call below measures the "full cache" case.
        xml5.serializer_v5.write_inventory_to_string(inv)

        self.time(xml5.serializer_v5.write_inventory_to_string, inv)

    def test_read_from_string_kernel_like_inventory(self):
        """Time read_inventory_from_string right after clearing the utf8 encoding cache."""
        inv = self.make_kernel_like_inventory()
        as_str = xml5.serializer_v5.write_inventory_to_string(inv)

        cache_utf8.clear_encoding_cache()
        self.time(xml5.serializer_v5.read_inventory_from_string, as_str)
        # TODO: make sure the final inventory is equal as a sanity check
        #       (needs the inventory produced by the timed read; confirm
        #       whether self.time() hands back the callable's result).

    def test_read_from_string_cached_kernel_like_inventory(self):
        """Time read_inventory_from_string after one untimed read of the same string."""
        cache_utf8.clear_encoding_cache()
        inv = self.make_kernel_like_inventory()
        as_str = xml5.serializer_v5.write_inventory_to_string(inv)

        # Untimed first pass; the encoding cache is not cleared again before
        # the timed read, so the timed call sees whatever this pass cached.
        xml5.serializer_v5.read_inventory_from_string(as_str)

        self.time(xml5.serializer_v5.read_inventory_from_string, as_str)
        # TODO: make sure the final inventory is equal as a sanity check
        #       (needs the inventory produced by the timed read; confirm
        #       whether self.time() hands back the callable's result).