~bzr-pqm/bzr/bzr.dev

# Copyright (C) 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
"""Tests for bzr xml serialization performance."""
18
19
from bzrlib import (
1934.1.10 by John Arbash Meinel
Using real utf8 and cache_utf8 has similar performance, 272ms, and 363ms
20
    cache_utf8,
1934.1.1 by John Arbash Meinel
Write a benchmark for the XML serializer
21
    xml5,
22
    )
23
from bzrlib.benchmarks import Benchmark
24
25
26
class BenchXMLSerializer(Benchmark):
    """Benchmarks for writing and reading inventories with xml5.serializer_v5.

    Every benchmark obtains its inventory from make_kernel_like_inventory()
    and passes the serializer call being measured to self.time().
    """

    def test_write_to_string_kernel_like_inventory(self):
        """Time write_inventory_to_string right after clearing the xml5 cache."""
        # On jam's machine, ElementTree serializer took: 2161ms/13487ms
        #                      with Robert's serializer:  631ms/10770ms
        #                      with Entity escaper:       487ms/11636ms
        #           caching Entity escaper, empty cache:  448ms/ 9489ms
        #           caching Entity escaper, full cache:   375ms/ 9489ms
        #                      passing around function:   406ms/ 8942ms
        #              cached, passing around function:   328ms/11248ms
        #                      removing extra function:   354ms/ 8942ms
        #              cached, removing extra function:   275ms/11248ms
        #                          no cache, real utf8:   363ms/11697ms
        #                            cached, real utf8:   272ms/12827ms
        # Really all we want is a real inventory: a real tree with lots of
        # file ids and sha strings, etc.
        inventory = self.make_kernel_like_inventory()

        xml5._clear_cache()
        write_to_string = xml5.serializer_v5.write_inventory_to_string
        self.time(write_to_string, inventory)

    def test_write_kernel_like_inventory(self):
        """Time write_inventory into the file 'kernel-like-inventory'.

        The xml5 cache is cleared first; the file is opened before the timed
        call and closed afterwards whether or not the write succeeds.
        """
        # Really all we want is a real inventory
        inventory = self.make_kernel_like_inventory()

        xml5._clear_cache()
        out_file = open('kernel-like-inventory', 'wb')
        try:
            # We want a real tree with lots of file ids and sha strings, etc.
            self.time(xml5.serializer_v5.write_inventory, inventory, out_file)
        finally:
            out_file.close()

    def test_write_to_string_cached_kernel_like_inventory(self):
        """Time a second write_inventory_to_string of the same inventory.

        The xml5 cache is cleared, then one untimed write is performed before
        the timed one (presumably so the timed run sees a populated cache).
        """
        inventory = self.make_kernel_like_inventory()

        xml5._clear_cache()
        write_to_string = xml5.serializer_v5.write_inventory_to_string
        # We want a real tree with lots of file ids and sha strings, etc.
        # Untimed first pass; only the repeat below is measured.
        write_to_string(inventory)

        self.time(write_to_string, inventory)

    def test_read_from_string_kernel_like_inventory(self):
        """Time read_inventory_from_string after clearing the utf8 cache."""
        inventory = self.make_kernel_like_inventory()
        xml_text = xml5.serializer_v5.write_inventory_to_string(inventory)

        cache_utf8.clear_encoding_cache()
        read_from_string = xml5.serializer_v5.read_inventory_from_string
        round_tripped = self.time(read_from_string, xml_text)
        # TODO: make sure the final inventory is equal as a sanity check

    def test_read_from_string_cached_kernel_like_inventory(self):
        """Time a second read_inventory_from_string of the same text.

        The utf8 encoding cache is cleared up front, then one untimed read is
        performed before the timed one.
        """
        cache_utf8.clear_encoding_cache()
        inventory = self.make_kernel_like_inventory()
        xml_text = xml5.serializer_v5.write_inventory_to_string(inventory)

        read_from_string = xml5.serializer_v5.read_inventory_from_string
        # Untimed first pass; only the repeat below is measured.
        read_from_string(xml_text)

        round_tripped = self.time(read_from_string, xml_text)
        # TODO: make sure the final inventory is equal as a sanity check