~bzr-pqm/bzr/bzr.dev

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# Copyright (C) 2006, 2008-2011, 2017 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Lazily compiled regex objects.

This module defines a class which creates proxy objects for regex
compilation.  This allows overriding re.compile() to return lazily compiled
objects.  

We do this rather than just providing a new interface so that it will also
be used by existing Python modules that create regexes.
"""

from __future__ import absolute_import

import re

from bzrlib import errors


class LazyRegex(object):
    """A proxy around a real regex, which won't be compiled until accessed.

    The arguments intended for re.compile are stored at construction time.
    The first lookup of an attribute that is not yet set on the proxy
    compiles the pattern and copies the commonly used pattern methods onto
    the proxy, so that later calls do not go through __getattr__ again.
    """

    # These are the parameters on a real _sre.SRE_Pattern object, which we
    # will map to local members so that we don't have the proxy overhead.
    _regex_attributes_to_copy = [
                 '__copy__', '__deepcopy__', 'findall', 'finditer', 'match',
                 'scanner', 'search', 'split', 'sub', 'subn'
                 ]

    # We use slots to keep the overhead low. But we need a slot entry for
    # all of the attributes we will copy
    __slots__ = ['_real_regex', '_regex_args', '_regex_kwargs',
                ] + _regex_attributes_to_copy

    def __init__(self, args=(), kwargs=None):
        """Create a new proxy object, passing in the args to pass to re.compile

        :param args: The `*args` to pass to re.compile
        :param kwargs: The `**kwargs` to pass to re.compile. When omitted, a
            fresh empty dict is used so that proxies never share one
            mutable default.
        """
        self._real_regex = None
        self._regex_args = args
        self._regex_kwargs = {} if kwargs is None else kwargs

    def _compile_and_collapse(self):
        """Actually compile the requested regex.

        After compiling, every name in _regex_attributes_to_copy is bound
        directly on the proxy from the compiled object.
        """
        self._real_regex = self._real_re_compile(*self._regex_args,
                                                 **self._regex_kwargs)
        for attr in self._regex_attributes_to_copy:
            setattr(self, attr, getattr(self._real_regex, attr))

    def _real_re_compile(self, *args, **kwargs):
        """Thunk over to the original re.compile.

        :raises errors.InvalidPattern: if the underlying compile raises
            re.error.
        """
        try:
            return _real_re_compile(*args, **kwargs)
        except re.error as e:
            # raise InvalidPattern instead of re.error as this gives a
            # cleaner message to the user.
            # The pattern may have been supplied by keyword, in which case
            # args is empty and args[0] would hide the real problem behind
            # an IndexError.
            pattern = args[0] if args else kwargs.get('pattern', '')
            raise errors.InvalidPattern('"' + pattern + '" ' + str(e))

    def __getstate__(self):
        """Return the state to use when pickling.

        Only the compile arguments are stored; the compiled regex is not.
        """
        return {
            "args": self._regex_args,
            "kwargs": self._regex_kwargs,
            }

    def __setstate__(self, dict):
        """Restore from a pickled state.

        :param dict: A mapping with "args" and "kwargs" entries, as produced
            by __getstate__. The restored proxy starts out uncompiled.
        """
        self._real_regex = None
        self._regex_args = dict["args"]
        self._regex_kwargs = dict["kwargs"]

    def __getattr__(self, attr):
        """Return a member from the proxied regex object.

        If the regex hasn't been compiled yet, compile it

        :raises AttributeError: if the proxy's own state slots have not been
            initialised, or if the compiled regex lacks the attribute.
        """
        if attr in ('_real_regex', '_regex_args', '_regex_kwargs'):
            # We only get here for these names when the slot itself is
            # unset (an instance that never ran __init__ or __setstate__).
            # Reading self._real_regex below would re-enter this method
            # forever, so fail cleanly instead.
            raise AttributeError(attr)
        if self._real_regex is None:
            self._compile_and_collapse()
        # Once we have compiled, the only time we should come here
        # is actually if the attribute is missing.
        return getattr(self._real_regex, attr)


def lazy_compile(*args, **kwargs):
    """Build a LazyRegex proxy in place of an eagerly compiled regex.

    The positional and keyword arguments are handed to LazyRegex untouched;
    nothing is compiled here.

    :return: a LazyRegex proxy object.
    """
    proxy = LazyRegex(args, kwargs)
    return proxy


def install_lazy_compile():
    """Replace re.compile with lazy_compile.

    After this call, code that uses re.compile receives LazyRegex proxies.
    Call reset_compile() to put the original function back.
    """
    setattr(re, 'compile', lazy_compile)


def reset_compile():
    """Put the original function back as re.compile.

    This always assigns the value captured in _real_re_compile, so calling
    it repeatedly is safe. Nesting is not tracked: a single call undoes any
    number of install_lazy_compile() calls.
    """
    setattr(re, 'compile', _real_re_compile)


# Remember whatever re.compile is right now; LazyRegex calls this to do the
# real compilation, and reset_compile() restores it.
_real_re_compile = re.compile
# If the captured function is lazy_compile itself, compiling a proxy would
# only ever create another proxy, so refuse to continue.
if lazy_compile is _real_re_compile:
    raise AssertionError(
        "re.compile has already been overridden as lazy_compile, but this would"
        " cause infinite recursion")


# Some libraries call re.finditer, which fails if it receives a LazyRegex.
if getattr(re, 'finditer', False):
    def finditer_public(pattern, string, flags=0):
        """Replacement for re.finditer that also accepts LazyRegex proxies.

        :param pattern: A LazyRegex, or anything the original re.compile
            accepts.
        :param string: The text to scan.
        :param flags: Passed to the original re.compile; not used when
            pattern is a LazyRegex.
        :return: The result of calling finditer(string) on the regex.
        """
        if not isinstance(pattern, LazyRegex):
            compiled = _real_re_compile(pattern, flags)
            return compiled.finditer(string)
        # A proxy compiles itself on first use.
        return pattern.finditer(string)
    re.finditer = finditer_public