3
# $Id: ElementTree.py 2326 2005-03-17 07:45:21Z fredrik $
3
# $Id: ElementTree.py 1862 2004-06-18 07:31:02Z Fredrik $
5
5
# light-weight XML support for Python 1.5.2 and later.
7
# this is a stripped-down version of Secret Labs' effDOM library (part
8
# of xmlToolkit). compared to effDOM, this implementation has:
10
# - no support for observers
11
# - no html-specific extensions (e.g. entity preload)
12
# - no custom entities, doctypes, etc
13
# - no accelerator module
8
16
# 2001-10-20 fl created (from various sources)
9
17
# 2001-11-01 fl return root from parse method
30
38
# 2004-03-28 fl added XMLID helper
31
39
# 2004-06-02 fl added default support to findtext
32
40
# 2004-06-08 fl fixed encoding of non-ascii element/attribute names
33
# 2004-08-23 fl take advantage of post-2.1 expat features
34
# 2005-02-01 fl added iterparse implementation
35
# 2005-03-02 fl fixed iterparse support for pre-2.2 versions
37
# Copyright (c) 1999-2005 by Fredrik Lundh. All rights reserved.
42
# Copyright (c) 1999-2004 by Fredrik Lundh. All rights reserved.
39
44
# fredrik@pythonware.com
40
45
# http://www.pythonware.com
42
47
# --------------------------------------------------------------------
43
48
# The ElementTree toolkit is
45
# Copyright (c) 1999-2005 by Fredrik Lundh
50
# Copyright (c) 1999-2004 by Fredrik Lundh
47
52
# By obtaining, using, and/or copying this software and/or its
48
53
# associated documentation, you agree that you have read, understood,
863
# Parses an XML document into an element tree incrementally, and reports
864
# what's going on to the user.
866
# @param source A filename or file object containing XML data.
867
# @param events A list of events to report back. If omitted, only "end"
868
# events are reported.
869
# @return A (event, elem) iterator.
873
def __init__(self, source, events=None):
874
if not hasattr(source, "read"):
875
source = open(source, "rb")
879
self.root = self._root = None
880
self._parser = XMLTreeBuilder()
881
# wire up the parser for event reporting
882
parser = self._parser._parser
883
append = self._events.append
889
parser.ordered_attributes = 1
890
parser.specified_attributes = 1
891
def handler(tag, attrib_in, event=event, append=append,
892
start=self._parser._start_list):
893
append((event, start(tag, attrib_in)))
894
parser.StartElementHandler = handler
895
except AttributeError:
896
def handler(tag, attrib_in, event=event, append=append,
897
start=self._parser._start):
898
append((event, start(tag, attrib_in)))
899
parser.StartElementHandler = handler
901
def handler(tag, event=event, append=append,
902
end=self._parser._end):
903
append((event, end(tag)))
904
parser.EndElementHandler = handler
905
elif event == "start-ns":
906
def handler(prefix, uri, event=event, append=append):
908
uri = _encode(uri, "ascii")
911
append((event, (prefix or "", uri)))
912
parser.StartNamespaceDeclHandler = handler
913
elif event == "end-ns":
914
def handler(prefix, event=event, append=append):
915
append((event, None))
916
parser.EndNamespaceDeclHandler = handler
921
item = self._events[self._index]
923
if self._parser is None:
924
self.root = self._root
932
data = self._file.read(16384)
934
self._parser.feed(data)
936
self._root = self._parser.close()
939
self._index = self._index + 1
947
def __getitem__(self, index):
951
868
# Parses an XML document from a string constant. This function can
952
869
# be used to embed "XML literals" in Python code.
1108
1025
class XMLTreeBuilder:
1110
1027
def __init__(self, html=0, target=None):
1112
from xml.parsers import expat
1115
"No module named expat; use SimpleXMLTreeBuilder instead"
1028
from xml.parsers import expat
1117
1029
self._parser = parser = expat.ParserCreate(None, "}")
1118
1030
if target is None:
1119
1031
target = TreeBuilder()
1120
1032
self._target = target
1121
1033
self._names = {} # name memo cache
1123
parser.DefaultHandlerExpand = self._default
1034
parser.DefaultHandler = self._default
1124
1035
parser.StartElementHandler = self._start
1125
1036
parser.EndElementHandler = self._end
1126
1037
parser.CharacterDataHandler = self._data
1127
# let expat do the buffering, if supported
1129
self._parser.buffer_text = 1
1130
except AttributeError:
1132
# use new-style attribute handling, if supported
1134
self._parser.ordered_attributes = 1
1135
self._parser.specified_attributes = 1
1136
parser.StartElementHandler = self._start_list
1137
except AttributeError:
1139
1038
encoding = None
1140
1039
if not parser.returns_unicode:
1141
1040
encoding = "utf-8"
1169
1068
attrib[fixname(key)] = self._fixtext(value)
1170
1069
return self._target.start(tag, attrib)
1172
def _start_list(self, tag, attrib_in):
1173
fixname = self._fixname
1177
for i in range(0, len(attrib_in), 2):
1178
attrib[fixname(attrib_in[i])] = self._fixtext(attrib_in[i+1])
1179
return self._target.start(tag, attrib)
1181
1071
def _data(self, text):
1182
1072
return self._target.data(self._fixtext(text))