3
# $Id: ElementTree.py 1862 2004-06-18 07:31:02Z Fredrik $
3
# $Id: ElementTree.py 2326 2005-03-17 07:45:21Z fredrik $
5
5
# light-weight XML support for Python 1.5.2 and later.
7
# this is a stripped-down version of Secret Labs' effDOM library (part
8
# of xmlToolkit). compared to effDOM, this implementation has:
10
# - no support for observers
11
# - no html-specific extensions (e.g. entity preload)
12
# - no custom entities, doctypes, etc
13
# - no accelerator module
16
8
# 2001-10-20 fl created (from various sources)
17
9
# 2001-11-01 fl return root from parse method
38
30
# 2004-03-28 fl added XMLID helper
39
31
# 2004-06-02 fl added default support to findtext
40
32
# 2004-06-08 fl fixed encoding of non-ascii element/attribute names
33
# 2004-08-23 fl take advantage of post-2.1 expat features
34
# 2005-02-01 fl added iterparse implementation
35
# 2005-03-02 fl fixed iterparse support for pre-2.2 versions
42
# Copyright (c) 1999-2004 by Fredrik Lundh. All rights reserved.
37
# Copyright (c) 1999-2005 by Fredrik Lundh. All rights reserved.
44
39
# fredrik@pythonware.com
45
40
# http://www.pythonware.com
47
42
# --------------------------------------------------------------------
48
43
# The ElementTree toolkit is
50
# Copyright (c) 1999-2004 by Fredrik Lundh
45
# Copyright (c) 1999-2005 by Fredrik Lundh
52
47
# By obtaining, using, and/or copying this software and/or its
53
48
# associated documentation, you agree that you have read, understood,
863
# Parses an XML document into an element tree incrementally, and reports
864
# what's going on to the user.
866
# @param source A filename or file object containing XML data.
867
# @param events A list of events to report back. If omitted, only "end"
868
# events are reported.
869
# @return A (event, elem) iterator.
873
def __init__(self, source, events=None):
874
if not hasattr(source, "read"):
875
source = open(source, "rb")
879
self.root = self._root = None
880
self._parser = XMLTreeBuilder()
881
# wire up the parser for event reporting
882
parser = self._parser._parser
883
append = self._events.append
889
parser.ordered_attributes = 1
890
parser.specified_attributes = 1
891
def handler(tag, attrib_in, event=event, append=append,
892
start=self._parser._start_list):
893
append((event, start(tag, attrib_in)))
894
parser.StartElementHandler = handler
895
except AttributeError:
896
def handler(tag, attrib_in, event=event, append=append,
897
start=self._parser._start):
898
append((event, start(tag, attrib_in)))
899
parser.StartElementHandler = handler
901
def handler(tag, event=event, append=append,
902
end=self._parser._end):
903
append((event, end(tag)))
904
parser.EndElementHandler = handler
905
elif event == "start-ns":
906
def handler(prefix, uri, event=event, append=append):
908
uri = _encode(uri, "ascii")
911
append((event, (prefix or "", uri)))
912
parser.StartNamespaceDeclHandler = handler
913
elif event == "end-ns":
914
def handler(prefix, event=event, append=append):
915
append((event, None))
916
parser.EndNamespaceDeclHandler = handler
921
item = self._events[self._index]
923
if self._parser is None:
924
self.root = self._root
932
data = self._file.read(16384)
934
self._parser.feed(data)
936
self._root = self._parser.close()
939
self._index = self._index + 1
947
def __getitem__(self, index):
868
951
# Parses an XML document from a string constant. This function can
869
952
# be used to embed "XML literals" in Python code.
1025
1108
class XMLTreeBuilder:
1027
1110
def __init__(self, html=0, target=None):
1028
from xml.parsers import expat
1112
from xml.parsers import expat
1115
"No module named expat; use SimpleXMLTreeBuilder instead"
1029
1117
self._parser = parser = expat.ParserCreate(None, "}")
1030
1118
if target is None:
1031
1119
target = TreeBuilder()
1032
1120
self._target = target
1033
1121
self._names = {} # name memo cache
1034
parser.DefaultHandler = self._default
1123
parser.DefaultHandlerExpand = self._default
1035
1124
parser.StartElementHandler = self._start
1036
1125
parser.EndElementHandler = self._end
1037
1126
parser.CharacterDataHandler = self._data
1127
# let expat do the buffering, if supported
1129
self._parser.buffer_text = 1
1130
except AttributeError:
1132
# use new-style attribute handling, if supported
1134
self._parser.ordered_attributes = 1
1135
self._parser.specified_attributes = 1
1136
parser.StartElementHandler = self._start_list
1137
except AttributeError:
1038
1139
encoding = None
1039
1140
if not parser.returns_unicode:
1040
1141
encoding = "utf-8"
1068
1169
attrib[fixname(key)] = self._fixtext(value)
1069
1170
return self._target.start(tag, attrib)
1172
def _start_list(self, tag, attrib_in):
1173
fixname = self._fixname
1177
for i in range(0, len(attrib_in), 2):
1178
attrib[fixname(attrib_in[i])] = self._fixtext(attrib_in[i+1])
1179
return self._target.start(tag, attrib)
1071
1181
def _data(self, text):
1072
1182
return self._target.data(self._fixtext(text))