1 /*--------------------------------------------------------------------
2 * Licensed to PSF under a Contributor Agreement.
3 * See https://www.python.org/psf/license for licensing details.
4 *
5 * _elementtree - C accelerator for xml.etree.ElementTree
6 * Copyright (c) 1999-2009 by Secret Labs AB. All rights reserved.
7 * Copyright (c) 1999-2009 by Fredrik Lundh.
8 *
9 * [email protected]
10 * http://www.pythonware.com
11 *--------------------------------------------------------------------
12 */
13
14 #define PY_SSIZE_T_CLEAN
15 #define NEEDS_PY_IDENTIFIER
16
17 #include "Python.h"
18 #include "structmember.h" // PyMemberDef
19
20 /* -------------------------------------------------------------------- */
21 /* configuration */
22
23 /* An element can hold this many children without extra memory
24 allocations. */
25 #define STATIC_CHILDREN 4
26
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children. Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
31
32 /* Also note that pymalloc always allocates blocks in multiples of
33 eight bytes. For the current C version of ElementTree, this means
34 that the number of children should be an even number, at least on
35 32-bit platforms. */
36
37 /* -------------------------------------------------------------------- */
38
/* compiler tweaks */
/* LOCAL(type) introduces a file-local (static) function returning 'type'.
   Under MSVC the function is additionally declared __inline and uses the
   __fastcall calling convention. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
45
/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info.

   JOIN_GET(p)       -- value of the lowest pointer bit (the join flag).
   JOIN_SET(p, flag) -- the object pointer of p (flag bit masked off) with
                        'flag' OR-ed into the lowest bit, as a void*.
   JOIN_OBJ(p)       -- p with the lowest bit cleared, as a PyObject*. */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
53
54 /* Py_SETREF for a PyObject* that uses a join flag. */
55 Py_LOCAL_INLINE(void)
_set_joined_ptr(PyObject ** p,PyObject * new_joined_ptr)56 _set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
57 {
58 PyObject *tmp = JOIN_OBJ(*p);
59 *p = new_joined_ptr;
60 Py_DECREF(tmp);
61 }
62
63 /* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
64 * reference since this function sets it to NULL.
65 */
_clear_joined_ptr(PyObject ** p)66 static void _clear_joined_ptr(PyObject **p)
67 {
68 if (*p) {
69 _set_joined_ptr(p, NULL);
70 }
71 }
72
/* Types defined by this extension.  These are declarations without
 * initializers so the types can be referenced by the code below;
 * presumably the initialized definitions follow later in the file
 * (not visible from this section).
 */
static PyTypeObject Element_Type;
static PyTypeObject ElementIter_Type;
static PyTypeObject TreeBuilder_Type;
static PyTypeObject XMLParser_Type;
78
79
/* Per-module state; PEP 3121.
 *
 * Every member is an object reference that elementtree_clear() releases
 * and elementtree_traverse() reports to the garbage collector.
 */
typedef struct {
    PyObject *parseerror_obj;   /* presumably the ParseError exception -- TODO confirm */
    PyObject *deepcopy_obj;     /* callable invoked by the deepcopy() helper */
    PyObject *elementpath_obj;  /* object whose "find" method Element.find calls */
    PyObject *comment_factory;  /* presumably the Comment factory -- TODO confirm */
    PyObject *pi_factory;       /* presumably the ProcessingInstruction factory -- TODO confirm */
} elementtreestate;
88
/* Declared here so that ET_STATE_GLOBAL can take its address; presumably
 * the initialized definition appears later in the file. */
static struct PyModuleDef elementtreemodule;
90
91 /* Given a module object (assumed to be _elementtree), get its per-module
92 * state.
93 */
94 static inline elementtreestate*
get_elementtree_state(PyObject * module)95 get_elementtree_state(PyObject *module)
96 {
97 void *state = PyModule_GetState(module);
98 assert(state != NULL);
99 return (elementtreestate *)state;
100 }
101
/* Find the module instance imported in the currently running sub-interpreter
 * and get its state.
 *
 * Expands to an expression of type 'elementtreestate *'.  The result of
 * PyState_FindModule() is passed straight to PyModule_GetState() without a
 * NULL check.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
107
108 static int
elementtree_clear(PyObject * m)109 elementtree_clear(PyObject *m)
110 {
111 elementtreestate *st = get_elementtree_state(m);
112 Py_CLEAR(st->parseerror_obj);
113 Py_CLEAR(st->deepcopy_obj);
114 Py_CLEAR(st->elementpath_obj);
115 Py_CLEAR(st->comment_factory);
116 Py_CLEAR(st->pi_factory);
117 return 0;
118 }
119
120 static int
elementtree_traverse(PyObject * m,visitproc visit,void * arg)121 elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122 {
123 elementtreestate *st = get_elementtree_state(m);
124 Py_VISIT(st->parseerror_obj);
125 Py_VISIT(st->deepcopy_obj);
126 Py_VISIT(st->elementpath_obj);
127 Py_VISIT(st->comment_factory);
128 Py_VISIT(st->pi_factory);
129 return 0;
130 }
131
132 static void
elementtree_free(void * m)133 elementtree_free(void *m)
134 {
135 elementtree_clear((PyObject *)m);
136 }
137
138 /* helpers */
139
140 LOCAL(PyObject*)
list_join(PyObject * list)141 list_join(PyObject* list)
142 {
143 /* join list elements */
144 PyObject* joiner;
145 PyObject* result;
146
147 joiner = PyUnicode_FromStringAndSize("", 0);
148 if (!joiner)
149 return NULL;
150 result = PyUnicode_Join(joiner, list);
151 Py_DECREF(joiner);
152 return result;
153 }
154
155 /* Is the given object an empty dictionary?
156 */
157 static int
is_empty_dict(PyObject * obj)158 is_empty_dict(PyObject *obj)
159 {
160 return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
161 }
162
163
164 /* -------------------------------------------------------------------- */
165 /* the Element type */
166
/* Optional part of an element: its attributes and its children.  It is
   allocated by create_extra() and released by dealloc_extra(); elements
   without it have a NULL 'extra' pointer. */
typedef struct {

    /* attributes (a dictionary object), or NULL if no attributes */
    PyObject* attrib;

    /* child elements */
    Py_ssize_t length; /* actual number of items */
    Py_ssize_t allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage for up to STATIC_CHILDREN children */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
182
/* Instance layout of an Element object. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children, or NULL when the element has neither
       (see create_extra / clear_extra) */
    ElementObjectExtra* extra;

    PyObject *weakreflist; /* For tp_weaklistoffset */

} ElementObject;
206
207
/* Element_CheckExact(op): true only if op's type is exactly Element_Type.
   Element_Check(op): type check via PyObject_TypeCheck against Element_Type. */
#define Element_CheckExact(op) Py_IS_TYPE(op, &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
210
211
212 /* -------------------------------------------------------------------- */
213 /* Element constructors and destructor */
214
215 LOCAL(int)
create_extra(ElementObject * self,PyObject * attrib)216 create_extra(ElementObject* self, PyObject* attrib)
217 {
218 self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
219 if (!self->extra) {
220 PyErr_NoMemory();
221 return -1;
222 }
223
224 Py_XINCREF(attrib);
225 self->extra->attrib = attrib;
226
227 self->extra->length = 0;
228 self->extra->allocated = STATIC_CHILDREN;
229 self->extra->children = self->extra->_children;
230
231 return 0;
232 }
233
234 LOCAL(void)
dealloc_extra(ElementObjectExtra * extra)235 dealloc_extra(ElementObjectExtra *extra)
236 {
237 Py_ssize_t i;
238
239 if (!extra)
240 return;
241
242 Py_XDECREF(extra->attrib);
243
244 for (i = 0; i < extra->length; i++)
245 Py_DECREF(extra->children[i]);
246
247 if (extra->children != extra->_children)
248 PyObject_Free(extra->children);
249
250 PyObject_Free(extra);
251 }
252
253 LOCAL(void)
clear_extra(ElementObject * self)254 clear_extra(ElementObject* self)
255 {
256 ElementObjectExtra *myextra;
257
258 if (!self->extra)
259 return;
260
261 /* Avoid DECREFs calling into this code again (cycles, etc.)
262 */
263 myextra = self->extra;
264 self->extra = NULL;
265
266 dealloc_extra(myextra);
267 }
268
269 /* Convenience internal function to create new Element objects with the given
270 * tag and attributes.
271 */
272 LOCAL(PyObject*)
create_new_element(PyObject * tag,PyObject * attrib)273 create_new_element(PyObject* tag, PyObject* attrib)
274 {
275 ElementObject* self;
276
277 self = PyObject_GC_New(ElementObject, &Element_Type);
278 if (self == NULL)
279 return NULL;
280 self->extra = NULL;
281
282 Py_INCREF(tag);
283 self->tag = tag;
284
285 Py_INCREF(Py_None);
286 self->text = Py_None;
287
288 Py_INCREF(Py_None);
289 self->tail = Py_None;
290
291 self->weakreflist = NULL;
292
293 PyObject_GC_Track(self);
294
295 if (attrib != NULL && !is_empty_dict(attrib)) {
296 if (create_extra(self, attrib) < 0) {
297 Py_DECREF(self);
298 return NULL;
299 }
300 }
301
302 return (PyObject*) self;
303 }
304
305 static PyObject *
element_new(PyTypeObject * type,PyObject * args,PyObject * kwds)306 element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
307 {
308 ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
309 if (e != NULL) {
310 Py_INCREF(Py_None);
311 e->tag = Py_None;
312
313 Py_INCREF(Py_None);
314 e->text = Py_None;
315
316 Py_INCREF(Py_None);
317 e->tail = Py_None;
318
319 e->extra = NULL;
320 e->weakreflist = NULL;
321 }
322 return (PyObject *)e;
323 }
324
325 /* Helper function for extracting the attrib dictionary from a keywords dict.
326 * This is required by some constructors/functions in this module that can
327 * either accept attrib as a keyword argument or all attributes splashed
328 * directly into *kwds.
329 *
330 * Return a dictionary with the content of kwds merged into the content of
331 * attrib. If there is no attrib keyword, return a copy of kwds.
332 */
333 static PyObject*
get_attrib_from_keywords(PyObject * kwds)334 get_attrib_from_keywords(PyObject *kwds)
335 {
336 PyObject *attrib_str = PyUnicode_FromString("attrib");
337 if (attrib_str == NULL) {
338 return NULL;
339 }
340 PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
341
342 if (attrib) {
343 /* If attrib was found in kwds, copy its value and remove it from
344 * kwds
345 */
346 if (!PyDict_Check(attrib)) {
347 Py_DECREF(attrib_str);
348 PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
349 Py_TYPE(attrib)->tp_name);
350 return NULL;
351 }
352 attrib = PyDict_Copy(attrib);
353 if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
354 Py_DECREF(attrib);
355 attrib = NULL;
356 }
357 }
358 else if (!PyErr_Occurred()) {
359 attrib = PyDict_New();
360 }
361
362 Py_DECREF(attrib_str);
363
364 if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
365 Py_DECREF(attrib);
366 return NULL;
367 }
368 return attrib;
369 }
370
371 /*[clinic input]
372 module _elementtree
373 class _elementtree.Element "ElementObject *" "&Element_Type"
374 class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
375 class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
376 [clinic start generated code]*/
377 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
378
379 static int
element_init(PyObject * self,PyObject * args,PyObject * kwds)380 element_init(PyObject *self, PyObject *args, PyObject *kwds)
381 {
382 PyObject *tag;
383 PyObject *attrib = NULL;
384 ElementObject *self_elem;
385
386 if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
387 return -1;
388
389 if (attrib) {
390 /* attrib passed as positional arg */
391 attrib = PyDict_Copy(attrib);
392 if (!attrib)
393 return -1;
394 if (kwds) {
395 if (PyDict_Update(attrib, kwds) < 0) {
396 Py_DECREF(attrib);
397 return -1;
398 }
399 }
400 } else if (kwds) {
401 /* have keywords args */
402 attrib = get_attrib_from_keywords(kwds);
403 if (!attrib)
404 return -1;
405 }
406
407 self_elem = (ElementObject *)self;
408
409 if (attrib != NULL && !is_empty_dict(attrib)) {
410 if (create_extra(self_elem, attrib) < 0) {
411 Py_DECREF(attrib);
412 return -1;
413 }
414 }
415
416 /* We own a reference to attrib here and it's no longer needed. */
417 Py_XDECREF(attrib);
418
419 /* Replace the objects already pointed to by tag, text and tail. */
420 Py_INCREF(tag);
421 Py_XSETREF(self_elem->tag, tag);
422
423 Py_INCREF(Py_None);
424 _set_joined_ptr(&self_elem->text, Py_None);
425
426 Py_INCREF(Py_None);
427 _set_joined_ptr(&self_elem->tail, Py_None);
428
429 return 0;
430 }
431
432 LOCAL(int)
element_resize(ElementObject * self,Py_ssize_t extra)433 element_resize(ElementObject* self, Py_ssize_t extra)
434 {
435 Py_ssize_t size;
436 PyObject* *children;
437
438 assert(extra >= 0);
439 /* make sure self->children can hold the given number of extra
440 elements. set an exception and return -1 if allocation failed */
441
442 if (!self->extra) {
443 if (create_extra(self, NULL) < 0)
444 return -1;
445 }
446
447 size = self->extra->length + extra; /* never overflows */
448
449 if (size > self->extra->allocated) {
450 /* use Python 2.4's list growth strategy */
451 size = (size >> 3) + (size < 9 ? 3 : 6) + size;
452 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
453 * which needs at least 4 bytes.
454 * Although it's a false alarm always assume at least one child to
455 * be safe.
456 */
457 size = size ? size : 1;
458 if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
459 goto nomemory;
460 if (self->extra->children != self->extra->_children) {
461 /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
462 * "children", which needs at least 4 bytes. Although it's a
463 * false alarm always assume at least one child to be safe.
464 */
465 children = PyObject_Realloc(self->extra->children,
466 size * sizeof(PyObject*));
467 if (!children)
468 goto nomemory;
469 } else {
470 children = PyObject_Malloc(size * sizeof(PyObject*));
471 if (!children)
472 goto nomemory;
473 /* copy existing children from static area to malloc buffer */
474 memcpy(children, self->extra->children,
475 self->extra->length * sizeof(PyObject*));
476 }
477 self->extra->children = children;
478 self->extra->allocated = size;
479 }
480
481 return 0;
482
483 nomemory:
484 PyErr_NoMemory();
485 return -1;
486 }
487
488 LOCAL(void)
raise_type_error(PyObject * element)489 raise_type_error(PyObject *element)
490 {
491 PyErr_Format(PyExc_TypeError,
492 "expected an Element, not \"%.200s\"",
493 Py_TYPE(element)->tp_name);
494 }
495
496 LOCAL(int)
element_add_subelement(ElementObject * self,PyObject * element)497 element_add_subelement(ElementObject* self, PyObject* element)
498 {
499 /* add a child element to a parent */
500
501 if (!Element_Check(element)) {
502 raise_type_error(element);
503 return -1;
504 }
505
506 if (element_resize(self, 1) < 0)
507 return -1;
508
509 Py_INCREF(element);
510 self->extra->children[self->extra->length] = element;
511
512 self->extra->length++;
513
514 return 0;
515 }
516
517 LOCAL(PyObject*)
element_get_attrib(ElementObject * self)518 element_get_attrib(ElementObject* self)
519 {
520 /* return borrowed reference to attrib dictionary */
521 /* note: this function assumes that the extra section exists */
522
523 PyObject* res = self->extra->attrib;
524
525 if (!res) {
526 /* create missing dictionary */
527 res = self->extra->attrib = PyDict_New();
528 }
529
530 return res;
531 }
532
533 LOCAL(PyObject*)
element_get_text(ElementObject * self)534 element_get_text(ElementObject* self)
535 {
536 /* return borrowed reference to text attribute */
537
538 PyObject *res = self->text;
539
540 if (JOIN_GET(res)) {
541 res = JOIN_OBJ(res);
542 if (PyList_CheckExact(res)) {
543 PyObject *tmp = list_join(res);
544 if (!tmp)
545 return NULL;
546 self->text = tmp;
547 Py_DECREF(res);
548 res = tmp;
549 }
550 }
551
552 return res;
553 }
554
555 LOCAL(PyObject*)
element_get_tail(ElementObject * self)556 element_get_tail(ElementObject* self)
557 {
558 /* return borrowed reference to text attribute */
559
560 PyObject *res = self->tail;
561
562 if (JOIN_GET(res)) {
563 res = JOIN_OBJ(res);
564 if (PyList_CheckExact(res)) {
565 PyObject *tmp = list_join(res);
566 if (!tmp)
567 return NULL;
568 self->tail = tmp;
569 Py_DECREF(res);
570 res = tmp;
571 }
572 }
573
574 return res;
575 }
576
577 static PyObject*
subelement(PyObject * self,PyObject * args,PyObject * kwds)578 subelement(PyObject *self, PyObject *args, PyObject *kwds)
579 {
580 PyObject* elem;
581
582 ElementObject* parent;
583 PyObject* tag;
584 PyObject* attrib = NULL;
585 if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
586 &Element_Type, &parent, &tag,
587 &PyDict_Type, &attrib)) {
588 return NULL;
589 }
590
591 if (attrib) {
592 /* attrib passed as positional arg */
593 attrib = PyDict_Copy(attrib);
594 if (!attrib)
595 return NULL;
596 if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
597 Py_DECREF(attrib);
598 return NULL;
599 }
600 } else if (kwds) {
601 /* have keyword args */
602 attrib = get_attrib_from_keywords(kwds);
603 if (!attrib)
604 return NULL;
605 } else {
606 /* no attrib arg, no kwds, so no attribute */
607 }
608
609 elem = create_new_element(tag, attrib);
610 Py_XDECREF(attrib);
611 if (elem == NULL)
612 return NULL;
613
614 if (element_add_subelement(parent, elem) < 0) {
615 Py_DECREF(elem);
616 return NULL;
617 }
618
619 return elem;
620 }
621
622 static int
element_gc_traverse(ElementObject * self,visitproc visit,void * arg)623 element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
624 {
625 Py_VISIT(self->tag);
626 Py_VISIT(JOIN_OBJ(self->text));
627 Py_VISIT(JOIN_OBJ(self->tail));
628
629 if (self->extra) {
630 Py_ssize_t i;
631 Py_VISIT(self->extra->attrib);
632
633 for (i = 0; i < self->extra->length; ++i)
634 Py_VISIT(self->extra->children[i]);
635 }
636 return 0;
637 }
638
639 static int
element_gc_clear(ElementObject * self)640 element_gc_clear(ElementObject *self)
641 {
642 Py_CLEAR(self->tag);
643 _clear_joined_ptr(&self->text);
644 _clear_joined_ptr(&self->tail);
645
646 /* After dropping all references from extra, it's no longer valid anyway,
647 * so fully deallocate it.
648 */
649 clear_extra(self);
650 return 0;
651 }
652
/* Deallocator for Element objects (presumably installed as tp_dealloc; the
 * type definition is outside this section).
 *
 * Order of operations: untrack from the GC, clear weak references, release
 * all owned references and the extra block, then free the object through
 * its type's tp_free.  The teardown is bracketed by the
 * Py_TRASHCAN_BEGIN / Py_TRASHCAN_END macro pair.
 */
static void
element_dealloc(ElementObject* self)
{
    /* bpo-31095: UnTrack is needed before calling any callbacks */
    PyObject_GC_UnTrack(self);
    Py_TRASHCAN_BEGIN(self, element_dealloc)

    /* weak references are cleared before any owned reference is dropped */
    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) self);

    /* element_gc_clear clears all references and deallocates extra
    */
    element_gc_clear(self);

    Py_TYPE(self)->tp_free((PyObject *)self);
    Py_TRASHCAN_END
}
670
671 /* -------------------------------------------------------------------- */
672
673 /*[clinic input]
674 _elementtree.Element.append
675
676 subelement: object(subclass_of='&Element_Type')
677 /
678
679 [clinic start generated code]*/
680
681 static PyObject *
_elementtree_Element_append_impl(ElementObject * self,PyObject * subelement)682 _elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
683 /*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
684 {
685 if (element_add_subelement(self, subelement) < 0)
686 return NULL;
687
688 Py_RETURN_NONE;
689 }
690
691 /*[clinic input]
692 _elementtree.Element.clear
693
694 [clinic start generated code]*/
695
696 static PyObject *
_elementtree_Element_clear_impl(ElementObject * self)697 _elementtree_Element_clear_impl(ElementObject *self)
698 /*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
699 {
700 clear_extra(self);
701
702 Py_INCREF(Py_None);
703 _set_joined_ptr(&self->text, Py_None);
704
705 Py_INCREF(Py_None);
706 _set_joined_ptr(&self->tail, Py_None);
707
708 Py_RETURN_NONE;
709 }
710
711 /*[clinic input]
712 _elementtree.Element.__copy__
713
714 [clinic start generated code]*/
715
716 static PyObject *
_elementtree_Element___copy___impl(ElementObject * self)717 _elementtree_Element___copy___impl(ElementObject *self)
718 /*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
719 {
720 Py_ssize_t i;
721 ElementObject* element;
722
723 element = (ElementObject*) create_new_element(
724 self->tag, self->extra ? self->extra->attrib : NULL);
725 if (!element)
726 return NULL;
727
728 Py_INCREF(JOIN_OBJ(self->text));
729 _set_joined_ptr(&element->text, self->text);
730
731 Py_INCREF(JOIN_OBJ(self->tail));
732 _set_joined_ptr(&element->tail, self->tail);
733
734 assert(!element->extra || !element->extra->length);
735 if (self->extra) {
736 if (element_resize(element, self->extra->length) < 0) {
737 Py_DECREF(element);
738 return NULL;
739 }
740
741 for (i = 0; i < self->extra->length; i++) {
742 Py_INCREF(self->extra->children[i]);
743 element->extra->children[i] = self->extra->children[i];
744 }
745
746 assert(!element->extra->length);
747 element->extra->length = self->extra->length;
748 }
749
750 return (PyObject*) element;
751 }
752
/* Helper for a deep copy: deepcopy(object, memo).  Declared ahead of its
 * definition because Element.__deepcopy__ below calls it and it in turn
 * calls Element.__deepcopy__. */
LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
755
756 /*[clinic input]
757 _elementtree.Element.__deepcopy__
758
759 memo: object(subclass_of="&PyDict_Type")
760 /
761
762 [clinic start generated code]*/
763
764 static PyObject *
_elementtree_Element___deepcopy___impl(ElementObject * self,PyObject * memo)765 _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
766 /*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
767 {
768 Py_ssize_t i;
769 ElementObject* element;
770 PyObject* tag;
771 PyObject* attrib;
772 PyObject* text;
773 PyObject* tail;
774 PyObject* id;
775
776 tag = deepcopy(self->tag, memo);
777 if (!tag)
778 return NULL;
779
780 if (self->extra && self->extra->attrib) {
781 attrib = deepcopy(self->extra->attrib, memo);
782 if (!attrib) {
783 Py_DECREF(tag);
784 return NULL;
785 }
786 } else {
787 attrib = NULL;
788 }
789
790 element = (ElementObject*) create_new_element(tag, attrib);
791
792 Py_DECREF(tag);
793 Py_XDECREF(attrib);
794
795 if (!element)
796 return NULL;
797
798 text = deepcopy(JOIN_OBJ(self->text), memo);
799 if (!text)
800 goto error;
801 _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
802
803 tail = deepcopy(JOIN_OBJ(self->tail), memo);
804 if (!tail)
805 goto error;
806 _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
807
808 assert(!element->extra || !element->extra->length);
809 if (self->extra) {
810 if (element_resize(element, self->extra->length) < 0)
811 goto error;
812
813 for (i = 0; i < self->extra->length; i++) {
814 PyObject* child = deepcopy(self->extra->children[i], memo);
815 if (!child || !Element_Check(child)) {
816 if (child) {
817 raise_type_error(child);
818 Py_DECREF(child);
819 }
820 element->extra->length = i;
821 goto error;
822 }
823 element->extra->children[i] = child;
824 }
825
826 assert(!element->extra->length);
827 element->extra->length = self->extra->length;
828 }
829
830 /* add object to memo dictionary (so deepcopy won't visit it again) */
831 id = PyLong_FromSsize_t((uintptr_t) self);
832 if (!id)
833 goto error;
834
835 i = PyDict_SetItem(memo, id, (PyObject*) element);
836
837 Py_DECREF(id);
838
839 if (i < 0)
840 goto error;
841
842 return (PyObject*) element;
843
844 error:
845 Py_DECREF(element);
846 return NULL;
847 }
848
849 LOCAL(PyObject *)
deepcopy(PyObject * object,PyObject * memo)850 deepcopy(PyObject *object, PyObject *memo)
851 {
852 /* do a deep copy of the given object */
853 elementtreestate *st;
854 PyObject *stack[2];
855
856 /* Fast paths */
857 if (object == Py_None || PyUnicode_CheckExact(object)) {
858 Py_INCREF(object);
859 return object;
860 }
861
862 if (Py_REFCNT(object) == 1) {
863 if (PyDict_CheckExact(object)) {
864 PyObject *key, *value;
865 Py_ssize_t pos = 0;
866 int simple = 1;
867 while (PyDict_Next(object, &pos, &key, &value)) {
868 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
869 simple = 0;
870 break;
871 }
872 }
873 if (simple)
874 return PyDict_Copy(object);
875 /* Fall through to general case */
876 }
877 else if (Element_CheckExact(object)) {
878 return _elementtree_Element___deepcopy___impl(
879 (ElementObject *)object, memo);
880 }
881 }
882
883 /* General case */
884 st = ET_STATE_GLOBAL;
885 if (!st->deepcopy_obj) {
886 PyErr_SetString(PyExc_RuntimeError,
887 "deepcopy helper not found");
888 return NULL;
889 }
890
891 stack[0] = object;
892 stack[1] = memo;
893 return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
894 }
895
896
897 /*[clinic input]
898 _elementtree.Element.__sizeof__ -> Py_ssize_t
899
900 [clinic start generated code]*/
901
902 static Py_ssize_t
_elementtree_Element___sizeof___impl(ElementObject * self)903 _elementtree_Element___sizeof___impl(ElementObject *self)
904 /*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
905 {
906 Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
907 if (self->extra) {
908 result += sizeof(ElementObjectExtra);
909 if (self->extra->children != self->extra->_children)
910 result += sizeof(PyObject*) * self->extra->allocated;
911 }
912 return result;
913 }
914
/* dict keys for getstate/setstate.  __getstate__ uses them as the keys of
 * the dictionary it builds; element_setstate_from_Python uses them as the
 * keyword names when unpacking the state dictionary. */
#define PICKLED_TAG "tag"
#define PICKLED_CHILDREN "_children"
#define PICKLED_ATTRIB "attrib"
#define PICKLED_TAIL "tail"
#define PICKLED_TEXT "text"
921
922 /* __getstate__ returns a fabricated instance dict as in the pure-Python
923 * Element implementation, for interoperability/interchangeability. This
924 * makes the pure-Python implementation details an API, but (a) there aren't
925 * any unnecessary structures there; and (b) it buys compatibility with 3.2
926 * pickles. See issue #16076.
927 */
928 /*[clinic input]
929 _elementtree.Element.__getstate__
930
931 [clinic start generated code]*/
932
933 static PyObject *
_elementtree_Element___getstate___impl(ElementObject * self)934 _elementtree_Element___getstate___impl(ElementObject *self)
935 /*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
936 {
937 Py_ssize_t i;
938 PyObject *children, *attrib;
939
940 /* Build a list of children. */
941 children = PyList_New(self->extra ? self->extra->length : 0);
942 if (!children)
943 return NULL;
944 for (i = 0; i < PyList_GET_SIZE(children); i++) {
945 PyObject *child = self->extra->children[i];
946 Py_INCREF(child);
947 PyList_SET_ITEM(children, i, child);
948 }
949
950 if (self->extra && self->extra->attrib) {
951 attrib = self->extra->attrib;
952 Py_INCREF(attrib);
953 }
954 else {
955 attrib = PyDict_New();
956 if (!attrib) {
957 Py_DECREF(children);
958 return NULL;
959 }
960 }
961
962 return Py_BuildValue("{sOsNsNsOsO}",
963 PICKLED_TAG, self->tag,
964 PICKLED_CHILDREN, children,
965 PICKLED_ATTRIB, attrib,
966 PICKLED_TEXT, JOIN_OBJ(self->text),
967 PICKLED_TAIL, JOIN_OBJ(self->tail));
968 }
969
/* Replace the state of 'self' with the given components.
 *
 *   tag       must be non-NULL, otherwise TypeError is set.
 *   text/tail may be NULL, in which case None is stored; an exact list
 *             is stored with the join flag set.
 *   attrib    may be NULL; stored as the new attribute object.
 *   children  may be NULL; otherwise must be a list whose items all pass
 *             Element_Check.
 *
 * tag, text and tail are replaced before children and attrib are
 * validated, so they stay replaced even when a later step fails.
 *
 * Returns a new reference to None on success, NULL with an exception set
 * on failure.
 */
static PyObject *
element_setstate_from_attributes(ElementObject *self,
                                 PyObject *tag,
                                 PyObject *attrib,
                                 PyObject *text,
                                 PyObject *tail,
                                 PyObject *children)
{
    Py_ssize_t i, nchildren;
    ElementObjectExtra *oldextra = NULL;

    if (!tag) {
        PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
        return NULL;
    }

    Py_INCREF(tag);
    Py_XSETREF(self->tag, tag);

    /* A missing text becomes None; an exact list gets the join flag. */
    text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
    Py_INCREF(JOIN_OBJ(text));
    _set_joined_ptr(&self->text, text);

    /* Same treatment for tail. */
    tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
    Py_INCREF(JOIN_OBJ(tail));
    _set_joined_ptr(&self->tail, tail);

    /* Handle ATTRIB and CHILDREN. */
    if (!children && !attrib) {
        /* Nothing more to store; any existing extra block is kept as is. */
        Py_RETURN_NONE;
    }

    /* Compute 'nchildren'. */
    if (children) {
        if (!PyList_Check(children)) {
            PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
            return NULL;
        }
        nchildren = PyList_GET_SIZE(children);

        /* (Re-)allocate 'extra'.
           Avoid DECREFs calling into this code again (cycles, etc.)
         */
        oldextra = self->extra;
        self->extra = NULL;
        if (element_resize(self, nchildren)) {
            /* Allocation failed: discard whatever was created and put the
               previous extra block back. */
            assert(!self->extra || !self->extra->length);
            clear_extra(self);
            self->extra = oldextra;
            return NULL;
        }
        assert(self->extra);
        assert(self->extra->allocated >= nchildren);
        if (oldextra) {
            /* Move the attrib reference from the old block to the new one;
               the old block no longer owns it. */
            assert(self->extra->attrib == NULL);
            self->extra->attrib = oldextra->attrib;
            oldextra->attrib = NULL;
        }

        /* Copy children */
        for (i = 0; i < nchildren; i++) {
            PyObject *child = PyList_GET_ITEM(children, i);
            if (!Element_Check(child)) {
                raise_type_error(child);
                /* Keep the children stored so far owned by the new block
                   and release the old block (old children). */
                self->extra->length = i;
                dealloc_extra(oldextra);
                return NULL;
            }
            Py_INCREF(child);
            self->extra->children[i] = child;
        }

        assert(!self->extra->length);
        self->extra->length = nchildren;
    }
    else {
        /* Only attrib was given: make sure an extra block exists to hold
           it; existing children are left untouched. */
        if (element_resize(self, 0)) {
            return NULL;
        }
    }

    /* Stash attrib. */
    Py_XINCREF(attrib);
    Py_XSETREF(self->extra->attrib, attrib);
    /* oldextra is NULL unless children were replaced above. */
    dealloc_extra(oldextra);

    Py_RETURN_NONE;
}
1058
1059 /* __setstate__ for Element instance from the Python implementation.
1060 * 'state' should be the instance dict.
1061 */
1062
1063 static PyObject *
element_setstate_from_Python(ElementObject * self,PyObject * state)1064 element_setstate_from_Python(ElementObject *self, PyObject *state)
1065 {
1066 static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1067 PICKLED_TAIL, PICKLED_CHILDREN, 0};
1068 PyObject *args;
1069 PyObject *tag, *attrib, *text, *tail, *children;
1070 PyObject *retval;
1071
1072 tag = attrib = text = tail = children = NULL;
1073 args = PyTuple_New(0);
1074 if (!args)
1075 return NULL;
1076
1077 if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1078 &attrib, &text, &tail, &children))
1079 retval = element_setstate_from_attributes(self, tag, attrib, text,
1080 tail, children);
1081 else
1082 retval = NULL;
1083
1084 Py_DECREF(args);
1085 return retval;
1086 }
1087
1088 /*[clinic input]
1089 _elementtree.Element.__setstate__
1090
1091 state: object
1092 /
1093
1094 [clinic start generated code]*/
1095
1096 static PyObject *
_elementtree_Element___setstate__(ElementObject * self,PyObject * state)1097 _elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1098 /*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
1099 {
1100 if (!PyDict_CheckExact(state)) {
1101 PyErr_Format(PyExc_TypeError,
1102 "Don't know how to unpickle \"%.200R\" as an Element",
1103 state);
1104 return NULL;
1105 }
1106 else
1107 return element_setstate_from_Python(self, state);
1108 }
1109
1110 LOCAL(int)
checkpath(PyObject * tag)1111 checkpath(PyObject* tag)
1112 {
1113 Py_ssize_t i;
1114 int check = 1;
1115
1116 /* check if a tag contains an xpath character */
1117
1118 #define PATHCHAR(ch) \
1119 (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
1120
1121 if (PyUnicode_Check(tag)) {
1122 const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1123 const void *data = PyUnicode_DATA(tag);
1124 unsigned int kind = PyUnicode_KIND(tag);
1125 if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1126 PyUnicode_READ(kind, data, 1) == '}' || (
1127 PyUnicode_READ(kind, data, 1) == '*' &&
1128 PyUnicode_READ(kind, data, 2) == '}'))) {
1129 /* wildcard: '{}tag' or '{*}tag' */
1130 return 1;
1131 }
1132 for (i = 0; i < len; i++) {
1133 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1134 if (ch == '{')
1135 check = 0;
1136 else if (ch == '}')
1137 check = 1;
1138 else if (check && PATHCHAR(ch))
1139 return 1;
1140 }
1141 return 0;
1142 }
1143 if (PyBytes_Check(tag)) {
1144 const char *p = PyBytes_AS_STRING(tag);
1145 const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1146 if (len >= 3 && p[0] == '{' && (
1147 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
1148 /* wildcard: '{}tag' or '{*}tag' */
1149 return 1;
1150 }
1151 for (i = 0; i < len; i++) {
1152 if (p[i] == '{')
1153 check = 0;
1154 else if (p[i] == '}')
1155 check = 1;
1156 else if (check && PATHCHAR(p[i]))
1157 return 1;
1158 }
1159 return 0;
1160 }
1161
1162 return 1; /* unknown type; might be path expression */
1163 }
1164
1165 /*[clinic input]
1166 _elementtree.Element.extend
1167
1168 elements: object
1169 /
1170
1171 [clinic start generated code]*/
1172
1173 static PyObject *
_elementtree_Element_extend(ElementObject * self,PyObject * elements)1174 _elementtree_Element_extend(ElementObject *self, PyObject *elements)
1175 /*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
1176 {
1177 PyObject* seq;
1178 Py_ssize_t i;
1179
1180 seq = PySequence_Fast(elements, "");
1181 if (!seq) {
1182 PyErr_Format(
1183 PyExc_TypeError,
1184 "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
1185 );
1186 return NULL;
1187 }
1188
1189 for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
1190 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1191 Py_INCREF(element);
1192 if (element_add_subelement(self, element) < 0) {
1193 Py_DECREF(seq);
1194 Py_DECREF(element);
1195 return NULL;
1196 }
1197 Py_DECREF(element);
1198 }
1199
1200 Py_DECREF(seq);
1201
1202 Py_RETURN_NONE;
1203 }
1204
1205 /*[clinic input]
1206 _elementtree.Element.find
1207
1208 path: object
1209 namespaces: object = None
1210
1211 [clinic start generated code]*/
1212
1213 static PyObject *
_elementtree_Element_find_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1214 _elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1215 PyObject *namespaces)
1216 /*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
1217 {
1218 Py_ssize_t i;
1219 elementtreestate *st = ET_STATE_GLOBAL;
1220
1221 if (checkpath(path) || namespaces != Py_None) {
1222 _Py_IDENTIFIER(find);
1223 return _PyObject_CallMethodIdObjArgs(
1224 st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
1225 );
1226 }
1227
1228 if (!self->extra)
1229 Py_RETURN_NONE;
1230
1231 for (i = 0; i < self->extra->length; i++) {
1232 PyObject* item = self->extra->children[i];
1233 int rc;
1234 assert(Element_Check(item));
1235 Py_INCREF(item);
1236 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1237 if (rc > 0)
1238 return item;
1239 Py_DECREF(item);
1240 if (rc < 0)
1241 return NULL;
1242 }
1243
1244 Py_RETURN_NONE;
1245 }
1246
1247 /*[clinic input]
1248 _elementtree.Element.findtext
1249
1250 path: object
1251 default: object = None
1252 namespaces: object = None
1253
1254 [clinic start generated code]*/
1255
1256 static PyObject *
_elementtree_Element_findtext_impl(ElementObject * self,PyObject * path,PyObject * default_value,PyObject * namespaces)1257 _elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1258 PyObject *default_value,
1259 PyObject *namespaces)
1260 /*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
1261 {
1262 Py_ssize_t i;
1263 _Py_IDENTIFIER(findtext);
1264 elementtreestate *st = ET_STATE_GLOBAL;
1265
1266 if (checkpath(path) || namespaces != Py_None)
1267 return _PyObject_CallMethodIdObjArgs(
1268 st->elementpath_obj, &PyId_findtext,
1269 self, path, default_value, namespaces, NULL
1270 );
1271
1272 if (!self->extra) {
1273 Py_INCREF(default_value);
1274 return default_value;
1275 }
1276
1277 for (i = 0; i < self->extra->length; i++) {
1278 PyObject *item = self->extra->children[i];
1279 int rc;
1280 assert(Element_Check(item));
1281 Py_INCREF(item);
1282 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1283 if (rc > 0) {
1284 PyObject* text = element_get_text((ElementObject*)item);
1285 if (text == Py_None) {
1286 Py_DECREF(item);
1287 return PyUnicode_New(0, 0);
1288 }
1289 Py_XINCREF(text);
1290 Py_DECREF(item);
1291 return text;
1292 }
1293 Py_DECREF(item);
1294 if (rc < 0)
1295 return NULL;
1296 }
1297
1298 Py_INCREF(default_value);
1299 return default_value;
1300 }
1301
1302 /*[clinic input]
1303 _elementtree.Element.findall
1304
1305 path: object
1306 namespaces: object = None
1307
1308 [clinic start generated code]*/
1309
1310 static PyObject *
_elementtree_Element_findall_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1311 _elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1312 PyObject *namespaces)
1313 /*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
1314 {
1315 Py_ssize_t i;
1316 PyObject* out;
1317 elementtreestate *st = ET_STATE_GLOBAL;
1318
1319 if (checkpath(path) || namespaces != Py_None) {
1320 _Py_IDENTIFIER(findall);
1321 return _PyObject_CallMethodIdObjArgs(
1322 st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
1323 );
1324 }
1325
1326 out = PyList_New(0);
1327 if (!out)
1328 return NULL;
1329
1330 if (!self->extra)
1331 return out;
1332
1333 for (i = 0; i < self->extra->length; i++) {
1334 PyObject* item = self->extra->children[i];
1335 int rc;
1336 assert(Element_Check(item));
1337 Py_INCREF(item);
1338 rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1339 if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1340 Py_DECREF(item);
1341 Py_DECREF(out);
1342 return NULL;
1343 }
1344 Py_DECREF(item);
1345 }
1346
1347 return out;
1348 }
1349
1350 /*[clinic input]
1351 _elementtree.Element.iterfind
1352
1353 path: object
1354 namespaces: object = None
1355
1356 [clinic start generated code]*/
1357
1358 static PyObject *
_elementtree_Element_iterfind_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1359 _elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1360 PyObject *namespaces)
1361 /*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1362 {
1363 PyObject* tag = path;
1364 _Py_IDENTIFIER(iterfind);
1365 elementtreestate *st = ET_STATE_GLOBAL;
1366
1367 return _PyObject_CallMethodIdObjArgs(
1368 st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
1369 }
1370
1371 /*[clinic input]
1372 _elementtree.Element.get
1373
1374 key: object
1375 default: object = None
1376
1377 [clinic start generated code]*/
1378
1379 static PyObject *
_elementtree_Element_get_impl(ElementObject * self,PyObject * key,PyObject * default_value)1380 _elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1381 PyObject *default_value)
1382 /*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
1383 {
1384 if (self->extra && self->extra->attrib) {
1385 PyObject *attrib = self->extra->attrib;
1386 Py_INCREF(attrib);
1387 PyObject *value = PyDict_GetItemWithError(attrib, key);
1388 Py_XINCREF(value);
1389 Py_DECREF(attrib);
1390 if (value != NULL || PyErr_Occurred()) {
1391 return value;
1392 }
1393 }
1394
1395 Py_INCREF(default_value);
1396 return default_value;
1397 }
1398
/* Forward declaration; the definition follows the iterator type further
   down in this file.  Builds an iterator rooted at `self`: `tag` is the tag
   to look for (Py_None disables filtering) and a non-zero `gettext` makes
   the iterator yield text instead of elements. */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1401
1402
1403 /*[clinic input]
1404 _elementtree.Element.iter
1405
1406 tag: object = None
1407
1408 [clinic start generated code]*/
1409
1410 static PyObject *
_elementtree_Element_iter_impl(ElementObject * self,PyObject * tag)1411 _elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1412 /*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
1413 {
1414 if (PyUnicode_Check(tag)) {
1415 if (PyUnicode_READY(tag) < 0)
1416 return NULL;
1417 if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1418 tag = Py_None;
1419 }
1420 else if (PyBytes_Check(tag)) {
1421 if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1422 tag = Py_None;
1423 }
1424
1425 return create_elementiter(self, tag, 0);
1426 }
1427
1428
1429 /*[clinic input]
1430 _elementtree.Element.itertext
1431
1432 [clinic start generated code]*/
1433
1434 static PyObject *
_elementtree_Element_itertext_impl(ElementObject * self)1435 _elementtree_Element_itertext_impl(ElementObject *self)
1436 /*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1437 {
1438 return create_elementiter(self, Py_None, 1);
1439 }
1440
1441
1442 static PyObject*
element_getitem(PyObject * self_,Py_ssize_t index)1443 element_getitem(PyObject* self_, Py_ssize_t index)
1444 {
1445 ElementObject* self = (ElementObject*) self_;
1446
1447 if (!self->extra || index < 0 || index >= self->extra->length) {
1448 PyErr_SetString(
1449 PyExc_IndexError,
1450 "child index out of range"
1451 );
1452 return NULL;
1453 }
1454
1455 Py_INCREF(self->extra->children[index]);
1456 return self->extra->children[index];
1457 }
1458
1459 /*[clinic input]
1460 _elementtree.Element.insert
1461
1462 index: Py_ssize_t
1463 subelement: object(subclass_of='&Element_Type')
1464 /
1465
1466 [clinic start generated code]*/
1467
1468 static PyObject *
_elementtree_Element_insert_impl(ElementObject * self,Py_ssize_t index,PyObject * subelement)1469 _elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1470 PyObject *subelement)
1471 /*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
1472 {
1473 Py_ssize_t i;
1474
1475 if (!self->extra) {
1476 if (create_extra(self, NULL) < 0)
1477 return NULL;
1478 }
1479
1480 if (index < 0) {
1481 index += self->extra->length;
1482 if (index < 0)
1483 index = 0;
1484 }
1485 if (index > self->extra->length)
1486 index = self->extra->length;
1487
1488 if (element_resize(self, 1) < 0)
1489 return NULL;
1490
1491 for (i = self->extra->length; i > index; i--)
1492 self->extra->children[i] = self->extra->children[i-1];
1493
1494 Py_INCREF(subelement);
1495 self->extra->children[index] = subelement;
1496
1497 self->extra->length++;
1498
1499 Py_RETURN_NONE;
1500 }
1501
1502 /*[clinic input]
1503 _elementtree.Element.items
1504
1505 [clinic start generated code]*/
1506
1507 static PyObject *
_elementtree_Element_items_impl(ElementObject * self)1508 _elementtree_Element_items_impl(ElementObject *self)
1509 /*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1510 {
1511 if (!self->extra || !self->extra->attrib)
1512 return PyList_New(0);
1513
1514 return PyDict_Items(self->extra->attrib);
1515 }
1516
1517 /*[clinic input]
1518 _elementtree.Element.keys
1519
1520 [clinic start generated code]*/
1521
1522 static PyObject *
_elementtree_Element_keys_impl(ElementObject * self)1523 _elementtree_Element_keys_impl(ElementObject *self)
1524 /*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1525 {
1526 if (!self->extra || !self->extra->attrib)
1527 return PyList_New(0);
1528
1529 return PyDict_Keys(self->extra->attrib);
1530 }
1531
1532 static Py_ssize_t
element_length(ElementObject * self)1533 element_length(ElementObject* self)
1534 {
1535 if (!self->extra)
1536 return 0;
1537
1538 return self->extra->length;
1539 }
1540
1541 /*[clinic input]
1542 _elementtree.Element.makeelement
1543
1544 tag: object
1545 attrib: object(subclass_of='&PyDict_Type')
1546 /
1547
1548 [clinic start generated code]*/
1549
1550 static PyObject *
_elementtree_Element_makeelement_impl(ElementObject * self,PyObject * tag,PyObject * attrib)1551 _elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1552 PyObject *attrib)
1553 /*[clinic end generated code: output=4109832d5bb789ef input=2279d974529c3861]*/
1554 {
1555 PyObject* elem;
1556
1557 attrib = PyDict_Copy(attrib);
1558 if (!attrib)
1559 return NULL;
1560
1561 elem = create_new_element(tag, attrib);
1562
1563 Py_DECREF(attrib);
1564
1565 return elem;
1566 }
1567
1568 /*[clinic input]
1569 _elementtree.Element.remove
1570
1571 subelement: object(subclass_of='&Element_Type')
1572 /
1573
1574 [clinic start generated code]*/
1575
1576 static PyObject *
_elementtree_Element_remove_impl(ElementObject * self,PyObject * subelement)1577 _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1578 /*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
1579 {
1580 Py_ssize_t i;
1581 int rc;
1582 PyObject *found;
1583
1584 if (!self->extra) {
1585 /* element has no children, so raise exception */
1586 PyErr_SetString(
1587 PyExc_ValueError,
1588 "list.remove(x): x not in list"
1589 );
1590 return NULL;
1591 }
1592
1593 for (i = 0; i < self->extra->length; i++) {
1594 if (self->extra->children[i] == subelement)
1595 break;
1596 rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
1597 if (rc > 0)
1598 break;
1599 if (rc < 0)
1600 return NULL;
1601 }
1602
1603 if (i >= self->extra->length) {
1604 /* subelement is not in children, so raise exception */
1605 PyErr_SetString(
1606 PyExc_ValueError,
1607 "list.remove(x): x not in list"
1608 );
1609 return NULL;
1610 }
1611
1612 found = self->extra->children[i];
1613
1614 self->extra->length--;
1615 for (; i < self->extra->length; i++)
1616 self->extra->children[i] = self->extra->children[i+1];
1617
1618 Py_DECREF(found);
1619 Py_RETURN_NONE;
1620 }
1621
1622 static PyObject*
element_repr(ElementObject * self)1623 element_repr(ElementObject* self)
1624 {
1625 int status;
1626
1627 if (self->tag == NULL)
1628 return PyUnicode_FromFormat("<Element at %p>", self);
1629
1630 status = Py_ReprEnter((PyObject *)self);
1631 if (status == 0) {
1632 PyObject *res;
1633 res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1634 Py_ReprLeave((PyObject *)self);
1635 return res;
1636 }
1637 if (status > 0)
1638 PyErr_Format(PyExc_RuntimeError,
1639 "reentrant call inside %s.__repr__",
1640 Py_TYPE(self)->tp_name);
1641 return NULL;
1642 }
1643
1644 /*[clinic input]
1645 _elementtree.Element.set
1646
1647 key: object
1648 value: object
1649 /
1650
1651 [clinic start generated code]*/
1652
1653 static PyObject *
_elementtree_Element_set_impl(ElementObject * self,PyObject * key,PyObject * value)1654 _elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1655 PyObject *value)
1656 /*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
1657 {
1658 PyObject* attrib;
1659
1660 if (!self->extra) {
1661 if (create_extra(self, NULL) < 0)
1662 return NULL;
1663 }
1664
1665 attrib = element_get_attrib(self);
1666 if (!attrib)
1667 return NULL;
1668
1669 if (PyDict_SetItem(attrib, key, value) < 0)
1670 return NULL;
1671
1672 Py_RETURN_NONE;
1673 }
1674
1675 static int
element_setitem(PyObject * self_,Py_ssize_t index,PyObject * item)1676 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
1677 {
1678 ElementObject* self = (ElementObject*) self_;
1679 Py_ssize_t i;
1680 PyObject* old;
1681
1682 if (!self->extra || index < 0 || index >= self->extra->length) {
1683 PyErr_SetString(
1684 PyExc_IndexError,
1685 "child assignment index out of range");
1686 return -1;
1687 }
1688
1689 old = self->extra->children[index];
1690
1691 if (item) {
1692 if (!Element_Check(item)) {
1693 raise_type_error(item);
1694 return -1;
1695 }
1696 Py_INCREF(item);
1697 self->extra->children[index] = item;
1698 } else {
1699 self->extra->length--;
1700 for (i = index; i < self->extra->length; i++)
1701 self->extra->children[i] = self->extra->children[i+1];
1702 }
1703
1704 Py_DECREF(old);
1705
1706 return 0;
1707 }
1708
1709 static PyObject*
element_subscr(PyObject * self_,PyObject * item)1710 element_subscr(PyObject* self_, PyObject* item)
1711 {
1712 ElementObject* self = (ElementObject*) self_;
1713
1714 if (PyIndex_Check(item)) {
1715 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1716
1717 if (i == -1 && PyErr_Occurred()) {
1718 return NULL;
1719 }
1720 if (i < 0 && self->extra)
1721 i += self->extra->length;
1722 return element_getitem(self_, i);
1723 }
1724 else if (PySlice_Check(item)) {
1725 Py_ssize_t start, stop, step, slicelen, i;
1726 size_t cur;
1727 PyObject* list;
1728
1729 if (!self->extra)
1730 return PyList_New(0);
1731
1732 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1733 return NULL;
1734 }
1735 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1736 step);
1737
1738 if (slicelen <= 0)
1739 return PyList_New(0);
1740 else {
1741 list = PyList_New(slicelen);
1742 if (!list)
1743 return NULL;
1744
1745 for (cur = start, i = 0; i < slicelen;
1746 cur += step, i++) {
1747 PyObject* item = self->extra->children[cur];
1748 Py_INCREF(item);
1749 PyList_SET_ITEM(list, i, item);
1750 }
1751
1752 return list;
1753 }
1754 }
1755 else {
1756 PyErr_SetString(PyExc_TypeError,
1757 "element indices must be integers");
1758 return NULL;
1759 }
1760 }
1761
1762 static int
element_ass_subscr(PyObject * self_,PyObject * item,PyObject * value)1763 element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1764 {
1765 ElementObject* self = (ElementObject*) self_;
1766
1767 if (PyIndex_Check(item)) {
1768 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1769
1770 if (i == -1 && PyErr_Occurred()) {
1771 return -1;
1772 }
1773 if (i < 0 && self->extra)
1774 i += self->extra->length;
1775 return element_setitem(self_, i, value);
1776 }
1777 else if (PySlice_Check(item)) {
1778 Py_ssize_t start, stop, step, slicelen, newlen, i;
1779 size_t cur;
1780
1781 PyObject* recycle = NULL;
1782 PyObject* seq;
1783
1784 if (!self->extra) {
1785 if (create_extra(self, NULL) < 0)
1786 return -1;
1787 }
1788
1789 if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1790 return -1;
1791 }
1792 slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1793 step);
1794
1795 if (value == NULL) {
1796 /* Delete slice */
1797 size_t cur;
1798 Py_ssize_t i;
1799
1800 if (slicelen <= 0)
1801 return 0;
1802
1803 /* Since we're deleting, the direction of the range doesn't matter,
1804 * so for simplicity make it always ascending.
1805 */
1806 if (step < 0) {
1807 stop = start + 1;
1808 start = stop + step * (slicelen - 1) - 1;
1809 step = -step;
1810 }
1811
1812 assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
1813
1814 /* recycle is a list that will contain all the children
1815 * scheduled for removal.
1816 */
1817 if (!(recycle = PyList_New(slicelen))) {
1818 return -1;
1819 }
1820
1821 /* This loop walks over all the children that have to be deleted,
1822 * with cur pointing at them. num_moved is the amount of children
1823 * until the next deleted child that have to be "shifted down" to
1824 * occupy the deleted's places.
1825 * Note that in the ith iteration, shifting is done i+i places down
1826 * because i children were already removed.
1827 */
1828 for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1829 /* Compute how many children have to be moved, clipping at the
1830 * list end.
1831 */
1832 Py_ssize_t num_moved = step - 1;
1833 if (cur + step >= (size_t)self->extra->length) {
1834 num_moved = self->extra->length - cur - 1;
1835 }
1836
1837 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1838
1839 memmove(
1840 self->extra->children + cur - i,
1841 self->extra->children + cur + 1,
1842 num_moved * sizeof(PyObject *));
1843 }
1844
1845 /* Leftover "tail" after the last removed child */
1846 cur = start + (size_t)slicelen * step;
1847 if (cur < (size_t)self->extra->length) {
1848 memmove(
1849 self->extra->children + cur - slicelen,
1850 self->extra->children + cur,
1851 (self->extra->length - cur) * sizeof(PyObject *));
1852 }
1853
1854 self->extra->length -= slicelen;
1855
1856 /* Discard the recycle list with all the deleted sub-elements */
1857 Py_DECREF(recycle);
1858 return 0;
1859 }
1860
1861 /* A new slice is actually being assigned */
1862 seq = PySequence_Fast(value, "");
1863 if (!seq) {
1864 PyErr_Format(
1865 PyExc_TypeError,
1866 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1867 );
1868 return -1;
1869 }
1870 newlen = PySequence_Fast_GET_SIZE(seq);
1871
1872 if (step != 1 && newlen != slicelen)
1873 {
1874 Py_DECREF(seq);
1875 PyErr_Format(PyExc_ValueError,
1876 "attempt to assign sequence of size %zd "
1877 "to extended slice of size %zd",
1878 newlen, slicelen
1879 );
1880 return -1;
1881 }
1882
1883 /* Resize before creating the recycle bin, to prevent refleaks. */
1884 if (newlen > slicelen) {
1885 if (element_resize(self, newlen - slicelen) < 0) {
1886 Py_DECREF(seq);
1887 return -1;
1888 }
1889 }
1890
1891 for (i = 0; i < newlen; i++) {
1892 PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1893 if (!Element_Check(element)) {
1894 raise_type_error(element);
1895 Py_DECREF(seq);
1896 return -1;
1897 }
1898 }
1899
1900 if (slicelen > 0) {
1901 /* to avoid recursive calls to this method (via decref), move
1902 old items to the recycle bin here, and get rid of them when
1903 we're done modifying the element */
1904 recycle = PyList_New(slicelen);
1905 if (!recycle) {
1906 Py_DECREF(seq);
1907 return -1;
1908 }
1909 for (cur = start, i = 0; i < slicelen;
1910 cur += step, i++)
1911 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1912 }
1913
1914 if (newlen < slicelen) {
1915 /* delete slice */
1916 for (i = stop; i < self->extra->length; i++)
1917 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1918 } else if (newlen > slicelen) {
1919 /* insert slice */
1920 for (i = self->extra->length-1; i >= stop; i--)
1921 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1922 }
1923
1924 /* replace the slice */
1925 for (cur = start, i = 0; i < newlen;
1926 cur += step, i++) {
1927 PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1928 Py_INCREF(element);
1929 self->extra->children[cur] = element;
1930 }
1931
1932 self->extra->length += newlen - slicelen;
1933
1934 Py_DECREF(seq);
1935
1936 /* discard the recycle bin, and everything in it */
1937 Py_XDECREF(recycle);
1938
1939 return 0;
1940 }
1941 else {
1942 PyErr_SetString(PyExc_TypeError,
1943 "element indices must be integers");
1944 return -1;
1945 }
1946 }
1947
1948 static PyObject*
element_tag_getter(ElementObject * self,void * closure)1949 element_tag_getter(ElementObject *self, void *closure)
1950 {
1951 PyObject *res = self->tag;
1952 Py_INCREF(res);
1953 return res;
1954 }
1955
1956 static PyObject*
element_text_getter(ElementObject * self,void * closure)1957 element_text_getter(ElementObject *self, void *closure)
1958 {
1959 PyObject *res = element_get_text(self);
1960 Py_XINCREF(res);
1961 return res;
1962 }
1963
1964 static PyObject*
element_tail_getter(ElementObject * self,void * closure)1965 element_tail_getter(ElementObject *self, void *closure)
1966 {
1967 PyObject *res = element_get_tail(self);
1968 Py_XINCREF(res);
1969 return res;
1970 }
1971
1972 static PyObject*
element_attrib_getter(ElementObject * self,void * closure)1973 element_attrib_getter(ElementObject *self, void *closure)
1974 {
1975 PyObject *res;
1976 if (!self->extra) {
1977 if (create_extra(self, NULL) < 0)
1978 return NULL;
1979 }
1980 res = element_get_attrib(self);
1981 Py_XINCREF(res);
1982 return res;
1983 }
1984
/* macro for setter validation: a NULL value means the attribute is being
   deleted, which is refused by setting AttributeError and returning -1
   from the *enclosing* function.  The expansion is a bare `if` statement
   containing a `return -1`, so it is only usable as a statement inside a
   function that returns int. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    if ((V) == NULL) {                              \
        PyErr_SetString(                            \
            PyExc_AttributeError,                   \
            "can't delete element attribute");      \
        return -1;                                  \
    }
1993
1994 static int
element_tag_setter(ElementObject * self,PyObject * value,void * closure)1995 element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1996 {
1997 _VALIDATE_ATTR_VALUE(value);
1998 Py_INCREF(value);
1999 Py_SETREF(self->tag, value);
2000 return 0;
2001 }
2002
2003 static int
element_text_setter(ElementObject * self,PyObject * value,void * closure)2004 element_text_setter(ElementObject *self, PyObject *value, void *closure)
2005 {
2006 _VALIDATE_ATTR_VALUE(value);
2007 Py_INCREF(value);
2008 _set_joined_ptr(&self->text, value);
2009 return 0;
2010 }
2011
2012 static int
element_tail_setter(ElementObject * self,PyObject * value,void * closure)2013 element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2014 {
2015 _VALIDATE_ATTR_VALUE(value);
2016 Py_INCREF(value);
2017 _set_joined_ptr(&self->tail, value);
2018 return 0;
2019 }
2020
2021 static int
element_attrib_setter(ElementObject * self,PyObject * value,void * closure)2022 element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2023 {
2024 _VALIDATE_ATTR_VALUE(value);
2025 if (!PyDict_Check(value)) {
2026 PyErr_Format(PyExc_TypeError,
2027 "attrib must be dict, not %.200s",
2028 Py_TYPE(value)->tp_name);
2029 return -1;
2030 }
2031 if (!self->extra) {
2032 if (create_extra(self, NULL) < 0)
2033 return -1;
2034 }
2035 Py_INCREF(value);
2036 Py_XSETREF(self->extra->attrib, value);
2037 return 0;
2038 }
2039
2040 static PySequenceMethods element_as_sequence = {
2041 (lenfunc) element_length,
2042 0, /* sq_concat */
2043 0, /* sq_repeat */
2044 element_getitem,
2045 0,
2046 element_setitem,
2047 0,
2048 };
2049
2050 /******************************* Element iterator ****************************/
2051
2052 /* ElementIterObject represents the iteration state over an XML element in
2053 * pre-order traversal. To keep track of which sub-element should be returned
2054 * next, a stack of parents is maintained. This is a standard stack-based
2055 * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
2057 * Each stack item contains the saved parent to which we should return after
2058 * the current one is exhausted, and the next child to examine in that parent.
2059 */
/* One entry of the iterator's parent stack. */
typedef struct ParentLocator_t {
    ElementObject *parent;      /* element whose children are being walked;
                                   a reference is taken when the entry is
                                   pushed (see parent_stack_push_new) */
    Py_ssize_t child_index;     /* index of the next child of `parent` to
                                   examine; starts at 0 */
} ParentLocator;
2064
/* Iterator object shared by Element.iter() and Element.itertext(). */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* PyMem-allocated array of stack entries */
    Py_ssize_t parent_stack_used;   /* number of entries currently in use */
    Py_ssize_t parent_stack_size;   /* number of entries allocated */
    ElementObject *root_element;    /* element iteration starts from; set to
                                       NULL once it has been consumed */
    PyObject *sought_tag;           /* tag to match; Py_None matches every
                                       element */
    int gettext;                    /* non-zero: yield text/tail strings
                                       instead of elements */
} ElementIterObject;
2074
2075
2076 static void
elementiter_dealloc(ElementIterObject * it)2077 elementiter_dealloc(ElementIterObject *it)
2078 {
2079 Py_ssize_t i = it->parent_stack_used;
2080 it->parent_stack_used = 0;
2081 /* bpo-31095: UnTrack is needed before calling any callbacks */
2082 PyObject_GC_UnTrack(it);
2083 while (i--)
2084 Py_XDECREF(it->parent_stack[i].parent);
2085 PyMem_Free(it->parent_stack);
2086
2087 Py_XDECREF(it->sought_tag);
2088 Py_XDECREF(it->root_element);
2089
2090 PyObject_GC_Del(it);
2091 }
2092
2093 static int
elementiter_traverse(ElementIterObject * it,visitproc visit,void * arg)2094 elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2095 {
2096 Py_ssize_t i = it->parent_stack_used;
2097 while (i--)
2098 Py_VISIT(it->parent_stack[i].parent);
2099
2100 Py_VISIT(it->root_element);
2101 Py_VISIT(it->sought_tag);
2102 return 0;
2103 }
2104
2105 /* Helper function for elementiter_next. Add a new parent to the parent stack.
2106 */
2107 static int
parent_stack_push_new(ElementIterObject * it,ElementObject * parent)2108 parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
2109 {
2110 ParentLocator *item;
2111
2112 if (it->parent_stack_used >= it->parent_stack_size) {
2113 Py_ssize_t new_size = it->parent_stack_size * 2; /* never overflow */
2114 ParentLocator *parent_stack = it->parent_stack;
2115 PyMem_Resize(parent_stack, ParentLocator, new_size);
2116 if (parent_stack == NULL)
2117 return -1;
2118 it->parent_stack = parent_stack;
2119 it->parent_stack_size = new_size;
2120 }
2121 item = it->parent_stack + it->parent_stack_used++;
2122 Py_INCREF(parent);
2123 item->parent = parent;
2124 item->child_index = 0;
2125 return 0;
2126 }
2127
2128 static PyObject *
elementiter_next(ElementIterObject * it)2129 elementiter_next(ElementIterObject *it)
2130 {
2131 /* Sub-element iterator.
2132 *
2133 * A short note on gettext: this function serves both the iter() and
2134 * itertext() methods to avoid code duplication. However, there are a few
2135 * small differences in the way these iterations work. Namely:
2136 * - itertext() only yields text from nodes that have it, and continues
2137 * iterating when a node doesn't have text (so it doesn't return any
2138 * node like iter())
2139 * - itertext() also has to handle tail, after finishing with all the
2140 * children of a node.
2141 */
2142 int rc;
2143 ElementObject *elem;
2144 PyObject *text;
2145
2146 while (1) {
2147 /* Handle the case reached in the beginning and end of iteration, where
2148 * the parent stack is empty. If root_element is NULL and we're here, the
2149 * iterator is exhausted.
2150 */
2151 if (!it->parent_stack_used) {
2152 if (!it->root_element) {
2153 PyErr_SetNone(PyExc_StopIteration);
2154 return NULL;
2155 }
2156
2157 elem = it->root_element; /* steals a reference */
2158 it->root_element = NULL;
2159 }
2160 else {
2161 /* See if there are children left to traverse in the current parent. If
2162 * yes, visit the next child. If not, pop the stack and try again.
2163 */
2164 ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
2165 Py_ssize_t child_index = item->child_index;
2166 ElementObjectExtra *extra;
2167 elem = item->parent;
2168 extra = elem->extra;
2169 if (!extra || child_index >= extra->length) {
2170 it->parent_stack_used--;
2171 /* Note that extra condition on it->parent_stack_used here;
2172 * this is because itertext() is supposed to only return *inner*
2173 * text, not text following the element it began iteration with.
2174 */
2175 if (it->gettext && it->parent_stack_used) {
2176 text = element_get_tail(elem);
2177 goto gettext;
2178 }
2179 Py_DECREF(elem);
2180 continue;
2181 }
2182
2183 assert(Element_Check(extra->children[child_index]));
2184 elem = (ElementObject *)extra->children[child_index];
2185 item->child_index++;
2186 Py_INCREF(elem);
2187 }
2188
2189 if (parent_stack_push_new(it, elem) < 0) {
2190 Py_DECREF(elem);
2191 PyErr_NoMemory();
2192 return NULL;
2193 }
2194 if (it->gettext) {
2195 text = element_get_text(elem);
2196 goto gettext;
2197 }
2198
2199 if (it->sought_tag == Py_None)
2200 return (PyObject *)elem;
2201
2202 rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
2203 if (rc > 0)
2204 return (PyObject *)elem;
2205
2206 Py_DECREF(elem);
2207 if (rc < 0)
2208 return NULL;
2209 continue;
2210
2211 gettext:
2212 if (!text) {
2213 Py_DECREF(elem);
2214 return NULL;
2215 }
2216 if (text == Py_None) {
2217 Py_DECREF(elem);
2218 }
2219 else {
2220 Py_INCREF(text);
2221 Py_DECREF(elem);
2222 rc = PyObject_IsTrue(text);
2223 if (rc > 0)
2224 return text;
2225 Py_DECREF(text);
2226 if (rc < 0)
2227 return NULL;
2228 }
2229 }
2230
2231 return NULL;
2232 }
2233
2234
2235 static PyTypeObject ElementIter_Type = {
2236 PyVarObject_HEAD_INIT(NULL, 0)
2237 /* Using the module's name since the pure-Python implementation does not
2238 have such a type. */
2239 "_elementtree._element_iterator", /* tp_name */
2240 sizeof(ElementIterObject), /* tp_basicsize */
2241 0, /* tp_itemsize */
2242 /* methods */
2243 (destructor)elementiter_dealloc, /* tp_dealloc */
2244 0, /* tp_vectorcall_offset */
2245 0, /* tp_getattr */
2246 0, /* tp_setattr */
2247 0, /* tp_as_async */
2248 0, /* tp_repr */
2249 0, /* tp_as_number */
2250 0, /* tp_as_sequence */
2251 0, /* tp_as_mapping */
2252 0, /* tp_hash */
2253 0, /* tp_call */
2254 0, /* tp_str */
2255 0, /* tp_getattro */
2256 0, /* tp_setattro */
2257 0, /* tp_as_buffer */
2258 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2259 0, /* tp_doc */
2260 (traverseproc)elementiter_traverse, /* tp_traverse */
2261 0, /* tp_clear */
2262 0, /* tp_richcompare */
2263 0, /* tp_weaklistoffset */
2264 PyObject_SelfIter, /* tp_iter */
2265 (iternextfunc)elementiter_next, /* tp_iternext */
2266 0, /* tp_methods */
2267 0, /* tp_members */
2268 0, /* tp_getset */
2269 0, /* tp_base */
2270 0, /* tp_dict */
2271 0, /* tp_descr_get */
2272 0, /* tp_descr_set */
2273 0, /* tp_dictoffset */
2274 0, /* tp_init */
2275 0, /* tp_alloc */
2276 0, /* tp_new */
2277 };
2278
2279 #define INIT_PARENT_STACK_SIZE 8
2280
2281 static PyObject *
create_elementiter(ElementObject * self,PyObject * tag,int gettext)2282 create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2283 {
2284 ElementIterObject *it;
2285
2286 it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2287 if (!it)
2288 return NULL;
2289
2290 Py_INCREF(tag);
2291 it->sought_tag = tag;
2292 it->gettext = gettext;
2293 Py_INCREF(self);
2294 it->root_element = self;
2295
2296 it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
2297 if (it->parent_stack == NULL) {
2298 Py_DECREF(it);
2299 PyErr_NoMemory();
2300 return NULL;
2301 }
2302 it->parent_stack_used = 0;
2303 it->parent_stack_size = INIT_PARENT_STACK_SIZE;
2304
2305 PyObject_GC_Track(it);
2306
2307 return (PyObject *)it;
2308 }
2309
2310
2311 /* ==================================================================== */
2312 /* the tree builder type */
2313
/* Instance layout of the C TreeBuilder.  All PyObject* members are owned
   references (or NULL); they are visited by treebuilder_gc_traverse() and
   released by treebuilder_gc_clear(). */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */
    PyObject *last_for_tail; /* most recently created node that takes a tail */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* Optional callables; NULL means "not set".  Without an element
       factory, treebuilder_handle_start() falls back to
       create_new_element(); without a comment/PI factory the comment and
       PI handlers return the raw text / a (target, text) tuple. */
    PyObject *element_factory;
    PyObject *comment_factory;
    PyObject *pi_factory;

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
    PyObject *comment_event_obj;
    PyObject *pi_event_obj;

    /* Flags: when non-zero, objects built by the corresponding factory are
       also appended to the current element. */
    char insert_comments;
    char insert_pis;
} TreeBuilderObject;

/* True only for exact TreeBuilder instances (subclasses do not match). */
#define TreeBuilder_CheckExact(op) Py_IS_TYPE((op), &TreeBuilder_Type)
2346
2347 /* -------------------------------------------------------------------- */
2348 /* constructor and destructor */
2349
2350 static PyObject *
treebuilder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2351 treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2352 {
2353 TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2354 if (t != NULL) {
2355 t->root = NULL;
2356
2357 Py_INCREF(Py_None);
2358 t->this = Py_None;
2359 Py_INCREF(Py_None);
2360 t->last = Py_None;
2361
2362 t->data = NULL;
2363 t->element_factory = NULL;
2364 t->comment_factory = NULL;
2365 t->pi_factory = NULL;
2366 t->stack = PyList_New(20);
2367 if (!t->stack) {
2368 Py_DECREF(t->this);
2369 Py_DECREF(t->last);
2370 Py_DECREF((PyObject *) t);
2371 return NULL;
2372 }
2373 t->index = 0;
2374
2375 t->events_append = NULL;
2376 t->start_event_obj = t->end_event_obj = NULL;
2377 t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2378 t->comment_event_obj = t->pi_event_obj = NULL;
2379 t->insert_comments = t->insert_pis = 0;
2380 }
2381 return (PyObject *)t;
2382 }
2383
2384 /*[clinic input]
2385 _elementtree.TreeBuilder.__init__
2386
2387 element_factory: object = None
2388 *
2389 comment_factory: object = None
2390 pi_factory: object = None
2391 insert_comments: bool = False
2392 insert_pis: bool = False
2393
2394 [clinic start generated code]*/
2395
2396 static int
_elementtree_TreeBuilder___init___impl(TreeBuilderObject * self,PyObject * element_factory,PyObject * comment_factory,PyObject * pi_factory,int insert_comments,int insert_pis)2397 _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2398 PyObject *element_factory,
2399 PyObject *comment_factory,
2400 PyObject *pi_factory,
2401 int insert_comments, int insert_pis)
2402 /*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
2403 {
2404 if (element_factory != Py_None) {
2405 Py_INCREF(element_factory);
2406 Py_XSETREF(self->element_factory, element_factory);
2407 } else {
2408 Py_CLEAR(self->element_factory);
2409 }
2410
2411 if (comment_factory == Py_None) {
2412 elementtreestate *st = ET_STATE_GLOBAL;
2413 comment_factory = st->comment_factory;
2414 }
2415 if (comment_factory) {
2416 Py_INCREF(comment_factory);
2417 Py_XSETREF(self->comment_factory, comment_factory);
2418 self->insert_comments = insert_comments;
2419 } else {
2420 Py_CLEAR(self->comment_factory);
2421 self->insert_comments = 0;
2422 }
2423
2424 if (pi_factory == Py_None) {
2425 elementtreestate *st = ET_STATE_GLOBAL;
2426 pi_factory = st->pi_factory;
2427 }
2428 if (pi_factory) {
2429 Py_INCREF(pi_factory);
2430 Py_XSETREF(self->pi_factory, pi_factory);
2431 self->insert_pis = insert_pis;
2432 } else {
2433 Py_CLEAR(self->pi_factory);
2434 self->insert_pis = 0;
2435 }
2436
2437 return 0;
2438 }
2439
2440 static int
treebuilder_gc_traverse(TreeBuilderObject * self,visitproc visit,void * arg)2441 treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
2442 {
2443 Py_VISIT(self->pi_event_obj);
2444 Py_VISIT(self->comment_event_obj);
2445 Py_VISIT(self->end_ns_event_obj);
2446 Py_VISIT(self->start_ns_event_obj);
2447 Py_VISIT(self->end_event_obj);
2448 Py_VISIT(self->start_event_obj);
2449 Py_VISIT(self->events_append);
2450 Py_VISIT(self->root);
2451 Py_VISIT(self->this);
2452 Py_VISIT(self->last);
2453 Py_VISIT(self->last_for_tail);
2454 Py_VISIT(self->data);
2455 Py_VISIT(self->stack);
2456 Py_VISIT(self->pi_factory);
2457 Py_VISIT(self->comment_factory);
2458 Py_VISIT(self->element_factory);
2459 return 0;
2460 }
2461
2462 static int
treebuilder_gc_clear(TreeBuilderObject * self)2463 treebuilder_gc_clear(TreeBuilderObject *self)
2464 {
2465 Py_CLEAR(self->pi_event_obj);
2466 Py_CLEAR(self->comment_event_obj);
2467 Py_CLEAR(self->end_ns_event_obj);
2468 Py_CLEAR(self->start_ns_event_obj);
2469 Py_CLEAR(self->end_event_obj);
2470 Py_CLEAR(self->start_event_obj);
2471 Py_CLEAR(self->events_append);
2472 Py_CLEAR(self->stack);
2473 Py_CLEAR(self->data);
2474 Py_CLEAR(self->last);
2475 Py_CLEAR(self->last_for_tail);
2476 Py_CLEAR(self->this);
2477 Py_CLEAR(self->pi_factory);
2478 Py_CLEAR(self->comment_factory);
2479 Py_CLEAR(self->element_factory);
2480 Py_CLEAR(self->root);
2481 return 0;
2482 }
2483
/* Deallocator: stop GC tracking, release all held references through
   treebuilder_gc_clear(), then free the object's memory with tp_free. */
static void
treebuilder_dealloc(TreeBuilderObject *self)
{
    /* Untrack first, before any reference is dropped. */
    PyObject_GC_UnTrack(self);
    treebuilder_gc_clear(self);
    Py_TYPE(self)->tp_free((PyObject *)self);
}
2491
2492 /* -------------------------------------------------------------------- */
2493 /* helpers for handling of arbitrary element-like objects */
2494
2495 /*[clinic input]
2496 _elementtree._set_factories
2497
2498 comment_factory: object
2499 pi_factory: object
2500 /
2501
2502 Change the factories used to create comments and processing instructions.
2503
2504 For internal use only.
2505 [clinic start generated code]*/
2506
2507 static PyObject *
_elementtree__set_factories_impl(PyObject * module,PyObject * comment_factory,PyObject * pi_factory)2508 _elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2509 PyObject *pi_factory)
2510 /*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2511 {
2512 elementtreestate *st = ET_STATE_GLOBAL;
2513 PyObject *old;
2514
2515 if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2516 PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2517 Py_TYPE(comment_factory)->tp_name);
2518 return NULL;
2519 }
2520 if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2521 PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2522 Py_TYPE(pi_factory)->tp_name);
2523 return NULL;
2524 }
2525
2526 old = PyTuple_Pack(2,
2527 st->comment_factory ? st->comment_factory : Py_None,
2528 st->pi_factory ? st->pi_factory : Py_None);
2529
2530 if (comment_factory == Py_None) {
2531 Py_CLEAR(st->comment_factory);
2532 } else {
2533 Py_INCREF(comment_factory);
2534 Py_XSETREF(st->comment_factory, comment_factory);
2535 }
2536 if (pi_factory == Py_None) {
2537 Py_CLEAR(st->pi_factory);
2538 } else {
2539 Py_INCREF(pi_factory);
2540 Py_XSETREF(st->pi_factory, pi_factory);
2541 }
2542
2543 return old;
2544 }
2545
2546 static int
treebuilder_extend_element_text_or_tail(PyObject * element,PyObject ** data,PyObject ** dest,_Py_Identifier * name)2547 treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
2548 PyObject **dest, _Py_Identifier *name)
2549 {
2550 /* Fast paths for the "almost always" cases. */
2551 if (Element_CheckExact(element)) {
2552 PyObject *dest_obj = JOIN_OBJ(*dest);
2553 if (dest_obj == Py_None) {
2554 *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2555 *data = NULL;
2556 Py_DECREF(dest_obj);
2557 return 0;
2558 }
2559 else if (JOIN_GET(*dest)) {
2560 if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
2561 return -1;
2562 }
2563 Py_CLEAR(*data);
2564 return 0;
2565 }
2566 }
2567
2568 /* Fallback for the non-Element / non-trivial cases. */
2569 {
2570 int r;
2571 PyObject* joined;
2572 PyObject* previous = _PyObject_GetAttrId(element, name);
2573 if (!previous)
2574 return -1;
2575 joined = list_join(*data);
2576 if (!joined) {
2577 Py_DECREF(previous);
2578 return -1;
2579 }
2580 if (previous != Py_None) {
2581 PyObject *tmp = PyNumber_Add(previous, joined);
2582 Py_DECREF(joined);
2583 Py_DECREF(previous);
2584 if (!tmp)
2585 return -1;
2586 joined = tmp;
2587 } else {
2588 Py_DECREF(previous);
2589 }
2590
2591 r = _PyObject_SetAttrId(element, name, joined);
2592 Py_DECREF(joined);
2593 if (r < 0)
2594 return -1;
2595 Py_CLEAR(*data);
2596 return 0;
2597 }
2598 }
2599
2600 LOCAL(int)
treebuilder_flush_data(TreeBuilderObject * self)2601 treebuilder_flush_data(TreeBuilderObject* self)
2602 {
2603 if (!self->data) {
2604 return 0;
2605 }
2606
2607 if (!self->last_for_tail) {
2608 PyObject *element = self->last;
2609 _Py_IDENTIFIER(text);
2610 return treebuilder_extend_element_text_or_tail(
2611 element, &self->data,
2612 &((ElementObject *) element)->text, &PyId_text);
2613 }
2614 else {
2615 PyObject *element = self->last_for_tail;
2616 _Py_IDENTIFIER(tail);
2617 return treebuilder_extend_element_text_or_tail(
2618 element, &self->data,
2619 &((ElementObject *) element)->tail, &PyId_tail);
2620 }
2621 }
2622
2623 static int
treebuilder_add_subelement(PyObject * element,PyObject * child)2624 treebuilder_add_subelement(PyObject *element, PyObject *child)
2625 {
2626 _Py_IDENTIFIER(append);
2627 if (Element_CheckExact(element)) {
2628 ElementObject *elem = (ElementObject *) element;
2629 return element_add_subelement(elem, child);
2630 }
2631 else {
2632 PyObject *res;
2633 res = _PyObject_CallMethodIdOneArg(element, &PyId_append, child);
2634 if (res == NULL)
2635 return -1;
2636 Py_DECREF(res);
2637 return 0;
2638 }
2639 }
2640
2641 LOCAL(int)
treebuilder_append_event(TreeBuilderObject * self,PyObject * action,PyObject * node)2642 treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2643 PyObject *node)
2644 {
2645 if (action != NULL) {
2646 PyObject *res;
2647 PyObject *event = PyTuple_Pack(2, action, node);
2648 if (event == NULL)
2649 return -1;
2650 res = PyObject_CallOneArg(self->events_append, event);
2651 Py_DECREF(event);
2652 if (res == NULL)
2653 return -1;
2654 Py_DECREF(res);
2655 }
2656 return 0;
2657 }
2658
2659 /* -------------------------------------------------------------------- */
2660 /* handlers */
2661
2662 LOCAL(PyObject*)
treebuilder_handle_start(TreeBuilderObject * self,PyObject * tag,PyObject * attrib)2663 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2664 PyObject* attrib)
2665 {
2666 PyObject* node;
2667 PyObject* this;
2668 elementtreestate *st = ET_STATE_GLOBAL;
2669
2670 if (treebuilder_flush_data(self) < 0) {
2671 return NULL;
2672 }
2673
2674 if (!self->element_factory) {
2675 node = create_new_element(tag, attrib);
2676 } else if (attrib == NULL) {
2677 attrib = PyDict_New();
2678 if (!attrib)
2679 return NULL;
2680 node = PyObject_CallFunctionObjArgs(self->element_factory,
2681 tag, attrib, NULL);
2682 Py_DECREF(attrib);
2683 }
2684 else {
2685 node = PyObject_CallFunctionObjArgs(self->element_factory,
2686 tag, attrib, NULL);
2687 }
2688 if (!node) {
2689 return NULL;
2690 }
2691
2692 this = self->this;
2693 Py_CLEAR(self->last_for_tail);
2694
2695 if (this != Py_None) {
2696 if (treebuilder_add_subelement(this, node) < 0)
2697 goto error;
2698 } else {
2699 if (self->root) {
2700 PyErr_SetString(
2701 st->parseerror_obj,
2702 "multiple elements on top level"
2703 );
2704 goto error;
2705 }
2706 Py_INCREF(node);
2707 self->root = node;
2708 }
2709
2710 if (self->index < PyList_GET_SIZE(self->stack)) {
2711 if (PyList_SetItem(self->stack, self->index, this) < 0)
2712 goto error;
2713 Py_INCREF(this);
2714 } else {
2715 if (PyList_Append(self->stack, this) < 0)
2716 goto error;
2717 }
2718 self->index++;
2719
2720 Py_INCREF(node);
2721 Py_SETREF(self->this, node);
2722 Py_INCREF(node);
2723 Py_SETREF(self->last, node);
2724
2725 if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2726 goto error;
2727
2728 return node;
2729
2730 error:
2731 Py_DECREF(node);
2732 return NULL;
2733 }
2734
2735 LOCAL(PyObject*)
treebuilder_handle_data(TreeBuilderObject * self,PyObject * data)2736 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2737 {
2738 if (!self->data) {
2739 if (self->last == Py_None) {
2740 /* ignore calls to data before the first call to start */
2741 Py_RETURN_NONE;
2742 }
2743 /* store the first item as is */
2744 Py_INCREF(data); self->data = data;
2745 } else {
2746 /* more than one item; use a list to collect items */
2747 if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2748 PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
2749 /* XXX this code path unused in Python 3? */
2750 /* expat often generates single character data sections; handle
2751 the most common case by resizing the existing string... */
2752 Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2753 if (_PyBytes_Resize(&self->data, size + 1) < 0)
2754 return NULL;
2755 PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
2756 } else if (PyList_CheckExact(self->data)) {
2757 if (PyList_Append(self->data, data) < 0)
2758 return NULL;
2759 } else {
2760 PyObject* list = PyList_New(2);
2761 if (!list)
2762 return NULL;
2763 PyList_SET_ITEM(list, 0, self->data);
2764 Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2765 self->data = list;
2766 }
2767 }
2768
2769 Py_RETURN_NONE;
2770 }
2771
2772 LOCAL(PyObject*)
treebuilder_handle_end(TreeBuilderObject * self,PyObject * tag)2773 treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
2774 {
2775 PyObject* item;
2776
2777 if (treebuilder_flush_data(self) < 0) {
2778 return NULL;
2779 }
2780
2781 if (self->index == 0) {
2782 PyErr_SetString(
2783 PyExc_IndexError,
2784 "pop from empty stack"
2785 );
2786 return NULL;
2787 }
2788
2789 item = self->last;
2790 self->last = self->this;
2791 Py_INCREF(self->last);
2792 Py_XSETREF(self->last_for_tail, self->last);
2793 self->index--;
2794 self->this = PyList_GET_ITEM(self->stack, self->index);
2795 Py_INCREF(self->this);
2796 Py_DECREF(item);
2797
2798 if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
2799 return NULL;
2800
2801 Py_INCREF(self->last);
2802 return (PyObject*) self->last;
2803 }
2804
2805 LOCAL(PyObject*)
treebuilder_handle_comment(TreeBuilderObject * self,PyObject * text)2806 treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2807 {
2808 PyObject* comment;
2809 PyObject* this;
2810
2811 if (treebuilder_flush_data(self) < 0) {
2812 return NULL;
2813 }
2814
2815 if (self->comment_factory) {
2816 comment = PyObject_CallOneArg(self->comment_factory, text);
2817 if (!comment)
2818 return NULL;
2819
2820 this = self->this;
2821 if (self->insert_comments && this != Py_None) {
2822 if (treebuilder_add_subelement(this, comment) < 0)
2823 goto error;
2824 Py_INCREF(comment);
2825 Py_XSETREF(self->last_for_tail, comment);
2826 }
2827 } else {
2828 Py_INCREF(text);
2829 comment = text;
2830 }
2831
2832 if (self->events_append && self->comment_event_obj) {
2833 if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2834 goto error;
2835 }
2836
2837 return comment;
2838
2839 error:
2840 Py_DECREF(comment);
2841 return NULL;
2842 }
2843
2844 LOCAL(PyObject*)
treebuilder_handle_pi(TreeBuilderObject * self,PyObject * target,PyObject * text)2845 treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2846 {
2847 PyObject* pi;
2848 PyObject* this;
2849 PyObject* stack[2] = {target, text};
2850
2851 if (treebuilder_flush_data(self) < 0) {
2852 return NULL;
2853 }
2854
2855 if (self->pi_factory) {
2856 pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2857 if (!pi) {
2858 return NULL;
2859 }
2860
2861 this = self->this;
2862 if (self->insert_pis && this != Py_None) {
2863 if (treebuilder_add_subelement(this, pi) < 0)
2864 goto error;
2865 Py_INCREF(pi);
2866 Py_XSETREF(self->last_for_tail, pi);
2867 }
2868 } else {
2869 pi = PyTuple_Pack(2, target, text);
2870 if (!pi) {
2871 return NULL;
2872 }
2873 }
2874
2875 if (self->events_append && self->pi_event_obj) {
2876 if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2877 goto error;
2878 }
2879
2880 return pi;
2881
2882 error:
2883 Py_DECREF(pi);
2884 return NULL;
2885 }
2886
2887 LOCAL(PyObject*)
treebuilder_handle_start_ns(TreeBuilderObject * self,PyObject * prefix,PyObject * uri)2888 treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2889 {
2890 PyObject* parcel;
2891
2892 if (self->events_append && self->start_ns_event_obj) {
2893 parcel = PyTuple_Pack(2, prefix, uri);
2894 if (!parcel) {
2895 return NULL;
2896 }
2897
2898 if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2899 Py_DECREF(parcel);
2900 return NULL;
2901 }
2902 Py_DECREF(parcel);
2903 }
2904
2905 Py_RETURN_NONE;
2906 }
2907
2908 LOCAL(PyObject*)
treebuilder_handle_end_ns(TreeBuilderObject * self,PyObject * prefix)2909 treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2910 {
2911 if (self->events_append && self->end_ns_event_obj) {
2912 if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2913 return NULL;
2914 }
2915 }
2916
2917 Py_RETURN_NONE;
2918 }
2919
2920 /* -------------------------------------------------------------------- */
2921 /* methods (in alphabetical order) */
2922
2923 /*[clinic input]
2924 _elementtree.TreeBuilder.data
2925
2926 data: object
2927 /
2928
2929 [clinic start generated code]*/
2930
static PyObject *
_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
{
    /* Method entry point for TreeBuilder.data(): forwards to
       treebuilder_handle_data(), the same helper the expat callbacks call
       directly for exact TreeBuilder targets. */
    return treebuilder_handle_data(self, data);
}
2937
2938 /*[clinic input]
2939 _elementtree.TreeBuilder.end
2940
2941 tag: object
2942 /
2943
2944 [clinic start generated code]*/
2945
static PyObject *
_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
{
    /* Method entry point for TreeBuilder.end(): forwards to
       treebuilder_handle_end(), which does not inspect `tag`. */
    return treebuilder_handle_end(self, tag);
}
2952
2953 /*[clinic input]
2954 _elementtree.TreeBuilder.comment
2955
2956 text: object
2957 /
2958
2959 [clinic start generated code]*/
2960
static PyObject *
_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
{
    /* Method entry point for TreeBuilder.comment(): forwards to
       treebuilder_handle_comment(). */
    return treebuilder_handle_comment(self, text);
}
2967
2968 /*[clinic input]
2969 _elementtree.TreeBuilder.pi
2970
2971 target: object
2972 text: object = None
2973 /
2974
2975 [clinic start generated code]*/
2976
static PyObject *
_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
                                 PyObject *text)
/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
{
    /* Method entry point for TreeBuilder.pi(): forwards to
       treebuilder_handle_pi().  `text` defaults to None per the clinic
       declaration. */
    return treebuilder_handle_pi(self, target, text);
}
2984
2985 LOCAL(PyObject*)
treebuilder_done(TreeBuilderObject * self)2986 treebuilder_done(TreeBuilderObject* self)
2987 {
2988 PyObject* res;
2989
2990 /* FIXME: check stack size? */
2991
2992 if (self->root)
2993 res = self->root;
2994 else
2995 res = Py_None;
2996
2997 Py_INCREF(res);
2998 return res;
2999 }
3000
3001 /*[clinic input]
3002 _elementtree.TreeBuilder.close
3003
3004 [clinic start generated code]*/
3005
static PyObject *
_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
{
    /* Method entry point for TreeBuilder.close(): returns what
       treebuilder_done() returns (the root element, or None). */
    return treebuilder_done(self);
}
3012
3013 /*[clinic input]
3014 _elementtree.TreeBuilder.start
3015
3016 tag: object
3017 attrs: object(subclass_of='&PyDict_Type')
3018 /
3019
3020 [clinic start generated code]*/
3021
static PyObject *
_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
                                    PyObject *attrs)
/*[clinic end generated code: output=e7e9dc2861349411 input=7288e9e38e63b2b6]*/
{
    /* Method entry point for TreeBuilder.start(): forwards to
       treebuilder_handle_start().  The clinic declaration restricts
       `attrs` to dict instances. */
    return treebuilder_handle_start(self, tag, attrs);
}
3029
3030 /* ==================================================================== */
3031 /* the expat interface */
3032
3033 #include "expat.h"
3034 #include "pyexpat.h"
3035
3036 /* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
3037 * cached globally without being in per-module state.
3038 */
3039 static struct PyExpat_CAPI *expat_capi;
3040 #define EXPAT(func) (expat_capi->func)
3041
/* Allocation callbacks for expat, backed by the PyObject_Malloc /
   PyObject_Realloc / PyObject_Free family. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3044
/* Instance layout of the XML parser object that drives expat and forwards
   parse events to a target object. */
typedef struct {
    PyObject_HEAD

    XML_Parser parser;          /* the underlying expat parser */

    PyObject *target;           /* event receiver; exact TreeBuilder
                                   instances are driven through the C
                                   helpers instead of the handle_* callables */
    PyObject *entity;           /* dict consulted for entity references in
                                   expat_default_handler() */

    PyObject *names;            /* dict caching raw (bytes) names ->
                                   decoded universal names, see
                                   makeuniversal() */

    /* Callables invoked from the matching expat callbacks when the target
       is not an exact TreeBuilder; NULL when not available. */
    PyObject *handle_start_ns;
    PyObject *handle_end_ns;
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    PyObject *handle_close;

} XMLParserObject;
3068
3069 /* helpers */
3070
3071 LOCAL(PyObject*)
makeuniversal(XMLParserObject * self,const char * string)3072 makeuniversal(XMLParserObject* self, const char* string)
3073 {
3074 /* convert a UTF-8 tag/attribute name from the expat parser
3075 to a universal name string */
3076
3077 Py_ssize_t size = (Py_ssize_t) strlen(string);
3078 PyObject* key;
3079 PyObject* value;
3080
3081 /* look the 'raw' name up in the names dictionary */
3082 key = PyBytes_FromStringAndSize(string, size);
3083 if (!key)
3084 return NULL;
3085
3086 value = PyDict_GetItemWithError(self->names, key);
3087
3088 if (value) {
3089 Py_INCREF(value);
3090 }
3091 else if (!PyErr_Occurred()) {
3092 /* new name. convert to universal name, and decode as
3093 necessary */
3094
3095 PyObject* tag;
3096 char* p;
3097 Py_ssize_t i;
3098
3099 /* look for namespace separator */
3100 for (i = 0; i < size; i++)
3101 if (string[i] == '}')
3102 break;
3103 if (i != size) {
3104 /* convert to universal name */
3105 tag = PyBytes_FromStringAndSize(NULL, size+1);
3106 if (tag == NULL) {
3107 Py_DECREF(key);
3108 return NULL;
3109 }
3110 p = PyBytes_AS_STRING(tag);
3111 p[0] = '{';
3112 memcpy(p+1, string, size);
3113 size++;
3114 } else {
3115 /* plain name; use key as tag */
3116 Py_INCREF(key);
3117 tag = key;
3118 }
3119
3120 /* decode universal name */
3121 p = PyBytes_AS_STRING(tag);
3122 value = PyUnicode_DecodeUTF8(p, size, "strict");
3123 Py_DECREF(tag);
3124 if (!value) {
3125 Py_DECREF(key);
3126 return NULL;
3127 }
3128
3129 /* add to names dictionary */
3130 if (PyDict_SetItem(self->names, key, value) < 0) {
3131 Py_DECREF(key);
3132 Py_DECREF(value);
3133 return NULL;
3134 }
3135 }
3136
3137 Py_DECREF(key);
3138 return value;
3139 }
3140
3141 /* Set the ParseError exception with the given parameters.
3142 * If message is not NULL, it's used as the error string. Otherwise, the
3143 * message string is the default for the given error_code.
3144 */
3145 static void
expat_set_error(enum XML_Error error_code,Py_ssize_t line,Py_ssize_t column,const char * message)3146 expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3147 const char *message)
3148 {
3149 PyObject *errmsg, *error, *position, *code;
3150 elementtreestate *st = ET_STATE_GLOBAL;
3151
3152 errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
3153 message ? message : EXPAT(ErrorString)(error_code),
3154 line, column);
3155 if (errmsg == NULL)
3156 return;
3157
3158 error = PyObject_CallOneArg(st->parseerror_obj, errmsg);
3159 Py_DECREF(errmsg);
3160 if (!error)
3161 return;
3162
3163 /* Add code and position attributes */
3164 code = PyLong_FromLong((long)error_code);
3165 if (!code) {
3166 Py_DECREF(error);
3167 return;
3168 }
3169 if (PyObject_SetAttrString(error, "code", code) == -1) {
3170 Py_DECREF(error);
3171 Py_DECREF(code);
3172 return;
3173 }
3174 Py_DECREF(code);
3175
3176 position = Py_BuildValue("(nn)", line, column);
3177 if (!position) {
3178 Py_DECREF(error);
3179 return;
3180 }
3181 if (PyObject_SetAttrString(error, "position", position) == -1) {
3182 Py_DECREF(error);
3183 Py_DECREF(position);
3184 return;
3185 }
3186 Py_DECREF(position);
3187
3188 PyErr_SetObject(st->parseerror_obj, error);
3189 Py_DECREF(error);
3190 }
3191
3192 /* -------------------------------------------------------------------- */
3193 /* handlers */
3194
3195 static void
expat_default_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3196 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3197 int data_len)
3198 {
3199 PyObject* key;
3200 PyObject* value;
3201 PyObject* res;
3202
3203 if (data_len < 2 || data_in[0] != '&')
3204 return;
3205
3206 if (PyErr_Occurred())
3207 return;
3208
3209 key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
3210 if (!key)
3211 return;
3212
3213 value = PyDict_GetItemWithError(self->entity, key);
3214
3215 if (value) {
3216 if (TreeBuilder_CheckExact(self->target))
3217 res = treebuilder_handle_data(
3218 (TreeBuilderObject*) self->target, value
3219 );
3220 else if (self->handle_data)
3221 res = PyObject_CallOneArg(self->handle_data, value);
3222 else
3223 res = NULL;
3224 Py_XDECREF(res);
3225 } else if (!PyErr_Occurred()) {
3226 /* Report the first error, not the last */
3227 char message[128] = "undefined entity ";
3228 strncat(message, data_in, data_len < 100?data_len:100);
3229 expat_set_error(
3230 XML_ERROR_UNDEFINED_ENTITY,
3231 EXPAT(GetErrorLineNumber)(self->parser),
3232 EXPAT(GetErrorColumnNumber)(self->parser),
3233 message
3234 );
3235 }
3236
3237 Py_DECREF(key);
3238 }
3239
3240 static void
expat_start_handler(XMLParserObject * self,const XML_Char * tag_in,const XML_Char ** attrib_in)3241 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3242 const XML_Char **attrib_in)
3243 {
3244 PyObject* res;
3245 PyObject* tag;
3246 PyObject* attrib;
3247 int ok;
3248
3249 if (PyErr_Occurred())
3250 return;
3251
3252 /* tag name */
3253 tag = makeuniversal(self, tag_in);
3254 if (!tag)
3255 return; /* parser will look for errors */
3256
3257 /* attributes */
3258 if (attrib_in[0]) {
3259 attrib = PyDict_New();
3260 if (!attrib) {
3261 Py_DECREF(tag);
3262 return;
3263 }
3264 while (attrib_in[0] && attrib_in[1]) {
3265 PyObject* key = makeuniversal(self, attrib_in[0]);
3266 PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
3267 if (!key || !value) {
3268 Py_XDECREF(value);
3269 Py_XDECREF(key);
3270 Py_DECREF(attrib);
3271 Py_DECREF(tag);
3272 return;
3273 }
3274 ok = PyDict_SetItem(attrib, key, value);
3275 Py_DECREF(value);
3276 Py_DECREF(key);
3277 if (ok < 0) {
3278 Py_DECREF(attrib);
3279 Py_DECREF(tag);
3280 return;
3281 }
3282 attrib_in += 2;
3283 }
3284 } else {
3285 attrib = NULL;
3286 }
3287
3288 if (TreeBuilder_CheckExact(self->target)) {
3289 /* shortcut */
3290 res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3291 tag, attrib);
3292 }
3293 else if (self->handle_start) {
3294 if (attrib == NULL) {
3295 attrib = PyDict_New();
3296 if (!attrib) {
3297 Py_DECREF(tag);
3298 return;
3299 }
3300 }
3301 res = PyObject_CallFunctionObjArgs(self->handle_start,
3302 tag, attrib, NULL);
3303 } else
3304 res = NULL;
3305
3306 Py_DECREF(tag);
3307 Py_XDECREF(attrib);
3308
3309 Py_XDECREF(res);
3310 }
3311
3312 static void
expat_data_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3313 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3314 int data_len)
3315 {
3316 PyObject* data;
3317 PyObject* res;
3318
3319 if (PyErr_Occurred())
3320 return;
3321
3322 data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
3323 if (!data)
3324 return; /* parser will look for errors */
3325
3326 if (TreeBuilder_CheckExact(self->target))
3327 /* shortcut */
3328 res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3329 else if (self->handle_data)
3330 res = PyObject_CallOneArg(self->handle_data, data);
3331 else
3332 res = NULL;
3333
3334 Py_DECREF(data);
3335
3336 Py_XDECREF(res);
3337 }
3338
3339 static void
expat_end_handler(XMLParserObject * self,const XML_Char * tag_in)3340 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3341 {
3342 PyObject* tag;
3343 PyObject* res = NULL;
3344
3345 if (PyErr_Occurred())
3346 return;
3347
3348 if (TreeBuilder_CheckExact(self->target))
3349 /* shortcut */
3350 /* the standard tree builder doesn't look at the end tag */
3351 res = treebuilder_handle_end(
3352 (TreeBuilderObject*) self->target, Py_None
3353 );
3354 else if (self->handle_end) {
3355 tag = makeuniversal(self, tag_in);
3356 if (tag) {
3357 res = PyObject_CallOneArg(self->handle_end, tag);
3358 Py_DECREF(tag);
3359 }
3360 }
3361
3362 Py_XDECREF(res);
3363 }
3364
3365 static void
expat_start_ns_handler(XMLParserObject * self,const XML_Char * prefix_in,const XML_Char * uri_in)3366 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3367 const XML_Char *uri_in)
3368 {
3369 PyObject* res = NULL;
3370 PyObject* uri;
3371 PyObject* prefix;
3372 PyObject* stack[2];
3373
3374 if (PyErr_Occurred())
3375 return;
3376
3377 if (!uri_in)
3378 uri_in = "";
3379 if (!prefix_in)
3380 prefix_in = "";
3381
3382 if (TreeBuilder_CheckExact(self->target)) {
3383 /* shortcut - TreeBuilder does not actually implement .start_ns() */
3384 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3385
3386 if (target->events_append && target->start_ns_event_obj) {
3387 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3388 if (!prefix)
3389 return;
3390 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3391 if (!uri) {
3392 Py_DECREF(prefix);
3393 return;
3394 }
3395
3396 res = treebuilder_handle_start_ns(target, prefix, uri);
3397 Py_DECREF(uri);
3398 Py_DECREF(prefix);
3399 }
3400 } else if (self->handle_start_ns) {
3401 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3402 if (!prefix)
3403 return;
3404 uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3405 if (!uri) {
3406 Py_DECREF(prefix);
3407 return;
3408 }
3409
3410 stack[0] = prefix;
3411 stack[1] = uri;
3412 res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3413 Py_DECREF(uri);
3414 Py_DECREF(prefix);
3415 }
3416
3417 Py_XDECREF(res);
3418 }
3419
3420 static void
expat_end_ns_handler(XMLParserObject * self,const XML_Char * prefix_in)3421 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3422 {
3423 PyObject *res = NULL;
3424 PyObject* prefix;
3425
3426 if (PyErr_Occurred())
3427 return;
3428
3429 if (!prefix_in)
3430 prefix_in = "";
3431
3432 if (TreeBuilder_CheckExact(self->target)) {
3433 /* shortcut - TreeBuilder does not actually implement .end_ns() */
3434 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3435
3436 if (target->events_append && target->end_ns_event_obj) {
3437 res = treebuilder_handle_end_ns(target, Py_None);
3438 }
3439 } else if (self->handle_end_ns) {
3440 prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3441 if (!prefix)
3442 return;
3443
3444 res = PyObject_CallOneArg(self->handle_end_ns, prefix);
3445 Py_DECREF(prefix);
3446 }
3447
3448 Py_XDECREF(res);
3449 }
3450
3451 static void
expat_comment_handler(XMLParserObject * self,const XML_Char * comment_in)3452 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3453 {
3454 PyObject* comment;
3455 PyObject* res;
3456
3457 if (PyErr_Occurred())
3458 return;
3459
3460 if (TreeBuilder_CheckExact(self->target)) {
3461 /* shortcut */
3462 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3463
3464 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3465 if (!comment)
3466 return; /* parser will look for errors */
3467
3468 res = treebuilder_handle_comment(target, comment);
3469 Py_XDECREF(res);
3470 Py_DECREF(comment);
3471 } else if (self->handle_comment) {
3472 comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3473 if (!comment)
3474 return;
3475
3476 res = PyObject_CallOneArg(self->handle_comment, comment);
3477 Py_XDECREF(res);
3478 Py_DECREF(comment);
3479 }
3480 }
3481
3482 static void
expat_start_doctype_handler(XMLParserObject * self,const XML_Char * doctype_name,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)3483 expat_start_doctype_handler(XMLParserObject *self,
3484 const XML_Char *doctype_name,
3485 const XML_Char *sysid,
3486 const XML_Char *pubid,
3487 int has_internal_subset)
3488 {
3489 _Py_IDENTIFIER(doctype);
3490 PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3491 PyObject *res;
3492
3493 if (PyErr_Occurred())
3494 return;
3495
3496 doctype_name_obj = makeuniversal(self, doctype_name);
3497 if (!doctype_name_obj)
3498 return;
3499
3500 if (sysid) {
3501 sysid_obj = makeuniversal(self, sysid);
3502 if (!sysid_obj) {
3503 Py_DECREF(doctype_name_obj);
3504 return;
3505 }
3506 } else {
3507 Py_INCREF(Py_None);
3508 sysid_obj = Py_None;
3509 }
3510
3511 if (pubid) {
3512 pubid_obj = makeuniversal(self, pubid);
3513 if (!pubid_obj) {
3514 Py_DECREF(doctype_name_obj);
3515 Py_DECREF(sysid_obj);
3516 return;
3517 }
3518 } else {
3519 Py_INCREF(Py_None);
3520 pubid_obj = Py_None;
3521 }
3522
3523 /* If the target has a handler for doctype, call it. */
3524 if (self->handle_doctype) {
3525 res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3526 doctype_name_obj, pubid_obj,
3527 sysid_obj, NULL);
3528 Py_XDECREF(res);
3529 }
3530 else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3531 (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3532 "The doctype() method of XMLParser is ignored. "
3533 "Define doctype() method on the TreeBuilder target.",
3534 1);
3535 Py_DECREF(res);
3536 }
3537
3538 Py_DECREF(doctype_name_obj);
3539 Py_DECREF(pubid_obj);
3540 Py_DECREF(sysid_obj);
3541 }
3542
3543 static void
expat_pi_handler(XMLParserObject * self,const XML_Char * target_in,const XML_Char * data_in)3544 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3545 const XML_Char* data_in)
3546 {
3547 PyObject* pi_target;
3548 PyObject* data;
3549 PyObject* res;
3550 PyObject* stack[2];
3551
3552 if (PyErr_Occurred())
3553 return;
3554
3555 if (TreeBuilder_CheckExact(self->target)) {
3556 /* shortcut */
3557 TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3558
3559 if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
3560 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3561 if (!pi_target)
3562 goto error;
3563 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3564 if (!data)
3565 goto error;
3566 res = treebuilder_handle_pi(target, pi_target, data);
3567 Py_XDECREF(res);
3568 Py_DECREF(data);
3569 Py_DECREF(pi_target);
3570 }
3571 } else if (self->handle_pi) {
3572 pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3573 if (!pi_target)
3574 goto error;
3575 data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3576 if (!data)
3577 goto error;
3578
3579 stack[0] = pi_target;
3580 stack[1] = data;
3581 res = _PyObject_FastCall(self->handle_pi, stack, 2);
3582 Py_XDECREF(res);
3583 Py_DECREF(data);
3584 Py_DECREF(pi_target);
3585 }
3586
3587 return;
3588
3589 error:
3590 Py_XDECREF(pi_target);
3591 return;
3592 }
3593
3594 /* -------------------------------------------------------------------- */
3595
3596 static PyObject *
xmlparser_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3597 xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3598 {
3599 XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3600 if (self) {
3601 self->parser = NULL;
3602 self->target = self->entity = self->names = NULL;
3603 self->handle_start_ns = self->handle_end_ns = NULL;
3604 self->handle_start = self->handle_data = self->handle_end = NULL;
3605 self->handle_comment = self->handle_pi = self->handle_close = NULL;
3606 self->handle_doctype = NULL;
3607 }
3608 return (PyObject *)self;
3609 }
3610
3611 static int
ignore_attribute_error(PyObject * value)3612 ignore_attribute_error(PyObject *value)
3613 {
3614 if (value == NULL) {
3615 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3616 return -1;
3617 }
3618 PyErr_Clear();
3619 }
3620 return 0;
3621 }
3622
3623 /*[clinic input]
3624 _elementtree.XMLParser.__init__
3625
3626 *
3627 target: object = None
3628 encoding: str(accept={str, NoneType}) = None
3629
3630 [clinic start generated code]*/
3631
3632 static int
_elementtree_XMLParser___init___impl(XMLParserObject * self,PyObject * target,const char * encoding)3633 _elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3634 const char *encoding)
3635 /*[clinic end generated code: output=3ae45ec6cdf344e4 input=7e716dd6e4f3e439]*/
3636 {
3637 self->entity = PyDict_New();
3638 if (!self->entity)
3639 return -1;
3640
3641 self->names = PyDict_New();
3642 if (!self->names) {
3643 Py_CLEAR(self->entity);
3644 return -1;
3645 }
3646
3647 self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3648 if (!self->parser) {
3649 Py_CLEAR(self->entity);
3650 Py_CLEAR(self->names);
3651 PyErr_NoMemory();
3652 return -1;
3653 }
3654 /* expat < 2.1.0 has no XML_SetHashSalt() */
3655 if (EXPAT(SetHashSalt) != NULL) {
3656 EXPAT(SetHashSalt)(self->parser,
3657 (unsigned long)_Py_HashSecret.expat.hashsalt);
3658 }
3659
3660 if (target != Py_None) {
3661 Py_INCREF(target);
3662 } else {
3663 target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
3664 if (!target) {
3665 Py_CLEAR(self->entity);
3666 Py_CLEAR(self->names);
3667 return -1;
3668 }
3669 }
3670 self->target = target;
3671
3672 self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3673 if (ignore_attribute_error(self->handle_start_ns)) {
3674 return -1;
3675 }
3676 self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3677 if (ignore_attribute_error(self->handle_end_ns)) {
3678 return -1;
3679 }
3680 self->handle_start = PyObject_GetAttrString(target, "start");
3681 if (ignore_attribute_error(self->handle_start)) {
3682 return -1;
3683 }
3684 self->handle_data = PyObject_GetAttrString(target, "data");
3685 if (ignore_attribute_error(self->handle_data)) {
3686 return -1;
3687 }
3688 self->handle_end = PyObject_GetAttrString(target, "end");
3689 if (ignore_attribute_error(self->handle_end)) {
3690 return -1;
3691 }
3692 self->handle_comment = PyObject_GetAttrString(target, "comment");
3693 if (ignore_attribute_error(self->handle_comment)) {
3694 return -1;
3695 }
3696 self->handle_pi = PyObject_GetAttrString(target, "pi");
3697 if (ignore_attribute_error(self->handle_pi)) {
3698 return -1;
3699 }
3700 self->handle_close = PyObject_GetAttrString(target, "close");
3701 if (ignore_attribute_error(self->handle_close)) {
3702 return -1;
3703 }
3704 self->handle_doctype = PyObject_GetAttrString(target, "doctype");
3705 if (ignore_attribute_error(self->handle_doctype)) {
3706 return -1;
3707 }
3708
3709 /* configure parser */
3710 EXPAT(SetUserData)(self->parser, self);
3711 if (self->handle_start_ns || self->handle_end_ns)
3712 EXPAT(SetNamespaceDeclHandler)(
3713 self->parser,
3714 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3715 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3716 );
3717 EXPAT(SetElementHandler)(
3718 self->parser,
3719 (XML_StartElementHandler) expat_start_handler,
3720 (XML_EndElementHandler) expat_end_handler
3721 );
3722 EXPAT(SetDefaultHandlerExpand)(
3723 self->parser,
3724 (XML_DefaultHandler) expat_default_handler
3725 );
3726 EXPAT(SetCharacterDataHandler)(
3727 self->parser,
3728 (XML_CharacterDataHandler) expat_data_handler
3729 );
3730 if (self->handle_comment)
3731 EXPAT(SetCommentHandler)(
3732 self->parser,
3733 (XML_CommentHandler) expat_comment_handler
3734 );
3735 if (self->handle_pi)
3736 EXPAT(SetProcessingInstructionHandler)(
3737 self->parser,
3738 (XML_ProcessingInstructionHandler) expat_pi_handler
3739 );
3740 EXPAT(SetStartDoctypeDeclHandler)(
3741 self->parser,
3742 (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3743 );
3744 EXPAT(SetUnknownEncodingHandler)(
3745 self->parser,
3746 EXPAT(DefaultUnknownEncodingHandler), NULL
3747 );
3748
3749 return 0;
3750 }
3751
3752 static int
xmlparser_gc_traverse(XMLParserObject * self,visitproc visit,void * arg)3753 xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3754 {
3755 Py_VISIT(self->handle_close);
3756 Py_VISIT(self->handle_pi);
3757 Py_VISIT(self->handle_comment);
3758 Py_VISIT(self->handle_end);
3759 Py_VISIT(self->handle_data);
3760 Py_VISIT(self->handle_start);
3761 Py_VISIT(self->handle_start_ns);
3762 Py_VISIT(self->handle_end_ns);
3763 Py_VISIT(self->handle_doctype);
3764
3765 Py_VISIT(self->target);
3766 Py_VISIT(self->entity);
3767 Py_VISIT(self->names);
3768
3769 return 0;
3770 }
3771
3772 static int
xmlparser_gc_clear(XMLParserObject * self)3773 xmlparser_gc_clear(XMLParserObject *self)
3774 {
3775 if (self->parser != NULL) {
3776 XML_Parser parser = self->parser;
3777 self->parser = NULL;
3778 EXPAT(ParserFree)(parser);
3779 }
3780
3781 Py_CLEAR(self->handle_close);
3782 Py_CLEAR(self->handle_pi);
3783 Py_CLEAR(self->handle_comment);
3784 Py_CLEAR(self->handle_end);
3785 Py_CLEAR(self->handle_data);
3786 Py_CLEAR(self->handle_start);
3787 Py_CLEAR(self->handle_start_ns);
3788 Py_CLEAR(self->handle_end_ns);
3789 Py_CLEAR(self->handle_doctype);
3790
3791 Py_CLEAR(self->target);
3792 Py_CLEAR(self->entity);
3793 Py_CLEAR(self->names);
3794
3795 return 0;
3796 }
3797
3798 static void
xmlparser_dealloc(XMLParserObject * self)3799 xmlparser_dealloc(XMLParserObject* self)
3800 {
3801 PyObject_GC_UnTrack(self);
3802 xmlparser_gc_clear(self);
3803 Py_TYPE(self)->tp_free((PyObject *)self);
3804 }
3805
3806 Py_LOCAL_INLINE(int)
_check_xmlparser(XMLParserObject * self)3807 _check_xmlparser(XMLParserObject* self)
3808 {
3809 if (self->target == NULL) {
3810 PyErr_SetString(PyExc_ValueError,
3811 "XMLParser.__init__() wasn't called");
3812 return 0;
3813 }
3814 return 1;
3815 }
3816
3817 LOCAL(PyObject*)
expat_parse(XMLParserObject * self,const char * data,int data_len,int final)3818 expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
3819 {
3820 int ok;
3821
3822 assert(!PyErr_Occurred());
3823 ok = EXPAT(Parse)(self->parser, data, data_len, final);
3824
3825 if (PyErr_Occurred())
3826 return NULL;
3827
3828 if (!ok) {
3829 expat_set_error(
3830 EXPAT(GetErrorCode)(self->parser),
3831 EXPAT(GetErrorLineNumber)(self->parser),
3832 EXPAT(GetErrorColumnNumber)(self->parser),
3833 NULL
3834 );
3835 return NULL;
3836 }
3837
3838 Py_RETURN_NONE;
3839 }
3840
3841 /*[clinic input]
3842 _elementtree.XMLParser.close
3843
3844 [clinic start generated code]*/
3845
3846 static PyObject *
_elementtree_XMLParser_close_impl(XMLParserObject * self)3847 _elementtree_XMLParser_close_impl(XMLParserObject *self)
3848 /*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
3849 {
3850 /* end feeding data to parser */
3851
3852 PyObject* res;
3853
3854 if (!_check_xmlparser(self)) {
3855 return NULL;
3856 }
3857 res = expat_parse(self, "", 0, 1);
3858 if (!res)
3859 return NULL;
3860
3861 if (TreeBuilder_CheckExact(self->target)) {
3862 Py_DECREF(res);
3863 return treebuilder_done((TreeBuilderObject*) self->target);
3864 }
3865 else if (self->handle_close) {
3866 Py_DECREF(res);
3867 return PyObject_CallNoArgs(self->handle_close);
3868 }
3869 else {
3870 return res;
3871 }
3872 }
3873
3874 /*[clinic input]
3875 _elementtree.XMLParser.feed
3876
3877 data: object
3878 /
3879
3880 [clinic start generated code]*/
3881
3882 static PyObject *
_elementtree_XMLParser_feed(XMLParserObject * self,PyObject * data)3883 _elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3884 /*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
3885 {
3886 /* feed data to parser */
3887
3888 if (!_check_xmlparser(self)) {
3889 return NULL;
3890 }
3891 if (PyUnicode_Check(data)) {
3892 Py_ssize_t data_len;
3893 const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3894 if (data_ptr == NULL)
3895 return NULL;
3896 if (data_len > INT_MAX) {
3897 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3898 return NULL;
3899 }
3900 /* Explicitly set UTF-8 encoding. Return code ignored. */
3901 (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3902 return expat_parse(self, data_ptr, (int)data_len, 0);
3903 }
3904 else {
3905 Py_buffer view;
3906 PyObject *res;
3907 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
3908 return NULL;
3909 if (view.len > INT_MAX) {
3910 PyBuffer_Release(&view);
3911 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3912 return NULL;
3913 }
3914 res = expat_parse(self, view.buf, (int)view.len, 0);
3915 PyBuffer_Release(&view);
3916 return res;
3917 }
3918 }
3919
3920 /*[clinic input]
3921 _elementtree.XMLParser._parse_whole
3922
3923 file: object
3924 /
3925
3926 [clinic start generated code]*/
3927
3928 static PyObject *
_elementtree_XMLParser__parse_whole(XMLParserObject * self,PyObject * file)3929 _elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3930 /*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
3931 {
3932 /* (internal) parse the whole input, until end of stream */
3933 PyObject* reader;
3934 PyObject* buffer;
3935 PyObject* temp;
3936 PyObject* res;
3937
3938 if (!_check_xmlparser(self)) {
3939 return NULL;
3940 }
3941 reader = PyObject_GetAttrString(file, "read");
3942 if (!reader)
3943 return NULL;
3944
3945 /* read from open file object */
3946 for (;;) {
3947
3948 buffer = PyObject_CallFunction(reader, "i", 64*1024);
3949
3950 if (!buffer) {
3951 /* read failed (e.g. due to KeyboardInterrupt) */
3952 Py_DECREF(reader);
3953 return NULL;
3954 }
3955
3956 if (PyUnicode_CheckExact(buffer)) {
3957 /* A unicode object is encoded into bytes using UTF-8 */
3958 if (PyUnicode_GET_LENGTH(buffer) == 0) {
3959 Py_DECREF(buffer);
3960 break;
3961 }
3962 temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
3963 Py_DECREF(buffer);
3964 if (!temp) {
3965 /* Propagate exception from PyUnicode_AsEncodedString */
3966 Py_DECREF(reader);
3967 return NULL;
3968 }
3969 buffer = temp;
3970 }
3971 else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
3972 Py_DECREF(buffer);
3973 break;
3974 }
3975
3976 if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3977 Py_DECREF(buffer);
3978 Py_DECREF(reader);
3979 PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3980 return NULL;
3981 }
3982 res = expat_parse(
3983 self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
3984 );
3985
3986 Py_DECREF(buffer);
3987
3988 if (!res) {
3989 Py_DECREF(reader);
3990 return NULL;
3991 }
3992 Py_DECREF(res);
3993
3994 }
3995
3996 Py_DECREF(reader);
3997
3998 res = expat_parse(self, "", 0, 1);
3999
4000 if (res && TreeBuilder_CheckExact(self->target)) {
4001 Py_DECREF(res);
4002 return treebuilder_done((TreeBuilderObject*) self->target);
4003 }
4004
4005 return res;
4006 }
4007
4008 /*[clinic input]
4009 _elementtree.XMLParser._setevents
4010
4011 events_queue: object
4012 events_to_report: object = None
4013 /
4014
4015 [clinic start generated code]*/
4016
4017 static PyObject *
_elementtree_XMLParser__setevents_impl(XMLParserObject * self,PyObject * events_queue,PyObject * events_to_report)4018 _elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4019 PyObject *events_queue,
4020 PyObject *events_to_report)
4021 /*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
4022 {
4023 /* activate element event reporting */
4024 Py_ssize_t i;
4025 TreeBuilderObject *target;
4026 PyObject *events_append, *events_seq;
4027
4028 if (!_check_xmlparser(self)) {
4029 return NULL;
4030 }
4031 if (!TreeBuilder_CheckExact(self->target)) {
4032 PyErr_SetString(
4033 PyExc_TypeError,
4034 "event handling only supported for ElementTree.TreeBuilder "
4035 "targets"
4036 );
4037 return NULL;
4038 }
4039
4040 target = (TreeBuilderObject*) self->target;
4041
4042 events_append = PyObject_GetAttrString(events_queue, "append");
4043 if (events_append == NULL)
4044 return NULL;
4045 Py_XSETREF(target->events_append, events_append);
4046
4047 /* clear out existing events */
4048 Py_CLEAR(target->start_event_obj);
4049 Py_CLEAR(target->end_event_obj);
4050 Py_CLEAR(target->start_ns_event_obj);
4051 Py_CLEAR(target->end_ns_event_obj);
4052 Py_CLEAR(target->comment_event_obj);
4053 Py_CLEAR(target->pi_event_obj);
4054
4055 if (events_to_report == Py_None) {
4056 /* default is "end" only */
4057 target->end_event_obj = PyUnicode_FromString("end");
4058 Py_RETURN_NONE;
4059 }
4060
4061 if (!(events_seq = PySequence_Fast(events_to_report,
4062 "events must be a sequence"))) {
4063 return NULL;
4064 }
4065
4066 for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
4067 PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
4068 const char *event_name = NULL;
4069 if (PyUnicode_Check(event_name_obj)) {
4070 event_name = PyUnicode_AsUTF8(event_name_obj);
4071 } else if (PyBytes_Check(event_name_obj)) {
4072 event_name = PyBytes_AS_STRING(event_name_obj);
4073 }
4074 if (event_name == NULL) {
4075 Py_DECREF(events_seq);
4076 PyErr_Format(PyExc_ValueError, "invalid events sequence");
4077 return NULL;
4078 }
4079
4080 Py_INCREF(event_name_obj);
4081 if (strcmp(event_name, "start") == 0) {
4082 Py_XSETREF(target->start_event_obj, event_name_obj);
4083 } else if (strcmp(event_name, "end") == 0) {
4084 Py_XSETREF(target->end_event_obj, event_name_obj);
4085 } else if (strcmp(event_name, "start-ns") == 0) {
4086 Py_XSETREF(target->start_ns_event_obj, event_name_obj);
4087 EXPAT(SetNamespaceDeclHandler)(
4088 self->parser,
4089 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4090 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4091 );
4092 } else if (strcmp(event_name, "end-ns") == 0) {
4093 Py_XSETREF(target->end_ns_event_obj, event_name_obj);
4094 EXPAT(SetNamespaceDeclHandler)(
4095 self->parser,
4096 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4097 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4098 );
4099 } else if (strcmp(event_name, "comment") == 0) {
4100 Py_XSETREF(target->comment_event_obj, event_name_obj);
4101 EXPAT(SetCommentHandler)(
4102 self->parser,
4103 (XML_CommentHandler) expat_comment_handler
4104 );
4105 } else if (strcmp(event_name, "pi") == 0) {
4106 Py_XSETREF(target->pi_event_obj, event_name_obj);
4107 EXPAT(SetProcessingInstructionHandler)(
4108 self->parser,
4109 (XML_ProcessingInstructionHandler) expat_pi_handler
4110 );
4111 } else {
4112 Py_DECREF(event_name_obj);
4113 Py_DECREF(events_seq);
4114 PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
4115 return NULL;
4116 }
4117 }
4118
4119 Py_DECREF(events_seq);
4120 Py_RETURN_NONE;
4121 }
4122
4123 static PyMemberDef xmlparser_members[] = {
4124 {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4125 {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4126 {NULL}
4127 };
4128
4129 static PyObject*
xmlparser_version_getter(XMLParserObject * self,void * closure)4130 xmlparser_version_getter(XMLParserObject *self, void *closure)
4131 {
4132 return PyUnicode_FromFormat(
4133 "Expat %d.%d.%d", XML_MAJOR_VERSION,
4134 XML_MINOR_VERSION, XML_MICRO_VERSION);
4135 }
4136
4137 static PyGetSetDef xmlparser_getsetlist[] = {
4138 {"version", (getter)xmlparser_version_getter, NULL, NULL},
4139 {NULL},
4140 };
4141
4142 #include "clinic/_elementtree.c.h"
4143
4144 static PyMethodDef element_methods[] = {
4145
4146 _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4147
4148 _ELEMENTTREE_ELEMENT_GET_METHODDEF
4149 _ELEMENTTREE_ELEMENT_SET_METHODDEF
4150
4151 _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4152 _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4153 _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4154
4155 _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4156 _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4157 _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4158 _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4159
4160 _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4161 _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4162 _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4163
4164 _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4165 _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4166
4167 _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4168
4169 _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4170 _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4171 _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4172 _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4173 _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4174
4175 {NULL, NULL}
4176 };
4177
4178 static PyMappingMethods element_as_mapping = {
4179 (lenfunc) element_length,
4180 (binaryfunc) element_subscr,
4181 (objobjargproc) element_ass_subscr,
4182 };
4183
4184 static PyGetSetDef element_getsetlist[] = {
4185 {"tag",
4186 (getter)element_tag_getter,
4187 (setter)element_tag_setter,
4188 "A string identifying what kind of data this element represents"},
4189 {"text",
4190 (getter)element_text_getter,
4191 (setter)element_text_setter,
4192 "A string of text directly after the start tag, or None"},
4193 {"tail",
4194 (getter)element_tail_getter,
4195 (setter)element_tail_setter,
4196 "A string of text directly after the end tag, or None"},
4197 {"attrib",
4198 (getter)element_attrib_getter,
4199 (setter)element_attrib_setter,
4200 "A dictionary containing the element's attributes"},
4201 {NULL},
4202 };
4203
4204 static PyTypeObject Element_Type = {
4205 PyVarObject_HEAD_INIT(NULL, 0)
4206 "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4207 /* methods */
4208 (destructor)element_dealloc, /* tp_dealloc */
4209 0, /* tp_vectorcall_offset */
4210 0, /* tp_getattr */
4211 0, /* tp_setattr */
4212 0, /* tp_as_async */
4213 (reprfunc)element_repr, /* tp_repr */
4214 0, /* tp_as_number */
4215 &element_as_sequence, /* tp_as_sequence */
4216 &element_as_mapping, /* tp_as_mapping */
4217 0, /* tp_hash */
4218 0, /* tp_call */
4219 0, /* tp_str */
4220 PyObject_GenericGetAttr, /* tp_getattro */
4221 0, /* tp_setattro */
4222 0, /* tp_as_buffer */
4223 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4224 /* tp_flags */
4225 0, /* tp_doc */
4226 (traverseproc)element_gc_traverse, /* tp_traverse */
4227 (inquiry)element_gc_clear, /* tp_clear */
4228 0, /* tp_richcompare */
4229 offsetof(ElementObject, weakreflist), /* tp_weaklistoffset */
4230 0, /* tp_iter */
4231 0, /* tp_iternext */
4232 element_methods, /* tp_methods */
4233 0, /* tp_members */
4234 element_getsetlist, /* tp_getset */
4235 0, /* tp_base */
4236 0, /* tp_dict */
4237 0, /* tp_descr_get */
4238 0, /* tp_descr_set */
4239 0, /* tp_dictoffset */
4240 (initproc)element_init, /* tp_init */
4241 PyType_GenericAlloc, /* tp_alloc */
4242 element_new, /* tp_new */
4243 0, /* tp_free */
4244 };
4245
4246 static PyMethodDef treebuilder_methods[] = {
4247 _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4248 _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4249 _ELEMENTTREE_TREEBUILDER_END_METHODDEF
4250 _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4251 _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
4252 _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4253 {NULL, NULL}
4254 };
4255
4256 static PyTypeObject TreeBuilder_Type = {
4257 PyVarObject_HEAD_INIT(NULL, 0)
4258 "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4259 /* methods */
4260 (destructor)treebuilder_dealloc, /* tp_dealloc */
4261 0, /* tp_vectorcall_offset */
4262 0, /* tp_getattr */
4263 0, /* tp_setattr */
4264 0, /* tp_as_async */
4265 0, /* tp_repr */
4266 0, /* tp_as_number */
4267 0, /* tp_as_sequence */
4268 0, /* tp_as_mapping */
4269 0, /* tp_hash */
4270 0, /* tp_call */
4271 0, /* tp_str */
4272 0, /* tp_getattro */
4273 0, /* tp_setattro */
4274 0, /* tp_as_buffer */
4275 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4276 /* tp_flags */
4277 0, /* tp_doc */
4278 (traverseproc)treebuilder_gc_traverse, /* tp_traverse */
4279 (inquiry)treebuilder_gc_clear, /* tp_clear */
4280 0, /* tp_richcompare */
4281 0, /* tp_weaklistoffset */
4282 0, /* tp_iter */
4283 0, /* tp_iternext */
4284 treebuilder_methods, /* tp_methods */
4285 0, /* tp_members */
4286 0, /* tp_getset */
4287 0, /* tp_base */
4288 0, /* tp_dict */
4289 0, /* tp_descr_get */
4290 0, /* tp_descr_set */
4291 0, /* tp_dictoffset */
4292 _elementtree_TreeBuilder___init__, /* tp_init */
4293 PyType_GenericAlloc, /* tp_alloc */
4294 treebuilder_new, /* tp_new */
4295 0, /* tp_free */
4296 };
4297
4298 static PyMethodDef xmlparser_methods[] = {
4299 _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4300 _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4301 _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4302 _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
4303 {NULL, NULL}
4304 };
4305
4306 static PyTypeObject XMLParser_Type = {
4307 PyVarObject_HEAD_INIT(NULL, 0)
4308 "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
4309 /* methods */
4310 (destructor)xmlparser_dealloc, /* tp_dealloc */
4311 0, /* tp_vectorcall_offset */
4312 0, /* tp_getattr */
4313 0, /* tp_setattr */
4314 0, /* tp_as_async */
4315 0, /* tp_repr */
4316 0, /* tp_as_number */
4317 0, /* tp_as_sequence */
4318 0, /* tp_as_mapping */
4319 0, /* tp_hash */
4320 0, /* tp_call */
4321 0, /* tp_str */
4322 0, /* tp_getattro */
4323 0, /* tp_setattro */
4324 0, /* tp_as_buffer */
4325 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4326 /* tp_flags */
4327 0, /* tp_doc */
4328 (traverseproc)xmlparser_gc_traverse, /* tp_traverse */
4329 (inquiry)xmlparser_gc_clear, /* tp_clear */
4330 0, /* tp_richcompare */
4331 0, /* tp_weaklistoffset */
4332 0, /* tp_iter */
4333 0, /* tp_iternext */
4334 xmlparser_methods, /* tp_methods */
4335 xmlparser_members, /* tp_members */
4336 xmlparser_getsetlist, /* tp_getset */
4337 0, /* tp_base */
4338 0, /* tp_dict */
4339 0, /* tp_descr_get */
4340 0, /* tp_descr_set */
4341 0, /* tp_dictoffset */
4342 _elementtree_XMLParser___init__, /* tp_init */
4343 PyType_GenericAlloc, /* tp_alloc */
4344 xmlparser_new, /* tp_new */
4345 0, /* tp_free */
4346 };
4347
4348 /* ==================================================================== */
4349 /* python module interface */
4350
4351 static PyMethodDef _functions[] = {
4352 {"SubElement", _PyCFunction_CAST(subelement), METH_VARARGS | METH_KEYWORDS},
4353 _ELEMENTTREE__SET_FACTORIES_METHODDEF
4354 {NULL, NULL}
4355 };
4356
4357
4358 static struct PyModuleDef elementtreemodule = {
4359 PyModuleDef_HEAD_INIT,
4360 "_elementtree",
4361 NULL,
4362 sizeof(elementtreestate),
4363 _functions,
4364 NULL,
4365 elementtree_traverse,
4366 elementtree_clear,
4367 elementtree_free
4368 };
4369
4370 PyMODINIT_FUNC
PyInit__elementtree(void)4371 PyInit__elementtree(void)
4372 {
4373 PyObject *m, *temp;
4374 elementtreestate *st;
4375
4376 m = PyState_FindModule(&elementtreemodule);
4377 if (m) {
4378 Py_INCREF(m);
4379 return m;
4380 }
4381
4382 /* Initialize object types */
4383 if (PyType_Ready(&ElementIter_Type) < 0)
4384 return NULL;
4385 if (PyType_Ready(&TreeBuilder_Type) < 0)
4386 return NULL;
4387 if (PyType_Ready(&Element_Type) < 0)
4388 return NULL;
4389 if (PyType_Ready(&XMLParser_Type) < 0)
4390 return NULL;
4391
4392 m = PyModule_Create(&elementtreemodule);
4393 if (!m)
4394 return NULL;
4395 st = get_elementtree_state(m);
4396
4397 if (!(temp = PyImport_ImportModule("copy")))
4398 return NULL;
4399 st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
4400 Py_XDECREF(temp);
4401
4402 if (st->deepcopy_obj == NULL) {
4403 return NULL;
4404 }
4405
4406 assert(!PyErr_Occurred());
4407 if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
4408 return NULL;
4409
4410 /* link against pyexpat */
4411 expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4412 if (expat_capi) {
4413 /* check that it's usable */
4414 if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
4415 (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
4416 expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4417 expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
4418 expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
4419 PyErr_SetString(PyExc_ImportError,
4420 "pyexpat version is incompatible");
4421 return NULL;
4422 }
4423 } else {
4424 return NULL;
4425 }
4426
4427 st->parseerror_obj = PyErr_NewException(
4428 "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
4429 );
4430 Py_INCREF(st->parseerror_obj);
4431 if (PyModule_AddObject(m, "ParseError", st->parseerror_obj) < 0) {
4432 Py_DECREF(st->parseerror_obj);
4433 return NULL;
4434 }
4435
4436 PyTypeObject *types[] = {
4437 &Element_Type,
4438 &TreeBuilder_Type,
4439 &XMLParser_Type
4440 };
4441
4442 for (size_t i = 0; i < Py_ARRAY_LENGTH(types); i++) {
4443 if (PyModule_AddType(m, types[i]) < 0) {
4444 return NULL;
4445 }
4446 }
4447
4448 return m;
4449 }
4450