1 /*--------------------------------------------------------------------
2  * Licensed to PSF under a Contributor Agreement.
3  * See https://www.python.org/psf/license for licensing details.
4  *
5  * _elementtree - C accelerator for xml.etree.ElementTree
6  * Copyright (c) 1999-2009 by Secret Labs AB.  All rights reserved.
7  * Copyright (c) 1999-2009 by Fredrik Lundh.
8  *
9  * [email protected]
10  * http://www.pythonware.com
11  *--------------------------------------------------------------------
12  */
13 
14 #define PY_SSIZE_T_CLEAN
15 #define NEEDS_PY_IDENTIFIER
16 
17 #include "Python.h"
18 #include "structmember.h"         // PyMemberDef
19 
20 /* -------------------------------------------------------------------- */
21 /* configuration */
22 
23 /* An element can hold this many children without extra memory
24    allocations. */
25 #define STATIC_CHILDREN 4
26 
/* For best performance, choose a value so that 80-90% of all nodes
   have no more than the given number of children.  Set this to zero
   to minimize the size of the element structure itself (this only
   helps if you have lots of leaf nodes with attributes). */
31 
32 /* Also note that pymalloc always allocates blocks in multiples of
33    eight bytes.  For the current C version of ElementTree, this means
34    that the number of children should be an even number, at least on
35    32-bit platforms. */
36 
37 /* -------------------------------------------------------------------- */
38 
/* compiler tweaks */
/* LOCAL(type) starts the declaration of a file-local helper returning
   `type`.  Under MSVC the helper is additionally marked __inline and
   __fastcall; every other compiler gets a plain `static` function. */
#if defined(_MSC_VER)
#define LOCAL(type) static __inline type __fastcall
#else
#define LOCAL(type) static type
#endif
45 
/* macros used to store 'join' flags in string object pointers.  note
   that all use of text and tail as object pointers must be wrapped in
   JOIN_OBJ.  see comments in the ElementObject definition for more
   info. */
/* JOIN_GET(p): the join flag, i.e. the lowest bit of the pointer value. */
#define JOIN_GET(p) ((uintptr_t) (p) & 1)
/* JOIN_SET(p, flag): p with its lowest bit cleared and then `flag` OR-ed
   in.  `flag` is not masked, so it is expected to be 0 or 1. */
#define JOIN_SET(p, flag) ((void*) ((uintptr_t) (JOIN_OBJ(p)) | (flag)))
/* JOIN_OBJ(p): the plain PyObject pointer, with the lowest bit cleared. */
#define JOIN_OBJ(p) ((PyObject*) ((uintptr_t) (p) & ~(uintptr_t)1))
53 
54 /* Py_SETREF for a PyObject* that uses a join flag. */
55 Py_LOCAL_INLINE(void)
_set_joined_ptr(PyObject ** p,PyObject * new_joined_ptr)56 _set_joined_ptr(PyObject **p, PyObject *new_joined_ptr)
57 {
58     PyObject *tmp = JOIN_OBJ(*p);
59     *p = new_joined_ptr;
60     Py_DECREF(tmp);
61 }
62 
63 /* Py_CLEAR for a PyObject* that uses a join flag. Pass the pointer by
64  * reference since this function sets it to NULL.
65 */
_clear_joined_ptr(PyObject ** p)66 static void _clear_joined_ptr(PyObject **p)
67 {
68     if (*p) {
69         _set_joined_ptr(p, NULL);
70     }
71 }
72 
73 /* Types defined by this extension */
74 static PyTypeObject Element_Type;
75 static PyTypeObject ElementIter_Type;
76 static PyTypeObject TreeBuilder_Type;
77 static PyTypeObject XMLParser_Type;
78 
79 
/* Per-module state; PEP 3121.
 * Every field is an object reference that elementtree_traverse() visits
 * and elementtree_clear() releases.
 */
typedef struct {
    /* presumably the ParseError exception class -- TODO confirm where it
       is assigned */
    PyObject *parseerror_obj;
    /* callable invoked by deepcopy() as deepcopy_obj(object, memo) */
    PyObject *deepcopy_obj;
    /* presumably the ElementPath helper module -- TODO confirm */
    PyObject *elementpath_obj;
    /* presumably the factories used for comment and processing-instruction
       nodes -- TODO confirm against the code that sets them */
    PyObject *comment_factory;
    PyObject *pi_factory;
} elementtreestate;
88 
89 static struct PyModuleDef elementtreemodule;
90 
91 /* Given a module object (assumed to be _elementtree), get its per-module
92  * state.
93  */
94 static inline elementtreestate*
get_elementtree_state(PyObject * module)95 get_elementtree_state(PyObject *module)
96 {
97     void *state = PyModule_GetState(module);
98     assert(state != NULL);
99     return (elementtreestate *)state;
100 }
101 
/* Find the module instance imported in the currently running sub-interpreter
 * and get its state.
 *
 * The result of PyState_FindModule() is passed straight on to
 * PyModule_GetState() and the final pointer is returned as is: neither
 * step is checked for NULL here.
 */
#define ET_STATE_GLOBAL \
    ((elementtreestate *) PyModule_GetState(PyState_FindModule(&elementtreemodule)))
107 
108 static int
elementtree_clear(PyObject * m)109 elementtree_clear(PyObject *m)
110 {
111     elementtreestate *st = get_elementtree_state(m);
112     Py_CLEAR(st->parseerror_obj);
113     Py_CLEAR(st->deepcopy_obj);
114     Py_CLEAR(st->elementpath_obj);
115     Py_CLEAR(st->comment_factory);
116     Py_CLEAR(st->pi_factory);
117     return 0;
118 }
119 
120 static int
elementtree_traverse(PyObject * m,visitproc visit,void * arg)121 elementtree_traverse(PyObject *m, visitproc visit, void *arg)
122 {
123     elementtreestate *st = get_elementtree_state(m);
124     Py_VISIT(st->parseerror_obj);
125     Py_VISIT(st->deepcopy_obj);
126     Py_VISIT(st->elementpath_obj);
127     Py_VISIT(st->comment_factory);
128     Py_VISIT(st->pi_factory);
129     return 0;
130 }
131 
132 static void
elementtree_free(void * m)133 elementtree_free(void *m)
134 {
135     elementtree_clear((PyObject *)m);
136 }
137 
138 /* helpers */
139 
140 LOCAL(PyObject*)
list_join(PyObject * list)141 list_join(PyObject* list)
142 {
143     /* join list elements */
144     PyObject* joiner;
145     PyObject* result;
146 
147     joiner = PyUnicode_FromStringAndSize("", 0);
148     if (!joiner)
149         return NULL;
150     result = PyUnicode_Join(joiner, list);
151     Py_DECREF(joiner);
152     return result;
153 }
154 
155 /* Is the given object an empty dictionary?
156 */
157 static int
is_empty_dict(PyObject * obj)158 is_empty_dict(PyObject *obj)
159 {
160     return PyDict_CheckExact(obj) && PyDict_GET_SIZE(obj) == 0;
161 }
162 
163 
164 /* -------------------------------------------------------------------- */
165 /* the Element type */
166 
/* Optional part of an element, allocated by create_extra() only once the
   element needs attributes or children and released by dealloc_extra(). */
typedef struct {

    /* attributes (a dictionary object), or NULL if no attributes */
    PyObject* attrib;

    /* child elements */
    Py_ssize_t length; /* actual number of items */
    Py_ssize_t allocated; /* allocated items */

    /* this either points to _children or to a malloced buffer */
    PyObject* *children;

    /* inline storage used until element_resize() needs more room than
       STATIC_CHILDREN slots */
    PyObject* _children[STATIC_CHILDREN];

} ElementObjectExtra;
182 
/* Instance layout of the Element type. */
typedef struct {
    PyObject_HEAD

    /* element tag (a string). */
    PyObject* tag;

    /* text before first child.  note that this is a tagged pointer;
       use JOIN_OBJ to get the object pointer.  the join flag is used
       to distinguish lists created by the tree builder from lists
       assigned to the attribute by application code; the former
       should be joined before being returned to the user, the latter
       should be left intact. */
    PyObject* text;

    /* text after this element, in parent.  note that this is a tagged
       pointer; use JOIN_OBJ to get the object pointer. */
    PyObject* tail;

    /* attributes and children; NULL until create_extra() allocates it */
    ElementObjectExtra* extra;

    PyObject *weakreflist; /* For tp_weaklistoffset */

} ElementObject;
206 
207 
/* Element_CheckExact: op's type is exactly Element_Type.
   Element_Check: the same test through PyObject_TypeCheck, which also
   accepts subtypes of Element_Type. */
#define Element_CheckExact(op) Py_IS_TYPE(op, &Element_Type)
#define Element_Check(op) PyObject_TypeCheck(op, &Element_Type)
210 
211 
212 /* -------------------------------------------------------------------- */
213 /* Element constructors and destructor */
214 
215 LOCAL(int)
create_extra(ElementObject * self,PyObject * attrib)216 create_extra(ElementObject* self, PyObject* attrib)
217 {
218     self->extra = PyObject_Malloc(sizeof(ElementObjectExtra));
219     if (!self->extra) {
220         PyErr_NoMemory();
221         return -1;
222     }
223 
224     Py_XINCREF(attrib);
225     self->extra->attrib = attrib;
226 
227     self->extra->length = 0;
228     self->extra->allocated = STATIC_CHILDREN;
229     self->extra->children = self->extra->_children;
230 
231     return 0;
232 }
233 
234 LOCAL(void)
dealloc_extra(ElementObjectExtra * extra)235 dealloc_extra(ElementObjectExtra *extra)
236 {
237     Py_ssize_t i;
238 
239     if (!extra)
240         return;
241 
242     Py_XDECREF(extra->attrib);
243 
244     for (i = 0; i < extra->length; i++)
245         Py_DECREF(extra->children[i]);
246 
247     if (extra->children != extra->_children)
248         PyObject_Free(extra->children);
249 
250     PyObject_Free(extra);
251 }
252 
253 LOCAL(void)
clear_extra(ElementObject * self)254 clear_extra(ElementObject* self)
255 {
256     ElementObjectExtra *myextra;
257 
258     if (!self->extra)
259         return;
260 
261     /* Avoid DECREFs calling into this code again (cycles, etc.)
262     */
263     myextra = self->extra;
264     self->extra = NULL;
265 
266     dealloc_extra(myextra);
267 }
268 
269 /* Convenience internal function to create new Element objects with the given
270  * tag and attributes.
271 */
272 LOCAL(PyObject*)
create_new_element(PyObject * tag,PyObject * attrib)273 create_new_element(PyObject* tag, PyObject* attrib)
274 {
275     ElementObject* self;
276 
277     self = PyObject_GC_New(ElementObject, &Element_Type);
278     if (self == NULL)
279         return NULL;
280     self->extra = NULL;
281 
282     Py_INCREF(tag);
283     self->tag = tag;
284 
285     Py_INCREF(Py_None);
286     self->text = Py_None;
287 
288     Py_INCREF(Py_None);
289     self->tail = Py_None;
290 
291     self->weakreflist = NULL;
292 
293     PyObject_GC_Track(self);
294 
295     if (attrib != NULL && !is_empty_dict(attrib)) {
296         if (create_extra(self, attrib) < 0) {
297             Py_DECREF(self);
298             return NULL;
299         }
300     }
301 
302     return (PyObject*) self;
303 }
304 
305 static PyObject *
element_new(PyTypeObject * type,PyObject * args,PyObject * kwds)306 element_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
307 {
308     ElementObject *e = (ElementObject *)type->tp_alloc(type, 0);
309     if (e != NULL) {
310         Py_INCREF(Py_None);
311         e->tag = Py_None;
312 
313         Py_INCREF(Py_None);
314         e->text = Py_None;
315 
316         Py_INCREF(Py_None);
317         e->tail = Py_None;
318 
319         e->extra = NULL;
320         e->weakreflist = NULL;
321     }
322     return (PyObject *)e;
323 }
324 
325 /* Helper function for extracting the attrib dictionary from a keywords dict.
326  * This is required by some constructors/functions in this module that can
327  * either accept attrib as a keyword argument or all attributes splashed
328  * directly into *kwds.
329  *
330  * Return a dictionary with the content of kwds merged into the content of
331  * attrib. If there is no attrib keyword, return a copy of kwds.
332  */
333 static PyObject*
get_attrib_from_keywords(PyObject * kwds)334 get_attrib_from_keywords(PyObject *kwds)
335 {
336     PyObject *attrib_str = PyUnicode_FromString("attrib");
337     if (attrib_str == NULL) {
338         return NULL;
339     }
340     PyObject *attrib = PyDict_GetItemWithError(kwds, attrib_str);
341 
342     if (attrib) {
343         /* If attrib was found in kwds, copy its value and remove it from
344          * kwds
345          */
346         if (!PyDict_Check(attrib)) {
347             Py_DECREF(attrib_str);
348             PyErr_Format(PyExc_TypeError, "attrib must be dict, not %.100s",
349                          Py_TYPE(attrib)->tp_name);
350             return NULL;
351         }
352         attrib = PyDict_Copy(attrib);
353         if (attrib && PyDict_DelItem(kwds, attrib_str) < 0) {
354             Py_DECREF(attrib);
355             attrib = NULL;
356         }
357     }
358     else if (!PyErr_Occurred()) {
359         attrib = PyDict_New();
360     }
361 
362     Py_DECREF(attrib_str);
363 
364     if (attrib != NULL && PyDict_Update(attrib, kwds) < 0) {
365         Py_DECREF(attrib);
366         return NULL;
367     }
368     return attrib;
369 }
370 
371 /*[clinic input]
372 module _elementtree
373 class _elementtree.Element "ElementObject *" "&Element_Type"
374 class _elementtree.TreeBuilder "TreeBuilderObject *" "&TreeBuilder_Type"
375 class _elementtree.XMLParser "XMLParserObject *" "&XMLParser_Type"
376 [clinic start generated code]*/
377 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=159aa50a54061c22]*/
378 
379 static int
element_init(PyObject * self,PyObject * args,PyObject * kwds)380 element_init(PyObject *self, PyObject *args, PyObject *kwds)
381 {
382     PyObject *tag;
383     PyObject *attrib = NULL;
384     ElementObject *self_elem;
385 
386     if (!PyArg_ParseTuple(args, "O|O!:Element", &tag, &PyDict_Type, &attrib))
387         return -1;
388 
389     if (attrib) {
390         /* attrib passed as positional arg */
391         attrib = PyDict_Copy(attrib);
392         if (!attrib)
393             return -1;
394         if (kwds) {
395             if (PyDict_Update(attrib, kwds) < 0) {
396                 Py_DECREF(attrib);
397                 return -1;
398             }
399         }
400     } else if (kwds) {
401         /* have keywords args */
402         attrib = get_attrib_from_keywords(kwds);
403         if (!attrib)
404             return -1;
405     }
406 
407     self_elem = (ElementObject *)self;
408 
409     if (attrib != NULL && !is_empty_dict(attrib)) {
410         if (create_extra(self_elem, attrib) < 0) {
411             Py_DECREF(attrib);
412             return -1;
413         }
414     }
415 
416     /* We own a reference to attrib here and it's no longer needed. */
417     Py_XDECREF(attrib);
418 
419     /* Replace the objects already pointed to by tag, text and tail. */
420     Py_INCREF(tag);
421     Py_XSETREF(self_elem->tag, tag);
422 
423     Py_INCREF(Py_None);
424     _set_joined_ptr(&self_elem->text, Py_None);
425 
426     Py_INCREF(Py_None);
427     _set_joined_ptr(&self_elem->tail, Py_None);
428 
429     return 0;
430 }
431 
432 LOCAL(int)
element_resize(ElementObject * self,Py_ssize_t extra)433 element_resize(ElementObject* self, Py_ssize_t extra)
434 {
435     Py_ssize_t size;
436     PyObject* *children;
437 
438     assert(extra >= 0);
439     /* make sure self->children can hold the given number of extra
440        elements.  set an exception and return -1 if allocation failed */
441 
442     if (!self->extra) {
443         if (create_extra(self, NULL) < 0)
444             return -1;
445     }
446 
447     size = self->extra->length + extra;  /* never overflows */
448 
449     if (size > self->extra->allocated) {
450         /* use Python 2.4's list growth strategy */
451         size = (size >> 3) + (size < 9 ? 3 : 6) + size;
452         /* Coverity CID #182 size_error: Allocating 1 bytes to pointer "children"
453          * which needs at least 4 bytes.
454          * Although it's a false alarm always assume at least one child to
455          * be safe.
456          */
457         size = size ? size : 1;
458         if ((size_t)size > PY_SSIZE_T_MAX/sizeof(PyObject*))
459             goto nomemory;
460         if (self->extra->children != self->extra->_children) {
461             /* Coverity CID #182 size_error: Allocating 1 bytes to pointer
462              * "children", which needs at least 4 bytes. Although it's a
463              * false alarm always assume at least one child to be safe.
464              */
465             children = PyObject_Realloc(self->extra->children,
466                                         size * sizeof(PyObject*));
467             if (!children)
468                 goto nomemory;
469         } else {
470             children = PyObject_Malloc(size * sizeof(PyObject*));
471             if (!children)
472                 goto nomemory;
473             /* copy existing children from static area to malloc buffer */
474             memcpy(children, self->extra->children,
475                    self->extra->length * sizeof(PyObject*));
476         }
477         self->extra->children = children;
478         self->extra->allocated = size;
479     }
480 
481     return 0;
482 
483   nomemory:
484     PyErr_NoMemory();
485     return -1;
486 }
487 
488 LOCAL(void)
raise_type_error(PyObject * element)489 raise_type_error(PyObject *element)
490 {
491     PyErr_Format(PyExc_TypeError,
492                  "expected an Element, not \"%.200s\"",
493                  Py_TYPE(element)->tp_name);
494 }
495 
496 LOCAL(int)
element_add_subelement(ElementObject * self,PyObject * element)497 element_add_subelement(ElementObject* self, PyObject* element)
498 {
499     /* add a child element to a parent */
500 
501     if (!Element_Check(element)) {
502         raise_type_error(element);
503         return -1;
504     }
505 
506     if (element_resize(self, 1) < 0)
507         return -1;
508 
509     Py_INCREF(element);
510     self->extra->children[self->extra->length] = element;
511 
512     self->extra->length++;
513 
514     return 0;
515 }
516 
517 LOCAL(PyObject*)
element_get_attrib(ElementObject * self)518 element_get_attrib(ElementObject* self)
519 {
520     /* return borrowed reference to attrib dictionary */
521     /* note: this function assumes that the extra section exists */
522 
523     PyObject* res = self->extra->attrib;
524 
525     if (!res) {
526         /* create missing dictionary */
527         res = self->extra->attrib = PyDict_New();
528     }
529 
530     return res;
531 }
532 
533 LOCAL(PyObject*)
element_get_text(ElementObject * self)534 element_get_text(ElementObject* self)
535 {
536     /* return borrowed reference to text attribute */
537 
538     PyObject *res = self->text;
539 
540     if (JOIN_GET(res)) {
541         res = JOIN_OBJ(res);
542         if (PyList_CheckExact(res)) {
543             PyObject *tmp = list_join(res);
544             if (!tmp)
545                 return NULL;
546             self->text = tmp;
547             Py_DECREF(res);
548             res = tmp;
549         }
550     }
551 
552     return res;
553 }
554 
555 LOCAL(PyObject*)
element_get_tail(ElementObject * self)556 element_get_tail(ElementObject* self)
557 {
558     /* return borrowed reference to text attribute */
559 
560     PyObject *res = self->tail;
561 
562     if (JOIN_GET(res)) {
563         res = JOIN_OBJ(res);
564         if (PyList_CheckExact(res)) {
565             PyObject *tmp = list_join(res);
566             if (!tmp)
567                 return NULL;
568             self->tail = tmp;
569             Py_DECREF(res);
570             res = tmp;
571         }
572     }
573 
574     return res;
575 }
576 
577 static PyObject*
subelement(PyObject * self,PyObject * args,PyObject * kwds)578 subelement(PyObject *self, PyObject *args, PyObject *kwds)
579 {
580     PyObject* elem;
581 
582     ElementObject* parent;
583     PyObject* tag;
584     PyObject* attrib = NULL;
585     if (!PyArg_ParseTuple(args, "O!O|O!:SubElement",
586                           &Element_Type, &parent, &tag,
587                           &PyDict_Type, &attrib)) {
588         return NULL;
589     }
590 
591     if (attrib) {
592         /* attrib passed as positional arg */
593         attrib = PyDict_Copy(attrib);
594         if (!attrib)
595             return NULL;
596         if (kwds != NULL && PyDict_Update(attrib, kwds) < 0) {
597             Py_DECREF(attrib);
598             return NULL;
599         }
600     } else if (kwds) {
601         /* have keyword args */
602         attrib = get_attrib_from_keywords(kwds);
603         if (!attrib)
604             return NULL;
605     } else {
606         /* no attrib arg, no kwds, so no attribute */
607     }
608 
609     elem = create_new_element(tag, attrib);
610     Py_XDECREF(attrib);
611     if (elem == NULL)
612         return NULL;
613 
614     if (element_add_subelement(parent, elem) < 0) {
615         Py_DECREF(elem);
616         return NULL;
617     }
618 
619     return elem;
620 }
621 
622 static int
element_gc_traverse(ElementObject * self,visitproc visit,void * arg)623 element_gc_traverse(ElementObject *self, visitproc visit, void *arg)
624 {
625     Py_VISIT(self->tag);
626     Py_VISIT(JOIN_OBJ(self->text));
627     Py_VISIT(JOIN_OBJ(self->tail));
628 
629     if (self->extra) {
630         Py_ssize_t i;
631         Py_VISIT(self->extra->attrib);
632 
633         for (i = 0; i < self->extra->length; ++i)
634             Py_VISIT(self->extra->children[i]);
635     }
636     return 0;
637 }
638 
639 static int
element_gc_clear(ElementObject * self)640 element_gc_clear(ElementObject *self)
641 {
642     Py_CLEAR(self->tag);
643     _clear_joined_ptr(&self->text);
644     _clear_joined_ptr(&self->tail);
645 
646     /* After dropping all references from extra, it's no longer valid anyway,
647      * so fully deallocate it.
648     */
649     clear_extra(self);
650     return 0;
651 }
652 
/* tp_dealloc for Element: untrack the object, clear weak references,
 * release everything the element owns and finally free the object through
 * its type's tp_free slot.
 */
static void
element_dealloc(ElementObject* self)
{
    /* bpo-31095: UnTrack is needed before calling any callbacks */
    PyObject_GC_UnTrack(self);
    /* The rest of the teardown runs between the trashcan macros --
       presumably to bound C stack depth when long chains of nested
       elements are destroyed; see the CPython trashcan documentation. */
    Py_TRASHCAN_BEGIN(self, element_dealloc)

    if (self->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) self);

    /* element_gc_clear clears all references and deallocates extra
    */
    element_gc_clear(self);

    Py_TYPE(self)->tp_free((PyObject *)self);
    Py_TRASHCAN_END
}
670 
671 /* -------------------------------------------------------------------- */
672 
673 /*[clinic input]
674 _elementtree.Element.append
675 
676     subelement: object(subclass_of='&Element_Type')
677     /
678 
679 [clinic start generated code]*/
680 
681 static PyObject *
_elementtree_Element_append_impl(ElementObject * self,PyObject * subelement)682 _elementtree_Element_append_impl(ElementObject *self, PyObject *subelement)
683 /*[clinic end generated code: output=54a884b7cf2295f4 input=3ed648beb5bfa22a]*/
684 {
685     if (element_add_subelement(self, subelement) < 0)
686         return NULL;
687 
688     Py_RETURN_NONE;
689 }
690 
691 /*[clinic input]
692 _elementtree.Element.clear
693 
694 [clinic start generated code]*/
695 
696 static PyObject *
_elementtree_Element_clear_impl(ElementObject * self)697 _elementtree_Element_clear_impl(ElementObject *self)
698 /*[clinic end generated code: output=8bcd7a51f94cfff6 input=3c719ff94bf45dd6]*/
699 {
700     clear_extra(self);
701 
702     Py_INCREF(Py_None);
703     _set_joined_ptr(&self->text, Py_None);
704 
705     Py_INCREF(Py_None);
706     _set_joined_ptr(&self->tail, Py_None);
707 
708     Py_RETURN_NONE;
709 }
710 
711 /*[clinic input]
712 _elementtree.Element.__copy__
713 
714 [clinic start generated code]*/
715 
716 static PyObject *
_elementtree_Element___copy___impl(ElementObject * self)717 _elementtree_Element___copy___impl(ElementObject *self)
718 /*[clinic end generated code: output=2c701ebff7247781 input=ad87aaebe95675bf]*/
719 {
720     Py_ssize_t i;
721     ElementObject* element;
722 
723     element = (ElementObject*) create_new_element(
724         self->tag, self->extra ? self->extra->attrib : NULL);
725     if (!element)
726         return NULL;
727 
728     Py_INCREF(JOIN_OBJ(self->text));
729     _set_joined_ptr(&element->text, self->text);
730 
731     Py_INCREF(JOIN_OBJ(self->tail));
732     _set_joined_ptr(&element->tail, self->tail);
733 
734     assert(!element->extra || !element->extra->length);
735     if (self->extra) {
736         if (element_resize(element, self->extra->length) < 0) {
737             Py_DECREF(element);
738             return NULL;
739         }
740 
741         for (i = 0; i < self->extra->length; i++) {
742             Py_INCREF(self->extra->children[i]);
743             element->extra->children[i] = self->extra->children[i];
744         }
745 
746         assert(!element->extra->length);
747         element->extra->length = self->extra->length;
748     }
749 
750     return (PyObject*) element;
751 }
752 
753 /* Helper for a deep copy. */
754 LOCAL(PyObject *) deepcopy(PyObject *, PyObject *);
755 
756 /*[clinic input]
757 _elementtree.Element.__deepcopy__
758 
759     memo: object(subclass_of="&PyDict_Type")
760     /
761 
762 [clinic start generated code]*/
763 
764 static PyObject *
_elementtree_Element___deepcopy___impl(ElementObject * self,PyObject * memo)765 _elementtree_Element___deepcopy___impl(ElementObject *self, PyObject *memo)
766 /*[clinic end generated code: output=eefc3df50465b642 input=a2d40348c0aade10]*/
767 {
768     Py_ssize_t i;
769     ElementObject* element;
770     PyObject* tag;
771     PyObject* attrib;
772     PyObject* text;
773     PyObject* tail;
774     PyObject* id;
775 
776     tag = deepcopy(self->tag, memo);
777     if (!tag)
778         return NULL;
779 
780     if (self->extra && self->extra->attrib) {
781         attrib = deepcopy(self->extra->attrib, memo);
782         if (!attrib) {
783             Py_DECREF(tag);
784             return NULL;
785         }
786     } else {
787         attrib = NULL;
788     }
789 
790     element = (ElementObject*) create_new_element(tag, attrib);
791 
792     Py_DECREF(tag);
793     Py_XDECREF(attrib);
794 
795     if (!element)
796         return NULL;
797 
798     text = deepcopy(JOIN_OBJ(self->text), memo);
799     if (!text)
800         goto error;
801     _set_joined_ptr(&element->text, JOIN_SET(text, JOIN_GET(self->text)));
802 
803     tail = deepcopy(JOIN_OBJ(self->tail), memo);
804     if (!tail)
805         goto error;
806     _set_joined_ptr(&element->tail, JOIN_SET(tail, JOIN_GET(self->tail)));
807 
808     assert(!element->extra || !element->extra->length);
809     if (self->extra) {
810         if (element_resize(element, self->extra->length) < 0)
811             goto error;
812 
813         for (i = 0; i < self->extra->length; i++) {
814             PyObject* child = deepcopy(self->extra->children[i], memo);
815             if (!child || !Element_Check(child)) {
816                 if (child) {
817                     raise_type_error(child);
818                     Py_DECREF(child);
819                 }
820                 element->extra->length = i;
821                 goto error;
822             }
823             element->extra->children[i] = child;
824         }
825 
826         assert(!element->extra->length);
827         element->extra->length = self->extra->length;
828     }
829 
830     /* add object to memo dictionary (so deepcopy won't visit it again) */
831     id = PyLong_FromSsize_t((uintptr_t) self);
832     if (!id)
833         goto error;
834 
835     i = PyDict_SetItem(memo, id, (PyObject*) element);
836 
837     Py_DECREF(id);
838 
839     if (i < 0)
840         goto error;
841 
842     return (PyObject*) element;
843 
844   error:
845     Py_DECREF(element);
846     return NULL;
847 }
848 
849 LOCAL(PyObject *)
deepcopy(PyObject * object,PyObject * memo)850 deepcopy(PyObject *object, PyObject *memo)
851 {
852     /* do a deep copy of the given object */
853     elementtreestate *st;
854     PyObject *stack[2];
855 
856     /* Fast paths */
857     if (object == Py_None || PyUnicode_CheckExact(object)) {
858         Py_INCREF(object);
859         return object;
860     }
861 
862     if (Py_REFCNT(object) == 1) {
863         if (PyDict_CheckExact(object)) {
864             PyObject *key, *value;
865             Py_ssize_t pos = 0;
866             int simple = 1;
867             while (PyDict_Next(object, &pos, &key, &value)) {
868                 if (!PyUnicode_CheckExact(key) || !PyUnicode_CheckExact(value)) {
869                     simple = 0;
870                     break;
871                 }
872             }
873             if (simple)
874                 return PyDict_Copy(object);
875             /* Fall through to general case */
876         }
877         else if (Element_CheckExact(object)) {
878             return _elementtree_Element___deepcopy___impl(
879                 (ElementObject *)object, memo);
880         }
881     }
882 
883     /* General case */
884     st = ET_STATE_GLOBAL;
885     if (!st->deepcopy_obj) {
886         PyErr_SetString(PyExc_RuntimeError,
887                         "deepcopy helper not found");
888         return NULL;
889     }
890 
891     stack[0] = object;
892     stack[1] = memo;
893     return _PyObject_FastCall(st->deepcopy_obj, stack, 2);
894 }
895 
896 
897 /*[clinic input]
898 _elementtree.Element.__sizeof__ -> Py_ssize_t
899 
900 [clinic start generated code]*/
901 
902 static Py_ssize_t
_elementtree_Element___sizeof___impl(ElementObject * self)903 _elementtree_Element___sizeof___impl(ElementObject *self)
904 /*[clinic end generated code: output=bf73867721008000 input=70f4b323d55a17c1]*/
905 {
906     Py_ssize_t result = _PyObject_SIZE(Py_TYPE(self));
907     if (self->extra) {
908         result += sizeof(ElementObjectExtra);
909         if (self->extra->children != self->extra->_children)
910             result += sizeof(PyObject*) * self->extra->allocated;
911     }
912     return result;
913 }
914 
915 /* dict keys for getstate/setstate. */
916 #define PICKLED_TAG "tag"
917 #define PICKLED_CHILDREN "_children"
918 #define PICKLED_ATTRIB "attrib"
919 #define PICKLED_TAIL "tail"
920 #define PICKLED_TEXT "text"
921 
922 /* __getstate__ returns a fabricated instance dict as in the pure-Python
923  * Element implementation, for interoperability/interchangeability.  This
924  * makes the pure-Python implementation details an API, but (a) there aren't
925  * any unnecessary structures there; and (b) it buys compatibility with 3.2
926  * pickles.  See issue #16076.
927  */
928 /*[clinic input]
929 _elementtree.Element.__getstate__
930 
931 [clinic start generated code]*/
932 
933 static PyObject *
_elementtree_Element___getstate___impl(ElementObject * self)934 _elementtree_Element___getstate___impl(ElementObject *self)
935 /*[clinic end generated code: output=37279aeeb6bb5b04 input=f0d16d7ec2f7adc1]*/
936 {
937     Py_ssize_t i;
938     PyObject *children, *attrib;
939 
940     /* Build a list of children. */
941     children = PyList_New(self->extra ? self->extra->length : 0);
942     if (!children)
943         return NULL;
944     for (i = 0; i < PyList_GET_SIZE(children); i++) {
945         PyObject *child = self->extra->children[i];
946         Py_INCREF(child);
947         PyList_SET_ITEM(children, i, child);
948     }
949 
950     if (self->extra && self->extra->attrib) {
951         attrib = self->extra->attrib;
952         Py_INCREF(attrib);
953     }
954     else {
955         attrib = PyDict_New();
956         if (!attrib) {
957             Py_DECREF(children);
958             return NULL;
959         }
960     }
961 
962     return Py_BuildValue("{sOsNsNsOsO}",
963                          PICKLED_TAG, self->tag,
964                          PICKLED_CHILDREN, children,
965                          PICKLED_ATTRIB, attrib,
966                          PICKLED_TEXT, JOIN_OBJ(self->text),
967                          PICKLED_TAIL, JOIN_OBJ(self->tail));
968 }
969 
/* Install the given state into self.
 *
 * tag is mandatory.  text and tail default to None when NULL; an exact
 * list is stored with the join flag set.  children, when given, must be a
 * list of Element objects and replaces the current children.  attrib, when
 * given, replaces the current attributes.
 *
 * Returns None on success, NULL with an exception set on failure.  Note
 * that tag, text and tail have already been replaced when a later step
 * fails.
 */
static PyObject *
element_setstate_from_attributes(ElementObject *self,
                                 PyObject *tag,
                                 PyObject *attrib,
                                 PyObject *text,
                                 PyObject *tail,
                                 PyObject *children)
{
    Py_ssize_t i, nchildren;
    ElementObjectExtra *oldextra = NULL;

    if (!tag) {
        PyErr_SetString(PyExc_TypeError, "tag may not be NULL");
        return NULL;
    }

    Py_INCREF(tag);
    Py_XSETREF(self->tag, tag);

    /* Tag the pointer with the join flag when the value is an exact list;
       the reference is taken on the untagged object. */
    text = text ? JOIN_SET(text, PyList_CheckExact(text)) : Py_None;
    Py_INCREF(JOIN_OBJ(text));
    _set_joined_ptr(&self->text, text);

    tail = tail ? JOIN_SET(tail, PyList_CheckExact(tail)) : Py_None;
    Py_INCREF(JOIN_OBJ(tail));
    _set_joined_ptr(&self->tail, tail);

    /* Handle ATTRIB and CHILDREN. */
    if (!children && !attrib) {
        /* nothing more to install; an existing extra block is kept */
        Py_RETURN_NONE;
    }

    /* Compute 'nchildren'. */
    if (children) {
        if (!PyList_Check(children)) {
            PyErr_SetString(PyExc_TypeError, "'_children' is not a list");
            return NULL;
        }
        nchildren = PyList_GET_SIZE(children);

        /* (Re-)allocate 'extra'.
           Avoid DECREFs calling into this code again (cycles, etc.)
         */
        oldextra = self->extra;
        self->extra = NULL;
        if (element_resize(self, nchildren)) {
            /* allocation failed: drop whatever was created and put the
               previous extra block back */
            assert(!self->extra || !self->extra->length);
            clear_extra(self);
            self->extra = oldextra;
            return NULL;
        }
        assert(self->extra);
        assert(self->extra->allocated >= nchildren);
        if (oldextra) {
            /* move the attrib reference over to the new block so that
               releasing oldextra later does not drop it */
            assert(self->extra->attrib == NULL);
            self->extra->attrib = oldextra->attrib;
            oldextra->attrib = NULL;
        }

        /* Copy children */
        for (i = 0; i < nchildren; i++) {
            PyObject *child = PyList_GET_ITEM(children, i);
            if (!Element_Check(child)) {
                raise_type_error(child);
                /* keep the i children stored so far in the new block and
                   release the old block */
                self->extra->length = i;
                dealloc_extra(oldextra);
                return NULL;
            }
            Py_INCREF(child);
            self->extra->children[i] = child;
        }

        assert(!self->extra->length);
        self->extra->length = nchildren;
    }
    else {
        /* attrib only: just make sure the extra block exists */
        if (element_resize(self, 0)) {
            return NULL;
        }
    }

    /* Stash attrib. */
    /* When attrib is NULL this resets the block's attrib to NULL. */
    Py_XINCREF(attrib);
    Py_XSETREF(self->extra->attrib, attrib);
    /* oldextra is NULL unless children were given (dealloc_extra accepts
       NULL) */
    dealloc_extra(oldextra);

    Py_RETURN_NONE;
}
1058 
1059 /* __setstate__ for Element instance from the Python implementation.
1060  * 'state' should be the instance dict.
1061  */
1062 
1063 static PyObject *
element_setstate_from_Python(ElementObject * self,PyObject * state)1064 element_setstate_from_Python(ElementObject *self, PyObject *state)
1065 {
1066     static char *kwlist[] = {PICKLED_TAG, PICKLED_ATTRIB, PICKLED_TEXT,
1067                              PICKLED_TAIL, PICKLED_CHILDREN, 0};
1068     PyObject *args;
1069     PyObject *tag, *attrib, *text, *tail, *children;
1070     PyObject *retval;
1071 
1072     tag = attrib = text = tail = children = NULL;
1073     args = PyTuple_New(0);
1074     if (!args)
1075         return NULL;
1076 
1077     if (PyArg_ParseTupleAndKeywords(args, state, "|$OOOOO", kwlist, &tag,
1078                                     &attrib, &text, &tail, &children))
1079         retval = element_setstate_from_attributes(self, tag, attrib, text,
1080                                                   tail, children);
1081     else
1082         retval = NULL;
1083 
1084     Py_DECREF(args);
1085     return retval;
1086 }
1087 
1088 /*[clinic input]
1089 _elementtree.Element.__setstate__
1090 
1091     state: object
1092     /
1093 
1094 [clinic start generated code]*/
1095 
1096 static PyObject *
_elementtree_Element___setstate__(ElementObject * self,PyObject * state)1097 _elementtree_Element___setstate__(ElementObject *self, PyObject *state)
1098 /*[clinic end generated code: output=ea28bf3491b1f75e input=aaf80abea7c1e3b9]*/
1099 {
1100     if (!PyDict_CheckExact(state)) {
1101         PyErr_Format(PyExc_TypeError,
1102                      "Don't know how to unpickle \"%.200R\" as an Element",
1103                      state);
1104         return NULL;
1105     }
1106     else
1107         return element_setstate_from_Python(self, state);
1108 }
1109 
1110 LOCAL(int)
checkpath(PyObject * tag)1111 checkpath(PyObject* tag)
1112 {
1113     Py_ssize_t i;
1114     int check = 1;
1115 
1116     /* check if a tag contains an xpath character */
1117 
1118 #define PATHCHAR(ch) \
1119     (ch == '/' || ch == '*' || ch == '[' || ch == '@' || ch == '.')
1120 
1121     if (PyUnicode_Check(tag)) {
1122         const Py_ssize_t len = PyUnicode_GET_LENGTH(tag);
1123         const void *data = PyUnicode_DATA(tag);
1124         unsigned int kind = PyUnicode_KIND(tag);
1125         if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && (
1126                 PyUnicode_READ(kind, data, 1) == '}' || (
1127                 PyUnicode_READ(kind, data, 1) == '*' &&
1128                 PyUnicode_READ(kind, data, 2) == '}'))) {
1129             /* wildcard: '{}tag' or '{*}tag' */
1130             return 1;
1131         }
1132         for (i = 0; i < len; i++) {
1133             Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1134             if (ch == '{')
1135                 check = 0;
1136             else if (ch == '}')
1137                 check = 1;
1138             else if (check && PATHCHAR(ch))
1139                 return 1;
1140         }
1141         return 0;
1142     }
1143     if (PyBytes_Check(tag)) {
1144         const char *p = PyBytes_AS_STRING(tag);
1145         const Py_ssize_t len = PyBytes_GET_SIZE(tag);
1146         if (len >= 3 && p[0] == '{' && (
1147                 p[1] == '}' || (p[1] == '*' && p[2] == '}'))) {
1148             /* wildcard: '{}tag' or '{*}tag' */
1149             return 1;
1150         }
1151         for (i = 0; i < len; i++) {
1152             if (p[i] == '{')
1153                 check = 0;
1154             else if (p[i] == '}')
1155                 check = 1;
1156             else if (check && PATHCHAR(p[i]))
1157                 return 1;
1158         }
1159         return 0;
1160     }
1161 
1162     return 1; /* unknown type; might be path expression */
1163 }
1164 
1165 /*[clinic input]
1166 _elementtree.Element.extend
1167 
1168     elements: object
1169     /
1170 
1171 [clinic start generated code]*/
1172 
1173 static PyObject *
_elementtree_Element_extend(ElementObject * self,PyObject * elements)1174 _elementtree_Element_extend(ElementObject *self, PyObject *elements)
1175 /*[clinic end generated code: output=f6e67fc2ff529191 input=807bc4f31c69f7c0]*/
1176 {
1177     PyObject* seq;
1178     Py_ssize_t i;
1179 
1180     seq = PySequence_Fast(elements, "");
1181     if (!seq) {
1182         PyErr_Format(
1183             PyExc_TypeError,
1184             "expected sequence, not \"%.200s\"", Py_TYPE(elements)->tp_name
1185             );
1186         return NULL;
1187     }
1188 
1189     for (i = 0; i < PySequence_Fast_GET_SIZE(seq); i++) {
1190         PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1191         Py_INCREF(element);
1192         if (element_add_subelement(self, element) < 0) {
1193             Py_DECREF(seq);
1194             Py_DECREF(element);
1195             return NULL;
1196         }
1197         Py_DECREF(element);
1198     }
1199 
1200     Py_DECREF(seq);
1201 
1202     Py_RETURN_NONE;
1203 }
1204 
1205 /*[clinic input]
1206 _elementtree.Element.find
1207 
1208     path: object
1209     namespaces: object = None
1210 
1211 [clinic start generated code]*/
1212 
1213 static PyObject *
_elementtree_Element_find_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1214 _elementtree_Element_find_impl(ElementObject *self, PyObject *path,
1215                                PyObject *namespaces)
1216 /*[clinic end generated code: output=41b43f0f0becafae input=359b6985f6489d2e]*/
1217 {
1218     Py_ssize_t i;
1219     elementtreestate *st = ET_STATE_GLOBAL;
1220 
1221     if (checkpath(path) || namespaces != Py_None) {
1222         _Py_IDENTIFIER(find);
1223         return _PyObject_CallMethodIdObjArgs(
1224             st->elementpath_obj, &PyId_find, self, path, namespaces, NULL
1225             );
1226     }
1227 
1228     if (!self->extra)
1229         Py_RETURN_NONE;
1230 
1231     for (i = 0; i < self->extra->length; i++) {
1232         PyObject* item = self->extra->children[i];
1233         int rc;
1234         assert(Element_Check(item));
1235         Py_INCREF(item);
1236         rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1237         if (rc > 0)
1238             return item;
1239         Py_DECREF(item);
1240         if (rc < 0)
1241             return NULL;
1242     }
1243 
1244     Py_RETURN_NONE;
1245 }
1246 
1247 /*[clinic input]
1248 _elementtree.Element.findtext
1249 
1250     path: object
1251     default: object = None
1252     namespaces: object = None
1253 
1254 [clinic start generated code]*/
1255 
1256 static PyObject *
_elementtree_Element_findtext_impl(ElementObject * self,PyObject * path,PyObject * default_value,PyObject * namespaces)1257 _elementtree_Element_findtext_impl(ElementObject *self, PyObject *path,
1258                                    PyObject *default_value,
1259                                    PyObject *namespaces)
1260 /*[clinic end generated code: output=83b3ba4535d308d2 input=b53a85aa5aa2a916]*/
1261 {
1262     Py_ssize_t i;
1263     _Py_IDENTIFIER(findtext);
1264     elementtreestate *st = ET_STATE_GLOBAL;
1265 
1266     if (checkpath(path) || namespaces != Py_None)
1267         return _PyObject_CallMethodIdObjArgs(
1268             st->elementpath_obj, &PyId_findtext,
1269             self, path, default_value, namespaces, NULL
1270             );
1271 
1272     if (!self->extra) {
1273         Py_INCREF(default_value);
1274         return default_value;
1275     }
1276 
1277     for (i = 0; i < self->extra->length; i++) {
1278         PyObject *item = self->extra->children[i];
1279         int rc;
1280         assert(Element_Check(item));
1281         Py_INCREF(item);
1282         rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1283         if (rc > 0) {
1284             PyObject* text = element_get_text((ElementObject*)item);
1285             if (text == Py_None) {
1286                 Py_DECREF(item);
1287                 return PyUnicode_New(0, 0);
1288             }
1289             Py_XINCREF(text);
1290             Py_DECREF(item);
1291             return text;
1292         }
1293         Py_DECREF(item);
1294         if (rc < 0)
1295             return NULL;
1296     }
1297 
1298     Py_INCREF(default_value);
1299     return default_value;
1300 }
1301 
1302 /*[clinic input]
1303 _elementtree.Element.findall
1304 
1305     path: object
1306     namespaces: object = None
1307 
1308 [clinic start generated code]*/
1309 
1310 static PyObject *
_elementtree_Element_findall_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1311 _elementtree_Element_findall_impl(ElementObject *self, PyObject *path,
1312                                   PyObject *namespaces)
1313 /*[clinic end generated code: output=1a0bd9f5541b711d input=4d9e6505a638550c]*/
1314 {
1315     Py_ssize_t i;
1316     PyObject* out;
1317     elementtreestate *st = ET_STATE_GLOBAL;
1318 
1319     if (checkpath(path) || namespaces != Py_None) {
1320         _Py_IDENTIFIER(findall);
1321         return _PyObject_CallMethodIdObjArgs(
1322             st->elementpath_obj, &PyId_findall, self, path, namespaces, NULL
1323             );
1324     }
1325 
1326     out = PyList_New(0);
1327     if (!out)
1328         return NULL;
1329 
1330     if (!self->extra)
1331         return out;
1332 
1333     for (i = 0; i < self->extra->length; i++) {
1334         PyObject* item = self->extra->children[i];
1335         int rc;
1336         assert(Element_Check(item));
1337         Py_INCREF(item);
1338         rc = PyObject_RichCompareBool(((ElementObject*)item)->tag, path, Py_EQ);
1339         if (rc != 0 && (rc < 0 || PyList_Append(out, item) < 0)) {
1340             Py_DECREF(item);
1341             Py_DECREF(out);
1342             return NULL;
1343         }
1344         Py_DECREF(item);
1345     }
1346 
1347     return out;
1348 }
1349 
1350 /*[clinic input]
1351 _elementtree.Element.iterfind
1352 
1353     path: object
1354     namespaces: object = None
1355 
1356 [clinic start generated code]*/
1357 
1358 static PyObject *
_elementtree_Element_iterfind_impl(ElementObject * self,PyObject * path,PyObject * namespaces)1359 _elementtree_Element_iterfind_impl(ElementObject *self, PyObject *path,
1360                                    PyObject *namespaces)
1361 /*[clinic end generated code: output=ecdd56d63b19d40f input=abb974e350fb65c7]*/
1362 {
1363     PyObject* tag = path;
1364     _Py_IDENTIFIER(iterfind);
1365     elementtreestate *st = ET_STATE_GLOBAL;
1366 
1367     return _PyObject_CallMethodIdObjArgs(
1368         st->elementpath_obj, &PyId_iterfind, self, tag, namespaces, NULL);
1369 }
1370 
1371 /*[clinic input]
1372 _elementtree.Element.get
1373 
1374     key: object
1375     default: object = None
1376 
1377 [clinic start generated code]*/
1378 
1379 static PyObject *
_elementtree_Element_get_impl(ElementObject * self,PyObject * key,PyObject * default_value)1380 _elementtree_Element_get_impl(ElementObject *self, PyObject *key,
1381                               PyObject *default_value)
1382 /*[clinic end generated code: output=523c614142595d75 input=ee153bbf8cdb246e]*/
1383 {
1384     if (self->extra && self->extra->attrib) {
1385         PyObject *attrib = self->extra->attrib;
1386         Py_INCREF(attrib);
1387         PyObject *value = PyDict_GetItemWithError(attrib, key);
1388         Py_XINCREF(value);
1389         Py_DECREF(attrib);
1390         if (value != NULL || PyErr_Occurred()) {
1391             return value;
1392         }
1393     }
1394 
1395     Py_INCREF(default_value);
1396     return default_value;
1397 }
1398 
/* Forward declaration: create_elementiter() is called by the iter() and
   itertext() implementations below before its definition is visible. */
static PyObject *
create_elementiter(ElementObject *self, PyObject *tag, int gettext);
1401 
1402 
1403 /*[clinic input]
1404 _elementtree.Element.iter
1405 
1406     tag: object = None
1407 
1408 [clinic start generated code]*/
1409 
1410 static PyObject *
_elementtree_Element_iter_impl(ElementObject * self,PyObject * tag)1411 _elementtree_Element_iter_impl(ElementObject *self, PyObject *tag)
1412 /*[clinic end generated code: output=3f49f9a862941cc5 input=774d5b12e573aedd]*/
1413 {
1414     if (PyUnicode_Check(tag)) {
1415         if (PyUnicode_READY(tag) < 0)
1416             return NULL;
1417         if (PyUnicode_GET_LENGTH(tag) == 1 && PyUnicode_READ_CHAR(tag, 0) == '*')
1418             tag = Py_None;
1419     }
1420     else if (PyBytes_Check(tag)) {
1421         if (PyBytes_GET_SIZE(tag) == 1 && *PyBytes_AS_STRING(tag) == '*')
1422             tag = Py_None;
1423     }
1424 
1425     return create_elementiter(self, tag, 0);
1426 }
1427 
1428 
1429 /*[clinic input]
1430 _elementtree.Element.itertext
1431 
1432 [clinic start generated code]*/
1433 
1434 static PyObject *
_elementtree_Element_itertext_impl(ElementObject * self)1435 _elementtree_Element_itertext_impl(ElementObject *self)
1436 /*[clinic end generated code: output=5fa34b2fbcb65df6 input=af8f0e42cb239c89]*/
1437 {
1438     return create_elementiter(self, Py_None, 1);
1439 }
1440 
1441 
1442 static PyObject*
element_getitem(PyObject * self_,Py_ssize_t index)1443 element_getitem(PyObject* self_, Py_ssize_t index)
1444 {
1445     ElementObject* self = (ElementObject*) self_;
1446 
1447     if (!self->extra || index < 0 || index >= self->extra->length) {
1448         PyErr_SetString(
1449             PyExc_IndexError,
1450             "child index out of range"
1451             );
1452         return NULL;
1453     }
1454 
1455     Py_INCREF(self->extra->children[index]);
1456     return self->extra->children[index];
1457 }
1458 
1459 /*[clinic input]
1460 _elementtree.Element.insert
1461 
1462     index: Py_ssize_t
1463     subelement: object(subclass_of='&Element_Type')
1464     /
1465 
1466 [clinic start generated code]*/
1467 
1468 static PyObject *
_elementtree_Element_insert_impl(ElementObject * self,Py_ssize_t index,PyObject * subelement)1469 _elementtree_Element_insert_impl(ElementObject *self, Py_ssize_t index,
1470                                  PyObject *subelement)
1471 /*[clinic end generated code: output=990adfef4d424c0b input=cd6fbfcdab52d7a8]*/
1472 {
1473     Py_ssize_t i;
1474 
1475     if (!self->extra) {
1476         if (create_extra(self, NULL) < 0)
1477             return NULL;
1478     }
1479 
1480     if (index < 0) {
1481         index += self->extra->length;
1482         if (index < 0)
1483             index = 0;
1484     }
1485     if (index > self->extra->length)
1486         index = self->extra->length;
1487 
1488     if (element_resize(self, 1) < 0)
1489         return NULL;
1490 
1491     for (i = self->extra->length; i > index; i--)
1492         self->extra->children[i] = self->extra->children[i-1];
1493 
1494     Py_INCREF(subelement);
1495     self->extra->children[index] = subelement;
1496 
1497     self->extra->length++;
1498 
1499     Py_RETURN_NONE;
1500 }
1501 
1502 /*[clinic input]
1503 _elementtree.Element.items
1504 
1505 [clinic start generated code]*/
1506 
1507 static PyObject *
_elementtree_Element_items_impl(ElementObject * self)1508 _elementtree_Element_items_impl(ElementObject *self)
1509 /*[clinic end generated code: output=6db2c778ce3f5a4d input=adbe09aaea474447]*/
1510 {
1511     if (!self->extra || !self->extra->attrib)
1512         return PyList_New(0);
1513 
1514     return PyDict_Items(self->extra->attrib);
1515 }
1516 
1517 /*[clinic input]
1518 _elementtree.Element.keys
1519 
1520 [clinic start generated code]*/
1521 
1522 static PyObject *
_elementtree_Element_keys_impl(ElementObject * self)1523 _elementtree_Element_keys_impl(ElementObject *self)
1524 /*[clinic end generated code: output=bc5bfabbf20eeb3c input=f02caf5b496b5b0b]*/
1525 {
1526     if (!self->extra || !self->extra->attrib)
1527         return PyList_New(0);
1528 
1529     return PyDict_Keys(self->extra->attrib);
1530 }
1531 
1532 static Py_ssize_t
element_length(ElementObject * self)1533 element_length(ElementObject* self)
1534 {
1535     if (!self->extra)
1536         return 0;
1537 
1538     return self->extra->length;
1539 }
1540 
1541 /*[clinic input]
1542 _elementtree.Element.makeelement
1543 
1544     tag: object
1545     attrib: object(subclass_of='&PyDict_Type')
1546     /
1547 
1548 [clinic start generated code]*/
1549 
1550 static PyObject *
_elementtree_Element_makeelement_impl(ElementObject * self,PyObject * tag,PyObject * attrib)1551 _elementtree_Element_makeelement_impl(ElementObject *self, PyObject *tag,
1552                                       PyObject *attrib)
1553 /*[clinic end generated code: output=4109832d5bb789ef input=2279d974529c3861]*/
1554 {
1555     PyObject* elem;
1556 
1557     attrib = PyDict_Copy(attrib);
1558     if (!attrib)
1559         return NULL;
1560 
1561     elem = create_new_element(tag, attrib);
1562 
1563     Py_DECREF(attrib);
1564 
1565     return elem;
1566 }
1567 
1568 /*[clinic input]
1569 _elementtree.Element.remove
1570 
1571     subelement: object(subclass_of='&Element_Type')
1572     /
1573 
1574 [clinic start generated code]*/
1575 
1576 static PyObject *
_elementtree_Element_remove_impl(ElementObject * self,PyObject * subelement)1577 _elementtree_Element_remove_impl(ElementObject *self, PyObject *subelement)
1578 /*[clinic end generated code: output=38fe6c07d6d87d1f input=d52fc28ededc0bd8]*/
1579 {
1580     Py_ssize_t i;
1581     int rc;
1582     PyObject *found;
1583 
1584     if (!self->extra) {
1585         /* element has no children, so raise exception */
1586         PyErr_SetString(
1587             PyExc_ValueError,
1588             "list.remove(x): x not in list"
1589             );
1590         return NULL;
1591     }
1592 
1593     for (i = 0; i < self->extra->length; i++) {
1594         if (self->extra->children[i] == subelement)
1595             break;
1596         rc = PyObject_RichCompareBool(self->extra->children[i], subelement, Py_EQ);
1597         if (rc > 0)
1598             break;
1599         if (rc < 0)
1600             return NULL;
1601     }
1602 
1603     if (i >= self->extra->length) {
1604         /* subelement is not in children, so raise exception */
1605         PyErr_SetString(
1606             PyExc_ValueError,
1607             "list.remove(x): x not in list"
1608             );
1609         return NULL;
1610     }
1611 
1612     found = self->extra->children[i];
1613 
1614     self->extra->length--;
1615     for (; i < self->extra->length; i++)
1616         self->extra->children[i] = self->extra->children[i+1];
1617 
1618     Py_DECREF(found);
1619     Py_RETURN_NONE;
1620 }
1621 
1622 static PyObject*
element_repr(ElementObject * self)1623 element_repr(ElementObject* self)
1624 {
1625     int status;
1626 
1627     if (self->tag == NULL)
1628         return PyUnicode_FromFormat("<Element at %p>", self);
1629 
1630     status = Py_ReprEnter((PyObject *)self);
1631     if (status == 0) {
1632         PyObject *res;
1633         res = PyUnicode_FromFormat("<Element %R at %p>", self->tag, self);
1634         Py_ReprLeave((PyObject *)self);
1635         return res;
1636     }
1637     if (status > 0)
1638         PyErr_Format(PyExc_RuntimeError,
1639                      "reentrant call inside %s.__repr__",
1640                      Py_TYPE(self)->tp_name);
1641     return NULL;
1642 }
1643 
1644 /*[clinic input]
1645 _elementtree.Element.set
1646 
1647     key: object
1648     value: object
1649     /
1650 
1651 [clinic start generated code]*/
1652 
1653 static PyObject *
_elementtree_Element_set_impl(ElementObject * self,PyObject * key,PyObject * value)1654 _elementtree_Element_set_impl(ElementObject *self, PyObject *key,
1655                               PyObject *value)
1656 /*[clinic end generated code: output=fb938806be3c5656 input=1efe90f7d82b3fe9]*/
1657 {
1658     PyObject* attrib;
1659 
1660     if (!self->extra) {
1661         if (create_extra(self, NULL) < 0)
1662             return NULL;
1663     }
1664 
1665     attrib = element_get_attrib(self);
1666     if (!attrib)
1667         return NULL;
1668 
1669     if (PyDict_SetItem(attrib, key, value) < 0)
1670         return NULL;
1671 
1672     Py_RETURN_NONE;
1673 }
1674 
1675 static int
element_setitem(PyObject * self_,Py_ssize_t index,PyObject * item)1676 element_setitem(PyObject* self_, Py_ssize_t index, PyObject* item)
1677 {
1678     ElementObject* self = (ElementObject*) self_;
1679     Py_ssize_t i;
1680     PyObject* old;
1681 
1682     if (!self->extra || index < 0 || index >= self->extra->length) {
1683         PyErr_SetString(
1684             PyExc_IndexError,
1685             "child assignment index out of range");
1686         return -1;
1687     }
1688 
1689     old = self->extra->children[index];
1690 
1691     if (item) {
1692         if (!Element_Check(item)) {
1693             raise_type_error(item);
1694             return -1;
1695         }
1696         Py_INCREF(item);
1697         self->extra->children[index] = item;
1698     } else {
1699         self->extra->length--;
1700         for (i = index; i < self->extra->length; i++)
1701             self->extra->children[i] = self->extra->children[i+1];
1702     }
1703 
1704     Py_DECREF(old);
1705 
1706     return 0;
1707 }
1708 
1709 static PyObject*
element_subscr(PyObject * self_,PyObject * item)1710 element_subscr(PyObject* self_, PyObject* item)
1711 {
1712     ElementObject* self = (ElementObject*) self_;
1713 
1714     if (PyIndex_Check(item)) {
1715         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1716 
1717         if (i == -1 && PyErr_Occurred()) {
1718             return NULL;
1719         }
1720         if (i < 0 && self->extra)
1721             i += self->extra->length;
1722         return element_getitem(self_, i);
1723     }
1724     else if (PySlice_Check(item)) {
1725         Py_ssize_t start, stop, step, slicelen, i;
1726         size_t cur;
1727         PyObject* list;
1728 
1729         if (!self->extra)
1730             return PyList_New(0);
1731 
1732         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1733             return NULL;
1734         }
1735         slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1736                                          step);
1737 
1738         if (slicelen <= 0)
1739             return PyList_New(0);
1740         else {
1741             list = PyList_New(slicelen);
1742             if (!list)
1743                 return NULL;
1744 
1745             for (cur = start, i = 0; i < slicelen;
1746                  cur += step, i++) {
1747                 PyObject* item = self->extra->children[cur];
1748                 Py_INCREF(item);
1749                 PyList_SET_ITEM(list, i, item);
1750             }
1751 
1752             return list;
1753         }
1754     }
1755     else {
1756         PyErr_SetString(PyExc_TypeError,
1757                 "element indices must be integers");
1758         return NULL;
1759     }
1760 }
1761 
1762 static int
element_ass_subscr(PyObject * self_,PyObject * item,PyObject * value)1763 element_ass_subscr(PyObject* self_, PyObject* item, PyObject* value)
1764 {
1765     ElementObject* self = (ElementObject*) self_;
1766 
1767     if (PyIndex_Check(item)) {
1768         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1769 
1770         if (i == -1 && PyErr_Occurred()) {
1771             return -1;
1772         }
1773         if (i < 0 && self->extra)
1774             i += self->extra->length;
1775         return element_setitem(self_, i, value);
1776     }
1777     else if (PySlice_Check(item)) {
1778         Py_ssize_t start, stop, step, slicelen, newlen, i;
1779         size_t cur;
1780 
1781         PyObject* recycle = NULL;
1782         PyObject* seq;
1783 
1784         if (!self->extra) {
1785             if (create_extra(self, NULL) < 0)
1786                 return -1;
1787         }
1788 
1789         if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
1790             return -1;
1791         }
1792         slicelen = PySlice_AdjustIndices(self->extra->length, &start, &stop,
1793                                          step);
1794 
1795         if (value == NULL) {
1796             /* Delete slice */
1797             size_t cur;
1798             Py_ssize_t i;
1799 
1800             if (slicelen <= 0)
1801                 return 0;
1802 
1803             /* Since we're deleting, the direction of the range doesn't matter,
1804              * so for simplicity make it always ascending.
1805             */
1806             if (step < 0) {
1807                 stop = start + 1;
1808                 start = stop + step * (slicelen - 1) - 1;
1809                 step = -step;
1810             }
1811 
1812             assert((size_t)slicelen <= SIZE_MAX / sizeof(PyObject *));
1813 
1814             /* recycle is a list that will contain all the children
1815              * scheduled for removal.
1816             */
1817             if (!(recycle = PyList_New(slicelen))) {
1818                 return -1;
1819             }
1820 
1821             /* This loop walks over all the children that have to be deleted,
1822              * with cur pointing at them. num_moved is the amount of children
1823              * until the next deleted child that have to be "shifted down" to
1824              * occupy the deleted's places.
1825              * Note that in the ith iteration, shifting is done i+i places down
1826              * because i children were already removed.
1827             */
1828             for (cur = start, i = 0; cur < (size_t)stop; cur += step, ++i) {
1829                 /* Compute how many children have to be moved, clipping at the
1830                  * list end.
1831                 */
1832                 Py_ssize_t num_moved = step - 1;
1833                 if (cur + step >= (size_t)self->extra->length) {
1834                     num_moved = self->extra->length - cur - 1;
1835                 }
1836 
1837                 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1838 
1839                 memmove(
1840                     self->extra->children + cur - i,
1841                     self->extra->children + cur + 1,
1842                     num_moved * sizeof(PyObject *));
1843             }
1844 
1845             /* Leftover "tail" after the last removed child */
1846             cur = start + (size_t)slicelen * step;
1847             if (cur < (size_t)self->extra->length) {
1848                 memmove(
1849                     self->extra->children + cur - slicelen,
1850                     self->extra->children + cur,
1851                     (self->extra->length - cur) * sizeof(PyObject *));
1852             }
1853 
1854             self->extra->length -= slicelen;
1855 
1856             /* Discard the recycle list with all the deleted sub-elements */
1857             Py_DECREF(recycle);
1858             return 0;
1859         }
1860 
1861         /* A new slice is actually being assigned */
1862         seq = PySequence_Fast(value, "");
1863         if (!seq) {
1864             PyErr_Format(
1865                 PyExc_TypeError,
1866                 "expected sequence, not \"%.200s\"", Py_TYPE(value)->tp_name
1867                 );
1868             return -1;
1869         }
1870         newlen = PySequence_Fast_GET_SIZE(seq);
1871 
1872         if (step !=  1 && newlen != slicelen)
1873         {
1874             Py_DECREF(seq);
1875             PyErr_Format(PyExc_ValueError,
1876                 "attempt to assign sequence of size %zd "
1877                 "to extended slice of size %zd",
1878                 newlen, slicelen
1879                 );
1880             return -1;
1881         }
1882 
1883         /* Resize before creating the recycle bin, to prevent refleaks. */
1884         if (newlen > slicelen) {
1885             if (element_resize(self, newlen - slicelen) < 0) {
1886                 Py_DECREF(seq);
1887                 return -1;
1888             }
1889         }
1890 
1891         for (i = 0; i < newlen; i++) {
1892             PyObject *element = PySequence_Fast_GET_ITEM(seq, i);
1893             if (!Element_Check(element)) {
1894                 raise_type_error(element);
1895                 Py_DECREF(seq);
1896                 return -1;
1897             }
1898         }
1899 
1900         if (slicelen > 0) {
1901             /* to avoid recursive calls to this method (via decref), move
1902                old items to the recycle bin here, and get rid of them when
1903                we're done modifying the element */
1904             recycle = PyList_New(slicelen);
1905             if (!recycle) {
1906                 Py_DECREF(seq);
1907                 return -1;
1908             }
1909             for (cur = start, i = 0; i < slicelen;
1910                  cur += step, i++)
1911                 PyList_SET_ITEM(recycle, i, self->extra->children[cur]);
1912         }
1913 
1914         if (newlen < slicelen) {
1915             /* delete slice */
1916             for (i = stop; i < self->extra->length; i++)
1917                 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1918         } else if (newlen > slicelen) {
1919             /* insert slice */
1920             for (i = self->extra->length-1; i >= stop; i--)
1921                 self->extra->children[i + newlen - slicelen] = self->extra->children[i];
1922         }
1923 
1924         /* replace the slice */
1925         for (cur = start, i = 0; i < newlen;
1926              cur += step, i++) {
1927             PyObject* element = PySequence_Fast_GET_ITEM(seq, i);
1928             Py_INCREF(element);
1929             self->extra->children[cur] = element;
1930         }
1931 
1932         self->extra->length += newlen - slicelen;
1933 
1934         Py_DECREF(seq);
1935 
1936         /* discard the recycle bin, and everything in it */
1937         Py_XDECREF(recycle);
1938 
1939         return 0;
1940     }
1941     else {
1942         PyErr_SetString(PyExc_TypeError,
1943                 "element indices must be integers");
1944         return -1;
1945     }
1946 }
1947 
1948 static PyObject*
element_tag_getter(ElementObject * self,void * closure)1949 element_tag_getter(ElementObject *self, void *closure)
1950 {
1951     PyObject *res = self->tag;
1952     Py_INCREF(res);
1953     return res;
1954 }
1955 
1956 static PyObject*
element_text_getter(ElementObject * self,void * closure)1957 element_text_getter(ElementObject *self, void *closure)
1958 {
1959     PyObject *res = element_get_text(self);
1960     Py_XINCREF(res);
1961     return res;
1962 }
1963 
1964 static PyObject*
element_tail_getter(ElementObject * self,void * closure)1965 element_tail_getter(ElementObject *self, void *closure)
1966 {
1967     PyObject *res = element_get_tail(self);
1968     Py_XINCREF(res);
1969     return res;
1970 }
1971 
1972 static PyObject*
element_attrib_getter(ElementObject * self,void * closure)1973 element_attrib_getter(ElementObject *self, void *closure)
1974 {
1975     PyObject *res;
1976     if (!self->extra) {
1977         if (create_extra(self, NULL) < 0)
1978             return NULL;
1979     }
1980     res = element_get_attrib(self);
1981     Py_XINCREF(res);
1982     return res;
1983 }
1984 
/* Macro for setter validation.
 * A NULL value is rejected with AttributeError ("can't delete element
 * attribute") and the *enclosing* function returns -1.
 * The body is wrapped in do { ... } while (0) so that the macro expands to a
 * single statement: it must be followed by a semicolon and cannot swallow an
 * `else` that follows it at the call site. */
#define _VALIDATE_ATTR_VALUE(V)                     \
    do {                                            \
        if ((V) == NULL) {                          \
            PyErr_SetString(                        \
                PyExc_AttributeError,               \
                "can't delete element attribute");  \
            return -1;                              \
        }                                           \
    } while (0)
1993 
1994 static int
element_tag_setter(ElementObject * self,PyObject * value,void * closure)1995 element_tag_setter(ElementObject *self, PyObject *value, void *closure)
1996 {
1997     _VALIDATE_ATTR_VALUE(value);
1998     Py_INCREF(value);
1999     Py_SETREF(self->tag, value);
2000     return 0;
2001 }
2002 
2003 static int
element_text_setter(ElementObject * self,PyObject * value,void * closure)2004 element_text_setter(ElementObject *self, PyObject *value, void *closure)
2005 {
2006     _VALIDATE_ATTR_VALUE(value);
2007     Py_INCREF(value);
2008     _set_joined_ptr(&self->text, value);
2009     return 0;
2010 }
2011 
2012 static int
element_tail_setter(ElementObject * self,PyObject * value,void * closure)2013 element_tail_setter(ElementObject *self, PyObject *value, void *closure)
2014 {
2015     _VALIDATE_ATTR_VALUE(value);
2016     Py_INCREF(value);
2017     _set_joined_ptr(&self->tail, value);
2018     return 0;
2019 }
2020 
2021 static int
element_attrib_setter(ElementObject * self,PyObject * value,void * closure)2022 element_attrib_setter(ElementObject *self, PyObject *value, void *closure)
2023 {
2024     _VALIDATE_ATTR_VALUE(value);
2025     if (!PyDict_Check(value)) {
2026         PyErr_Format(PyExc_TypeError,
2027                      "attrib must be dict, not %.200s",
2028                      Py_TYPE(value)->tp_name);
2029         return -1;
2030     }
2031     if (!self->extra) {
2032         if (create_extra(self, NULL) < 0)
2033             return -1;
2034     }
2035     Py_INCREF(value);
2036     Py_XSETREF(self->extra->attrib, value);
2037     return 0;
2038 }
2039 
/* Sequence protocol slots for the Element type.  Only length, item access and
 * item assignment are provided; the remaining slots are left empty.  Slot
 * names below follow the PySequenceMethods field order. */
static PySequenceMethods element_as_sequence = {
    (lenfunc) element_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    element_getitem, /* sq_item */
    0, /* was_sq_slice */
    element_setitem, /* sq_ass_item */
    0, /* was_sq_ass_slice */
};
2049 
2050 /******************************* Element iterator ****************************/
2051 
2052 /* ElementIterObject represents the iteration state over an XML element in
2053  * pre-order traversal. To keep track of which sub-element should be returned
2054  * next, a stack of parents is maintained. This is a standard stack-based
2055  * iterative pre-order traversal of a tree.
 * The stack is managed using a contiguous array.
2057  * Each stack item contains the saved parent to which we should return after
2058  * the current one is exhausted, and the next child to examine in that parent.
2059  */
/* One entry of the iterator's parent stack. */
typedef struct ParentLocator_t {
    /* Element whose children are being walked; parent_stack_push_new() takes
       a reference when the entry is created. */
    ElementObject *parent;
    /* Index of the next child of `parent` to examine (starts at 0). */
    Py_ssize_t child_index;
} ParentLocator;
2064 
/* State of one element iterator (see elementiter_next). */
typedef struct {
    PyObject_HEAD
    ParentLocator *parent_stack;    /* stack storage, allocated with PyMem */
    Py_ssize_t parent_stack_used;   /* number of entries currently on the stack */
    Py_ssize_t parent_stack_size;   /* allocated capacity of parent_stack, in entries */
    ElementObject *root_element;    /* element to start from; set to NULL once
                                       iteration has consumed it */
    PyObject *sought_tag;           /* tag to match; Py_None means every element
                                       is returned */
    int gettext;                    /* nonzero: yield text/tail values instead
                                       of elements */
} ElementIterObject;
2074 
2075 
2076 static void
elementiter_dealloc(ElementIterObject * it)2077 elementiter_dealloc(ElementIterObject *it)
2078 {
2079     Py_ssize_t i = it->parent_stack_used;
2080     it->parent_stack_used = 0;
2081     /* bpo-31095: UnTrack is needed before calling any callbacks */
2082     PyObject_GC_UnTrack(it);
2083     while (i--)
2084         Py_XDECREF(it->parent_stack[i].parent);
2085     PyMem_Free(it->parent_stack);
2086 
2087     Py_XDECREF(it->sought_tag);
2088     Py_XDECREF(it->root_element);
2089 
2090     PyObject_GC_Del(it);
2091 }
2092 
2093 static int
elementiter_traverse(ElementIterObject * it,visitproc visit,void * arg)2094 elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
2095 {
2096     Py_ssize_t i = it->parent_stack_used;
2097     while (i--)
2098         Py_VISIT(it->parent_stack[i].parent);
2099 
2100     Py_VISIT(it->root_element);
2101     Py_VISIT(it->sought_tag);
2102     return 0;
2103 }
2104 
2105 /* Helper function for elementiter_next. Add a new parent to the parent stack.
2106  */
2107 static int
parent_stack_push_new(ElementIterObject * it,ElementObject * parent)2108 parent_stack_push_new(ElementIterObject *it, ElementObject *parent)
2109 {
2110     ParentLocator *item;
2111 
2112     if (it->parent_stack_used >= it->parent_stack_size) {
2113         Py_ssize_t new_size = it->parent_stack_size * 2;  /* never overflow */
2114         ParentLocator *parent_stack = it->parent_stack;
2115         PyMem_Resize(parent_stack, ParentLocator, new_size);
2116         if (parent_stack == NULL)
2117             return -1;
2118         it->parent_stack = parent_stack;
2119         it->parent_stack_size = new_size;
2120     }
2121     item = it->parent_stack + it->parent_stack_used++;
2122     Py_INCREF(parent);
2123     item->parent = parent;
2124     item->child_index = 0;
2125     return 0;
2126 }
2127 
/* tp_iternext implementation for the element iterator.
 *
 * Returns a new reference to the next element (or, in gettext mode, to the
 * next non-empty text/tail value).  Returns NULL with StopIteration set when
 * the iterator is exhausted, or NULL with another exception set on error.
 *
 * Reference bookkeeping: at the `gettext` label and at the tag comparison the
 * local `elem` always carries one owned reference, which is either returned
 * to the caller or released.
 */
static PyObject *
elementiter_next(ElementIterObject *it)
{
    /* Sub-element iterator.
     *
     * A short note on gettext: this function serves both the iter() and
     * itertext() methods to avoid code duplication. However, there are a few
     * small differences in the way these iterations work. Namely:
     *   - itertext() only yields text from nodes that have it, and continues
     *     iterating when a node doesn't have text (so it doesn't return any
     *     node like iter())
     *   - itertext() also has to handle tail, after finishing with all the
     *     children of a node.
     */
    int rc;
    ElementObject *elem;
    PyObject *text;

    while (1) {
        /* Handle the case reached in the beginning and end of iteration, where
         * the parent stack is empty. If root_element is NULL and we're here, the
         * iterator is exhausted.
         */
        if (!it->parent_stack_used) {
            if (!it->root_element) {
                PyErr_SetNone(PyExc_StopIteration);
                return NULL;
            }

            /* Take over the iterator's own reference to the root. */
            elem = it->root_element;  /* steals a reference */
            it->root_element = NULL;
        }
        else {
            /* See if there are children left to traverse in the current parent. If
             * yes, visit the next child. If not, pop the stack and try again.
             */
            ParentLocator *item = &it->parent_stack[it->parent_stack_used - 1];
            Py_ssize_t child_index = item->child_index;
            ElementObjectExtra *extra;
            elem = item->parent;
            extra = elem->extra;
            if (!extra || child_index >= extra->length) {
                /* Pop: `elem` now carries the reference the stack entry held. */
                it->parent_stack_used--;
                /* Note the extra condition on it->parent_stack_used here;
                 * this is because itertext() is supposed to only return *inner*
                 * text, not text following the element it began iteration with.
                 */
                if (it->gettext && it->parent_stack_used) {
                    text = element_get_tail(elem);
                    goto gettext;
                }
                Py_DECREF(elem);
                continue;
            }

            assert(Element_Check(extra->children[child_index]));
            elem = (ElementObject *)extra->children[child_index];
            item->child_index++;
            /* Own the child while we work with it. */
            Py_INCREF(elem);
        }

        /* The pushed stack entry takes its own reference to elem; the
         * helper does not set an exception on failure, so do it here. */
        if (parent_stack_push_new(it, elem) < 0) {
            Py_DECREF(elem);
            PyErr_NoMemory();
            return NULL;
        }
        if (it->gettext) {
            text = element_get_text(elem);
            goto gettext;
        }

        /* Py_None as the sought tag matches every element. */
        if (it->sought_tag == Py_None)
            return (PyObject *)elem;

        rc = PyObject_RichCompareBool(elem->tag, it->sought_tag, Py_EQ);
        if (rc > 0)
            return (PyObject *)elem;

        /* No match (rc == 0) or comparison error (rc < 0). */
        Py_DECREF(elem);
        if (rc < 0)
            return NULL;
        continue;

gettext:
        /* `text` is the value obtained from element_get_text() or
         * element_get_tail(); NULL means that helper failed. */
        if (!text) {
            Py_DECREF(elem);
            return NULL;
        }
        if (text == Py_None) {
            /* Nothing to yield for this node; keep iterating. */
            Py_DECREF(elem);
        }
        else {
            /* Take a reference to text before releasing elem. */
            Py_INCREF(text);
            Py_DECREF(elem);
            /* Only truthy (non-empty) text is yielded. */
            rc = PyObject_IsTrue(text);
            if (rc > 0)
                return text;
            Py_DECREF(text);
            if (rc < 0)
                return NULL;
        }
    }

    /* Not reached: the loop above only exits through return statements. */
    return NULL;
}
2233 
2234 
/* Type object for the element iterator instantiated by create_elementiter().
 * Instances take part in cyclic GC (Py_TPFLAGS_HAVE_GC with a tp_traverse
 * slot), are their own iterator (PyObject_SelfIter), and advance through
 * elementiter_next().  The tp_new, tp_init and tp_alloc slots are left
 * empty in this table. */
static PyTypeObject ElementIter_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    /* Using the module's name since the pure-Python implementation does not
       have such a type. */
    "_elementtree._element_iterator",           /* tp_name */
    sizeof(ElementIterObject),                  /* tp_basicsize */
    0,                                          /* tp_itemsize */
    /* methods */
    (destructor)elementiter_dealloc,            /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)elementiter_traverse,         /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    PyObject_SelfIter,                          /* tp_iter */
    (iternextfunc)elementiter_next,             /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    0,                                          /* tp_new */
};
2278 
2279 #define INIT_PARENT_STACK_SIZE 8
2280 
2281 static PyObject *
create_elementiter(ElementObject * self,PyObject * tag,int gettext)2282 create_elementiter(ElementObject *self, PyObject *tag, int gettext)
2283 {
2284     ElementIterObject *it;
2285 
2286     it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
2287     if (!it)
2288         return NULL;
2289 
2290     Py_INCREF(tag);
2291     it->sought_tag = tag;
2292     it->gettext = gettext;
2293     Py_INCREF(self);
2294     it->root_element = self;
2295 
2296     it->parent_stack = PyMem_New(ParentLocator, INIT_PARENT_STACK_SIZE);
2297     if (it->parent_stack == NULL) {
2298         Py_DECREF(it);
2299         PyErr_NoMemory();
2300         return NULL;
2301     }
2302     it->parent_stack_used = 0;
2303     it->parent_stack_size = INIT_PARENT_STACK_SIZE;
2304 
2305     PyObject_GC_Track(it);
2306 
2307     return (PyObject *)it;
2308 }
2309 
2310 
2311 /* ==================================================================== */
2312 /* the tree builder type */
2313 
/* Instance layout of the TreeBuilder type: the tree under construction, the
 * pending character data, the stack of open elements, the factories used to
 * create nodes and the optional event-recording hooks. */
typedef struct {
    PyObject_HEAD

    PyObject *root; /* root node (first created node) */

    PyObject *this; /* current node */
    PyObject *last; /* most recently created node */
    PyObject *last_for_tail; /* most recently created node that takes a tail */

    PyObject *data; /* data collector (string or list), or NULL */

    PyObject *stack; /* element stack */
    Py_ssize_t index; /* current stack size (0 means empty) */

    /* node factories; a NULL element_factory means elements are created
       with create_new_element(), a NULL comment/pi factory means no node is
       created for comments/processing instructions */
    PyObject *element_factory;
    PyObject *comment_factory;
    PyObject *pi_factory;

    /* element tracing */
    PyObject *events_append; /* the append method of the list of events, or NULL */
    PyObject *start_event_obj; /* event objects (NULL to ignore) */
    PyObject *end_event_obj;
    PyObject *start_ns_event_obj;
    PyObject *end_ns_event_obj;
    PyObject *comment_event_obj;
    PyObject *pi_event_obj;

    /* nonzero: nodes produced by the comment/pi factory are also appended
       to the current element */
    char insert_comments;
    char insert_pis;
} TreeBuilderObject;

/* True when op's type is exactly TreeBuilder_Type (subclasses excluded). */
#define TreeBuilder_CheckExact(op) Py_IS_TYPE((op), &TreeBuilder_Type)
2346 
2347 /* -------------------------------------------------------------------- */
2348 /* constructor and destructor */
2349 
2350 static PyObject *
treebuilder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)2351 treebuilder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2352 {
2353     TreeBuilderObject *t = (TreeBuilderObject *)type->tp_alloc(type, 0);
2354     if (t != NULL) {
2355         t->root = NULL;
2356 
2357         Py_INCREF(Py_None);
2358         t->this = Py_None;
2359         Py_INCREF(Py_None);
2360         t->last = Py_None;
2361 
2362         t->data = NULL;
2363         t->element_factory = NULL;
2364         t->comment_factory = NULL;
2365         t->pi_factory = NULL;
2366         t->stack = PyList_New(20);
2367         if (!t->stack) {
2368             Py_DECREF(t->this);
2369             Py_DECREF(t->last);
2370             Py_DECREF((PyObject *) t);
2371             return NULL;
2372         }
2373         t->index = 0;
2374 
2375         t->events_append = NULL;
2376         t->start_event_obj = t->end_event_obj = NULL;
2377         t->start_ns_event_obj = t->end_ns_event_obj = NULL;
2378         t->comment_event_obj = t->pi_event_obj = NULL;
2379         t->insert_comments = t->insert_pis = 0;
2380     }
2381     return (PyObject *)t;
2382 }
2383 
2384 /*[clinic input]
2385 _elementtree.TreeBuilder.__init__
2386 
2387     element_factory: object = None
2388     *
2389     comment_factory: object = None
2390     pi_factory: object = None
2391     insert_comments: bool = False
2392     insert_pis: bool = False
2393 
2394 [clinic start generated code]*/
2395 
2396 static int
_elementtree_TreeBuilder___init___impl(TreeBuilderObject * self,PyObject * element_factory,PyObject * comment_factory,PyObject * pi_factory,int insert_comments,int insert_pis)2397 _elementtree_TreeBuilder___init___impl(TreeBuilderObject *self,
2398                                        PyObject *element_factory,
2399                                        PyObject *comment_factory,
2400                                        PyObject *pi_factory,
2401                                        int insert_comments, int insert_pis)
2402 /*[clinic end generated code: output=8571d4dcadfdf952 input=ae98a94df20b5cc3]*/
2403 {
2404     if (element_factory != Py_None) {
2405         Py_INCREF(element_factory);
2406         Py_XSETREF(self->element_factory, element_factory);
2407     } else {
2408         Py_CLEAR(self->element_factory);
2409     }
2410 
2411     if (comment_factory == Py_None) {
2412         elementtreestate *st = ET_STATE_GLOBAL;
2413         comment_factory = st->comment_factory;
2414     }
2415     if (comment_factory) {
2416         Py_INCREF(comment_factory);
2417         Py_XSETREF(self->comment_factory, comment_factory);
2418         self->insert_comments = insert_comments;
2419     } else {
2420         Py_CLEAR(self->comment_factory);
2421         self->insert_comments = 0;
2422     }
2423 
2424     if (pi_factory == Py_None) {
2425         elementtreestate *st = ET_STATE_GLOBAL;
2426         pi_factory = st->pi_factory;
2427     }
2428     if (pi_factory) {
2429         Py_INCREF(pi_factory);
2430         Py_XSETREF(self->pi_factory, pi_factory);
2431         self->insert_pis = insert_pis;
2432     } else {
2433         Py_CLEAR(self->pi_factory);
2434         self->insert_pis = 0;
2435     }
2436 
2437     return 0;
2438 }
2439 
/* GC traversal for TreeBuilder: visits every object reference stored in the
 * instance.  Returns the first nonzero result of `visit` (via Py_VISIT),
 * or 0 once all fields have been visited. */
static int
treebuilder_gc_traverse(TreeBuilderObject *self, visitproc visit, void *arg)
{
    /* event hooks */
    Py_VISIT(self->pi_event_obj);
    Py_VISIT(self->comment_event_obj);
    Py_VISIT(self->end_ns_event_obj);
    Py_VISIT(self->start_ns_event_obj);
    Py_VISIT(self->end_event_obj);
    Py_VISIT(self->start_event_obj);
    Py_VISIT(self->events_append);
    /* tree state */
    Py_VISIT(self->root);
    Py_VISIT(self->this);
    Py_VISIT(self->last);
    Py_VISIT(self->last_for_tail);
    Py_VISIT(self->data);
    Py_VISIT(self->stack);
    /* factories */
    Py_VISIT(self->pi_factory);
    Py_VISIT(self->comment_factory);
    Py_VISIT(self->element_factory);
    return 0;
}
2461 
/* Drops every object reference held by the TreeBuilder and leaves each field
 * NULL (Py_CLEAR).  Also used by treebuilder_dealloc().  Always returns 0. */
static int
treebuilder_gc_clear(TreeBuilderObject *self)
{
    /* event hooks */
    Py_CLEAR(self->pi_event_obj);
    Py_CLEAR(self->comment_event_obj);
    Py_CLEAR(self->end_ns_event_obj);
    Py_CLEAR(self->start_ns_event_obj);
    Py_CLEAR(self->end_event_obj);
    Py_CLEAR(self->start_event_obj);
    Py_CLEAR(self->events_append);
    /* tree state */
    Py_CLEAR(self->stack);
    Py_CLEAR(self->data);
    Py_CLEAR(self->last);
    Py_CLEAR(self->last_for_tail);
    Py_CLEAR(self->this);
    /* factories, then the root of the tree */
    Py_CLEAR(self->pi_factory);
    Py_CLEAR(self->comment_factory);
    Py_CLEAR(self->element_factory);
    Py_CLEAR(self->root);
    return 0;
}
2483 
2484 static void
treebuilder_dealloc(TreeBuilderObject * self)2485 treebuilder_dealloc(TreeBuilderObject *self)
2486 {
2487     PyObject_GC_UnTrack(self);
2488     treebuilder_gc_clear(self);
2489     Py_TYPE(self)->tp_free((PyObject *)self);
2490 }
2491 
2492 /* -------------------------------------------------------------------- */
2493 /* helpers for handling of arbitrary element-like objects */
2494 
2495 /*[clinic input]
2496 _elementtree._set_factories
2497 
2498     comment_factory: object
2499     pi_factory: object
2500     /
2501 
2502 Change the factories used to create comments and processing instructions.
2503 
2504 For internal use only.
2505 [clinic start generated code]*/
2506 
2507 static PyObject *
_elementtree__set_factories_impl(PyObject * module,PyObject * comment_factory,PyObject * pi_factory)2508 _elementtree__set_factories_impl(PyObject *module, PyObject *comment_factory,
2509                                  PyObject *pi_factory)
2510 /*[clinic end generated code: output=813b408adee26535 input=99d17627aea7fb3b]*/
2511 {
2512     elementtreestate *st = ET_STATE_GLOBAL;
2513     PyObject *old;
2514 
2515     if (!PyCallable_Check(comment_factory) && comment_factory != Py_None) {
2516         PyErr_Format(PyExc_TypeError, "Comment factory must be callable, not %.100s",
2517                      Py_TYPE(comment_factory)->tp_name);
2518         return NULL;
2519     }
2520     if (!PyCallable_Check(pi_factory) && pi_factory != Py_None) {
2521         PyErr_Format(PyExc_TypeError, "PI factory must be callable, not %.100s",
2522                      Py_TYPE(pi_factory)->tp_name);
2523         return NULL;
2524     }
2525 
2526     old = PyTuple_Pack(2,
2527         st->comment_factory ? st->comment_factory : Py_None,
2528         st->pi_factory ? st->pi_factory : Py_None);
2529 
2530     if (comment_factory == Py_None) {
2531         Py_CLEAR(st->comment_factory);
2532     } else {
2533         Py_INCREF(comment_factory);
2534         Py_XSETREF(st->comment_factory, comment_factory);
2535     }
2536     if (pi_factory == Py_None) {
2537         Py_CLEAR(st->pi_factory);
2538     } else {
2539         Py_INCREF(pi_factory);
2540         Py_XSETREF(st->pi_factory, pi_factory);
2541     }
2542 
2543     return old;
2544 }
2545 
2546 static int
treebuilder_extend_element_text_or_tail(PyObject * element,PyObject ** data,PyObject ** dest,_Py_Identifier * name)2547 treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
2548                                         PyObject **dest, _Py_Identifier *name)
2549 {
2550     /* Fast paths for the "almost always" cases. */
2551     if (Element_CheckExact(element)) {
2552         PyObject *dest_obj = JOIN_OBJ(*dest);
2553         if (dest_obj == Py_None) {
2554             *dest = JOIN_SET(*data, PyList_CheckExact(*data));
2555             *data = NULL;
2556             Py_DECREF(dest_obj);
2557             return 0;
2558         }
2559         else if (JOIN_GET(*dest)) {
2560             if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
2561                 return -1;
2562             }
2563             Py_CLEAR(*data);
2564             return 0;
2565         }
2566     }
2567 
2568     /*  Fallback for the non-Element / non-trivial cases. */
2569     {
2570         int r;
2571         PyObject* joined;
2572         PyObject* previous = _PyObject_GetAttrId(element, name);
2573         if (!previous)
2574             return -1;
2575         joined = list_join(*data);
2576         if (!joined) {
2577             Py_DECREF(previous);
2578             return -1;
2579         }
2580         if (previous != Py_None) {
2581             PyObject *tmp = PyNumber_Add(previous, joined);
2582             Py_DECREF(joined);
2583             Py_DECREF(previous);
2584             if (!tmp)
2585                 return -1;
2586             joined = tmp;
2587         } else {
2588             Py_DECREF(previous);
2589         }
2590 
2591         r = _PyObject_SetAttrId(element, name, joined);
2592         Py_DECREF(joined);
2593         if (r < 0)
2594             return -1;
2595         Py_CLEAR(*data);
2596         return 0;
2597     }
2598 }
2599 
2600 LOCAL(int)
treebuilder_flush_data(TreeBuilderObject * self)2601 treebuilder_flush_data(TreeBuilderObject* self)
2602 {
2603     if (!self->data) {
2604         return 0;
2605     }
2606 
2607     if (!self->last_for_tail) {
2608         PyObject *element = self->last;
2609         _Py_IDENTIFIER(text);
2610         return treebuilder_extend_element_text_or_tail(
2611                 element, &self->data,
2612                 &((ElementObject *) element)->text, &PyId_text);
2613     }
2614     else {
2615         PyObject *element = self->last_for_tail;
2616         _Py_IDENTIFIER(tail);
2617         return treebuilder_extend_element_text_or_tail(
2618                 element, &self->data,
2619                 &((ElementObject *) element)->tail, &PyId_tail);
2620     }
2621 }
2622 
2623 static int
treebuilder_add_subelement(PyObject * element,PyObject * child)2624 treebuilder_add_subelement(PyObject *element, PyObject *child)
2625 {
2626     _Py_IDENTIFIER(append);
2627     if (Element_CheckExact(element)) {
2628         ElementObject *elem = (ElementObject *) element;
2629         return element_add_subelement(elem, child);
2630     }
2631     else {
2632         PyObject *res;
2633         res = _PyObject_CallMethodIdOneArg(element, &PyId_append, child);
2634         if (res == NULL)
2635             return -1;
2636         Py_DECREF(res);
2637         return 0;
2638     }
2639 }
2640 
2641 LOCAL(int)
treebuilder_append_event(TreeBuilderObject * self,PyObject * action,PyObject * node)2642 treebuilder_append_event(TreeBuilderObject *self, PyObject *action,
2643                          PyObject *node)
2644 {
2645     if (action != NULL) {
2646         PyObject *res;
2647         PyObject *event = PyTuple_Pack(2, action, node);
2648         if (event == NULL)
2649             return -1;
2650         res = PyObject_CallOneArg(self->events_append, event);
2651         Py_DECREF(event);
2652         if (res == NULL)
2653             return -1;
2654         Py_DECREF(res);
2655     }
2656     return 0;
2657 }
2658 
2659 /* -------------------------------------------------------------------- */
2660 /* handlers */
2661 
2662 LOCAL(PyObject*)
treebuilder_handle_start(TreeBuilderObject * self,PyObject * tag,PyObject * attrib)2663 treebuilder_handle_start(TreeBuilderObject* self, PyObject* tag,
2664                          PyObject* attrib)
2665 {
2666     PyObject* node;
2667     PyObject* this;
2668     elementtreestate *st = ET_STATE_GLOBAL;
2669 
2670     if (treebuilder_flush_data(self) < 0) {
2671         return NULL;
2672     }
2673 
2674     if (!self->element_factory) {
2675         node = create_new_element(tag, attrib);
2676     } else if (attrib == NULL) {
2677         attrib = PyDict_New();
2678         if (!attrib)
2679             return NULL;
2680         node = PyObject_CallFunctionObjArgs(self->element_factory,
2681                                             tag, attrib, NULL);
2682         Py_DECREF(attrib);
2683     }
2684     else {
2685         node = PyObject_CallFunctionObjArgs(self->element_factory,
2686                                             tag, attrib, NULL);
2687     }
2688     if (!node) {
2689         return NULL;
2690     }
2691 
2692     this = self->this;
2693     Py_CLEAR(self->last_for_tail);
2694 
2695     if (this != Py_None) {
2696         if (treebuilder_add_subelement(this, node) < 0)
2697             goto error;
2698     } else {
2699         if (self->root) {
2700             PyErr_SetString(
2701                 st->parseerror_obj,
2702                 "multiple elements on top level"
2703                 );
2704             goto error;
2705         }
2706         Py_INCREF(node);
2707         self->root = node;
2708     }
2709 
2710     if (self->index < PyList_GET_SIZE(self->stack)) {
2711         if (PyList_SetItem(self->stack, self->index, this) < 0)
2712             goto error;
2713         Py_INCREF(this);
2714     } else {
2715         if (PyList_Append(self->stack, this) < 0)
2716             goto error;
2717     }
2718     self->index++;
2719 
2720     Py_INCREF(node);
2721     Py_SETREF(self->this, node);
2722     Py_INCREF(node);
2723     Py_SETREF(self->last, node);
2724 
2725     if (treebuilder_append_event(self, self->start_event_obj, node) < 0)
2726         goto error;
2727 
2728     return node;
2729 
2730   error:
2731     Py_DECREF(node);
2732     return NULL;
2733 }
2734 
2735 LOCAL(PyObject*)
treebuilder_handle_data(TreeBuilderObject * self,PyObject * data)2736 treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
2737 {
2738     if (!self->data) {
2739         if (self->last == Py_None) {
2740             /* ignore calls to data before the first call to start */
2741             Py_RETURN_NONE;
2742         }
2743         /* store the first item as is */
2744         Py_INCREF(data); self->data = data;
2745     } else {
2746         /* more than one item; use a list to collect items */
2747         if (PyBytes_CheckExact(self->data) && Py_REFCNT(self->data) == 1 &&
2748             PyBytes_CheckExact(data) && PyBytes_GET_SIZE(data) == 1) {
2749             /* XXX this code path unused in Python 3? */
2750             /* expat often generates single character data sections; handle
2751                the most common case by resizing the existing string... */
2752             Py_ssize_t size = PyBytes_GET_SIZE(self->data);
2753             if (_PyBytes_Resize(&self->data, size + 1) < 0)
2754                 return NULL;
2755             PyBytes_AS_STRING(self->data)[size] = PyBytes_AS_STRING(data)[0];
2756         } else if (PyList_CheckExact(self->data)) {
2757             if (PyList_Append(self->data, data) < 0)
2758                 return NULL;
2759         } else {
2760             PyObject* list = PyList_New(2);
2761             if (!list)
2762                 return NULL;
2763             PyList_SET_ITEM(list, 0, self->data);
2764             Py_INCREF(data); PyList_SET_ITEM(list, 1, data);
2765             self->data = list;
2766         }
2767     }
2768 
2769     Py_RETURN_NONE;
2770 }
2771 
/* Handle the closing of the current element.
 *
 * Flushes pending character data, then pops the element stack: the element
 * being closed becomes both "last" and "last_for_tail", and the node saved on
 * the stack becomes "this" again.  An "end" event is recorded when end events
 * are traced.  `tag` is not used here.
 *
 * Returns a new reference to the closed element, or NULL with an exception
 * set (IndexError when the stack is empty).
 */
LOCAL(PyObject*)
treebuilder_handle_end(TreeBuilderObject* self, PyObject* tag)
{
    PyObject* item;

    if (treebuilder_flush_data(self) < 0) {
        return NULL;
    }

    if (self->index == 0) {
        PyErr_SetString(
            PyExc_IndexError,
            "pop from empty stack"
            );
        return NULL;
    }

    /* The reference held by self->this moves over to self->last; the old
       value of self->last is kept in `item` and released further down. */
    item = self->last;
    self->last = self->this;
    /* extra reference for last_for_tail */
    Py_INCREF(self->last);
    Py_XSETREF(self->last_for_tail, self->last);
    self->index--;
    /* borrowed from the stack list, hence the INCREF below */
    self->this = PyList_GET_ITEM(self->stack, self->index);
    Py_INCREF(self->this);
    Py_DECREF(item);

    if (treebuilder_append_event(self, self->end_event_obj, self->last) < 0)
        return NULL;

    /* the caller gets its own reference */
    Py_INCREF(self->last);
    return (PyObject*) self->last;
}
2804 
2805 LOCAL(PyObject*)
treebuilder_handle_comment(TreeBuilderObject * self,PyObject * text)2806 treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
2807 {
2808     PyObject* comment;
2809     PyObject* this;
2810 
2811     if (treebuilder_flush_data(self) < 0) {
2812         return NULL;
2813     }
2814 
2815     if (self->comment_factory) {
2816         comment = PyObject_CallOneArg(self->comment_factory, text);
2817         if (!comment)
2818             return NULL;
2819 
2820         this = self->this;
2821         if (self->insert_comments && this != Py_None) {
2822             if (treebuilder_add_subelement(this, comment) < 0)
2823                 goto error;
2824             Py_INCREF(comment);
2825             Py_XSETREF(self->last_for_tail, comment);
2826         }
2827     } else {
2828         Py_INCREF(text);
2829         comment = text;
2830     }
2831 
2832     if (self->events_append && self->comment_event_obj) {
2833         if (treebuilder_append_event(self, self->comment_event_obj, comment) < 0)
2834             goto error;
2835     }
2836 
2837     return comment;
2838 
2839   error:
2840     Py_DECREF(comment);
2841     return NULL;
2842 }
2843 
2844 LOCAL(PyObject*)
treebuilder_handle_pi(TreeBuilderObject * self,PyObject * target,PyObject * text)2845 treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
2846 {
2847     PyObject* pi;
2848     PyObject* this;
2849     PyObject* stack[2] = {target, text};
2850 
2851     if (treebuilder_flush_data(self) < 0) {
2852         return NULL;
2853     }
2854 
2855     if (self->pi_factory) {
2856         pi = _PyObject_FastCall(self->pi_factory, stack, 2);
2857         if (!pi) {
2858             return NULL;
2859         }
2860 
2861         this = self->this;
2862         if (self->insert_pis && this != Py_None) {
2863             if (treebuilder_add_subelement(this, pi) < 0)
2864                 goto error;
2865             Py_INCREF(pi);
2866             Py_XSETREF(self->last_for_tail, pi);
2867         }
2868     } else {
2869         pi = PyTuple_Pack(2, target, text);
2870         if (!pi) {
2871             return NULL;
2872         }
2873     }
2874 
2875     if (self->events_append && self->pi_event_obj) {
2876         if (treebuilder_append_event(self, self->pi_event_obj, pi) < 0)
2877             goto error;
2878     }
2879 
2880     return pi;
2881 
2882   error:
2883     Py_DECREF(pi);
2884     return NULL;
2885 }
2886 
2887 LOCAL(PyObject*)
treebuilder_handle_start_ns(TreeBuilderObject * self,PyObject * prefix,PyObject * uri)2888 treebuilder_handle_start_ns(TreeBuilderObject* self, PyObject* prefix, PyObject* uri)
2889 {
2890     PyObject* parcel;
2891 
2892     if (self->events_append && self->start_ns_event_obj) {
2893         parcel = PyTuple_Pack(2, prefix, uri);
2894         if (!parcel) {
2895             return NULL;
2896         }
2897 
2898         if (treebuilder_append_event(self, self->start_ns_event_obj, parcel) < 0) {
2899             Py_DECREF(parcel);
2900             return NULL;
2901         }
2902         Py_DECREF(parcel);
2903     }
2904 
2905     Py_RETURN_NONE;
2906 }
2907 
2908 LOCAL(PyObject*)
treebuilder_handle_end_ns(TreeBuilderObject * self,PyObject * prefix)2909 treebuilder_handle_end_ns(TreeBuilderObject* self, PyObject* prefix)
2910 {
2911     if (self->events_append && self->end_ns_event_obj) {
2912         if (treebuilder_append_event(self, self->end_ns_event_obj, prefix) < 0) {
2913             return NULL;
2914         }
2915     }
2916 
2917     Py_RETURN_NONE;
2918 }
2919 
2920 /* -------------------------------------------------------------------- */
2921 /* methods (in alphabetical order) */
2922 
2923 /*[clinic input]
2924 _elementtree.TreeBuilder.data
2925 
2926     data: object
2927     /
2928 
2929 [clinic start generated code]*/
2930 
static PyObject *
_elementtree_TreeBuilder_data(TreeBuilderObject *self, PyObject *data)
/*[clinic end generated code: output=69144c7100795bb2 input=a0540c532b284d29]*/
{
    /* Argument Clinic entry point for TreeBuilder.data(data): forwards the
       argument unchanged to treebuilder_handle_data() and returns its
       result. */
    return treebuilder_handle_data(self, data);
}
2937 
2938 /*[clinic input]
2939 _elementtree.TreeBuilder.end
2940 
2941     tag: object
2942     /
2943 
2944 [clinic start generated code]*/
2945 
static PyObject *
_elementtree_TreeBuilder_end(TreeBuilderObject *self, PyObject *tag)
/*[clinic end generated code: output=9a98727cc691cd9d input=22dc3674236f5745]*/
{
    /* Argument Clinic entry point for TreeBuilder.end(tag): forwards to
       treebuilder_handle_end(), which returns the closed element (or NULL
       with an exception set). */
    return treebuilder_handle_end(self, tag);
}
2952 
2953 /*[clinic input]
2954 _elementtree.TreeBuilder.comment
2955 
2956     text: object
2957     /
2958 
2959 [clinic start generated code]*/
2960 
static PyObject *
_elementtree_TreeBuilder_comment(TreeBuilderObject *self, PyObject *text)
/*[clinic end generated code: output=22835be41deeaa27 input=47e7ebc48ed01dfa]*/
{
    /* Argument Clinic entry point for TreeBuilder.comment(text): forwards
       to treebuilder_handle_comment() and returns its result. */
    return treebuilder_handle_comment(self, text);
}
2967 
2968 /*[clinic input]
2969 _elementtree.TreeBuilder.pi
2970 
2971     target: object
2972     text: object = None
2973     /
2974 
2975 [clinic start generated code]*/
2976 
static PyObject *
_elementtree_TreeBuilder_pi_impl(TreeBuilderObject *self, PyObject *target,
                                 PyObject *text)
/*[clinic end generated code: output=21eb95ec9d04d1d9 input=349342bd79c35570]*/
{
    /* Argument Clinic entry point for TreeBuilder.pi(target, text=None):
       forwards both arguments to treebuilder_handle_pi() and returns its
       result. */
    return treebuilder_handle_pi(self, target, text);
}
2984 
2985 LOCAL(PyObject*)
treebuilder_done(TreeBuilderObject * self)2986 treebuilder_done(TreeBuilderObject* self)
2987 {
2988     PyObject* res;
2989 
2990     /* FIXME: check stack size? */
2991 
2992     if (self->root)
2993         res = self->root;
2994     else
2995         res = Py_None;
2996 
2997     Py_INCREF(res);
2998     return res;
2999 }
3000 
3001 /*[clinic input]
3002 _elementtree.TreeBuilder.close
3003 
3004 [clinic start generated code]*/
3005 
static PyObject *
_elementtree_TreeBuilder_close_impl(TreeBuilderObject *self)
/*[clinic end generated code: output=b441fee3202f61ee input=f7c9c65dc718de14]*/
{
    /* Argument Clinic entry point for TreeBuilder.close(): returns whatever
       treebuilder_done() returns (the root element, or None). */
    return treebuilder_done(self);
}
3012 
3013 /*[clinic input]
3014 _elementtree.TreeBuilder.start
3015 
3016     tag: object
3017     attrs: object(subclass_of='&PyDict_Type')
3018     /
3019 
3020 [clinic start generated code]*/
3021 
static PyObject *
_elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag,
                                    PyObject *attrs)
/*[clinic end generated code: output=e7e9dc2861349411 input=7288e9e38e63b2b6]*/
{
    /* Argument Clinic entry point for TreeBuilder.start(tag, attrs); the
       clinic declaration restricts attrs to a dict.  Forwards to
       treebuilder_handle_start() and returns its result. */
    return treebuilder_handle_start(self, tag, attrs);
}
3029 
3030 /* ==================================================================== */
3031 /* the expat interface */
3032 
3033 #include "expat.h"
3034 #include "pyexpat.h"
3035 
/* The PyExpat_CAPI structure is an immutable dispatch table, so it can be
 * cached globally without being in per-module state.
 */
static struct PyExpat_CAPI *expat_capi;
/* Call an expat entry point through the cached dispatch table, e.g.
 * EXPAT(Parse)(parser, data, len, final). */
#define EXPAT(func) (expat_capi->func)

/* Allocator suite passed to ParserCreate_MM so that the parser allocates
 * through PyObject_Malloc / PyObject_Realloc / PyObject_Free. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
3044 
/* Instance layout of XMLParser objects.  Every PyObject* member starts out
   NULL (see xmlparser_new) and is filled in by XMLParser.__init__. */
typedef struct {
    PyObject_HEAD

    /* the expat parser; released in xmlparser_gc_clear */
    XML_Parser parser;

    /* object receiving the parse events; a TreeBuilder is created when
       None is passed to __init__ */
    PyObject *target;
    /* dict consulted by the default handler to resolve entity names */
    PyObject *entity;

    /* dict caching raw (bytes) names -> decoded universal names; see
       makeuniversal */
    PyObject *names;

    /* attributes looked up on the target in __init__ ("start_ns",
       "end_ns", "start", "data", "end"); NULL when the target lacks the
       attribute */
    PyObject *handle_start_ns;
    PyObject *handle_end_ns;
    PyObject *handle_start;
    PyObject *handle_data;
    PyObject *handle_end;

    /* likewise for "comment", "pi" and "doctype" */
    PyObject *handle_comment;
    PyObject *handle_pi;
    PyObject *handle_doctype;

    /* likewise for "close" */
    PyObject *handle_close;

} XMLParserObject;
3068 
3069 /* helpers */
3070 
3071 LOCAL(PyObject*)
makeuniversal(XMLParserObject * self,const char * string)3072 makeuniversal(XMLParserObject* self, const char* string)
3073 {
3074     /* convert a UTF-8 tag/attribute name from the expat parser
3075        to a universal name string */
3076 
3077     Py_ssize_t size = (Py_ssize_t) strlen(string);
3078     PyObject* key;
3079     PyObject* value;
3080 
3081     /* look the 'raw' name up in the names dictionary */
3082     key = PyBytes_FromStringAndSize(string, size);
3083     if (!key)
3084         return NULL;
3085 
3086     value = PyDict_GetItemWithError(self->names, key);
3087 
3088     if (value) {
3089         Py_INCREF(value);
3090     }
3091     else if (!PyErr_Occurred()) {
3092         /* new name.  convert to universal name, and decode as
3093            necessary */
3094 
3095         PyObject* tag;
3096         char* p;
3097         Py_ssize_t i;
3098 
3099         /* look for namespace separator */
3100         for (i = 0; i < size; i++)
3101             if (string[i] == '}')
3102                 break;
3103         if (i != size) {
3104             /* convert to universal name */
3105             tag = PyBytes_FromStringAndSize(NULL, size+1);
3106             if (tag == NULL) {
3107                 Py_DECREF(key);
3108                 return NULL;
3109             }
3110             p = PyBytes_AS_STRING(tag);
3111             p[0] = '{';
3112             memcpy(p+1, string, size);
3113             size++;
3114         } else {
3115             /* plain name; use key as tag */
3116             Py_INCREF(key);
3117             tag = key;
3118         }
3119 
3120         /* decode universal name */
3121         p = PyBytes_AS_STRING(tag);
3122         value = PyUnicode_DecodeUTF8(p, size, "strict");
3123         Py_DECREF(tag);
3124         if (!value) {
3125             Py_DECREF(key);
3126             return NULL;
3127         }
3128 
3129         /* add to names dictionary */
3130         if (PyDict_SetItem(self->names, key, value) < 0) {
3131             Py_DECREF(key);
3132             Py_DECREF(value);
3133             return NULL;
3134         }
3135     }
3136 
3137     Py_DECREF(key);
3138     return value;
3139 }
3140 
3141 /* Set the ParseError exception with the given parameters.
3142  * If message is not NULL, it's used as the error string. Otherwise, the
3143  * message string is the default for the given error_code.
3144 */
3145 static void
expat_set_error(enum XML_Error error_code,Py_ssize_t line,Py_ssize_t column,const char * message)3146 expat_set_error(enum XML_Error error_code, Py_ssize_t line, Py_ssize_t column,
3147                 const char *message)
3148 {
3149     PyObject *errmsg, *error, *position, *code;
3150     elementtreestate *st = ET_STATE_GLOBAL;
3151 
3152     errmsg = PyUnicode_FromFormat("%s: line %zd, column %zd",
3153                 message ? message : EXPAT(ErrorString)(error_code),
3154                 line, column);
3155     if (errmsg == NULL)
3156         return;
3157 
3158     error = PyObject_CallOneArg(st->parseerror_obj, errmsg);
3159     Py_DECREF(errmsg);
3160     if (!error)
3161         return;
3162 
3163     /* Add code and position attributes */
3164     code = PyLong_FromLong((long)error_code);
3165     if (!code) {
3166         Py_DECREF(error);
3167         return;
3168     }
3169     if (PyObject_SetAttrString(error, "code", code) == -1) {
3170         Py_DECREF(error);
3171         Py_DECREF(code);
3172         return;
3173     }
3174     Py_DECREF(code);
3175 
3176     position = Py_BuildValue("(nn)", line, column);
3177     if (!position) {
3178         Py_DECREF(error);
3179         return;
3180     }
3181     if (PyObject_SetAttrString(error, "position", position) == -1) {
3182         Py_DECREF(error);
3183         Py_DECREF(position);
3184         return;
3185     }
3186     Py_DECREF(position);
3187 
3188     PyErr_SetObject(st->parseerror_obj, error);
3189     Py_DECREF(error);
3190 }
3191 
3192 /* -------------------------------------------------------------------- */
3193 /* handlers */
3194 
3195 static void
expat_default_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3196 expat_default_handler(XMLParserObject* self, const XML_Char* data_in,
3197                       int data_len)
3198 {
3199     PyObject* key;
3200     PyObject* value;
3201     PyObject* res;
3202 
3203     if (data_len < 2 || data_in[0] != '&')
3204         return;
3205 
3206     if (PyErr_Occurred())
3207         return;
3208 
3209     key = PyUnicode_DecodeUTF8(data_in + 1, data_len - 2, "strict");
3210     if (!key)
3211         return;
3212 
3213     value = PyDict_GetItemWithError(self->entity, key);
3214 
3215     if (value) {
3216         if (TreeBuilder_CheckExact(self->target))
3217             res = treebuilder_handle_data(
3218                 (TreeBuilderObject*) self->target, value
3219                 );
3220         else if (self->handle_data)
3221             res = PyObject_CallOneArg(self->handle_data, value);
3222         else
3223             res = NULL;
3224         Py_XDECREF(res);
3225     } else if (!PyErr_Occurred()) {
3226         /* Report the first error, not the last */
3227         char message[128] = "undefined entity ";
3228         strncat(message, data_in, data_len < 100?data_len:100);
3229         expat_set_error(
3230             XML_ERROR_UNDEFINED_ENTITY,
3231             EXPAT(GetErrorLineNumber)(self->parser),
3232             EXPAT(GetErrorColumnNumber)(self->parser),
3233             message
3234             );
3235     }
3236 
3237     Py_DECREF(key);
3238 }
3239 
3240 static void
expat_start_handler(XMLParserObject * self,const XML_Char * tag_in,const XML_Char ** attrib_in)3241 expat_start_handler(XMLParserObject* self, const XML_Char* tag_in,
3242                     const XML_Char **attrib_in)
3243 {
3244     PyObject* res;
3245     PyObject* tag;
3246     PyObject* attrib;
3247     int ok;
3248 
3249     if (PyErr_Occurred())
3250         return;
3251 
3252     /* tag name */
3253     tag = makeuniversal(self, tag_in);
3254     if (!tag)
3255         return; /* parser will look for errors */
3256 
3257     /* attributes */
3258     if (attrib_in[0]) {
3259         attrib = PyDict_New();
3260         if (!attrib) {
3261             Py_DECREF(tag);
3262             return;
3263         }
3264         while (attrib_in[0] && attrib_in[1]) {
3265             PyObject* key = makeuniversal(self, attrib_in[0]);
3266             PyObject* value = PyUnicode_DecodeUTF8(attrib_in[1], strlen(attrib_in[1]), "strict");
3267             if (!key || !value) {
3268                 Py_XDECREF(value);
3269                 Py_XDECREF(key);
3270                 Py_DECREF(attrib);
3271                 Py_DECREF(tag);
3272                 return;
3273             }
3274             ok = PyDict_SetItem(attrib, key, value);
3275             Py_DECREF(value);
3276             Py_DECREF(key);
3277             if (ok < 0) {
3278                 Py_DECREF(attrib);
3279                 Py_DECREF(tag);
3280                 return;
3281             }
3282             attrib_in += 2;
3283         }
3284     } else {
3285         attrib = NULL;
3286     }
3287 
3288     if (TreeBuilder_CheckExact(self->target)) {
3289         /* shortcut */
3290         res = treebuilder_handle_start((TreeBuilderObject*) self->target,
3291                                        tag, attrib);
3292     }
3293     else if (self->handle_start) {
3294         if (attrib == NULL) {
3295             attrib = PyDict_New();
3296             if (!attrib) {
3297                 Py_DECREF(tag);
3298                 return;
3299             }
3300         }
3301         res = PyObject_CallFunctionObjArgs(self->handle_start,
3302                                            tag, attrib, NULL);
3303     } else
3304         res = NULL;
3305 
3306     Py_DECREF(tag);
3307     Py_XDECREF(attrib);
3308 
3309     Py_XDECREF(res);
3310 }
3311 
3312 static void
expat_data_handler(XMLParserObject * self,const XML_Char * data_in,int data_len)3313 expat_data_handler(XMLParserObject* self, const XML_Char* data_in,
3314                    int data_len)
3315 {
3316     PyObject* data;
3317     PyObject* res;
3318 
3319     if (PyErr_Occurred())
3320         return;
3321 
3322     data = PyUnicode_DecodeUTF8(data_in, data_len, "strict");
3323     if (!data)
3324         return; /* parser will look for errors */
3325 
3326     if (TreeBuilder_CheckExact(self->target))
3327         /* shortcut */
3328         res = treebuilder_handle_data((TreeBuilderObject*) self->target, data);
3329     else if (self->handle_data)
3330         res = PyObject_CallOneArg(self->handle_data, data);
3331     else
3332         res = NULL;
3333 
3334     Py_DECREF(data);
3335 
3336     Py_XDECREF(res);
3337 }
3338 
3339 static void
expat_end_handler(XMLParserObject * self,const XML_Char * tag_in)3340 expat_end_handler(XMLParserObject* self, const XML_Char* tag_in)
3341 {
3342     PyObject* tag;
3343     PyObject* res = NULL;
3344 
3345     if (PyErr_Occurred())
3346         return;
3347 
3348     if (TreeBuilder_CheckExact(self->target))
3349         /* shortcut */
3350         /* the standard tree builder doesn't look at the end tag */
3351         res = treebuilder_handle_end(
3352             (TreeBuilderObject*) self->target, Py_None
3353             );
3354     else if (self->handle_end) {
3355         tag = makeuniversal(self, tag_in);
3356         if (tag) {
3357             res = PyObject_CallOneArg(self->handle_end, tag);
3358             Py_DECREF(tag);
3359         }
3360     }
3361 
3362     Py_XDECREF(res);
3363 }
3364 
3365 static void
expat_start_ns_handler(XMLParserObject * self,const XML_Char * prefix_in,const XML_Char * uri_in)3366 expat_start_ns_handler(XMLParserObject* self, const XML_Char* prefix_in,
3367                        const XML_Char *uri_in)
3368 {
3369     PyObject* res = NULL;
3370     PyObject* uri;
3371     PyObject* prefix;
3372     PyObject* stack[2];
3373 
3374     if (PyErr_Occurred())
3375         return;
3376 
3377     if (!uri_in)
3378         uri_in = "";
3379     if (!prefix_in)
3380         prefix_in = "";
3381 
3382     if (TreeBuilder_CheckExact(self->target)) {
3383         /* shortcut - TreeBuilder does not actually implement .start_ns() */
3384         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3385 
3386         if (target->events_append && target->start_ns_event_obj) {
3387             prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3388             if (!prefix)
3389                 return;
3390             uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3391             if (!uri) {
3392                 Py_DECREF(prefix);
3393                 return;
3394             }
3395 
3396             res = treebuilder_handle_start_ns(target, prefix, uri);
3397             Py_DECREF(uri);
3398             Py_DECREF(prefix);
3399         }
3400     } else if (self->handle_start_ns) {
3401         prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3402         if (!prefix)
3403             return;
3404         uri = PyUnicode_DecodeUTF8(uri_in, strlen(uri_in), "strict");
3405         if (!uri) {
3406             Py_DECREF(prefix);
3407             return;
3408         }
3409 
3410         stack[0] = prefix;
3411         stack[1] = uri;
3412         res = _PyObject_FastCall(self->handle_start_ns, stack, 2);
3413         Py_DECREF(uri);
3414         Py_DECREF(prefix);
3415     }
3416 
3417     Py_XDECREF(res);
3418 }
3419 
3420 static void
expat_end_ns_handler(XMLParserObject * self,const XML_Char * prefix_in)3421 expat_end_ns_handler(XMLParserObject* self, const XML_Char* prefix_in)
3422 {
3423     PyObject *res = NULL;
3424     PyObject* prefix;
3425 
3426     if (PyErr_Occurred())
3427         return;
3428 
3429     if (!prefix_in)
3430         prefix_in = "";
3431 
3432     if (TreeBuilder_CheckExact(self->target)) {
3433         /* shortcut - TreeBuilder does not actually implement .end_ns() */
3434         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3435 
3436         if (target->events_append && target->end_ns_event_obj) {
3437             res = treebuilder_handle_end_ns(target, Py_None);
3438         }
3439     } else if (self->handle_end_ns) {
3440         prefix = PyUnicode_DecodeUTF8(prefix_in, strlen(prefix_in), "strict");
3441         if (!prefix)
3442             return;
3443 
3444         res = PyObject_CallOneArg(self->handle_end_ns, prefix);
3445         Py_DECREF(prefix);
3446     }
3447 
3448     Py_XDECREF(res);
3449 }
3450 
3451 static void
expat_comment_handler(XMLParserObject * self,const XML_Char * comment_in)3452 expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
3453 {
3454     PyObject* comment;
3455     PyObject* res;
3456 
3457     if (PyErr_Occurred())
3458         return;
3459 
3460     if (TreeBuilder_CheckExact(self->target)) {
3461         /* shortcut */
3462         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3463 
3464         comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3465         if (!comment)
3466             return; /* parser will look for errors */
3467 
3468         res = treebuilder_handle_comment(target,  comment);
3469         Py_XDECREF(res);
3470         Py_DECREF(comment);
3471     } else if (self->handle_comment) {
3472         comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
3473         if (!comment)
3474             return;
3475 
3476         res = PyObject_CallOneArg(self->handle_comment, comment);
3477         Py_XDECREF(res);
3478         Py_DECREF(comment);
3479     }
3480 }
3481 
3482 static void
expat_start_doctype_handler(XMLParserObject * self,const XML_Char * doctype_name,const XML_Char * sysid,const XML_Char * pubid,int has_internal_subset)3483 expat_start_doctype_handler(XMLParserObject *self,
3484                             const XML_Char *doctype_name,
3485                             const XML_Char *sysid,
3486                             const XML_Char *pubid,
3487                             int has_internal_subset)
3488 {
3489     _Py_IDENTIFIER(doctype);
3490     PyObject *doctype_name_obj, *sysid_obj, *pubid_obj;
3491     PyObject *res;
3492 
3493     if (PyErr_Occurred())
3494         return;
3495 
3496     doctype_name_obj = makeuniversal(self, doctype_name);
3497     if (!doctype_name_obj)
3498         return;
3499 
3500     if (sysid) {
3501         sysid_obj = makeuniversal(self, sysid);
3502         if (!sysid_obj) {
3503             Py_DECREF(doctype_name_obj);
3504             return;
3505         }
3506     } else {
3507         Py_INCREF(Py_None);
3508         sysid_obj = Py_None;
3509     }
3510 
3511     if (pubid) {
3512         pubid_obj = makeuniversal(self, pubid);
3513         if (!pubid_obj) {
3514             Py_DECREF(doctype_name_obj);
3515             Py_DECREF(sysid_obj);
3516             return;
3517         }
3518     } else {
3519         Py_INCREF(Py_None);
3520         pubid_obj = Py_None;
3521     }
3522 
3523     /* If the target has a handler for doctype, call it. */
3524     if (self->handle_doctype) {
3525         res = PyObject_CallFunctionObjArgs(self->handle_doctype,
3526                                            doctype_name_obj, pubid_obj,
3527                                            sysid_obj, NULL);
3528         Py_XDECREF(res);
3529     }
3530     else if (_PyObject_LookupAttrId((PyObject *)self, &PyId_doctype, &res) > 0) {
3531         (void)PyErr_WarnEx(PyExc_RuntimeWarning,
3532                 "The doctype() method of XMLParser is ignored.  "
3533                 "Define doctype() method on the TreeBuilder target.",
3534                 1);
3535         Py_DECREF(res);
3536     }
3537 
3538     Py_DECREF(doctype_name_obj);
3539     Py_DECREF(pubid_obj);
3540     Py_DECREF(sysid_obj);
3541 }
3542 
3543 static void
expat_pi_handler(XMLParserObject * self,const XML_Char * target_in,const XML_Char * data_in)3544 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
3545                  const XML_Char* data_in)
3546 {
3547     PyObject* pi_target;
3548     PyObject* data;
3549     PyObject* res;
3550     PyObject* stack[2];
3551 
3552     if (PyErr_Occurred())
3553         return;
3554 
3555     if (TreeBuilder_CheckExact(self->target)) {
3556         /* shortcut */
3557         TreeBuilderObject *target = (TreeBuilderObject*) self->target;
3558 
3559         if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
3560             pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3561             if (!pi_target)
3562                 goto error;
3563             data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3564             if (!data)
3565                 goto error;
3566             res = treebuilder_handle_pi(target, pi_target, data);
3567             Py_XDECREF(res);
3568             Py_DECREF(data);
3569             Py_DECREF(pi_target);
3570         }
3571     } else if (self->handle_pi) {
3572         pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
3573         if (!pi_target)
3574             goto error;
3575         data = PyUnicode_DecodeUTF8(data_in, strlen(data_in), "strict");
3576         if (!data)
3577             goto error;
3578 
3579         stack[0] = pi_target;
3580         stack[1] = data;
3581         res = _PyObject_FastCall(self->handle_pi, stack, 2);
3582         Py_XDECREF(res);
3583         Py_DECREF(data);
3584         Py_DECREF(pi_target);
3585     }
3586 
3587     return;
3588 
3589   error:
3590     Py_XDECREF(pi_target);
3591     return;
3592 }
3593 
3594 /* -------------------------------------------------------------------- */
3595 
3596 static PyObject *
xmlparser_new(PyTypeObject * type,PyObject * args,PyObject * kwds)3597 xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3598 {
3599     XMLParserObject *self = (XMLParserObject *)type->tp_alloc(type, 0);
3600     if (self) {
3601         self->parser = NULL;
3602         self->target = self->entity = self->names = NULL;
3603         self->handle_start_ns = self->handle_end_ns = NULL;
3604         self->handle_start = self->handle_data = self->handle_end = NULL;
3605         self->handle_comment = self->handle_pi = self->handle_close = NULL;
3606         self->handle_doctype = NULL;
3607     }
3608     return (PyObject *)self;
3609 }
3610 
3611 static int
ignore_attribute_error(PyObject * value)3612 ignore_attribute_error(PyObject *value)
3613 {
3614     if (value == NULL) {
3615         if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
3616             return -1;
3617         }
3618         PyErr_Clear();
3619     }
3620     return 0;
3621 }
3622 
3623 /*[clinic input]
3624 _elementtree.XMLParser.__init__
3625 
3626     *
3627     target: object = None
3628     encoding: str(accept={str, NoneType}) = None
3629 
3630 [clinic start generated code]*/
3631 
3632 static int
_elementtree_XMLParser___init___impl(XMLParserObject * self,PyObject * target,const char * encoding)3633 _elementtree_XMLParser___init___impl(XMLParserObject *self, PyObject *target,
3634                                      const char *encoding)
3635 /*[clinic end generated code: output=3ae45ec6cdf344e4 input=7e716dd6e4f3e439]*/
3636 {
3637     self->entity = PyDict_New();
3638     if (!self->entity)
3639         return -1;
3640 
3641     self->names = PyDict_New();
3642     if (!self->names) {
3643         Py_CLEAR(self->entity);
3644         return -1;
3645     }
3646 
3647     self->parser = EXPAT(ParserCreate_MM)(encoding, &ExpatMemoryHandler, "}");
3648     if (!self->parser) {
3649         Py_CLEAR(self->entity);
3650         Py_CLEAR(self->names);
3651         PyErr_NoMemory();
3652         return -1;
3653     }
3654     /* expat < 2.1.0 has no XML_SetHashSalt() */
3655     if (EXPAT(SetHashSalt) != NULL) {
3656         EXPAT(SetHashSalt)(self->parser,
3657                            (unsigned long)_Py_HashSecret.expat.hashsalt);
3658     }
3659 
3660     if (target != Py_None) {
3661         Py_INCREF(target);
3662     } else {
3663         target = treebuilder_new(&TreeBuilder_Type, NULL, NULL);
3664         if (!target) {
3665             Py_CLEAR(self->entity);
3666             Py_CLEAR(self->names);
3667             return -1;
3668         }
3669     }
3670     self->target = target;
3671 
3672     self->handle_start_ns = PyObject_GetAttrString(target, "start_ns");
3673     if (ignore_attribute_error(self->handle_start_ns)) {
3674         return -1;
3675     }
3676     self->handle_end_ns = PyObject_GetAttrString(target, "end_ns");
3677     if (ignore_attribute_error(self->handle_end_ns)) {
3678         return -1;
3679     }
3680     self->handle_start = PyObject_GetAttrString(target, "start");
3681     if (ignore_attribute_error(self->handle_start)) {
3682         return -1;
3683     }
3684     self->handle_data = PyObject_GetAttrString(target, "data");
3685     if (ignore_attribute_error(self->handle_data)) {
3686         return -1;
3687     }
3688     self->handle_end = PyObject_GetAttrString(target, "end");
3689     if (ignore_attribute_error(self->handle_end)) {
3690         return -1;
3691     }
3692     self->handle_comment = PyObject_GetAttrString(target, "comment");
3693     if (ignore_attribute_error(self->handle_comment)) {
3694         return -1;
3695     }
3696     self->handle_pi = PyObject_GetAttrString(target, "pi");
3697     if (ignore_attribute_error(self->handle_pi)) {
3698         return -1;
3699     }
3700     self->handle_close = PyObject_GetAttrString(target, "close");
3701     if (ignore_attribute_error(self->handle_close)) {
3702         return -1;
3703     }
3704     self->handle_doctype = PyObject_GetAttrString(target, "doctype");
3705     if (ignore_attribute_error(self->handle_doctype)) {
3706         return -1;
3707     }
3708 
3709     /* configure parser */
3710     EXPAT(SetUserData)(self->parser, self);
3711     if (self->handle_start_ns || self->handle_end_ns)
3712         EXPAT(SetNamespaceDeclHandler)(
3713             self->parser,
3714             (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
3715             (XML_EndNamespaceDeclHandler) expat_end_ns_handler
3716             );
3717     EXPAT(SetElementHandler)(
3718         self->parser,
3719         (XML_StartElementHandler) expat_start_handler,
3720         (XML_EndElementHandler) expat_end_handler
3721         );
3722     EXPAT(SetDefaultHandlerExpand)(
3723         self->parser,
3724         (XML_DefaultHandler) expat_default_handler
3725         );
3726     EXPAT(SetCharacterDataHandler)(
3727         self->parser,
3728         (XML_CharacterDataHandler) expat_data_handler
3729         );
3730     if (self->handle_comment)
3731         EXPAT(SetCommentHandler)(
3732             self->parser,
3733             (XML_CommentHandler) expat_comment_handler
3734             );
3735     if (self->handle_pi)
3736         EXPAT(SetProcessingInstructionHandler)(
3737             self->parser,
3738             (XML_ProcessingInstructionHandler) expat_pi_handler
3739             );
3740     EXPAT(SetStartDoctypeDeclHandler)(
3741         self->parser,
3742         (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
3743         );
3744     EXPAT(SetUnknownEncodingHandler)(
3745         self->parser,
3746         EXPAT(DefaultUnknownEncodingHandler), NULL
3747         );
3748 
3749     return 0;
3750 }
3751 
3752 static int
xmlparser_gc_traverse(XMLParserObject * self,visitproc visit,void * arg)3753 xmlparser_gc_traverse(XMLParserObject *self, visitproc visit, void *arg)
3754 {
3755     Py_VISIT(self->handle_close);
3756     Py_VISIT(self->handle_pi);
3757     Py_VISIT(self->handle_comment);
3758     Py_VISIT(self->handle_end);
3759     Py_VISIT(self->handle_data);
3760     Py_VISIT(self->handle_start);
3761     Py_VISIT(self->handle_start_ns);
3762     Py_VISIT(self->handle_end_ns);
3763     Py_VISIT(self->handle_doctype);
3764 
3765     Py_VISIT(self->target);
3766     Py_VISIT(self->entity);
3767     Py_VISIT(self->names);
3768 
3769     return 0;
3770 }
3771 
3772 static int
xmlparser_gc_clear(XMLParserObject * self)3773 xmlparser_gc_clear(XMLParserObject *self)
3774 {
3775     if (self->parser != NULL) {
3776         XML_Parser parser = self->parser;
3777         self->parser = NULL;
3778         EXPAT(ParserFree)(parser);
3779     }
3780 
3781     Py_CLEAR(self->handle_close);
3782     Py_CLEAR(self->handle_pi);
3783     Py_CLEAR(self->handle_comment);
3784     Py_CLEAR(self->handle_end);
3785     Py_CLEAR(self->handle_data);
3786     Py_CLEAR(self->handle_start);
3787     Py_CLEAR(self->handle_start_ns);
3788     Py_CLEAR(self->handle_end_ns);
3789     Py_CLEAR(self->handle_doctype);
3790 
3791     Py_CLEAR(self->target);
3792     Py_CLEAR(self->entity);
3793     Py_CLEAR(self->names);
3794 
3795     return 0;
3796 }
3797 
3798 static void
xmlparser_dealloc(XMLParserObject * self)3799 xmlparser_dealloc(XMLParserObject* self)
3800 {
3801     PyObject_GC_UnTrack(self);
3802     xmlparser_gc_clear(self);
3803     Py_TYPE(self)->tp_free((PyObject *)self);
3804 }
3805 
3806 Py_LOCAL_INLINE(int)
_check_xmlparser(XMLParserObject * self)3807 _check_xmlparser(XMLParserObject* self)
3808 {
3809     if (self->target == NULL) {
3810         PyErr_SetString(PyExc_ValueError,
3811                         "XMLParser.__init__() wasn't called");
3812         return 0;
3813     }
3814     return 1;
3815 }
3816 
3817 LOCAL(PyObject*)
expat_parse(XMLParserObject * self,const char * data,int data_len,int final)3818 expat_parse(XMLParserObject* self, const char* data, int data_len, int final)
3819 {
3820     int ok;
3821 
3822     assert(!PyErr_Occurred());
3823     ok = EXPAT(Parse)(self->parser, data, data_len, final);
3824 
3825     if (PyErr_Occurred())
3826         return NULL;
3827 
3828     if (!ok) {
3829         expat_set_error(
3830             EXPAT(GetErrorCode)(self->parser),
3831             EXPAT(GetErrorLineNumber)(self->parser),
3832             EXPAT(GetErrorColumnNumber)(self->parser),
3833             NULL
3834             );
3835         return NULL;
3836     }
3837 
3838     Py_RETURN_NONE;
3839 }
3840 
3841 /*[clinic input]
3842 _elementtree.XMLParser.close
3843 
3844 [clinic start generated code]*/
3845 
3846 static PyObject *
_elementtree_XMLParser_close_impl(XMLParserObject * self)3847 _elementtree_XMLParser_close_impl(XMLParserObject *self)
3848 /*[clinic end generated code: output=d68d375dd23bc7fb input=ca7909ca78c3abfe]*/
3849 {
3850     /* end feeding data to parser */
3851 
3852     PyObject* res;
3853 
3854     if (!_check_xmlparser(self)) {
3855         return NULL;
3856     }
3857     res = expat_parse(self, "", 0, 1);
3858     if (!res)
3859         return NULL;
3860 
3861     if (TreeBuilder_CheckExact(self->target)) {
3862         Py_DECREF(res);
3863         return treebuilder_done((TreeBuilderObject*) self->target);
3864     }
3865     else if (self->handle_close) {
3866         Py_DECREF(res);
3867         return PyObject_CallNoArgs(self->handle_close);
3868     }
3869     else {
3870         return res;
3871     }
3872 }
3873 
3874 /*[clinic input]
3875 _elementtree.XMLParser.feed
3876 
3877     data: object
3878     /
3879 
3880 [clinic start generated code]*/
3881 
3882 static PyObject *
_elementtree_XMLParser_feed(XMLParserObject * self,PyObject * data)3883 _elementtree_XMLParser_feed(XMLParserObject *self, PyObject *data)
3884 /*[clinic end generated code: output=e42b6a78eec7446d input=fe231b6b8de3ce1f]*/
3885 {
3886     /* feed data to parser */
3887 
3888     if (!_check_xmlparser(self)) {
3889         return NULL;
3890     }
3891     if (PyUnicode_Check(data)) {
3892         Py_ssize_t data_len;
3893         const char *data_ptr = PyUnicode_AsUTF8AndSize(data, &data_len);
3894         if (data_ptr == NULL)
3895             return NULL;
3896         if (data_len > INT_MAX) {
3897             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3898             return NULL;
3899         }
3900         /* Explicitly set UTF-8 encoding. Return code ignored. */
3901         (void)EXPAT(SetEncoding)(self->parser, "utf-8");
3902         return expat_parse(self, data_ptr, (int)data_len, 0);
3903     }
3904     else {
3905         Py_buffer view;
3906         PyObject *res;
3907         if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
3908             return NULL;
3909         if (view.len > INT_MAX) {
3910             PyBuffer_Release(&view);
3911             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3912             return NULL;
3913         }
3914         res = expat_parse(self, view.buf, (int)view.len, 0);
3915         PyBuffer_Release(&view);
3916         return res;
3917     }
3918 }
3919 
3920 /*[clinic input]
3921 _elementtree.XMLParser._parse_whole
3922 
3923     file: object
3924     /
3925 
3926 [clinic start generated code]*/
3927 
3928 static PyObject *
_elementtree_XMLParser__parse_whole(XMLParserObject * self,PyObject * file)3929 _elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file)
3930 /*[clinic end generated code: output=f797197bb818dda3 input=19ecc893b6f3e752]*/
3931 {
3932     /* (internal) parse the whole input, until end of stream */
3933     PyObject* reader;
3934     PyObject* buffer;
3935     PyObject* temp;
3936     PyObject* res;
3937 
3938     if (!_check_xmlparser(self)) {
3939         return NULL;
3940     }
3941     reader = PyObject_GetAttrString(file, "read");
3942     if (!reader)
3943         return NULL;
3944 
3945     /* read from open file object */
3946     for (;;) {
3947 
3948         buffer = PyObject_CallFunction(reader, "i", 64*1024);
3949 
3950         if (!buffer) {
3951             /* read failed (e.g. due to KeyboardInterrupt) */
3952             Py_DECREF(reader);
3953             return NULL;
3954         }
3955 
3956         if (PyUnicode_CheckExact(buffer)) {
3957             /* A unicode object is encoded into bytes using UTF-8 */
3958             if (PyUnicode_GET_LENGTH(buffer) == 0) {
3959                 Py_DECREF(buffer);
3960                 break;
3961             }
3962             temp = PyUnicode_AsEncodedString(buffer, "utf-8", "surrogatepass");
3963             Py_DECREF(buffer);
3964             if (!temp) {
3965                 /* Propagate exception from PyUnicode_AsEncodedString */
3966                 Py_DECREF(reader);
3967                 return NULL;
3968             }
3969             buffer = temp;
3970         }
3971         else if (!PyBytes_CheckExact(buffer) || PyBytes_GET_SIZE(buffer) == 0) {
3972             Py_DECREF(buffer);
3973             break;
3974         }
3975 
3976         if (PyBytes_GET_SIZE(buffer) > INT_MAX) {
3977             Py_DECREF(buffer);
3978             Py_DECREF(reader);
3979             PyErr_SetString(PyExc_OverflowError, "size does not fit in an int");
3980             return NULL;
3981         }
3982         res = expat_parse(
3983             self, PyBytes_AS_STRING(buffer), (int)PyBytes_GET_SIZE(buffer), 0
3984             );
3985 
3986         Py_DECREF(buffer);
3987 
3988         if (!res) {
3989             Py_DECREF(reader);
3990             return NULL;
3991         }
3992         Py_DECREF(res);
3993 
3994     }
3995 
3996     Py_DECREF(reader);
3997 
3998     res = expat_parse(self, "", 0, 1);
3999 
4000     if (res && TreeBuilder_CheckExact(self->target)) {
4001         Py_DECREF(res);
4002         return treebuilder_done((TreeBuilderObject*) self->target);
4003     }
4004 
4005     return res;
4006 }
4007 
4008 /*[clinic input]
4009 _elementtree.XMLParser._setevents
4010 
4011     events_queue: object
4012     events_to_report: object = None
4013     /
4014 
4015 [clinic start generated code]*/
4016 
4017 static PyObject *
_elementtree_XMLParser__setevents_impl(XMLParserObject * self,PyObject * events_queue,PyObject * events_to_report)4018 _elementtree_XMLParser__setevents_impl(XMLParserObject *self,
4019                                        PyObject *events_queue,
4020                                        PyObject *events_to_report)
4021 /*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
4022 {
4023     /* activate element event reporting */
4024     Py_ssize_t i;
4025     TreeBuilderObject *target;
4026     PyObject *events_append, *events_seq;
4027 
4028     if (!_check_xmlparser(self)) {
4029         return NULL;
4030     }
4031     if (!TreeBuilder_CheckExact(self->target)) {
4032         PyErr_SetString(
4033             PyExc_TypeError,
4034             "event handling only supported for ElementTree.TreeBuilder "
4035             "targets"
4036             );
4037         return NULL;
4038     }
4039 
4040     target = (TreeBuilderObject*) self->target;
4041 
4042     events_append = PyObject_GetAttrString(events_queue, "append");
4043     if (events_append == NULL)
4044         return NULL;
4045     Py_XSETREF(target->events_append, events_append);
4046 
4047     /* clear out existing events */
4048     Py_CLEAR(target->start_event_obj);
4049     Py_CLEAR(target->end_event_obj);
4050     Py_CLEAR(target->start_ns_event_obj);
4051     Py_CLEAR(target->end_ns_event_obj);
4052     Py_CLEAR(target->comment_event_obj);
4053     Py_CLEAR(target->pi_event_obj);
4054 
4055     if (events_to_report == Py_None) {
4056         /* default is "end" only */
4057         target->end_event_obj = PyUnicode_FromString("end");
4058         Py_RETURN_NONE;
4059     }
4060 
4061     if (!(events_seq = PySequence_Fast(events_to_report,
4062                                        "events must be a sequence"))) {
4063         return NULL;
4064     }
4065 
4066     for (i = 0; i < PySequence_Fast_GET_SIZE(events_seq); ++i) {
4067         PyObject *event_name_obj = PySequence_Fast_GET_ITEM(events_seq, i);
4068         const char *event_name = NULL;
4069         if (PyUnicode_Check(event_name_obj)) {
4070             event_name = PyUnicode_AsUTF8(event_name_obj);
4071         } else if (PyBytes_Check(event_name_obj)) {
4072             event_name = PyBytes_AS_STRING(event_name_obj);
4073         }
4074         if (event_name == NULL) {
4075             Py_DECREF(events_seq);
4076             PyErr_Format(PyExc_ValueError, "invalid events sequence");
4077             return NULL;
4078         }
4079 
4080         Py_INCREF(event_name_obj);
4081         if (strcmp(event_name, "start") == 0) {
4082             Py_XSETREF(target->start_event_obj, event_name_obj);
4083         } else if (strcmp(event_name, "end") == 0) {
4084             Py_XSETREF(target->end_event_obj, event_name_obj);
4085         } else if (strcmp(event_name, "start-ns") == 0) {
4086             Py_XSETREF(target->start_ns_event_obj, event_name_obj);
4087             EXPAT(SetNamespaceDeclHandler)(
4088                 self->parser,
4089                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4090                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4091                 );
4092         } else if (strcmp(event_name, "end-ns") == 0) {
4093             Py_XSETREF(target->end_ns_event_obj, event_name_obj);
4094             EXPAT(SetNamespaceDeclHandler)(
4095                 self->parser,
4096                 (XML_StartNamespaceDeclHandler) expat_start_ns_handler,
4097                 (XML_EndNamespaceDeclHandler) expat_end_ns_handler
4098                 );
4099         } else if (strcmp(event_name, "comment") == 0) {
4100             Py_XSETREF(target->comment_event_obj, event_name_obj);
4101             EXPAT(SetCommentHandler)(
4102                 self->parser,
4103                 (XML_CommentHandler) expat_comment_handler
4104                 );
4105         } else if (strcmp(event_name, "pi") == 0) {
4106             Py_XSETREF(target->pi_event_obj, event_name_obj);
4107             EXPAT(SetProcessingInstructionHandler)(
4108                 self->parser,
4109                 (XML_ProcessingInstructionHandler) expat_pi_handler
4110                 );
4111         } else {
4112             Py_DECREF(event_name_obj);
4113             Py_DECREF(events_seq);
4114             PyErr_Format(PyExc_ValueError, "unknown event '%s'", event_name);
4115             return NULL;
4116         }
4117     }
4118 
4119     Py_DECREF(events_seq);
4120     Py_RETURN_NONE;
4121 }
4122 
4123 static PyMemberDef xmlparser_members[] = {
4124     {"entity", T_OBJECT, offsetof(XMLParserObject, entity), READONLY, NULL},
4125     {"target", T_OBJECT, offsetof(XMLParserObject, target), READONLY, NULL},
4126     {NULL}
4127 };
4128 
4129 static PyObject*
xmlparser_version_getter(XMLParserObject * self,void * closure)4130 xmlparser_version_getter(XMLParserObject *self, void *closure)
4131 {
4132     return PyUnicode_FromFormat(
4133         "Expat %d.%d.%d", XML_MAJOR_VERSION,
4134         XML_MINOR_VERSION, XML_MICRO_VERSION);
4135 }
4136 
4137 static PyGetSetDef xmlparser_getsetlist[] = {
4138     {"version", (getter)xmlparser_version_getter, NULL, NULL},
4139     {NULL},
4140 };
4141 
4142 #include "clinic/_elementtree.c.h"
4143 
4144 static PyMethodDef element_methods[] = {
4145 
4146     _ELEMENTTREE_ELEMENT_CLEAR_METHODDEF
4147 
4148     _ELEMENTTREE_ELEMENT_GET_METHODDEF
4149     _ELEMENTTREE_ELEMENT_SET_METHODDEF
4150 
4151     _ELEMENTTREE_ELEMENT_FIND_METHODDEF
4152     _ELEMENTTREE_ELEMENT_FINDTEXT_METHODDEF
4153     _ELEMENTTREE_ELEMENT_FINDALL_METHODDEF
4154 
4155     _ELEMENTTREE_ELEMENT_APPEND_METHODDEF
4156     _ELEMENTTREE_ELEMENT_EXTEND_METHODDEF
4157     _ELEMENTTREE_ELEMENT_INSERT_METHODDEF
4158     _ELEMENTTREE_ELEMENT_REMOVE_METHODDEF
4159 
4160     _ELEMENTTREE_ELEMENT_ITER_METHODDEF
4161     _ELEMENTTREE_ELEMENT_ITERTEXT_METHODDEF
4162     _ELEMENTTREE_ELEMENT_ITERFIND_METHODDEF
4163 
4164     _ELEMENTTREE_ELEMENT_ITEMS_METHODDEF
4165     _ELEMENTTREE_ELEMENT_KEYS_METHODDEF
4166 
4167     _ELEMENTTREE_ELEMENT_MAKEELEMENT_METHODDEF
4168 
4169     _ELEMENTTREE_ELEMENT___COPY___METHODDEF
4170     _ELEMENTTREE_ELEMENT___DEEPCOPY___METHODDEF
4171     _ELEMENTTREE_ELEMENT___SIZEOF___METHODDEF
4172     _ELEMENTTREE_ELEMENT___GETSTATE___METHODDEF
4173     _ELEMENTTREE_ELEMENT___SETSTATE___METHODDEF
4174 
4175     {NULL, NULL}
4176 };
4177 
4178 static PyMappingMethods element_as_mapping = {
4179     (lenfunc) element_length,
4180     (binaryfunc) element_subscr,
4181     (objobjargproc) element_ass_subscr,
4182 };
4183 
4184 static PyGetSetDef element_getsetlist[] = {
4185     {"tag",
4186         (getter)element_tag_getter,
4187         (setter)element_tag_setter,
4188         "A string identifying what kind of data this element represents"},
4189     {"text",
4190         (getter)element_text_getter,
4191         (setter)element_text_setter,
4192         "A string of text directly after the start tag, or None"},
4193     {"tail",
4194         (getter)element_tail_getter,
4195         (setter)element_tail_setter,
4196         "A string of text directly after the end tag, or None"},
4197     {"attrib",
4198         (getter)element_attrib_getter,
4199         (setter)element_attrib_setter,
4200         "A dictionary containing the element's attributes"},
4201     {NULL},
4202 };
4203 
4204 static PyTypeObject Element_Type = {
4205     PyVarObject_HEAD_INIT(NULL, 0)
4206     "xml.etree.ElementTree.Element", sizeof(ElementObject), 0,
4207     /* methods */
4208     (destructor)element_dealloc,                    /* tp_dealloc */
4209     0,                                              /* tp_vectorcall_offset */
4210     0,                                              /* tp_getattr */
4211     0,                                              /* tp_setattr */
4212     0,                                              /* tp_as_async */
4213     (reprfunc)element_repr,                         /* tp_repr */
4214     0,                                              /* tp_as_number */
4215     &element_as_sequence,                           /* tp_as_sequence */
4216     &element_as_mapping,                            /* tp_as_mapping */
4217     0,                                              /* tp_hash */
4218     0,                                              /* tp_call */
4219     0,                                              /* tp_str */
4220     PyObject_GenericGetAttr,                        /* tp_getattro */
4221     0,                                              /* tp_setattro */
4222     0,                                              /* tp_as_buffer */
4223     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4224                                                     /* tp_flags */
4225     0,                                              /* tp_doc */
4226     (traverseproc)element_gc_traverse,              /* tp_traverse */
4227     (inquiry)element_gc_clear,                      /* tp_clear */
4228     0,                                              /* tp_richcompare */
4229     offsetof(ElementObject, weakreflist),           /* tp_weaklistoffset */
4230     0,                                              /* tp_iter */
4231     0,                                              /* tp_iternext */
4232     element_methods,                                /* tp_methods */
4233     0,                                              /* tp_members */
4234     element_getsetlist,                             /* tp_getset */
4235     0,                                              /* tp_base */
4236     0,                                              /* tp_dict */
4237     0,                                              /* tp_descr_get */
4238     0,                                              /* tp_descr_set */
4239     0,                                              /* tp_dictoffset */
4240     (initproc)element_init,                         /* tp_init */
4241     PyType_GenericAlloc,                            /* tp_alloc */
4242     element_new,                                    /* tp_new */
4243     0,                                              /* tp_free */
4244 };
4245 
4246 static PyMethodDef treebuilder_methods[] = {
4247     _ELEMENTTREE_TREEBUILDER_DATA_METHODDEF
4248     _ELEMENTTREE_TREEBUILDER_START_METHODDEF
4249     _ELEMENTTREE_TREEBUILDER_END_METHODDEF
4250     _ELEMENTTREE_TREEBUILDER_COMMENT_METHODDEF
4251     _ELEMENTTREE_TREEBUILDER_PI_METHODDEF
4252     _ELEMENTTREE_TREEBUILDER_CLOSE_METHODDEF
4253     {NULL, NULL}
4254 };
4255 
4256 static PyTypeObject TreeBuilder_Type = {
4257     PyVarObject_HEAD_INIT(NULL, 0)
4258     "xml.etree.ElementTree.TreeBuilder", sizeof(TreeBuilderObject), 0,
4259     /* methods */
4260     (destructor)treebuilder_dealloc,                /* tp_dealloc */
4261     0,                                              /* tp_vectorcall_offset */
4262     0,                                              /* tp_getattr */
4263     0,                                              /* tp_setattr */
4264     0,                                              /* tp_as_async */
4265     0,                                              /* tp_repr */
4266     0,                                              /* tp_as_number */
4267     0,                                              /* tp_as_sequence */
4268     0,                                              /* tp_as_mapping */
4269     0,                                              /* tp_hash */
4270     0,                                              /* tp_call */
4271     0,                                              /* tp_str */
4272     0,                                              /* tp_getattro */
4273     0,                                              /* tp_setattro */
4274     0,                                              /* tp_as_buffer */
4275     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4276                                                     /* tp_flags */
4277     0,                                              /* tp_doc */
4278     (traverseproc)treebuilder_gc_traverse,          /* tp_traverse */
4279     (inquiry)treebuilder_gc_clear,                  /* tp_clear */
4280     0,                                              /* tp_richcompare */
4281     0,                                              /* tp_weaklistoffset */
4282     0,                                              /* tp_iter */
4283     0,                                              /* tp_iternext */
4284     treebuilder_methods,                            /* tp_methods */
4285     0,                                              /* tp_members */
4286     0,                                              /* tp_getset */
4287     0,                                              /* tp_base */
4288     0,                                              /* tp_dict */
4289     0,                                              /* tp_descr_get */
4290     0,                                              /* tp_descr_set */
4291     0,                                              /* tp_dictoffset */
4292     _elementtree_TreeBuilder___init__,              /* tp_init */
4293     PyType_GenericAlloc,                            /* tp_alloc */
4294     treebuilder_new,                                /* tp_new */
4295     0,                                              /* tp_free */
4296 };
4297 
4298 static PyMethodDef xmlparser_methods[] = {
4299     _ELEMENTTREE_XMLPARSER_FEED_METHODDEF
4300     _ELEMENTTREE_XMLPARSER_CLOSE_METHODDEF
4301     _ELEMENTTREE_XMLPARSER__PARSE_WHOLE_METHODDEF
4302     _ELEMENTTREE_XMLPARSER__SETEVENTS_METHODDEF
4303     {NULL, NULL}
4304 };
4305 
4306 static PyTypeObject XMLParser_Type = {
4307     PyVarObject_HEAD_INIT(NULL, 0)
4308     "xml.etree.ElementTree.XMLParser", sizeof(XMLParserObject), 0,
4309     /* methods */
4310     (destructor)xmlparser_dealloc,                  /* tp_dealloc */
4311     0,                                              /* tp_vectorcall_offset */
4312     0,                                              /* tp_getattr */
4313     0,                                              /* tp_setattr */
4314     0,                                              /* tp_as_async */
4315     0,                                              /* tp_repr */
4316     0,                                              /* tp_as_number */
4317     0,                                              /* tp_as_sequence */
4318     0,                                              /* tp_as_mapping */
4319     0,                                              /* tp_hash */
4320     0,                                              /* tp_call */
4321     0,                                              /* tp_str */
4322     0,                                              /* tp_getattro */
4323     0,                                              /* tp_setattro */
4324     0,                                              /* tp_as_buffer */
4325     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4326                                                     /* tp_flags */
4327     0,                                              /* tp_doc */
4328     (traverseproc)xmlparser_gc_traverse,            /* tp_traverse */
4329     (inquiry)xmlparser_gc_clear,                    /* tp_clear */
4330     0,                                              /* tp_richcompare */
4331     0,                                              /* tp_weaklistoffset */
4332     0,                                              /* tp_iter */
4333     0,                                              /* tp_iternext */
4334     xmlparser_methods,                              /* tp_methods */
4335     xmlparser_members,                              /* tp_members */
4336     xmlparser_getsetlist,                           /* tp_getset */
4337     0,                                              /* tp_base */
4338     0,                                              /* tp_dict */
4339     0,                                              /* tp_descr_get */
4340     0,                                              /* tp_descr_set */
4341     0,                                              /* tp_dictoffset */
4342     _elementtree_XMLParser___init__,                /* tp_init */
4343     PyType_GenericAlloc,                            /* tp_alloc */
4344     xmlparser_new,                                  /* tp_new */
4345     0,                                              /* tp_free */
4346 };
4347 
4348 /* ==================================================================== */
4349 /* python module interface */
4350 
4351 static PyMethodDef _functions[] = {
4352     {"SubElement", _PyCFunction_CAST(subelement), METH_VARARGS | METH_KEYWORDS},
4353     _ELEMENTTREE__SET_FACTORIES_METHODDEF
4354     {NULL, NULL}
4355 };
4356 
4357 
4358 static struct PyModuleDef elementtreemodule = {
4359     PyModuleDef_HEAD_INIT,
4360     "_elementtree",
4361     NULL,
4362     sizeof(elementtreestate),
4363     _functions,
4364     NULL,
4365     elementtree_traverse,
4366     elementtree_clear,
4367     elementtree_free
4368 };
4369 
4370 PyMODINIT_FUNC
PyInit__elementtree(void)4371 PyInit__elementtree(void)
4372 {
4373     PyObject *m, *temp;
4374     elementtreestate *st;
4375 
4376     m = PyState_FindModule(&elementtreemodule);
4377     if (m) {
4378         Py_INCREF(m);
4379         return m;
4380     }
4381 
4382     /* Initialize object types */
4383     if (PyType_Ready(&ElementIter_Type) < 0)
4384         return NULL;
4385     if (PyType_Ready(&TreeBuilder_Type) < 0)
4386         return NULL;
4387     if (PyType_Ready(&Element_Type) < 0)
4388         return NULL;
4389     if (PyType_Ready(&XMLParser_Type) < 0)
4390         return NULL;
4391 
4392     m = PyModule_Create(&elementtreemodule);
4393     if (!m)
4394         return NULL;
4395     st = get_elementtree_state(m);
4396 
4397     if (!(temp = PyImport_ImportModule("copy")))
4398         return NULL;
4399     st->deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
4400     Py_XDECREF(temp);
4401 
4402     if (st->deepcopy_obj == NULL) {
4403         return NULL;
4404     }
4405 
4406     assert(!PyErr_Occurred());
4407     if (!(st->elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
4408         return NULL;
4409 
4410     /* link against pyexpat */
4411     expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
4412     if (expat_capi) {
4413         /* check that it's usable */
4414         if (strcmp(expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||
4415             (size_t)expat_capi->size < sizeof(struct PyExpat_CAPI) ||
4416             expat_capi->MAJOR_VERSION != XML_MAJOR_VERSION ||
4417             expat_capi->MINOR_VERSION != XML_MINOR_VERSION ||
4418             expat_capi->MICRO_VERSION != XML_MICRO_VERSION) {
4419             PyErr_SetString(PyExc_ImportError,
4420                             "pyexpat version is incompatible");
4421             return NULL;
4422         }
4423     } else {
4424         return NULL;
4425     }
4426 
4427     st->parseerror_obj = PyErr_NewException(
4428         "xml.etree.ElementTree.ParseError", PyExc_SyntaxError, NULL
4429         );
4430     Py_INCREF(st->parseerror_obj);
4431     if (PyModule_AddObject(m, "ParseError", st->parseerror_obj) < 0) {
4432         Py_DECREF(st->parseerror_obj);
4433         return NULL;
4434     }
4435 
4436     PyTypeObject *types[] = {
4437         &Element_Type,
4438         &TreeBuilder_Type,
4439         &XMLParser_Type
4440     };
4441 
4442     for (size_t i = 0; i < Py_ARRAY_LENGTH(types); i++) {
4443         if (PyModule_AddType(m, types[i]) < 0) {
4444             return NULL;
4445         }
4446     }
4447 
4448     return m;
4449 }
4450