/* pickle accelerator C extension: _pickle module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
6 
7 #ifndef Py_BUILD_CORE_BUILTIN
8 #  define Py_BUILD_CORE_MODULE 1
9 #endif
10 
11 #include "Python.h"
12 #include "pycore_ceval.h"         // _Py_EnterRecursiveCall()
13 #include "pycore_moduleobject.h"  // _PyModule_GetState()
14 #include "pycore_runtime.h"       // _Py_ID()
15 #include "pycore_pystate.h"       // _PyThreadState_GET()
16 #include "structmember.h"         // PyMemberDef
17 
18 #include <stdlib.h>               // strtol()
19 
20 PyDoc_STRVAR(pickle_module_doc,
21 "Optimized C implementation for the Python pickle module.");
22 
23 /*[clinic input]
24 module _pickle
25 class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
26 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
27 class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
28 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
29 [clinic start generated code]*/
30 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
31 
/* Pickle protocol versions known to this module.

   DEFAULT_PROTOCOL should be bumped only once the oldest Python version that
   is still supported already includes it.
   HIGHEST_PROTOCOL should be bumped whenever new opcodes are added to the
   pickle protocol. */
enum {
    DEFAULT_PROTOCOL = 4,
    HIGHEST_PROTOCOL = 5
};
39 
#ifdef MS_WINDOWS
/* windows.h (pulled in via pycore_runtime.h) already typedefs these three
   names, so the opcode spellings below are redirected to suffixed ones. */
#  define FLOAT FLOAT_
#  define INT INT_
#  define LONG LONG_
#endif

/* Pickle opcodes, one character constant each.

   This list must be kept updated with pickle.py; extensive documentation of
   every opcode lives in pickletools.py.  Within each section the entries are
   listed in ascending order of their opcode character. */
enum opcode {
    /* Opcodes listed ahead of the protocol 2 additions. */
    MARK            = '(',
    EMPTY_TUPLE     = ')',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    BINFLOAT        = 'G',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    EMPTY_LIST      = ']',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    SETITEMS        = 'u',
    EMPTY_DICT      = '}',

    /* Protocol 2. */
    PROTO    = '\x80',
    NEWOBJ   = '\x81',
    EXT1     = '\x82',
    EXT2     = '\x83',
    EXT4     = '\x84',
    TUPLE1   = '\x85',
    TUPLE2   = '\x86',
    TUPLE3   = '\x87',
    NEWTRUE  = '\x88',
    NEWFALSE = '\x89',
    LONG1    = '\x8a',
    LONG4    = '\x8b',

    /* Protocol 3 (Python 3.x). */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4. */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95',

    /* Protocol 5. */
    BYTEARRAY8      = '\x96',
    NEXT_BUFFER     = '\x97',
    READONLY_BUFFER = '\x98'
};
127 
/* Tuning constants for the pickler and unpickler. */
enum {
    /* Number of elements batch_list/dict() emits before issuing
       APPENDS/SETITEMS.  Keep in sync with pickle.Pickler._BATCHSIZE;
       nothing breaks if the two drift apart, but it is unclear that a
       mismatch would help anything either. */
    BATCHSIZE = 1000,

    /* In "fast mode", nesting depth at which the Pickler starts checking
       for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size used when unpickling (disabled on unpeekable
       streams). */
    PREFETCH = 16 * 8192,

    /* Framing parameters (undocumented in the original source; judging by
       the names: minimum frame size, target frame size and size of a frame
       header -- TODO confirm against the framing code). */
    FRAME_SIZE_MIN = 4,
    FRAME_SIZE_TARGET = 64 * 1024,
    FRAME_HEADER_SIZE = 9
};
149 
150 /*************************************************************************/
151 
152 /* State of the pickle module, per PEP 3121. */
153 typedef struct {
154     /* Exception classes for pickle. */
155     PyObject *PickleError;
156     PyObject *PicklingError;
157     PyObject *UnpicklingError;
158 
159     /* copyreg.dispatch_table, {type_object: pickling_function} */
160     PyObject *dispatch_table;
161 
162     /* For the extension opcodes EXT1, EXT2 and EXT4. */
163 
164     /* copyreg._extension_registry, {(module_name, function_name): code} */
165     PyObject *extension_registry;
166     /* copyreg._extension_cache, {code: object} */
167     PyObject *extension_cache;
168     /* copyreg._inverted_registry, {code: (module_name, function_name)} */
169     PyObject *inverted_registry;
170 
171     /* Import mappings for compatibility with Python 2.x */
172 
173     /* _compat_pickle.NAME_MAPPING,
174        {(oldmodule, oldname): (newmodule, newname)} */
175     PyObject *name_mapping_2to3;
176     /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
177     PyObject *import_mapping_2to3;
178     /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
179     PyObject *name_mapping_3to2;
180     PyObject *import_mapping_3to2;
181 
182     /* codecs.encode, used for saving bytes in older protocols */
183     PyObject *codecs_encode;
184     /* builtins.getattr, used for saving nested names with protocol < 4 */
185     PyObject *getattr;
186     /* functools.partial, used for implementing __newobj_ex__ with protocols
187        2 and 3 */
188     PyObject *partial;
189 } PickleState;
190 
191 /* Forward declaration of the _pickle module definition. */
192 static struct PyModuleDef _picklemodule;
193 
194 /* Given a module object, get its per-module state. */
195 static PickleState *
_Pickle_GetState(PyObject * module)196 _Pickle_GetState(PyObject *module)
197 {
198     return (PickleState *)_PyModule_GetState(module);
199 }
200 
201 /* Find the module instance imported in the currently running sub-interpreter
202    and get its state. */
203 static PickleState *
_Pickle_GetGlobalState(void)204 _Pickle_GetGlobalState(void)
205 {
206     return _Pickle_GetState(PyState_FindModule(&_picklemodule));
207 }
208 
209 /* Clear the given pickle module state. */
210 static void
_Pickle_ClearState(PickleState * st)211 _Pickle_ClearState(PickleState *st)
212 {
213     Py_CLEAR(st->PickleError);
214     Py_CLEAR(st->PicklingError);
215     Py_CLEAR(st->UnpicklingError);
216     Py_CLEAR(st->dispatch_table);
217     Py_CLEAR(st->extension_registry);
218     Py_CLEAR(st->extension_cache);
219     Py_CLEAR(st->inverted_registry);
220     Py_CLEAR(st->name_mapping_2to3);
221     Py_CLEAR(st->import_mapping_2to3);
222     Py_CLEAR(st->name_mapping_3to2);
223     Py_CLEAR(st->import_mapping_3to2);
224     Py_CLEAR(st->codecs_encode);
225     Py_CLEAR(st->getattr);
226     Py_CLEAR(st->partial);
227 }
228 
229 /* Initialize the given pickle module state. */
230 static int
_Pickle_InitState(PickleState * st)231 _Pickle_InitState(PickleState *st)
232 {
233     PyObject *copyreg = NULL;
234     PyObject *compat_pickle = NULL;
235     PyObject *codecs = NULL;
236     PyObject *functools = NULL;
237 
238     st->getattr = _PyEval_GetBuiltin(&_Py_ID(getattr));
239     if (st->getattr == NULL)
240         goto error;
241 
242     copyreg = PyImport_ImportModule("copyreg");
243     if (!copyreg)
244         goto error;
245     st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
246     if (!st->dispatch_table)
247         goto error;
248     if (!PyDict_CheckExact(st->dispatch_table)) {
249         PyErr_Format(PyExc_RuntimeError,
250                      "copyreg.dispatch_table should be a dict, not %.200s",
251                      Py_TYPE(st->dispatch_table)->tp_name);
252         goto error;
253     }
254     st->extension_registry = \
255         PyObject_GetAttrString(copyreg, "_extension_registry");
256     if (!st->extension_registry)
257         goto error;
258     if (!PyDict_CheckExact(st->extension_registry)) {
259         PyErr_Format(PyExc_RuntimeError,
260                      "copyreg._extension_registry should be a dict, "
261                      "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
262         goto error;
263     }
264     st->inverted_registry = \
265         PyObject_GetAttrString(copyreg, "_inverted_registry");
266     if (!st->inverted_registry)
267         goto error;
268     if (!PyDict_CheckExact(st->inverted_registry)) {
269         PyErr_Format(PyExc_RuntimeError,
270                      "copyreg._inverted_registry should be a dict, "
271                      "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
272         goto error;
273     }
274     st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
275     if (!st->extension_cache)
276         goto error;
277     if (!PyDict_CheckExact(st->extension_cache)) {
278         PyErr_Format(PyExc_RuntimeError,
279                      "copyreg._extension_cache should be a dict, "
280                      "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
281         goto error;
282     }
283     Py_CLEAR(copyreg);
284 
285     /* Load the 2.x -> 3.x stdlib module mapping tables */
286     compat_pickle = PyImport_ImportModule("_compat_pickle");
287     if (!compat_pickle)
288         goto error;
289     st->name_mapping_2to3 = \
290         PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
291     if (!st->name_mapping_2to3)
292         goto error;
293     if (!PyDict_CheckExact(st->name_mapping_2to3)) {
294         PyErr_Format(PyExc_RuntimeError,
295                      "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
296                      Py_TYPE(st->name_mapping_2to3)->tp_name);
297         goto error;
298     }
299     st->import_mapping_2to3 = \
300         PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
301     if (!st->import_mapping_2to3)
302         goto error;
303     if (!PyDict_CheckExact(st->import_mapping_2to3)) {
304         PyErr_Format(PyExc_RuntimeError,
305                      "_compat_pickle.IMPORT_MAPPING should be a dict, "
306                      "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
307         goto error;
308     }
309     /* ... and the 3.x -> 2.x mapping tables */
310     st->name_mapping_3to2 = \
311         PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
312     if (!st->name_mapping_3to2)
313         goto error;
314     if (!PyDict_CheckExact(st->name_mapping_3to2)) {
315         PyErr_Format(PyExc_RuntimeError,
316                      "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
317                      "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
318         goto error;
319     }
320     st->import_mapping_3to2 = \
321         PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
322     if (!st->import_mapping_3to2)
323         goto error;
324     if (!PyDict_CheckExact(st->import_mapping_3to2)) {
325         PyErr_Format(PyExc_RuntimeError,
326                      "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
327                      "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
328         goto error;
329     }
330     Py_CLEAR(compat_pickle);
331 
332     codecs = PyImport_ImportModule("codecs");
333     if (codecs == NULL)
334         goto error;
335     st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
336     if (st->codecs_encode == NULL) {
337         goto error;
338     }
339     if (!PyCallable_Check(st->codecs_encode)) {
340         PyErr_Format(PyExc_RuntimeError,
341                      "codecs.encode should be a callable, not %.200s",
342                      Py_TYPE(st->codecs_encode)->tp_name);
343         goto error;
344     }
345     Py_CLEAR(codecs);
346 
347     functools = PyImport_ImportModule("functools");
348     if (!functools)
349         goto error;
350     st->partial = PyObject_GetAttrString(functools, "partial");
351     if (!st->partial)
352         goto error;
353     Py_CLEAR(functools);
354 
355     return 0;
356 
357   error:
358     Py_CLEAR(copyreg);
359     Py_CLEAR(compat_pickle);
360     Py_CLEAR(codecs);
361     Py_CLEAR(functools);
362     _Pickle_ClearState(st);
363     return -1;
364 }
365 
366 /* Helper for calling a function with a single argument quickly.
367 
368    This function steals the reference of the given argument. */
369 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)370 _Pickle_FastCall(PyObject *func, PyObject *obj)
371 {
372     PyObject *result;
373 
374     result = PyObject_CallOneArg(func, obj);
375     Py_DECREF(obj);
376     return result;
377 }
378 
379 /*************************************************************************/
380 
381 /* Retrieve and deconstruct a method for avoiding a reference cycle
382    (pickler -> bound method of pickler -> pickler) */
383 static int
init_method_ref(PyObject * self,PyObject * name,PyObject ** method_func,PyObject ** method_self)384 init_method_ref(PyObject *self, PyObject *name,
385                 PyObject **method_func, PyObject **method_self)
386 {
387     PyObject *func, *func2;
388     int ret;
389 
390     /* *method_func and *method_self should be consistent.  All refcount decrements
391        should be occurred after setting *method_self and *method_func. */
392     ret = _PyObject_LookupAttr(self, name, &func);
393     if (func == NULL) {
394         *method_self = NULL;
395         Py_CLEAR(*method_func);
396         return ret;
397     }
398 
399     if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
400         /* Deconstruct a bound Python method */
401         func2 = PyMethod_GET_FUNCTION(func);
402         Py_INCREF(func2);
403         *method_self = self; /* borrowed */
404         Py_XSETREF(*method_func, func2);
405         Py_DECREF(func);
406         return 0;
407     }
408     else {
409         *method_self = NULL;
410         Py_XSETREF(*method_func, func);
411         return 0;
412     }
413 }
414 
415 /* Bind a method if it was deconstructed */
416 static PyObject *
reconstruct_method(PyObject * func,PyObject * self)417 reconstruct_method(PyObject *func, PyObject *self)
418 {
419     if (self) {
420         return PyMethod_New(func, self);
421     }
422     else {
423         Py_INCREF(func);
424         return func;
425     }
426 }
427 
428 static PyObject *
call_method(PyObject * func,PyObject * self,PyObject * obj)429 call_method(PyObject *func, PyObject *self, PyObject *obj)
430 {
431     if (self) {
432         return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
433     }
434     else {
435         return PyObject_CallOneArg(func, obj);
436     }
437 }
438 
439 /*************************************************************************/
440 
441 /* Internal data type used as the unpickling stack. */
442 typedef struct {
443     PyObject_VAR_HEAD
444     PyObject **data;
445     int mark_set;          /* is MARK set? */
446     Py_ssize_t fence;      /* position of top MARK or 0 */
447     Py_ssize_t allocated;  /* number of slots in data allocated */
448 } Pdata;
449 
450 static void
Pdata_dealloc(Pdata * self)451 Pdata_dealloc(Pdata *self)
452 {
453     Py_ssize_t i = Py_SIZE(self);
454     while (--i >= 0) {
455         Py_DECREF(self->data[i]);
456     }
457     PyMem_Free(self->data);
458     PyObject_Free(self);
459 }
460 
461 static PyTypeObject Pdata_Type = {
462     PyVarObject_HEAD_INIT(NULL, 0)
463     "_pickle.Pdata",              /*tp_name*/
464     sizeof(Pdata),                /*tp_basicsize*/
465     sizeof(PyObject *),           /*tp_itemsize*/
466     (destructor)Pdata_dealloc,    /*tp_dealloc*/
467 };
468 
469 static PyObject *
Pdata_New(void)470 Pdata_New(void)
471 {
472     Pdata *self;
473 
474     if (!(self = PyObject_New(Pdata, &Pdata_Type)))
475         return NULL;
476     Py_SET_SIZE(self, 0);
477     self->mark_set = 0;
478     self->fence = 0;
479     self->allocated = 8;
480     self->data = PyMem_Malloc(self->allocated * sizeof(PyObject *));
481     if (self->data)
482         return (PyObject *)self;
483     Py_DECREF(self);
484     return PyErr_NoMemory();
485 }
486 
487 
488 /* Retain only the initial clearto items.  If clearto >= the current
489  * number of items, this is a (non-erroneous) NOP.
490  */
491 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)492 Pdata_clear(Pdata *self, Py_ssize_t clearto)
493 {
494     Py_ssize_t i = Py_SIZE(self);
495 
496     assert(clearto >= self->fence);
497     if (clearto >= i)
498         return 0;
499 
500     while (--i >= clearto) {
501         Py_CLEAR(self->data[i]);
502     }
503     Py_SET_SIZE(self, clearto);
504     return 0;
505 }
506 
507 static int
Pdata_grow(Pdata * self)508 Pdata_grow(Pdata *self)
509 {
510     PyObject **data = self->data;
511     size_t allocated = (size_t)self->allocated;
512     size_t new_allocated;
513 
514     new_allocated = (allocated >> 3) + 6;
515     /* check for integer overflow */
516     if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
517         goto nomemory;
518     new_allocated += allocated;
519     PyMem_RESIZE(data, PyObject *, new_allocated);
520     if (data == NULL)
521         goto nomemory;
522 
523     self->data = data;
524     self->allocated = (Py_ssize_t)new_allocated;
525     return 0;
526 
527   nomemory:
528     PyErr_NoMemory();
529     return -1;
530 }
531 
532 static int
Pdata_stack_underflow(Pdata * self)533 Pdata_stack_underflow(Pdata *self)
534 {
535     PickleState *st = _Pickle_GetGlobalState();
536     PyErr_SetString(st->UnpicklingError,
537                     self->mark_set ?
538                     "unexpected MARK found" :
539                     "unpickling stack underflow");
540     return -1;
541 }
542 
543 /* D is a Pdata*.  Pop the topmost element and store it into V, which
544  * must be an lvalue holding PyObject*.  On stack underflow, UnpicklingError
545  * is raised and V is set to NULL.
546  */
547 static PyObject *
Pdata_pop(Pdata * self)548 Pdata_pop(Pdata *self)
549 {
550     if (Py_SIZE(self) <= self->fence) {
551         Pdata_stack_underflow(self);
552         return NULL;
553     }
554     Py_SET_SIZE(self, Py_SIZE(self) - 1);
555     return self->data[Py_SIZE(self)];
556 }
557 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
558 
559 static int
Pdata_push(Pdata * self,PyObject * obj)560 Pdata_push(Pdata *self, PyObject *obj)
561 {
562     if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
563         return -1;
564     }
565     self->data[Py_SIZE(self)] = obj;
566     Py_SET_SIZE(self, Py_SIZE(self) + 1);
567     return 0;
568 }
569 
/* Push O onto the Pdata stack D, transferring ownership of the reference to
   the stack.  If the push fails, ER is returned from the *enclosing*
   function.
   NOTE(review): on that failure path O is neither stored nor released by
   this macro. */
#define PDATA_PUSH(D, O, ER)                    \
    do {                                        \
        if (Pdata_push((D), (O)) < 0) {         \
            return (ER);                        \
        }                                       \
    } while (0)
573 
/* Push O onto the Pdata stack D while the caller keeps its own reference:
   a new reference is taken on behalf of the stack.

   If the push fails, Pdata_push() has not stored O, so the extra reference
   taken here is released again before ER is returned from the *enclosing*
   function (previously it was leaked on this path).

   O is evaluated more than once; pass an expression without side
   effects. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while (0)
578 
579 static PyObject *
Pdata_poptuple(Pdata * self,Py_ssize_t start)580 Pdata_poptuple(Pdata *self, Py_ssize_t start)
581 {
582     PyObject *tuple;
583     Py_ssize_t len, i, j;
584 
585     if (start < self->fence) {
586         Pdata_stack_underflow(self);
587         return NULL;
588     }
589     len = Py_SIZE(self) - start;
590     tuple = PyTuple_New(len);
591     if (tuple == NULL)
592         return NULL;
593     for (i = start, j = 0; j < len; i++, j++)
594         PyTuple_SET_ITEM(tuple, j, self->data[i]);
595 
596     Py_SET_SIZE(self, start);
597     return tuple;
598 }
599 
600 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)601 Pdata_poplist(Pdata *self, Py_ssize_t start)
602 {
603     PyObject *list;
604     Py_ssize_t len, i, j;
605 
606     len = Py_SIZE(self) - start;
607     list = PyList_New(len);
608     if (list == NULL)
609         return NULL;
610     for (i = start, j = 0; j < len; i++, j++)
611         PyList_SET_ITEM(list, j, self->data[i]);
612 
613     Py_SET_SIZE(self, start);
614     return list;
615 }
616 
617 typedef struct {
618     PyObject *me_key;
619     Py_ssize_t me_value;
620 } PyMemoEntry;
621 
622 typedef struct {
623     size_t mt_mask;
624     size_t mt_used;
625     size_t mt_allocated;
626     PyMemoEntry *mt_table;
627 } PyMemoTable;
628 
629 typedef struct PicklerObject {
630     PyObject_HEAD
631     PyMemoTable *memo;          /* Memo table, keep track of the seen
632                                    objects to support self-referential objects
633                                    pickling. */
634     PyObject *pers_func;        /* persistent_id() method, can be NULL */
635     PyObject *pers_func_self;   /* borrowed reference to self if pers_func
636                                    is an unbound method, NULL otherwise */
637     PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
638     PyObject *reducer_override; /* hook for invoking user-defined callbacks
639                                    instead of save_global when pickling
640                                    functions and classes*/
641 
642     PyObject *write;            /* write() method of the output stream. */
643     PyObject *output_buffer;    /* Write into a local bytearray buffer before
644                                    flushing to the stream. */
645     Py_ssize_t output_len;      /* Length of output_buffer. */
646     Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
647     int proto;                  /* Pickle protocol number, >= 0 */
648     int bin;                    /* Boolean, true if proto > 0 */
649     int framing;                /* True when framing is enabled, proto >= 4 */
650     Py_ssize_t frame_start;     /* Position in output_buffer where the
651                                    current frame begins. -1 if there
652                                    is no frame currently open. */
653 
654     Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
655     int fast;                   /* Enable fast mode if set to a true value.
656                                    The fast mode disable the usage of memo,
657                                    therefore speeding the pickling process by
658                                    not generating superfluous PUT opcodes. It
659                                    should not be used if with self-referential
660                                    objects. */
661     int fast_nesting;
662     int fix_imports;            /* Indicate whether Pickler should fix
663                                    the name of globals for Python 2.x. */
664     PyObject *fast_memo;
665     PyObject *buffer_callback;  /* Callback for out-of-band buffers, or NULL */
666 } PicklerObject;
667 
668 typedef struct UnpicklerObject {
669     PyObject_HEAD
670     Pdata *stack;               /* Pickle data stack, store unpickled objects. */
671 
672     /* The unpickler memo is just an array of PyObject *s. Using a dict
673        is unnecessary, since the keys are contiguous ints. */
674     PyObject **memo;
675     size_t memo_size;       /* Capacity of the memo array */
676     size_t memo_len;        /* Number of objects in the memo */
677 
678     PyObject *pers_func;        /* persistent_load() method, can be NULL. */
679     PyObject *pers_func_self;   /* borrowed reference to self if pers_func
680                                    is an unbound method, NULL otherwise */
681 
682     Py_buffer buffer;
683     char *input_buffer;
684     char *input_line;
685     Py_ssize_t input_len;
686     Py_ssize_t next_read_idx;
687     Py_ssize_t prefetched_idx;  /* index of first prefetched byte */
688 
689     PyObject *read;             /* read() method of the input stream. */
690     PyObject *readinto;         /* readinto() method of the input stream. */
691     PyObject *readline;         /* readline() method of the input stream. */
692     PyObject *peek;             /* peek() method of the input stream, or NULL */
693     PyObject *buffers;          /* iterable of out-of-band buffers, or NULL */
694 
695     char *encoding;             /* Name of the encoding to be used for
696                                    decoding strings pickled using Python
697                                    2.x. The default value is "ASCII" */
698     char *errors;               /* Name of errors handling scheme to used when
699                                    decoding strings. The default value is
700                                    "strict". */
701     Py_ssize_t *marks;          /* Mark stack, used for unpickling container
702                                    objects. */
703     Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
704     Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
705     int proto;                  /* Protocol of the pickle loaded. */
706     int fix_imports;            /* Indicate whether Unpickler should fix
707                                    the name of globals pickled by Python 2.x. */
708 } UnpicklerObject;
709 
710 typedef struct {
711     PyObject_HEAD
712     PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
713 }  PicklerMemoProxyObject;
714 
715 typedef struct {
716     PyObject_HEAD
717     UnpicklerObject *unpickler;
718 } UnpicklerMemoProxyObject;
719 
720 /* Forward declarations */
721 static int save(PicklerObject *, PyObject *, int);
722 static int save_reduce(PicklerObject *, PyObject *, PyObject *);
723 static PyTypeObject Pickler_Type;
724 static PyTypeObject Unpickler_Type;
725 
726 #include "clinic/_pickle.c.h"
727 
728 /*************************************************************************
729  A custom hashtable mapping void* to Python ints. This is used by the pickler
730  for memoization. Using a custom hashtable rather than PyDict allows us to skip
731  a bunch of unnecessary object creation. This makes a huge performance
732  difference. */
733 
/* Slot count of a freshly created memo table and the smallest size a resize
   will pick.  Must be a power of two, because the table index mask is
   (size - 1). */
#define MT_MINSIZE 8
/* Number of bits the probe perturbation is shifted right after each probe
   step in _PyMemoTable_Lookup(). */
#define PERTURB_SHIFT 5
736 
737 
738 static PyMemoTable *
PyMemoTable_New(void)739 PyMemoTable_New(void)
740 {
741     PyMemoTable *memo = PyMem_Malloc(sizeof(PyMemoTable));
742     if (memo == NULL) {
743         PyErr_NoMemory();
744         return NULL;
745     }
746 
747     memo->mt_used = 0;
748     memo->mt_allocated = MT_MINSIZE;
749     memo->mt_mask = MT_MINSIZE - 1;
750     memo->mt_table = PyMem_Malloc(MT_MINSIZE * sizeof(PyMemoEntry));
751     if (memo->mt_table == NULL) {
752         PyMem_Free(memo);
753         PyErr_NoMemory();
754         return NULL;
755     }
756     memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
757 
758     return memo;
759 }
760 
761 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)762 PyMemoTable_Copy(PyMemoTable *self)
763 {
764     PyMemoTable *new = PyMemoTable_New();
765     if (new == NULL)
766         return NULL;
767 
768     new->mt_used = self->mt_used;
769     new->mt_allocated = self->mt_allocated;
770     new->mt_mask = self->mt_mask;
771     /* The table we get from _New() is probably smaller than we wanted.
772        Free it and allocate one that's the right size. */
773     PyMem_Free(new->mt_table);
774     new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
775     if (new->mt_table == NULL) {
776         PyMem_Free(new);
777         PyErr_NoMemory();
778         return NULL;
779     }
780     for (size_t i = 0; i < self->mt_allocated; i++) {
781         Py_XINCREF(self->mt_table[i].me_key);
782     }
783     memcpy(new->mt_table, self->mt_table,
784            sizeof(PyMemoEntry) * self->mt_allocated);
785 
786     return new;
787 }
788 
789 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)790 PyMemoTable_Size(PyMemoTable *self)
791 {
792     return self->mt_used;
793 }
794 
795 static int
PyMemoTable_Clear(PyMemoTable * self)796 PyMemoTable_Clear(PyMemoTable *self)
797 {
798     Py_ssize_t i = self->mt_allocated;
799 
800     while (--i >= 0) {
801         Py_XDECREF(self->mt_table[i].me_key);
802     }
803     self->mt_used = 0;
804     memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
805     return 0;
806 }
807 
808 static void
PyMemoTable_Del(PyMemoTable * self)809 PyMemoTable_Del(PyMemoTable *self)
810 {
811     if (self == NULL)
812         return;
813     PyMemoTable_Clear(self);
814 
815     PyMem_Free(self->mt_table);
816     PyMem_Free(self);
817 }
818 
819 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
820    can be considerably simpler than dictobject.c's lookdict(). */
821 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)822 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
823 {
824     size_t i;
825     size_t perturb;
826     size_t mask = self->mt_mask;
827     PyMemoEntry *table = self->mt_table;
828     PyMemoEntry *entry;
829     Py_hash_t hash = (Py_hash_t)key >> 3;
830 
831     i = hash & mask;
832     entry = &table[i];
833     if (entry->me_key == NULL || entry->me_key == key)
834         return entry;
835 
836     for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
837         i = (i << 2) + i + perturb + 1;
838         entry = &table[i & mask];
839         if (entry->me_key == NULL || entry->me_key == key)
840             return entry;
841     }
842     Py_UNREACHABLE();
843 }
844 
845 /* Returns -1 on failure, 0 on success. */
846 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)847 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
848 {
849     PyMemoEntry *oldtable = NULL;
850     PyMemoEntry *oldentry, *newentry;
851     size_t new_size = MT_MINSIZE;
852     size_t to_process;
853 
854     assert(min_size > 0);
855 
856     if (min_size > PY_SSIZE_T_MAX) {
857         PyErr_NoMemory();
858         return -1;
859     }
860 
861     /* Find the smallest valid table size >= min_size. */
862     while (new_size < min_size) {
863         new_size <<= 1;
864     }
865     /* new_size needs to be a power of two. */
866     assert((new_size & (new_size - 1)) == 0);
867 
868     /* Allocate new table. */
869     oldtable = self->mt_table;
870     self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
871     if (self->mt_table == NULL) {
872         self->mt_table = oldtable;
873         PyErr_NoMemory();
874         return -1;
875     }
876     self->mt_allocated = new_size;
877     self->mt_mask = new_size - 1;
878     memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
879 
880     /* Copy entries from the old table. */
881     to_process = self->mt_used;
882     for (oldentry = oldtable; to_process > 0; oldentry++) {
883         if (oldentry->me_key != NULL) {
884             to_process--;
885             /* newentry is a pointer to a chunk of the new
886                mt_table, so we're setting the key:value pair
887                in-place. */
888             newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
889             newentry->me_key = oldentry->me_key;
890             newentry->me_value = oldentry->me_value;
891         }
892     }
893 
894     /* Deallocate the old table. */
895     PyMem_Free(oldtable);
896     return 0;
897 }
898 
899 /* Returns NULL on failure, a pointer to the value otherwise. */
900 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)901 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
902 {
903     PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
904     if (entry->me_key == NULL)
905         return NULL;
906     return &entry->me_value;
907 }
908 
909 /* Returns -1 on failure, 0 on success. */
910 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)911 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
912 {
913     PyMemoEntry *entry;
914 
915     assert(key != NULL);
916 
917     entry = _PyMemoTable_Lookup(self, key);
918     if (entry->me_key != NULL) {
919         entry->me_value = value;
920         return 0;
921     }
922     Py_INCREF(key);
923     entry->me_key = key;
924     entry->me_value = value;
925     self->mt_used++;
926 
927     /* If we added a key, we can safely resize. Otherwise just return!
928      * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
929      *
930      * Quadrupling the size improves average table sparseness
931      * (reducing collisions) at the cost of some memory. It also halves
932      * the number of expensive resize operations in a growing memo table.
933      *
934      * Very large memo tables (over 50K items) use doubling instead.
935      * This may help applications with severe memory constraints.
936      */
937     if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
938         return 0;
939     }
940     // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
941     size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
942     return _PyMemoTable_ResizeTable(self, desired_size);
943 }
944 
945 #undef MT_MINSIZE
946 #undef PERTURB_SHIFT
947 
948 /*************************************************************************/
949 
950 
951 static int
_Pickler_ClearBuffer(PicklerObject * self)952 _Pickler_ClearBuffer(PicklerObject *self)
953 {
954     Py_XSETREF(self->output_buffer,
955               PyBytes_FromStringAndSize(NULL, self->max_output_len));
956     if (self->output_buffer == NULL)
957         return -1;
958     self->output_len = 0;
959     self->frame_start = -1;
960     return 0;
961 }
962 
/* Store `value` into out[0..7] as a little-endian 64-bit integer.  On
   platforms where size_t is narrower than 8 bytes the high bytes are
   zero-filled. */
static void
_write_size64(char *out, size_t value)
{
    static_assert(sizeof(size_t) <= 8, "size_t is larger than 64-bit");

    size_t pos = 0;

    /* Emit the least significant byte first, consuming `value` as we go. */
    while (pos < sizeof(size_t)) {
        out[pos] = (unsigned char)(value & 0xff);
        value >>= 8;
        pos++;
    }
    /* Pad up to the full 8-byte field. */
    while (pos < 8) {
        out[pos] = 0;
        pos++;
    }
}
977 
978 static int
_Pickler_CommitFrame(PicklerObject * self)979 _Pickler_CommitFrame(PicklerObject *self)
980 {
981     size_t frame_len;
982     char *qdata;
983 
984     if (!self->framing || self->frame_start == -1)
985         return 0;
986     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
987     qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
988     if (frame_len >= FRAME_SIZE_MIN) {
989         qdata[0] = FRAME;
990         _write_size64(qdata + 1, frame_len);
991     }
992     else {
993         memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
994         self->output_len -= FRAME_HEADER_SIZE;
995     }
996     self->frame_start = -1;
997     return 0;
998 }
999 
1000 static PyObject *
_Pickler_GetString(PicklerObject * self)1001 _Pickler_GetString(PicklerObject *self)
1002 {
1003     PyObject *output_buffer = self->output_buffer;
1004 
1005     assert(self->output_buffer != NULL);
1006 
1007     if (_Pickler_CommitFrame(self))
1008         return NULL;
1009 
1010     self->output_buffer = NULL;
1011     /* Resize down to exact size */
1012     if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
1013         return NULL;
1014     return output_buffer;
1015 }
1016 
1017 static int
_Pickler_FlushToFile(PicklerObject * self)1018 _Pickler_FlushToFile(PicklerObject *self)
1019 {
1020     PyObject *output, *result;
1021 
1022     assert(self->write != NULL);
1023 
1024     /* This will commit the frame first */
1025     output = _Pickler_GetString(self);
1026     if (output == NULL)
1027         return -1;
1028 
1029     result = _Pickle_FastCall(self->write, output);
1030     Py_XDECREF(result);
1031     return (result == NULL) ? -1 : 0;
1032 }
1033 
1034 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1035 _Pickler_OpcodeBoundary(PicklerObject *self)
1036 {
1037     Py_ssize_t frame_len;
1038 
1039     if (!self->framing || self->frame_start == -1) {
1040         return 0;
1041     }
1042     frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1043     if (frame_len >= FRAME_SIZE_TARGET) {
1044         if(_Pickler_CommitFrame(self)) {
1045             return -1;
1046         }
1047         /* Flush the content of the committed frame to the underlying
1048          * file and reuse the pickler buffer for the next frame so as
1049          * to limit memory usage when dumping large complex objects to
1050          * a file.
1051          *
1052          * self->write is NULL when called via dumps.
1053          */
1054         if (self->write != NULL) {
1055             if (_Pickler_FlushToFile(self) < 0) {
1056                 return -1;
1057             }
1058             if (_Pickler_ClearBuffer(self) < 0) {
1059                 return -1;
1060             }
1061         }
1062     }
1063     return 0;
1064 }
1065 
1066 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1067 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1068 {
1069     Py_ssize_t i, n, required;
1070     char *buffer;
1071     int need_new_frame;
1072 
1073     assert(s != NULL);
1074     need_new_frame = (self->framing && self->frame_start == -1);
1075 
1076     if (need_new_frame)
1077         n = data_len + FRAME_HEADER_SIZE;
1078     else
1079         n = data_len;
1080 
1081     required = self->output_len + n;
1082     if (required > self->max_output_len) {
1083         /* Make place in buffer for the pickle chunk */
1084         if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1085             PyErr_NoMemory();
1086             return -1;
1087         }
1088         self->max_output_len = (self->output_len + n) / 2 * 3;
1089         if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1090             return -1;
1091     }
1092     buffer = PyBytes_AS_STRING(self->output_buffer);
1093     if (need_new_frame) {
1094         /* Setup new frame */
1095         Py_ssize_t frame_start = self->output_len;
1096         self->frame_start = frame_start;
1097         for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1098             /* Write an invalid value, for debugging */
1099             buffer[frame_start + i] = 0xFE;
1100         }
1101         self->output_len += FRAME_HEADER_SIZE;
1102     }
1103     if (data_len < 8) {
1104         /* This is faster than memcpy when the string is short. */
1105         for (i = 0; i < data_len; i++) {
1106             buffer[self->output_len + i] = s[i];
1107         }
1108     }
1109     else {
1110         memcpy(buffer + self->output_len, s, data_len);
1111     }
1112     self->output_len += data_len;
1113     return data_len;
1114 }
1115 
1116 static PicklerObject *
_Pickler_New(void)1117 _Pickler_New(void)
1118 {
1119     PicklerObject *self;
1120 
1121     self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1122     if (self == NULL)
1123         return NULL;
1124 
1125     self->pers_func = NULL;
1126     self->dispatch_table = NULL;
1127     self->buffer_callback = NULL;
1128     self->write = NULL;
1129     self->proto = 0;
1130     self->bin = 0;
1131     self->framing = 0;
1132     self->frame_start = -1;
1133     self->fast = 0;
1134     self->fast_nesting = 0;
1135     self->fix_imports = 0;
1136     self->fast_memo = NULL;
1137     self->max_output_len = WRITE_BUF_SIZE;
1138     self->output_len = 0;
1139     self->reducer_override = NULL;
1140 
1141     self->memo = PyMemoTable_New();
1142     self->output_buffer = PyBytes_FromStringAndSize(NULL,
1143                                                     self->max_output_len);
1144 
1145     if (self->memo == NULL || self->output_buffer == NULL) {
1146         Py_DECREF(self);
1147         return NULL;
1148     }
1149 
1150     PyObject_GC_Track(self);
1151     return self;
1152 }
1153 
1154 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1155 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1156 {
1157     long proto;
1158 
1159     if (protocol == Py_None) {
1160         proto = DEFAULT_PROTOCOL;
1161     }
1162     else {
1163         proto = PyLong_AsLong(protocol);
1164         if (proto < 0) {
1165             if (proto == -1 && PyErr_Occurred())
1166                 return -1;
1167             proto = HIGHEST_PROTOCOL;
1168         }
1169         else if (proto > HIGHEST_PROTOCOL) {
1170             PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1171                          HIGHEST_PROTOCOL);
1172             return -1;
1173         }
1174     }
1175     self->proto = (int)proto;
1176     self->bin = proto > 0;
1177     self->fix_imports = fix_imports && proto < 3;
1178     return 0;
1179 }
1180 
1181 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1182    be called once on a freshly created Pickler. */
1183 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1184 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1185 {
1186     assert(file != NULL);
1187     if (_PyObject_LookupAttr(file, &_Py_ID(write), &self->write) < 0) {
1188         return -1;
1189     }
1190     if (self->write == NULL) {
1191         PyErr_SetString(PyExc_TypeError,
1192                         "file must have a 'write' attribute");
1193         return -1;
1194     }
1195 
1196     return 0;
1197 }
1198 
1199 static int
_Pickler_SetBufferCallback(PicklerObject * self,PyObject * buffer_callback)1200 _Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1201 {
1202     if (buffer_callback == Py_None) {
1203         buffer_callback = NULL;
1204     }
1205     if (buffer_callback != NULL && self->proto < 5) {
1206         PyErr_SetString(PyExc_ValueError,
1207                         "buffer_callback needs protocol >= 5");
1208         return -1;
1209     }
1210 
1211     Py_XINCREF(buffer_callback);
1212     self->buffer_callback = buffer_callback;
1213     return 0;
1214 }
1215 
1216 /* Returns the size of the input on success, -1 on failure. This takes its
1217    own reference to `input`. */
1218 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1219 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1220 {
1221     if (self->buffer.buf != NULL)
1222         PyBuffer_Release(&self->buffer);
1223     if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1224         return -1;
1225     self->input_buffer = self->buffer.buf;
1226     self->input_len = self->buffer.len;
1227     self->next_read_idx = 0;
1228     self->prefetched_idx = self->input_len;
1229     return self->input_len;
1230 }
1231 
1232 static int
bad_readline(void)1233 bad_readline(void)
1234 {
1235     PickleState *st = _Pickle_GetGlobalState();
1236     PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1237     return -1;
1238 }
1239 
1240 /* Skip any consumed data that was only prefetched using peek() */
1241 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1242 _Unpickler_SkipConsumed(UnpicklerObject *self)
1243 {
1244     Py_ssize_t consumed;
1245     PyObject *r;
1246 
1247     consumed = self->next_read_idx - self->prefetched_idx;
1248     if (consumed <= 0)
1249         return 0;
1250 
1251     assert(self->peek);  /* otherwise we did something wrong */
1252     /* This makes a useless copy... */
1253     r = PyObject_CallFunction(self->read, "n", consumed);
1254     if (r == NULL)
1255         return -1;
1256     Py_DECREF(r);
1257 
1258     self->prefetched_idx = self->next_read_idx;
1259     return 0;
1260 }
1261 
/* Sentinel byte count: passing this as `n` to _Unpickler_ReadFromFile() makes
   it call the file's readline() instead of reading a fixed number of bytes. */
static const Py_ssize_t READ_WHOLE_LINE = -1;
1263 
1264 /* If reading from a file, we need to only pull the bytes we need, since there
1265    may be multiple pickle objects arranged contiguously in the same input
1266    buffer.
1267 
1268    If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1269    bytes from the input stream/buffer.
1270 
1271    Update the unpickler's input buffer with the newly-read data. Returns -1 on
1272    failure; on success, returns the number of bytes read from the file.
1273 
1274    On success, self->input_len will be 0; this is intentional so that when
1275    unpickling from a file, the "we've run out of data" code paths will trigger,
1276    causing the Unpickler to go back to the file for more data. Use the returned
1277    size to tell you how much data you can process. */
1278 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1279 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1280 {
1281     PyObject *data;
1282     Py_ssize_t read_size;
1283 
1284     assert(self->read != NULL);
1285 
1286     if (_Unpickler_SkipConsumed(self) < 0)
1287         return -1;
1288 
1289     if (n == READ_WHOLE_LINE) {
1290         data = PyObject_CallNoArgs(self->readline);
1291     }
1292     else {
1293         PyObject *len;
1294         /* Prefetch some data without advancing the file pointer, if possible */
1295         if (self->peek && n < PREFETCH) {
1296             len = PyLong_FromSsize_t(PREFETCH);
1297             if (len == NULL)
1298                 return -1;
1299             data = _Pickle_FastCall(self->peek, len);
1300             if (data == NULL) {
1301                 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1302                     return -1;
1303                 /* peek() is probably not supported by the given file object */
1304                 PyErr_Clear();
1305                 Py_CLEAR(self->peek);
1306             }
1307             else {
1308                 read_size = _Unpickler_SetStringInput(self, data);
1309                 Py_DECREF(data);
1310                 self->prefetched_idx = 0;
1311                 if (n <= read_size)
1312                     return n;
1313             }
1314         }
1315         len = PyLong_FromSsize_t(n);
1316         if (len == NULL)
1317             return -1;
1318         data = _Pickle_FastCall(self->read, len);
1319     }
1320     if (data == NULL)
1321         return -1;
1322 
1323     read_size = _Unpickler_SetStringInput(self, data);
1324     Py_DECREF(data);
1325     return read_size;
1326 }
1327 
1328 /* Don't call it directly: use _Unpickler_Read() */
1329 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,char ** s,Py_ssize_t n)1330 _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1331 {
1332     Py_ssize_t num_read;
1333 
1334     *s = NULL;
1335     if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1336         PickleState *st = _Pickle_GetGlobalState();
1337         PyErr_SetString(st->UnpicklingError,
1338                         "read would overflow (invalid bytecode)");
1339         return -1;
1340     }
1341 
1342     /* This case is handled by the _Unpickler_Read() macro for efficiency */
1343     assert(self->next_read_idx + n > self->input_len);
1344 
1345     if (!self->read)
1346         return bad_readline();
1347 
1348     /* Extend the buffer to satisfy desired size */
1349     num_read = _Unpickler_ReadFromFile(self, n);
1350     if (num_read < 0)
1351         return -1;
1352     if (num_read < n)
1353         return bad_readline();
1354     *s = self->input_buffer;
1355     self->next_read_idx = n;
1356     return n;
1357 }
1358 
1359 /* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1360  *
1361  * This should only be used for non-small data reads where potentially
1362  * avoiding a copy is beneficial.  This method does not try to prefetch
1363  * more data into the input buffer.
1364  *
1365  * _Unpickler_Read() is recommended in most cases.
1366  */
1367 static Py_ssize_t
_Unpickler_ReadInto(UnpicklerObject * self,char * buf,Py_ssize_t n)1368 _Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
1369 {
1370     assert(n != READ_WHOLE_LINE);
1371 
1372     /* Read from available buffer data, if any */
1373     Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1374     if (in_buffer > 0) {
1375         Py_ssize_t to_read = Py_MIN(in_buffer, n);
1376         memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1377         self->next_read_idx += to_read;
1378         buf += to_read;
1379         n -= to_read;
1380         if (n == 0) {
1381             /* Entire read was satisfied from buffer */
1382             return n;
1383         }
1384     }
1385 
1386     /* Read from file */
1387     if (!self->read) {
1388         /* We're unpickling memory, this means the input is truncated */
1389         return bad_readline();
1390     }
1391     if (_Unpickler_SkipConsumed(self) < 0) {
1392         return -1;
1393     }
1394 
1395     if (!self->readinto) {
1396         /* readinto() not supported on file-like object, fall back to read()
1397          * and copy into destination buffer (bpo-39681) */
1398         PyObject* len = PyLong_FromSsize_t(n);
1399         if (len == NULL) {
1400             return -1;
1401         }
1402         PyObject* data = _Pickle_FastCall(self->read, len);
1403         if (data == NULL) {
1404             return -1;
1405         }
1406         if (!PyBytes_Check(data)) {
1407             PyErr_Format(PyExc_ValueError,
1408                          "read() returned non-bytes object (%R)",
1409                          Py_TYPE(data));
1410             Py_DECREF(data);
1411             return -1;
1412         }
1413         Py_ssize_t read_size = PyBytes_GET_SIZE(data);
1414         if (read_size < n) {
1415             Py_DECREF(data);
1416             return bad_readline();
1417         }
1418         memcpy(buf, PyBytes_AS_STRING(data), n);
1419         Py_DECREF(data);
1420         return n;
1421     }
1422 
1423     /* Call readinto() into user buffer */
1424     PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1425     if (buf_obj == NULL) {
1426         return -1;
1427     }
1428     PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1429     if (read_size_obj == NULL) {
1430         return -1;
1431     }
1432     Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1433     Py_DECREF(read_size_obj);
1434 
1435     if (read_size < 0) {
1436         if (!PyErr_Occurred()) {
1437             PyErr_SetString(PyExc_ValueError,
1438                             "readinto() returned negative size");
1439         }
1440         return -1;
1441     }
1442     if (read_size < n) {
1443         return bad_readline();
1444     }
1445     return n;
1446 }
1447 
/* Read `n` bytes from the unpickler's data source, storing a pointer to them
   in `*s`.

   This should be used for all data reads, rather than accessing the unpickler's
   input buffer directly, because it also copes with input coming from a
   stream.

   Fast path: if at least `n` unread bytes are already buffered, `*s` is set to
   the current read position, next_read_idx is advanced by `n`, and `n` is the
   result.  Otherwise the call is forwarded to _Unpickler_ReadImpl(), which
   refills the input buffer from the file (resetting the read position) or
   reports truncated input.

   `*s` points into the unpickler's input buffer.  This is a macro: `self` and
   `n` are expanded more than once, so pass expressions without side effects.

   Returns -1 (with an exception set) on failure. On success, return the
   number of chars read. */
#define _Unpickler_Read(self, s, n) \
    (((n) <= (self)->input_len - (self)->next_read_idx)      \
     ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
        (self)->next_read_idx += (n),                        \
        (n))                                                 \
     : _Unpickler_ReadImpl(self, (s), (n)))
1467 
1468 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1469 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1470                     char **result)
1471 {
1472     char *input_line = PyMem_Realloc(self->input_line, len + 1);
1473     if (input_line == NULL) {
1474         PyErr_NoMemory();
1475         return -1;
1476     }
1477 
1478     memcpy(input_line, line, len);
1479     input_line[len] = '\0';
1480     self->input_line = input_line;
1481     *result = self->input_line;
1482     return len;
1483 }
1484 
1485 /* Read a line from the input stream/buffer. If we run off the end of the input
1486    before hitting \n, raise an error.
1487 
1488    Returns the number of chars read, or -1 on failure. */
1489 static Py_ssize_t
_Unpickler_Readline(UnpicklerObject * self,char ** result)1490 _Unpickler_Readline(UnpicklerObject *self, char **result)
1491 {
1492     Py_ssize_t i, num_read;
1493 
1494     for (i = self->next_read_idx; i < self->input_len; i++) {
1495         if (self->input_buffer[i] == '\n') {
1496             char *line_start = self->input_buffer + self->next_read_idx;
1497             num_read = i - self->next_read_idx + 1;
1498             self->next_read_idx = i + 1;
1499             return _Unpickler_CopyLine(self, line_start, num_read, result);
1500         }
1501     }
1502     if (!self->read)
1503         return bad_readline();
1504 
1505     num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1506     if (num_read < 0)
1507         return -1;
1508     if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1509         return bad_readline();
1510     self->next_read_idx = num_read;
1511     return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1512 }
1513 
1514 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1515    will be modified in place. */
1516 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1517 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1518 {
1519     size_t i;
1520 
1521     assert(new_size > self->memo_size);
1522 
1523     PyObject **memo_new = self->memo;
1524     PyMem_RESIZE(memo_new, PyObject *, new_size);
1525     if (memo_new == NULL) {
1526         PyErr_NoMemory();
1527         return -1;
1528     }
1529     self->memo = memo_new;
1530     for (i = self->memo_size; i < new_size; i++)
1531         self->memo[i] = NULL;
1532     self->memo_size = new_size;
1533     return 0;
1534 }
1535 
1536 /* Returns NULL if idx is out of bounds. */
1537 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1538 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1539 {
1540     if (idx >= self->memo_size)
1541         return NULL;
1542 
1543     return self->memo[idx];
1544 }
1545 
1546 /* Returns -1 (with an exception set) on failure, 0 on success.
1547    This takes its own reference to `value`. */
1548 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1549 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1550 {
1551     PyObject *old_item;
1552 
1553     if (idx >= self->memo_size) {
1554         if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1555             return -1;
1556         assert(idx < self->memo_size);
1557     }
1558     Py_INCREF(value);
1559     old_item = self->memo[idx];
1560     self->memo[idx] = value;
1561     if (old_item != NULL) {
1562         Py_DECREF(old_item);
1563     }
1564     else {
1565         self->memo_len++;
1566     }
1567     return 0;
1568 }
1569 
1570 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1571 _Unpickler_NewMemo(Py_ssize_t new_size)
1572 {
1573     PyObject **memo = PyMem_NEW(PyObject *, new_size);
1574     if (memo == NULL) {
1575         PyErr_NoMemory();
1576         return NULL;
1577     }
1578     memset(memo, 0, new_size * sizeof(PyObject *));
1579     return memo;
1580 }
1581 
1582 /* Free the unpickler's memo, taking care to decref any items left in it. */
1583 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1584 _Unpickler_MemoCleanup(UnpicklerObject *self)
1585 {
1586     Py_ssize_t i;
1587     PyObject **memo = self->memo;
1588 
1589     if (self->memo == NULL)
1590         return;
1591     self->memo = NULL;
1592     i = self->memo_size;
1593     while (--i >= 0) {
1594         Py_XDECREF(memo[i]);
1595     }
1596     PyMem_Free(memo);
1597 }
1598 
1599 static UnpicklerObject *
_Unpickler_New(void)1600 _Unpickler_New(void)
1601 {
1602     UnpicklerObject *self;
1603 
1604     self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1605     if (self == NULL)
1606         return NULL;
1607 
1608     self->pers_func = NULL;
1609     self->input_buffer = NULL;
1610     self->input_line = NULL;
1611     self->input_len = 0;
1612     self->next_read_idx = 0;
1613     self->prefetched_idx = 0;
1614     self->read = NULL;
1615     self->readinto = NULL;
1616     self->readline = NULL;
1617     self->peek = NULL;
1618     self->buffers = NULL;
1619     self->encoding = NULL;
1620     self->errors = NULL;
1621     self->marks = NULL;
1622     self->num_marks = 0;
1623     self->marks_size = 0;
1624     self->proto = 0;
1625     self->fix_imports = 0;
1626     memset(&self->buffer, 0, sizeof(Py_buffer));
1627     self->memo_size = 32;
1628     self->memo_len = 0;
1629     self->memo = _Unpickler_NewMemo(self->memo_size);
1630     self->stack = (Pdata *)Pdata_New();
1631 
1632     if (self->memo == NULL || self->stack == NULL) {
1633         Py_DECREF(self);
1634         return NULL;
1635     }
1636 
1637     PyObject_GC_Track(self);
1638     return self;
1639 }
1640 
1641 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1642    be called once on a freshly created Unpickler. */
1643 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1644 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1645 {
1646     /* Optional file methods */
1647     if (_PyObject_LookupAttr(file, &_Py_ID(peek), &self->peek) < 0) {
1648         return -1;
1649     }
1650     if (_PyObject_LookupAttr(file, &_Py_ID(readinto), &self->readinto) < 0) {
1651         return -1;
1652     }
1653     (void)_PyObject_LookupAttr(file, &_Py_ID(read), &self->read);
1654     (void)_PyObject_LookupAttr(file, &_Py_ID(readline), &self->readline);
1655     if (!self->readline || !self->read) {
1656         if (!PyErr_Occurred()) {
1657             PyErr_SetString(PyExc_TypeError,
1658                             "file must have 'read' and 'readline' attributes");
1659         }
1660         Py_CLEAR(self->read);
1661         Py_CLEAR(self->readinto);
1662         Py_CLEAR(self->readline);
1663         Py_CLEAR(self->peek);
1664         return -1;
1665     }
1666     return 0;
1667 }
1668 
1669 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1670    be called once on a freshly created Unpickler. */
1671 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1672 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1673                             const char *encoding,
1674                             const char *errors)
1675 {
1676     if (encoding == NULL)
1677         encoding = "ASCII";
1678     if (errors == NULL)
1679         errors = "strict";
1680 
1681     self->encoding = _PyMem_Strdup(encoding);
1682     self->errors = _PyMem_Strdup(errors);
1683     if (self->encoding == NULL || self->errors == NULL) {
1684         PyErr_NoMemory();
1685         return -1;
1686     }
1687     return 0;
1688 }
1689 
1690 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1691    be called once on a freshly created Unpickler. */
1692 static int
_Unpickler_SetBuffers(UnpicklerObject * self,PyObject * buffers)1693 _Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1694 {
1695     if (buffers == NULL || buffers == Py_None) {
1696         self->buffers = NULL;
1697     }
1698     else {
1699         self->buffers = PyObject_GetIter(buffers);
1700         if (self->buffers == NULL) {
1701             return -1;
1702         }
1703     }
1704     return 0;
1705 }
1706 
1707 /* Generate a GET opcode for an object stored in the memo. */
1708 static int
memo_get(PicklerObject * self,PyObject * key)1709 memo_get(PicklerObject *self, PyObject *key)
1710 {
1711     Py_ssize_t *value;
1712     char pdata[30];
1713     Py_ssize_t len;
1714 
1715     value = PyMemoTable_Get(self->memo, key);
1716     if (value == NULL)  {
1717         PyErr_SetObject(PyExc_KeyError, key);
1718         return -1;
1719     }
1720 
1721     if (!self->bin) {
1722         pdata[0] = GET;
1723         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1724                       "%zd\n", *value);
1725         len = strlen(pdata);
1726     }
1727     else {
1728         if (*value < 256) {
1729             pdata[0] = BINGET;
1730             pdata[1] = (unsigned char)(*value & 0xff);
1731             len = 2;
1732         }
1733         else if ((size_t)*value <= 0xffffffffUL) {
1734             pdata[0] = LONG_BINGET;
1735             pdata[1] = (unsigned char)(*value & 0xff);
1736             pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1737             pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1738             pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1739             len = 5;
1740         }
1741         else { /* unlikely */
1742             PickleState *st = _Pickle_GetGlobalState();
1743             PyErr_SetString(st->PicklingError,
1744                             "memo id too large for LONG_BINGET");
1745             return -1;
1746         }
1747     }
1748 
1749     if (_Pickler_Write(self, pdata, len) < 0)
1750         return -1;
1751 
1752     return 0;
1753 }
1754 
1755 /* Store an object in the memo, assign it a new unique ID based on the number
1756    of objects currently stored in the memo and generate a PUT opcode. */
1757 static int
memo_put(PicklerObject * self,PyObject * obj)1758 memo_put(PicklerObject *self, PyObject *obj)
1759 {
1760     char pdata[30];
1761     Py_ssize_t len;
1762     Py_ssize_t idx;
1763 
1764     const char memoize_op = MEMOIZE;
1765 
1766     if (self->fast)
1767         return 0;
1768 
1769     idx = PyMemoTable_Size(self->memo);
1770     if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1771         return -1;
1772 
1773     if (self->proto >= 4) {
1774         if (_Pickler_Write(self, &memoize_op, 1) < 0)
1775             return -1;
1776         return 0;
1777     }
1778     else if (!self->bin) {
1779         pdata[0] = PUT;
1780         PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1781                       "%zd\n", idx);
1782         len = strlen(pdata);
1783     }
1784     else {
1785         if (idx < 256) {
1786             pdata[0] = BINPUT;
1787             pdata[1] = (unsigned char)idx;
1788             len = 2;
1789         }
1790         else if ((size_t)idx <= 0xffffffffUL) {
1791             pdata[0] = LONG_BINPUT;
1792             pdata[1] = (unsigned char)(idx & 0xff);
1793             pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1794             pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1795             pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1796             len = 5;
1797         }
1798         else { /* unlikely */
1799             PickleState *st = _Pickle_GetGlobalState();
1800             PyErr_SetString(st->PicklingError,
1801                             "memo id too large for LONG_BINPUT");
1802             return -1;
1803         }
1804     }
1805     if (_Pickler_Write(self, pdata, len) < 0)
1806         return -1;
1807 
1808     return 0;
1809 }
1810 
1811 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1812 get_dotted_path(PyObject *obj, PyObject *name)
1813 {
1814     PyObject *dotted_path;
1815     Py_ssize_t i, n;
1816     _Py_DECLARE_STR(dot, ".");
1817     dotted_path = PyUnicode_Split(name, &_Py_STR(dot), -1);
1818     if (dotted_path == NULL)
1819         return NULL;
1820     n = PyList_GET_SIZE(dotted_path);
1821     assert(n >= 1);
1822     for (i = 0; i < n; i++) {
1823         PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1824         if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1825             if (obj == NULL)
1826                 PyErr_Format(PyExc_AttributeError,
1827                              "Can't pickle local object %R", name);
1828             else
1829                 PyErr_Format(PyExc_AttributeError,
1830                              "Can't pickle local attribute %R on %R", name, obj);
1831             Py_DECREF(dotted_path);
1832             return NULL;
1833         }
1834     }
1835     return dotted_path;
1836 }
1837 
1838 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1839 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1840 {
1841     Py_ssize_t i, n;
1842     PyObject *parent = NULL;
1843 
1844     assert(PyList_CheckExact(names));
1845     Py_INCREF(obj);
1846     n = PyList_GET_SIZE(names);
1847     for (i = 0; i < n; i++) {
1848         PyObject *name = PyList_GET_ITEM(names, i);
1849         Py_XDECREF(parent);
1850         parent = obj;
1851         (void)_PyObject_LookupAttr(parent, name, &obj);
1852         if (obj == NULL) {
1853             Py_DECREF(parent);
1854             return NULL;
1855         }
1856     }
1857     if (pparent != NULL)
1858         *pparent = parent;
1859     else
1860         Py_XDECREF(parent);
1861     return obj;
1862 }
1863 
1864 
1865 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1866 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1867 {
1868     PyObject *dotted_path, *attr;
1869 
1870     if (allow_qualname) {
1871         dotted_path = get_dotted_path(obj, name);
1872         if (dotted_path == NULL)
1873             return NULL;
1874         attr = get_deep_attribute(obj, dotted_path, NULL);
1875         Py_DECREF(dotted_path);
1876     }
1877     else {
1878         (void)_PyObject_LookupAttr(obj, name, &attr);
1879     }
1880     if (attr == NULL && !PyErr_Occurred()) {
1881         PyErr_Format(PyExc_AttributeError,
1882                      "Can't get attribute %R on %R", name, obj);
1883     }
1884     return attr;
1885 }
1886 
1887 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1888 _checkmodule(PyObject *module_name, PyObject *module,
1889              PyObject *global, PyObject *dotted_path)
1890 {
1891     if (module == Py_None) {
1892         return -1;
1893     }
1894     if (PyUnicode_Check(module_name) &&
1895             _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1896         return -1;
1897     }
1898 
1899     PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1900     if (candidate == NULL) {
1901         return -1;
1902     }
1903     if (candidate != global) {
1904         Py_DECREF(candidate);
1905         return -1;
1906     }
1907     Py_DECREF(candidate);
1908     return 0;
1909 }
1910 
1911 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1912 whichmodule(PyObject *global, PyObject *dotted_path)
1913 {
1914     PyObject *module_name;
1915     PyObject *module = NULL;
1916     Py_ssize_t i;
1917     PyObject *modules;
1918 
1919     if (_PyObject_LookupAttr(global, &_Py_ID(__module__), &module_name) < 0) {
1920         return NULL;
1921     }
1922     if (module_name) {
1923         /* In some rare cases (e.g., bound methods of extension types),
1924            __module__ can be None. If it is so, then search sys.modules for
1925            the module of global. */
1926         if (module_name != Py_None)
1927             return module_name;
1928         Py_CLEAR(module_name);
1929     }
1930     assert(module_name == NULL);
1931 
1932     /* Fallback on walking sys.modules */
1933     PyThreadState *tstate = _PyThreadState_GET();
1934     modules = _PySys_GetAttr(tstate, &_Py_ID(modules));
1935     if (modules == NULL) {
1936         PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1937         return NULL;
1938     }
1939     if (PyDict_CheckExact(modules)) {
1940         i = 0;
1941         while (PyDict_Next(modules, &i, &module_name, &module)) {
1942             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1943                 Py_INCREF(module_name);
1944                 return module_name;
1945             }
1946             if (PyErr_Occurred()) {
1947                 return NULL;
1948             }
1949         }
1950     }
1951     else {
1952         PyObject *iterator = PyObject_GetIter(modules);
1953         if (iterator == NULL) {
1954             return NULL;
1955         }
1956         while ((module_name = PyIter_Next(iterator))) {
1957             module = PyObject_GetItem(modules, module_name);
1958             if (module == NULL) {
1959                 Py_DECREF(module_name);
1960                 Py_DECREF(iterator);
1961                 return NULL;
1962             }
1963             if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1964                 Py_DECREF(module);
1965                 Py_DECREF(iterator);
1966                 return module_name;
1967             }
1968             Py_DECREF(module);
1969             Py_DECREF(module_name);
1970             if (PyErr_Occurred()) {
1971                 Py_DECREF(iterator);
1972                 return NULL;
1973             }
1974         }
1975         Py_DECREF(iterator);
1976     }
1977 
1978     /* If no module is found, use __main__. */
1979     module_name = &_Py_ID(__main__);
1980     Py_INCREF(module_name);
1981     return module_name;
1982 }
1983 
1984 /* fast_save_enter() and fast_save_leave() are guards against recursive
1985    objects when Pickler is used with the "fast mode" (i.e., with object
1986    memoization disabled). If the nesting of a list or dict object exceed
1987    FAST_NESTING_LIMIT, these guards will start keeping an internal
1988    reference to the seen list or dict objects and check whether these objects
1989    are recursive. These are not strictly necessary, since save() has a
1990    hard-coded recursion limit, but they give a nicer error message than the
1991    typical RuntimeError. */
1992 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1993 fast_save_enter(PicklerObject *self, PyObject *obj)
1994 {
1995     /* if fast_nesting < 0, we're doing an error exit. */
1996     if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1997         PyObject *key = NULL;
1998         if (self->fast_memo == NULL) {
1999             self->fast_memo = PyDict_New();
2000             if (self->fast_memo == NULL) {
2001                 self->fast_nesting = -1;
2002                 return 0;
2003             }
2004         }
2005         key = PyLong_FromVoidPtr(obj);
2006         if (key == NULL) {
2007             self->fast_nesting = -1;
2008             return 0;
2009         }
2010         int r = PyDict_Contains(self->fast_memo, key);
2011         if (r > 0) {
2012             PyErr_Format(PyExc_ValueError,
2013                          "fast mode: can't pickle cyclic objects "
2014                          "including object type %.200s at %p",
2015                          Py_TYPE(obj)->tp_name, obj);
2016         }
2017         else if (r == 0) {
2018             r = PyDict_SetItem(self->fast_memo, key, Py_None);
2019         }
2020         Py_DECREF(key);
2021         if (r != 0) {
2022             self->fast_nesting = -1;
2023             return 0;
2024         }
2025     }
2026     return 1;
2027 }
2028 
2029 static int
fast_save_leave(PicklerObject * self,PyObject * obj)2030 fast_save_leave(PicklerObject *self, PyObject *obj)
2031 {
2032     if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2033         PyObject *key = PyLong_FromVoidPtr(obj);
2034         if (key == NULL)
2035             return 0;
2036         if (PyDict_DelItem(self->fast_memo, key) < 0) {
2037             Py_DECREF(key);
2038             return 0;
2039         }
2040         Py_DECREF(key);
2041     }
2042     return 1;
2043 }
2044 
2045 static int
save_none(PicklerObject * self,PyObject * obj)2046 save_none(PicklerObject *self, PyObject *obj)
2047 {
2048     const char none_op = NONE;
2049     if (_Pickler_Write(self, &none_op, 1) < 0)
2050         return -1;
2051 
2052     return 0;
2053 }
2054 
2055 static int
save_bool(PicklerObject * self,PyObject * obj)2056 save_bool(PicklerObject *self, PyObject *obj)
2057 {
2058     if (self->proto >= 2) {
2059         const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
2060         if (_Pickler_Write(self, &bool_op, 1) < 0)
2061             return -1;
2062     }
2063     else {
2064         /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2065          * so that unpicklers written before bools were introduced unpickle them
2066          * as ints, but unpicklers after can recognize that bools were intended.
2067          * Note that protocol 2 added direct ways to pickle bools.
2068          */
2069         const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2070         if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2071             return -1;
2072     }
2073     return 0;
2074 }
2075 
2076 static int
save_long(PicklerObject * self,PyObject * obj)2077 save_long(PicklerObject *self, PyObject *obj)
2078 {
2079     PyObject *repr = NULL;
2080     Py_ssize_t size;
2081     long val;
2082     int overflow;
2083     int status = 0;
2084 
2085     val= PyLong_AsLongAndOverflow(obj, &overflow);
2086     if (!overflow && (sizeof(long) <= 4 ||
2087             (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
2088     {
2089         /* result fits in a signed 4-byte integer.
2090 
2091            Note: we can't use -0x80000000L in the above condition because some
2092            compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
2093            before applying the unary minus when sizeof(long) <= 4. The
2094            resulting value stays unsigned which is commonly not what we want,
2095            so MSVC happily warns us about it.  However, that result would have
2096            been fine because we guard for sizeof(long) <= 4 which turns the
2097            condition true in that particular case. */
2098         char pdata[32];
2099         Py_ssize_t len = 0;
2100 
2101         if (self->bin) {
2102             pdata[1] = (unsigned char)(val & 0xff);
2103             pdata[2] = (unsigned char)((val >> 8) & 0xff);
2104             pdata[3] = (unsigned char)((val >> 16) & 0xff);
2105             pdata[4] = (unsigned char)((val >> 24) & 0xff);
2106 
2107             if ((pdata[4] != 0) || (pdata[3] != 0)) {
2108                 pdata[0] = BININT;
2109                 len = 5;
2110             }
2111             else if (pdata[2] != 0) {
2112                 pdata[0] = BININT2;
2113                 len = 3;
2114             }
2115             else {
2116                 pdata[0] = BININT1;
2117                 len = 2;
2118             }
2119         }
2120         else {
2121             sprintf(pdata, "%c%ld\n", INT,  val);
2122             len = strlen(pdata);
2123         }
2124         if (_Pickler_Write(self, pdata, len) < 0)
2125             return -1;
2126 
2127         return 0;
2128     }
2129     assert(!PyErr_Occurred());
2130 
2131     if (self->proto >= 2) {
2132         /* Linear-time pickling. */
2133         size_t nbits;
2134         size_t nbytes;
2135         unsigned char *pdata;
2136         char header[5];
2137         int i;
2138         int sign = _PyLong_Sign(obj);
2139 
2140         if (sign == 0) {
2141             header[0] = LONG1;
2142             header[1] = 0;      /* It's 0 -- an empty bytestring. */
2143             if (_Pickler_Write(self, header, 2) < 0)
2144                 goto error;
2145             return 0;
2146         }
2147         nbits = _PyLong_NumBits(obj);
2148         if (nbits == (size_t)-1 && PyErr_Occurred())
2149             goto error;
2150         /* How many bytes do we need?  There are nbits >> 3 full
2151          * bytes of data, and nbits & 7 leftover bits.  If there
2152          * are any leftover bits, then we clearly need another
2153          * byte.  What's not so obvious is that we *probably*
2154          * need another byte even if there aren't any leftovers:
2155          * the most-significant bit of the most-significant byte
2156          * acts like a sign bit, and it's usually got a sense
2157          * opposite of the one we need.  The exception is ints
2158          * of the form -(2**(8*j-1)) for j > 0.  Such an int is
2159          * its own 256's-complement, so has the right sign bit
2160          * even without the extra byte.  That's a pain to check
2161          * for in advance, though, so we always grab an extra
2162          * byte at the start, and cut it back later if possible.
2163          */
2164         nbytes = (nbits >> 3) + 1;
2165         if (nbytes > 0x7fffffffL) {
2166             PyErr_SetString(PyExc_OverflowError,
2167                             "int too large to pickle");
2168             goto error;
2169         }
2170         repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
2171         if (repr == NULL)
2172             goto error;
2173         pdata = (unsigned char *)PyBytes_AS_STRING(repr);
2174         i = _PyLong_AsByteArray((PyLongObject *)obj,
2175                                 pdata, nbytes,
2176                                 1 /* little endian */ , 1 /* signed */ );
2177         if (i < 0)
2178             goto error;
2179         /* If the int is negative, this may be a byte more than
2180          * needed.  This is so iff the MSB is all redundant sign
2181          * bits.
2182          */
2183         if (sign < 0 &&
2184             nbytes > 1 &&
2185             pdata[nbytes - 1] == 0xff &&
2186             (pdata[nbytes - 2] & 0x80) != 0) {
2187             nbytes--;
2188         }
2189 
2190         if (nbytes < 256) {
2191             header[0] = LONG1;
2192             header[1] = (unsigned char)nbytes;
2193             size = 2;
2194         }
2195         else {
2196             header[0] = LONG4;
2197             size = (Py_ssize_t) nbytes;
2198             for (i = 1; i < 5; i++) {
2199                 header[i] = (unsigned char)(size & 0xff);
2200                 size >>= 8;
2201             }
2202             size = 5;
2203         }
2204         if (_Pickler_Write(self, header, size) < 0 ||
2205             _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
2206             goto error;
2207     }
2208     else {
2209         const char long_op = LONG;
2210         const char *string;
2211 
2212         /* proto < 2: write the repr and newline.  This is quadratic-time (in
2213            the number of digits), in both directions.  We add a trailing 'L'
2214            to the repr, for compatibility with Python 2.x. */
2215 
2216         repr = PyObject_Repr(obj);
2217         if (repr == NULL)
2218             goto error;
2219 
2220         string = PyUnicode_AsUTF8AndSize(repr, &size);
2221         if (string == NULL)
2222             goto error;
2223 
2224         if (_Pickler_Write(self, &long_op, 1) < 0 ||
2225             _Pickler_Write(self, string, size) < 0 ||
2226             _Pickler_Write(self, "L\n", 2) < 0)
2227             goto error;
2228     }
2229 
2230     if (0) {
2231   error:
2232       status = -1;
2233     }
2234     Py_XDECREF(repr);
2235 
2236     return status;
2237 }
2238 
2239 static int
save_float(PicklerObject * self,PyObject * obj)2240 save_float(PicklerObject *self, PyObject *obj)
2241 {
2242     double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2243 
2244     if (self->bin) {
2245         char pdata[9];
2246         pdata[0] = BINFLOAT;
2247         if (PyFloat_Pack8(x, &pdata[1], 0) < 0)
2248             return -1;
2249         if (_Pickler_Write(self, pdata, 9) < 0)
2250             return -1;
2251    }
2252     else {
2253         int result = -1;
2254         char *buf = NULL;
2255         char op = FLOAT;
2256 
2257         if (_Pickler_Write(self, &op, 1) < 0)
2258             goto done;
2259 
2260         buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2261         if (!buf) {
2262             PyErr_NoMemory();
2263             goto done;
2264         }
2265 
2266         if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2267             goto done;
2268 
2269         if (_Pickler_Write(self, "\n", 1) < 0)
2270             goto done;
2271 
2272         result = 0;
2273 done:
2274         PyMem_Free(buf);
2275         return result;
2276     }
2277 
2278     return 0;
2279 }
2280 
2281 /* Perform direct write of the header and payload of the binary object.
2282 
2283    The large contiguous data is written directly into the underlying file
2284    object, bypassing the output_buffer of the Pickler.  We intentionally
2285    do not insert a protocol 4 frame opcode to make it possible to optimize
2286    file.read calls in the loader.
2287  */
2288 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2289 _Pickler_write_bytes(PicklerObject *self,
2290                      const char *header, Py_ssize_t header_size,
2291                      const char *data, Py_ssize_t data_size,
2292                      PyObject *payload)
2293 {
2294     int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2295     int framing = self->framing;
2296 
2297     if (bypass_buffer) {
2298         assert(self->output_buffer != NULL);
2299         /* Commit the previous frame. */
2300         if (_Pickler_CommitFrame(self)) {
2301             return -1;
2302         }
2303         /* Disable framing temporarily */
2304         self->framing = 0;
2305     }
2306 
2307     if (_Pickler_Write(self, header, header_size) < 0) {
2308         return -1;
2309     }
2310 
2311     if (bypass_buffer && self->write != NULL) {
2312         /* Bypass the in-memory buffer to directly stream large data
2313            into the underlying file object. */
2314         PyObject *result, *mem = NULL;
2315         /* Dump the output buffer to the file. */
2316         if (_Pickler_FlushToFile(self) < 0) {
2317             return -1;
2318         }
2319 
2320         /* Stream write the payload into the file without going through the
2321            output buffer. */
2322         if (payload == NULL) {
2323             /* TODO: It would be better to use a memoryview with a linked
2324                original string if this is possible. */
2325             payload = mem = PyBytes_FromStringAndSize(data, data_size);
2326             if (payload == NULL) {
2327                 return -1;
2328             }
2329         }
2330         result = PyObject_CallOneArg(self->write, payload);
2331         Py_XDECREF(mem);
2332         if (result == NULL) {
2333             return -1;
2334         }
2335         Py_DECREF(result);
2336 
2337         /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2338         if (_Pickler_ClearBuffer(self) < 0) {
2339             return -1;
2340         }
2341     }
2342     else {
2343         if (_Pickler_Write(self, data, data_size) < 0) {
2344             return -1;
2345         }
2346     }
2347 
2348     /* Re-enable framing for subsequent calls to _Pickler_Write. */
2349     self->framing = framing;
2350 
2351     return 0;
2352 }
2353 
2354 static int
_save_bytes_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2355 _save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
2356                  Py_ssize_t size)
2357 {
2358     assert(self->proto >= 3);
2359 
2360     char header[9];
2361     Py_ssize_t len;
2362 
2363     if (size < 0)
2364         return -1;
2365 
2366     if (size <= 0xff) {
2367         header[0] = SHORT_BINBYTES;
2368         header[1] = (unsigned char)size;
2369         len = 2;
2370     }
2371     else if ((size_t)size <= 0xffffffffUL) {
2372         header[0] = BINBYTES;
2373         header[1] = (unsigned char)(size & 0xff);
2374         header[2] = (unsigned char)((size >> 8) & 0xff);
2375         header[3] = (unsigned char)((size >> 16) & 0xff);
2376         header[4] = (unsigned char)((size >> 24) & 0xff);
2377         len = 5;
2378     }
2379     else if (self->proto >= 4) {
2380         header[0] = BINBYTES8;
2381         _write_size64(header + 1, size);
2382         len = 9;
2383     }
2384     else {
2385         PyErr_SetString(PyExc_OverflowError,
2386                         "serializing a bytes object larger than 4 GiB "
2387                         "requires pickle protocol 4 or higher");
2388         return -1;
2389     }
2390 
2391     if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2392         return -1;
2393     }
2394 
2395     if (memo_put(self, obj) < 0) {
2396         return -1;
2397     }
2398 
2399     return 0;
2400 }
2401 
2402 static int
save_bytes(PicklerObject * self,PyObject * obj)2403 save_bytes(PicklerObject *self, PyObject *obj)
2404 {
2405     if (self->proto < 3) {
2406         /* Older pickle protocols do not have an opcode for pickling bytes
2407            objects. Therefore, we need to fake the copy protocol (i.e.,
2408            the __reduce__ method) to permit bytes object unpickling.
2409 
2410            Here we use a hack to be compatible with Python 2. Since in Python
2411            2 'bytes' is just an alias for 'str' (which has different
2412            parameters than the actual bytes object), we use codecs.encode
2413            to create the appropriate 'str' object when unpickled using
2414            Python 2 *and* the appropriate 'bytes' object when unpickled
2415            using Python 3. Again this is a hack and we don't need to do this
2416            with newer protocols. */
2417         PyObject *reduce_value;
2418         int status;
2419 
2420         if (PyBytes_GET_SIZE(obj) == 0) {
2421             reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2422         }
2423         else {
2424             PickleState *st = _Pickle_GetGlobalState();
2425             PyObject *unicode_str =
2426                 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2427                                        PyBytes_GET_SIZE(obj),
2428                                        "strict");
2429 
2430             if (unicode_str == NULL)
2431                 return -1;
2432             reduce_value = Py_BuildValue("(O(OO))",
2433                                          st->codecs_encode, unicode_str,
2434                                          &_Py_ID(latin1));
2435             Py_DECREF(unicode_str);
2436         }
2437 
2438         if (reduce_value == NULL)
2439             return -1;
2440 
2441         /* save_reduce() will memoize the object automatically. */
2442         status = save_reduce(self, reduce_value, obj);
2443         Py_DECREF(reduce_value);
2444         return status;
2445     }
2446     else {
2447         return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
2448                                 PyBytes_GET_SIZE(obj));
2449     }
2450 }
2451 
2452 static int
_save_bytearray_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2453 _save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
2454                      Py_ssize_t size)
2455 {
2456     assert(self->proto >= 5);
2457 
2458     char header[9];
2459     Py_ssize_t len;
2460 
2461     if (size < 0)
2462         return -1;
2463 
2464     header[0] = BYTEARRAY8;
2465     _write_size64(header + 1, size);
2466     len = 9;
2467 
2468     if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2469         return -1;
2470     }
2471 
2472     if (memo_put(self, obj) < 0) {
2473         return -1;
2474     }
2475 
2476     return 0;
2477 }
2478 
2479 static int
save_bytearray(PicklerObject * self,PyObject * obj)2480 save_bytearray(PicklerObject *self, PyObject *obj)
2481 {
2482     if (self->proto < 5) {
2483         /* Older pickle protocols do not have an opcode for pickling
2484          * bytearrays. */
2485         PyObject *reduce_value = NULL;
2486         int status;
2487 
2488         if (PyByteArray_GET_SIZE(obj) == 0) {
2489             reduce_value = Py_BuildValue("(O())",
2490                                          (PyObject *) &PyByteArray_Type);
2491         }
2492         else {
2493             PyObject *bytes_obj = PyBytes_FromObject(obj);
2494             if (bytes_obj != NULL) {
2495                 reduce_value = Py_BuildValue("(O(O))",
2496                                              (PyObject *) &PyByteArray_Type,
2497                                              bytes_obj);
2498                 Py_DECREF(bytes_obj);
2499             }
2500         }
2501         if (reduce_value == NULL)
2502             return -1;
2503 
2504         /* save_reduce() will memoize the object automatically. */
2505         status = save_reduce(self, reduce_value, obj);
2506         Py_DECREF(reduce_value);
2507         return status;
2508     }
2509     else {
2510         return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
2511                                     PyByteArray_GET_SIZE(obj));
2512     }
2513 }
2514 
2515 static int
save_picklebuffer(PicklerObject * self,PyObject * obj)2516 save_picklebuffer(PicklerObject *self, PyObject *obj)
2517 {
2518     if (self->proto < 5) {
2519         PickleState *st = _Pickle_GetGlobalState();
2520         PyErr_SetString(st->PicklingError,
2521                         "PickleBuffer can only pickled with protocol >= 5");
2522         return -1;
2523     }
2524     const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2525     if (view == NULL) {
2526         return -1;
2527     }
2528     if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2529         PickleState *st = _Pickle_GetGlobalState();
2530         PyErr_SetString(st->PicklingError,
2531                         "PickleBuffer can not be pickled when "
2532                         "pointing to a non-contiguous buffer");
2533         return -1;
2534     }
2535     int in_band = 1;
2536     if (self->buffer_callback != NULL) {
2537         PyObject *ret = PyObject_CallOneArg(self->buffer_callback, obj);
2538         if (ret == NULL) {
2539             return -1;
2540         }
2541         in_band = PyObject_IsTrue(ret);
2542         Py_DECREF(ret);
2543         if (in_band == -1) {
2544             return -1;
2545         }
2546     }
2547     if (in_band) {
2548         /* Write data in-band */
2549         if (view->readonly) {
2550             return _save_bytes_data(self, obj, (const char*) view->buf,
2551                                     view->len);
2552         }
2553         else {
2554             return _save_bytearray_data(self, obj, (const char*) view->buf,
2555                                         view->len);
2556         }
2557     }
2558     else {
2559         /* Write data out-of-band */
2560         const char next_buffer_op = NEXT_BUFFER;
2561         if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2562             return -1;
2563         }
2564         if (view->readonly) {
2565             const char readonly_buffer_op = READONLY_BUFFER;
2566             if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2567                 return -1;
2568             }
2569         }
2570     }
2571     return 0;
2572 }
2573 
2574 /* A copy of PyUnicode_AsRawUnicodeEscapeString() that also translates
2575    backslash and newline characters to \uXXXX escapes. */
2576 static PyObject *
raw_unicode_escape(PyObject * obj)2577 raw_unicode_escape(PyObject *obj)
2578 {
2579     char *p;
2580     Py_ssize_t i, size;
2581     const void *data;
2582     unsigned int kind;
2583     _PyBytesWriter writer;
2584 
2585     if (PyUnicode_READY(obj))
2586         return NULL;
2587 
2588     _PyBytesWriter_Init(&writer);
2589 
2590     size = PyUnicode_GET_LENGTH(obj);
2591     data = PyUnicode_DATA(obj);
2592     kind = PyUnicode_KIND(obj);
2593 
2594     p = _PyBytesWriter_Alloc(&writer, size);
2595     if (p == NULL)
2596         goto error;
2597     writer.overallocate = 1;
2598 
2599     for (i=0; i < size; i++) {
2600         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2601         /* Map 32-bit characters to '\Uxxxxxxxx' */
2602         if (ch >= 0x10000) {
2603             /* -1: subtract 1 preallocated byte */
2604             p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2605             if (p == NULL)
2606                 goto error;
2607 
2608             *p++ = '\\';
2609             *p++ = 'U';
2610             *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2611             *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2612             *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2613             *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2614             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2615             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2616             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2617             *p++ = Py_hexdigits[ch & 15];
2618         }
2619         /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2620         else if (ch >= 256 ||
2621                  ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2622                  ch == 0x1a)
2623         {
2624             /* -1: subtract 1 preallocated byte */
2625             p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2626             if (p == NULL)
2627                 goto error;
2628 
2629             *p++ = '\\';
2630             *p++ = 'u';
2631             *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2632             *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2633             *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2634             *p++ = Py_hexdigits[ch & 15];
2635         }
2636         /* Copy everything else as-is */
2637         else
2638             *p++ = (char) ch;
2639     }
2640 
2641     return _PyBytesWriter_Finish(&writer, p);
2642 
2643 error:
2644     _PyBytesWriter_Dealloc(&writer);
2645     return NULL;
2646 }
2647 
2648 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2649 write_unicode_binary(PicklerObject *self, PyObject *obj)
2650 {
2651     char header[9];
2652     Py_ssize_t len;
2653     PyObject *encoded = NULL;
2654     Py_ssize_t size;
2655     const char *data;
2656 
2657     if (PyUnicode_READY(obj))
2658         return -1;
2659 
2660     data = PyUnicode_AsUTF8AndSize(obj, &size);
2661     if (data == NULL) {
2662         /* Issue #8383: for strings with lone surrogates, fallback on the
2663            "surrogatepass" error handler. */
2664         PyErr_Clear();
2665         encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2666         if (encoded == NULL)
2667             return -1;
2668 
2669         data = PyBytes_AS_STRING(encoded);
2670         size = PyBytes_GET_SIZE(encoded);
2671     }
2672 
2673     assert(size >= 0);
2674     if (size <= 0xff && self->proto >= 4) {
2675         header[0] = SHORT_BINUNICODE;
2676         header[1] = (unsigned char)(size & 0xff);
2677         len = 2;
2678     }
2679     else if ((size_t)size <= 0xffffffffUL) {
2680         header[0] = BINUNICODE;
2681         header[1] = (unsigned char)(size & 0xff);
2682         header[2] = (unsigned char)((size >> 8) & 0xff);
2683         header[3] = (unsigned char)((size >> 16) & 0xff);
2684         header[4] = (unsigned char)((size >> 24) & 0xff);
2685         len = 5;
2686     }
2687     else if (self->proto >= 4) {
2688         header[0] = BINUNICODE8;
2689         _write_size64(header + 1, size);
2690         len = 9;
2691     }
2692     else {
2693         PyErr_SetString(PyExc_OverflowError,
2694                         "serializing a string larger than 4 GiB "
2695                         "requires pickle protocol 4 or higher");
2696         Py_XDECREF(encoded);
2697         return -1;
2698     }
2699 
2700     if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2701         Py_XDECREF(encoded);
2702         return -1;
2703     }
2704     Py_XDECREF(encoded);
2705     return 0;
2706 }
2707 
2708 static int
save_unicode(PicklerObject * self,PyObject * obj)2709 save_unicode(PicklerObject *self, PyObject *obj)
2710 {
2711     if (self->bin) {
2712         if (write_unicode_binary(self, obj) < 0)
2713             return -1;
2714     }
2715     else {
2716         PyObject *encoded;
2717         Py_ssize_t size;
2718         const char unicode_op = UNICODE;
2719 
2720         encoded = raw_unicode_escape(obj);
2721         if (encoded == NULL)
2722             return -1;
2723 
2724         if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2725             Py_DECREF(encoded);
2726             return -1;
2727         }
2728 
2729         size = PyBytes_GET_SIZE(encoded);
2730         if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2731             Py_DECREF(encoded);
2732             return -1;
2733         }
2734         Py_DECREF(encoded);
2735 
2736         if (_Pickler_Write(self, "\n", 1) < 0)
2737             return -1;
2738     }
2739     if (memo_put(self, obj) < 0)
2740         return -1;
2741 
2742     return 0;
2743 }
2744 
2745 /* A helper for save_tuple.  Push the len elements in tuple t on the stack. */
2746 static int
store_tuple_elements(PicklerObject * self,PyObject * t,Py_ssize_t len)2747 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2748 {
2749     Py_ssize_t i;
2750 
2751     assert(PyTuple_Size(t) == len);
2752 
2753     for (i = 0; i < len; i++) {
2754         PyObject *element = PyTuple_GET_ITEM(t, i);
2755 
2756         if (element == NULL)
2757             return -1;
2758         if (save(self, element, 0) < 0)
2759             return -1;
2760     }
2761 
2762     return 0;
2763 }
2764 
2765 /* Tuples are ubiquitous in the pickle protocols, so many techniques are
2766  * used across protocols to minimize the space needed to pickle them.
2767  * Tuples are also the only builtin immutable type that can be recursive
2768  * (a tuple can be reached from itself), and that requires some subtle
2769  * magic so that it works in all cases.  IOW, this is a long routine.
2770  */
2771 static int
save_tuple(PicklerObject * self,PyObject * obj)2772 save_tuple(PicklerObject *self, PyObject *obj)
2773 {
2774     Py_ssize_t len, i;
2775 
2776     const char mark_op = MARK;
2777     const char tuple_op = TUPLE;
2778     const char pop_op = POP;
2779     const char pop_mark_op = POP_MARK;
2780     const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2781 
2782     if ((len = PyTuple_Size(obj)) < 0)
2783         return -1;
2784 
2785     if (len == 0) {
2786         char pdata[2];
2787 
2788         if (self->proto) {
2789             pdata[0] = EMPTY_TUPLE;
2790             len = 1;
2791         }
2792         else {
2793             pdata[0] = MARK;
2794             pdata[1] = TUPLE;
2795             len = 2;
2796         }
2797         if (_Pickler_Write(self, pdata, len) < 0)
2798             return -1;
2799         return 0;
2800     }
2801 
2802     /* The tuple isn't in the memo now.  If it shows up there after
2803      * saving the tuple elements, the tuple must be recursive, in
2804      * which case we'll pop everything we put on the stack, and fetch
2805      * its value from the memo.
2806      */
2807     if (len <= 3 && self->proto >= 2) {
2808         /* Use TUPLE{1,2,3} opcodes. */
2809         if (store_tuple_elements(self, obj, len) < 0)
2810             return -1;
2811 
2812         if (PyMemoTable_Get(self->memo, obj)) {
2813             /* pop the len elements */
2814             for (i = 0; i < len; i++)
2815                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2816                     return -1;
2817             /* fetch from memo */
2818             if (memo_get(self, obj) < 0)
2819                 return -1;
2820 
2821             return 0;
2822         }
2823         else { /* Not recursive. */
2824             if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2825                 return -1;
2826         }
2827         goto memoize;
2828     }
2829 
2830     /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2831      * Generate MARK e1 e2 ... TUPLE
2832      */
2833     if (_Pickler_Write(self, &mark_op, 1) < 0)
2834         return -1;
2835 
2836     if (store_tuple_elements(self, obj, len) < 0)
2837         return -1;
2838 
2839     if (PyMemoTable_Get(self->memo, obj)) {
2840         /* pop the stack stuff we pushed */
2841         if (self->bin) {
2842             if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2843                 return -1;
2844         }
2845         else {
2846             /* Note that we pop one more than len, to remove
2847              * the MARK too.
2848              */
2849             for (i = 0; i <= len; i++)
2850                 if (_Pickler_Write(self, &pop_op, 1) < 0)
2851                     return -1;
2852         }
2853         /* fetch from memo */
2854         if (memo_get(self, obj) < 0)
2855             return -1;
2856 
2857         return 0;
2858     }
2859     else { /* Not recursive. */
2860         if (_Pickler_Write(self, &tuple_op, 1) < 0)
2861             return -1;
2862     }
2863 
2864   memoize:
2865     if (memo_put(self, obj) < 0)
2866         return -1;
2867 
2868     return 0;
2869 }
2870 
2871 /* iter is an iterator giving items, and we batch up chunks of
2872  *     MARK item item ... item APPENDS
2873  * opcode sequences.  Calling code should have arranged to first create an
2874  * empty list, or list-like object, for the APPENDS to operate on.
2875  * Returns 0 on success, <0 on error.
2876  */
2877 static int
batch_list(PicklerObject * self,PyObject * iter)2878 batch_list(PicklerObject *self, PyObject *iter)
2879 {
2880     PyObject *obj = NULL;
2881     PyObject *firstitem = NULL;
2882     int i, n;
2883 
2884     const char mark_op = MARK;
2885     const char append_op = APPEND;
2886     const char appends_op = APPENDS;
2887 
2888     assert(iter != NULL);
2889 
2890     /* XXX: I think this function could be made faster by avoiding the
2891        iterator interface and fetching objects directly from list using
2892        PyList_GET_ITEM.
2893     */
2894 
2895     if (self->proto == 0) {
2896         /* APPENDS isn't available; do one at a time. */
2897         for (;;) {
2898             obj = PyIter_Next(iter);
2899             if (obj == NULL) {
2900                 if (PyErr_Occurred())
2901                     return -1;
2902                 break;
2903             }
2904             i = save(self, obj, 0);
2905             Py_DECREF(obj);
2906             if (i < 0)
2907                 return -1;
2908             if (_Pickler_Write(self, &append_op, 1) < 0)
2909                 return -1;
2910         }
2911         return 0;
2912     }
2913 
2914     /* proto > 0:  write in batches of BATCHSIZE. */
2915     do {
2916         /* Get first item */
2917         firstitem = PyIter_Next(iter);
2918         if (firstitem == NULL) {
2919             if (PyErr_Occurred())
2920                 goto error;
2921 
2922             /* nothing more to add */
2923             break;
2924         }
2925 
2926         /* Try to get a second item */
2927         obj = PyIter_Next(iter);
2928         if (obj == NULL) {
2929             if (PyErr_Occurred())
2930                 goto error;
2931 
2932             /* Only one item to write */
2933             if (save(self, firstitem, 0) < 0)
2934                 goto error;
2935             if (_Pickler_Write(self, &append_op, 1) < 0)
2936                 goto error;
2937             Py_CLEAR(firstitem);
2938             break;
2939         }
2940 
2941         /* More than one item to write */
2942 
2943         /* Pump out MARK, items, APPENDS. */
2944         if (_Pickler_Write(self, &mark_op, 1) < 0)
2945             goto error;
2946 
2947         if (save(self, firstitem, 0) < 0)
2948             goto error;
2949         Py_CLEAR(firstitem);
2950         n = 1;
2951 
2952         /* Fetch and save up to BATCHSIZE items */
2953         while (obj) {
2954             if (save(self, obj, 0) < 0)
2955                 goto error;
2956             Py_CLEAR(obj);
2957             n += 1;
2958 
2959             if (n == BATCHSIZE)
2960                 break;
2961 
2962             obj = PyIter_Next(iter);
2963             if (obj == NULL) {
2964                 if (PyErr_Occurred())
2965                     goto error;
2966                 break;
2967             }
2968         }
2969 
2970         if (_Pickler_Write(self, &appends_op, 1) < 0)
2971             goto error;
2972 
2973     } while (n == BATCHSIZE);
2974     return 0;
2975 
2976   error:
2977     Py_XDECREF(firstitem);
2978     Py_XDECREF(obj);
2979     return -1;
2980 }
2981 
2982 /* This is a variant of batch_list() above, specialized for lists (with no
2983  * support for list subclasses). Like batch_list(), we batch up chunks of
2984  *     MARK item item ... item APPENDS
2985  * opcode sequences.  Calling code should have arranged to first create an
2986  * empty list, or list-like object, for the APPENDS to operate on.
2987  * Returns 0 on success, -1 on error.
2988  *
2989  * This version is considerably faster than batch_list(), if less general.
2990  *
2991  * Note that this only works for protocols > 0.
2992  */
2993 static int
batch_list_exact(PicklerObject * self,PyObject * obj)2994 batch_list_exact(PicklerObject *self, PyObject *obj)
2995 {
2996     PyObject *item = NULL;
2997     Py_ssize_t this_batch, total;
2998 
2999     const char append_op = APPEND;
3000     const char appends_op = APPENDS;
3001     const char mark_op = MARK;
3002 
3003     assert(obj != NULL);
3004     assert(self->proto > 0);
3005     assert(PyList_CheckExact(obj));
3006 
3007     if (PyList_GET_SIZE(obj) == 1) {
3008         item = PyList_GET_ITEM(obj, 0);
3009         Py_INCREF(item);
3010         int err = save(self, item, 0);
3011         Py_DECREF(item);
3012         if (err < 0)
3013             return -1;
3014         if (_Pickler_Write(self, &append_op, 1) < 0)
3015             return -1;
3016         return 0;
3017     }
3018 
3019     /* Write in batches of BATCHSIZE. */
3020     total = 0;
3021     do {
3022         this_batch = 0;
3023         if (_Pickler_Write(self, &mark_op, 1) < 0)
3024             return -1;
3025         while (total < PyList_GET_SIZE(obj)) {
3026             item = PyList_GET_ITEM(obj, total);
3027             Py_INCREF(item);
3028             int err = save(self, item, 0);
3029             Py_DECREF(item);
3030             if (err < 0)
3031                 return -1;
3032             total++;
3033             if (++this_batch == BATCHSIZE)
3034                 break;
3035         }
3036         if (_Pickler_Write(self, &appends_op, 1) < 0)
3037             return -1;
3038 
3039     } while (total < PyList_GET_SIZE(obj));
3040 
3041     return 0;
3042 }
3043 
3044 static int
save_list(PicklerObject * self,PyObject * obj)3045 save_list(PicklerObject *self, PyObject *obj)
3046 {
3047     char header[3];
3048     Py_ssize_t len;
3049     int status = 0;
3050 
3051     if (self->fast && !fast_save_enter(self, obj))
3052         goto error;
3053 
3054     /* Create an empty list. */
3055     if (self->bin) {
3056         header[0] = EMPTY_LIST;
3057         len = 1;
3058     }
3059     else {
3060         header[0] = MARK;
3061         header[1] = LIST;
3062         len = 2;
3063     }
3064 
3065     if (_Pickler_Write(self, header, len) < 0)
3066         goto error;
3067 
3068     /* Get list length, and bow out early if empty. */
3069     if ((len = PyList_Size(obj)) < 0)
3070         goto error;
3071 
3072     if (memo_put(self, obj) < 0)
3073         goto error;
3074 
3075     if (len != 0) {
3076         /* Materialize the list elements. */
3077         if (PyList_CheckExact(obj) && self->proto > 0) {
3078             if (_Py_EnterRecursiveCall(" while pickling an object"))
3079                 goto error;
3080             status = batch_list_exact(self, obj);
3081             _Py_LeaveRecursiveCall();
3082         } else {
3083             PyObject *iter = PyObject_GetIter(obj);
3084             if (iter == NULL)
3085                 goto error;
3086 
3087             if (_Py_EnterRecursiveCall(" while pickling an object")) {
3088                 Py_DECREF(iter);
3089                 goto error;
3090             }
3091             status = batch_list(self, iter);
3092             _Py_LeaveRecursiveCall();
3093             Py_DECREF(iter);
3094         }
3095     }
3096     if (0) {
3097   error:
3098         status = -1;
3099     }
3100 
3101     if (self->fast && !fast_save_leave(self, obj))
3102         status = -1;
3103 
3104     return status;
3105 }
3106 
3107 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3108  *     MARK key value ... key value SETITEMS
3109  * opcode sequences.  Calling code should have arranged to first create an
3110  * empty dict, or dict-like object, for the SETITEMS to operate on.
3111  * Returns 0 on success, <0 on error.
3112  *
3113  * This is very much like batch_list().  The difference between saving
3114  * elements directly, and picking apart two-tuples, is so long-winded at
3115  * the C level, though, that attempts to combine these routines were too
3116  * ugly to bear.
3117  */
3118 static int
batch_dict(PicklerObject * self,PyObject * iter)3119 batch_dict(PicklerObject *self, PyObject *iter)
3120 {
3121     PyObject *obj = NULL;
3122     PyObject *firstitem = NULL;
3123     int i, n;
3124 
3125     const char mark_op = MARK;
3126     const char setitem_op = SETITEM;
3127     const char setitems_op = SETITEMS;
3128 
3129     assert(iter != NULL);
3130 
3131     if (self->proto == 0) {
3132         /* SETITEMS isn't available; do one at a time. */
3133         for (;;) {
3134             obj = PyIter_Next(iter);
3135             if (obj == NULL) {
3136                 if (PyErr_Occurred())
3137                     return -1;
3138                 break;
3139             }
3140             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3141                 PyErr_SetString(PyExc_TypeError, "dict items "
3142                                 "iterator must return 2-tuples");
3143                 return -1;
3144             }
3145             i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
3146             if (i >= 0)
3147                 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
3148             Py_DECREF(obj);
3149             if (i < 0)
3150                 return -1;
3151             if (_Pickler_Write(self, &setitem_op, 1) < 0)
3152                 return -1;
3153         }
3154         return 0;
3155     }
3156 
3157     /* proto > 0:  write in batches of BATCHSIZE. */
3158     do {
3159         /* Get first item */
3160         firstitem = PyIter_Next(iter);
3161         if (firstitem == NULL) {
3162             if (PyErr_Occurred())
3163                 goto error;
3164 
3165             /* nothing more to add */
3166             break;
3167         }
3168         if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3169             PyErr_SetString(PyExc_TypeError, "dict items "
3170                                 "iterator must return 2-tuples");
3171             goto error;
3172         }
3173 
3174         /* Try to get a second item */
3175         obj = PyIter_Next(iter);
3176         if (obj == NULL) {
3177             if (PyErr_Occurred())
3178                 goto error;
3179 
3180             /* Only one item to write */
3181             if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3182                 goto error;
3183             if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3184                 goto error;
3185             if (_Pickler_Write(self, &setitem_op, 1) < 0)
3186                 goto error;
3187             Py_CLEAR(firstitem);
3188             break;
3189         }
3190 
3191         /* More than one item to write */
3192 
3193         /* Pump out MARK, items, SETITEMS. */
3194         if (_Pickler_Write(self, &mark_op, 1) < 0)
3195             goto error;
3196 
3197         if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3198             goto error;
3199         if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3200             goto error;
3201         Py_CLEAR(firstitem);
3202         n = 1;
3203 
3204         /* Fetch and save up to BATCHSIZE items */
3205         while (obj) {
3206             if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3207                 PyErr_SetString(PyExc_TypeError, "dict items "
3208                     "iterator must return 2-tuples");
3209                 goto error;
3210             }
3211             if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3212                 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3213                 goto error;
3214             Py_CLEAR(obj);
3215             n += 1;
3216 
3217             if (n == BATCHSIZE)
3218                 break;
3219 
3220             obj = PyIter_Next(iter);
3221             if (obj == NULL) {
3222                 if (PyErr_Occurred())
3223                     goto error;
3224                 break;
3225             }
3226         }
3227 
3228         if (_Pickler_Write(self, &setitems_op, 1) < 0)
3229             goto error;
3230 
3231     } while (n == BATCHSIZE);
3232     return 0;
3233 
3234   error:
3235     Py_XDECREF(firstitem);
3236     Py_XDECREF(obj);
3237     return -1;
3238 }
3239 
3240 /* This is a variant of batch_dict() above that specializes for dicts, with no
3241  * support for dict subclasses. Like batch_dict(), we batch up chunks of
3242  *     MARK key value ... key value SETITEMS
3243  * opcode sequences.  Calling code should have arranged to first create an
3244  * empty dict, or dict-like object, for the SETITEMS to operate on.
3245  * Returns 0 on success, -1 on error.
3246  *
3247  * Note that this currently doesn't work for protocol 0.
3248  */
3249 static int
batch_dict_exact(PicklerObject * self,PyObject * obj)3250 batch_dict_exact(PicklerObject *self, PyObject *obj)
3251 {
3252     PyObject *key = NULL, *value = NULL;
3253     int i;
3254     Py_ssize_t dict_size, ppos = 0;
3255 
3256     const char mark_op = MARK;
3257     const char setitem_op = SETITEM;
3258     const char setitems_op = SETITEMS;
3259 
3260     assert(obj != NULL && PyDict_CheckExact(obj));
3261     assert(self->proto > 0);
3262 
3263     dict_size = PyDict_GET_SIZE(obj);
3264 
3265     /* Special-case len(d) == 1 to save space. */
3266     if (dict_size == 1) {
3267         PyDict_Next(obj, &ppos, &key, &value);
3268         Py_INCREF(key);
3269         Py_INCREF(value);
3270         if (save(self, key, 0) < 0) {
3271             goto error;
3272         }
3273         if (save(self, value, 0) < 0) {
3274             goto error;
3275         }
3276         Py_CLEAR(key);
3277         Py_CLEAR(value);
3278         if (_Pickler_Write(self, &setitem_op, 1) < 0)
3279             return -1;
3280         return 0;
3281     }
3282 
3283     /* Write in batches of BATCHSIZE. */
3284     do {
3285         i = 0;
3286         if (_Pickler_Write(self, &mark_op, 1) < 0)
3287             return -1;
3288         while (PyDict_Next(obj, &ppos, &key, &value)) {
3289             Py_INCREF(key);
3290             Py_INCREF(value);
3291             if (save(self, key, 0) < 0) {
3292                 goto error;
3293             }
3294             if (save(self, value, 0) < 0) {
3295                 goto error;
3296             }
3297             Py_CLEAR(key);
3298             Py_CLEAR(value);
3299             if (++i == BATCHSIZE)
3300                 break;
3301         }
3302         if (_Pickler_Write(self, &setitems_op, 1) < 0)
3303             return -1;
3304         if (PyDict_GET_SIZE(obj) != dict_size) {
3305             PyErr_Format(
3306                 PyExc_RuntimeError,
3307                 "dictionary changed size during iteration");
3308             return -1;
3309         }
3310 
3311     } while (i == BATCHSIZE);
3312     return 0;
3313 error:
3314     Py_XDECREF(key);
3315     Py_XDECREF(value);
3316     return -1;
3317 }
3318 
3319 static int
save_dict(PicklerObject * self,PyObject * obj)3320 save_dict(PicklerObject *self, PyObject *obj)
3321 {
3322     PyObject *items, *iter;
3323     char header[3];
3324     Py_ssize_t len;
3325     int status = 0;
3326     assert(PyDict_Check(obj));
3327 
3328     if (self->fast && !fast_save_enter(self, obj))
3329         goto error;
3330 
3331     /* Create an empty dict. */
3332     if (self->bin) {
3333         header[0] = EMPTY_DICT;
3334         len = 1;
3335     }
3336     else {
3337         header[0] = MARK;
3338         header[1] = DICT;
3339         len = 2;
3340     }
3341 
3342     if (_Pickler_Write(self, header, len) < 0)
3343         goto error;
3344 
3345     if (memo_put(self, obj) < 0)
3346         goto error;
3347 
3348     if (PyDict_GET_SIZE(obj)) {
3349         /* Save the dict items. */
3350         if (PyDict_CheckExact(obj) && self->proto > 0) {
3351             /* We can take certain shortcuts if we know this is a dict and
3352                not a dict subclass. */
3353             if (_Py_EnterRecursiveCall(" while pickling an object"))
3354                 goto error;
3355             status = batch_dict_exact(self, obj);
3356             _Py_LeaveRecursiveCall();
3357         } else {
3358             items = PyObject_CallMethodNoArgs(obj, &_Py_ID(items));
3359             if (items == NULL)
3360                 goto error;
3361             iter = PyObject_GetIter(items);
3362             Py_DECREF(items);
3363             if (iter == NULL)
3364                 goto error;
3365             if (_Py_EnterRecursiveCall(" while pickling an object")) {
3366                 Py_DECREF(iter);
3367                 goto error;
3368             }
3369             status = batch_dict(self, iter);
3370             _Py_LeaveRecursiveCall();
3371             Py_DECREF(iter);
3372         }
3373     }
3374 
3375     if (0) {
3376   error:
3377         status = -1;
3378     }
3379 
3380     if (self->fast && !fast_save_leave(self, obj))
3381         status = -1;
3382 
3383     return status;
3384 }
3385 
3386 static int
save_set(PicklerObject * self,PyObject * obj)3387 save_set(PicklerObject *self, PyObject *obj)
3388 {
3389     PyObject *item;
3390     int i;
3391     Py_ssize_t set_size, ppos = 0;
3392     Py_hash_t hash;
3393 
3394     const char empty_set_op = EMPTY_SET;
3395     const char mark_op = MARK;
3396     const char additems_op = ADDITEMS;
3397 
3398     if (self->proto < 4) {
3399         PyObject *items;
3400         PyObject *reduce_value;
3401         int status;
3402 
3403         items = PySequence_List(obj);
3404         if (items == NULL) {
3405             return -1;
3406         }
3407         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3408         Py_DECREF(items);
3409         if (reduce_value == NULL) {
3410             return -1;
3411         }
3412         /* save_reduce() will memoize the object automatically. */
3413         status = save_reduce(self, reduce_value, obj);
3414         Py_DECREF(reduce_value);
3415         return status;
3416     }
3417 
3418     if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3419         return -1;
3420 
3421     if (memo_put(self, obj) < 0)
3422         return -1;
3423 
3424     set_size = PySet_GET_SIZE(obj);
3425     if (set_size == 0)
3426         return 0;  /* nothing to do */
3427 
3428     /* Write in batches of BATCHSIZE. */
3429     do {
3430         i = 0;
3431         if (_Pickler_Write(self, &mark_op, 1) < 0)
3432             return -1;
3433         while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3434             Py_INCREF(item);
3435             int err = save(self, item, 0);
3436             Py_CLEAR(item);
3437             if (err < 0)
3438                 return -1;
3439             if (++i == BATCHSIZE)
3440                 break;
3441         }
3442         if (_Pickler_Write(self, &additems_op, 1) < 0)
3443             return -1;
3444         if (PySet_GET_SIZE(obj) != set_size) {
3445             PyErr_Format(
3446                 PyExc_RuntimeError,
3447                 "set changed size during iteration");
3448             return -1;
3449         }
3450     } while (i == BATCHSIZE);
3451 
3452     return 0;
3453 }
3454 
3455 static int
save_frozenset(PicklerObject * self,PyObject * obj)3456 save_frozenset(PicklerObject *self, PyObject *obj)
3457 {
3458     PyObject *iter;
3459 
3460     const char mark_op = MARK;
3461     const char frozenset_op = FROZENSET;
3462 
3463     if (self->fast && !fast_save_enter(self, obj))
3464         return -1;
3465 
3466     if (self->proto < 4) {
3467         PyObject *items;
3468         PyObject *reduce_value;
3469         int status;
3470 
3471         items = PySequence_List(obj);
3472         if (items == NULL) {
3473             return -1;
3474         }
3475         reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3476                                      items);
3477         Py_DECREF(items);
3478         if (reduce_value == NULL) {
3479             return -1;
3480         }
3481         /* save_reduce() will memoize the object automatically. */
3482         status = save_reduce(self, reduce_value, obj);
3483         Py_DECREF(reduce_value);
3484         return status;
3485     }
3486 
3487     if (_Pickler_Write(self, &mark_op, 1) < 0)
3488         return -1;
3489 
3490     iter = PyObject_GetIter(obj);
3491     if (iter == NULL) {
3492         return -1;
3493     }
3494     for (;;) {
3495         PyObject *item;
3496 
3497         item = PyIter_Next(iter);
3498         if (item == NULL) {
3499             if (PyErr_Occurred()) {
3500                 Py_DECREF(iter);
3501                 return -1;
3502             }
3503             break;
3504         }
3505         if (save(self, item, 0) < 0) {
3506             Py_DECREF(item);
3507             Py_DECREF(iter);
3508             return -1;
3509         }
3510         Py_DECREF(item);
3511     }
3512     Py_DECREF(iter);
3513 
3514     /* If the object is already in the memo, this means it is
3515        recursive. In this case, throw away everything we put on the
3516        stack, and fetch the object back from the memo. */
3517     if (PyMemoTable_Get(self->memo, obj)) {
3518         const char pop_mark_op = POP_MARK;
3519 
3520         if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3521             return -1;
3522         if (memo_get(self, obj) < 0)
3523             return -1;
3524         return 0;
3525     }
3526 
3527     if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3528         return -1;
3529     if (memo_put(self, obj) < 0)
3530         return -1;
3531 
3532     return 0;
3533 }
3534 
3535 static int
fix_imports(PyObject ** module_name,PyObject ** global_name)3536 fix_imports(PyObject **module_name, PyObject **global_name)
3537 {
3538     PyObject *key;
3539     PyObject *item;
3540     PickleState *st = _Pickle_GetGlobalState();
3541 
3542     key = PyTuple_Pack(2, *module_name, *global_name);
3543     if (key == NULL)
3544         return -1;
3545     item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3546     Py_DECREF(key);
3547     if (item) {
3548         PyObject *fixed_module_name;
3549         PyObject *fixed_global_name;
3550 
3551         if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3552             PyErr_Format(PyExc_RuntimeError,
3553                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3554                          "should be 2-tuples, not %.200s",
3555                          Py_TYPE(item)->tp_name);
3556             return -1;
3557         }
3558         fixed_module_name = PyTuple_GET_ITEM(item, 0);
3559         fixed_global_name = PyTuple_GET_ITEM(item, 1);
3560         if (!PyUnicode_Check(fixed_module_name) ||
3561             !PyUnicode_Check(fixed_global_name)) {
3562             PyErr_Format(PyExc_RuntimeError,
3563                          "_compat_pickle.REVERSE_NAME_MAPPING values "
3564                          "should be pairs of str, not (%.200s, %.200s)",
3565                          Py_TYPE(fixed_module_name)->tp_name,
3566                          Py_TYPE(fixed_global_name)->tp_name);
3567             return -1;
3568         }
3569 
3570         Py_CLEAR(*module_name);
3571         Py_CLEAR(*global_name);
3572         Py_INCREF(fixed_module_name);
3573         Py_INCREF(fixed_global_name);
3574         *module_name = fixed_module_name;
3575         *global_name = fixed_global_name;
3576         return 0;
3577     }
3578     else if (PyErr_Occurred()) {
3579         return -1;
3580     }
3581 
3582     item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3583     if (item) {
3584         if (!PyUnicode_Check(item)) {
3585             PyErr_Format(PyExc_RuntimeError,
3586                          "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3587                          "should be strings, not %.200s",
3588                          Py_TYPE(item)->tp_name);
3589             return -1;
3590         }
3591         Py_INCREF(item);
3592         Py_XSETREF(*module_name, item);
3593     }
3594     else if (PyErr_Occurred()) {
3595         return -1;
3596     }
3597 
3598     return 0;
3599 }
3600 
3601 static int
save_global(PicklerObject * self,PyObject * obj,PyObject * name)3602 save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3603 {
3604     PyObject *global_name = NULL;
3605     PyObject *module_name = NULL;
3606     PyObject *module = NULL;
3607     PyObject *parent = NULL;
3608     PyObject *dotted_path = NULL;
3609     PyObject *lastname = NULL;
3610     PyObject *cls;
3611     PickleState *st = _Pickle_GetGlobalState();
3612     int status = 0;
3613 
3614     const char global_op = GLOBAL;
3615 
3616     if (name) {
3617         Py_INCREF(name);
3618         global_name = name;
3619     }
3620     else {
3621         if (_PyObject_LookupAttr(obj, &_Py_ID(__qualname__), &global_name) < 0)
3622             goto error;
3623         if (global_name == NULL) {
3624             global_name = PyObject_GetAttr(obj, &_Py_ID(__name__));
3625             if (global_name == NULL)
3626                 goto error;
3627         }
3628     }
3629 
3630     dotted_path = get_dotted_path(module, global_name);
3631     if (dotted_path == NULL)
3632         goto error;
3633     module_name = whichmodule(obj, dotted_path);
3634     if (module_name == NULL)
3635         goto error;
3636 
3637     /* XXX: Change to use the import C API directly with level=0 to disallow
3638        relative imports.
3639 
3640        XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3641        builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3642        custom import functions (IMHO, this would be a nice security
3643        feature). The import C API would need to be extended to support the
3644        extra parameters of __import__ to fix that. */
3645     module = PyImport_Import(module_name);
3646     if (module == NULL) {
3647         PyErr_Format(st->PicklingError,
3648                      "Can't pickle %R: import of module %R failed",
3649                      obj, module_name);
3650         goto error;
3651     }
3652     lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3653     Py_INCREF(lastname);
3654     cls = get_deep_attribute(module, dotted_path, &parent);
3655     Py_CLEAR(dotted_path);
3656     if (cls == NULL) {
3657         PyErr_Format(st->PicklingError,
3658                      "Can't pickle %R: attribute lookup %S on %S failed",
3659                      obj, global_name, module_name);
3660         goto error;
3661     }
3662     if (cls != obj) {
3663         Py_DECREF(cls);
3664         PyErr_Format(st->PicklingError,
3665                      "Can't pickle %R: it's not the same object as %S.%S",
3666                      obj, module_name, global_name);
3667         goto error;
3668     }
3669     Py_DECREF(cls);
3670 
3671     if (self->proto >= 2) {
3672         /* See whether this is in the extension registry, and if
3673          * so generate an EXT opcode.
3674          */
3675         PyObject *extension_key;
3676         PyObject *code_obj;      /* extension code as Python object */
3677         long code;               /* extension code as C value */
3678         char pdata[5];
3679         Py_ssize_t n;
3680 
3681         extension_key = PyTuple_Pack(2, module_name, global_name);
3682         if (extension_key == NULL) {
3683             goto error;
3684         }
3685         code_obj = PyDict_GetItemWithError(st->extension_registry,
3686                                            extension_key);
3687         Py_DECREF(extension_key);
3688         /* The object is not registered in the extension registry.
3689            This is the most likely code path. */
3690         if (code_obj == NULL) {
3691             if (PyErr_Occurred()) {
3692                 goto error;
3693             }
3694             goto gen_global;
3695         }
3696 
3697         /* XXX: pickle.py doesn't check neither the type, nor the range
3698            of the value returned by the extension_registry. It should for
3699            consistency. */
3700 
3701         /* Verify code_obj has the right type and value. */
3702         if (!PyLong_Check(code_obj)) {
3703             PyErr_Format(st->PicklingError,
3704                          "Can't pickle %R: extension code %R isn't an integer",
3705                          obj, code_obj);
3706             goto error;
3707         }
3708         code = PyLong_AS_LONG(code_obj);
3709         if (code <= 0 || code > 0x7fffffffL) {
3710             if (!PyErr_Occurred())
3711                 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3712                              "code %ld is out of range", obj, code);
3713             goto error;
3714         }
3715 
3716         /* Generate an EXT opcode. */
3717         if (code <= 0xff) {
3718             pdata[0] = EXT1;
3719             pdata[1] = (unsigned char)code;
3720             n = 2;
3721         }
3722         else if (code <= 0xffff) {
3723             pdata[0] = EXT2;
3724             pdata[1] = (unsigned char)(code & 0xff);
3725             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3726             n = 3;
3727         }
3728         else {
3729             pdata[0] = EXT4;
3730             pdata[1] = (unsigned char)(code & 0xff);
3731             pdata[2] = (unsigned char)((code >> 8) & 0xff);
3732             pdata[3] = (unsigned char)((code >> 16) & 0xff);
3733             pdata[4] = (unsigned char)((code >> 24) & 0xff);
3734             n = 5;
3735         }
3736 
3737         if (_Pickler_Write(self, pdata, n) < 0)
3738             goto error;
3739     }
3740     else {
3741   gen_global:
3742         if (parent == module) {
3743             Py_INCREF(lastname);
3744             Py_DECREF(global_name);
3745             global_name = lastname;
3746         }
3747         if (self->proto >= 4) {
3748             const char stack_global_op = STACK_GLOBAL;
3749 
3750             if (save(self, module_name, 0) < 0)
3751                 goto error;
3752             if (save(self, global_name, 0) < 0)
3753                 goto error;
3754 
3755             if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3756                 goto error;
3757         }
3758         else if (parent != module) {
3759             PickleState *st = _Pickle_GetGlobalState();
3760             PyObject *reduce_value = Py_BuildValue("(O(OO))",
3761                                         st->getattr, parent, lastname);
3762             if (reduce_value == NULL)
3763                 goto error;
3764             status = save_reduce(self, reduce_value, NULL);
3765             Py_DECREF(reduce_value);
3766             if (status < 0)
3767                 goto error;
3768         }
3769         else {
3770             /* Generate a normal global opcode if we are using a pickle
3771                protocol < 4, or if the object is not registered in the
3772                extension registry. */
3773             PyObject *encoded;
3774             PyObject *(*unicode_encoder)(PyObject *);
3775 
3776             if (_Pickler_Write(self, &global_op, 1) < 0)
3777                 goto error;
3778 
3779             /* For protocol < 3 and if the user didn't request against doing
3780                so, we convert module names to the old 2.x module names. */
3781             if (self->proto < 3 && self->fix_imports) {
3782                 if (fix_imports(&module_name, &global_name) < 0) {
3783                     goto error;
3784                 }
3785             }
3786 
3787             /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3788                both the module name and the global name using UTF-8. We do so
3789                only when we are using the pickle protocol newer than version
3790                3. This is to ensure compatibility with older Unpickler running
3791                on Python 2.x. */
3792             if (self->proto == 3) {
3793                 unicode_encoder = PyUnicode_AsUTF8String;
3794             }
3795             else {
3796                 unicode_encoder = PyUnicode_AsASCIIString;
3797             }
3798             encoded = unicode_encoder(module_name);
3799             if (encoded == NULL) {
3800                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3801                     PyErr_Format(st->PicklingError,
3802                                  "can't pickle module identifier '%S' using "
3803                                  "pickle protocol %i",
3804                                  module_name, self->proto);
3805                 goto error;
3806             }
3807             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3808                                PyBytes_GET_SIZE(encoded)) < 0) {
3809                 Py_DECREF(encoded);
3810                 goto error;
3811             }
3812             Py_DECREF(encoded);
3813             if(_Pickler_Write(self, "\n", 1) < 0)
3814                 goto error;
3815 
3816             /* Save the name of the module. */
3817             encoded = unicode_encoder(global_name);
3818             if (encoded == NULL) {
3819                 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3820                     PyErr_Format(st->PicklingError,
3821                                  "can't pickle global identifier '%S' using "
3822                                  "pickle protocol %i",
3823                                  global_name, self->proto);
3824                 goto error;
3825             }
3826             if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3827                                PyBytes_GET_SIZE(encoded)) < 0) {
3828                 Py_DECREF(encoded);
3829                 goto error;
3830             }
3831             Py_DECREF(encoded);
3832             if (_Pickler_Write(self, "\n", 1) < 0)
3833                 goto error;
3834         }
3835         /* Memoize the object. */
3836         if (memo_put(self, obj) < 0)
3837             goto error;
3838     }
3839 
3840     if (0) {
3841   error:
3842         status = -1;
3843     }
3844     Py_XDECREF(module_name);
3845     Py_XDECREF(global_name);
3846     Py_XDECREF(module);
3847     Py_XDECREF(parent);
3848     Py_XDECREF(dotted_path);
3849     Py_XDECREF(lastname);
3850 
3851     return status;
3852 }
3853 
3854 static int
save_singleton_type(PicklerObject * self,PyObject * obj,PyObject * singleton)3855 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3856 {
3857     PyObject *reduce_value;
3858     int status;
3859 
3860     reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3861     if (reduce_value == NULL) {
3862         return -1;
3863     }
3864     status = save_reduce(self, reduce_value, obj);
3865     Py_DECREF(reduce_value);
3866     return status;
3867 }
3868 
3869 static int
save_type(PicklerObject * self,PyObject * obj)3870 save_type(PicklerObject *self, PyObject *obj)
3871 {
3872     if (obj == (PyObject *)&_PyNone_Type) {
3873         return save_singleton_type(self, obj, Py_None);
3874     }
3875     else if (obj == (PyObject *)&PyEllipsis_Type) {
3876         return save_singleton_type(self, obj, Py_Ellipsis);
3877     }
3878     else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3879         return save_singleton_type(self, obj, Py_NotImplemented);
3880     }
3881     return save_global(self, obj, NULL);
3882 }
3883 
3884 static int
save_pers(PicklerObject * self,PyObject * obj)3885 save_pers(PicklerObject *self, PyObject *obj)
3886 {
3887     PyObject *pid = NULL;
3888     int status = 0;
3889 
3890     const char persid_op = PERSID;
3891     const char binpersid_op = BINPERSID;
3892 
3893     pid = call_method(self->pers_func, self->pers_func_self, obj);
3894     if (pid == NULL)
3895         return -1;
3896 
3897     if (pid != Py_None) {
3898         if (self->bin) {
3899             if (save(self, pid, 1) < 0 ||
3900                 _Pickler_Write(self, &binpersid_op, 1) < 0)
3901                 goto error;
3902         }
3903         else {
3904             PyObject *pid_str;
3905 
3906             pid_str = PyObject_Str(pid);
3907             if (pid_str == NULL)
3908                 goto error;
3909 
3910             /* XXX: Should it check whether the pid contains embedded
3911                newlines? */
3912             if (!PyUnicode_IS_ASCII(pid_str)) {
3913                 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3914                                 "persistent IDs in protocol 0 must be "
3915                                 "ASCII strings");
3916                 Py_DECREF(pid_str);
3917                 goto error;
3918             }
3919 
3920             if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3921                 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3922                                PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3923                 _Pickler_Write(self, "\n", 1) < 0) {
3924                 Py_DECREF(pid_str);
3925                 goto error;
3926             }
3927             Py_DECREF(pid_str);
3928         }
3929         status = 1;
3930     }
3931 
3932     if (0) {
3933   error:
3934         status = -1;
3935     }
3936     Py_XDECREF(pid);
3937 
3938     return status;
3939 }
3940 
3941 static PyObject *
get_class(PyObject * obj)3942 get_class(PyObject *obj)
3943 {
3944     PyObject *cls;
3945 
3946     if (_PyObject_LookupAttr(obj, &_Py_ID(__class__), &cls) == 0) {
3947         cls = (PyObject *) Py_TYPE(obj);
3948         Py_INCREF(cls);
3949     }
3950     return cls;
3951 }
3952 
3953 /* We're saving obj, and args is the 2-thru-5 tuple returned by the
3954  * appropriate __reduce__ method for obj.
3955  */
3956 static int
save_reduce(PicklerObject * self,PyObject * args,PyObject * obj)3957 save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3958 {
3959     PyObject *callable;
3960     PyObject *argtup;
3961     PyObject *state = NULL;
3962     PyObject *listitems = Py_None;
3963     PyObject *dictitems = Py_None;
3964     PyObject *state_setter = Py_None;
3965     PickleState *st = _Pickle_GetGlobalState();
3966     Py_ssize_t size;
3967     int use_newobj = 0, use_newobj_ex = 0;
3968 
3969     const char reduce_op = REDUCE;
3970     const char build_op = BUILD;
3971     const char newobj_op = NEWOBJ;
3972     const char newobj_ex_op = NEWOBJ_EX;
3973 
3974     size = PyTuple_Size(args);
3975     if (size < 2 || size > 6) {
3976         PyErr_SetString(st->PicklingError, "tuple returned by "
3977                         "__reduce__ must contain 2 through 6 elements");
3978         return -1;
3979     }
3980 
3981     if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
3982                            &callable, &argtup, &state, &listitems, &dictitems,
3983                            &state_setter))
3984         return -1;
3985 
3986     if (!PyCallable_Check(callable)) {
3987         PyErr_SetString(st->PicklingError, "first item of the tuple "
3988                         "returned by __reduce__ must be callable");
3989         return -1;
3990     }
3991     if (!PyTuple_Check(argtup)) {
3992         PyErr_SetString(st->PicklingError, "second item of the tuple "
3993                         "returned by __reduce__ must be a tuple");
3994         return -1;
3995     }
3996 
3997     if (state == Py_None)
3998         state = NULL;
3999 
4000     if (listitems == Py_None)
4001         listitems = NULL;
4002     else if (!PyIter_Check(listitems)) {
4003         PyErr_Format(st->PicklingError, "fourth element of the tuple "
4004                      "returned by __reduce__ must be an iterator, not %s",
4005                      Py_TYPE(listitems)->tp_name);
4006         return -1;
4007     }
4008 
4009     if (dictitems == Py_None)
4010         dictitems = NULL;
4011     else if (!PyIter_Check(dictitems)) {
4012         PyErr_Format(st->PicklingError, "fifth element of the tuple "
4013                      "returned by __reduce__ must be an iterator, not %s",
4014                      Py_TYPE(dictitems)->tp_name);
4015         return -1;
4016     }
4017 
4018     if (state_setter == Py_None)
4019         state_setter = NULL;
4020     else if (!PyCallable_Check(state_setter)) {
4021         PyErr_Format(st->PicklingError, "sixth element of the tuple "
4022                      "returned by __reduce__ must be a function, not %s",
4023                      Py_TYPE(state_setter)->tp_name);
4024         return -1;
4025     }
4026 
4027     if (self->proto >= 2) {
4028         PyObject *name;
4029 
4030         if (_PyObject_LookupAttr(callable, &_Py_ID(__name__), &name) < 0) {
4031             return -1;
4032         }
4033         if (name != NULL && PyUnicode_Check(name)) {
4034             use_newobj_ex = _PyUnicode_Equal(name, &_Py_ID(__newobj_ex__));
4035             if (!use_newobj_ex) {
4036                 use_newobj = _PyUnicode_Equal(name, &_Py_ID(__newobj__));
4037             }
4038         }
4039         Py_XDECREF(name);
4040     }
4041 
4042     if (use_newobj_ex) {
4043         PyObject *cls;
4044         PyObject *args;
4045         PyObject *kwargs;
4046 
4047         if (PyTuple_GET_SIZE(argtup) != 3) {
4048             PyErr_Format(st->PicklingError,
4049                          "length of the NEWOBJ_EX argument tuple must be "
4050                          "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
4051             return -1;
4052         }
4053 
4054         cls = PyTuple_GET_ITEM(argtup, 0);
4055         if (!PyType_Check(cls)) {
4056             PyErr_Format(st->PicklingError,
4057                          "first item from NEWOBJ_EX argument tuple must "
4058                          "be a class, not %.200s", Py_TYPE(cls)->tp_name);
4059             return -1;
4060         }
4061         args = PyTuple_GET_ITEM(argtup, 1);
4062         if (!PyTuple_Check(args)) {
4063             PyErr_Format(st->PicklingError,
4064                          "second item from NEWOBJ_EX argument tuple must "
4065                          "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
4066             return -1;
4067         }
4068         kwargs = PyTuple_GET_ITEM(argtup, 2);
4069         if (!PyDict_Check(kwargs)) {
4070             PyErr_Format(st->PicklingError,
4071                          "third item from NEWOBJ_EX argument tuple must "
4072                          "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
4073             return -1;
4074         }
4075 
4076         if (self->proto >= 4) {
4077             if (save(self, cls, 0) < 0 ||
4078                 save(self, args, 0) < 0 ||
4079                 save(self, kwargs, 0) < 0 ||
4080                 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
4081                 return -1;
4082             }
4083         }
4084         else {
4085             PyObject *newargs;
4086             PyObject *cls_new;
4087             Py_ssize_t i;
4088 
4089             newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
4090             if (newargs == NULL)
4091                 return -1;
4092 
4093             cls_new = PyObject_GetAttr(cls, &_Py_ID(__new__));
4094             if (cls_new == NULL) {
4095                 Py_DECREF(newargs);
4096                 return -1;
4097             }
4098             PyTuple_SET_ITEM(newargs, 0, cls_new);
4099             Py_INCREF(cls);
4100             PyTuple_SET_ITEM(newargs, 1, cls);
4101             for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
4102                 PyObject *item = PyTuple_GET_ITEM(args, i);
4103                 Py_INCREF(item);
4104                 PyTuple_SET_ITEM(newargs, i + 2, item);
4105             }
4106 
4107             callable = PyObject_Call(st->partial, newargs, kwargs);
4108             Py_DECREF(newargs);
4109             if (callable == NULL)
4110                 return -1;
4111 
4112             newargs = PyTuple_New(0);
4113             if (newargs == NULL) {
4114                 Py_DECREF(callable);
4115                 return -1;
4116             }
4117 
4118             if (save(self, callable, 0) < 0 ||
4119                 save(self, newargs, 0) < 0 ||
4120                 _Pickler_Write(self, &reduce_op, 1) < 0) {
4121                 Py_DECREF(newargs);
4122                 Py_DECREF(callable);
4123                 return -1;
4124             }
4125             Py_DECREF(newargs);
4126             Py_DECREF(callable);
4127         }
4128     }
4129     else if (use_newobj) {
4130         PyObject *cls;
4131         PyObject *newargtup;
4132         PyObject *obj_class;
4133         int p;
4134 
4135         /* Sanity checks. */
4136         if (PyTuple_GET_SIZE(argtup) < 1) {
4137             PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
4138             return -1;
4139         }
4140 
4141         cls = PyTuple_GET_ITEM(argtup, 0);
4142         if (!PyType_Check(cls)) {
4143             PyErr_SetString(st->PicklingError, "args[0] from "
4144                             "__newobj__ args is not a type");
4145             return -1;
4146         }
4147 
4148         if (obj != NULL) {
4149             obj_class = get_class(obj);
4150             if (obj_class == NULL) {
4151                 return -1;
4152             }
4153             p = obj_class != cls;
4154             Py_DECREF(obj_class);
4155             if (p) {
4156                 PyErr_SetString(st->PicklingError, "args[0] from "
4157                                 "__newobj__ args has the wrong class");
4158                 return -1;
4159             }
4160         }
4161         /* XXX: These calls save() are prone to infinite recursion. Imagine
4162            what happen if the value returned by the __reduce__() method of
4163            some extension type contains another object of the same type. Ouch!
4164 
4165            Here is a quick example, that I ran into, to illustrate what I
4166            mean:
4167 
4168              >>> import pickle, copyreg
4169              >>> copyreg.dispatch_table.pop(complex)
4170              >>> pickle.dumps(1+2j)
4171              Traceback (most recent call last):
4172                ...
4173              RecursionError: maximum recursion depth exceeded
4174 
4175            Removing the complex class from copyreg.dispatch_table made the
4176            __reduce_ex__() method emit another complex object:
4177 
4178              >>> (1+1j).__reduce_ex__(2)
4179              (<function __newobj__ at 0xb7b71c3c>,
4180                (<class 'complex'>, (1+1j)), None, None, None)
4181 
4182            Thus when save() was called on newargstup (the 2nd item) recursion
4183            ensued. Of course, the bug was in the complex class which had a
4184            broken __getnewargs__() that emitted another complex object. But,
4185            the point, here, is it is quite easy to end up with a broken reduce
4186            function. */
4187 
4188         /* Save the class and its __new__ arguments. */
4189         if (save(self, cls, 0) < 0)
4190             return -1;
4191 
4192         newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
4193         if (newargtup == NULL)
4194             return -1;
4195 
4196         p = save(self, newargtup, 0);
4197         Py_DECREF(newargtup);
4198         if (p < 0)
4199             return -1;
4200 
4201         /* Add NEWOBJ opcode. */
4202         if (_Pickler_Write(self, &newobj_op, 1) < 0)
4203             return -1;
4204     }
4205     else { /* Not using NEWOBJ. */
4206         if (save(self, callable, 0) < 0 ||
4207             save(self, argtup, 0) < 0 ||
4208             _Pickler_Write(self, &reduce_op, 1) < 0)
4209             return -1;
4210     }
4211 
4212     /* obj can be NULL when save_reduce() is used directly. A NULL obj means
4213        the caller do not want to memoize the object. Not particularly useful,
4214        but that is to mimic the behavior save_reduce() in pickle.py when
4215        obj is None. */
4216     if (obj != NULL) {
4217         /* If the object is already in the memo, this means it is
4218            recursive. In this case, throw away everything we put on the
4219            stack, and fetch the object back from the memo. */
4220         if (PyMemoTable_Get(self->memo, obj)) {
4221             const char pop_op = POP;
4222 
4223             if (_Pickler_Write(self, &pop_op, 1) < 0)
4224                 return -1;
4225             if (memo_get(self, obj) < 0)
4226                 return -1;
4227 
4228             return 0;
4229         }
4230         else if (memo_put(self, obj) < 0)
4231             return -1;
4232     }
4233 
4234     if (listitems && batch_list(self, listitems) < 0)
4235         return -1;
4236 
4237     if (dictitems && batch_dict(self, dictitems) < 0)
4238         return -1;
4239 
4240     if (state) {
4241         if (state_setter == NULL) {
4242             if (save(self, state, 0) < 0 ||
4243                 _Pickler_Write(self, &build_op, 1) < 0)
4244                 return -1;
4245         }
4246         else {
4247 
4248             /* If a state_setter is specified, call it instead of load_build to
4249              * update obj's with its previous state.
4250              * The first 4 save/write instructions push state_setter and its
4251              * tuple of expected arguments (obj, state) onto the stack. The
4252              * REDUCE opcode triggers the state_setter(obj, state) function
4253              * call. Finally, because state-updating routines only do in-place
4254              * modification, the whole operation has to be stack-transparent.
4255              * Thus, we finally pop the call's output from the stack.*/
4256 
4257             const char tupletwo_op = TUPLE2;
4258             const char pop_op = POP;
4259             if (save(self, state_setter, 0) < 0 ||
4260                 save(self, obj, 0) < 0 || save(self, state, 0) < 0 ||
4261                 _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
4262                 _Pickler_Write(self, &reduce_op, 1) < 0 ||
4263                 _Pickler_Write(self, &pop_op, 1) < 0)
4264                 return -1;
4265         }
4266     }
4267     return 0;
4268 }
4269 
4270 static int
save(PicklerObject * self,PyObject * obj,int pers_save)4271 save(PicklerObject *self, PyObject *obj, int pers_save)
4272 {
4273     PyTypeObject *type;
4274     PyObject *reduce_func = NULL;
4275     PyObject *reduce_value = NULL;
4276     int status = 0;
4277 
4278     if (_Pickler_OpcodeBoundary(self) < 0)
4279         return -1;
4280 
4281     /* The extra pers_save argument is necessary to avoid calling save_pers()
4282        on its returned object. */
4283     if (!pers_save && self->pers_func) {
4284         /* save_pers() returns:
4285             -1   to signal an error;
4286              0   if it did nothing successfully;
4287              1   if a persistent id was saved.
4288          */
4289         if ((status = save_pers(self, obj)) != 0)
4290             return status;
4291     }
4292 
4293     type = Py_TYPE(obj);
4294 
4295     /* The old cPickle had an optimization that used switch-case statement
4296        dispatching on the first letter of the type name.  This has was removed
4297        since benchmarks shown that this optimization was actually slowing
4298        things down. */
4299 
4300     /* Atom types; these aren't memoized, so don't check the memo. */
4301 
4302     if (obj == Py_None) {
4303         return save_none(self, obj);
4304     }
4305     else if (obj == Py_False || obj == Py_True) {
4306         return save_bool(self, obj);
4307     }
4308     else if (type == &PyLong_Type) {
4309         return save_long(self, obj);
4310     }
4311     else if (type == &PyFloat_Type) {
4312         return save_float(self, obj);
4313     }
4314 
4315     /* Check the memo to see if it has the object. If so, generate
4316        a GET (or BINGET) opcode, instead of pickling the object
4317        once again. */
4318     if (PyMemoTable_Get(self->memo, obj)) {
4319         return memo_get(self, obj);
4320     }
4321 
4322     if (type == &PyBytes_Type) {
4323         return save_bytes(self, obj);
4324     }
4325     else if (type == &PyUnicode_Type) {
4326         return save_unicode(self, obj);
4327     }
4328 
4329     /* We're only calling _Py_EnterRecursiveCall here so that atomic
4330        types above are pickled faster. */
4331     if (_Py_EnterRecursiveCall(" while pickling an object")) {
4332         return -1;
4333     }
4334 
4335     if (type == &PyDict_Type) {
4336         status = save_dict(self, obj);
4337         goto done;
4338     }
4339     else if (type == &PySet_Type) {
4340         status = save_set(self, obj);
4341         goto done;
4342     }
4343     else if (type == &PyFrozenSet_Type) {
4344         status = save_frozenset(self, obj);
4345         goto done;
4346     }
4347     else if (type == &PyList_Type) {
4348         status = save_list(self, obj);
4349         goto done;
4350     }
4351     else if (type == &PyTuple_Type) {
4352         status = save_tuple(self, obj);
4353         goto done;
4354     }
4355     else if (type == &PyByteArray_Type) {
4356         status = save_bytearray(self, obj);
4357         goto done;
4358     }
4359     else if (type == &PyPickleBuffer_Type) {
4360         status = save_picklebuffer(self, obj);
4361         goto done;
4362     }
4363 
4364     /* Now, check reducer_override.  If it returns NotImplemented,
4365      * fallback to save_type or save_global, and then perhaps to the
4366      * regular reduction mechanism.
4367      */
4368     if (self->reducer_override != NULL) {
4369         reduce_value = PyObject_CallOneArg(self->reducer_override, obj);
4370         if (reduce_value == NULL) {
4371             goto error;
4372         }
4373         if (reduce_value != Py_NotImplemented) {
4374             goto reduce;
4375         }
4376         Py_DECREF(reduce_value);
4377         reduce_value = NULL;
4378     }
4379 
4380     if (type == &PyType_Type) {
4381         status = save_type(self, obj);
4382         goto done;
4383     }
4384     else if (type == &PyFunction_Type) {
4385         status = save_global(self, obj, NULL);
4386         goto done;
4387     }
4388 
4389     /* XXX: This part needs some unit tests. */
4390 
4391     /* Get a reduction callable, and call it.  This may come from
4392      * self.dispatch_table, copyreg.dispatch_table, the object's
4393      * __reduce_ex__ method, or the object's __reduce__ method.
4394      */
4395     if (self->dispatch_table == NULL) {
4396         PickleState *st = _Pickle_GetGlobalState();
4397         reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4398                                               (PyObject *)type);
4399         if (reduce_func == NULL) {
4400             if (PyErr_Occurred()) {
4401                 goto error;
4402             }
4403         } else {
4404             /* PyDict_GetItemWithError() returns a borrowed reference.
4405                Increase the reference count to be consistent with
4406                PyObject_GetItem and _PyObject_GetAttrId used below. */
4407             Py_INCREF(reduce_func);
4408         }
4409     } else {
4410         reduce_func = PyObject_GetItem(self->dispatch_table,
4411                                        (PyObject *)type);
4412         if (reduce_func == NULL) {
4413             if (PyErr_ExceptionMatches(PyExc_KeyError))
4414                 PyErr_Clear();
4415             else
4416                 goto error;
4417         }
4418     }
4419     if (reduce_func != NULL) {
4420         Py_INCREF(obj);
4421         reduce_value = _Pickle_FastCall(reduce_func, obj);
4422     }
4423     else if (PyType_IsSubtype(type, &PyType_Type)) {
4424         status = save_global(self, obj, NULL);
4425         goto done;
4426     }
4427     else {
4428         /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
4429            automatically defined as __reduce__. While this is convenient, this
4430            make it impossible to know which method was actually called. Of
4431            course, this is not a big deal. But still, it would be nice to let
4432            the user know which method was called when something go
4433            wrong. Incidentally, this means if __reduce_ex__ is not defined, we
4434            don't actually have to check for a __reduce__ method. */
4435 
4436         /* Check for a __reduce_ex__ method. */
4437         if (_PyObject_LookupAttr(obj, &_Py_ID(__reduce_ex__), &reduce_func) < 0) {
4438             goto error;
4439         }
4440         if (reduce_func != NULL) {
4441             PyObject *proto;
4442             proto = PyLong_FromLong(self->proto);
4443             if (proto != NULL) {
4444                 reduce_value = _Pickle_FastCall(reduce_func, proto);
4445             }
4446         }
4447         else {
4448             /* Check for a __reduce__ method. */
4449             if (_PyObject_LookupAttr(obj, &_Py_ID(__reduce__), &reduce_func) < 0) {
4450                 goto error;
4451             }
4452             if (reduce_func != NULL) {
4453                 reduce_value = PyObject_CallNoArgs(reduce_func);
4454             }
4455             else {
4456                 PickleState *st = _Pickle_GetGlobalState();
4457                 PyErr_Format(st->PicklingError,
4458                              "can't pickle '%.200s' object: %R",
4459                              type->tp_name, obj);
4460                 goto error;
4461             }
4462         }
4463     }
4464 
4465     if (reduce_value == NULL)
4466         goto error;
4467 
4468   reduce:
4469     if (PyUnicode_Check(reduce_value)) {
4470         status = save_global(self, obj, reduce_value);
4471         goto done;
4472     }
4473 
4474     if (!PyTuple_Check(reduce_value)) {
4475         PickleState *st = _Pickle_GetGlobalState();
4476         PyErr_SetString(st->PicklingError,
4477                         "__reduce__ must return a string or tuple");
4478         goto error;
4479     }
4480 
4481     status = save_reduce(self, reduce_value, obj);
4482 
4483     if (0) {
4484   error:
4485         status = -1;
4486     }
4487   done:
4488 
4489     _Py_LeaveRecursiveCall();
4490     Py_XDECREF(reduce_func);
4491     Py_XDECREF(reduce_value);
4492 
4493     return status;
4494 }
4495 
4496 static int
dump(PicklerObject * self,PyObject * obj)4497 dump(PicklerObject *self, PyObject *obj)
4498 {
4499     const char stop_op = STOP;
4500     int status = -1;
4501     PyObject *tmp;
4502 
4503     if (_PyObject_LookupAttr((PyObject *)self, &_Py_ID(reducer_override),
4504                              &tmp) < 0) {
4505       goto error;
4506     }
4507     /* Cache the reducer_override method, if it exists. */
4508     if (tmp != NULL) {
4509         Py_XSETREF(self->reducer_override, tmp);
4510     }
4511     else {
4512         Py_CLEAR(self->reducer_override);
4513     }
4514 
4515     if (self->proto >= 2) {
4516         char header[2];
4517 
4518         header[0] = PROTO;
4519         assert(self->proto >= 0 && self->proto < 256);
4520         header[1] = (unsigned char)self->proto;
4521         if (_Pickler_Write(self, header, 2) < 0)
4522             goto error;
4523         if (self->proto >= 4)
4524             self->framing = 1;
4525     }
4526 
4527     if (save(self, obj, 0) < 0 ||
4528         _Pickler_Write(self, &stop_op, 1) < 0 ||
4529         _Pickler_CommitFrame(self) < 0)
4530         goto error;
4531 
4532     // Success
4533     status = 0;
4534 
4535   error:
4536     self->framing = 0;
4537 
4538     /* Break the reference cycle we generated at the beginning this function
4539      * call when setting the reducer_override attribute of the Pickler instance
4540      * to a bound method of the same instance. This is important as the Pickler
4541      * instance holds a reference to each object it has pickled (through its
4542      * memo): thus, these objects won't be garbage-collected as long as the
4543      * Pickler itself is not collected. */
4544     Py_CLEAR(self->reducer_override);
4545     return status;
4546 }
4547 
4548 /*[clinic input]
4549 
4550 _pickle.Pickler.clear_memo
4551 
4552 Clears the pickler's "memo".
4553 
4554 The memo is the data structure that remembers which objects the
4555 pickler has already seen, so that shared or recursive objects are
4556 pickled by reference and not by value.  This method is useful when
4557 re-using picklers.
4558 [clinic start generated code]*/
4559 
4560 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4561 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4562 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4563 {
4564     if (self->memo)
4565         PyMemoTable_Clear(self->memo);
4566 
4567     Py_RETURN_NONE;
4568 }
4569 
4570 /*[clinic input]
4571 
4572 _pickle.Pickler.dump
4573 
4574   obj: object
4575   /
4576 
4577 Write a pickled representation of the given object to the open file.
4578 [clinic start generated code]*/
4579 
4580 static PyObject *
_pickle_Pickler_dump(PicklerObject * self,PyObject * obj)4581 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4582 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4583 {
4584     /* Check whether the Pickler was initialized correctly (issue3664).
4585        Developers often forget to call __init__() in their subclasses, which
4586        would trigger a segfault without this check. */
4587     if (self->write == NULL) {
4588         PickleState *st = _Pickle_GetGlobalState();
4589         PyErr_Format(st->PicklingError,
4590                      "Pickler.__init__() was not called by %s.__init__()",
4591                      Py_TYPE(self)->tp_name);
4592         return NULL;
4593     }
4594 
4595     if (_Pickler_ClearBuffer(self) < 0)
4596         return NULL;
4597 
4598     if (dump(self, obj) < 0)
4599         return NULL;
4600 
4601     if (_Pickler_FlushToFile(self) < 0)
4602         return NULL;
4603 
4604     Py_RETURN_NONE;
4605 }
4606 
4607 /*[clinic input]
4608 
4609 _pickle.Pickler.__sizeof__ -> Py_ssize_t
4610 
4611 Returns size in memory, in bytes.
4612 [clinic start generated code]*/
4613 
4614 static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4615 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4616 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4617 {
4618     Py_ssize_t res, s;
4619 
4620     res = _PyObject_SIZE(Py_TYPE(self));
4621     if (self->memo != NULL) {
4622         res += sizeof(PyMemoTable);
4623         res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4624     }
4625     if (self->output_buffer != NULL) {
4626         s = _PySys_GetSizeOf(self->output_buffer);
4627         if (s == -1)
4628             return -1;
4629         res += s;
4630     }
4631     return res;
4632 }
4633 
/* Method table installed as tp_methods of Pickler_Type.  The *_METHODDEF
 * macros are defined outside this part of the file; presumably they are the
 * Argument Clinic generated entries for dump(), clear_memo() and
 * __sizeof__() -- confirm against the generated clinic header. */
static struct PyMethodDef Pickler_methods[] = {
    _PICKLE_PICKLER_DUMP_METHODDEF
    _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
    _PICKLE_PICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
4640 
/* tp_dealloc for Pickler_Type: release everything the pickler owns and free
 * the object itself. */
static void
Pickler_dealloc(PicklerObject *self)
{
    /* Stop GC tracking before the fields are torn down. */
    PyObject_GC_UnTrack(self);

    /* Drop the owned object references; each may legitimately be NULL. */
    Py_XDECREF(self->output_buffer);
    Py_XDECREF(self->write);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->dispatch_table);
    Py_XDECREF(self->fast_memo);
    Py_XDECREF(self->reducer_override);
    Py_XDECREF(self->buffer_callback);

    /* Called unconditionally, although memo can be NULL here (see the NULL
     * checks in Pickler_clear and clear_memo); presumably PyMemoTable_Del
     * accepts NULL -- verify against its definition. */
    PyMemoTable_Del(self->memo);

    /* Free through the type's tp_free slot. */
    Py_TYPE(self)->tp_free((PyObject *)self);
}
4658 
4659 static int
Pickler_traverse(PicklerObject * self,visitproc visit,void * arg)4660 Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
4661 {
4662     Py_VISIT(self->write);
4663     Py_VISIT(self->pers_func);
4664     Py_VISIT(self->dispatch_table);
4665     Py_VISIT(self->fast_memo);
4666     Py_VISIT(self->reducer_override);
4667     Py_VISIT(self->buffer_callback);
4668     return 0;
4669 }
4670 
/* tp_clear for Pickler_Type; also called by __init__() to reset a pickler
 * that is being re-initialized.  Drops every owned reference and destroys
 * the memo table.  Always returns 0. */
static int
Pickler_clear(PicklerObject *self)
{
    Py_CLEAR(self->output_buffer);
    Py_CLEAR(self->write);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->dispatch_table);
    Py_CLEAR(self->fast_memo);
    Py_CLEAR(self->reducer_override);
    Py_CLEAR(self->buffer_callback);

    if (self->memo != NULL) {
        /* Detach the table from self before destroying it, so self->memo
         * never points at a table that is being torn down. */
        PyMemoTable *memo = self->memo;
        self->memo = NULL;
        PyMemoTable_Del(memo);
    }
    return 0;
}
4689 
4690 
4691 /*[clinic input]
4692 
4693 _pickle.Pickler.__init__
4694 
4695   file: object
4696   protocol: object = None
4697   fix_imports: bool = True
4698   buffer_callback: object = None
4699 
4700 This takes a binary file for writing a pickle data stream.
4701 
4702 The optional *protocol* argument tells the pickler to use the given
4703 protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
4704 protocol is 4. It was introduced in Python 3.4, and is incompatible
4705 with previous versions.
4706 
4707 Specifying a negative protocol version selects the highest protocol
4708 version supported.  The higher the protocol used, the more recent the
4709 version of Python needed to read the pickle produced.
4710 
4711 The *file* argument must have a write() method that accepts a single
4712 bytes argument. It can thus be a file object opened for binary
4713 writing, an io.BytesIO instance, or any other custom object that meets
4714 this interface.
4715 
4716 If *fix_imports* is True and protocol is less than 3, pickle will try
4717 to map the new Python 3 names to the old module names used in Python
4718 2, so that the pickle data stream is readable with Python 2.
4719 
4720 If *buffer_callback* is None (the default), buffer views are
4721 serialized into *file* as part of the pickle stream.
4722 
4723 If *buffer_callback* is not None, then it can be called any number
4724 of times with a buffer view.  If the callback returns a false value
4725 (such as None), the given buffer is out-of-band; otherwise the
4726 buffer is serialized in-band, i.e. inside the pickle stream.
4727 
4728 It is an error if *buffer_callback* is not None and *protocol*
4729 is None or smaller than 5.
4730 
4731 [clinic start generated code]*/
4732 
4733 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)4734 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4735                               PyObject *protocol, int fix_imports,
4736                               PyObject *buffer_callback)
4737 /*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/
4738 {
4739     /* In case of multiple __init__() calls, clear previous content. */
4740     if (self->write != NULL)
4741         (void)Pickler_clear(self);
4742 
4743     if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4744         return -1;
4745 
4746     if (_Pickler_SetOutputStream(self, file) < 0)
4747         return -1;
4748 
4749     if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4750         return -1;
4751 
4752     /* memo and output_buffer may have already been created in _Pickler_New */
4753     if (self->memo == NULL) {
4754         self->memo = PyMemoTable_New();
4755         if (self->memo == NULL)
4756             return -1;
4757     }
4758     self->output_len = 0;
4759     if (self->output_buffer == NULL) {
4760         self->max_output_len = WRITE_BUF_SIZE;
4761         self->output_buffer = PyBytes_FromStringAndSize(NULL,
4762                                                         self->max_output_len);
4763         if (self->output_buffer == NULL)
4764             return -1;
4765     }
4766 
4767     self->fast = 0;
4768     self->fast_nesting = 0;
4769     self->fast_memo = NULL;
4770 
4771     if (init_method_ref((PyObject *)self, &_Py_ID(persistent_id),
4772                         &self->pers_func, &self->pers_func_self) < 0)
4773     {
4774         return -1;
4775     }
4776     if (self->dispatch_table != NULL) {
4777         return 0;
4778     }
4779     if (_PyObject_LookupAttr((PyObject *)self, &_Py_ID(dispatch_table),
4780                              &self->dispatch_table) < 0) {
4781         return -1;
4782     }
4783 
4784     return 0;
4785 }
4786 
4787 
4788 /* Define a proxy object for the Pickler's internal memo object. This is to
4789  * avoid breaking code like:
4790  *  pickler.memo.clear()
4791  * and
4792  *  pickler.memo = saved_memo
4793  * Is this a good idea? Not really, but we don't want to break code that uses
4794  * it. Note that we don't implement the entire mapping API here. This is
4795  * intentional, as these should be treated as black-box implementation details.
4796  */
4797 
4798 /*[clinic input]
4799 _pickle.PicklerMemoProxy.clear
4800 
4801 Remove all items from memo.
4802 [clinic start generated code]*/
4803 
4804 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4805 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4806 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4807 {
4808     if (self->pickler->memo)
4809         PyMemoTable_Clear(self->pickler->memo);
4810     Py_RETURN_NONE;
4811 }
4812 
4813 /*[clinic input]
4814 _pickle.PicklerMemoProxy.copy
4815 
4816 Copy the memo to a new object.
4817 [clinic start generated code]*/
4818 
4819 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4820 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4821 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4822 {
4823     PyMemoTable *memo;
4824     PyObject *new_memo = PyDict_New();
4825     if (new_memo == NULL)
4826         return NULL;
4827 
4828     memo = self->pickler->memo;
4829     for (size_t i = 0; i < memo->mt_allocated; ++i) {
4830         PyMemoEntry entry = memo->mt_table[i];
4831         if (entry.me_key != NULL) {
4832             int status;
4833             PyObject *key, *value;
4834 
4835             key = PyLong_FromVoidPtr(entry.me_key);
4836             value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4837 
4838             if (key == NULL || value == NULL) {
4839                 Py_XDECREF(key);
4840                 Py_XDECREF(value);
4841                 goto error;
4842             }
4843             status = PyDict_SetItem(new_memo, key, value);
4844             Py_DECREF(key);
4845             Py_DECREF(value);
4846             if (status < 0)
4847                 goto error;
4848         }
4849     }
4850     return new_memo;
4851 
4852   error:
4853     Py_XDECREF(new_memo);
4854     return NULL;
4855 }
4856 
4857 /*[clinic input]
4858 _pickle.PicklerMemoProxy.__reduce__
4859 
4860 Implement pickle support.
4861 [clinic start generated code]*/
4862 
4863 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4864 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4865 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4866 {
4867     PyObject *reduce_value, *dict_args;
4868     PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4869     if (contents == NULL)
4870         return NULL;
4871 
4872     reduce_value = PyTuple_New(2);
4873     if (reduce_value == NULL) {
4874         Py_DECREF(contents);
4875         return NULL;
4876     }
4877     dict_args = PyTuple_New(1);
4878     if (dict_args == NULL) {
4879         Py_DECREF(contents);
4880         Py_DECREF(reduce_value);
4881         return NULL;
4882     }
4883     PyTuple_SET_ITEM(dict_args, 0, contents);
4884     Py_INCREF((PyObject *)&PyDict_Type);
4885     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4886     PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4887     return reduce_value;
4888 }
4889 
/* Method table installed as tp_methods of PicklerMemoProxyType.  The
 * *_METHODDEF macros are defined outside this part of the file; presumably
 * they are the Argument Clinic generated entries for clear(), copy() and
 * __reduce__() -- confirm against the generated clinic header. */
static PyMethodDef picklerproxy_methods[] = {
    _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL} /* sentinel */
};
4896 
4897 static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject * self)4898 PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4899 {
4900     PyObject_GC_UnTrack(self);
4901     Py_XDECREF(self->pickler);
4902     PyObject_GC_Del((PyObject *)self);
4903 }
4904 
/* tp_traverse for PicklerMemoProxyType: the proxied pickler is the only
 * object reference a proxy holds.  Returns 0 unless Py_VISIT returns early
 * with a non-zero result from visit. */
static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
                          visitproc visit, void *arg)
{
    Py_VISIT(self->pickler);
    return 0;
}
4912 
/* tp_clear for PicklerMemoProxyType: drop the reference to the proxied
 * pickler.  Always returns 0. */
static int
PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
{
    Py_CLEAR(self->pickler);
    return 0;
}
4919 
/* Type object for _pickle.PicklerMemoProxy.  Instances are unhashable
 * (tp_hash is PyObject_HashNotImplemented), take part in cyclic GC, and
 * expose the methods listed in picklerproxy_methods.  Slots after tp_methods
 * are left zero-initialized. */
static PyTypeObject PicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.PicklerMemoProxy",                 /*tp_name*/
    sizeof(PicklerMemoProxyObject),             /*tp_basicsize*/
    0,                                          /* tp_itemsize */
    (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
    (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    picklerproxy_methods,                       /* tp_methods */
};
4950 
4951 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4952 PicklerMemoProxy_New(PicklerObject *pickler)
4953 {
4954     PicklerMemoProxyObject *self;
4955 
4956     self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4957     if (self == NULL)
4958         return NULL;
4959     Py_INCREF(pickler);
4960     self->pickler = pickler;
4961     PyObject_GC_Track(self);
4962     return (PyObject *)self;
4963 }
4964 
4965 /*****************************************************************************/
4966 
/* Getter for Pickler.memo: every access returns a freshly created
 * PicklerMemoProxy bound to this pickler (NULL if creating it fails). */
static PyObject *
Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
{
    return PicklerMemoProxy_New(self);
}
4972 
4973 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4974 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4975 {
4976     PyMemoTable *new_memo = NULL;
4977 
4978     if (obj == NULL) {
4979         PyErr_SetString(PyExc_TypeError,
4980                         "attribute deletion is not supported");
4981         return -1;
4982     }
4983 
4984     if (Py_IS_TYPE(obj, &PicklerMemoProxyType)) {
4985         PicklerObject *pickler =
4986             ((PicklerMemoProxyObject *)obj)->pickler;
4987 
4988         new_memo = PyMemoTable_Copy(pickler->memo);
4989         if (new_memo == NULL)
4990             return -1;
4991     }
4992     else if (PyDict_Check(obj)) {
4993         Py_ssize_t i = 0;
4994         PyObject *key, *value;
4995 
4996         new_memo = PyMemoTable_New();
4997         if (new_memo == NULL)
4998             return -1;
4999 
5000         while (PyDict_Next(obj, &i, &key, &value)) {
5001             Py_ssize_t memo_id;
5002             PyObject *memo_obj;
5003 
5004             if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
5005                 PyErr_SetString(PyExc_TypeError,
5006                                 "'memo' values must be 2-item tuples");
5007                 goto error;
5008             }
5009             memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
5010             if (memo_id == -1 && PyErr_Occurred())
5011                 goto error;
5012             memo_obj = PyTuple_GET_ITEM(value, 1);
5013             if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
5014                 goto error;
5015         }
5016     }
5017     else {
5018         PyErr_Format(PyExc_TypeError,
5019                      "'memo' attribute must be a PicklerMemoProxy object "
5020                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
5021         return -1;
5022     }
5023 
5024     PyMemoTable_Del(self->memo);
5025     self->memo = new_memo;
5026 
5027     return 0;
5028 
5029   error:
5030     if (new_memo)
5031         PyMemoTable_Del(new_memo);
5032     return -1;
5033 }
5034 
5035 static PyObject *
Pickler_get_persid(PicklerObject * self,void * Py_UNUSED (ignored))5036 Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
5037 {
5038     if (self->pers_func == NULL) {
5039         PyErr_SetString(PyExc_AttributeError, "persistent_id");
5040         return NULL;
5041     }
5042     return reconstruct_method(self->pers_func, self->pers_func_self);
5043 }
5044 
5045 static int
Pickler_set_persid(PicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))5046 Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
5047 {
5048     if (value == NULL) {
5049         PyErr_SetString(PyExc_TypeError,
5050                         "attribute deletion is not supported");
5051         return -1;
5052     }
5053     if (!PyCallable_Check(value)) {
5054         PyErr_SetString(PyExc_TypeError,
5055                         "persistent_id must be a callable taking one argument");
5056         return -1;
5057     }
5058 
5059     self->pers_func_self = NULL;
5060     Py_INCREF(value);
5061     Py_XSETREF(self->pers_func, value);
5062 
5063     return 0;
5064 }
5065 
/* Struct members of PicklerObject exposed directly as Python attributes:
 * "bin" and "fast" as C ints, "dispatch_table" as an object reference. */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
    {NULL}
};
5072 
/* Computed attributes of Pickler: "memo" and "persistent_id", each backed
 * by a getter/setter pair. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}
};
5080 
/* Type object for _pickle.Pickler: a subclassable, GC-aware type whose
 * methods, members and getsets come from the tables above. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_vectorcall_offset*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_as_async*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    _pickle_Pickler___init____doc__,    /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    _pickle_Pickler___init__,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
5123 
/* Temporary helper for calling self.find_class().

   Invokes the find_class method of self through the generic method-call
   API with (module_name, global_name) and returns its result (NULL on
   error).

   XXX: It would be nice to be able to avoid the Python function call
   overhead by using the C version of find_class() directly when
   find_class() is not overridden by a subclass. However, this could become
   rather hackish. A simpler optimization would be to call the C function
   when self is not a subclass instance. */
static PyObject *
find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
{
    return PyObject_CallMethodObjArgs((PyObject *)self, &_Py_ID(find_class),
                                      module_name, global_name, NULL);
}
5137 
5138 static Py_ssize_t
marker(UnpicklerObject * self)5139 marker(UnpicklerObject *self)
5140 {
5141     Py_ssize_t mark;
5142 
5143     if (self->num_marks < 1) {
5144         PickleState *st = _Pickle_GetGlobalState();
5145         PyErr_SetString(st->UnpicklingError, "could not find MARK");
5146         return -1;
5147     }
5148 
5149     mark = self->marks[--self->num_marks];
5150     self->stack->mark_set = self->num_marks != 0;
5151     self->stack->fence = self->num_marks ?
5152             self->marks[self->num_marks - 1] : 0;
5153     return mark;
5154 }
5155 
/* Put None on the unpickler's stack and return 0.  PDATA_APPEND is a macro
 * defined outside this part of the file; presumably it makes this function
 * return its third argument (-1) when appending fails -- verify against the
 * macro definition. */
static int
load_none(UnpicklerObject *self)
{
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
5162 
5163 static int
load_int(UnpicklerObject * self)5164 load_int(UnpicklerObject *self)
5165 {
5166     PyObject *value;
5167     char *endptr, *s;
5168     Py_ssize_t len;
5169     long x;
5170 
5171     if ((len = _Unpickler_Readline(self, &s)) < 0)
5172         return -1;
5173     if (len < 2)
5174         return bad_readline();
5175 
5176     errno = 0;
5177     /* XXX: Should the base argument of strtol() be explicitly set to 10?
5178        XXX(avassalotti): Should this uses PyOS_strtol()? */
5179     x = strtol(s, &endptr, 0);
5180 
5181     if (errno || (*endptr != '\n' && *endptr != '\0')) {
5182         /* Hm, maybe we've got something long.  Let's try reading
5183          * it as a Python int object. */
5184         errno = 0;
5185         /* XXX: Same thing about the base here. */
5186         value = PyLong_FromString(s, NULL, 0);
5187         if (value == NULL) {
5188             PyErr_SetString(PyExc_ValueError,
5189                             "could not convert string to int");
5190             return -1;
5191         }
5192     }
5193     else {
5194         if (len == 3 && (x == 0 || x == 1)) {
5195             if ((value = PyBool_FromLong(x)) == NULL)
5196                 return -1;
5197         }
5198         else {
5199             if ((value = PyLong_FromLong(x)) == NULL)
5200                 return -1;
5201         }
5202     }
5203 
5204     PDATA_PUSH(self->stack, value, -1);
5205     return 0;
5206 }
5207 
/* Put the given boolean singleton (Py_True or Py_False, enforced by the
 * assert) on the unpickler's stack and return 0.  PDATA_APPEND is a macro
 * defined outside this part of the file; presumably it makes this function
 * return its third argument (-1) when appending fails -- verify against the
 * macro definition. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
5215 
5216 /* s contains x bytes of an unsigned little-endian integer.  Return its value
5217  * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5218  */
5219 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)5220 calc_binsize(char *bytes, int nbytes)
5221 {
5222     unsigned char *s = (unsigned char *)bytes;
5223     int i;
5224     size_t x = 0;
5225 
5226     if (nbytes > (int)sizeof(size_t)) {
5227         /* Check for integer overflow.  BINBYTES8 and BINUNICODE8 opcodes
5228          * have 64-bit size that can't be represented on 32-bit platform.
5229          */
5230         for (i = (int)sizeof(size_t); i < nbytes; i++) {
5231             if (s[i])
5232                 return -1;
5233         }
5234         nbytes = (int)sizeof(size_t);
5235     }
5236     for (i = 0; i < nbytes; i++) {
5237         x |= (size_t) s[i] << (8 * i);
5238     }
5239 
5240     if (x > PY_SSIZE_T_MAX)
5241         return -1;
5242     else
5243         return (Py_ssize_t) x;
5244 }
5245 
/* Decode the first nbytes bytes of `bytes` as a little-endian integer and
 * return its value as a C long.  Obscure:  when nbytes is 1 or 2, this is an
 * unsigned little-endian int, but when nbytes is 4 it's a signed one.  This
 * is a historical source of x-platform bugs.
 *
 * The value is assembled in an unsigned long: left-shifting a signed long
 * into its sign bit is undefined behaviour, which the 4-byte case would hit
 * on platforms where long is only 32 bits wide.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;

    for (int i = 0; i < nbytes; i++) {
        x |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: when bit 31 is set the value is x - 2**32.  It is computed
     * as -(0xFFFFFFFF - x) - 1 so that no intermediate result leaves the
     * range of a 32-bit long.
     */
    if (nbytes == 4 && (x & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - x) - 1;
    }

    return (long)x;
}
5272 
5273 static int
load_binintx(UnpicklerObject * self,char * s,int size)5274 load_binintx(UnpicklerObject *self, char *s, int size)
5275 {
5276     PyObject *value;
5277     long x;
5278 
5279     x = calc_binint(s, size);
5280 
5281     if ((value = PyLong_FromLong(x)) == NULL)
5282         return -1;
5283 
5284     PDATA_PUSH(self->stack, value, -1);
5285     return 0;
5286 }
5287 
5288 static int
load_binint(UnpicklerObject * self)5289 load_binint(UnpicklerObject *self)
5290 {
5291     char *s;
5292 
5293     if (_Unpickler_Read(self, &s, 4) < 0)
5294         return -1;
5295 
5296     return load_binintx(self, s, 4);
5297 }
5298 
5299 static int
load_binint1(UnpicklerObject * self)5300 load_binint1(UnpicklerObject *self)
5301 {
5302     char *s;
5303 
5304     if (_Unpickler_Read(self, &s, 1) < 0)
5305         return -1;
5306 
5307     return load_binintx(self, s, 1);
5308 }
5309 
5310 static int
load_binint2(UnpicklerObject * self)5311 load_binint2(UnpicklerObject *self)
5312 {
5313     char *s;
5314 
5315     if (_Unpickler_Read(self, &s, 2) < 0)
5316         return -1;
5317 
5318     return load_binintx(self, s, 2);
5319 }
5320 
5321 static int
load_long(UnpicklerObject * self)5322 load_long(UnpicklerObject *self)
5323 {
5324     PyObject *value;
5325     char *s = NULL;
5326     Py_ssize_t len;
5327 
5328     if ((len = _Unpickler_Readline(self, &s)) < 0)
5329         return -1;
5330     if (len < 2)
5331         return bad_readline();
5332 
5333     /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5334        the 'L' before calling PyLong_FromString.  In order to maintain
5335        compatibility with Python 3.0.0, we don't actually *require*
5336        the 'L' to be present. */
5337     if (s[len-2] == 'L')
5338         s[len-2] = '\0';
5339     /* XXX: Should the base argument explicitly set to 10? */
5340     value = PyLong_FromString(s, NULL, 0);
5341     if (value == NULL)
5342         return -1;
5343 
5344     PDATA_PUSH(self->stack, value, -1);
5345     return 0;
5346 }
5347 
5348 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
5349  * data following.
5350  */
5351 static int
load_counted_long(UnpicklerObject * self,int size)5352 load_counted_long(UnpicklerObject *self, int size)
5353 {
5354     PyObject *value;
5355     char *nbytes;
5356     char *pdata;
5357 
5358     assert(size == 1 || size == 4);
5359     if (_Unpickler_Read(self, &nbytes, size) < 0)
5360         return -1;
5361 
5362     size = calc_binint(nbytes, size);
5363     if (size < 0) {
5364         PickleState *st = _Pickle_GetGlobalState();
5365         /* Corrupt or hostile pickle -- we never write one like this */
5366         PyErr_SetString(st->UnpicklingError,
5367                         "LONG pickle has negative byte count");
5368         return -1;
5369     }
5370 
5371     if (size == 0)
5372         value = PyLong_FromLong(0L);
5373     else {
5374         /* Read the raw little-endian bytes and convert. */
5375         if (_Unpickler_Read(self, &pdata, size) < 0)
5376             return -1;
5377         value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5378                                       1 /* little endian */ , 1 /* signed */ );
5379     }
5380     if (value == NULL)
5381         return -1;
5382     PDATA_PUSH(self->stack, value, -1);
5383     return 0;
5384 }
5385 
5386 static int
load_float(UnpicklerObject * self)5387 load_float(UnpicklerObject *self)
5388 {
5389     PyObject *value;
5390     char *endptr, *s;
5391     Py_ssize_t len;
5392     double d;
5393 
5394     if ((len = _Unpickler_Readline(self, &s)) < 0)
5395         return -1;
5396     if (len < 2)
5397         return bad_readline();
5398 
5399     errno = 0;
5400     d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5401     if (d == -1.0 && PyErr_Occurred())
5402         return -1;
5403     if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5404         PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5405         return -1;
5406     }
5407     value = PyFloat_FromDouble(d);
5408     if (value == NULL)
5409         return -1;
5410 
5411     PDATA_PUSH(self->stack, value, -1);
5412     return 0;
5413 }
5414 
5415 static int
load_binfloat(UnpicklerObject * self)5416 load_binfloat(UnpicklerObject *self)
5417 {
5418     PyObject *value;
5419     double x;
5420     char *s;
5421 
5422     if (_Unpickler_Read(self, &s, 8) < 0)
5423         return -1;
5424 
5425     x = PyFloat_Unpack8(s, 0);
5426     if (x == -1.0 && PyErr_Occurred())
5427         return -1;
5428 
5429     if ((value = PyFloat_FromDouble(x)) == NULL)
5430         return -1;
5431 
5432     PDATA_PUSH(self->stack, value, -1);
5433     return 0;
5434 }
5435 
5436 static int
load_string(UnpicklerObject * self)5437 load_string(UnpicklerObject *self)
5438 {
5439     PyObject *bytes;
5440     PyObject *obj;
5441     Py_ssize_t len;
5442     char *s, *p;
5443 
5444     if ((len = _Unpickler_Readline(self, &s)) < 0)
5445         return -1;
5446     /* Strip the newline */
5447     len--;
5448     /* Strip outermost quotes */
5449     if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5450         p = s + 1;
5451         len -= 2;
5452     }
5453     else {
5454         PickleState *st = _Pickle_GetGlobalState();
5455         PyErr_SetString(st->UnpicklingError,
5456                         "the STRING opcode argument must be quoted");
5457         return -1;
5458     }
5459     assert(len >= 0);
5460 
5461     /* Use the PyBytes API to decode the string, since that is what is used
5462        to encode, and then coerce the result to Unicode. */
5463     bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5464     if (bytes == NULL)
5465         return -1;
5466 
5467     /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5468        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5469     if (strcmp(self->encoding, "bytes") == 0) {
5470         obj = bytes;
5471     }
5472     else {
5473         obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5474         Py_DECREF(bytes);
5475         if (obj == NULL) {
5476             return -1;
5477         }
5478     }
5479 
5480     PDATA_PUSH(self->stack, obj, -1);
5481     return 0;
5482 }
5483 
5484 static int
load_counted_binstring(UnpicklerObject * self,int nbytes)5485 load_counted_binstring(UnpicklerObject *self, int nbytes)
5486 {
5487     PyObject *obj;
5488     Py_ssize_t size;
5489     char *s;
5490 
5491     if (_Unpickler_Read(self, &s, nbytes) < 0)
5492         return -1;
5493 
5494     size = calc_binsize(s, nbytes);
5495     if (size < 0) {
5496         PickleState *st = _Pickle_GetGlobalState();
5497         PyErr_Format(st->UnpicklingError,
5498                      "BINSTRING exceeds system's maximum size of %zd bytes",
5499                      PY_SSIZE_T_MAX);
5500         return -1;
5501     }
5502 
5503     if (_Unpickler_Read(self, &s, size) < 0)
5504         return -1;
5505 
5506     /* Convert Python 2.x strings to bytes if the *encoding* given to the
5507        Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5508     if (strcmp(self->encoding, "bytes") == 0) {
5509         obj = PyBytes_FromStringAndSize(s, size);
5510     }
5511     else {
5512         obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5513     }
5514     if (obj == NULL) {
5515         return -1;
5516     }
5517 
5518     PDATA_PUSH(self->stack, obj, -1);
5519     return 0;
5520 }
5521 
5522 static int
load_counted_binbytes(UnpicklerObject * self,int nbytes)5523 load_counted_binbytes(UnpicklerObject *self, int nbytes)
5524 {
5525     PyObject *bytes;
5526     Py_ssize_t size;
5527     char *s;
5528 
5529     if (_Unpickler_Read(self, &s, nbytes) < 0)
5530         return -1;
5531 
5532     size = calc_binsize(s, nbytes);
5533     if (size < 0) {
5534         PyErr_Format(PyExc_OverflowError,
5535                      "BINBYTES exceeds system's maximum size of %zd bytes",
5536                      PY_SSIZE_T_MAX);
5537         return -1;
5538     }
5539 
5540     bytes = PyBytes_FromStringAndSize(NULL, size);
5541     if (bytes == NULL)
5542         return -1;
5543     if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
5544         Py_DECREF(bytes);
5545         return -1;
5546     }
5547 
5548     PDATA_PUSH(self->stack, bytes, -1);
5549     return 0;
5550 }
5551 
5552 static int
load_counted_bytearray(UnpicklerObject * self)5553 load_counted_bytearray(UnpicklerObject *self)
5554 {
5555     PyObject *bytearray;
5556     Py_ssize_t size;
5557     char *s;
5558 
5559     if (_Unpickler_Read(self, &s, 8) < 0) {
5560         return -1;
5561     }
5562 
5563     size = calc_binsize(s, 8);
5564     if (size < 0) {
5565         PyErr_Format(PyExc_OverflowError,
5566                      "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5567                      PY_SSIZE_T_MAX);
5568         return -1;
5569     }
5570 
5571     bytearray = PyByteArray_FromStringAndSize(NULL, size);
5572     if (bytearray == NULL) {
5573         return -1;
5574     }
5575     if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
5576         Py_DECREF(bytearray);
5577         return -1;
5578     }
5579 
5580     PDATA_PUSH(self->stack, bytearray, -1);
5581     return 0;
5582 }
5583 
5584 static int
load_next_buffer(UnpicklerObject * self)5585 load_next_buffer(UnpicklerObject *self)
5586 {
5587     if (self->buffers == NULL) {
5588         PickleState *st = _Pickle_GetGlobalState();
5589         PyErr_SetString(st->UnpicklingError,
5590                         "pickle stream refers to out-of-band data "
5591                         "but no *buffers* argument was given");
5592         return -1;
5593     }
5594     PyObject *buf = PyIter_Next(self->buffers);
5595     if (buf == NULL) {
5596         if (!PyErr_Occurred()) {
5597             PickleState *st = _Pickle_GetGlobalState();
5598             PyErr_SetString(st->UnpicklingError,
5599                             "not enough out-of-band buffers");
5600         }
5601         return -1;
5602     }
5603 
5604     PDATA_PUSH(self->stack, buf, -1);
5605     return 0;
5606 }
5607 
5608 static int
load_readonly_buffer(UnpicklerObject * self)5609 load_readonly_buffer(UnpicklerObject *self)
5610 {
5611     Py_ssize_t len = Py_SIZE(self->stack);
5612     if (len <= self->stack->fence) {
5613         return Pdata_stack_underflow(self->stack);
5614     }
5615 
5616     PyObject *obj = self->stack->data[len - 1];
5617     PyObject *view = PyMemoryView_FromObject(obj);
5618     if (view == NULL) {
5619         return -1;
5620     }
5621     if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5622         /* Original object is writable */
5623         PyMemoryView_GET_BUFFER(view)->readonly = 1;
5624         self->stack->data[len - 1] = view;
5625         Py_DECREF(obj);
5626     }
5627     else {
5628         /* Original object is read-only, no need to replace it */
5629         Py_DECREF(view);
5630     }
5631     return 0;
5632 }
5633 
5634 static int
load_unicode(UnpicklerObject * self)5635 load_unicode(UnpicklerObject *self)
5636 {
5637     PyObject *str;
5638     Py_ssize_t len;
5639     char *s = NULL;
5640 
5641     if ((len = _Unpickler_Readline(self, &s)) < 0)
5642         return -1;
5643     if (len < 1)
5644         return bad_readline();
5645 
5646     str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5647     if (str == NULL)
5648         return -1;
5649 
5650     PDATA_PUSH(self->stack, str, -1);
5651     return 0;
5652 }
5653 
5654 static int
load_counted_binunicode(UnpicklerObject * self,int nbytes)5655 load_counted_binunicode(UnpicklerObject *self, int nbytes)
5656 {
5657     PyObject *str;
5658     Py_ssize_t size;
5659     char *s;
5660 
5661     if (_Unpickler_Read(self, &s, nbytes) < 0)
5662         return -1;
5663 
5664     size = calc_binsize(s, nbytes);
5665     if (size < 0) {
5666         PyErr_Format(PyExc_OverflowError,
5667                      "BINUNICODE exceeds system's maximum size of %zd bytes",
5668                      PY_SSIZE_T_MAX);
5669         return -1;
5670     }
5671 
5672     if (_Unpickler_Read(self, &s, size) < 0)
5673         return -1;
5674 
5675     str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5676     if (str == NULL)
5677         return -1;
5678 
5679     PDATA_PUSH(self->stack, str, -1);
5680     return 0;
5681 }
5682 
5683 static int
load_counted_tuple(UnpicklerObject * self,Py_ssize_t len)5684 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5685 {
5686     PyObject *tuple;
5687 
5688     if (Py_SIZE(self->stack) < len)
5689         return Pdata_stack_underflow(self->stack);
5690 
5691     tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5692     if (tuple == NULL)
5693         return -1;
5694     PDATA_PUSH(self->stack, tuple, -1);
5695     return 0;
5696 }
5697 
5698 static int
load_tuple(UnpicklerObject * self)5699 load_tuple(UnpicklerObject *self)
5700 {
5701     Py_ssize_t i;
5702 
5703     if ((i = marker(self)) < 0)
5704         return -1;
5705 
5706     return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5707 }
5708 
5709 static int
load_empty_list(UnpicklerObject * self)5710 load_empty_list(UnpicklerObject *self)
5711 {
5712     PyObject *list;
5713 
5714     if ((list = PyList_New(0)) == NULL)
5715         return -1;
5716     PDATA_PUSH(self->stack, list, -1);
5717     return 0;
5718 }
5719 
5720 static int
load_empty_dict(UnpicklerObject * self)5721 load_empty_dict(UnpicklerObject *self)
5722 {
5723     PyObject *dict;
5724 
5725     if ((dict = PyDict_New()) == NULL)
5726         return -1;
5727     PDATA_PUSH(self->stack, dict, -1);
5728     return 0;
5729 }
5730 
5731 static int
load_empty_set(UnpicklerObject * self)5732 load_empty_set(UnpicklerObject *self)
5733 {
5734     PyObject *set;
5735 
5736     if ((set = PySet_New(NULL)) == NULL)
5737         return -1;
5738     PDATA_PUSH(self->stack, set, -1);
5739     return 0;
5740 }
5741 
5742 static int
load_list(UnpicklerObject * self)5743 load_list(UnpicklerObject *self)
5744 {
5745     PyObject *list;
5746     Py_ssize_t i;
5747 
5748     if ((i = marker(self)) < 0)
5749         return -1;
5750 
5751     list = Pdata_poplist(self->stack, i);
5752     if (list == NULL)
5753         return -1;
5754     PDATA_PUSH(self->stack, list, -1);
5755     return 0;
5756 }
5757 
5758 static int
load_dict(UnpicklerObject * self)5759 load_dict(UnpicklerObject *self)
5760 {
5761     PyObject *dict, *key, *value;
5762     Py_ssize_t i, j, k;
5763 
5764     if ((i = marker(self)) < 0)
5765         return -1;
5766     j = Py_SIZE(self->stack);
5767 
5768     if ((dict = PyDict_New()) == NULL)
5769         return -1;
5770 
5771     if ((j - i) % 2 != 0) {
5772         PickleState *st = _Pickle_GetGlobalState();
5773         PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5774         Py_DECREF(dict);
5775         return -1;
5776     }
5777 
5778     for (k = i + 1; k < j; k += 2) {
5779         key = self->stack->data[k - 1];
5780         value = self->stack->data[k];
5781         if (PyDict_SetItem(dict, key, value) < 0) {
5782             Py_DECREF(dict);
5783             return -1;
5784         }
5785     }
5786     Pdata_clear(self->stack, i);
5787     PDATA_PUSH(self->stack, dict, -1);
5788     return 0;
5789 }
5790 
5791 static int
load_frozenset(UnpicklerObject * self)5792 load_frozenset(UnpicklerObject *self)
5793 {
5794     PyObject *items;
5795     PyObject *frozenset;
5796     Py_ssize_t i;
5797 
5798     if ((i = marker(self)) < 0)
5799         return -1;
5800 
5801     items = Pdata_poptuple(self->stack, i);
5802     if (items == NULL)
5803         return -1;
5804 
5805     frozenset = PyFrozenSet_New(items);
5806     Py_DECREF(items);
5807     if (frozenset == NULL)
5808         return -1;
5809 
5810     PDATA_PUSH(self->stack, frozenset, -1);
5811     return 0;
5812 }
5813 
5814 static PyObject *
instantiate(PyObject * cls,PyObject * args)5815 instantiate(PyObject *cls, PyObject *args)
5816 {
5817     /* Caller must assure args are a tuple.  Normally, args come from
5818        Pdata_poptuple which packs objects from the top of the stack
5819        into a newly created tuple. */
5820     assert(PyTuple_Check(args));
5821     if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5822         PyObject *func;
5823         if (_PyObject_LookupAttr(cls, &_Py_ID(__getinitargs__), &func) < 0) {
5824             return NULL;
5825         }
5826         if (func == NULL) {
5827             return PyObject_CallMethodOneArg(cls, &_Py_ID(__new__), cls);
5828         }
5829         Py_DECREF(func);
5830     }
5831     return PyObject_CallObject(cls, args);
5832 }
5833 
5834 static int
load_obj(UnpicklerObject * self)5835 load_obj(UnpicklerObject *self)
5836 {
5837     PyObject *cls, *args, *obj = NULL;
5838     Py_ssize_t i;
5839 
5840     if ((i = marker(self)) < 0)
5841         return -1;
5842 
5843     if (Py_SIZE(self->stack) - i < 1)
5844         return Pdata_stack_underflow(self->stack);
5845 
5846     args = Pdata_poptuple(self->stack, i + 1);
5847     if (args == NULL)
5848         return -1;
5849 
5850     PDATA_POP(self->stack, cls);
5851     if (cls) {
5852         obj = instantiate(cls, args);
5853         Py_DECREF(cls);
5854     }
5855     Py_DECREF(args);
5856     if (obj == NULL)
5857         return -1;
5858 
5859     PDATA_PUSH(self->stack, obj, -1);
5860     return 0;
5861 }
5862 
5863 static int
load_inst(UnpicklerObject * self)5864 load_inst(UnpicklerObject *self)
5865 {
5866     PyObject *cls = NULL;
5867     PyObject *args = NULL;
5868     PyObject *obj = NULL;
5869     PyObject *module_name;
5870     PyObject *class_name;
5871     Py_ssize_t len;
5872     Py_ssize_t i;
5873     char *s;
5874 
5875     if ((i = marker(self)) < 0)
5876         return -1;
5877     if ((len = _Unpickler_Readline(self, &s)) < 0)
5878         return -1;
5879     if (len < 2)
5880         return bad_readline();
5881 
5882     /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5883        identifiers are permitted in Python 3.0, since the INST opcode is only
5884        supported by older protocols on Python 2.x. */
5885     module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5886     if (module_name == NULL)
5887         return -1;
5888 
5889     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5890         if (len < 2) {
5891             Py_DECREF(module_name);
5892             return bad_readline();
5893         }
5894         class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5895         if (class_name != NULL) {
5896             cls = find_class(self, module_name, class_name);
5897             Py_DECREF(class_name);
5898         }
5899     }
5900     Py_DECREF(module_name);
5901 
5902     if (cls == NULL)
5903         return -1;
5904 
5905     if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5906         obj = instantiate(cls, args);
5907         Py_DECREF(args);
5908     }
5909     Py_DECREF(cls);
5910 
5911     if (obj == NULL)
5912         return -1;
5913 
5914     PDATA_PUSH(self->stack, obj, -1);
5915     return 0;
5916 }
5917 
5918 static void
newobj_unpickling_error(const char * msg,int use_kwargs,PyObject * arg)5919 newobj_unpickling_error(const char * msg, int use_kwargs, PyObject *arg)
5920 {
5921     PickleState *st = _Pickle_GetGlobalState();
5922     PyErr_Format(st->UnpicklingError, msg,
5923                  use_kwargs ? "NEWOBJ_EX" : "NEWOBJ",
5924                  Py_TYPE(arg)->tp_name);
5925 }
5926 
5927 static int
load_newobj(UnpicklerObject * self,int use_kwargs)5928 load_newobj(UnpicklerObject *self, int use_kwargs)
5929 {
5930     PyObject *cls, *args, *kwargs = NULL;
5931     PyObject *obj;
5932 
5933     /* Stack is ... cls args [kwargs], and we want to call
5934      * cls.__new__(cls, *args, **kwargs).
5935      */
5936     if (use_kwargs) {
5937         PDATA_POP(self->stack, kwargs);
5938         if (kwargs == NULL) {
5939             return -1;
5940         }
5941     }
5942     PDATA_POP(self->stack, args);
5943     if (args == NULL) {
5944         Py_XDECREF(kwargs);
5945         return -1;
5946     }
5947     PDATA_POP(self->stack, cls);
5948     if (cls == NULL) {
5949         Py_XDECREF(kwargs);
5950         Py_DECREF(args);
5951         return -1;
5952     }
5953 
5954     if (!PyType_Check(cls)) {
5955         newobj_unpickling_error("%s class argument must be a type, not %.200s",
5956                                 use_kwargs, cls);
5957         goto error;
5958     }
5959     if (((PyTypeObject *)cls)->tp_new == NULL) {
5960         newobj_unpickling_error("%s class argument '%.200s' doesn't have __new__",
5961                                 use_kwargs, cls);
5962         goto error;
5963     }
5964     if (!PyTuple_Check(args)) {
5965         newobj_unpickling_error("%s args argument must be a tuple, not %.200s",
5966                                 use_kwargs, args);
5967         goto error;
5968     }
5969     if (use_kwargs && !PyDict_Check(kwargs)) {
5970         newobj_unpickling_error("%s kwargs argument must be a dict, not %.200s",
5971                                 use_kwargs, kwargs);
5972         goto error;
5973     }
5974 
5975     obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5976     if (obj == NULL) {
5977         goto error;
5978     }
5979     Py_XDECREF(kwargs);
5980     Py_DECREF(args);
5981     Py_DECREF(cls);
5982     PDATA_PUSH(self->stack, obj, -1);
5983     return 0;
5984 
5985 error:
5986     Py_XDECREF(kwargs);
5987     Py_DECREF(args);
5988     Py_DECREF(cls);
5989     return -1;
5990 }
5991 
5992 static int
load_global(UnpicklerObject * self)5993 load_global(UnpicklerObject *self)
5994 {
5995     PyObject *global = NULL;
5996     PyObject *module_name;
5997     PyObject *global_name;
5998     Py_ssize_t len;
5999     char *s;
6000 
6001     if ((len = _Unpickler_Readline(self, &s)) < 0)
6002         return -1;
6003     if (len < 2)
6004         return bad_readline();
6005     module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6006     if (!module_name)
6007         return -1;
6008 
6009     if ((len = _Unpickler_Readline(self, &s)) >= 0) {
6010         if (len < 2) {
6011             Py_DECREF(module_name);
6012             return bad_readline();
6013         }
6014         global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6015         if (global_name) {
6016             global = find_class(self, module_name, global_name);
6017             Py_DECREF(global_name);
6018         }
6019     }
6020     Py_DECREF(module_name);
6021 
6022     if (global == NULL)
6023         return -1;
6024     PDATA_PUSH(self->stack, global, -1);
6025     return 0;
6026 }
6027 
6028 static int
load_stack_global(UnpicklerObject * self)6029 load_stack_global(UnpicklerObject *self)
6030 {
6031     PyObject *global;
6032     PyObject *module_name;
6033     PyObject *global_name;
6034 
6035     PDATA_POP(self->stack, global_name);
6036     PDATA_POP(self->stack, module_name);
6037     if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
6038         global_name == NULL || !PyUnicode_CheckExact(global_name)) {
6039         PickleState *st = _Pickle_GetGlobalState();
6040         PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
6041         Py_XDECREF(global_name);
6042         Py_XDECREF(module_name);
6043         return -1;
6044     }
6045     global = find_class(self, module_name, global_name);
6046     Py_DECREF(global_name);
6047     Py_DECREF(module_name);
6048     if (global == NULL)
6049         return -1;
6050     PDATA_PUSH(self->stack, global, -1);
6051     return 0;
6052 }
6053 
6054 static int
load_persid(UnpicklerObject * self)6055 load_persid(UnpicklerObject *self)
6056 {
6057     PyObject *pid, *obj;
6058     Py_ssize_t len;
6059     char *s;
6060 
6061     if (self->pers_func) {
6062         if ((len = _Unpickler_Readline(self, &s)) < 0)
6063             return -1;
6064         if (len < 1)
6065             return bad_readline();
6066 
6067         pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6068         if (pid == NULL) {
6069             if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6070                 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
6071                                 "persistent IDs in protocol 0 must be "
6072                                 "ASCII strings");
6073             }
6074             return -1;
6075         }
6076 
6077         obj = call_method(self->pers_func, self->pers_func_self, pid);
6078         Py_DECREF(pid);
6079         if (obj == NULL)
6080             return -1;
6081 
6082         PDATA_PUSH(self->stack, obj, -1);
6083         return 0;
6084     }
6085     else {
6086         PickleState *st = _Pickle_GetGlobalState();
6087         PyErr_SetString(st->UnpicklingError,
6088                         "A load persistent id instruction was encountered,\n"
6089                         "but no persistent_load function was specified.");
6090         return -1;
6091     }
6092 }
6093 
6094 static int
load_binpersid(UnpicklerObject * self)6095 load_binpersid(UnpicklerObject *self)
6096 {
6097     PyObject *pid, *obj;
6098 
6099     if (self->pers_func) {
6100         PDATA_POP(self->stack, pid);
6101         if (pid == NULL)
6102             return -1;
6103 
6104         obj = call_method(self->pers_func, self->pers_func_self, pid);
6105         Py_DECREF(pid);
6106         if (obj == NULL)
6107             return -1;
6108 
6109         PDATA_PUSH(self->stack, obj, -1);
6110         return 0;
6111     }
6112     else {
6113         PickleState *st = _Pickle_GetGlobalState();
6114         PyErr_SetString(st->UnpicklingError,
6115                         "A load persistent id instruction was encountered,\n"
6116                         "but no persistent_load function was specified.");
6117         return -1;
6118     }
6119 }
6120 
6121 static int
load_pop(UnpicklerObject * self)6122 load_pop(UnpicklerObject *self)
6123 {
6124     Py_ssize_t len = Py_SIZE(self->stack);
6125 
6126     /* Note that we split the (pickle.py) stack into two stacks,
6127      * an object stack and a mark stack. We have to be clever and
6128      * pop the right one. We do this by looking at the top of the
6129      * mark stack first, and only signalling a stack underflow if
6130      * the object stack is empty and the mark stack doesn't match
6131      * our expectations.
6132      */
6133     if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
6134         self->num_marks--;
6135         self->stack->mark_set = self->num_marks != 0;
6136         self->stack->fence = self->num_marks ?
6137                 self->marks[self->num_marks - 1] : 0;
6138     } else if (len <= self->stack->fence)
6139         return Pdata_stack_underflow(self->stack);
6140     else {
6141         len--;
6142         Py_DECREF(self->stack->data[len]);
6143         Py_SET_SIZE(self->stack, len);
6144     }
6145     return 0;
6146 }
6147 
6148 static int
load_pop_mark(UnpicklerObject * self)6149 load_pop_mark(UnpicklerObject *self)
6150 {
6151     Py_ssize_t i;
6152 
6153     if ((i = marker(self)) < 0)
6154         return -1;
6155 
6156     Pdata_clear(self->stack, i);
6157 
6158     return 0;
6159 }
6160 
6161 static int
load_dup(UnpicklerObject * self)6162 load_dup(UnpicklerObject *self)
6163 {
6164     PyObject *last;
6165     Py_ssize_t len = Py_SIZE(self->stack);
6166 
6167     if (len <= self->stack->fence)
6168         return Pdata_stack_underflow(self->stack);
6169     last = self->stack->data[len - 1];
6170     PDATA_APPEND(self->stack, last, -1);
6171     return 0;
6172 }
6173 
6174 static int
load_get(UnpicklerObject * self)6175 load_get(UnpicklerObject *self)
6176 {
6177     PyObject *key, *value;
6178     Py_ssize_t idx;
6179     Py_ssize_t len;
6180     char *s;
6181 
6182     if ((len = _Unpickler_Readline(self, &s)) < 0)
6183         return -1;
6184     if (len < 2)
6185         return bad_readline();
6186 
6187     key = PyLong_FromString(s, NULL, 10);
6188     if (key == NULL)
6189         return -1;
6190     idx = PyLong_AsSsize_t(key);
6191     if (idx == -1 && PyErr_Occurred()) {
6192         Py_DECREF(key);
6193         return -1;
6194     }
6195 
6196     value = _Unpickler_MemoGet(self, idx);
6197     if (value == NULL) {
6198         if (!PyErr_Occurred()) {
6199            PickleState *st = _Pickle_GetGlobalState();
6200            PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6201         }
6202         Py_DECREF(key);
6203         return -1;
6204     }
6205     Py_DECREF(key);
6206 
6207     PDATA_APPEND(self->stack, value, -1);
6208     return 0;
6209 }
6210 
6211 static int
load_binget(UnpicklerObject * self)6212 load_binget(UnpicklerObject *self)
6213 {
6214     PyObject *value;
6215     Py_ssize_t idx;
6216     char *s;
6217 
6218     if (_Unpickler_Read(self, &s, 1) < 0)
6219         return -1;
6220 
6221     idx = Py_CHARMASK(s[0]);
6222 
6223     value = _Unpickler_MemoGet(self, idx);
6224     if (value == NULL) {
6225         PyObject *key = PyLong_FromSsize_t(idx);
6226         if (key != NULL) {
6227             PickleState *st = _Pickle_GetGlobalState();
6228             PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6229             Py_DECREF(key);
6230         }
6231         return -1;
6232     }
6233 
6234     PDATA_APPEND(self->stack, value, -1);
6235     return 0;
6236 }
6237 
6238 static int
load_long_binget(UnpicklerObject * self)6239 load_long_binget(UnpicklerObject *self)
6240 {
6241     PyObject *value;
6242     Py_ssize_t idx;
6243     char *s;
6244 
6245     if (_Unpickler_Read(self, &s, 4) < 0)
6246         return -1;
6247 
6248     idx = calc_binsize(s, 4);
6249 
6250     value = _Unpickler_MemoGet(self, idx);
6251     if (value == NULL) {
6252         PyObject *key = PyLong_FromSsize_t(idx);
6253         if (key != NULL) {
6254             PickleState *st = _Pickle_GetGlobalState();
6255             PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6256             Py_DECREF(key);
6257         }
6258         return -1;
6259     }
6260 
6261     PDATA_APPEND(self->stack, value, -1);
6262     return 0;
6263 }
6264 
6265 /* Push an object from the extension registry (EXT[124]).  nbytes is
6266  * the number of bytes following the opcode, holding the index (code) value.
6267  */
6268 static int
load_extension(UnpicklerObject * self,int nbytes)6269 load_extension(UnpicklerObject *self, int nbytes)
6270 {
6271     char *codebytes;            /* the nbytes bytes after the opcode */
6272     long code;                  /* calc_binint returns long */
6273     PyObject *py_code;          /* code as a Python int */
6274     PyObject *obj;              /* the object to push */
6275     PyObject *pair;             /* (module_name, class_name) */
6276     PyObject *module_name, *class_name;
6277     PickleState *st = _Pickle_GetGlobalState();
6278 
6279     assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
6280     if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
6281         return -1;
6282     code = calc_binint(codebytes, nbytes);
6283     if (code <= 0) {            /* note that 0 is forbidden */
6284         /* Corrupt or hostile pickle. */
6285         PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
6286         return -1;
6287     }
6288 
6289     /* Look for the code in the cache. */
6290     py_code = PyLong_FromLong(code);
6291     if (py_code == NULL)
6292         return -1;
6293     obj = PyDict_GetItemWithError(st->extension_cache, py_code);
6294     if (obj != NULL) {
6295         /* Bingo. */
6296         Py_DECREF(py_code);
6297         PDATA_APPEND(self->stack, obj, -1);
6298         return 0;
6299     }
6300     if (PyErr_Occurred()) {
6301         Py_DECREF(py_code);
6302         return -1;
6303     }
6304 
6305     /* Look up the (module_name, class_name) pair. */
6306     pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
6307     if (pair == NULL) {
6308         Py_DECREF(py_code);
6309         if (!PyErr_Occurred()) {
6310             PyErr_Format(PyExc_ValueError, "unregistered extension "
6311                          "code %ld", code);
6312         }
6313         return -1;
6314     }
6315     /* Since the extension registry is manipulable via Python code,
6316      * confirm that pair is really a 2-tuple of strings.
6317      */
6318     if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6319         goto error;
6320     }
6321 
6322     module_name = PyTuple_GET_ITEM(pair, 0);
6323     if (!PyUnicode_Check(module_name)) {
6324         goto error;
6325     }
6326 
6327     class_name = PyTuple_GET_ITEM(pair, 1);
6328     if (!PyUnicode_Check(class_name)) {
6329         goto error;
6330     }
6331 
6332     /* Load the object. */
6333     obj = find_class(self, module_name, class_name);
6334     if (obj == NULL) {
6335         Py_DECREF(py_code);
6336         return -1;
6337     }
6338     /* Cache code -> obj. */
6339     code = PyDict_SetItem(st->extension_cache, py_code, obj);
6340     Py_DECREF(py_code);
6341     if (code < 0) {
6342         Py_DECREF(obj);
6343         return -1;
6344     }
6345     PDATA_PUSH(self->stack, obj, -1);
6346     return 0;
6347 
6348 error:
6349     Py_DECREF(py_code);
6350     PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6351                  "isn't a 2-tuple of strings", code);
6352     return -1;
6353 }
6354 
6355 static int
load_put(UnpicklerObject * self)6356 load_put(UnpicklerObject *self)
6357 {
6358     PyObject *key, *value;
6359     Py_ssize_t idx;
6360     Py_ssize_t len;
6361     char *s = NULL;
6362 
6363     if ((len = _Unpickler_Readline(self, &s)) < 0)
6364         return -1;
6365     if (len < 2)
6366         return bad_readline();
6367     if (Py_SIZE(self->stack) <= self->stack->fence)
6368         return Pdata_stack_underflow(self->stack);
6369     value = self->stack->data[Py_SIZE(self->stack) - 1];
6370 
6371     key = PyLong_FromString(s, NULL, 10);
6372     if (key == NULL)
6373         return -1;
6374     idx = PyLong_AsSsize_t(key);
6375     Py_DECREF(key);
6376     if (idx < 0) {
6377         if (!PyErr_Occurred())
6378             PyErr_SetString(PyExc_ValueError,
6379                             "negative PUT argument");
6380         return -1;
6381     }
6382 
6383     return _Unpickler_MemoPut(self, idx, value);
6384 }
6385 
6386 static int
load_binput(UnpicklerObject * self)6387 load_binput(UnpicklerObject *self)
6388 {
6389     PyObject *value;
6390     Py_ssize_t idx;
6391     char *s;
6392 
6393     if (_Unpickler_Read(self, &s, 1) < 0)
6394         return -1;
6395 
6396     if (Py_SIZE(self->stack) <= self->stack->fence)
6397         return Pdata_stack_underflow(self->stack);
6398     value = self->stack->data[Py_SIZE(self->stack) - 1];
6399 
6400     idx = Py_CHARMASK(s[0]);
6401 
6402     return _Unpickler_MemoPut(self, idx, value);
6403 }
6404 
6405 static int
load_long_binput(UnpicklerObject * self)6406 load_long_binput(UnpicklerObject *self)
6407 {
6408     PyObject *value;
6409     Py_ssize_t idx;
6410     char *s;
6411 
6412     if (_Unpickler_Read(self, &s, 4) < 0)
6413         return -1;
6414 
6415     if (Py_SIZE(self->stack) <= self->stack->fence)
6416         return Pdata_stack_underflow(self->stack);
6417     value = self->stack->data[Py_SIZE(self->stack) - 1];
6418 
6419     idx = calc_binsize(s, 4);
6420     if (idx < 0) {
6421         PyErr_SetString(PyExc_ValueError,
6422                         "negative LONG_BINPUT argument");
6423         return -1;
6424     }
6425 
6426     return _Unpickler_MemoPut(self, idx, value);
6427 }
6428 
6429 static int
load_memoize(UnpicklerObject * self)6430 load_memoize(UnpicklerObject *self)
6431 {
6432     PyObject *value;
6433 
6434     if (Py_SIZE(self->stack) <= self->stack->fence)
6435         return Pdata_stack_underflow(self->stack);
6436     value = self->stack->data[Py_SIZE(self->stack) - 1];
6437 
6438     return _Unpickler_MemoPut(self, self->memo_len, value);
6439 }
6440 
6441 static int
do_append(UnpicklerObject * self,Py_ssize_t x)6442 do_append(UnpicklerObject *self, Py_ssize_t x)
6443 {
6444     PyObject *value;
6445     PyObject *slice;
6446     PyObject *list;
6447     PyObject *result;
6448     Py_ssize_t len, i;
6449 
6450     len = Py_SIZE(self->stack);
6451     if (x > len || x <= self->stack->fence)
6452         return Pdata_stack_underflow(self->stack);
6453     if (len == x)  /* nothing to do */
6454         return 0;
6455 
6456     list = self->stack->data[x - 1];
6457 
6458     if (PyList_CheckExact(list)) {
6459         Py_ssize_t list_len;
6460         int ret;
6461 
6462         slice = Pdata_poplist(self->stack, x);
6463         if (!slice)
6464             return -1;
6465         list_len = PyList_GET_SIZE(list);
6466         ret = PyList_SetSlice(list, list_len, list_len, slice);
6467         Py_DECREF(slice);
6468         return ret;
6469     }
6470     else {
6471         PyObject *extend_func;
6472 
6473         if (_PyObject_LookupAttr(list, &_Py_ID(extend), &extend_func) < 0) {
6474             return -1;
6475         }
6476         if (extend_func != NULL) {
6477             slice = Pdata_poplist(self->stack, x);
6478             if (!slice) {
6479                 Py_DECREF(extend_func);
6480                 return -1;
6481             }
6482             result = _Pickle_FastCall(extend_func, slice);
6483             Py_DECREF(extend_func);
6484             if (result == NULL)
6485                 return -1;
6486             Py_DECREF(result);
6487         }
6488         else {
6489             PyObject *append_func;
6490 
6491             /* Even if the PEP 307 requires extend() and append() methods,
6492                fall back on append() if the object has no extend() method
6493                for backward compatibility. */
6494             append_func = PyObject_GetAttr(list, &_Py_ID(append));
6495             if (append_func == NULL)
6496                 return -1;
6497             for (i = x; i < len; i++) {
6498                 value = self->stack->data[i];
6499                 result = _Pickle_FastCall(append_func, value);
6500                 if (result == NULL) {
6501                     Pdata_clear(self->stack, i + 1);
6502                     Py_SET_SIZE(self->stack, x);
6503                     Py_DECREF(append_func);
6504                     return -1;
6505                 }
6506                 Py_DECREF(result);
6507             }
6508             Py_SET_SIZE(self->stack, x);
6509             Py_DECREF(append_func);
6510         }
6511     }
6512 
6513     return 0;
6514 }
6515 
6516 static int
load_append(UnpicklerObject * self)6517 load_append(UnpicklerObject *self)
6518 {
6519     if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6520         return Pdata_stack_underflow(self->stack);
6521     return do_append(self, Py_SIZE(self->stack) - 1);
6522 }
6523 
6524 static int
load_appends(UnpicklerObject * self)6525 load_appends(UnpicklerObject *self)
6526 {
6527     Py_ssize_t i = marker(self);
6528     if (i < 0)
6529         return -1;
6530     return do_append(self, i);
6531 }
6532 
6533 static int
do_setitems(UnpicklerObject * self,Py_ssize_t x)6534 do_setitems(UnpicklerObject *self, Py_ssize_t x)
6535 {
6536     PyObject *value, *key;
6537     PyObject *dict;
6538     Py_ssize_t len, i;
6539     int status = 0;
6540 
6541     len = Py_SIZE(self->stack);
6542     if (x > len || x <= self->stack->fence)
6543         return Pdata_stack_underflow(self->stack);
6544     if (len == x)  /* nothing to do */
6545         return 0;
6546     if ((len - x) % 2 != 0) {
6547         PickleState *st = _Pickle_GetGlobalState();
6548         /* Corrupt or hostile pickle -- we never write one like this. */
6549         PyErr_SetString(st->UnpicklingError,
6550                         "odd number of items for SETITEMS");
6551         return -1;
6552     }
6553 
6554     /* Here, dict does not actually need to be a PyDict; it could be anything
6555        that supports the __setitem__ attribute. */
6556     dict = self->stack->data[x - 1];
6557 
6558     for (i = x + 1; i < len; i += 2) {
6559         key = self->stack->data[i - 1];
6560         value = self->stack->data[i];
6561         if (PyObject_SetItem(dict, key, value) < 0) {
6562             status = -1;
6563             break;
6564         }
6565     }
6566 
6567     Pdata_clear(self->stack, x);
6568     return status;
6569 }
6570 
6571 static int
load_setitem(UnpicklerObject * self)6572 load_setitem(UnpicklerObject *self)
6573 {
6574     return do_setitems(self, Py_SIZE(self->stack) - 2);
6575 }
6576 
6577 static int
load_setitems(UnpicklerObject * self)6578 load_setitems(UnpicklerObject *self)
6579 {
6580     Py_ssize_t i = marker(self);
6581     if (i < 0)
6582         return -1;
6583     return do_setitems(self, i);
6584 }
6585 
6586 static int
load_additems(UnpicklerObject * self)6587 load_additems(UnpicklerObject *self)
6588 {
6589     PyObject *set;
6590     Py_ssize_t mark, len, i;
6591 
6592     mark =  marker(self);
6593     if (mark < 0)
6594         return -1;
6595     len = Py_SIZE(self->stack);
6596     if (mark > len || mark <= self->stack->fence)
6597         return Pdata_stack_underflow(self->stack);
6598     if (len == mark)  /* nothing to do */
6599         return 0;
6600 
6601     set = self->stack->data[mark - 1];
6602 
6603     if (PySet_Check(set)) {
6604         PyObject *items;
6605         int status;
6606 
6607         items = Pdata_poptuple(self->stack, mark);
6608         if (items == NULL)
6609             return -1;
6610 
6611         status = _PySet_Update(set, items);
6612         Py_DECREF(items);
6613         return status;
6614     }
6615     else {
6616         PyObject *add_func;
6617 
6618         add_func = PyObject_GetAttr(set, &_Py_ID(add));
6619         if (add_func == NULL)
6620             return -1;
6621         for (i = mark; i < len; i++) {
6622             PyObject *result;
6623             PyObject *item;
6624 
6625             item = self->stack->data[i];
6626             result = _Pickle_FastCall(add_func, item);
6627             if (result == NULL) {
6628                 Pdata_clear(self->stack, i + 1);
6629                 Py_SET_SIZE(self->stack, mark);
6630                 return -1;
6631             }
6632             Py_DECREF(result);
6633         }
6634         Py_SET_SIZE(self->stack, mark);
6635     }
6636 
6637     return 0;
6638 }
6639 
6640 static int
load_build(UnpicklerObject * self)6641 load_build(UnpicklerObject *self)
6642 {
6643     PyObject *state, *inst, *slotstate;
6644     PyObject *setstate;
6645     int status = 0;
6646 
6647     /* Stack is ... instance, state.  We want to leave instance at
6648      * the stack top, possibly mutated via instance.__setstate__(state).
6649      */
6650     if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6651         return Pdata_stack_underflow(self->stack);
6652 
6653     PDATA_POP(self->stack, state);
6654     if (state == NULL)
6655         return -1;
6656 
6657     inst = self->stack->data[Py_SIZE(self->stack) - 1];
6658 
6659     if (_PyObject_LookupAttr(inst, &_Py_ID(__setstate__), &setstate) < 0) {
6660         Py_DECREF(state);
6661         return -1;
6662     }
6663     if (setstate != NULL) {
6664         PyObject *result;
6665 
6666         /* The explicit __setstate__ is responsible for everything. */
6667         result = _Pickle_FastCall(setstate, state);
6668         Py_DECREF(setstate);
6669         if (result == NULL)
6670             return -1;
6671         Py_DECREF(result);
6672         return 0;
6673     }
6674 
6675     /* A default __setstate__.  First see whether state embeds a
6676      * slot state dict too (a proto 2 addition).
6677      */
6678     if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6679         PyObject *tmp = state;
6680 
6681         state = PyTuple_GET_ITEM(tmp, 0);
6682         slotstate = PyTuple_GET_ITEM(tmp, 1);
6683         Py_INCREF(state);
6684         Py_INCREF(slotstate);
6685         Py_DECREF(tmp);
6686     }
6687     else
6688         slotstate = NULL;
6689 
6690     /* Set inst.__dict__ from the state dict (if any). */
6691     if (state != Py_None) {
6692         PyObject *dict;
6693         PyObject *d_key, *d_value;
6694         Py_ssize_t i;
6695 
6696         if (!PyDict_Check(state)) {
6697             PickleState *st = _Pickle_GetGlobalState();
6698             PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6699             goto error;
6700         }
6701         dict = PyObject_GetAttr(inst, &_Py_ID(__dict__));
6702         if (dict == NULL)
6703             goto error;
6704 
6705         i = 0;
6706         while (PyDict_Next(state, &i, &d_key, &d_value)) {
6707             /* normally the keys for instance attributes are
6708                interned.  we should try to do that here. */
6709             Py_INCREF(d_key);
6710             if (PyUnicode_CheckExact(d_key))
6711                 PyUnicode_InternInPlace(&d_key);
6712             if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6713                 Py_DECREF(d_key);
6714                 goto error;
6715             }
6716             Py_DECREF(d_key);
6717         }
6718         Py_DECREF(dict);
6719     }
6720 
6721     /* Also set instance attributes from the slotstate dict (if any). */
6722     if (slotstate != NULL) {
6723         PyObject *d_key, *d_value;
6724         Py_ssize_t i;
6725 
6726         if (!PyDict_Check(slotstate)) {
6727             PickleState *st = _Pickle_GetGlobalState();
6728             PyErr_SetString(st->UnpicklingError,
6729                             "slot state is not a dictionary");
6730             goto error;
6731         }
6732         i = 0;
6733         while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6734             if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6735                 goto error;
6736         }
6737     }
6738 
6739     if (0) {
6740   error:
6741         status = -1;
6742     }
6743 
6744     Py_DECREF(state);
6745     Py_XDECREF(slotstate);
6746     return status;
6747 }
6748 
6749 static int
load_mark(UnpicklerObject * self)6750 load_mark(UnpicklerObject *self)
6751 {
6752 
6753     /* Note that we split the (pickle.py) stack into two stacks, an
6754      * object stack and a mark stack. Here we push a mark onto the
6755      * mark stack.
6756      */
6757 
6758     if (self->num_marks >= self->marks_size) {
6759         size_t alloc = ((size_t)self->num_marks << 1) + 20;
6760         Py_ssize_t *marks_new = self->marks;
6761         PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6762         if (marks_new == NULL) {
6763             PyErr_NoMemory();
6764             return -1;
6765         }
6766         self->marks = marks_new;
6767         self->marks_size = (Py_ssize_t)alloc;
6768     }
6769 
6770     self->stack->mark_set = 1;
6771     self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6772 
6773     return 0;
6774 }
6775 
6776 static int
load_reduce(UnpicklerObject * self)6777 load_reduce(UnpicklerObject *self)
6778 {
6779     PyObject *callable = NULL;
6780     PyObject *argtup = NULL;
6781     PyObject *obj = NULL;
6782 
6783     PDATA_POP(self->stack, argtup);
6784     if (argtup == NULL)
6785         return -1;
6786     PDATA_POP(self->stack, callable);
6787     if (callable) {
6788         obj = PyObject_CallObject(callable, argtup);
6789         Py_DECREF(callable);
6790     }
6791     Py_DECREF(argtup);
6792 
6793     if (obj == NULL)
6794         return -1;
6795 
6796     PDATA_PUSH(self->stack, obj, -1);
6797     return 0;
6798 }
6799 
6800 /* Just raises an error if we don't know the protocol specified.  PROTO
6801  * is the first opcode for protocols >= 2.
6802  */
6803 static int
load_proto(UnpicklerObject * self)6804 load_proto(UnpicklerObject *self)
6805 {
6806     char *s;
6807     int i;
6808 
6809     if (_Unpickler_Read(self, &s, 1) < 0)
6810         return -1;
6811 
6812     i = (unsigned char)s[0];
6813     if (i <= HIGHEST_PROTOCOL) {
6814         self->proto = i;
6815         return 0;
6816     }
6817 
6818     PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6819     return -1;
6820 }
6821 
6822 static int
load_frame(UnpicklerObject * self)6823 load_frame(UnpicklerObject *self)
6824 {
6825     char *s;
6826     Py_ssize_t frame_len;
6827 
6828     if (_Unpickler_Read(self, &s, 8) < 0)
6829         return -1;
6830 
6831     frame_len = calc_binsize(s, 8);
6832     if (frame_len < 0) {
6833         PyErr_Format(PyExc_OverflowError,
6834                      "FRAME length exceeds system's maximum of %zd bytes",
6835                      PY_SSIZE_T_MAX);
6836         return -1;
6837     }
6838 
6839     if (_Unpickler_Read(self, &s, frame_len) < 0)
6840         return -1;
6841 
6842     /* Rewind to start of frame */
6843     self->next_read_idx -= frame_len;
6844     return 0;
6845 }
6846 
/* Run the unpickling loop: reset the per-load state, then read one opcode
 * byte at a time and dispatch to the matching load_*() handler until STOP
 * is read or a handler fails.
 *
 * On success, returns the object popped from the top of the stack.
 * On failure, returns NULL with an exception set.  An UnpicklingError
 * raised while reading the next opcode byte is replaced by EOFError
 * ("Ran out of input"); an unknown opcode raises UnpicklingError.
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *value = NULL;
    char *s = NULL;

    /* Start from a clean slate: no marks, no fence, protocol 0 and an
       empty stack. */
    self->num_marks = 0;
    self->stack->mark_set = 0;
    self->stack->fence = 0;
    self->proto = 0;
    if (Py_SIZE(self->stack))
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below.
       In both macros, a failing handler executes `break`, which leaves the
       switch and reaches the unconditional `break` that ends the loop; a
       successful handler executes `continue`, which goes on to the next
       opcode. */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        if (_Unpickler_Read(self, &s, 1) < 0) {
            PickleState *st = _Pickle_GetGlobalState();
            /* Any other exception type is propagated unchanged. */
            if (PyErr_ExceptionMatches(st->UnpicklingError)) {
                PyErr_Format(PyExc_EOFError, "Ran out of input");
            }
            return NULL;
        }

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
        OP_ARG(BINBYTES, load_counted_binbytes, 4)
        OP_ARG(BINBYTES8, load_counted_binbytes, 8)
        OP(BYTEARRAY8, load_counted_bytearray)
        OP(NEXT_BUFFER, load_next_buffer)
        OP(READONLY_BUFFER, load_readonly_buffer)
        OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
        OP_ARG(BINSTRING, load_counted_binstring, 4)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
        OP_ARG(BINUNICODE, load_counted_binunicode, 4)
        OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(EMPTY_SET, load_empty_set)
        OP(ADDITEMS, load_additems)
        OP(FROZENSET, load_frozenset)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP_ARG(NEWOBJ, load_newobj, 0)
        OP_ARG(NEWOBJ_EX, load_newobj, 1)
        OP(GLOBAL, load_global)
        OP(STACK_GLOBAL, load_stack_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(MEMOIZE, load_memoize)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP(FRAME, load_frame)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        case STOP:
            break;

        default:
            {
                PickleState *st = _Pickle_GetGlobalState();
                unsigned char c = (unsigned char) *s;
                /* Show printable ASCII as the character itself (quote and
                   backslash excepted) and anything else as a hex escape. */
                if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '%c'.", c);
                }
                else {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '\\x%02x'.", c);
                }
                return NULL;
            }
        }

        break;                  /* and we are done! */
    }

    /* The loop ends both on STOP and on a failed handler; a pending
       exception tells the two apart. */
    if (PyErr_Occurred()) {
        return NULL;
    }

    if (_Unpickler_SkipConsumed(self) < 0)
        return NULL;

    PDATA_POP(self->stack, value);
    return value;
}
6977 
6978 /*[clinic input]
6979 
6980 _pickle.Unpickler.load
6981 
6982 Load a pickle.
6983 
6984 Read a pickled object representation from the open file object given
6985 in the constructor, and return the reconstituted object hierarchy
6986 specified therein.
6987 [clinic start generated code]*/
6988 
6989 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self)6990 _pickle_Unpickler_load_impl(UnpicklerObject *self)
6991 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
6992 {
6993     UnpicklerObject *unpickler = (UnpicklerObject*)self;
6994 
6995     /* Check whether the Unpickler was initialized correctly. This prevents
6996        segfaulting if a subclass overridden __init__ with a function that does
6997        not call Unpickler.__init__(). Here, we simply ensure that self->read
6998        is not NULL. */
6999     if (unpickler->read == NULL) {
7000         PickleState *st = _Pickle_GetGlobalState();
7001         PyErr_Format(st->UnpicklingError,
7002                      "Unpickler.__init__() was not called by %s.__init__()",
7003                      Py_TYPE(unpickler)->tp_name);
7004         return NULL;
7005     }
7006 
7007     return load(unpickler);
7008 }
7009 
7010 /* The name of find_class() is misleading. In newer pickle protocols, this
7011    function is used for loading any global (i.e., functions), not just
7012    classes. The name is kept only for backward compatibility. */
7013 
7014 /*[clinic input]
7015 
7016 _pickle.Unpickler.find_class
7017 
7018   module_name: object
7019   global_name: object
7020   /
7021 
7022 Return an object from a specified module.
7023 
7024 If necessary, the module will be imported. Subclasses may override
7025 this method (e.g. to restrict unpickling of arbitrary classes and
7026 functions).
7027 
7028 This method is called whenever a class or a function object is
7029 needed.  Both arguments passed are str objects.
7030 [clinic start generated code]*/
7031 
7032 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)7033 _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
7034                                   PyObject *module_name,
7035                                   PyObject *global_name)
7036 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
7037 {
7038     PyObject *global;
7039     PyObject *module;
7040 
7041     if (PySys_Audit("pickle.find_class", "OO",
7042                     module_name, global_name) < 0) {
7043         return NULL;
7044     }
7045 
7046     /* Try to map the old names used in Python 2.x to the new ones used in
7047        Python 3.x.  We do this only with old pickle protocols and when the
7048        user has not disabled the feature. */
7049     if (self->proto < 3 && self->fix_imports) {
7050         PyObject *key;
7051         PyObject *item;
7052         PickleState *st = _Pickle_GetGlobalState();
7053 
7054         /* Check if the global (i.e., a function or a class) was renamed
7055            or moved to another module. */
7056         key = PyTuple_Pack(2, module_name, global_name);
7057         if (key == NULL)
7058             return NULL;
7059         item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
7060         Py_DECREF(key);
7061         if (item) {
7062             if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7063                 PyErr_Format(PyExc_RuntimeError,
7064                              "_compat_pickle.NAME_MAPPING values should be "
7065                              "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7066                 return NULL;
7067             }
7068             module_name = PyTuple_GET_ITEM(item, 0);
7069             global_name = PyTuple_GET_ITEM(item, 1);
7070             if (!PyUnicode_Check(module_name) ||
7071                 !PyUnicode_Check(global_name)) {
7072                 PyErr_Format(PyExc_RuntimeError,
7073                              "_compat_pickle.NAME_MAPPING values should be "
7074                              "pairs of str, not (%.200s, %.200s)",
7075                              Py_TYPE(module_name)->tp_name,
7076                              Py_TYPE(global_name)->tp_name);
7077                 return NULL;
7078             }
7079         }
7080         else if (PyErr_Occurred()) {
7081             return NULL;
7082         }
7083         else {
7084             /* Check if the module was renamed. */
7085             item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7086             if (item) {
7087                 if (!PyUnicode_Check(item)) {
7088                     PyErr_Format(PyExc_RuntimeError,
7089                                 "_compat_pickle.IMPORT_MAPPING values should be "
7090                                 "strings, not %.200s", Py_TYPE(item)->tp_name);
7091                     return NULL;
7092                 }
7093                 module_name = item;
7094             }
7095             else if (PyErr_Occurred()) {
7096                 return NULL;
7097             }
7098         }
7099     }
7100 
7101     /*
7102      * we don't use PyImport_GetModule here, because it can return partially-
7103      * initialised modules, which then cause the getattribute to fail.
7104      */
7105     module = PyImport_Import(module_name);
7106     if (module == NULL) {
7107         return NULL;
7108     }
7109     global = getattribute(module, global_name, self->proto >= 4);
7110     Py_DECREF(module);
7111     return global;
7112 }
7113 
7114 /*[clinic input]
7115 
7116 _pickle.Unpickler.__sizeof__ -> Py_ssize_t
7117 
7118 Returns size in memory, in bytes.
7119 [clinic start generated code]*/
7120 
7121 static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)7122 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7123 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
7124 {
7125     Py_ssize_t res;
7126 
7127     res = _PyObject_SIZE(Py_TYPE(self));
7128     if (self->memo != NULL)
7129         res += self->memo_size * sizeof(PyObject *);
7130     if (self->marks != NULL)
7131         res += self->marks_size * sizeof(Py_ssize_t);
7132     if (self->input_line != NULL)
7133         res += strlen(self->input_line) + 1;
7134     if (self->encoding != NULL)
7135         res += strlen(self->encoding) + 1;
7136     if (self->errors != NULL)
7137         res += strlen(self->errors) + 1;
7138     return res;
7139 }
7140 
/* Method table of the Unpickler type: load(), find_class() and
 * __sizeof__().  Each entry is a *_METHODDEF macro that is not defined in
 * this part of the file (presumably in the Argument Clinic generated
 * header -- confirm).  No comma is written after the entries, so each
 * macro is expected to supply its own trailing comma.
 */
static struct PyMethodDef Unpickler_methods[] = {
    _PICKLE_UNPICKLER_LOAD_METHODDEF
    _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
    _PICKLE_UNPICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
7147 
/* Deallocator for Unpickler objects: untrack the object from the garbage
 * collector, drop every object reference it holds, release the input
 * buffer, free the auxiliary arrays and strings, and finally free the
 * object itself through its type's tp_free.
 */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Untrack first, before any field is torn down. */
    PyObject_GC_UnTrack((PyObject *)self);
    Py_XDECREF(self->readline);
    Py_XDECREF(self->readinto);
    Py_XDECREF(self->read);
    Py_XDECREF(self->peek);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->buffers);
    /* Only release the Py_buffer if one is currently held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    PyMem_Free(self->input_line);
    PyMem_Free(self->encoding);
    PyMem_Free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
7172 
7173 static int
Unpickler_traverse(UnpicklerObject * self,visitproc visit,void * arg)7174 Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
7175 {
7176     Py_VISIT(self->readline);
7177     Py_VISIT(self->readinto);
7178     Py_VISIT(self->read);
7179     Py_VISIT(self->peek);
7180     Py_VISIT(self->stack);
7181     Py_VISIT(self->pers_func);
7182     Py_VISIT(self->buffers);
7183     return 0;
7184 }
7185 
7186 static int
Unpickler_clear(UnpicklerObject * self)7187 Unpickler_clear(UnpicklerObject *self)
7188 {
7189     Py_CLEAR(self->readline);
7190     Py_CLEAR(self->readinto);
7191     Py_CLEAR(self->read);
7192     Py_CLEAR(self->peek);
7193     Py_CLEAR(self->stack);
7194     Py_CLEAR(self->pers_func);
7195     Py_CLEAR(self->buffers);
7196     if (self->buffer.buf != NULL) {
7197         PyBuffer_Release(&self->buffer);
7198         self->buffer.buf = NULL;
7199     }
7200 
7201     _Unpickler_MemoCleanup(self);
7202     PyMem_Free(self->marks);
7203     self->marks = NULL;
7204     PyMem_Free(self->input_line);
7205     self->input_line = NULL;
7206     PyMem_Free(self->encoding);
7207     self->encoding = NULL;
7208     PyMem_Free(self->errors);
7209     self->errors = NULL;
7210 
7211     return 0;
7212 }
7213 
7214 /*[clinic input]
7215 
7216 _pickle.Unpickler.__init__
7217 
7218   file: object
7219   *
7220   fix_imports: bool = True
7221   encoding: str = 'ASCII'
7222   errors: str = 'strict'
7223   buffers: object(c_default="NULL") = ()
7224 
7225 This takes a binary file for reading a pickle data stream.
7226 
7227 The protocol version of the pickle is detected automatically, so no
7228 protocol argument is needed.  Bytes past the pickled object's
7229 representation are ignored.
7230 
7231 The argument *file* must have two methods, a read() method that takes
7232 an integer argument, and a readline() method that requires no
7233 arguments.  Both methods should return bytes.  Thus *file* can be a
7234 binary file object opened for reading, an io.BytesIO object, or any
7235 other custom object that meets this interface.
7236 
7237 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7238 which are used to control compatibility support for pickle stream
7239 generated by Python 2.  If *fix_imports* is True, pickle will try to
7240 map the old Python 2 names to the new names used in Python 3.  The
7241 *encoding* and *errors* tell pickle how to decode 8-bit string
7242 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7243 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7244 string instances as bytes objects.
7245 [clinic start generated code]*/
7246 
7247 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7248 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7249                                 int fix_imports, const char *encoding,
7250                                 const char *errors, PyObject *buffers)
7251 /*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
7252 {
7253     /* In case of multiple __init__() calls, clear previous content. */
7254     if (self->read != NULL)
7255         (void)Unpickler_clear(self);
7256 
7257     if (_Unpickler_SetInputStream(self, file) < 0)
7258         return -1;
7259 
7260     if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
7261         return -1;
7262 
7263     if (_Unpickler_SetBuffers(self, buffers) < 0)
7264         return -1;
7265 
7266     self->fix_imports = fix_imports;
7267 
7268     if (init_method_ref((PyObject *)self, &_Py_ID(persistent_load),
7269                         &self->pers_func, &self->pers_func_self) < 0)
7270     {
7271         return -1;
7272     }
7273 
7274     self->stack = (Pdata *)Pdata_New();
7275     if (self->stack == NULL)
7276         return -1;
7277 
7278     self->memo_size = 32;
7279     self->memo = _Unpickler_NewMemo(self->memo_size);
7280     if (self->memo == NULL)
7281         return -1;
7282 
7283     self->proto = 0;
7284 
7285     return 0;
7286 }
7287 
7288 
7289 /* Define a proxy object for the Unpickler's internal memo object. This is to
7290  * avoid breaking code like:
7291  *  unpickler.memo.clear()
7292  * and
7293  *  unpickler.memo = saved_memo
7294  * Is this a good idea? Not really, but we don't want to break code that uses
7295  * it. Note that we don't implement the entire mapping API here. This is
7296  * intentional, as these should be treated as black-box implementation details.
7297  *
7298  * We do, however, have to implement pickling/unpickling support because of
7299  * real-world code like cvs2svn.
7300  */
7301 
7302 /*[clinic input]
7303 _pickle.UnpicklerMemoProxy.clear
7304 
7305 Remove all items from memo.
7306 [clinic start generated code]*/
7307 
7308 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)7309 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
7310 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
7311 {
7312     _Unpickler_MemoCleanup(self->unpickler);
7313     self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7314     if (self->unpickler->memo == NULL)
7315         return NULL;
7316     Py_RETURN_NONE;
7317 }
7318 
7319 /*[clinic input]
7320 _pickle.UnpicklerMemoProxy.copy
7321 
7322 Copy the memo to a new object.
7323 [clinic start generated code]*/
7324 
7325 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)7326 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
7327 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
7328 {
7329     size_t i;
7330     PyObject *new_memo = PyDict_New();
7331     if (new_memo == NULL)
7332         return NULL;
7333 
7334     for (i = 0; i < self->unpickler->memo_size; i++) {
7335         int status;
7336         PyObject *key, *value;
7337 
7338         value = self->unpickler->memo[i];
7339         if (value == NULL)
7340             continue;
7341 
7342         key = PyLong_FromSsize_t(i);
7343         if (key == NULL)
7344             goto error;
7345         status = PyDict_SetItem(new_memo, key, value);
7346         Py_DECREF(key);
7347         if (status < 0)
7348             goto error;
7349     }
7350     return new_memo;
7351 
7352 error:
7353     Py_DECREF(new_memo);
7354     return NULL;
7355 }
7356 
7357 /*[clinic input]
7358 _pickle.UnpicklerMemoProxy.__reduce__
7359 
7360 Implement pickling support.
7361 [clinic start generated code]*/
7362 
7363 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)7364 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
7365 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
7366 {
7367     PyObject *reduce_value;
7368     PyObject *constructor_args;
7369     PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
7370     if (contents == NULL)
7371         return NULL;
7372 
7373     reduce_value = PyTuple_New(2);
7374     if (reduce_value == NULL) {
7375         Py_DECREF(contents);
7376         return NULL;
7377     }
7378     constructor_args = PyTuple_New(1);
7379     if (constructor_args == NULL) {
7380         Py_DECREF(contents);
7381         Py_DECREF(reduce_value);
7382         return NULL;
7383     }
7384     PyTuple_SET_ITEM(constructor_args, 0, contents);
7385     Py_INCREF((PyObject *)&PyDict_Type);
7386     PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
7387     PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7388     return reduce_value;
7389 }
7390 
7391 static PyMethodDef unpicklerproxy_methods[] = {
7392     _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
7393     _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
7394     _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
7395     {NULL, NULL}    /* sentinel */
7396 };
7397 
7398 static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject * self)7399 UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
7400 {
7401     PyObject_GC_UnTrack(self);
7402     Py_XDECREF(self->unpickler);
7403     PyObject_GC_Del((PyObject *)self);
7404 }
7405 
7406 static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject * self,visitproc visit,void * arg)7407 UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
7408                             visitproc visit, void *arg)
7409 {
7410     Py_VISIT(self->unpickler);
7411     return 0;
7412 }
7413 
7414 static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject * self)7415 UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
7416 {
7417     Py_CLEAR(self->unpickler);
7418     return 0;
7419 }
7420 
7421 static PyTypeObject UnpicklerMemoProxyType = {
7422     PyVarObject_HEAD_INIT(NULL, 0)
7423     "_pickle.UnpicklerMemoProxy",               /*tp_name*/
7424     sizeof(UnpicklerMemoProxyObject),           /*tp_basicsize*/
7425     0,
7426     (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
7427     0,                                          /* tp_vectorcall_offset */
7428     0,                                          /* tp_getattr */
7429     0,                                          /* tp_setattr */
7430     0,                                          /* tp_as_async */
7431     0,                                          /* tp_repr */
7432     0,                                          /* tp_as_number */
7433     0,                                          /* tp_as_sequence */
7434     0,                                          /* tp_as_mapping */
7435     PyObject_HashNotImplemented,                /* tp_hash */
7436     0,                                          /* tp_call */
7437     0,                                          /* tp_str */
7438     PyObject_GenericGetAttr,                    /* tp_getattro */
7439     PyObject_GenericSetAttr,                    /* tp_setattro */
7440     0,                                          /* tp_as_buffer */
7441     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7442     0,                                          /* tp_doc */
7443     (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
7444     (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
7445     0,                                          /* tp_richcompare */
7446     0,                                          /* tp_weaklistoffset */
7447     0,                                          /* tp_iter */
7448     0,                                          /* tp_iternext */
7449     unpicklerproxy_methods,                     /* tp_methods */
7450 };
7451 
7452 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)7453 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7454 {
7455     UnpicklerMemoProxyObject *self;
7456 
7457     self = PyObject_GC_New(UnpicklerMemoProxyObject,
7458                            &UnpicklerMemoProxyType);
7459     if (self == NULL)
7460         return NULL;
7461     Py_INCREF(unpickler);
7462     self->unpickler = unpickler;
7463     PyObject_GC_Track(self);
7464     return (PyObject *)self;
7465 }
7466 
7467 /*****************************************************************************/
7468 
7469 
7470 static PyObject *
Unpickler_get_memo(UnpicklerObject * self,void * Py_UNUSED (ignored))7471 Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
7472 {
7473     return UnpicklerMemoProxy_New(self);
7474 }
7475 
7476 static int
Unpickler_set_memo(UnpicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))7477 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
7478 {
7479     PyObject **new_memo;
7480     size_t new_memo_size = 0;
7481 
7482     if (obj == NULL) {
7483         PyErr_SetString(PyExc_TypeError,
7484                         "attribute deletion is not supported");
7485         return -1;
7486     }
7487 
7488     if (Py_IS_TYPE(obj, &UnpicklerMemoProxyType)) {
7489         UnpicklerObject *unpickler =
7490             ((UnpicklerMemoProxyObject *)obj)->unpickler;
7491 
7492         new_memo_size = unpickler->memo_size;
7493         new_memo = _Unpickler_NewMemo(new_memo_size);
7494         if (new_memo == NULL)
7495             return -1;
7496 
7497         for (size_t i = 0; i < new_memo_size; i++) {
7498             Py_XINCREF(unpickler->memo[i]);
7499             new_memo[i] = unpickler->memo[i];
7500         }
7501     }
7502     else if (PyDict_Check(obj)) {
7503         Py_ssize_t i = 0;
7504         PyObject *key, *value;
7505 
7506         new_memo_size = PyDict_GET_SIZE(obj);
7507         new_memo = _Unpickler_NewMemo(new_memo_size);
7508         if (new_memo == NULL)
7509             return -1;
7510 
7511         while (PyDict_Next(obj, &i, &key, &value)) {
7512             Py_ssize_t idx;
7513             if (!PyLong_Check(key)) {
7514                 PyErr_SetString(PyExc_TypeError,
7515                                 "memo key must be integers");
7516                 goto error;
7517             }
7518             idx = PyLong_AsSsize_t(key);
7519             if (idx == -1 && PyErr_Occurred())
7520                 goto error;
7521             if (idx < 0) {
7522                 PyErr_SetString(PyExc_ValueError,
7523                                 "memo key must be positive integers.");
7524                 goto error;
7525             }
7526             if (_Unpickler_MemoPut(self, idx, value) < 0)
7527                 goto error;
7528         }
7529     }
7530     else {
7531         PyErr_Format(PyExc_TypeError,
7532                      "'memo' attribute must be an UnpicklerMemoProxy object "
7533                      "or dict, not %.200s", Py_TYPE(obj)->tp_name);
7534         return -1;
7535     }
7536 
7537     _Unpickler_MemoCleanup(self);
7538     self->memo_size = new_memo_size;
7539     self->memo = new_memo;
7540 
7541     return 0;
7542 
7543   error:
7544     if (new_memo_size) {
7545         for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
7546             Py_XDECREF(new_memo[i]);
7547         }
7548         PyMem_Free(new_memo);
7549     }
7550     return -1;
7551 }
7552 
7553 static PyObject *
Unpickler_get_persload(UnpicklerObject * self,void * Py_UNUSED (ignored))7554 Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
7555 {
7556     if (self->pers_func == NULL) {
7557         PyErr_SetString(PyExc_AttributeError, "persistent_load");
7558         return NULL;
7559     }
7560     return reconstruct_method(self->pers_func, self->pers_func_self);
7561 }
7562 
7563 static int
Unpickler_set_persload(UnpicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))7564 Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
7565 {
7566     if (value == NULL) {
7567         PyErr_SetString(PyExc_TypeError,
7568                         "attribute deletion is not supported");
7569         return -1;
7570     }
7571     if (!PyCallable_Check(value)) {
7572         PyErr_SetString(PyExc_TypeError,
7573                         "persistent_load must be a callable taking "
7574                         "one argument");
7575         return -1;
7576     }
7577 
7578     self->pers_func_self = NULL;
7579     Py_INCREF(value);
7580     Py_XSETREF(self->pers_func, value);
7581 
7582     return 0;
7583 }
7584 
7585 static PyGetSetDef Unpickler_getsets[] = {
7586     {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
7587     {"persistent_load", (getter)Unpickler_get_persload,
7588                         (setter)Unpickler_set_persload},
7589     {NULL}
7590 };
7591 
7592 static PyTypeObject Unpickler_Type = {
7593     PyVarObject_HEAD_INIT(NULL, 0)
7594     "_pickle.Unpickler",                /*tp_name*/
7595     sizeof(UnpicklerObject),            /*tp_basicsize*/
7596     0,                                  /*tp_itemsize*/
7597     (destructor)Unpickler_dealloc,      /*tp_dealloc*/
7598     0,                                  /*tp_vectorcall_offset*/
7599     0,                                  /*tp_getattr*/
7600     0,                                  /*tp_setattr*/
7601     0,                                  /*tp_as_async*/
7602     0,                                  /*tp_repr*/
7603     0,                                  /*tp_as_number*/
7604     0,                                  /*tp_as_sequence*/
7605     0,                                  /*tp_as_mapping*/
7606     0,                                  /*tp_hash*/
7607     0,                                  /*tp_call*/
7608     0,                                  /*tp_str*/
7609     0,                                  /*tp_getattro*/
7610     0,                                  /*tp_setattro*/
7611     0,                                  /*tp_as_buffer*/
7612     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7613     _pickle_Unpickler___init____doc__,  /*tp_doc*/
7614     (traverseproc)Unpickler_traverse,   /*tp_traverse*/
7615     (inquiry)Unpickler_clear,           /*tp_clear*/
7616     0,                                  /*tp_richcompare*/
7617     0,                                  /*tp_weaklistoffset*/
7618     0,                                  /*tp_iter*/
7619     0,                                  /*tp_iternext*/
7620     Unpickler_methods,                  /*tp_methods*/
7621     0,                                  /*tp_members*/
7622     Unpickler_getsets,                  /*tp_getset*/
7623     0,                                  /*tp_base*/
7624     0,                                  /*tp_dict*/
7625     0,                                  /*tp_descr_get*/
7626     0,                                  /*tp_descr_set*/
7627     0,                                  /*tp_dictoffset*/
7628     _pickle_Unpickler___init__,         /*tp_init*/
7629     PyType_GenericAlloc,                /*tp_alloc*/
7630     PyType_GenericNew,                  /*tp_new*/
7631     PyObject_GC_Del,                    /*tp_free*/
7632     0,                                  /*tp_is_gc*/
7633 };
7634 
7635 /*[clinic input]
7636 
7637 _pickle.dump
7638 
7639   obj: object
7640   file: object
7641   protocol: object = None
7642   *
7643   fix_imports: bool = True
7644   buffer_callback: object = None
7645 
7646 Write a pickled representation of obj to the open file object file.
7647 
7648 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7649 be more efficient.
7650 
7651 The optional *protocol* argument tells the pickler to use the given
7652 protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
7653 protocol is 4. It was introduced in Python 3.4, and is incompatible
7654 with previous versions.
7655 
7656 Specifying a negative protocol version selects the highest protocol
7657 version supported.  The higher the protocol used, the more recent the
7658 version of Python needed to read the pickle produced.
7659 
7660 The *file* argument must have a write() method that accepts a single
7661 bytes argument.  It can thus be a file object opened for binary
7662 writing, an io.BytesIO instance, or any other custom object that meets
7663 this interface.
7664 
7665 If *fix_imports* is True and protocol is less than 3, pickle will try
7666 to map the new Python 3 names to the old module names used in Python
7667 2, so that the pickle data stream is readable with Python 2.
7668 
7669 If *buffer_callback* is None (the default), buffer views are serialized
7670 into *file* as part of the pickle stream.  It is an error if
7671 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7672 
7673 [clinic start generated code]*/
7674 
7675 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7676 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7677                   PyObject *protocol, int fix_imports,
7678                   PyObject *buffer_callback)
7679 /*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/
7680 {
7681     PicklerObject *pickler = _Pickler_New();
7682 
7683     if (pickler == NULL)
7684         return NULL;
7685 
7686     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7687         goto error;
7688 
7689     if (_Pickler_SetOutputStream(pickler, file) < 0)
7690         goto error;
7691 
7692     if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7693         goto error;
7694 
7695     if (dump(pickler, obj) < 0)
7696         goto error;
7697 
7698     if (_Pickler_FlushToFile(pickler) < 0)
7699         goto error;
7700 
7701     Py_DECREF(pickler);
7702     Py_RETURN_NONE;
7703 
7704   error:
7705     Py_XDECREF(pickler);
7706     return NULL;
7707 }
7708 
7709 /*[clinic input]
7710 
7711 _pickle.dumps
7712 
7713   obj: object
7714   protocol: object = None
7715   *
7716   fix_imports: bool = True
7717   buffer_callback: object = None
7718 
7719 Return the pickled representation of the object as a bytes object.
7720 
7721 The optional *protocol* argument tells the pickler to use the given
7722 protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The default
7723 protocol is 4. It was introduced in Python 3.4, and is incompatible
7724 with previous versions.
7725 
7726 Specifying a negative protocol version selects the highest protocol
7727 version supported.  The higher the protocol used, the more recent the
7728 version of Python needed to read the pickle produced.
7729 
7730 If *fix_imports* is True and *protocol* is less than 3, pickle will
7731 try to map the new Python 3 names to the old module names used in
7732 Python 2, so that the pickle data stream is readable with Python 2.
7733 
7734 If *buffer_callback* is None (the default), buffer views are serialized
7735 into *file* as part of the pickle stream.  It is an error if
7736 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7737 
7738 [clinic start generated code]*/
7739 
7740 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7741 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7742                    int fix_imports, PyObject *buffer_callback)
7743 /*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/
7744 {
7745     PyObject *result;
7746     PicklerObject *pickler = _Pickler_New();
7747 
7748     if (pickler == NULL)
7749         return NULL;
7750 
7751     if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7752         goto error;
7753 
7754     if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7755         goto error;
7756 
7757     if (dump(pickler, obj) < 0)
7758         goto error;
7759 
7760     result = _Pickler_GetString(pickler);
7761     Py_DECREF(pickler);
7762     return result;
7763 
7764   error:
7765     Py_XDECREF(pickler);
7766     return NULL;
7767 }
7768 
7769 /*[clinic input]
7770 
7771 _pickle.load
7772 
7773   file: object
7774   *
7775   fix_imports: bool = True
7776   encoding: str = 'ASCII'
7777   errors: str = 'strict'
7778   buffers: object(c_default="NULL") = ()
7779 
7780 Read and return an object from the pickle data stored in a file.
7781 
7782 This is equivalent to ``Unpickler(file).load()``, but may be more
7783 efficient.
7784 
7785 The protocol version of the pickle is detected automatically, so no
7786 protocol argument is needed.  Bytes past the pickled object's
7787 representation are ignored.
7788 
7789 The argument *file* must have two methods, a read() method that takes
7790 an integer argument, and a readline() method that requires no
7791 arguments.  Both methods should return bytes.  Thus *file* can be a
7792 binary file object opened for reading, an io.BytesIO object, or any
7793 other custom object that meets this interface.
7794 
7795 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7796 which are used to control compatibility support for pickle stream
7797 generated by Python 2.  If *fix_imports* is True, pickle will try to
7798 map the old Python 2 names to the new names used in Python 3.  The
7799 *encoding* and *errors* tell pickle how to decode 8-bit string
7800 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7801 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7802 string instances as bytes objects.
7803 [clinic start generated code]*/
7804 
7805 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7806 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7807                   const char *encoding, const char *errors,
7808                   PyObject *buffers)
7809 /*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
7810 {
7811     PyObject *result;
7812     UnpicklerObject *unpickler = _Unpickler_New();
7813 
7814     if (unpickler == NULL)
7815         return NULL;
7816 
7817     if (_Unpickler_SetInputStream(unpickler, file) < 0)
7818         goto error;
7819 
7820     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7821         goto error;
7822 
7823     if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7824         goto error;
7825 
7826     unpickler->fix_imports = fix_imports;
7827 
7828     result = load(unpickler);
7829     Py_DECREF(unpickler);
7830     return result;
7831 
7832   error:
7833     Py_XDECREF(unpickler);
7834     return NULL;
7835 }
7836 
7837 /*[clinic input]
7838 
7839 _pickle.loads
7840 
7841   data: object
7842   /
7843   *
7844   fix_imports: bool = True
7845   encoding: str = 'ASCII'
7846   errors: str = 'strict'
7847   buffers: object(c_default="NULL") = ()
7848 
7849 Read and return an object from the given pickle data.
7850 
7851 The protocol version of the pickle is detected automatically, so no
7852 protocol argument is needed.  Bytes past the pickled object's
7853 representation are ignored.
7854 
7855 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7856 which are used to control compatibility support for pickle stream
7857 generated by Python 2.  If *fix_imports* is True, pickle will try to
7858 map the old Python 2 names to the new names used in Python 3.  The
7859 *encoding* and *errors* tell pickle how to decode 8-bit string
7860 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7861 respectively.  The *encoding* can be 'bytes' to read these 8-bit
7862 string instances as bytes objects.
7863 [clinic start generated code]*/
7864 
7865 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7866 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7867                    const char *encoding, const char *errors,
7868                    PyObject *buffers)
7869 /*[clinic end generated code: output=82ac1e6b588e6d02 input=b3615540d0535087]*/
7870 {
7871     PyObject *result;
7872     UnpicklerObject *unpickler = _Unpickler_New();
7873 
7874     if (unpickler == NULL)
7875         return NULL;
7876 
7877     if (_Unpickler_SetStringInput(unpickler, data) < 0)
7878         goto error;
7879 
7880     if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7881         goto error;
7882 
7883     if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7884         goto error;
7885 
7886     unpickler->fix_imports = fix_imports;
7887 
7888     result = load(unpickler);
7889     Py_DECREF(unpickler);
7890     return result;
7891 
7892   error:
7893     Py_XDECREF(unpickler);
7894     return NULL;
7895 }
7896 
7897 static struct PyMethodDef pickle_methods[] = {
7898     _PICKLE_DUMP_METHODDEF
7899     _PICKLE_DUMPS_METHODDEF
7900     _PICKLE_LOAD_METHODDEF
7901     _PICKLE_LOADS_METHODDEF
7902     {NULL, NULL} /* sentinel */
7903 };
7904 
7905 static int
pickle_clear(PyObject * m)7906 pickle_clear(PyObject *m)
7907 {
7908     _Pickle_ClearState(_Pickle_GetState(m));
7909     return 0;
7910 }
7911 
7912 static void
pickle_free(PyObject * m)7913 pickle_free(PyObject *m)
7914 {
7915     _Pickle_ClearState(_Pickle_GetState(m));
7916 }
7917 
7918 static int
pickle_traverse(PyObject * m,visitproc visit,void * arg)7919 pickle_traverse(PyObject *m, visitproc visit, void *arg)
7920 {
7921     PickleState *st = _Pickle_GetState(m);
7922     Py_VISIT(st->PickleError);
7923     Py_VISIT(st->PicklingError);
7924     Py_VISIT(st->UnpicklingError);
7925     Py_VISIT(st->dispatch_table);
7926     Py_VISIT(st->extension_registry);
7927     Py_VISIT(st->extension_cache);
7928     Py_VISIT(st->inverted_registry);
7929     Py_VISIT(st->name_mapping_2to3);
7930     Py_VISIT(st->import_mapping_2to3);
7931     Py_VISIT(st->name_mapping_3to2);
7932     Py_VISIT(st->import_mapping_3to2);
7933     Py_VISIT(st->codecs_encode);
7934     Py_VISIT(st->getattr);
7935     Py_VISIT(st->partial);
7936     return 0;
7937 }
7938 
7939 static struct PyModuleDef _picklemodule = {
7940     PyModuleDef_HEAD_INIT,
7941     "_pickle",            /* m_name */
7942     pickle_module_doc,    /* m_doc */
7943     sizeof(PickleState),  /* m_size */
7944     pickle_methods,       /* m_methods */
7945     NULL,                 /* m_reload */
7946     pickle_traverse,      /* m_traverse */
7947     pickle_clear,         /* m_clear */
7948     (freefunc)pickle_free /* m_free */
7949 };
7950 
7951 PyMODINIT_FUNC
PyInit__pickle(void)7952 PyInit__pickle(void)
7953 {
7954     PyObject *m;
7955     PickleState *st;
7956 
7957     m = PyState_FindModule(&_picklemodule);
7958     if (m) {
7959         Py_INCREF(m);
7960         return m;
7961     }
7962 
7963     if (PyType_Ready(&Pdata_Type) < 0)
7964         return NULL;
7965     if (PyType_Ready(&PicklerMemoProxyType) < 0)
7966         return NULL;
7967     if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7968         return NULL;
7969 
7970     /* Create the module and add the functions. */
7971     m = PyModule_Create(&_picklemodule);
7972     if (m == NULL)
7973         return NULL;
7974 
7975     /* Add types */
7976     if (PyModule_AddType(m, &Pickler_Type) < 0) {
7977         return NULL;
7978     }
7979     if (PyModule_AddType(m, &Unpickler_Type) < 0) {
7980         return NULL;
7981     }
7982     if (PyModule_AddType(m, &PyPickleBuffer_Type) < 0) {
7983         return NULL;
7984     }
7985 
7986     st = _Pickle_GetState(m);
7987 
7988     /* Initialize the exceptions. */
7989     st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7990     if (st->PickleError == NULL)
7991         return NULL;
7992     st->PicklingError = \
7993         PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7994     if (st->PicklingError == NULL)
7995         return NULL;
7996     st->UnpicklingError = \
7997         PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7998     if (st->UnpicklingError == NULL)
7999         return NULL;
8000 
8001     if (PyModule_AddObjectRef(m, "PickleError", st->PickleError) < 0) {
8002         return NULL;
8003     }
8004     if (PyModule_AddObjectRef(m, "PicklingError", st->PicklingError) < 0) {
8005         return NULL;
8006     }
8007     if (PyModule_AddObjectRef(m, "UnpicklingError", st->UnpicklingError) < 0) {
8008         return NULL;
8009     }
8010     if (_Pickle_InitState(st) < 0)
8011         return NULL;
8012 
8013     return m;
8014 }
8015