/* pickle accelerator C extension: _pickle module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
6
7 #ifndef Py_BUILD_CORE_BUILTIN
8 # define Py_BUILD_CORE_MODULE 1
9 #endif
10
11 #include "Python.h"
12 #include "pycore_ceval.h" // _Py_EnterRecursiveCall()
13 #include "pycore_moduleobject.h" // _PyModule_GetState()
14 #include "pycore_runtime.h" // _Py_ID()
15 #include "pycore_pystate.h" // _PyThreadState_GET()
16 #include "structmember.h" // PyMemberDef
17
18 #include <stdlib.h> // strtol()
19
20 PyDoc_STRVAR(pickle_module_doc,
21 "Optimized C implementation for the Python pickle module.");
22
23 /*[clinic input]
24 module _pickle
25 class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
26 class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
27 class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
28 class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
29 [clinic start generated code]*/
30 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
31
/* Pickle protocol version numbers known to this module.

   Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol.
   Bump DEFAULT_PROTOCOL only when the oldest still supported version of Python
   already includes it. */
enum {
    HIGHEST_PROTOCOL = 5,   /* newest protocol version this file knows about */
    DEFAULT_PROTOCOL = 4    /* presumably used when no protocol is requested;
                               confirm against the Pickler setup code */
};
39
#ifdef MS_WINDOWS
// FLOAT, INT and LONG are already typedefs from windows.h, pulled in via
// pycore_runtime.h.  Rename them here so the opcode enum constants of the
// same name declared below do not collide with those typedefs.
#define FLOAT FLOAT_
#define INT INT_
#define LONG LONG_
#endif
46
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Each constant is a single character value; it is the byte that identifies
   the opcode in the pickle stream (see e.g. how FRAME is stored by
   _Pickler_CommitFrame()). */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4 */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95',

    /* Protocol 5 */
    BYTEARRAY8       = '\x96',
    NEXT_BUFFER      = '\x97',
    READONLY_BUFFER  = '\x98'
};
127
/* Tuning constants shared by the pickler and the unpickler. */
enum {
    /* Keep in sync with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of sync with pickle.py, but it's unclear that
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16,

    /* A frame whose payload is shorter than this many bytes is not emitted as
       a frame: _Pickler_CommitFrame() drops its header instead. */
    FRAME_SIZE_MIN = 4,
    /* Once the payload of the open frame reaches this many bytes,
       _Pickler_OpcodeBoundary() commits it. */
    FRAME_SIZE_TARGET = 64 * 1024,
    /* Bytes reserved in front of each frame: one FRAME opcode byte followed
       by the 8-byte length written by _write_size64(). */
    FRAME_HEADER_SIZE = 9
};
149
150 /*************************************************************************/
151
/* State of the pickle module, per PEP 3121.

   The exception classes are created elsewhere; every other member is filled
   in by _Pickle_InitState() and all members are released by
   _Pickle_ClearState(). */
typedef struct {
    /* Exception classes for pickle. */
    PyObject *PickleError;
    PyObject *PicklingError;
    PyObject *UnpicklingError;

    /* copyreg.dispatch_table, {type_object: pickling_function} */
    PyObject *dispatch_table;

    /* For the extension opcodes EXT1, EXT2 and EXT4. */

    /* copyreg._extension_registry, {(module_name, function_name): code} */
    PyObject *extension_registry;
    /* copyreg._extension_cache, {code: object} */
    PyObject *extension_cache;
    /* copyreg._inverted_registry, {code: (module_name, function_name)} */
    PyObject *inverted_registry;

    /* Import mappings for compatibility with Python 2.x */

    /* _compat_pickle.NAME_MAPPING,
       {(oldmodule, oldname): (newmodule, newname)} */
    PyObject *name_mapping_2to3;
    /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
    PyObject *import_mapping_2to3;
    /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
    PyObject *name_mapping_3to2;
    PyObject *import_mapping_3to2;

    /* codecs.encode, used for saving bytes in older protocols */
    PyObject *codecs_encode;
    /* builtins.getattr, used for saving nested names with protocol < 4 */
    PyObject *getattr;
    /* functools.partial, used for implementing __newobj_ex__ with protocols
       2 and 3 */
    PyObject *partial;
} PickleState;
190
191 /* Forward declaration of the _pickle module definition. */
192 static struct PyModuleDef _picklemodule;
193
194 /* Given a module object, get its per-module state. */
195 static PickleState *
_Pickle_GetState(PyObject * module)196 _Pickle_GetState(PyObject *module)
197 {
198 return (PickleState *)_PyModule_GetState(module);
199 }
200
201 /* Find the module instance imported in the currently running sub-interpreter
202 and get its state. */
203 static PickleState *
_Pickle_GetGlobalState(void)204 _Pickle_GetGlobalState(void)
205 {
206 return _Pickle_GetState(PyState_FindModule(&_picklemodule));
207 }
208
209 /* Clear the given pickle module state. */
210 static void
_Pickle_ClearState(PickleState * st)211 _Pickle_ClearState(PickleState *st)
212 {
213 Py_CLEAR(st->PickleError);
214 Py_CLEAR(st->PicklingError);
215 Py_CLEAR(st->UnpicklingError);
216 Py_CLEAR(st->dispatch_table);
217 Py_CLEAR(st->extension_registry);
218 Py_CLEAR(st->extension_cache);
219 Py_CLEAR(st->inverted_registry);
220 Py_CLEAR(st->name_mapping_2to3);
221 Py_CLEAR(st->import_mapping_2to3);
222 Py_CLEAR(st->name_mapping_3to2);
223 Py_CLEAR(st->import_mapping_3to2);
224 Py_CLEAR(st->codecs_encode);
225 Py_CLEAR(st->getattr);
226 Py_CLEAR(st->partial);
227 }
228
229 /* Initialize the given pickle module state. */
230 static int
_Pickle_InitState(PickleState * st)231 _Pickle_InitState(PickleState *st)
232 {
233 PyObject *copyreg = NULL;
234 PyObject *compat_pickle = NULL;
235 PyObject *codecs = NULL;
236 PyObject *functools = NULL;
237
238 st->getattr = _PyEval_GetBuiltin(&_Py_ID(getattr));
239 if (st->getattr == NULL)
240 goto error;
241
242 copyreg = PyImport_ImportModule("copyreg");
243 if (!copyreg)
244 goto error;
245 st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
246 if (!st->dispatch_table)
247 goto error;
248 if (!PyDict_CheckExact(st->dispatch_table)) {
249 PyErr_Format(PyExc_RuntimeError,
250 "copyreg.dispatch_table should be a dict, not %.200s",
251 Py_TYPE(st->dispatch_table)->tp_name);
252 goto error;
253 }
254 st->extension_registry = \
255 PyObject_GetAttrString(copyreg, "_extension_registry");
256 if (!st->extension_registry)
257 goto error;
258 if (!PyDict_CheckExact(st->extension_registry)) {
259 PyErr_Format(PyExc_RuntimeError,
260 "copyreg._extension_registry should be a dict, "
261 "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
262 goto error;
263 }
264 st->inverted_registry = \
265 PyObject_GetAttrString(copyreg, "_inverted_registry");
266 if (!st->inverted_registry)
267 goto error;
268 if (!PyDict_CheckExact(st->inverted_registry)) {
269 PyErr_Format(PyExc_RuntimeError,
270 "copyreg._inverted_registry should be a dict, "
271 "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
272 goto error;
273 }
274 st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
275 if (!st->extension_cache)
276 goto error;
277 if (!PyDict_CheckExact(st->extension_cache)) {
278 PyErr_Format(PyExc_RuntimeError,
279 "copyreg._extension_cache should be a dict, "
280 "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
281 goto error;
282 }
283 Py_CLEAR(copyreg);
284
285 /* Load the 2.x -> 3.x stdlib module mapping tables */
286 compat_pickle = PyImport_ImportModule("_compat_pickle");
287 if (!compat_pickle)
288 goto error;
289 st->name_mapping_2to3 = \
290 PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
291 if (!st->name_mapping_2to3)
292 goto error;
293 if (!PyDict_CheckExact(st->name_mapping_2to3)) {
294 PyErr_Format(PyExc_RuntimeError,
295 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
296 Py_TYPE(st->name_mapping_2to3)->tp_name);
297 goto error;
298 }
299 st->import_mapping_2to3 = \
300 PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
301 if (!st->import_mapping_2to3)
302 goto error;
303 if (!PyDict_CheckExact(st->import_mapping_2to3)) {
304 PyErr_Format(PyExc_RuntimeError,
305 "_compat_pickle.IMPORT_MAPPING should be a dict, "
306 "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
307 goto error;
308 }
309 /* ... and the 3.x -> 2.x mapping tables */
310 st->name_mapping_3to2 = \
311 PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
312 if (!st->name_mapping_3to2)
313 goto error;
314 if (!PyDict_CheckExact(st->name_mapping_3to2)) {
315 PyErr_Format(PyExc_RuntimeError,
316 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
317 "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
318 goto error;
319 }
320 st->import_mapping_3to2 = \
321 PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
322 if (!st->import_mapping_3to2)
323 goto error;
324 if (!PyDict_CheckExact(st->import_mapping_3to2)) {
325 PyErr_Format(PyExc_RuntimeError,
326 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
327 "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
328 goto error;
329 }
330 Py_CLEAR(compat_pickle);
331
332 codecs = PyImport_ImportModule("codecs");
333 if (codecs == NULL)
334 goto error;
335 st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
336 if (st->codecs_encode == NULL) {
337 goto error;
338 }
339 if (!PyCallable_Check(st->codecs_encode)) {
340 PyErr_Format(PyExc_RuntimeError,
341 "codecs.encode should be a callable, not %.200s",
342 Py_TYPE(st->codecs_encode)->tp_name);
343 goto error;
344 }
345 Py_CLEAR(codecs);
346
347 functools = PyImport_ImportModule("functools");
348 if (!functools)
349 goto error;
350 st->partial = PyObject_GetAttrString(functools, "partial");
351 if (!st->partial)
352 goto error;
353 Py_CLEAR(functools);
354
355 return 0;
356
357 error:
358 Py_CLEAR(copyreg);
359 Py_CLEAR(compat_pickle);
360 Py_CLEAR(codecs);
361 Py_CLEAR(functools);
362 _Pickle_ClearState(st);
363 return -1;
364 }
365
366 /* Helper for calling a function with a single argument quickly.
367
368 This function steals the reference of the given argument. */
369 static PyObject *
_Pickle_FastCall(PyObject * func,PyObject * obj)370 _Pickle_FastCall(PyObject *func, PyObject *obj)
371 {
372 PyObject *result;
373
374 result = PyObject_CallOneArg(func, obj);
375 Py_DECREF(obj);
376 return result;
377 }
378
379 /*************************************************************************/
380
/* Retrieve and deconstruct a method for avoiding a reference cycle
   (pickler -> bound method of pickler -> pickler).

   Looks up attribute `name` on `self`.
   - If the lookup yields nothing, *method_func is cleared, *method_self is
     set to NULL and the return value of _PyObject_LookupAttr() is passed
     through to the caller.
   - If it yields a bound Python method whose instance is `self`, the
     underlying function is stored in *method_func (strong reference) and
     `self` in *method_self (borrowed reference).
   - Otherwise the looked-up object itself is stored in *method_func and
     *method_self is set to NULL.
   The last two cases return 0.  Any previous value of *method_func is
   released. */
static int
init_method_ref(PyObject *self, PyObject *name,
                PyObject **method_func, PyObject **method_self)
{
    PyObject *func, *func2;
    int ret;

    /* *method_func and *method_self should be consistent.  All refcount
       decrements should occur after setting *method_self and *method_func. */
    ret = _PyObject_LookupAttr(self, name, &func);
    if (func == NULL) {
        *method_self = NULL;
        Py_CLEAR(*method_func);
        return ret;
    }

    if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
        /* Deconstruct a bound Python method */
        func2 = PyMethod_GET_FUNCTION(func);
        Py_INCREF(func2);
        *method_self = self; /* borrowed */
        Py_XSETREF(*method_func, func2);
        /* The bound method is dropped only now, after both outputs are set. */
        Py_DECREF(func);
        return 0;
    }
    else {
        *method_self = NULL;
        /* Ownership of `func` moves into *method_func. */
        Py_XSETREF(*method_func, func);
        return 0;
    }
}
414
415 /* Bind a method if it was deconstructed */
416 static PyObject *
reconstruct_method(PyObject * func,PyObject * self)417 reconstruct_method(PyObject *func, PyObject *self)
418 {
419 if (self) {
420 return PyMethod_New(func, self);
421 }
422 else {
423 Py_INCREF(func);
424 return func;
425 }
426 }
427
428 static PyObject *
call_method(PyObject * func,PyObject * self,PyObject * obj)429 call_method(PyObject *func, PyObject *self, PyObject *obj)
430 {
431 if (self) {
432 return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
433 }
434 else {
435 return PyObject_CallOneArg(func, obj);
436 }
437 }
438
439 /*************************************************************************/
440
/* Internal data type used as the unpickling stack.

   The number of items currently on the stack is kept in the object's
   ob_size field (read with Py_SIZE(), written with Py_SET_SIZE()). */
typedef struct {
    PyObject_VAR_HEAD
    PyObject **data;        /* heap array holding the stacked objects */
    int mark_set;           /* is MARK set? */
    Py_ssize_t fence;       /* position of top MARK or 0; items below it
                               cannot be popped */
    Py_ssize_t allocated;   /* number of slots in data allocated */
} Pdata;
449
450 static void
Pdata_dealloc(Pdata * self)451 Pdata_dealloc(Pdata *self)
452 {
453 Py_ssize_t i = Py_SIZE(self);
454 while (--i >= 0) {
455 Py_DECREF(self->data[i]);
456 }
457 PyMem_Free(self->data);
458 PyObject_Free(self);
459 }
460
/* Type object for Pdata.  Only the name, sizes and destructor are set; the
   remaining slots are left zero-initialized. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    sizeof(PyObject *),           /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
468
469 static PyObject *
Pdata_New(void)470 Pdata_New(void)
471 {
472 Pdata *self;
473
474 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
475 return NULL;
476 Py_SET_SIZE(self, 0);
477 self->mark_set = 0;
478 self->fence = 0;
479 self->allocated = 8;
480 self->data = PyMem_Malloc(self->allocated * sizeof(PyObject *));
481 if (self->data)
482 return (PyObject *)self;
483 Py_DECREF(self);
484 return PyErr_NoMemory();
485 }
486
487
488 /* Retain only the initial clearto items. If clearto >= the current
489 * number of items, this is a (non-erroneous) NOP.
490 */
491 static int
Pdata_clear(Pdata * self,Py_ssize_t clearto)492 Pdata_clear(Pdata *self, Py_ssize_t clearto)
493 {
494 Py_ssize_t i = Py_SIZE(self);
495
496 assert(clearto >= self->fence);
497 if (clearto >= i)
498 return 0;
499
500 while (--i >= clearto) {
501 Py_CLEAR(self->data[i]);
502 }
503 Py_SET_SIZE(self, clearto);
504 return 0;
505 }
506
507 static int
Pdata_grow(Pdata * self)508 Pdata_grow(Pdata *self)
509 {
510 PyObject **data = self->data;
511 size_t allocated = (size_t)self->allocated;
512 size_t new_allocated;
513
514 new_allocated = (allocated >> 3) + 6;
515 /* check for integer overflow */
516 if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
517 goto nomemory;
518 new_allocated += allocated;
519 PyMem_RESIZE(data, PyObject *, new_allocated);
520 if (data == NULL)
521 goto nomemory;
522
523 self->data = data;
524 self->allocated = (Py_ssize_t)new_allocated;
525 return 0;
526
527 nomemory:
528 PyErr_NoMemory();
529 return -1;
530 }
531
532 static int
Pdata_stack_underflow(Pdata * self)533 Pdata_stack_underflow(Pdata *self)
534 {
535 PickleState *st = _Pickle_GetGlobalState();
536 PyErr_SetString(st->UnpicklingError,
537 self->mark_set ?
538 "unexpected MARK found" :
539 "unpickling stack underflow");
540 return -1;
541 }
542
543 /* D is a Pdata*. Pop the topmost element and store it into V, which
544 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
545 * is raised and V is set to NULL.
546 */
547 static PyObject *
Pdata_pop(Pdata * self)548 Pdata_pop(Pdata *self)
549 {
550 if (Py_SIZE(self) <= self->fence) {
551 Pdata_stack_underflow(self);
552 return NULL;
553 }
554 Py_SET_SIZE(self, Py_SIZE(self) - 1);
555 return self->data[Py_SIZE(self)];
556 }
557 #define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
558
559 static int
Pdata_push(Pdata * self,PyObject * obj)560 Pdata_push(Pdata *self, PyObject *obj)
561 {
562 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
563 return -1;
564 }
565 self->data[Py_SIZE(self)] = obj;
566 Py_SET_SIZE(self, Py_SIZE(self) + 1);
567 return 0;
568 }
569
/* Push an object on stack, transferring its ownership to the stack.

   If the push fails, the enclosing function returns ER.  Because that return
   happens inside the macro the caller gets no chance to clean up, so the
   reference that was being transferred is released here instead of leaked.
   O is evaluated exactly once. */
#define PDATA_PUSH(D, O, ER) do {                               \
        PyObject *pdata_push_obj_ = (O);                        \
        if (Pdata_push((D), pdata_push_obj_) < 0) {             \
            Py_DECREF(pdata_push_obj_);                         \
            return (ER);                                        \
        }                                                       \
    } while (0)

/* Push an object on stack, adding a new reference to the object.

   If the push fails, the reference added here is dropped again before the
   enclosing function returns ER, so the caller's own reference count is left
   as it was.  O is evaluated exactly once. */
#define PDATA_APPEND(D, O, ER) do {                             \
        PyObject *pdata_append_obj_ = (O);                      \
        Py_INCREF(pdata_append_obj_);                           \
        if (Pdata_push((D), pdata_append_obj_) < 0) {           \
            Py_DECREF(pdata_append_obj_);                       \
            return (ER);                                        \
        }                                                       \
    } while (0)
578
579 static PyObject *
Pdata_poptuple(Pdata * self,Py_ssize_t start)580 Pdata_poptuple(Pdata *self, Py_ssize_t start)
581 {
582 PyObject *tuple;
583 Py_ssize_t len, i, j;
584
585 if (start < self->fence) {
586 Pdata_stack_underflow(self);
587 return NULL;
588 }
589 len = Py_SIZE(self) - start;
590 tuple = PyTuple_New(len);
591 if (tuple == NULL)
592 return NULL;
593 for (i = start, j = 0; j < len; i++, j++)
594 PyTuple_SET_ITEM(tuple, j, self->data[i]);
595
596 Py_SET_SIZE(self, start);
597 return tuple;
598 }
599
600 static PyObject *
Pdata_poplist(Pdata * self,Py_ssize_t start)601 Pdata_poplist(Pdata *self, Py_ssize_t start)
602 {
603 PyObject *list;
604 Py_ssize_t len, i, j;
605
606 len = Py_SIZE(self) - start;
607 list = PyList_New(len);
608 if (list == NULL)
609 return NULL;
610 for (i = start, j = 0; j < len; i++, j++)
611 PyList_SET_ITEM(list, j, self->data[i]);
612
613 Py_SET_SIZE(self, start);
614 return list;
615 }
616
/* One slot of the pickler memo hashtable.  A slot whose me_key is NULL is
   empty. */
typedef struct {
    PyObject *me_key;       /* object used as key (strong reference), or NULL */
    Py_ssize_t me_value;    /* integer associated with the key */
} PyMemoEntry;

/* Open-addressing hashtable keyed by object identity, used by the pickler
   for memoization. */
typedef struct {
    size_t mt_mask;         /* mt_allocated - 1, used to wrap probe indices */
    size_t mt_used;         /* number of occupied slots */
    size_t mt_allocated;    /* number of slots in mt_table (a power of two) */
    PyMemoEntry *mt_table;  /* the slot array */
} PyMemoTable;
628
/* Instance layout of a Pickler object. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyMemoTable *memo;          /* Memo table, keeps track of the seen
                                   objects to support self-referential objects
                                   pickling. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
                                   is an unbound method, NULL otherwise */
    PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
    PyObject *reducer_override; /* hook for invoking user-defined callbacks
                                   instead of save_global when pickling
                                   functions and classes */

    PyObject *write;            /* write() method of the output stream. */
    PyObject *output_buffer;    /* Write into a local bytearray buffer before
                                   flushing to the stream. */
    Py_ssize_t output_len;      /* Length of output_buffer. */
    Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int framing;                /* True when framing is enabled, proto >= 4 */
    Py_ssize_t frame_start;     /* Position in output_buffer where the
                                   current frame begins. -1 if there
                                   is no frame currently open. */

    Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
    int fast;                   /* Enable fast mode if set to a true value.
                                   Fast mode disables the usage of the memo,
                                   therefore speeding up the pickling process
                                   by not generating superfluous PUT opcodes.
                                   It should not be used with self-referential
                                   objects. */
    int fast_nesting;
    int fix_imports;            /* Indicate whether Pickler should fix
                                   the name of globals for Python 2.x. */
    PyObject *fast_memo;
    PyObject *buffer_callback;  /* Callback for out-of-band buffers, or NULL */
} PicklerObject;
667
/* Instance layout of an Unpickler object. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */

    /* The unpickler memo is just an array of PyObject *s. Using a dict
       is unnecessary, since the keys are contiguous ints. */
    PyObject **memo;
    size_t memo_size;           /* Capacity of the memo array */
    size_t memo_len;            /* Number of objects in the memo */

    PyObject *pers_func;        /* persistent_load() method, can be NULL. */
    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
                                   is an unbound method, NULL otherwise */

    Py_buffer buffer;
    char *input_buffer;
    char *input_line;
    Py_ssize_t input_len;
    Py_ssize_t next_read_idx;
    Py_ssize_t prefetched_idx;  /* index of first prefetched byte */

    PyObject *read;             /* read() method of the input stream. */
    PyObject *readinto;         /* readinto() method of the input stream. */
    PyObject *readline;         /* readline() method of the input stream. */
    PyObject *peek;             /* peek() method of the input stream, or NULL */
    PyObject *buffers;          /* iterable of out-of-band buffers, or NULL */

    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value is
                                   "strict". */
    Py_ssize_t *marks;          /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
    int proto;                  /* Protocol of the pickle loaded. */
    int fix_imports;            /* Indicate whether Unpickler should fix
                                   the name of globals pickled by Python 2.x. */
} UnpicklerObject;
709
/* Proxy object giving access to the memo table of a Pickler. */
typedef struct {
    PyObject_HEAD
    PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
} PicklerMemoProxyObject;

/* Proxy object giving access to the memo of an Unpickler. */
typedef struct {
    PyObject_HEAD
    UnpicklerObject *unpickler; /* Unpickler whose memo we're proxying. */
} UnpicklerMemoProxyObject;
719
720 /* Forward declarations */
721 static int save(PicklerObject *, PyObject *, int);
722 static int save_reduce(PicklerObject *, PyObject *, PyObject *);
723 static PyTypeObject Pickler_Type;
724 static PyTypeObject Unpickler_Type;
725
726 #include "clinic/_pickle.c.h"
727
728 /*************************************************************************
729 A custom hashtable mapping void* to Python ints. This is used by the pickler
730 for memoization. Using a custom hashtable rather than PyDict allows us to skip
731 a bunch of unnecessary object creation. This makes a huge performance
732 difference. */
733
/* Number of slots a new memo table starts with; resizing doubles up from
   this value, so table sizes are always MT_MINSIZE times a power of two. */
#define MT_MINSIZE 8
/* Bits by which the perturbation value is shifted right after each probe in
   _PyMemoTable_Lookup(). */
#define PERTURB_SHIFT 5
736
737
738 static PyMemoTable *
PyMemoTable_New(void)739 PyMemoTable_New(void)
740 {
741 PyMemoTable *memo = PyMem_Malloc(sizeof(PyMemoTable));
742 if (memo == NULL) {
743 PyErr_NoMemory();
744 return NULL;
745 }
746
747 memo->mt_used = 0;
748 memo->mt_allocated = MT_MINSIZE;
749 memo->mt_mask = MT_MINSIZE - 1;
750 memo->mt_table = PyMem_Malloc(MT_MINSIZE * sizeof(PyMemoEntry));
751 if (memo->mt_table == NULL) {
752 PyMem_Free(memo);
753 PyErr_NoMemory();
754 return NULL;
755 }
756 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
757
758 return memo;
759 }
760
761 static PyMemoTable *
PyMemoTable_Copy(PyMemoTable * self)762 PyMemoTable_Copy(PyMemoTable *self)
763 {
764 PyMemoTable *new = PyMemoTable_New();
765 if (new == NULL)
766 return NULL;
767
768 new->mt_used = self->mt_used;
769 new->mt_allocated = self->mt_allocated;
770 new->mt_mask = self->mt_mask;
771 /* The table we get from _New() is probably smaller than we wanted.
772 Free it and allocate one that's the right size. */
773 PyMem_Free(new->mt_table);
774 new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
775 if (new->mt_table == NULL) {
776 PyMem_Free(new);
777 PyErr_NoMemory();
778 return NULL;
779 }
780 for (size_t i = 0; i < self->mt_allocated; i++) {
781 Py_XINCREF(self->mt_table[i].me_key);
782 }
783 memcpy(new->mt_table, self->mt_table,
784 sizeof(PyMemoEntry) * self->mt_allocated);
785
786 return new;
787 }
788
789 static Py_ssize_t
PyMemoTable_Size(PyMemoTable * self)790 PyMemoTable_Size(PyMemoTable *self)
791 {
792 return self->mt_used;
793 }
794
795 static int
PyMemoTable_Clear(PyMemoTable * self)796 PyMemoTable_Clear(PyMemoTable *self)
797 {
798 Py_ssize_t i = self->mt_allocated;
799
800 while (--i >= 0) {
801 Py_XDECREF(self->mt_table[i].me_key);
802 }
803 self->mt_used = 0;
804 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
805 return 0;
806 }
807
808 static void
PyMemoTable_Del(PyMemoTable * self)809 PyMemoTable_Del(PyMemoTable *self)
810 {
811 if (self == NULL)
812 return;
813 PyMemoTable_Clear(self);
814
815 PyMem_Free(self->mt_table);
816 PyMem_Free(self);
817 }
818
819 /* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
820 can be considerably simpler than dictobject.c's lookdict(). */
821 static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable * self,PyObject * key)822 _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
823 {
824 size_t i;
825 size_t perturb;
826 size_t mask = self->mt_mask;
827 PyMemoEntry *table = self->mt_table;
828 PyMemoEntry *entry;
829 Py_hash_t hash = (Py_hash_t)key >> 3;
830
831 i = hash & mask;
832 entry = &table[i];
833 if (entry->me_key == NULL || entry->me_key == key)
834 return entry;
835
836 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
837 i = (i << 2) + i + perturb + 1;
838 entry = &table[i & mask];
839 if (entry->me_key == NULL || entry->me_key == key)
840 return entry;
841 }
842 Py_UNREACHABLE();
843 }
844
845 /* Returns -1 on failure, 0 on success. */
846 static int
_PyMemoTable_ResizeTable(PyMemoTable * self,size_t min_size)847 _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
848 {
849 PyMemoEntry *oldtable = NULL;
850 PyMemoEntry *oldentry, *newentry;
851 size_t new_size = MT_MINSIZE;
852 size_t to_process;
853
854 assert(min_size > 0);
855
856 if (min_size > PY_SSIZE_T_MAX) {
857 PyErr_NoMemory();
858 return -1;
859 }
860
861 /* Find the smallest valid table size >= min_size. */
862 while (new_size < min_size) {
863 new_size <<= 1;
864 }
865 /* new_size needs to be a power of two. */
866 assert((new_size & (new_size - 1)) == 0);
867
868 /* Allocate new table. */
869 oldtable = self->mt_table;
870 self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
871 if (self->mt_table == NULL) {
872 self->mt_table = oldtable;
873 PyErr_NoMemory();
874 return -1;
875 }
876 self->mt_allocated = new_size;
877 self->mt_mask = new_size - 1;
878 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
879
880 /* Copy entries from the old table. */
881 to_process = self->mt_used;
882 for (oldentry = oldtable; to_process > 0; oldentry++) {
883 if (oldentry->me_key != NULL) {
884 to_process--;
885 /* newentry is a pointer to a chunk of the new
886 mt_table, so we're setting the key:value pair
887 in-place. */
888 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
889 newentry->me_key = oldentry->me_key;
890 newentry->me_value = oldentry->me_value;
891 }
892 }
893
894 /* Deallocate the old table. */
895 PyMem_Free(oldtable);
896 return 0;
897 }
898
899 /* Returns NULL on failure, a pointer to the value otherwise. */
900 static Py_ssize_t *
PyMemoTable_Get(PyMemoTable * self,PyObject * key)901 PyMemoTable_Get(PyMemoTable *self, PyObject *key)
902 {
903 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
904 if (entry->me_key == NULL)
905 return NULL;
906 return &entry->me_value;
907 }
908
909 /* Returns -1 on failure, 0 on success. */
910 static int
PyMemoTable_Set(PyMemoTable * self,PyObject * key,Py_ssize_t value)911 PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
912 {
913 PyMemoEntry *entry;
914
915 assert(key != NULL);
916
917 entry = _PyMemoTable_Lookup(self, key);
918 if (entry->me_key != NULL) {
919 entry->me_value = value;
920 return 0;
921 }
922 Py_INCREF(key);
923 entry->me_key = key;
924 entry->me_value = value;
925 self->mt_used++;
926
927 /* If we added a key, we can safely resize. Otherwise just return!
928 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
929 *
930 * Quadrupling the size improves average table sparseness
931 * (reducing collisions) at the cost of some memory. It also halves
932 * the number of expensive resize operations in a growing memo table.
933 *
934 * Very large memo tables (over 50K items) use doubling instead.
935 * This may help applications with severe memory constraints.
936 */
937 if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
938 return 0;
939 }
940 // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
941 size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
942 return _PyMemoTable_ResizeTable(self, desired_size);
943 }
944
945 #undef MT_MINSIZE
946 #undef PERTURB_SHIFT
947
948 /*************************************************************************/
949
950
951 static int
_Pickler_ClearBuffer(PicklerObject * self)952 _Pickler_ClearBuffer(PicklerObject *self)
953 {
954 Py_XSETREF(self->output_buffer,
955 PyBytes_FromStringAndSize(NULL, self->max_output_len));
956 if (self->output_buffer == NULL)
957 return -1;
958 self->output_len = 0;
959 self->frame_start = -1;
960 return 0;
961 }
962
/* Store `value` into out[0..7] as a little-endian 64-bit integer.  When
   size_t is narrower than 8 bytes the remaining high bytes are zero. */
static void
_write_size64(char *out, size_t value)
{
    static_assert(sizeof(size_t) <= 8, "size_t is larger than 64-bit");

    size_t pos = 0;

    /* Emit the bytes of value, least significant first. */
    while (pos < sizeof(size_t)) {
        out[pos] = (unsigned char)(value & 0xff);
        value >>= 8;
        pos++;
    }
    /* Zero-pad up to 8 bytes. */
    while (pos < 8) {
        out[pos] = 0;
        pos++;
    }
}
977
978 static int
_Pickler_CommitFrame(PicklerObject * self)979 _Pickler_CommitFrame(PicklerObject *self)
980 {
981 size_t frame_len;
982 char *qdata;
983
984 if (!self->framing || self->frame_start == -1)
985 return 0;
986 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
987 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
988 if (frame_len >= FRAME_SIZE_MIN) {
989 qdata[0] = FRAME;
990 _write_size64(qdata + 1, frame_len);
991 }
992 else {
993 memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
994 self->output_len -= FRAME_HEADER_SIZE;
995 }
996 self->frame_start = -1;
997 return 0;
998 }
999
1000 static PyObject *
_Pickler_GetString(PicklerObject * self)1001 _Pickler_GetString(PicklerObject *self)
1002 {
1003 PyObject *output_buffer = self->output_buffer;
1004
1005 assert(self->output_buffer != NULL);
1006
1007 if (_Pickler_CommitFrame(self))
1008 return NULL;
1009
1010 self->output_buffer = NULL;
1011 /* Resize down to exact size */
1012 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
1013 return NULL;
1014 return output_buffer;
1015 }
1016
1017 static int
_Pickler_FlushToFile(PicklerObject * self)1018 _Pickler_FlushToFile(PicklerObject *self)
1019 {
1020 PyObject *output, *result;
1021
1022 assert(self->write != NULL);
1023
1024 /* This will commit the frame first */
1025 output = _Pickler_GetString(self);
1026 if (output == NULL)
1027 return -1;
1028
1029 result = _Pickle_FastCall(self->write, output);
1030 Py_XDECREF(result);
1031 return (result == NULL) ? -1 : 0;
1032 }
1033
1034 static int
_Pickler_OpcodeBoundary(PicklerObject * self)1035 _Pickler_OpcodeBoundary(PicklerObject *self)
1036 {
1037 Py_ssize_t frame_len;
1038
1039 if (!self->framing || self->frame_start == -1) {
1040 return 0;
1041 }
1042 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1043 if (frame_len >= FRAME_SIZE_TARGET) {
1044 if(_Pickler_CommitFrame(self)) {
1045 return -1;
1046 }
1047 /* Flush the content of the committed frame to the underlying
1048 * file and reuse the pickler buffer for the next frame so as
1049 * to limit memory usage when dumping large complex objects to
1050 * a file.
1051 *
1052 * self->write is NULL when called via dumps.
1053 */
1054 if (self->write != NULL) {
1055 if (_Pickler_FlushToFile(self) < 0) {
1056 return -1;
1057 }
1058 if (_Pickler_ClearBuffer(self) < 0) {
1059 return -1;
1060 }
1061 }
1062 }
1063 return 0;
1064 }
1065
1066 static Py_ssize_t
_Pickler_Write(PicklerObject * self,const char * s,Py_ssize_t data_len)1067 _Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
1068 {
1069 Py_ssize_t i, n, required;
1070 char *buffer;
1071 int need_new_frame;
1072
1073 assert(s != NULL);
1074 need_new_frame = (self->framing && self->frame_start == -1);
1075
1076 if (need_new_frame)
1077 n = data_len + FRAME_HEADER_SIZE;
1078 else
1079 n = data_len;
1080
1081 required = self->output_len + n;
1082 if (required > self->max_output_len) {
1083 /* Make place in buffer for the pickle chunk */
1084 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1085 PyErr_NoMemory();
1086 return -1;
1087 }
1088 self->max_output_len = (self->output_len + n) / 2 * 3;
1089 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1090 return -1;
1091 }
1092 buffer = PyBytes_AS_STRING(self->output_buffer);
1093 if (need_new_frame) {
1094 /* Setup new frame */
1095 Py_ssize_t frame_start = self->output_len;
1096 self->frame_start = frame_start;
1097 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1098 /* Write an invalid value, for debugging */
1099 buffer[frame_start + i] = 0xFE;
1100 }
1101 self->output_len += FRAME_HEADER_SIZE;
1102 }
1103 if (data_len < 8) {
1104 /* This is faster than memcpy when the string is short. */
1105 for (i = 0; i < data_len; i++) {
1106 buffer[self->output_len + i] = s[i];
1107 }
1108 }
1109 else {
1110 memcpy(buffer + self->output_len, s, data_len);
1111 }
1112 self->output_len += data_len;
1113 return data_len;
1114 }
1115
1116 static PicklerObject *
_Pickler_New(void)1117 _Pickler_New(void)
1118 {
1119 PicklerObject *self;
1120
1121 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1122 if (self == NULL)
1123 return NULL;
1124
1125 self->pers_func = NULL;
1126 self->dispatch_table = NULL;
1127 self->buffer_callback = NULL;
1128 self->write = NULL;
1129 self->proto = 0;
1130 self->bin = 0;
1131 self->framing = 0;
1132 self->frame_start = -1;
1133 self->fast = 0;
1134 self->fast_nesting = 0;
1135 self->fix_imports = 0;
1136 self->fast_memo = NULL;
1137 self->max_output_len = WRITE_BUF_SIZE;
1138 self->output_len = 0;
1139 self->reducer_override = NULL;
1140
1141 self->memo = PyMemoTable_New();
1142 self->output_buffer = PyBytes_FromStringAndSize(NULL,
1143 self->max_output_len);
1144
1145 if (self->memo == NULL || self->output_buffer == NULL) {
1146 Py_DECREF(self);
1147 return NULL;
1148 }
1149
1150 PyObject_GC_Track(self);
1151 return self;
1152 }
1153
1154 static int
_Pickler_SetProtocol(PicklerObject * self,PyObject * protocol,int fix_imports)1155 _Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
1156 {
1157 long proto;
1158
1159 if (protocol == Py_None) {
1160 proto = DEFAULT_PROTOCOL;
1161 }
1162 else {
1163 proto = PyLong_AsLong(protocol);
1164 if (proto < 0) {
1165 if (proto == -1 && PyErr_Occurred())
1166 return -1;
1167 proto = HIGHEST_PROTOCOL;
1168 }
1169 else if (proto > HIGHEST_PROTOCOL) {
1170 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1171 HIGHEST_PROTOCOL);
1172 return -1;
1173 }
1174 }
1175 self->proto = (int)proto;
1176 self->bin = proto > 0;
1177 self->fix_imports = fix_imports && proto < 3;
1178 return 0;
1179 }
1180
1181 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1182 be called once on a freshly created Pickler. */
1183 static int
_Pickler_SetOutputStream(PicklerObject * self,PyObject * file)1184 _Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1185 {
1186 assert(file != NULL);
1187 if (_PyObject_LookupAttr(file, &_Py_ID(write), &self->write) < 0) {
1188 return -1;
1189 }
1190 if (self->write == NULL) {
1191 PyErr_SetString(PyExc_TypeError,
1192 "file must have a 'write' attribute");
1193 return -1;
1194 }
1195
1196 return 0;
1197 }
1198
1199 static int
_Pickler_SetBufferCallback(PicklerObject * self,PyObject * buffer_callback)1200 _Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1201 {
1202 if (buffer_callback == Py_None) {
1203 buffer_callback = NULL;
1204 }
1205 if (buffer_callback != NULL && self->proto < 5) {
1206 PyErr_SetString(PyExc_ValueError,
1207 "buffer_callback needs protocol >= 5");
1208 return -1;
1209 }
1210
1211 Py_XINCREF(buffer_callback);
1212 self->buffer_callback = buffer_callback;
1213 return 0;
1214 }
1215
1216 /* Returns the size of the input on success, -1 on failure. This takes its
1217 own reference to `input`. */
1218 static Py_ssize_t
_Unpickler_SetStringInput(UnpicklerObject * self,PyObject * input)1219 _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1220 {
1221 if (self->buffer.buf != NULL)
1222 PyBuffer_Release(&self->buffer);
1223 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1224 return -1;
1225 self->input_buffer = self->buffer.buf;
1226 self->input_len = self->buffer.len;
1227 self->next_read_idx = 0;
1228 self->prefetched_idx = self->input_len;
1229 return self->input_len;
1230 }
1231
1232 static int
bad_readline(void)1233 bad_readline(void)
1234 {
1235 PickleState *st = _Pickle_GetGlobalState();
1236 PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1237 return -1;
1238 }
1239
1240 /* Skip any consumed data that was only prefetched using peek() */
1241 static int
_Unpickler_SkipConsumed(UnpicklerObject * self)1242 _Unpickler_SkipConsumed(UnpicklerObject *self)
1243 {
1244 Py_ssize_t consumed;
1245 PyObject *r;
1246
1247 consumed = self->next_read_idx - self->prefetched_idx;
1248 if (consumed <= 0)
1249 return 0;
1250
1251 assert(self->peek); /* otherwise we did something wrong */
1252 /* This makes a useless copy... */
1253 r = PyObject_CallFunction(self->read, "n", consumed);
1254 if (r == NULL)
1255 return -1;
1256 Py_DECREF(r);
1257
1258 self->prefetched_idx = self->next_read_idx;
1259 return 0;
1260 }
1261
1262 static const Py_ssize_t READ_WHOLE_LINE = -1;
1263
1264 /* If reading from a file, we need to only pull the bytes we need, since there
1265 may be multiple pickle objects arranged contiguously in the same input
1266 buffer.
1267
1268 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1269 bytes from the input stream/buffer.
1270
1271 Update the unpickler's input buffer with the newly-read data. Returns -1 on
1272 failure; on success, returns the number of bytes read from the file.
1273
1274 On success, self->input_len will be 0; this is intentional so that when
1275 unpickling from a file, the "we've run out of data" code paths will trigger,
1276 causing the Unpickler to go back to the file for more data. Use the returned
1277 size to tell you how much data you can process. */
1278 static Py_ssize_t
_Unpickler_ReadFromFile(UnpicklerObject * self,Py_ssize_t n)1279 _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1280 {
1281 PyObject *data;
1282 Py_ssize_t read_size;
1283
1284 assert(self->read != NULL);
1285
1286 if (_Unpickler_SkipConsumed(self) < 0)
1287 return -1;
1288
1289 if (n == READ_WHOLE_LINE) {
1290 data = PyObject_CallNoArgs(self->readline);
1291 }
1292 else {
1293 PyObject *len;
1294 /* Prefetch some data without advancing the file pointer, if possible */
1295 if (self->peek && n < PREFETCH) {
1296 len = PyLong_FromSsize_t(PREFETCH);
1297 if (len == NULL)
1298 return -1;
1299 data = _Pickle_FastCall(self->peek, len);
1300 if (data == NULL) {
1301 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1302 return -1;
1303 /* peek() is probably not supported by the given file object */
1304 PyErr_Clear();
1305 Py_CLEAR(self->peek);
1306 }
1307 else {
1308 read_size = _Unpickler_SetStringInput(self, data);
1309 Py_DECREF(data);
1310 self->prefetched_idx = 0;
1311 if (n <= read_size)
1312 return n;
1313 }
1314 }
1315 len = PyLong_FromSsize_t(n);
1316 if (len == NULL)
1317 return -1;
1318 data = _Pickle_FastCall(self->read, len);
1319 }
1320 if (data == NULL)
1321 return -1;
1322
1323 read_size = _Unpickler_SetStringInput(self, data);
1324 Py_DECREF(data);
1325 return read_size;
1326 }
1327
1328 /* Don't call it directly: use _Unpickler_Read() */
1329 static Py_ssize_t
_Unpickler_ReadImpl(UnpicklerObject * self,char ** s,Py_ssize_t n)1330 _Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
1331 {
1332 Py_ssize_t num_read;
1333
1334 *s = NULL;
1335 if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1336 PickleState *st = _Pickle_GetGlobalState();
1337 PyErr_SetString(st->UnpicklingError,
1338 "read would overflow (invalid bytecode)");
1339 return -1;
1340 }
1341
1342 /* This case is handled by the _Unpickler_Read() macro for efficiency */
1343 assert(self->next_read_idx + n > self->input_len);
1344
1345 if (!self->read)
1346 return bad_readline();
1347
1348 /* Extend the buffer to satisfy desired size */
1349 num_read = _Unpickler_ReadFromFile(self, n);
1350 if (num_read < 0)
1351 return -1;
1352 if (num_read < n)
1353 return bad_readline();
1354 *s = self->input_buffer;
1355 self->next_read_idx = n;
1356 return n;
1357 }
1358
1359 /* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1360 *
1361 * This should only be used for non-small data reads where potentially
1362 * avoiding a copy is beneficial. This method does not try to prefetch
1363 * more data into the input buffer.
1364 *
1365 * _Unpickler_Read() is recommended in most cases.
1366 */
1367 static Py_ssize_t
_Unpickler_ReadInto(UnpicklerObject * self,char * buf,Py_ssize_t n)1368 _Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
1369 {
1370 assert(n != READ_WHOLE_LINE);
1371
1372 /* Read from available buffer data, if any */
1373 Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1374 if (in_buffer > 0) {
1375 Py_ssize_t to_read = Py_MIN(in_buffer, n);
1376 memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1377 self->next_read_idx += to_read;
1378 buf += to_read;
1379 n -= to_read;
1380 if (n == 0) {
1381 /* Entire read was satisfied from buffer */
1382 return n;
1383 }
1384 }
1385
1386 /* Read from file */
1387 if (!self->read) {
1388 /* We're unpickling memory, this means the input is truncated */
1389 return bad_readline();
1390 }
1391 if (_Unpickler_SkipConsumed(self) < 0) {
1392 return -1;
1393 }
1394
1395 if (!self->readinto) {
1396 /* readinto() not supported on file-like object, fall back to read()
1397 * and copy into destination buffer (bpo-39681) */
1398 PyObject* len = PyLong_FromSsize_t(n);
1399 if (len == NULL) {
1400 return -1;
1401 }
1402 PyObject* data = _Pickle_FastCall(self->read, len);
1403 if (data == NULL) {
1404 return -1;
1405 }
1406 if (!PyBytes_Check(data)) {
1407 PyErr_Format(PyExc_ValueError,
1408 "read() returned non-bytes object (%R)",
1409 Py_TYPE(data));
1410 Py_DECREF(data);
1411 return -1;
1412 }
1413 Py_ssize_t read_size = PyBytes_GET_SIZE(data);
1414 if (read_size < n) {
1415 Py_DECREF(data);
1416 return bad_readline();
1417 }
1418 memcpy(buf, PyBytes_AS_STRING(data), n);
1419 Py_DECREF(data);
1420 return n;
1421 }
1422
1423 /* Call readinto() into user buffer */
1424 PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1425 if (buf_obj == NULL) {
1426 return -1;
1427 }
1428 PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1429 if (read_size_obj == NULL) {
1430 return -1;
1431 }
1432 Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1433 Py_DECREF(read_size_obj);
1434
1435 if (read_size < 0) {
1436 if (!PyErr_Occurred()) {
1437 PyErr_SetString(PyExc_ValueError,
1438 "readinto() returned negative size");
1439 }
1440 return -1;
1441 }
1442 if (read_size < n) {
1443 return bad_readline();
1444 }
1445 return n;
1446 }
1447
/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.

   This should be used for all data reads, rather than accessing the unpickler's
   input buffer directly. This method deals correctly with reading from input
   streams, which the input buffer doesn't deal with.

   Fast path: when at least `n` bytes remain between self->next_read_idx and
   self->input_len, `*s` is pointed at them inside the input buffer and
   next_read_idx is advanced by `n`.  Otherwise the request is handed to
   _Unpickler_ReadImpl().

   This is a macro: `self` and `n` are evaluated more than once, so pass
   expressions without side effects.

   NOTE(review): this comment used to state that next_read_idx is not updated
   when reading from a file-like object; _Unpickler_ReadImpl() sets it to `n`
   after a file read.  Either way, use this macro's return value to know how
   many bytes you can consume.

   Returns -1 (with an exception set) on failure. On success, return the
   number of chars read. */
#define _Unpickler_Read(self, s, n) \
    (((n) <= (self)->input_len - (self)->next_read_idx)      \
     ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
        (self)->next_read_idx += (n),                        \
        (n))                                                 \
     : _Unpickler_ReadImpl(self, (s), (n)))
1467
1468 static Py_ssize_t
_Unpickler_CopyLine(UnpicklerObject * self,char * line,Py_ssize_t len,char ** result)1469 _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1470 char **result)
1471 {
1472 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1473 if (input_line == NULL) {
1474 PyErr_NoMemory();
1475 return -1;
1476 }
1477
1478 memcpy(input_line, line, len);
1479 input_line[len] = '\0';
1480 self->input_line = input_line;
1481 *result = self->input_line;
1482 return len;
1483 }
1484
1485 /* Read a line from the input stream/buffer. If we run off the end of the input
1486 before hitting \n, raise an error.
1487
1488 Returns the number of chars read, or -1 on failure. */
1489 static Py_ssize_t
_Unpickler_Readline(UnpicklerObject * self,char ** result)1490 _Unpickler_Readline(UnpicklerObject *self, char **result)
1491 {
1492 Py_ssize_t i, num_read;
1493
1494 for (i = self->next_read_idx; i < self->input_len; i++) {
1495 if (self->input_buffer[i] == '\n') {
1496 char *line_start = self->input_buffer + self->next_read_idx;
1497 num_read = i - self->next_read_idx + 1;
1498 self->next_read_idx = i + 1;
1499 return _Unpickler_CopyLine(self, line_start, num_read, result);
1500 }
1501 }
1502 if (!self->read)
1503 return bad_readline();
1504
1505 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1506 if (num_read < 0)
1507 return -1;
1508 if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1509 return bad_readline();
1510 self->next_read_idx = num_read;
1511 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
1512 }
1513
1514 /* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1515 will be modified in place. */
1516 static int
_Unpickler_ResizeMemoList(UnpicklerObject * self,size_t new_size)1517 _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
1518 {
1519 size_t i;
1520
1521 assert(new_size > self->memo_size);
1522
1523 PyObject **memo_new = self->memo;
1524 PyMem_RESIZE(memo_new, PyObject *, new_size);
1525 if (memo_new == NULL) {
1526 PyErr_NoMemory();
1527 return -1;
1528 }
1529 self->memo = memo_new;
1530 for (i = self->memo_size; i < new_size; i++)
1531 self->memo[i] = NULL;
1532 self->memo_size = new_size;
1533 return 0;
1534 }
1535
1536 /* Returns NULL if idx is out of bounds. */
1537 static PyObject *
_Unpickler_MemoGet(UnpicklerObject * self,size_t idx)1538 _Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1539 {
1540 if (idx >= self->memo_size)
1541 return NULL;
1542
1543 return self->memo[idx];
1544 }
1545
1546 /* Returns -1 (with an exception set) on failure, 0 on success.
1547 This takes its own reference to `value`. */
1548 static int
_Unpickler_MemoPut(UnpicklerObject * self,size_t idx,PyObject * value)1549 _Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
1550 {
1551 PyObject *old_item;
1552
1553 if (idx >= self->memo_size) {
1554 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1555 return -1;
1556 assert(idx < self->memo_size);
1557 }
1558 Py_INCREF(value);
1559 old_item = self->memo[idx];
1560 self->memo[idx] = value;
1561 if (old_item != NULL) {
1562 Py_DECREF(old_item);
1563 }
1564 else {
1565 self->memo_len++;
1566 }
1567 return 0;
1568 }
1569
1570 static PyObject **
_Unpickler_NewMemo(Py_ssize_t new_size)1571 _Unpickler_NewMemo(Py_ssize_t new_size)
1572 {
1573 PyObject **memo = PyMem_NEW(PyObject *, new_size);
1574 if (memo == NULL) {
1575 PyErr_NoMemory();
1576 return NULL;
1577 }
1578 memset(memo, 0, new_size * sizeof(PyObject *));
1579 return memo;
1580 }
1581
1582 /* Free the unpickler's memo, taking care to decref any items left in it. */
1583 static void
_Unpickler_MemoCleanup(UnpicklerObject * self)1584 _Unpickler_MemoCleanup(UnpicklerObject *self)
1585 {
1586 Py_ssize_t i;
1587 PyObject **memo = self->memo;
1588
1589 if (self->memo == NULL)
1590 return;
1591 self->memo = NULL;
1592 i = self->memo_size;
1593 while (--i >= 0) {
1594 Py_XDECREF(memo[i]);
1595 }
1596 PyMem_Free(memo);
1597 }
1598
1599 static UnpicklerObject *
_Unpickler_New(void)1600 _Unpickler_New(void)
1601 {
1602 UnpicklerObject *self;
1603
1604 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1605 if (self == NULL)
1606 return NULL;
1607
1608 self->pers_func = NULL;
1609 self->input_buffer = NULL;
1610 self->input_line = NULL;
1611 self->input_len = 0;
1612 self->next_read_idx = 0;
1613 self->prefetched_idx = 0;
1614 self->read = NULL;
1615 self->readinto = NULL;
1616 self->readline = NULL;
1617 self->peek = NULL;
1618 self->buffers = NULL;
1619 self->encoding = NULL;
1620 self->errors = NULL;
1621 self->marks = NULL;
1622 self->num_marks = 0;
1623 self->marks_size = 0;
1624 self->proto = 0;
1625 self->fix_imports = 0;
1626 memset(&self->buffer, 0, sizeof(Py_buffer));
1627 self->memo_size = 32;
1628 self->memo_len = 0;
1629 self->memo = _Unpickler_NewMemo(self->memo_size);
1630 self->stack = (Pdata *)Pdata_New();
1631
1632 if (self->memo == NULL || self->stack == NULL) {
1633 Py_DECREF(self);
1634 return NULL;
1635 }
1636
1637 PyObject_GC_Track(self);
1638 return self;
1639 }
1640
1641 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1642 be called once on a freshly created Unpickler. */
1643 static int
_Unpickler_SetInputStream(UnpicklerObject * self,PyObject * file)1644 _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1645 {
1646 /* Optional file methods */
1647 if (_PyObject_LookupAttr(file, &_Py_ID(peek), &self->peek) < 0) {
1648 return -1;
1649 }
1650 if (_PyObject_LookupAttr(file, &_Py_ID(readinto), &self->readinto) < 0) {
1651 return -1;
1652 }
1653 (void)_PyObject_LookupAttr(file, &_Py_ID(read), &self->read);
1654 (void)_PyObject_LookupAttr(file, &_Py_ID(readline), &self->readline);
1655 if (!self->readline || !self->read) {
1656 if (!PyErr_Occurred()) {
1657 PyErr_SetString(PyExc_TypeError,
1658 "file must have 'read' and 'readline' attributes");
1659 }
1660 Py_CLEAR(self->read);
1661 Py_CLEAR(self->readinto);
1662 Py_CLEAR(self->readline);
1663 Py_CLEAR(self->peek);
1664 return -1;
1665 }
1666 return 0;
1667 }
1668
1669 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1670 be called once on a freshly created Unpickler. */
1671 static int
_Unpickler_SetInputEncoding(UnpicklerObject * self,const char * encoding,const char * errors)1672 _Unpickler_SetInputEncoding(UnpicklerObject *self,
1673 const char *encoding,
1674 const char *errors)
1675 {
1676 if (encoding == NULL)
1677 encoding = "ASCII";
1678 if (errors == NULL)
1679 errors = "strict";
1680
1681 self->encoding = _PyMem_Strdup(encoding);
1682 self->errors = _PyMem_Strdup(errors);
1683 if (self->encoding == NULL || self->errors == NULL) {
1684 PyErr_NoMemory();
1685 return -1;
1686 }
1687 return 0;
1688 }
1689
1690 /* Returns -1 (with an exception set) on failure, 0 on success. This may
1691 be called once on a freshly created Unpickler. */
1692 static int
_Unpickler_SetBuffers(UnpicklerObject * self,PyObject * buffers)1693 _Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1694 {
1695 if (buffers == NULL || buffers == Py_None) {
1696 self->buffers = NULL;
1697 }
1698 else {
1699 self->buffers = PyObject_GetIter(buffers);
1700 if (self->buffers == NULL) {
1701 return -1;
1702 }
1703 }
1704 return 0;
1705 }
1706
1707 /* Generate a GET opcode for an object stored in the memo. */
1708 static int
memo_get(PicklerObject * self,PyObject * key)1709 memo_get(PicklerObject *self, PyObject *key)
1710 {
1711 Py_ssize_t *value;
1712 char pdata[30];
1713 Py_ssize_t len;
1714
1715 value = PyMemoTable_Get(self->memo, key);
1716 if (value == NULL) {
1717 PyErr_SetObject(PyExc_KeyError, key);
1718 return -1;
1719 }
1720
1721 if (!self->bin) {
1722 pdata[0] = GET;
1723 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1724 "%zd\n", *value);
1725 len = strlen(pdata);
1726 }
1727 else {
1728 if (*value < 256) {
1729 pdata[0] = BINGET;
1730 pdata[1] = (unsigned char)(*value & 0xff);
1731 len = 2;
1732 }
1733 else if ((size_t)*value <= 0xffffffffUL) {
1734 pdata[0] = LONG_BINGET;
1735 pdata[1] = (unsigned char)(*value & 0xff);
1736 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1737 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1738 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
1739 len = 5;
1740 }
1741 else { /* unlikely */
1742 PickleState *st = _Pickle_GetGlobalState();
1743 PyErr_SetString(st->PicklingError,
1744 "memo id too large for LONG_BINGET");
1745 return -1;
1746 }
1747 }
1748
1749 if (_Pickler_Write(self, pdata, len) < 0)
1750 return -1;
1751
1752 return 0;
1753 }
1754
1755 /* Store an object in the memo, assign it a new unique ID based on the number
1756 of objects currently stored in the memo and generate a PUT opcode. */
1757 static int
memo_put(PicklerObject * self,PyObject * obj)1758 memo_put(PicklerObject *self, PyObject *obj)
1759 {
1760 char pdata[30];
1761 Py_ssize_t len;
1762 Py_ssize_t idx;
1763
1764 const char memoize_op = MEMOIZE;
1765
1766 if (self->fast)
1767 return 0;
1768
1769 idx = PyMemoTable_Size(self->memo);
1770 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1771 return -1;
1772
1773 if (self->proto >= 4) {
1774 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1775 return -1;
1776 return 0;
1777 }
1778 else if (!self->bin) {
1779 pdata[0] = PUT;
1780 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1781 "%zd\n", idx);
1782 len = strlen(pdata);
1783 }
1784 else {
1785 if (idx < 256) {
1786 pdata[0] = BINPUT;
1787 pdata[1] = (unsigned char)idx;
1788 len = 2;
1789 }
1790 else if ((size_t)idx <= 0xffffffffUL) {
1791 pdata[0] = LONG_BINPUT;
1792 pdata[1] = (unsigned char)(idx & 0xff);
1793 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1794 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1795 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
1796 len = 5;
1797 }
1798 else { /* unlikely */
1799 PickleState *st = _Pickle_GetGlobalState();
1800 PyErr_SetString(st->PicklingError,
1801 "memo id too large for LONG_BINPUT");
1802 return -1;
1803 }
1804 }
1805 if (_Pickler_Write(self, pdata, len) < 0)
1806 return -1;
1807
1808 return 0;
1809 }
1810
1811 static PyObject *
get_dotted_path(PyObject * obj,PyObject * name)1812 get_dotted_path(PyObject *obj, PyObject *name)
1813 {
1814 PyObject *dotted_path;
1815 Py_ssize_t i, n;
1816 _Py_DECLARE_STR(dot, ".");
1817 dotted_path = PyUnicode_Split(name, &_Py_STR(dot), -1);
1818 if (dotted_path == NULL)
1819 return NULL;
1820 n = PyList_GET_SIZE(dotted_path);
1821 assert(n >= 1);
1822 for (i = 0; i < n; i++) {
1823 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1824 if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
1825 if (obj == NULL)
1826 PyErr_Format(PyExc_AttributeError,
1827 "Can't pickle local object %R", name);
1828 else
1829 PyErr_Format(PyExc_AttributeError,
1830 "Can't pickle local attribute %R on %R", name, obj);
1831 Py_DECREF(dotted_path);
1832 return NULL;
1833 }
1834 }
1835 return dotted_path;
1836 }
1837
1838 static PyObject *
get_deep_attribute(PyObject * obj,PyObject * names,PyObject ** pparent)1839 get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
1840 {
1841 Py_ssize_t i, n;
1842 PyObject *parent = NULL;
1843
1844 assert(PyList_CheckExact(names));
1845 Py_INCREF(obj);
1846 n = PyList_GET_SIZE(names);
1847 for (i = 0; i < n; i++) {
1848 PyObject *name = PyList_GET_ITEM(names, i);
1849 Py_XDECREF(parent);
1850 parent = obj;
1851 (void)_PyObject_LookupAttr(parent, name, &obj);
1852 if (obj == NULL) {
1853 Py_DECREF(parent);
1854 return NULL;
1855 }
1856 }
1857 if (pparent != NULL)
1858 *pparent = parent;
1859 else
1860 Py_XDECREF(parent);
1861 return obj;
1862 }
1863
1864
1865 static PyObject *
getattribute(PyObject * obj,PyObject * name,int allow_qualname)1866 getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1867 {
1868 PyObject *dotted_path, *attr;
1869
1870 if (allow_qualname) {
1871 dotted_path = get_dotted_path(obj, name);
1872 if (dotted_path == NULL)
1873 return NULL;
1874 attr = get_deep_attribute(obj, dotted_path, NULL);
1875 Py_DECREF(dotted_path);
1876 }
1877 else {
1878 (void)_PyObject_LookupAttr(obj, name, &attr);
1879 }
1880 if (attr == NULL && !PyErr_Occurred()) {
1881 PyErr_Format(PyExc_AttributeError,
1882 "Can't get attribute %R on %R", name, obj);
1883 }
1884 return attr;
1885 }
1886
1887 static int
_checkmodule(PyObject * module_name,PyObject * module,PyObject * global,PyObject * dotted_path)1888 _checkmodule(PyObject *module_name, PyObject *module,
1889 PyObject *global, PyObject *dotted_path)
1890 {
1891 if (module == Py_None) {
1892 return -1;
1893 }
1894 if (PyUnicode_Check(module_name) &&
1895 _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1896 return -1;
1897 }
1898
1899 PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1900 if (candidate == NULL) {
1901 return -1;
1902 }
1903 if (candidate != global) {
1904 Py_DECREF(candidate);
1905 return -1;
1906 }
1907 Py_DECREF(candidate);
1908 return 0;
1909 }
1910
1911 static PyObject *
whichmodule(PyObject * global,PyObject * dotted_path)1912 whichmodule(PyObject *global, PyObject *dotted_path)
1913 {
1914 PyObject *module_name;
1915 PyObject *module = NULL;
1916 Py_ssize_t i;
1917 PyObject *modules;
1918
1919 if (_PyObject_LookupAttr(global, &_Py_ID(__module__), &module_name) < 0) {
1920 return NULL;
1921 }
1922 if (module_name) {
1923 /* In some rare cases (e.g., bound methods of extension types),
1924 __module__ can be None. If it is so, then search sys.modules for
1925 the module of global. */
1926 if (module_name != Py_None)
1927 return module_name;
1928 Py_CLEAR(module_name);
1929 }
1930 assert(module_name == NULL);
1931
1932 /* Fallback on walking sys.modules */
1933 PyThreadState *tstate = _PyThreadState_GET();
1934 modules = _PySys_GetAttr(tstate, &_Py_ID(modules));
1935 if (modules == NULL) {
1936 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
1937 return NULL;
1938 }
1939 if (PyDict_CheckExact(modules)) {
1940 i = 0;
1941 while (PyDict_Next(modules, &i, &module_name, &module)) {
1942 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1943 Py_INCREF(module_name);
1944 return module_name;
1945 }
1946 if (PyErr_Occurred()) {
1947 return NULL;
1948 }
1949 }
1950 }
1951 else {
1952 PyObject *iterator = PyObject_GetIter(modules);
1953 if (iterator == NULL) {
1954 return NULL;
1955 }
1956 while ((module_name = PyIter_Next(iterator))) {
1957 module = PyObject_GetItem(modules, module_name);
1958 if (module == NULL) {
1959 Py_DECREF(module_name);
1960 Py_DECREF(iterator);
1961 return NULL;
1962 }
1963 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1964 Py_DECREF(module);
1965 Py_DECREF(iterator);
1966 return module_name;
1967 }
1968 Py_DECREF(module);
1969 Py_DECREF(module_name);
1970 if (PyErr_Occurred()) {
1971 Py_DECREF(iterator);
1972 return NULL;
1973 }
1974 }
1975 Py_DECREF(iterator);
1976 }
1977
1978 /* If no module is found, use __main__. */
1979 module_name = &_Py_ID(__main__);
1980 Py_INCREF(module_name);
1981 return module_name;
1982 }
1983
1984 /* fast_save_enter() and fast_save_leave() are guards against recursive
1985 objects when Pickler is used with the "fast mode" (i.e., with object
1986 memoization disabled). If the nesting of a list or dict object exceed
1987 FAST_NESTING_LIMIT, these guards will start keeping an internal
1988 reference to the seen list or dict objects and check whether these objects
1989 are recursive. These are not strictly necessary, since save() has a
1990 hard-coded recursion limit, but they give a nicer error message than the
1991 typical RuntimeError. */
1992 static int
fast_save_enter(PicklerObject * self,PyObject * obj)1993 fast_save_enter(PicklerObject *self, PyObject *obj)
1994 {
1995 /* if fast_nesting < 0, we're doing an error exit. */
1996 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1997 PyObject *key = NULL;
1998 if (self->fast_memo == NULL) {
1999 self->fast_memo = PyDict_New();
2000 if (self->fast_memo == NULL) {
2001 self->fast_nesting = -1;
2002 return 0;
2003 }
2004 }
2005 key = PyLong_FromVoidPtr(obj);
2006 if (key == NULL) {
2007 self->fast_nesting = -1;
2008 return 0;
2009 }
2010 int r = PyDict_Contains(self->fast_memo, key);
2011 if (r > 0) {
2012 PyErr_Format(PyExc_ValueError,
2013 "fast mode: can't pickle cyclic objects "
2014 "including object type %.200s at %p",
2015 Py_TYPE(obj)->tp_name, obj);
2016 }
2017 else if (r == 0) {
2018 r = PyDict_SetItem(self->fast_memo, key, Py_None);
2019 }
2020 Py_DECREF(key);
2021 if (r != 0) {
2022 self->fast_nesting = -1;
2023 return 0;
2024 }
2025 }
2026 return 1;
2027 }
2028
2029 static int
fast_save_leave(PicklerObject * self,PyObject * obj)2030 fast_save_leave(PicklerObject *self, PyObject *obj)
2031 {
2032 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2033 PyObject *key = PyLong_FromVoidPtr(obj);
2034 if (key == NULL)
2035 return 0;
2036 if (PyDict_DelItem(self->fast_memo, key) < 0) {
2037 Py_DECREF(key);
2038 return 0;
2039 }
2040 Py_DECREF(key);
2041 }
2042 return 1;
2043 }
2044
2045 static int
save_none(PicklerObject * self,PyObject * obj)2046 save_none(PicklerObject *self, PyObject *obj)
2047 {
2048 const char none_op = NONE;
2049 if (_Pickler_Write(self, &none_op, 1) < 0)
2050 return -1;
2051
2052 return 0;
2053 }
2054
2055 static int
save_bool(PicklerObject * self,PyObject * obj)2056 save_bool(PicklerObject *self, PyObject *obj)
2057 {
2058 if (self->proto >= 2) {
2059 const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
2060 if (_Pickler_Write(self, &bool_op, 1) < 0)
2061 return -1;
2062 }
2063 else {
2064 /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2065 * so that unpicklers written before bools were introduced unpickle them
2066 * as ints, but unpicklers after can recognize that bools were intended.
2067 * Note that protocol 2 added direct ways to pickle bools.
2068 */
2069 const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2070 if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2071 return -1;
2072 }
2073 return 0;
2074 }
2075
/* Pickle a Python int.
 *
 * Values that fit in a signed 4-byte integer are written as
 * BININT / BININT2 / BININT1 when self->bin is set, or as a text INT line
 * otherwise.  Everything else is written as LONG1 / LONG4 (a little-endian
 * two's-complement byte string) for protocol >= 2, or as a text LONG line
 * built from repr(obj) plus a trailing 'L' for older protocols.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
save_long(PicklerObject *self, PyObject *obj)
{
    PyObject *repr = NULL;
    Py_ssize_t size;
    long val;
    int overflow;
    int status = 0;

    val= PyLong_AsLongAndOverflow(obj, &overflow);
    if (!overflow && (sizeof(long) <= 4 ||
            (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
    {
        /* result fits in a signed 4-byte integer.

           Note: we can't use -0x80000000L in the above condition because some
           compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
           before applying the unary minus when sizeof(long) <= 4. The
           resulting value stays unsigned which is commonly not what we want,
           so MSVC happily warns us about it.  However, that result would have
           been fine because we guard for sizeof(long) <= 4 which turns the
           condition true in that particular case. */
        char pdata[32];
        Py_ssize_t len = 0;

        if (self->bin) {
            /* Lay out the value little-endian after the opcode byte. */
            pdata[1] = (unsigned char)(val & 0xff);
            pdata[2] = (unsigned char)((val >> 8) & 0xff);
            pdata[3] = (unsigned char)((val >> 16) & 0xff);
            pdata[4] = (unsigned char)((val >> 24) & 0xff);

            /* Pick the shortest opcode whose dropped high bytes are all
               zero.  Negative values have non-zero high bytes, so they
               always end up as a full BININT. */
            if ((pdata[4] != 0) || (pdata[3] != 0)) {
                pdata[0] = BININT;
                len = 5;
            }
            else if (pdata[2] != 0) {
                pdata[0] = BININT2;
                len = 3;
            }
            else {
                pdata[0] = BININT1;
                len = 2;
            }
        }
        else {
            /* Text mode: opcode character, decimal digits, newline. */
            sprintf(pdata, "%c%ld\n", INT,  val);
            len = strlen(pdata);
        }
        if (_Pickler_Write(self, pdata, len) < 0)
            return -1;

        return 0;
    }
    assert(!PyErr_Occurred());

    if (self->proto >= 2) {
        /* Linear-time pickling. */
        size_t nbits;
        size_t nbytes;
        unsigned char *pdata;
        char header[5];
        int i;
        int sign = _PyLong_Sign(obj);

        if (sign == 0) {
            header[0] = LONG1;
            header[1] = 0;      /* It's 0 -- an empty bytestring. */
            if (_Pickler_Write(self, header, 2) < 0)
                goto error;
            return 0;
        }
        nbits = _PyLong_NumBits(obj);
        if (nbits == (size_t)-1 && PyErr_Occurred())
            goto error;
        /* How many bytes do we need?  There are nbits >> 3 full
         * bytes of data, and nbits & 7 leftover bits.  If there
         * are any leftover bits, then we clearly need another
         * byte.  What's not so obvious is that we *probably*
         * need another byte even if there aren't any leftovers:
         * the most-significant bit of the most-significant byte
         * acts like a sign bit, and it's usually got a sense
         * opposite of the one we need.  The exception is ints
         * of the form -(2**(8*j-1)) for j > 0.  Such an int is
         * its own 256's-complement, so has the right sign bit
         * even without the extra byte.  That's a pain to check
         * for in advance, though, so we always grab an extra
         * byte at the start, and cut it back later if possible.
         */
        nbytes = (nbits >> 3) + 1;
        if (nbytes > 0x7fffffffL) {
            PyErr_SetString(PyExc_OverflowError,
                            "int too large to pickle");
            goto error;
        }
        /* Scratch bytes object that receives the raw integer bytes; it is
           released at the common exit below. */
        repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
        if (repr == NULL)
            goto error;
        pdata = (unsigned char *)PyBytes_AS_STRING(repr);
        i = _PyLong_AsByteArray((PyLongObject *)obj,
                                pdata, nbytes,
                                1 /* little endian */ , 1 /* signed */ );
        if (i < 0)
            goto error;
        /* If the int is negative, this may be a byte more than
         * needed.  This is so iff the MSB is all redundant sign
         * bits.
         */
        if (sign < 0 &&
            nbytes > 1 &&
            pdata[nbytes - 1] == 0xff &&
            (pdata[nbytes - 2] & 0x80) != 0) {
            nbytes--;
        }

        if (nbytes < 256) {
            /* LONG1: one-byte length prefix. */
            header[0] = LONG1;
            header[1] = (unsigned char)nbytes;
            size = 2;
        }
        else {
            /* LONG4: four-byte little-endian length prefix.  'size' is
               used as a scratch copy of nbytes while the prefix is
               built, then reset to the header length. */
            header[0] = LONG4;
            size = (Py_ssize_t) nbytes;
            for (i = 1; i < 5; i++) {
                header[i] = (unsigned char)(size & 0xff);
                size >>= 8;
            }
            size = 5;
        }
        if (_Pickler_Write(self, header, size) < 0 ||
            _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
            goto error;
    }
    else {
        const char long_op = LONG;
        const char *string;

        /* proto < 2: write the repr and newline.  This is quadratic-time (in
           the number of digits), in both directions.  We add a trailing 'L'
           to the repr, for compatibility with Python 2.x. */

        repr = PyObject_Repr(obj);
        if (repr == NULL)
            goto error;

        string = PyUnicode_AsUTF8AndSize(repr, &size);
        if (string == NULL)
            goto error;

        if (_Pickler_Write(self, &long_op, 1) < 0 ||
            _Pickler_Write(self, string, size) < 0 ||
            _Pickler_Write(self, "L\n", 2) < 0)
            goto error;
    }

    /* The error label lives inside a never-taken branch so that the
       success path falls through to the shared cleanup without setting
       status. */
    if (0) {
  error:
      status = -1;
    }
    Py_XDECREF(repr);

    return status;
}
2238
2239 static int
save_float(PicklerObject * self,PyObject * obj)2240 save_float(PicklerObject *self, PyObject *obj)
2241 {
2242 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2243
2244 if (self->bin) {
2245 char pdata[9];
2246 pdata[0] = BINFLOAT;
2247 if (PyFloat_Pack8(x, &pdata[1], 0) < 0)
2248 return -1;
2249 if (_Pickler_Write(self, pdata, 9) < 0)
2250 return -1;
2251 }
2252 else {
2253 int result = -1;
2254 char *buf = NULL;
2255 char op = FLOAT;
2256
2257 if (_Pickler_Write(self, &op, 1) < 0)
2258 goto done;
2259
2260 buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
2261 if (!buf) {
2262 PyErr_NoMemory();
2263 goto done;
2264 }
2265
2266 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
2267 goto done;
2268
2269 if (_Pickler_Write(self, "\n", 1) < 0)
2270 goto done;
2271
2272 result = 0;
2273 done:
2274 PyMem_Free(buf);
2275 return result;
2276 }
2277
2278 return 0;
2279 }
2280
2281 /* Perform direct write of the header and payload of the binary object.
2282
2283 The large contiguous data is written directly into the underlying file
2284 object, bypassing the output_buffer of the Pickler. We intentionally
2285 do not insert a protocol 4 frame opcode to make it possible to optimize
2286 file.read calls in the loader.
2287 */
2288 static int
_Pickler_write_bytes(PicklerObject * self,const char * header,Py_ssize_t header_size,const char * data,Py_ssize_t data_size,PyObject * payload)2289 _Pickler_write_bytes(PicklerObject *self,
2290 const char *header, Py_ssize_t header_size,
2291 const char *data, Py_ssize_t data_size,
2292 PyObject *payload)
2293 {
2294 int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2295 int framing = self->framing;
2296
2297 if (bypass_buffer) {
2298 assert(self->output_buffer != NULL);
2299 /* Commit the previous frame. */
2300 if (_Pickler_CommitFrame(self)) {
2301 return -1;
2302 }
2303 /* Disable framing temporarily */
2304 self->framing = 0;
2305 }
2306
2307 if (_Pickler_Write(self, header, header_size) < 0) {
2308 return -1;
2309 }
2310
2311 if (bypass_buffer && self->write != NULL) {
2312 /* Bypass the in-memory buffer to directly stream large data
2313 into the underlying file object. */
2314 PyObject *result, *mem = NULL;
2315 /* Dump the output buffer to the file. */
2316 if (_Pickler_FlushToFile(self) < 0) {
2317 return -1;
2318 }
2319
2320 /* Stream write the payload into the file without going through the
2321 output buffer. */
2322 if (payload == NULL) {
2323 /* TODO: It would be better to use a memoryview with a linked
2324 original string if this is possible. */
2325 payload = mem = PyBytes_FromStringAndSize(data, data_size);
2326 if (payload == NULL) {
2327 return -1;
2328 }
2329 }
2330 result = PyObject_CallOneArg(self->write, payload);
2331 Py_XDECREF(mem);
2332 if (result == NULL) {
2333 return -1;
2334 }
2335 Py_DECREF(result);
2336
2337 /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2338 if (_Pickler_ClearBuffer(self) < 0) {
2339 return -1;
2340 }
2341 }
2342 else {
2343 if (_Pickler_Write(self, data, data_size) < 0) {
2344 return -1;
2345 }
2346 }
2347
2348 /* Re-enable framing for subsequent calls to _Pickler_Write. */
2349 self->framing = framing;
2350
2351 return 0;
2352 }
2353
2354 static int
_save_bytes_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2355 _save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
2356 Py_ssize_t size)
2357 {
2358 assert(self->proto >= 3);
2359
2360 char header[9];
2361 Py_ssize_t len;
2362
2363 if (size < 0)
2364 return -1;
2365
2366 if (size <= 0xff) {
2367 header[0] = SHORT_BINBYTES;
2368 header[1] = (unsigned char)size;
2369 len = 2;
2370 }
2371 else if ((size_t)size <= 0xffffffffUL) {
2372 header[0] = BINBYTES;
2373 header[1] = (unsigned char)(size & 0xff);
2374 header[2] = (unsigned char)((size >> 8) & 0xff);
2375 header[3] = (unsigned char)((size >> 16) & 0xff);
2376 header[4] = (unsigned char)((size >> 24) & 0xff);
2377 len = 5;
2378 }
2379 else if (self->proto >= 4) {
2380 header[0] = BINBYTES8;
2381 _write_size64(header + 1, size);
2382 len = 9;
2383 }
2384 else {
2385 PyErr_SetString(PyExc_OverflowError,
2386 "serializing a bytes object larger than 4 GiB "
2387 "requires pickle protocol 4 or higher");
2388 return -1;
2389 }
2390
2391 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2392 return -1;
2393 }
2394
2395 if (memo_put(self, obj) < 0) {
2396 return -1;
2397 }
2398
2399 return 0;
2400 }
2401
2402 static int
save_bytes(PicklerObject * self,PyObject * obj)2403 save_bytes(PicklerObject *self, PyObject *obj)
2404 {
2405 if (self->proto < 3) {
2406 /* Older pickle protocols do not have an opcode for pickling bytes
2407 objects. Therefore, we need to fake the copy protocol (i.e.,
2408 the __reduce__ method) to permit bytes object unpickling.
2409
2410 Here we use a hack to be compatible with Python 2. Since in Python
2411 2 'bytes' is just an alias for 'str' (which has different
2412 parameters than the actual bytes object), we use codecs.encode
2413 to create the appropriate 'str' object when unpickled using
2414 Python 2 *and* the appropriate 'bytes' object when unpickled
2415 using Python 3. Again this is a hack and we don't need to do this
2416 with newer protocols. */
2417 PyObject *reduce_value;
2418 int status;
2419
2420 if (PyBytes_GET_SIZE(obj) == 0) {
2421 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2422 }
2423 else {
2424 PickleState *st = _Pickle_GetGlobalState();
2425 PyObject *unicode_str =
2426 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2427 PyBytes_GET_SIZE(obj),
2428 "strict");
2429
2430 if (unicode_str == NULL)
2431 return -1;
2432 reduce_value = Py_BuildValue("(O(OO))",
2433 st->codecs_encode, unicode_str,
2434 &_Py_ID(latin1));
2435 Py_DECREF(unicode_str);
2436 }
2437
2438 if (reduce_value == NULL)
2439 return -1;
2440
2441 /* save_reduce() will memoize the object automatically. */
2442 status = save_reduce(self, reduce_value, obj);
2443 Py_DECREF(reduce_value);
2444 return status;
2445 }
2446 else {
2447 return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
2448 PyBytes_GET_SIZE(obj));
2449 }
2450 }
2451
2452 static int
_save_bytearray_data(PicklerObject * self,PyObject * obj,const char * data,Py_ssize_t size)2453 _save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
2454 Py_ssize_t size)
2455 {
2456 assert(self->proto >= 5);
2457
2458 char header[9];
2459 Py_ssize_t len;
2460
2461 if (size < 0)
2462 return -1;
2463
2464 header[0] = BYTEARRAY8;
2465 _write_size64(header + 1, size);
2466 len = 9;
2467
2468 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2469 return -1;
2470 }
2471
2472 if (memo_put(self, obj) < 0) {
2473 return -1;
2474 }
2475
2476 return 0;
2477 }
2478
2479 static int
save_bytearray(PicklerObject * self,PyObject * obj)2480 save_bytearray(PicklerObject *self, PyObject *obj)
2481 {
2482 if (self->proto < 5) {
2483 /* Older pickle protocols do not have an opcode for pickling
2484 * bytearrays. */
2485 PyObject *reduce_value = NULL;
2486 int status;
2487
2488 if (PyByteArray_GET_SIZE(obj) == 0) {
2489 reduce_value = Py_BuildValue("(O())",
2490 (PyObject *) &PyByteArray_Type);
2491 }
2492 else {
2493 PyObject *bytes_obj = PyBytes_FromObject(obj);
2494 if (bytes_obj != NULL) {
2495 reduce_value = Py_BuildValue("(O(O))",
2496 (PyObject *) &PyByteArray_Type,
2497 bytes_obj);
2498 Py_DECREF(bytes_obj);
2499 }
2500 }
2501 if (reduce_value == NULL)
2502 return -1;
2503
2504 /* save_reduce() will memoize the object automatically. */
2505 status = save_reduce(self, reduce_value, obj);
2506 Py_DECREF(reduce_value);
2507 return status;
2508 }
2509 else {
2510 return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
2511 PyByteArray_GET_SIZE(obj));
2512 }
2513 }
2514
2515 static int
save_picklebuffer(PicklerObject * self,PyObject * obj)2516 save_picklebuffer(PicklerObject *self, PyObject *obj)
2517 {
2518 if (self->proto < 5) {
2519 PickleState *st = _Pickle_GetGlobalState();
2520 PyErr_SetString(st->PicklingError,
2521 "PickleBuffer can only pickled with protocol >= 5");
2522 return -1;
2523 }
2524 const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2525 if (view == NULL) {
2526 return -1;
2527 }
2528 if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2529 PickleState *st = _Pickle_GetGlobalState();
2530 PyErr_SetString(st->PicklingError,
2531 "PickleBuffer can not be pickled when "
2532 "pointing to a non-contiguous buffer");
2533 return -1;
2534 }
2535 int in_band = 1;
2536 if (self->buffer_callback != NULL) {
2537 PyObject *ret = PyObject_CallOneArg(self->buffer_callback, obj);
2538 if (ret == NULL) {
2539 return -1;
2540 }
2541 in_band = PyObject_IsTrue(ret);
2542 Py_DECREF(ret);
2543 if (in_band == -1) {
2544 return -1;
2545 }
2546 }
2547 if (in_band) {
2548 /* Write data in-band */
2549 if (view->readonly) {
2550 return _save_bytes_data(self, obj, (const char*) view->buf,
2551 view->len);
2552 }
2553 else {
2554 return _save_bytearray_data(self, obj, (const char*) view->buf,
2555 view->len);
2556 }
2557 }
2558 else {
2559 /* Write data out-of-band */
2560 const char next_buffer_op = NEXT_BUFFER;
2561 if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2562 return -1;
2563 }
2564 if (view->readonly) {
2565 const char readonly_buffer_op = READONLY_BUFFER;
2566 if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2567 return -1;
2568 }
2569 }
2570 }
2571 return 0;
2572 }
2573
2574 /* A copy of PyUnicode_AsRawUnicodeEscapeString() that also translates
2575 backslash and newline characters to \uXXXX escapes. */
2576 static PyObject *
raw_unicode_escape(PyObject * obj)2577 raw_unicode_escape(PyObject *obj)
2578 {
2579 char *p;
2580 Py_ssize_t i, size;
2581 const void *data;
2582 unsigned int kind;
2583 _PyBytesWriter writer;
2584
2585 if (PyUnicode_READY(obj))
2586 return NULL;
2587
2588 _PyBytesWriter_Init(&writer);
2589
2590 size = PyUnicode_GET_LENGTH(obj);
2591 data = PyUnicode_DATA(obj);
2592 kind = PyUnicode_KIND(obj);
2593
2594 p = _PyBytesWriter_Alloc(&writer, size);
2595 if (p == NULL)
2596 goto error;
2597 writer.overallocate = 1;
2598
2599 for (i=0; i < size; i++) {
2600 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
2601 /* Map 32-bit characters to '\Uxxxxxxxx' */
2602 if (ch >= 0x10000) {
2603 /* -1: subtract 1 preallocated byte */
2604 p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2605 if (p == NULL)
2606 goto error;
2607
2608 *p++ = '\\';
2609 *p++ = 'U';
2610 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2611 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2612 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2613 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2614 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2615 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2616 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2617 *p++ = Py_hexdigits[ch & 15];
2618 }
2619 /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
2620 else if (ch >= 256 ||
2621 ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2622 ch == 0x1a)
2623 {
2624 /* -1: subtract 1 preallocated byte */
2625 p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2626 if (p == NULL)
2627 goto error;
2628
2629 *p++ = '\\';
2630 *p++ = 'u';
2631 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2632 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2633 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2634 *p++ = Py_hexdigits[ch & 15];
2635 }
2636 /* Copy everything else as-is */
2637 else
2638 *p++ = (char) ch;
2639 }
2640
2641 return _PyBytesWriter_Finish(&writer, p);
2642
2643 error:
2644 _PyBytesWriter_Dealloc(&writer);
2645 return NULL;
2646 }
2647
2648 static int
write_unicode_binary(PicklerObject * self,PyObject * obj)2649 write_unicode_binary(PicklerObject *self, PyObject *obj)
2650 {
2651 char header[9];
2652 Py_ssize_t len;
2653 PyObject *encoded = NULL;
2654 Py_ssize_t size;
2655 const char *data;
2656
2657 if (PyUnicode_READY(obj))
2658 return -1;
2659
2660 data = PyUnicode_AsUTF8AndSize(obj, &size);
2661 if (data == NULL) {
2662 /* Issue #8383: for strings with lone surrogates, fallback on the
2663 "surrogatepass" error handler. */
2664 PyErr_Clear();
2665 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2666 if (encoded == NULL)
2667 return -1;
2668
2669 data = PyBytes_AS_STRING(encoded);
2670 size = PyBytes_GET_SIZE(encoded);
2671 }
2672
2673 assert(size >= 0);
2674 if (size <= 0xff && self->proto >= 4) {
2675 header[0] = SHORT_BINUNICODE;
2676 header[1] = (unsigned char)(size & 0xff);
2677 len = 2;
2678 }
2679 else if ((size_t)size <= 0xffffffffUL) {
2680 header[0] = BINUNICODE;
2681 header[1] = (unsigned char)(size & 0xff);
2682 header[2] = (unsigned char)((size >> 8) & 0xff);
2683 header[3] = (unsigned char)((size >> 16) & 0xff);
2684 header[4] = (unsigned char)((size >> 24) & 0xff);
2685 len = 5;
2686 }
2687 else if (self->proto >= 4) {
2688 header[0] = BINUNICODE8;
2689 _write_size64(header + 1, size);
2690 len = 9;
2691 }
2692 else {
2693 PyErr_SetString(PyExc_OverflowError,
2694 "serializing a string larger than 4 GiB "
2695 "requires pickle protocol 4 or higher");
2696 Py_XDECREF(encoded);
2697 return -1;
2698 }
2699
2700 if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2701 Py_XDECREF(encoded);
2702 return -1;
2703 }
2704 Py_XDECREF(encoded);
2705 return 0;
2706 }
2707
2708 static int
save_unicode(PicklerObject * self,PyObject * obj)2709 save_unicode(PicklerObject *self, PyObject *obj)
2710 {
2711 if (self->bin) {
2712 if (write_unicode_binary(self, obj) < 0)
2713 return -1;
2714 }
2715 else {
2716 PyObject *encoded;
2717 Py_ssize_t size;
2718 const char unicode_op = UNICODE;
2719
2720 encoded = raw_unicode_escape(obj);
2721 if (encoded == NULL)
2722 return -1;
2723
2724 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2725 Py_DECREF(encoded);
2726 return -1;
2727 }
2728
2729 size = PyBytes_GET_SIZE(encoded);
2730 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2731 Py_DECREF(encoded);
2732 return -1;
2733 }
2734 Py_DECREF(encoded);
2735
2736 if (_Pickler_Write(self, "\n", 1) < 0)
2737 return -1;
2738 }
2739 if (memo_put(self, obj) < 0)
2740 return -1;
2741
2742 return 0;
2743 }
2744
2745 /* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2746 static int
store_tuple_elements(PicklerObject * self,PyObject * t,Py_ssize_t len)2747 store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
2748 {
2749 Py_ssize_t i;
2750
2751 assert(PyTuple_Size(t) == len);
2752
2753 for (i = 0; i < len; i++) {
2754 PyObject *element = PyTuple_GET_ITEM(t, i);
2755
2756 if (element == NULL)
2757 return -1;
2758 if (save(self, element, 0) < 0)
2759 return -1;
2760 }
2761
2762 return 0;
2763 }
2764
/* Tuples are ubiquitous in the pickle protocols, so many techniques are
 * used across protocols to minimize the space needed to pickle them.
 * Tuples are also the only builtin immutable type that can be recursive
 * (a tuple can be reached from itself), and that requires some subtle
 * magic so that it works in all cases.  IOW, this is a long routine.
 *
 * Summary of what is written:
 *   - empty tuple: EMPTY_TUPLE when self->proto is non-zero, otherwise
 *     MARK TUPLE; the empty tuple is not memoized.
 *   - 1 to 3 elements with protocol >= 2: the elements followed by
 *     TUPLE1 / TUPLE2 / TUPLE3.
 *   - everything else: MARK, the elements, TUPLE.
 * If the tuple turns up in the memo after its elements have been saved,
 * the pushed elements are popped again and the tuple is fetched from the
 * memo instead.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
save_tuple(PicklerObject *self, PyObject *obj)
{
    Py_ssize_t len, i;

    const char mark_op = MARK;
    const char tuple_op = TUPLE;
    const char pop_op = POP;
    const char pop_mark_op = POP_MARK;
    /* Indexed by tuple length (0..3). */
    const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};

    if ((len = PyTuple_Size(obj)) < 0)
        return -1;

    if (len == 0) {
        char pdata[2];

        /* 'len' is reused here as the number of bytes to write. */
        if (self->proto) {
            pdata[0] = EMPTY_TUPLE;
            len = 1;
        }
        else {
            pdata[0] = MARK;
            pdata[1] = TUPLE;
            len = 2;
        }
        if (_Pickler_Write(self, pdata, len) < 0)
            return -1;
        return 0;
    }

    /* The tuple isn't in the memo now.  If it shows up there after
     * saving the tuple elements, the tuple must be recursive, in
     * which case we'll pop everything we put on the stack, and fetch
     * its value from the memo.
     */
    if (len <= 3 && self->proto >= 2) {
        /* Use TUPLE{1,2,3} opcodes. */
        if (store_tuple_elements(self, obj, len) < 0)
            return -1;

        if (PyMemoTable_Get(self->memo, obj)) {
            /* pop the len elements */
            for (i = 0; i < len; i++)
                if (_Pickler_Write(self, &pop_op, 1) < 0)
                    return -1;
            /* fetch from memo */
            if (memo_get(self, obj) < 0)
                return -1;

            return 0;
        }
        else { /* Not recursive. */
            if (_Pickler_Write(self, len2opcode + len, 1) < 0)
                return -1;
        }
        goto memoize;
    }

    /* proto < 2 and len > 0, or proto >= 2 and len > 3.
     * Generate MARK e1 e2 ... TUPLE
     */
    if (_Pickler_Write(self, &mark_op, 1) < 0)
        return -1;

    if (store_tuple_elements(self, obj, len) < 0)
        return -1;

    if (PyMemoTable_Get(self->memo, obj)) {
        /* pop the stack stuff we pushed */
        if (self->bin) {
            /* A single POP_MARK is written in place of the individual
               POPs used in text mode. */
            if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
                return -1;
        }
        else {
            /* Note that we pop one more than len, to remove
             * the MARK too.
             */
            for (i = 0; i <= len; i++)
                if (_Pickler_Write(self, &pop_op, 1) < 0)
                    return -1;
        }
        /* fetch from memo */
        if (memo_get(self, obj) < 0)
            return -1;

        return 0;
    }
    else { /* Not recursive. */
        if (_Pickler_Write(self, &tuple_op, 1) < 0)
            return -1;
    }

    /* Shared tail for both non-recursive paths. */
  memoize:
    if (memo_put(self, obj) < 0)
        return -1;

    return 0;
}
2870
/* iter is an iterator giving items, and we batch up chunks of
 *     MARK item item ... item APPENDS
 * opcode sequences.  Calling code should have arranged to first create an
 * empty list, or list-like object, for the APPENDS to operate on.
 * Returns 0 on success, <0 on error.
 *
 * For protocol 0 each item is followed by its own APPEND instead.  For
 * other protocols a batch that turns out to hold a single item is also
 * written as item APPEND, without a MARK.
 */
static int
batch_list(PicklerObject *self, PyObject *iter)
{
    PyObject *obj = NULL;
    PyObject *firstitem = NULL;
    int i, n;

    const char mark_op = MARK;
    const char append_op = APPEND;
    const char appends_op = APPENDS;

    assert(iter != NULL);

    /* XXX: I think this function could be made faster by avoiding the
       iterator interface and fetching objects directly from list using
       PyList_GET_ITEM.
    */

    if (self->proto == 0) {
        /* APPENDS isn't available; do one at a time. */
        for (;;) {
            obj = PyIter_Next(iter);
            if (obj == NULL) {
                /* NULL means either exhaustion or an error. */
                if (PyErr_Occurred())
                    return -1;
                break;
            }
            i = save(self, obj, 0);
            /* Release the item before looking at the result, so it is
               dropped on the error path too. */
            Py_DECREF(obj);
            if (i < 0)
                return -1;
            if (_Pickler_Write(self, &append_op, 1) < 0)
                return -1;
        }
        return 0;
    }

    /* proto > 0: write in batches of BATCHSIZE. */
    do {
        /* Get first item */
        firstitem = PyIter_Next(iter);
        if (firstitem == NULL) {
            if (PyErr_Occurred())
                goto error;

            /* nothing more to add */
            break;
        }

        /* Try to get a second item */
        obj = PyIter_Next(iter);
        if (obj == NULL) {
            if (PyErr_Occurred())
                goto error;

            /* Only one item to write */
            if (save(self, firstitem, 0) < 0)
                goto error;
            if (_Pickler_Write(self, &append_op, 1) < 0)
                goto error;
            Py_CLEAR(firstitem);
            break;
        }

        /* More than one item to write */

        /* Pump out MARK, items, APPENDS. */
        if (_Pickler_Write(self, &mark_op, 1) < 0)
            goto error;

        if (save(self, firstitem, 0) < 0)
            goto error;
        Py_CLEAR(firstitem);
        n = 1;

        /* Fetch and save up to BATCHSIZE items */
        while (obj) {
            if (save(self, obj, 0) < 0)
                goto error;
            Py_CLEAR(obj);
            n += 1;

            if (n == BATCHSIZE)
                break;

            obj = PyIter_Next(iter);
            if (obj == NULL) {
                if (PyErr_Occurred())
                    goto error;
                break;
            }
        }

        if (_Pickler_Write(self, &appends_op, 1) < 0)
            goto error;

      /* A full batch means the iterator may have more; both 'break's
         above leave the loop without evaluating this condition. */
    } while (n == BATCHSIZE);
    return 0;

  error:
    /* Either pointer may be NULL here. */
    Py_XDECREF(firstitem);
    Py_XDECREF(obj);
    return -1;
}
2981
2982 /* This is a variant of batch_list() above, specialized for lists (with no
2983 * support for list subclasses). Like batch_list(), we batch up chunks of
2984 * MARK item item ... item APPENDS
2985 * opcode sequences. Calling code should have arranged to first create an
2986 * empty list, or list-like object, for the APPENDS to operate on.
2987 * Returns 0 on success, -1 on error.
2988 *
2989 * This version is considerably faster than batch_list(), if less general.
2990 *
2991 * Note that this only works for protocols > 0.
2992 */
2993 static int
batch_list_exact(PicklerObject * self,PyObject * obj)2994 batch_list_exact(PicklerObject *self, PyObject *obj)
2995 {
2996 PyObject *item = NULL;
2997 Py_ssize_t this_batch, total;
2998
2999 const char append_op = APPEND;
3000 const char appends_op = APPENDS;
3001 const char mark_op = MARK;
3002
3003 assert(obj != NULL);
3004 assert(self->proto > 0);
3005 assert(PyList_CheckExact(obj));
3006
3007 if (PyList_GET_SIZE(obj) == 1) {
3008 item = PyList_GET_ITEM(obj, 0);
3009 Py_INCREF(item);
3010 int err = save(self, item, 0);
3011 Py_DECREF(item);
3012 if (err < 0)
3013 return -1;
3014 if (_Pickler_Write(self, &append_op, 1) < 0)
3015 return -1;
3016 return 0;
3017 }
3018
3019 /* Write in batches of BATCHSIZE. */
3020 total = 0;
3021 do {
3022 this_batch = 0;
3023 if (_Pickler_Write(self, &mark_op, 1) < 0)
3024 return -1;
3025 while (total < PyList_GET_SIZE(obj)) {
3026 item = PyList_GET_ITEM(obj, total);
3027 Py_INCREF(item);
3028 int err = save(self, item, 0);
3029 Py_DECREF(item);
3030 if (err < 0)
3031 return -1;
3032 total++;
3033 if (++this_batch == BATCHSIZE)
3034 break;
3035 }
3036 if (_Pickler_Write(self, &appends_op, 1) < 0)
3037 return -1;
3038
3039 } while (total < PyList_GET_SIZE(obj));
3040
3041 return 0;
3042 }
3043
3044 static int
save_list(PicklerObject * self,PyObject * obj)3045 save_list(PicklerObject *self, PyObject *obj)
3046 {
3047 char header[3];
3048 Py_ssize_t len;
3049 int status = 0;
3050
3051 if (self->fast && !fast_save_enter(self, obj))
3052 goto error;
3053
3054 /* Create an empty list. */
3055 if (self->bin) {
3056 header[0] = EMPTY_LIST;
3057 len = 1;
3058 }
3059 else {
3060 header[0] = MARK;
3061 header[1] = LIST;
3062 len = 2;
3063 }
3064
3065 if (_Pickler_Write(self, header, len) < 0)
3066 goto error;
3067
3068 /* Get list length, and bow out early if empty. */
3069 if ((len = PyList_Size(obj)) < 0)
3070 goto error;
3071
3072 if (memo_put(self, obj) < 0)
3073 goto error;
3074
3075 if (len != 0) {
3076 /* Materialize the list elements. */
3077 if (PyList_CheckExact(obj) && self->proto > 0) {
3078 if (_Py_EnterRecursiveCall(" while pickling an object"))
3079 goto error;
3080 status = batch_list_exact(self, obj);
3081 _Py_LeaveRecursiveCall();
3082 } else {
3083 PyObject *iter = PyObject_GetIter(obj);
3084 if (iter == NULL)
3085 goto error;
3086
3087 if (_Py_EnterRecursiveCall(" while pickling an object")) {
3088 Py_DECREF(iter);
3089 goto error;
3090 }
3091 status = batch_list(self, iter);
3092 _Py_LeaveRecursiveCall();
3093 Py_DECREF(iter);
3094 }
3095 }
3096 if (0) {
3097 error:
3098 status = -1;
3099 }
3100
3101 if (self->fast && !fast_save_leave(self, obj))
3102 status = -1;
3103
3104 return status;
3105 }
3106
3107 /* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3108 * MARK key value ... key value SETITEMS
3109 * opcode sequences. Calling code should have arranged to first create an
3110 * empty dict, or dict-like object, for the SETITEMS to operate on.
3111 * Returns 0 on success, <0 on error.
3112 *
3113 * This is very much like batch_list(). The difference between saving
3114 * elements directly, and picking apart two-tuples, is so long-winded at
3115 * the C level, though, that attempts to combine these routines were too
3116 * ugly to bear.
3117 */
3118 static int
batch_dict(PicklerObject * self,PyObject * iter)3119 batch_dict(PicklerObject *self, PyObject *iter)
3120 {
3121 PyObject *obj = NULL;
3122 PyObject *firstitem = NULL;
3123 int i, n;
3124
3125 const char mark_op = MARK;
3126 const char setitem_op = SETITEM;
3127 const char setitems_op = SETITEMS;
3128
3129 assert(iter != NULL);
3130
3131 if (self->proto == 0) {
3132 /* SETITEMS isn't available; do one at a time. */
3133 for (;;) {
3134 obj = PyIter_Next(iter);
3135 if (obj == NULL) {
3136 if (PyErr_Occurred())
3137 return -1;
3138 break;
3139 }
3140 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3141 PyErr_SetString(PyExc_TypeError, "dict items "
3142 "iterator must return 2-tuples");
3143 return -1;
3144 }
3145 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
3146 if (i >= 0)
3147 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
3148 Py_DECREF(obj);
3149 if (i < 0)
3150 return -1;
3151 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3152 return -1;
3153 }
3154 return 0;
3155 }
3156
3157 /* proto > 0: write in batches of BATCHSIZE. */
3158 do {
3159 /* Get first item */
3160 firstitem = PyIter_Next(iter);
3161 if (firstitem == NULL) {
3162 if (PyErr_Occurred())
3163 goto error;
3164
3165 /* nothing more to add */
3166 break;
3167 }
3168 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3169 PyErr_SetString(PyExc_TypeError, "dict items "
3170 "iterator must return 2-tuples");
3171 goto error;
3172 }
3173
3174 /* Try to get a second item */
3175 obj = PyIter_Next(iter);
3176 if (obj == NULL) {
3177 if (PyErr_Occurred())
3178 goto error;
3179
3180 /* Only one item to write */
3181 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3182 goto error;
3183 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3184 goto error;
3185 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3186 goto error;
3187 Py_CLEAR(firstitem);
3188 break;
3189 }
3190
3191 /* More than one item to write */
3192
3193 /* Pump out MARK, items, SETITEMS. */
3194 if (_Pickler_Write(self, &mark_op, 1) < 0)
3195 goto error;
3196
3197 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3198 goto error;
3199 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3200 goto error;
3201 Py_CLEAR(firstitem);
3202 n = 1;
3203
3204 /* Fetch and save up to BATCHSIZE items */
3205 while (obj) {
3206 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3207 PyErr_SetString(PyExc_TypeError, "dict items "
3208 "iterator must return 2-tuples");
3209 goto error;
3210 }
3211 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3212 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3213 goto error;
3214 Py_CLEAR(obj);
3215 n += 1;
3216
3217 if (n == BATCHSIZE)
3218 break;
3219
3220 obj = PyIter_Next(iter);
3221 if (obj == NULL) {
3222 if (PyErr_Occurred())
3223 goto error;
3224 break;
3225 }
3226 }
3227
3228 if (_Pickler_Write(self, &setitems_op, 1) < 0)
3229 goto error;
3230
3231 } while (n == BATCHSIZE);
3232 return 0;
3233
3234 error:
3235 Py_XDECREF(firstitem);
3236 Py_XDECREF(obj);
3237 return -1;
3238 }
3239
3240 /* This is a variant of batch_dict() above that specializes for dicts, with no
3241 * support for dict subclasses. Like batch_dict(), we batch up chunks of
3242 * MARK key value ... key value SETITEMS
3243 * opcode sequences. Calling code should have arranged to first create an
3244 * empty dict, or dict-like object, for the SETITEMS to operate on.
3245 * Returns 0 on success, -1 on error.
3246 *
3247 * Note that this currently doesn't work for protocol 0.
3248 */
3249 static int
batch_dict_exact(PicklerObject * self,PyObject * obj)3250 batch_dict_exact(PicklerObject *self, PyObject *obj)
3251 {
3252 PyObject *key = NULL, *value = NULL;
3253 int i;
3254 Py_ssize_t dict_size, ppos = 0;
3255
3256 const char mark_op = MARK;
3257 const char setitem_op = SETITEM;
3258 const char setitems_op = SETITEMS;
3259
3260 assert(obj != NULL && PyDict_CheckExact(obj));
3261 assert(self->proto > 0);
3262
3263 dict_size = PyDict_GET_SIZE(obj);
3264
3265 /* Special-case len(d) == 1 to save space. */
3266 if (dict_size == 1) {
3267 PyDict_Next(obj, &ppos, &key, &value);
3268 Py_INCREF(key);
3269 Py_INCREF(value);
3270 if (save(self, key, 0) < 0) {
3271 goto error;
3272 }
3273 if (save(self, value, 0) < 0) {
3274 goto error;
3275 }
3276 Py_CLEAR(key);
3277 Py_CLEAR(value);
3278 if (_Pickler_Write(self, &setitem_op, 1) < 0)
3279 return -1;
3280 return 0;
3281 }
3282
3283 /* Write in batches of BATCHSIZE. */
3284 do {
3285 i = 0;
3286 if (_Pickler_Write(self, &mark_op, 1) < 0)
3287 return -1;
3288 while (PyDict_Next(obj, &ppos, &key, &value)) {
3289 Py_INCREF(key);
3290 Py_INCREF(value);
3291 if (save(self, key, 0) < 0) {
3292 goto error;
3293 }
3294 if (save(self, value, 0) < 0) {
3295 goto error;
3296 }
3297 Py_CLEAR(key);
3298 Py_CLEAR(value);
3299 if (++i == BATCHSIZE)
3300 break;
3301 }
3302 if (_Pickler_Write(self, &setitems_op, 1) < 0)
3303 return -1;
3304 if (PyDict_GET_SIZE(obj) != dict_size) {
3305 PyErr_Format(
3306 PyExc_RuntimeError,
3307 "dictionary changed size during iteration");
3308 return -1;
3309 }
3310
3311 } while (i == BATCHSIZE);
3312 return 0;
3313 error:
3314 Py_XDECREF(key);
3315 Py_XDECREF(value);
3316 return -1;
3317 }
3318
3319 static int
save_dict(PicklerObject * self,PyObject * obj)3320 save_dict(PicklerObject *self, PyObject *obj)
3321 {
3322 PyObject *items, *iter;
3323 char header[3];
3324 Py_ssize_t len;
3325 int status = 0;
3326 assert(PyDict_Check(obj));
3327
3328 if (self->fast && !fast_save_enter(self, obj))
3329 goto error;
3330
3331 /* Create an empty dict. */
3332 if (self->bin) {
3333 header[0] = EMPTY_DICT;
3334 len = 1;
3335 }
3336 else {
3337 header[0] = MARK;
3338 header[1] = DICT;
3339 len = 2;
3340 }
3341
3342 if (_Pickler_Write(self, header, len) < 0)
3343 goto error;
3344
3345 if (memo_put(self, obj) < 0)
3346 goto error;
3347
3348 if (PyDict_GET_SIZE(obj)) {
3349 /* Save the dict items. */
3350 if (PyDict_CheckExact(obj) && self->proto > 0) {
3351 /* We can take certain shortcuts if we know this is a dict and
3352 not a dict subclass. */
3353 if (_Py_EnterRecursiveCall(" while pickling an object"))
3354 goto error;
3355 status = batch_dict_exact(self, obj);
3356 _Py_LeaveRecursiveCall();
3357 } else {
3358 items = PyObject_CallMethodNoArgs(obj, &_Py_ID(items));
3359 if (items == NULL)
3360 goto error;
3361 iter = PyObject_GetIter(items);
3362 Py_DECREF(items);
3363 if (iter == NULL)
3364 goto error;
3365 if (_Py_EnterRecursiveCall(" while pickling an object")) {
3366 Py_DECREF(iter);
3367 goto error;
3368 }
3369 status = batch_dict(self, iter);
3370 _Py_LeaveRecursiveCall();
3371 Py_DECREF(iter);
3372 }
3373 }
3374
3375 if (0) {
3376 error:
3377 status = -1;
3378 }
3379
3380 if (self->fast && !fast_save_leave(self, obj))
3381 status = -1;
3382
3383 return status;
3384 }
3385
3386 static int
save_set(PicklerObject * self,PyObject * obj)3387 save_set(PicklerObject *self, PyObject *obj)
3388 {
3389 PyObject *item;
3390 int i;
3391 Py_ssize_t set_size, ppos = 0;
3392 Py_hash_t hash;
3393
3394 const char empty_set_op = EMPTY_SET;
3395 const char mark_op = MARK;
3396 const char additems_op = ADDITEMS;
3397
3398 if (self->proto < 4) {
3399 PyObject *items;
3400 PyObject *reduce_value;
3401 int status;
3402
3403 items = PySequence_List(obj);
3404 if (items == NULL) {
3405 return -1;
3406 }
3407 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3408 Py_DECREF(items);
3409 if (reduce_value == NULL) {
3410 return -1;
3411 }
3412 /* save_reduce() will memoize the object automatically. */
3413 status = save_reduce(self, reduce_value, obj);
3414 Py_DECREF(reduce_value);
3415 return status;
3416 }
3417
3418 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3419 return -1;
3420
3421 if (memo_put(self, obj) < 0)
3422 return -1;
3423
3424 set_size = PySet_GET_SIZE(obj);
3425 if (set_size == 0)
3426 return 0; /* nothing to do */
3427
3428 /* Write in batches of BATCHSIZE. */
3429 do {
3430 i = 0;
3431 if (_Pickler_Write(self, &mark_op, 1) < 0)
3432 return -1;
3433 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3434 Py_INCREF(item);
3435 int err = save(self, item, 0);
3436 Py_CLEAR(item);
3437 if (err < 0)
3438 return -1;
3439 if (++i == BATCHSIZE)
3440 break;
3441 }
3442 if (_Pickler_Write(self, &additems_op, 1) < 0)
3443 return -1;
3444 if (PySet_GET_SIZE(obj) != set_size) {
3445 PyErr_Format(
3446 PyExc_RuntimeError,
3447 "set changed size during iteration");
3448 return -1;
3449 }
3450 } while (i == BATCHSIZE);
3451
3452 return 0;
3453 }
3454
3455 static int
save_frozenset(PicklerObject * self,PyObject * obj)3456 save_frozenset(PicklerObject *self, PyObject *obj)
3457 {
3458 PyObject *iter;
3459
3460 const char mark_op = MARK;
3461 const char frozenset_op = FROZENSET;
3462
3463 if (self->fast && !fast_save_enter(self, obj))
3464 return -1;
3465
3466 if (self->proto < 4) {
3467 PyObject *items;
3468 PyObject *reduce_value;
3469 int status;
3470
3471 items = PySequence_List(obj);
3472 if (items == NULL) {
3473 return -1;
3474 }
3475 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3476 items);
3477 Py_DECREF(items);
3478 if (reduce_value == NULL) {
3479 return -1;
3480 }
3481 /* save_reduce() will memoize the object automatically. */
3482 status = save_reduce(self, reduce_value, obj);
3483 Py_DECREF(reduce_value);
3484 return status;
3485 }
3486
3487 if (_Pickler_Write(self, &mark_op, 1) < 0)
3488 return -1;
3489
3490 iter = PyObject_GetIter(obj);
3491 if (iter == NULL) {
3492 return -1;
3493 }
3494 for (;;) {
3495 PyObject *item;
3496
3497 item = PyIter_Next(iter);
3498 if (item == NULL) {
3499 if (PyErr_Occurred()) {
3500 Py_DECREF(iter);
3501 return -1;
3502 }
3503 break;
3504 }
3505 if (save(self, item, 0) < 0) {
3506 Py_DECREF(item);
3507 Py_DECREF(iter);
3508 return -1;
3509 }
3510 Py_DECREF(item);
3511 }
3512 Py_DECREF(iter);
3513
3514 /* If the object is already in the memo, this means it is
3515 recursive. In this case, throw away everything we put on the
3516 stack, and fetch the object back from the memo. */
3517 if (PyMemoTable_Get(self->memo, obj)) {
3518 const char pop_mark_op = POP_MARK;
3519
3520 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3521 return -1;
3522 if (memo_get(self, obj) < 0)
3523 return -1;
3524 return 0;
3525 }
3526
3527 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3528 return -1;
3529 if (memo_put(self, obj) < 0)
3530 return -1;
3531
3532 return 0;
3533 }
3534
3535 static int
fix_imports(PyObject ** module_name,PyObject ** global_name)3536 fix_imports(PyObject **module_name, PyObject **global_name)
3537 {
3538 PyObject *key;
3539 PyObject *item;
3540 PickleState *st = _Pickle_GetGlobalState();
3541
3542 key = PyTuple_Pack(2, *module_name, *global_name);
3543 if (key == NULL)
3544 return -1;
3545 item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
3546 Py_DECREF(key);
3547 if (item) {
3548 PyObject *fixed_module_name;
3549 PyObject *fixed_global_name;
3550
3551 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3552 PyErr_Format(PyExc_RuntimeError,
3553 "_compat_pickle.REVERSE_NAME_MAPPING values "
3554 "should be 2-tuples, not %.200s",
3555 Py_TYPE(item)->tp_name);
3556 return -1;
3557 }
3558 fixed_module_name = PyTuple_GET_ITEM(item, 0);
3559 fixed_global_name = PyTuple_GET_ITEM(item, 1);
3560 if (!PyUnicode_Check(fixed_module_name) ||
3561 !PyUnicode_Check(fixed_global_name)) {
3562 PyErr_Format(PyExc_RuntimeError,
3563 "_compat_pickle.REVERSE_NAME_MAPPING values "
3564 "should be pairs of str, not (%.200s, %.200s)",
3565 Py_TYPE(fixed_module_name)->tp_name,
3566 Py_TYPE(fixed_global_name)->tp_name);
3567 return -1;
3568 }
3569
3570 Py_CLEAR(*module_name);
3571 Py_CLEAR(*global_name);
3572 Py_INCREF(fixed_module_name);
3573 Py_INCREF(fixed_global_name);
3574 *module_name = fixed_module_name;
3575 *global_name = fixed_global_name;
3576 return 0;
3577 }
3578 else if (PyErr_Occurred()) {
3579 return -1;
3580 }
3581
3582 item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
3583 if (item) {
3584 if (!PyUnicode_Check(item)) {
3585 PyErr_Format(PyExc_RuntimeError,
3586 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3587 "should be strings, not %.200s",
3588 Py_TYPE(item)->tp_name);
3589 return -1;
3590 }
3591 Py_INCREF(item);
3592 Py_XSETREF(*module_name, item);
3593 }
3594 else if (PyErr_Occurred()) {
3595 return -1;
3596 }
3597
3598 return 0;
3599 }
3600
3601 static int
save_global(PicklerObject * self,PyObject * obj,PyObject * name)3602 save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3603 {
3604 PyObject *global_name = NULL;
3605 PyObject *module_name = NULL;
3606 PyObject *module = NULL;
3607 PyObject *parent = NULL;
3608 PyObject *dotted_path = NULL;
3609 PyObject *lastname = NULL;
3610 PyObject *cls;
3611 PickleState *st = _Pickle_GetGlobalState();
3612 int status = 0;
3613
3614 const char global_op = GLOBAL;
3615
3616 if (name) {
3617 Py_INCREF(name);
3618 global_name = name;
3619 }
3620 else {
3621 if (_PyObject_LookupAttr(obj, &_Py_ID(__qualname__), &global_name) < 0)
3622 goto error;
3623 if (global_name == NULL) {
3624 global_name = PyObject_GetAttr(obj, &_Py_ID(__name__));
3625 if (global_name == NULL)
3626 goto error;
3627 }
3628 }
3629
3630 dotted_path = get_dotted_path(module, global_name);
3631 if (dotted_path == NULL)
3632 goto error;
3633 module_name = whichmodule(obj, dotted_path);
3634 if (module_name == NULL)
3635 goto error;
3636
3637 /* XXX: Change to use the import C API directly with level=0 to disallow
3638 relative imports.
3639
3640 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3641 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3642 custom import functions (IMHO, this would be a nice security
3643 feature). The import C API would need to be extended to support the
3644 extra parameters of __import__ to fix that. */
3645 module = PyImport_Import(module_name);
3646 if (module == NULL) {
3647 PyErr_Format(st->PicklingError,
3648 "Can't pickle %R: import of module %R failed",
3649 obj, module_name);
3650 goto error;
3651 }
3652 lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3653 Py_INCREF(lastname);
3654 cls = get_deep_attribute(module, dotted_path, &parent);
3655 Py_CLEAR(dotted_path);
3656 if (cls == NULL) {
3657 PyErr_Format(st->PicklingError,
3658 "Can't pickle %R: attribute lookup %S on %S failed",
3659 obj, global_name, module_name);
3660 goto error;
3661 }
3662 if (cls != obj) {
3663 Py_DECREF(cls);
3664 PyErr_Format(st->PicklingError,
3665 "Can't pickle %R: it's not the same object as %S.%S",
3666 obj, module_name, global_name);
3667 goto error;
3668 }
3669 Py_DECREF(cls);
3670
3671 if (self->proto >= 2) {
3672 /* See whether this is in the extension registry, and if
3673 * so generate an EXT opcode.
3674 */
3675 PyObject *extension_key;
3676 PyObject *code_obj; /* extension code as Python object */
3677 long code; /* extension code as C value */
3678 char pdata[5];
3679 Py_ssize_t n;
3680
3681 extension_key = PyTuple_Pack(2, module_name, global_name);
3682 if (extension_key == NULL) {
3683 goto error;
3684 }
3685 code_obj = PyDict_GetItemWithError(st->extension_registry,
3686 extension_key);
3687 Py_DECREF(extension_key);
3688 /* The object is not registered in the extension registry.
3689 This is the most likely code path. */
3690 if (code_obj == NULL) {
3691 if (PyErr_Occurred()) {
3692 goto error;
3693 }
3694 goto gen_global;
3695 }
3696
3697 /* XXX: pickle.py doesn't check neither the type, nor the range
3698 of the value returned by the extension_registry. It should for
3699 consistency. */
3700
3701 /* Verify code_obj has the right type and value. */
3702 if (!PyLong_Check(code_obj)) {
3703 PyErr_Format(st->PicklingError,
3704 "Can't pickle %R: extension code %R isn't an integer",
3705 obj, code_obj);
3706 goto error;
3707 }
3708 code = PyLong_AS_LONG(code_obj);
3709 if (code <= 0 || code > 0x7fffffffL) {
3710 if (!PyErr_Occurred())
3711 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3712 "code %ld is out of range", obj, code);
3713 goto error;
3714 }
3715
3716 /* Generate an EXT opcode. */
3717 if (code <= 0xff) {
3718 pdata[0] = EXT1;
3719 pdata[1] = (unsigned char)code;
3720 n = 2;
3721 }
3722 else if (code <= 0xffff) {
3723 pdata[0] = EXT2;
3724 pdata[1] = (unsigned char)(code & 0xff);
3725 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3726 n = 3;
3727 }
3728 else {
3729 pdata[0] = EXT4;
3730 pdata[1] = (unsigned char)(code & 0xff);
3731 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3732 pdata[3] = (unsigned char)((code >> 16) & 0xff);
3733 pdata[4] = (unsigned char)((code >> 24) & 0xff);
3734 n = 5;
3735 }
3736
3737 if (_Pickler_Write(self, pdata, n) < 0)
3738 goto error;
3739 }
3740 else {
3741 gen_global:
3742 if (parent == module) {
3743 Py_INCREF(lastname);
3744 Py_DECREF(global_name);
3745 global_name = lastname;
3746 }
3747 if (self->proto >= 4) {
3748 const char stack_global_op = STACK_GLOBAL;
3749
3750 if (save(self, module_name, 0) < 0)
3751 goto error;
3752 if (save(self, global_name, 0) < 0)
3753 goto error;
3754
3755 if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3756 goto error;
3757 }
3758 else if (parent != module) {
3759 PickleState *st = _Pickle_GetGlobalState();
3760 PyObject *reduce_value = Py_BuildValue("(O(OO))",
3761 st->getattr, parent, lastname);
3762 if (reduce_value == NULL)
3763 goto error;
3764 status = save_reduce(self, reduce_value, NULL);
3765 Py_DECREF(reduce_value);
3766 if (status < 0)
3767 goto error;
3768 }
3769 else {
3770 /* Generate a normal global opcode if we are using a pickle
3771 protocol < 4, or if the object is not registered in the
3772 extension registry. */
3773 PyObject *encoded;
3774 PyObject *(*unicode_encoder)(PyObject *);
3775
3776 if (_Pickler_Write(self, &global_op, 1) < 0)
3777 goto error;
3778
3779 /* For protocol < 3 and if the user didn't request against doing
3780 so, we convert module names to the old 2.x module names. */
3781 if (self->proto < 3 && self->fix_imports) {
3782 if (fix_imports(&module_name, &global_name) < 0) {
3783 goto error;
3784 }
3785 }
3786
3787 /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3788 both the module name and the global name using UTF-8. We do so
3789 only when we are using the pickle protocol newer than version
3790 3. This is to ensure compatibility with older Unpickler running
3791 on Python 2.x. */
3792 if (self->proto == 3) {
3793 unicode_encoder = PyUnicode_AsUTF8String;
3794 }
3795 else {
3796 unicode_encoder = PyUnicode_AsASCIIString;
3797 }
3798 encoded = unicode_encoder(module_name);
3799 if (encoded == NULL) {
3800 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3801 PyErr_Format(st->PicklingError,
3802 "can't pickle module identifier '%S' using "
3803 "pickle protocol %i",
3804 module_name, self->proto);
3805 goto error;
3806 }
3807 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3808 PyBytes_GET_SIZE(encoded)) < 0) {
3809 Py_DECREF(encoded);
3810 goto error;
3811 }
3812 Py_DECREF(encoded);
3813 if(_Pickler_Write(self, "\n", 1) < 0)
3814 goto error;
3815
3816 /* Save the name of the module. */
3817 encoded = unicode_encoder(global_name);
3818 if (encoded == NULL) {
3819 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3820 PyErr_Format(st->PicklingError,
3821 "can't pickle global identifier '%S' using "
3822 "pickle protocol %i",
3823 global_name, self->proto);
3824 goto error;
3825 }
3826 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3827 PyBytes_GET_SIZE(encoded)) < 0) {
3828 Py_DECREF(encoded);
3829 goto error;
3830 }
3831 Py_DECREF(encoded);
3832 if (_Pickler_Write(self, "\n", 1) < 0)
3833 goto error;
3834 }
3835 /* Memoize the object. */
3836 if (memo_put(self, obj) < 0)
3837 goto error;
3838 }
3839
3840 if (0) {
3841 error:
3842 status = -1;
3843 }
3844 Py_XDECREF(module_name);
3845 Py_XDECREF(global_name);
3846 Py_XDECREF(module);
3847 Py_XDECREF(parent);
3848 Py_XDECREF(dotted_path);
3849 Py_XDECREF(lastname);
3850
3851 return status;
3852 }
3853
3854 static int
save_singleton_type(PicklerObject * self,PyObject * obj,PyObject * singleton)3855 save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3856 {
3857 PyObject *reduce_value;
3858 int status;
3859
3860 reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3861 if (reduce_value == NULL) {
3862 return -1;
3863 }
3864 status = save_reduce(self, reduce_value, obj);
3865 Py_DECREF(reduce_value);
3866 return status;
3867 }
3868
3869 static int
save_type(PicklerObject * self,PyObject * obj)3870 save_type(PicklerObject *self, PyObject *obj)
3871 {
3872 if (obj == (PyObject *)&_PyNone_Type) {
3873 return save_singleton_type(self, obj, Py_None);
3874 }
3875 else if (obj == (PyObject *)&PyEllipsis_Type) {
3876 return save_singleton_type(self, obj, Py_Ellipsis);
3877 }
3878 else if (obj == (PyObject *)&_PyNotImplemented_Type) {
3879 return save_singleton_type(self, obj, Py_NotImplemented);
3880 }
3881 return save_global(self, obj, NULL);
3882 }
3883
3884 static int
save_pers(PicklerObject * self,PyObject * obj)3885 save_pers(PicklerObject *self, PyObject *obj)
3886 {
3887 PyObject *pid = NULL;
3888 int status = 0;
3889
3890 const char persid_op = PERSID;
3891 const char binpersid_op = BINPERSID;
3892
3893 pid = call_method(self->pers_func, self->pers_func_self, obj);
3894 if (pid == NULL)
3895 return -1;
3896
3897 if (pid != Py_None) {
3898 if (self->bin) {
3899 if (save(self, pid, 1) < 0 ||
3900 _Pickler_Write(self, &binpersid_op, 1) < 0)
3901 goto error;
3902 }
3903 else {
3904 PyObject *pid_str;
3905
3906 pid_str = PyObject_Str(pid);
3907 if (pid_str == NULL)
3908 goto error;
3909
3910 /* XXX: Should it check whether the pid contains embedded
3911 newlines? */
3912 if (!PyUnicode_IS_ASCII(pid_str)) {
3913 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3914 "persistent IDs in protocol 0 must be "
3915 "ASCII strings");
3916 Py_DECREF(pid_str);
3917 goto error;
3918 }
3919
3920 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3921 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3922 PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3923 _Pickler_Write(self, "\n", 1) < 0) {
3924 Py_DECREF(pid_str);
3925 goto error;
3926 }
3927 Py_DECREF(pid_str);
3928 }
3929 status = 1;
3930 }
3931
3932 if (0) {
3933 error:
3934 status = -1;
3935 }
3936 Py_XDECREF(pid);
3937
3938 return status;
3939 }
3940
3941 static PyObject *
get_class(PyObject * obj)3942 get_class(PyObject *obj)
3943 {
3944 PyObject *cls;
3945
3946 if (_PyObject_LookupAttr(obj, &_Py_ID(__class__), &cls) == 0) {
3947 cls = (PyObject *) Py_TYPE(obj);
3948 Py_INCREF(cls);
3949 }
3950 return cls;
3951 }
3952
3953 /* We're saving obj, and args is the 2-thru-5 tuple returned by the
3954 * appropriate __reduce__ method for obj.
3955 */
3956 static int
save_reduce(PicklerObject * self,PyObject * args,PyObject * obj)3957 save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3958 {
3959 PyObject *callable;
3960 PyObject *argtup;
3961 PyObject *state = NULL;
3962 PyObject *listitems = Py_None;
3963 PyObject *dictitems = Py_None;
3964 PyObject *state_setter = Py_None;
3965 PickleState *st = _Pickle_GetGlobalState();
3966 Py_ssize_t size;
3967 int use_newobj = 0, use_newobj_ex = 0;
3968
3969 const char reduce_op = REDUCE;
3970 const char build_op = BUILD;
3971 const char newobj_op = NEWOBJ;
3972 const char newobj_ex_op = NEWOBJ_EX;
3973
3974 size = PyTuple_Size(args);
3975 if (size < 2 || size > 6) {
3976 PyErr_SetString(st->PicklingError, "tuple returned by "
3977 "__reduce__ must contain 2 through 6 elements");
3978 return -1;
3979 }
3980
3981 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
3982 &callable, &argtup, &state, &listitems, &dictitems,
3983 &state_setter))
3984 return -1;
3985
3986 if (!PyCallable_Check(callable)) {
3987 PyErr_SetString(st->PicklingError, "first item of the tuple "
3988 "returned by __reduce__ must be callable");
3989 return -1;
3990 }
3991 if (!PyTuple_Check(argtup)) {
3992 PyErr_SetString(st->PicklingError, "second item of the tuple "
3993 "returned by __reduce__ must be a tuple");
3994 return -1;
3995 }
3996
3997 if (state == Py_None)
3998 state = NULL;
3999
4000 if (listitems == Py_None)
4001 listitems = NULL;
4002 else if (!PyIter_Check(listitems)) {
4003 PyErr_Format(st->PicklingError, "fourth element of the tuple "
4004 "returned by __reduce__ must be an iterator, not %s",
4005 Py_TYPE(listitems)->tp_name);
4006 return -1;
4007 }
4008
4009 if (dictitems == Py_None)
4010 dictitems = NULL;
4011 else if (!PyIter_Check(dictitems)) {
4012 PyErr_Format(st->PicklingError, "fifth element of the tuple "
4013 "returned by __reduce__ must be an iterator, not %s",
4014 Py_TYPE(dictitems)->tp_name);
4015 return -1;
4016 }
4017
4018 if (state_setter == Py_None)
4019 state_setter = NULL;
4020 else if (!PyCallable_Check(state_setter)) {
4021 PyErr_Format(st->PicklingError, "sixth element of the tuple "
4022 "returned by __reduce__ must be a function, not %s",
4023 Py_TYPE(state_setter)->tp_name);
4024 return -1;
4025 }
4026
4027 if (self->proto >= 2) {
4028 PyObject *name;
4029
4030 if (_PyObject_LookupAttr(callable, &_Py_ID(__name__), &name) < 0) {
4031 return -1;
4032 }
4033 if (name != NULL && PyUnicode_Check(name)) {
4034 use_newobj_ex = _PyUnicode_Equal(name, &_Py_ID(__newobj_ex__));
4035 if (!use_newobj_ex) {
4036 use_newobj = _PyUnicode_Equal(name, &_Py_ID(__newobj__));
4037 }
4038 }
4039 Py_XDECREF(name);
4040 }
4041
4042 if (use_newobj_ex) {
4043 PyObject *cls;
4044 PyObject *args;
4045 PyObject *kwargs;
4046
4047 if (PyTuple_GET_SIZE(argtup) != 3) {
4048 PyErr_Format(st->PicklingError,
4049 "length of the NEWOBJ_EX argument tuple must be "
4050 "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
4051 return -1;
4052 }
4053
4054 cls = PyTuple_GET_ITEM(argtup, 0);
4055 if (!PyType_Check(cls)) {
4056 PyErr_Format(st->PicklingError,
4057 "first item from NEWOBJ_EX argument tuple must "
4058 "be a class, not %.200s", Py_TYPE(cls)->tp_name);
4059 return -1;
4060 }
4061 args = PyTuple_GET_ITEM(argtup, 1);
4062 if (!PyTuple_Check(args)) {
4063 PyErr_Format(st->PicklingError,
4064 "second item from NEWOBJ_EX argument tuple must "
4065 "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
4066 return -1;
4067 }
4068 kwargs = PyTuple_GET_ITEM(argtup, 2);
4069 if (!PyDict_Check(kwargs)) {
4070 PyErr_Format(st->PicklingError,
4071 "third item from NEWOBJ_EX argument tuple must "
4072 "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
4073 return -1;
4074 }
4075
4076 if (self->proto >= 4) {
4077 if (save(self, cls, 0) < 0 ||
4078 save(self, args, 0) < 0 ||
4079 save(self, kwargs, 0) < 0 ||
4080 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
4081 return -1;
4082 }
4083 }
4084 else {
4085 PyObject *newargs;
4086 PyObject *cls_new;
4087 Py_ssize_t i;
4088
4089 newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
4090 if (newargs == NULL)
4091 return -1;
4092
4093 cls_new = PyObject_GetAttr(cls, &_Py_ID(__new__));
4094 if (cls_new == NULL) {
4095 Py_DECREF(newargs);
4096 return -1;
4097 }
4098 PyTuple_SET_ITEM(newargs, 0, cls_new);
4099 Py_INCREF(cls);
4100 PyTuple_SET_ITEM(newargs, 1, cls);
4101 for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
4102 PyObject *item = PyTuple_GET_ITEM(args, i);
4103 Py_INCREF(item);
4104 PyTuple_SET_ITEM(newargs, i + 2, item);
4105 }
4106
4107 callable = PyObject_Call(st->partial, newargs, kwargs);
4108 Py_DECREF(newargs);
4109 if (callable == NULL)
4110 return -1;
4111
4112 newargs = PyTuple_New(0);
4113 if (newargs == NULL) {
4114 Py_DECREF(callable);
4115 return -1;
4116 }
4117
4118 if (save(self, callable, 0) < 0 ||
4119 save(self, newargs, 0) < 0 ||
4120 _Pickler_Write(self, &reduce_op, 1) < 0) {
4121 Py_DECREF(newargs);
4122 Py_DECREF(callable);
4123 return -1;
4124 }
4125 Py_DECREF(newargs);
4126 Py_DECREF(callable);
4127 }
4128 }
4129 else if (use_newobj) {
4130 PyObject *cls;
4131 PyObject *newargtup;
4132 PyObject *obj_class;
4133 int p;
4134
4135 /* Sanity checks. */
4136 if (PyTuple_GET_SIZE(argtup) < 1) {
4137 PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
4138 return -1;
4139 }
4140
4141 cls = PyTuple_GET_ITEM(argtup, 0);
4142 if (!PyType_Check(cls)) {
4143 PyErr_SetString(st->PicklingError, "args[0] from "
4144 "__newobj__ args is not a type");
4145 return -1;
4146 }
4147
4148 if (obj != NULL) {
4149 obj_class = get_class(obj);
4150 if (obj_class == NULL) {
4151 return -1;
4152 }
4153 p = obj_class != cls;
4154 Py_DECREF(obj_class);
4155 if (p) {
4156 PyErr_SetString(st->PicklingError, "args[0] from "
4157 "__newobj__ args has the wrong class");
4158 return -1;
4159 }
4160 }
4161 /* XXX: These calls save() are prone to infinite recursion. Imagine
4162 what happen if the value returned by the __reduce__() method of
4163 some extension type contains another object of the same type. Ouch!
4164
4165 Here is a quick example, that I ran into, to illustrate what I
4166 mean:
4167
4168 >>> import pickle, copyreg
4169 >>> copyreg.dispatch_table.pop(complex)
4170 >>> pickle.dumps(1+2j)
4171 Traceback (most recent call last):
4172 ...
4173 RecursionError: maximum recursion depth exceeded
4174
4175 Removing the complex class from copyreg.dispatch_table made the
4176 __reduce_ex__() method emit another complex object:
4177
4178 >>> (1+1j).__reduce_ex__(2)
4179 (<function __newobj__ at 0xb7b71c3c>,
4180 (<class 'complex'>, (1+1j)), None, None, None)
4181
4182 Thus when save() was called on newargstup (the 2nd item) recursion
4183 ensued. Of course, the bug was in the complex class which had a
4184 broken __getnewargs__() that emitted another complex object. But,
4185 the point, here, is it is quite easy to end up with a broken reduce
4186 function. */
4187
4188 /* Save the class and its __new__ arguments. */
4189 if (save(self, cls, 0) < 0)
4190 return -1;
4191
4192 newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
4193 if (newargtup == NULL)
4194 return -1;
4195
4196 p = save(self, newargtup, 0);
4197 Py_DECREF(newargtup);
4198 if (p < 0)
4199 return -1;
4200
4201 /* Add NEWOBJ opcode. */
4202 if (_Pickler_Write(self, &newobj_op, 1) < 0)
4203 return -1;
4204 }
4205 else { /* Not using NEWOBJ. */
4206 if (save(self, callable, 0) < 0 ||
4207 save(self, argtup, 0) < 0 ||
4208 _Pickler_Write(self, &reduce_op, 1) < 0)
4209 return -1;
4210 }
4211
4212 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
4213 the caller do not want to memoize the object. Not particularly useful,
4214 but that is to mimic the behavior save_reduce() in pickle.py when
4215 obj is None. */
4216 if (obj != NULL) {
4217 /* If the object is already in the memo, this means it is
4218 recursive. In this case, throw away everything we put on the
4219 stack, and fetch the object back from the memo. */
4220 if (PyMemoTable_Get(self->memo, obj)) {
4221 const char pop_op = POP;
4222
4223 if (_Pickler_Write(self, &pop_op, 1) < 0)
4224 return -1;
4225 if (memo_get(self, obj) < 0)
4226 return -1;
4227
4228 return 0;
4229 }
4230 else if (memo_put(self, obj) < 0)
4231 return -1;
4232 }
4233
4234 if (listitems && batch_list(self, listitems) < 0)
4235 return -1;
4236
4237 if (dictitems && batch_dict(self, dictitems) < 0)
4238 return -1;
4239
4240 if (state) {
4241 if (state_setter == NULL) {
4242 if (save(self, state, 0) < 0 ||
4243 _Pickler_Write(self, &build_op, 1) < 0)
4244 return -1;
4245 }
4246 else {
4247
4248 /* If a state_setter is specified, call it instead of load_build to
4249 * update obj's with its previous state.
4250 * The first 4 save/write instructions push state_setter and its
4251 * tuple of expected arguments (obj, state) onto the stack. The
4252 * REDUCE opcode triggers the state_setter(obj, state) function
4253 * call. Finally, because state-updating routines only do in-place
4254 * modification, the whole operation has to be stack-transparent.
4255 * Thus, we finally pop the call's output from the stack.*/
4256
4257 const char tupletwo_op = TUPLE2;
4258 const char pop_op = POP;
4259 if (save(self, state_setter, 0) < 0 ||
4260 save(self, obj, 0) < 0 || save(self, state, 0) < 0 ||
4261 _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
4262 _Pickler_Write(self, &reduce_op, 1) < 0 ||
4263 _Pickler_Write(self, &pop_op, 1) < 0)
4264 return -1;
4265 }
4266 }
4267 return 0;
4268 }
4269
4270 static int
save(PicklerObject * self,PyObject * obj,int pers_save)4271 save(PicklerObject *self, PyObject *obj, int pers_save)
4272 {
4273 PyTypeObject *type;
4274 PyObject *reduce_func = NULL;
4275 PyObject *reduce_value = NULL;
4276 int status = 0;
4277
4278 if (_Pickler_OpcodeBoundary(self) < 0)
4279 return -1;
4280
4281 /* The extra pers_save argument is necessary to avoid calling save_pers()
4282 on its returned object. */
4283 if (!pers_save && self->pers_func) {
4284 /* save_pers() returns:
4285 -1 to signal an error;
4286 0 if it did nothing successfully;
4287 1 if a persistent id was saved.
4288 */
4289 if ((status = save_pers(self, obj)) != 0)
4290 return status;
4291 }
4292
4293 type = Py_TYPE(obj);
4294
    /* The old cPickle had an optimization that used switch-case statement
       dispatching on the first letter of the type name.  This was removed
       since benchmarks showed that this optimization was actually slowing
       things down. */
4299
4300 /* Atom types; these aren't memoized, so don't check the memo. */
4301
4302 if (obj == Py_None) {
4303 return save_none(self, obj);
4304 }
4305 else if (obj == Py_False || obj == Py_True) {
4306 return save_bool(self, obj);
4307 }
4308 else if (type == &PyLong_Type) {
4309 return save_long(self, obj);
4310 }
4311 else if (type == &PyFloat_Type) {
4312 return save_float(self, obj);
4313 }
4314
4315 /* Check the memo to see if it has the object. If so, generate
4316 a GET (or BINGET) opcode, instead of pickling the object
4317 once again. */
4318 if (PyMemoTable_Get(self->memo, obj)) {
4319 return memo_get(self, obj);
4320 }
4321
4322 if (type == &PyBytes_Type) {
4323 return save_bytes(self, obj);
4324 }
4325 else if (type == &PyUnicode_Type) {
4326 return save_unicode(self, obj);
4327 }
4328
4329 /* We're only calling _Py_EnterRecursiveCall here so that atomic
4330 types above are pickled faster. */
4331 if (_Py_EnterRecursiveCall(" while pickling an object")) {
4332 return -1;
4333 }
4334
4335 if (type == &PyDict_Type) {
4336 status = save_dict(self, obj);
4337 goto done;
4338 }
4339 else if (type == &PySet_Type) {
4340 status = save_set(self, obj);
4341 goto done;
4342 }
4343 else if (type == &PyFrozenSet_Type) {
4344 status = save_frozenset(self, obj);
4345 goto done;
4346 }
4347 else if (type == &PyList_Type) {
4348 status = save_list(self, obj);
4349 goto done;
4350 }
4351 else if (type == &PyTuple_Type) {
4352 status = save_tuple(self, obj);
4353 goto done;
4354 }
4355 else if (type == &PyByteArray_Type) {
4356 status = save_bytearray(self, obj);
4357 goto done;
4358 }
4359 else if (type == &PyPickleBuffer_Type) {
4360 status = save_picklebuffer(self, obj);
4361 goto done;
4362 }
4363
4364 /* Now, check reducer_override. If it returns NotImplemented,
4365 * fallback to save_type or save_global, and then perhaps to the
4366 * regular reduction mechanism.
4367 */
4368 if (self->reducer_override != NULL) {
4369 reduce_value = PyObject_CallOneArg(self->reducer_override, obj);
4370 if (reduce_value == NULL) {
4371 goto error;
4372 }
4373 if (reduce_value != Py_NotImplemented) {
4374 goto reduce;
4375 }
4376 Py_DECREF(reduce_value);
4377 reduce_value = NULL;
4378 }
4379
4380 if (type == &PyType_Type) {
4381 status = save_type(self, obj);
4382 goto done;
4383 }
4384 else if (type == &PyFunction_Type) {
4385 status = save_global(self, obj, NULL);
4386 goto done;
4387 }
4388
4389 /* XXX: This part needs some unit tests. */
4390
4391 /* Get a reduction callable, and call it. This may come from
4392 * self.dispatch_table, copyreg.dispatch_table, the object's
4393 * __reduce_ex__ method, or the object's __reduce__ method.
4394 */
4395 if (self->dispatch_table == NULL) {
4396 PickleState *st = _Pickle_GetGlobalState();
4397 reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4398 (PyObject *)type);
4399 if (reduce_func == NULL) {
4400 if (PyErr_Occurred()) {
4401 goto error;
4402 }
4403 } else {
4404 /* PyDict_GetItemWithError() returns a borrowed reference.
4405 Increase the reference count to be consistent with
4406 PyObject_GetItem and _PyObject_GetAttrId used below. */
4407 Py_INCREF(reduce_func);
4408 }
4409 } else {
4410 reduce_func = PyObject_GetItem(self->dispatch_table,
4411 (PyObject *)type);
4412 if (reduce_func == NULL) {
4413 if (PyErr_ExceptionMatches(PyExc_KeyError))
4414 PyErr_Clear();
4415 else
4416 goto error;
4417 }
4418 }
4419 if (reduce_func != NULL) {
4420 Py_INCREF(obj);
4421 reduce_value = _Pickle_FastCall(reduce_func, obj);
4422 }
4423 else if (PyType_IsSubtype(type, &PyType_Type)) {
4424 status = save_global(self, obj, NULL);
4425 goto done;
4426 }
4427 else {
4428 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
4429 automatically defined as __reduce__. While this is convenient, this
4430 make it impossible to know which method was actually called. Of
4431 course, this is not a big deal. But still, it would be nice to let
4432 the user know which method was called when something go
4433 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
4434 don't actually have to check for a __reduce__ method. */
4435
4436 /* Check for a __reduce_ex__ method. */
4437 if (_PyObject_LookupAttr(obj, &_Py_ID(__reduce_ex__), &reduce_func) < 0) {
4438 goto error;
4439 }
4440 if (reduce_func != NULL) {
4441 PyObject *proto;
4442 proto = PyLong_FromLong(self->proto);
4443 if (proto != NULL) {
4444 reduce_value = _Pickle_FastCall(reduce_func, proto);
4445 }
4446 }
4447 else {
4448 /* Check for a __reduce__ method. */
4449 if (_PyObject_LookupAttr(obj, &_Py_ID(__reduce__), &reduce_func) < 0) {
4450 goto error;
4451 }
4452 if (reduce_func != NULL) {
4453 reduce_value = PyObject_CallNoArgs(reduce_func);
4454 }
4455 else {
4456 PickleState *st = _Pickle_GetGlobalState();
4457 PyErr_Format(st->PicklingError,
4458 "can't pickle '%.200s' object: %R",
4459 type->tp_name, obj);
4460 goto error;
4461 }
4462 }
4463 }
4464
4465 if (reduce_value == NULL)
4466 goto error;
4467
4468 reduce:
4469 if (PyUnicode_Check(reduce_value)) {
4470 status = save_global(self, obj, reduce_value);
4471 goto done;
4472 }
4473
4474 if (!PyTuple_Check(reduce_value)) {
4475 PickleState *st = _Pickle_GetGlobalState();
4476 PyErr_SetString(st->PicklingError,
4477 "__reduce__ must return a string or tuple");
4478 goto error;
4479 }
4480
4481 status = save_reduce(self, reduce_value, obj);
4482
4483 if (0) {
4484 error:
4485 status = -1;
4486 }
4487 done:
4488
4489 _Py_LeaveRecursiveCall();
4490 Py_XDECREF(reduce_func);
4491 Py_XDECREF(reduce_value);
4492
4493 return status;
4494 }
4495
4496 static int
dump(PicklerObject * self,PyObject * obj)4497 dump(PicklerObject *self, PyObject *obj)
4498 {
4499 const char stop_op = STOP;
4500 int status = -1;
4501 PyObject *tmp;
4502
4503 if (_PyObject_LookupAttr((PyObject *)self, &_Py_ID(reducer_override),
4504 &tmp) < 0) {
4505 goto error;
4506 }
4507 /* Cache the reducer_override method, if it exists. */
4508 if (tmp != NULL) {
4509 Py_XSETREF(self->reducer_override, tmp);
4510 }
4511 else {
4512 Py_CLEAR(self->reducer_override);
4513 }
4514
4515 if (self->proto >= 2) {
4516 char header[2];
4517
4518 header[0] = PROTO;
4519 assert(self->proto >= 0 && self->proto < 256);
4520 header[1] = (unsigned char)self->proto;
4521 if (_Pickler_Write(self, header, 2) < 0)
4522 goto error;
4523 if (self->proto >= 4)
4524 self->framing = 1;
4525 }
4526
4527 if (save(self, obj, 0) < 0 ||
4528 _Pickler_Write(self, &stop_op, 1) < 0 ||
4529 _Pickler_CommitFrame(self) < 0)
4530 goto error;
4531
4532 // Success
4533 status = 0;
4534
4535 error:
4536 self->framing = 0;
4537
4538 /* Break the reference cycle we generated at the beginning this function
4539 * call when setting the reducer_override attribute of the Pickler instance
4540 * to a bound method of the same instance. This is important as the Pickler
4541 * instance holds a reference to each object it has pickled (through its
4542 * memo): thus, these objects won't be garbage-collected as long as the
4543 * Pickler itself is not collected. */
4544 Py_CLEAR(self->reducer_override);
4545 return status;
4546 }
4547
4548 /*[clinic input]
4549
4550 _pickle.Pickler.clear_memo
4551
4552 Clears the pickler's "memo".
4553
4554 The memo is the data structure that remembers which objects the
4555 pickler has already seen, so that shared or recursive objects are
4556 pickled by reference and not by value. This method is useful when
4557 re-using picklers.
4558 [clinic start generated code]*/
4559
4560 static PyObject *
_pickle_Pickler_clear_memo_impl(PicklerObject * self)4561 _pickle_Pickler_clear_memo_impl(PicklerObject *self)
4562 /*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
4563 {
4564 if (self->memo)
4565 PyMemoTable_Clear(self->memo);
4566
4567 Py_RETURN_NONE;
4568 }
4569
4570 /*[clinic input]
4571
4572 _pickle.Pickler.dump
4573
4574 obj: object
4575 /
4576
4577 Write a pickled representation of the given object to the open file.
4578 [clinic start generated code]*/
4579
4580 static PyObject *
_pickle_Pickler_dump(PicklerObject * self,PyObject * obj)4581 _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
4582 /*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
4583 {
4584 /* Check whether the Pickler was initialized correctly (issue3664).
4585 Developers often forget to call __init__() in their subclasses, which
4586 would trigger a segfault without this check. */
4587 if (self->write == NULL) {
4588 PickleState *st = _Pickle_GetGlobalState();
4589 PyErr_Format(st->PicklingError,
4590 "Pickler.__init__() was not called by %s.__init__()",
4591 Py_TYPE(self)->tp_name);
4592 return NULL;
4593 }
4594
4595 if (_Pickler_ClearBuffer(self) < 0)
4596 return NULL;
4597
4598 if (dump(self, obj) < 0)
4599 return NULL;
4600
4601 if (_Pickler_FlushToFile(self) < 0)
4602 return NULL;
4603
4604 Py_RETURN_NONE;
4605 }
4606
4607 /*[clinic input]
4608
4609 _pickle.Pickler.__sizeof__ -> Py_ssize_t
4610
4611 Returns size in memory, in bytes.
4612 [clinic start generated code]*/
4613
4614 static Py_ssize_t
_pickle_Pickler___sizeof___impl(PicklerObject * self)4615 _pickle_Pickler___sizeof___impl(PicklerObject *self)
4616 /*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4617 {
4618 Py_ssize_t res, s;
4619
4620 res = _PyObject_SIZE(Py_TYPE(self));
4621 if (self->memo != NULL) {
4622 res += sizeof(PyMemoTable);
4623 res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4624 }
4625 if (self->output_buffer != NULL) {
4626 s = _PySys_GetSizeOf(self->output_buffer);
4627 if (s == -1)
4628 return -1;
4629 res += s;
4630 }
4631 return res;
4632 }
4633
/* Method table for _pickle.Pickler (installed as tp_methods of
   Pickler_Type).  Each *_METHODDEF macro is defined outside this part of
   the file; presumably each expands to one PyMethodDef entry for the
   matching function -- confirm in the generated clinic header. */
static struct PyMethodDef Pickler_methods[] = {
    _PICKLE_PICKLER_DUMP_METHODDEF
    _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
    _PICKLE_PICKLER___SIZEOF___METHODDEF
    {NULL, NULL} /* sentinel */
};
4640
4641 static void
Pickler_dealloc(PicklerObject * self)4642 Pickler_dealloc(PicklerObject *self)
4643 {
4644 PyObject_GC_UnTrack(self);
4645
4646 Py_XDECREF(self->output_buffer);
4647 Py_XDECREF(self->write);
4648 Py_XDECREF(self->pers_func);
4649 Py_XDECREF(self->dispatch_table);
4650 Py_XDECREF(self->fast_memo);
4651 Py_XDECREF(self->reducer_override);
4652 Py_XDECREF(self->buffer_callback);
4653
4654 PyMemoTable_Del(self->memo);
4655
4656 Py_TYPE(self)->tp_free((PyObject *)self);
4657 }
4658
/* tp_traverse for Pickler: hand each object reference listed below to the
   garbage collector's `visit` callback.  Py_VISIT returns from this
   function early if the callback reports a non-zero result; otherwise 0 is
   returned.  Note that output_buffer and the memo table are not visited
   here. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->dispatch_table);
    Py_VISIT(self->fast_memo);
    Py_VISIT(self->reducer_override);
    Py_VISIT(self->buffer_callback);
    return 0;
}
4670
4671 static int
Pickler_clear(PicklerObject * self)4672 Pickler_clear(PicklerObject *self)
4673 {
4674 Py_CLEAR(self->output_buffer);
4675 Py_CLEAR(self->write);
4676 Py_CLEAR(self->pers_func);
4677 Py_CLEAR(self->dispatch_table);
4678 Py_CLEAR(self->fast_memo);
4679 Py_CLEAR(self->reducer_override);
4680 Py_CLEAR(self->buffer_callback);
4681
4682 if (self->memo != NULL) {
4683 PyMemoTable *memo = self->memo;
4684 self->memo = NULL;
4685 PyMemoTable_Del(memo);
4686 }
4687 return 0;
4688 }
4689
4690
4691 /*[clinic input]
4692
4693 _pickle.Pickler.__init__
4694
4695 file: object
4696 protocol: object = None
4697 fix_imports: bool = True
4698 buffer_callback: object = None
4699
4700 This takes a binary file for writing a pickle data stream.
4701
4702 The optional *protocol* argument tells the pickler to use the given
4703 protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
4704 protocol is 4. It was introduced in Python 3.4, and is incompatible
4705 with previous versions.
4706
4707 Specifying a negative protocol version selects the highest protocol
4708 version supported. The higher the protocol used, the more recent the
4709 version of Python needed to read the pickle produced.
4710
4711 The *file* argument must have a write() method that accepts a single
4712 bytes argument. It can thus be a file object opened for binary
4713 writing, an io.BytesIO instance, or any other custom object that meets
4714 this interface.
4715
4716 If *fix_imports* is True and protocol is less than 3, pickle will try
4717 to map the new Python 3 names to the old module names used in Python
4718 2, so that the pickle data stream is readable with Python 2.
4719
4720 If *buffer_callback* is None (the default), buffer views are
4721 serialized into *file* as part of the pickle stream.
4722
4723 If *buffer_callback* is not None, then it can be called any number
4724 of times with a buffer view. If the callback returns a false value
4725 (such as None), the given buffer is out-of-band; otherwise the
4726 buffer is serialized in-band, i.e. inside the pickle stream.
4727
4728 It is an error if *buffer_callback* is not None and *protocol*
4729 is None or smaller than 5.
4730
4731 [clinic start generated code]*/
4732
4733 static int
_pickle_Pickler___init___impl(PicklerObject * self,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)4734 _pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4735 PyObject *protocol, int fix_imports,
4736 PyObject *buffer_callback)
4737 /*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/
4738 {
4739 /* In case of multiple __init__() calls, clear previous content. */
4740 if (self->write != NULL)
4741 (void)Pickler_clear(self);
4742
4743 if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
4744 return -1;
4745
4746 if (_Pickler_SetOutputStream(self, file) < 0)
4747 return -1;
4748
4749 if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4750 return -1;
4751
4752 /* memo and output_buffer may have already been created in _Pickler_New */
4753 if (self->memo == NULL) {
4754 self->memo = PyMemoTable_New();
4755 if (self->memo == NULL)
4756 return -1;
4757 }
4758 self->output_len = 0;
4759 if (self->output_buffer == NULL) {
4760 self->max_output_len = WRITE_BUF_SIZE;
4761 self->output_buffer = PyBytes_FromStringAndSize(NULL,
4762 self->max_output_len);
4763 if (self->output_buffer == NULL)
4764 return -1;
4765 }
4766
4767 self->fast = 0;
4768 self->fast_nesting = 0;
4769 self->fast_memo = NULL;
4770
4771 if (init_method_ref((PyObject *)self, &_Py_ID(persistent_id),
4772 &self->pers_func, &self->pers_func_self) < 0)
4773 {
4774 return -1;
4775 }
4776 if (self->dispatch_table != NULL) {
4777 return 0;
4778 }
4779 if (_PyObject_LookupAttr((PyObject *)self, &_Py_ID(dispatch_table),
4780 &self->dispatch_table) < 0) {
4781 return -1;
4782 }
4783
4784 return 0;
4785 }
4786
4787
4788 /* Define a proxy object for the Pickler's internal memo object. This is to
4789 * avoid breaking code like:
4790 * pickler.memo.clear()
4791 * and
4792 * pickler.memo = saved_memo
4793 * Is this a good idea? Not really, but we don't want to break code that uses
4794 * it. Note that we don't implement the entire mapping API here. This is
4795 * intentional, as these should be treated as black-box implementation details.
4796 */
4797
4798 /*[clinic input]
4799 _pickle.PicklerMemoProxy.clear
4800
4801 Remove all items from memo.
4802 [clinic start generated code]*/
4803
4804 static PyObject *
_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject * self)4805 _pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
4806 /*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
4807 {
4808 if (self->pickler->memo)
4809 PyMemoTable_Clear(self->pickler->memo);
4810 Py_RETURN_NONE;
4811 }
4812
4813 /*[clinic input]
4814 _pickle.PicklerMemoProxy.copy
4815
4816 Copy the memo to a new object.
4817 [clinic start generated code]*/
4818
4819 static PyObject *
_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject * self)4820 _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
4821 /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
4822 {
4823 PyMemoTable *memo;
4824 PyObject *new_memo = PyDict_New();
4825 if (new_memo == NULL)
4826 return NULL;
4827
4828 memo = self->pickler->memo;
4829 for (size_t i = 0; i < memo->mt_allocated; ++i) {
4830 PyMemoEntry entry = memo->mt_table[i];
4831 if (entry.me_key != NULL) {
4832 int status;
4833 PyObject *key, *value;
4834
4835 key = PyLong_FromVoidPtr(entry.me_key);
4836 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
4837
4838 if (key == NULL || value == NULL) {
4839 Py_XDECREF(key);
4840 Py_XDECREF(value);
4841 goto error;
4842 }
4843 status = PyDict_SetItem(new_memo, key, value);
4844 Py_DECREF(key);
4845 Py_DECREF(value);
4846 if (status < 0)
4847 goto error;
4848 }
4849 }
4850 return new_memo;
4851
4852 error:
4853 Py_XDECREF(new_memo);
4854 return NULL;
4855 }
4856
4857 /*[clinic input]
4858 _pickle.PicklerMemoProxy.__reduce__
4859
4860 Implement pickle support.
4861 [clinic start generated code]*/
4862
4863 static PyObject *
_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject * self)4864 _pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
4865 /*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
4866 {
4867 PyObject *reduce_value, *dict_args;
4868 PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
4869 if (contents == NULL)
4870 return NULL;
4871
4872 reduce_value = PyTuple_New(2);
4873 if (reduce_value == NULL) {
4874 Py_DECREF(contents);
4875 return NULL;
4876 }
4877 dict_args = PyTuple_New(1);
4878 if (dict_args == NULL) {
4879 Py_DECREF(contents);
4880 Py_DECREF(reduce_value);
4881 return NULL;
4882 }
4883 PyTuple_SET_ITEM(dict_args, 0, contents);
4884 Py_INCREF((PyObject *)&PyDict_Type);
4885 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4886 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4887 return reduce_value;
4888 }
4889
/* Method table for _pickle.PicklerMemoProxy (installed as tp_methods of
   PicklerMemoProxyType).  The *_METHODDEF macros are defined outside this
   part of the file; presumably each expands to one PyMethodDef entry --
   confirm in the generated clinic header. */
static PyMethodDef picklerproxy_methods[] = {
    _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL} /* sentinel */
};
4896
/* tp_dealloc for PicklerMemoProxy: untrack the proxy from the GC, release
   its reference to the pickler and free the proxy object. */
static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->pickler);
    PyObject_GC_Del((PyObject *)self);
}
4904
/* tp_traverse for PicklerMemoProxy: the proxied pickler is the only object
   reference reported to the garbage collector's `visit` callback. */
static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
                          visitproc visit, void *arg)
{
    Py_VISIT(self->pickler);
    return 0;
}
4912
/* tp_clear for PicklerMemoProxy: drop the reference to the proxied pickler
   and leave the field NULL.  Always returns 0. */
static int
PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
{
    Py_CLEAR(self->pickler);
    return 0;
}
4919
4920 static PyTypeObject PicklerMemoProxyType = {
4921 PyVarObject_HEAD_INIT(NULL, 0)
4922 "_pickle.PicklerMemoProxy", /*tp_name*/
4923 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
4924 0,
4925 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
4926 0, /* tp_vectorcall_offset */
4927 0, /* tp_getattr */
4928 0, /* tp_setattr */
4929 0, /* tp_as_async */
4930 0, /* tp_repr */
4931 0, /* tp_as_number */
4932 0, /* tp_as_sequence */
4933 0, /* tp_as_mapping */
4934 PyObject_HashNotImplemented, /* tp_hash */
4935 0, /* tp_call */
4936 0, /* tp_str */
4937 PyObject_GenericGetAttr, /* tp_getattro */
4938 PyObject_GenericSetAttr, /* tp_setattro */
4939 0, /* tp_as_buffer */
4940 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4941 0, /* tp_doc */
4942 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
4943 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
4944 0, /* tp_richcompare */
4945 0, /* tp_weaklistoffset */
4946 0, /* tp_iter */
4947 0, /* tp_iternext */
4948 picklerproxy_methods, /* tp_methods */
4949 };
4950
4951 static PyObject *
PicklerMemoProxy_New(PicklerObject * pickler)4952 PicklerMemoProxy_New(PicklerObject *pickler)
4953 {
4954 PicklerMemoProxyObject *self;
4955
4956 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4957 if (self == NULL)
4958 return NULL;
4959 Py_INCREF(pickler);
4960 self->pickler = pickler;
4961 PyObject_GC_Track(self);
4962 return (PyObject *)self;
4963 }
4964
4965 /*****************************************************************************/
4966
/* Getter for Pickler.memo: every access returns a freshly created
   PicklerMemoProxy bound to this pickler (or NULL if creating it fails). */
static PyObject *
Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
{
    return PicklerMemoProxy_New(self);
}
4972
4973 static int
Pickler_set_memo(PicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))4974 Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
4975 {
4976 PyMemoTable *new_memo = NULL;
4977
4978 if (obj == NULL) {
4979 PyErr_SetString(PyExc_TypeError,
4980 "attribute deletion is not supported");
4981 return -1;
4982 }
4983
4984 if (Py_IS_TYPE(obj, &PicklerMemoProxyType)) {
4985 PicklerObject *pickler =
4986 ((PicklerMemoProxyObject *)obj)->pickler;
4987
4988 new_memo = PyMemoTable_Copy(pickler->memo);
4989 if (new_memo == NULL)
4990 return -1;
4991 }
4992 else if (PyDict_Check(obj)) {
4993 Py_ssize_t i = 0;
4994 PyObject *key, *value;
4995
4996 new_memo = PyMemoTable_New();
4997 if (new_memo == NULL)
4998 return -1;
4999
5000 while (PyDict_Next(obj, &i, &key, &value)) {
5001 Py_ssize_t memo_id;
5002 PyObject *memo_obj;
5003
5004 if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
5005 PyErr_SetString(PyExc_TypeError,
5006 "'memo' values must be 2-item tuples");
5007 goto error;
5008 }
5009 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
5010 if (memo_id == -1 && PyErr_Occurred())
5011 goto error;
5012 memo_obj = PyTuple_GET_ITEM(value, 1);
5013 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
5014 goto error;
5015 }
5016 }
5017 else {
5018 PyErr_Format(PyExc_TypeError,
5019 "'memo' attribute must be a PicklerMemoProxy object "
5020 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
5021 return -1;
5022 }
5023
5024 PyMemoTable_Del(self->memo);
5025 self->memo = new_memo;
5026
5027 return 0;
5028
5029 error:
5030 if (new_memo)
5031 PyMemoTable_Del(new_memo);
5032 return -1;
5033 }
5034
5035 static PyObject *
Pickler_get_persid(PicklerObject * self,void * Py_UNUSED (ignored))5036 Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
5037 {
5038 if (self->pers_func == NULL) {
5039 PyErr_SetString(PyExc_AttributeError, "persistent_id");
5040 return NULL;
5041 }
5042 return reconstruct_method(self->pers_func, self->pers_func_self);
5043 }
5044
5045 static int
Pickler_set_persid(PicklerObject * self,PyObject * value,void * Py_UNUSED (ignored))5046 Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
5047 {
5048 if (value == NULL) {
5049 PyErr_SetString(PyExc_TypeError,
5050 "attribute deletion is not supported");
5051 return -1;
5052 }
5053 if (!PyCallable_Check(value)) {
5054 PyErr_SetString(PyExc_TypeError,
5055 "persistent_id must be a callable taking one argument");
5056 return -1;
5057 }
5058
5059 self->pers_func_self = NULL;
5060 Py_INCREF(value);
5061 Py_XSETREF(self->pers_func, value);
5062
5063 return 0;
5064 }
5065
5066 static PyMemberDef Pickler_members[] = {
5067 {"bin", T_INT, offsetof(PicklerObject, bin)},
5068 {"fast", T_INT, offsetof(PicklerObject, fast)},
5069 {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
5070 {NULL}
5071 };
5072
5073 static PyGetSetDef Pickler_getsets[] = {
5074 {"memo", (getter)Pickler_get_memo,
5075 (setter)Pickler_set_memo},
5076 {"persistent_id", (getter)Pickler_get_persid,
5077 (setter)Pickler_set_persid},
5078 {NULL}
5079 };
5080
5081 static PyTypeObject Pickler_Type = {
5082 PyVarObject_HEAD_INIT(NULL, 0)
5083 "_pickle.Pickler" , /*tp_name*/
5084 sizeof(PicklerObject), /*tp_basicsize*/
5085 0, /*tp_itemsize*/
5086 (destructor)Pickler_dealloc, /*tp_dealloc*/
5087 0, /*tp_vectorcall_offset*/
5088 0, /*tp_getattr*/
5089 0, /*tp_setattr*/
5090 0, /*tp_as_async*/
5091 0, /*tp_repr*/
5092 0, /*tp_as_number*/
5093 0, /*tp_as_sequence*/
5094 0, /*tp_as_mapping*/
5095 0, /*tp_hash*/
5096 0, /*tp_call*/
5097 0, /*tp_str*/
5098 0, /*tp_getattro*/
5099 0, /*tp_setattro*/
5100 0, /*tp_as_buffer*/
5101 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5102 _pickle_Pickler___init____doc__, /*tp_doc*/
5103 (traverseproc)Pickler_traverse, /*tp_traverse*/
5104 (inquiry)Pickler_clear, /*tp_clear*/
5105 0, /*tp_richcompare*/
5106 0, /*tp_weaklistoffset*/
5107 0, /*tp_iter*/
5108 0, /*tp_iternext*/
5109 Pickler_methods, /*tp_methods*/
5110 Pickler_members, /*tp_members*/
5111 Pickler_getsets, /*tp_getset*/
5112 0, /*tp_base*/
5113 0, /*tp_dict*/
5114 0, /*tp_descr_get*/
5115 0, /*tp_descr_set*/
5116 0, /*tp_dictoffset*/
5117 _pickle_Pickler___init__, /*tp_init*/
5118 PyType_GenericAlloc, /*tp_alloc*/
5119 PyType_GenericNew, /*tp_new*/
5120 PyObject_GC_Del, /*tp_free*/
5121 0, /*tp_is_gc*/
5122 };
5123
/* Temporary helper for calling self.find_class(module_name, global_name).

   The call goes through a regular Python method lookup on `self`, so a
   find_class() defined by a subclass is the one that runs.  Returns whatever
   the method call returns (NULL when it raised).

   XXX: It would be nice to be able to avoid the Python function call
   overhead by calling the C version of find_class() directly when it is not
   overridden by a subclass, although this could become rather hackish.  A
   simpler optimization would be to call the C function when self is not a
   subclass instance. */
static PyObject *
find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
{
    return PyObject_CallMethodObjArgs((PyObject *)self, &_Py_ID(find_class),
                                      module_name, global_name, NULL);
}
5137
5138 static Py_ssize_t
marker(UnpicklerObject * self)5139 marker(UnpicklerObject *self)
5140 {
5141 Py_ssize_t mark;
5142
5143 if (self->num_marks < 1) {
5144 PickleState *st = _Pickle_GetGlobalState();
5145 PyErr_SetString(st->UnpicklingError, "could not find MARK");
5146 return -1;
5147 }
5148
5149 mark = self->marks[--self->num_marks];
5150 self->stack->mark_set = self->num_marks != 0;
5151 self->stack->fence = self->num_marks ?
5152 self->marks[self->num_marks - 1] : 0;
5153 return mark;
5154 }
5155
/* Append None to the unpickler's stack.  Returns 0 on success.
   NOTE(review): PDATA_APPEND is defined outside this part of the file; its
   last argument (-1) is presumably the value this function returns when the
   append fails -- confirm against the macro definition. */
static int
load_none(UnpicklerObject *self)
{
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
5162
5163 static int
load_int(UnpicklerObject * self)5164 load_int(UnpicklerObject *self)
5165 {
5166 PyObject *value;
5167 char *endptr, *s;
5168 Py_ssize_t len;
5169 long x;
5170
5171 if ((len = _Unpickler_Readline(self, &s)) < 0)
5172 return -1;
5173 if (len < 2)
5174 return bad_readline();
5175
5176 errno = 0;
5177 /* XXX: Should the base argument of strtol() be explicitly set to 10?
5178 XXX(avassalotti): Should this uses PyOS_strtol()? */
5179 x = strtol(s, &endptr, 0);
5180
5181 if (errno || (*endptr != '\n' && *endptr != '\0')) {
5182 /* Hm, maybe we've got something long. Let's try reading
5183 * it as a Python int object. */
5184 errno = 0;
5185 /* XXX: Same thing about the base here. */
5186 value = PyLong_FromString(s, NULL, 0);
5187 if (value == NULL) {
5188 PyErr_SetString(PyExc_ValueError,
5189 "could not convert string to int");
5190 return -1;
5191 }
5192 }
5193 else {
5194 if (len == 3 && (x == 0 || x == 1)) {
5195 if ((value = PyBool_FromLong(x)) == NULL)
5196 return -1;
5197 }
5198 else {
5199 if ((value = PyLong_FromLong(x)) == NULL)
5200 return -1;
5201 }
5202 }
5203
5204 PDATA_PUSH(self->stack, value, -1);
5205 return 0;
5206 }
5207
/* Append the given bool singleton to the unpickler's stack.  `boolean` must
   be Py_True or Py_False (checked by the assertion below).  Returns 0 on
   success.
   NOTE(review): as in load_none(), the -1 handed to PDATA_APPEND is
   presumably the return value used on failure -- confirm against the
   macro. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
5215
5216 /* s contains x bytes of an unsigned little-endian integer. Return its value
5217 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5218 */
5219 static Py_ssize_t
calc_binsize(char * bytes,int nbytes)5220 calc_binsize(char *bytes, int nbytes)
5221 {
5222 unsigned char *s = (unsigned char *)bytes;
5223 int i;
5224 size_t x = 0;
5225
5226 if (nbytes > (int)sizeof(size_t)) {
5227 /* Check for integer overflow. BINBYTES8 and BINUNICODE8 opcodes
5228 * have 64-bit size that can't be represented on 32-bit platform.
5229 */
5230 for (i = (int)sizeof(size_t); i < nbytes; i++) {
5231 if (s[i])
5232 return -1;
5233 }
5234 nbytes = (int)sizeof(size_t);
5235 }
5236 for (i = 0; i < nbytes; i++) {
5237 x |= (size_t) s[i] << (8 * i);
5238 }
5239
5240 if (x > PY_SSIZE_T_MAX)
5241 return -1;
5242 else
5243 return (Py_ssize_t) x;
5244 }
5245
/* `bytes` holds `nbytes` bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when nbytes is 1 or 2 the value is unsigned,
 * but when nbytes is 4 it is a signed 32-bit two's-complement value.  This
 * is a historical source of cross-platform bugs.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    /* Accumulate in an unsigned type: shifting a byte with its top bit set
     * into bit 31 of a signed 32-bit long would be undefined behaviour.
     */
    for (i = 0; i < nbytes; i++) {
        x |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) is
     * signed.  When bit 31 is set the value is x - 2**32; compute that
     * without ever leaving the range of a 32-bit long.
     */
    if (nbytes == 4 && (x & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - x) - 1L;
    }

    return (long)x;
}
5272
5273 static int
load_binintx(UnpicklerObject * self,char * s,int size)5274 load_binintx(UnpicklerObject *self, char *s, int size)
5275 {
5276 PyObject *value;
5277 long x;
5278
5279 x = calc_binint(s, size);
5280
5281 if ((value = PyLong_FromLong(x)) == NULL)
5282 return -1;
5283
5284 PDATA_PUSH(self->stack, value, -1);
5285 return 0;
5286 }
5287
5288 static int
load_binint(UnpicklerObject * self)5289 load_binint(UnpicklerObject *self)
5290 {
5291 char *s;
5292
5293 if (_Unpickler_Read(self, &s, 4) < 0)
5294 return -1;
5295
5296 return load_binintx(self, s, 4);
5297 }
5298
5299 static int
load_binint1(UnpicklerObject * self)5300 load_binint1(UnpicklerObject *self)
5301 {
5302 char *s;
5303
5304 if (_Unpickler_Read(self, &s, 1) < 0)
5305 return -1;
5306
5307 return load_binintx(self, s, 1);
5308 }
5309
5310 static int
load_binint2(UnpicklerObject * self)5311 load_binint2(UnpicklerObject *self)
5312 {
5313 char *s;
5314
5315 if (_Unpickler_Read(self, &s, 2) < 0)
5316 return -1;
5317
5318 return load_binintx(self, s, 2);
5319 }
5320
5321 static int
load_long(UnpicklerObject * self)5322 load_long(UnpicklerObject *self)
5323 {
5324 PyObject *value;
5325 char *s = NULL;
5326 Py_ssize_t len;
5327
5328 if ((len = _Unpickler_Readline(self, &s)) < 0)
5329 return -1;
5330 if (len < 2)
5331 return bad_readline();
5332
5333 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5334 the 'L' before calling PyLong_FromString. In order to maintain
5335 compatibility with Python 3.0.0, we don't actually *require*
5336 the 'L' to be present. */
5337 if (s[len-2] == 'L')
5338 s[len-2] = '\0';
5339 /* XXX: Should the base argument explicitly set to 10? */
5340 value = PyLong_FromString(s, NULL, 0);
5341 if (value == NULL)
5342 return -1;
5343
5344 PDATA_PUSH(self->stack, value, -1);
5345 return 0;
5346 }
5347
5348 /* 'size' bytes contain the # of bytes of little-endian 256's-complement
5349 * data following.
5350 */
5351 static int
load_counted_long(UnpicklerObject * self,int size)5352 load_counted_long(UnpicklerObject *self, int size)
5353 {
5354 PyObject *value;
5355 char *nbytes;
5356 char *pdata;
5357
5358 assert(size == 1 || size == 4);
5359 if (_Unpickler_Read(self, &nbytes, size) < 0)
5360 return -1;
5361
5362 size = calc_binint(nbytes, size);
5363 if (size < 0) {
5364 PickleState *st = _Pickle_GetGlobalState();
5365 /* Corrupt or hostile pickle -- we never write one like this */
5366 PyErr_SetString(st->UnpicklingError,
5367 "LONG pickle has negative byte count");
5368 return -1;
5369 }
5370
5371 if (size == 0)
5372 value = PyLong_FromLong(0L);
5373 else {
5374 /* Read the raw little-endian bytes and convert. */
5375 if (_Unpickler_Read(self, &pdata, size) < 0)
5376 return -1;
5377 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5378 1 /* little endian */ , 1 /* signed */ );
5379 }
5380 if (value == NULL)
5381 return -1;
5382 PDATA_PUSH(self->stack, value, -1);
5383 return 0;
5384 }
5385
5386 static int
load_float(UnpicklerObject * self)5387 load_float(UnpicklerObject *self)
5388 {
5389 PyObject *value;
5390 char *endptr, *s;
5391 Py_ssize_t len;
5392 double d;
5393
5394 if ((len = _Unpickler_Readline(self, &s)) < 0)
5395 return -1;
5396 if (len < 2)
5397 return bad_readline();
5398
5399 errno = 0;
5400 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5401 if (d == -1.0 && PyErr_Occurred())
5402 return -1;
5403 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
5404 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5405 return -1;
5406 }
5407 value = PyFloat_FromDouble(d);
5408 if (value == NULL)
5409 return -1;
5410
5411 PDATA_PUSH(self->stack, value, -1);
5412 return 0;
5413 }
5414
5415 static int
load_binfloat(UnpicklerObject * self)5416 load_binfloat(UnpicklerObject *self)
5417 {
5418 PyObject *value;
5419 double x;
5420 char *s;
5421
5422 if (_Unpickler_Read(self, &s, 8) < 0)
5423 return -1;
5424
5425 x = PyFloat_Unpack8(s, 0);
5426 if (x == -1.0 && PyErr_Occurred())
5427 return -1;
5428
5429 if ((value = PyFloat_FromDouble(x)) == NULL)
5430 return -1;
5431
5432 PDATA_PUSH(self->stack, value, -1);
5433 return 0;
5434 }
5435
5436 static int
load_string(UnpicklerObject * self)5437 load_string(UnpicklerObject *self)
5438 {
5439 PyObject *bytes;
5440 PyObject *obj;
5441 Py_ssize_t len;
5442 char *s, *p;
5443
5444 if ((len = _Unpickler_Readline(self, &s)) < 0)
5445 return -1;
5446 /* Strip the newline */
5447 len--;
5448 /* Strip outermost quotes */
5449 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
5450 p = s + 1;
5451 len -= 2;
5452 }
5453 else {
5454 PickleState *st = _Pickle_GetGlobalState();
5455 PyErr_SetString(st->UnpicklingError,
5456 "the STRING opcode argument must be quoted");
5457 return -1;
5458 }
5459 assert(len >= 0);
5460
5461 /* Use the PyBytes API to decode the string, since that is what is used
5462 to encode, and then coerce the result to Unicode. */
5463 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
5464 if (bytes == NULL)
5465 return -1;
5466
5467 /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5468 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5469 if (strcmp(self->encoding, "bytes") == 0) {
5470 obj = bytes;
5471 }
5472 else {
5473 obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5474 Py_DECREF(bytes);
5475 if (obj == NULL) {
5476 return -1;
5477 }
5478 }
5479
5480 PDATA_PUSH(self->stack, obj, -1);
5481 return 0;
5482 }
5483
5484 static int
load_counted_binstring(UnpicklerObject * self,int nbytes)5485 load_counted_binstring(UnpicklerObject *self, int nbytes)
5486 {
5487 PyObject *obj;
5488 Py_ssize_t size;
5489 char *s;
5490
5491 if (_Unpickler_Read(self, &s, nbytes) < 0)
5492 return -1;
5493
5494 size = calc_binsize(s, nbytes);
5495 if (size < 0) {
5496 PickleState *st = _Pickle_GetGlobalState();
5497 PyErr_Format(st->UnpicklingError,
5498 "BINSTRING exceeds system's maximum size of %zd bytes",
5499 PY_SSIZE_T_MAX);
5500 return -1;
5501 }
5502
5503 if (_Unpickler_Read(self, &s, size) < 0)
5504 return -1;
5505
5506 /* Convert Python 2.x strings to bytes if the *encoding* given to the
5507 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5508 if (strcmp(self->encoding, "bytes") == 0) {
5509 obj = PyBytes_FromStringAndSize(s, size);
5510 }
5511 else {
5512 obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5513 }
5514 if (obj == NULL) {
5515 return -1;
5516 }
5517
5518 PDATA_PUSH(self->stack, obj, -1);
5519 return 0;
5520 }
5521
5522 static int
load_counted_binbytes(UnpicklerObject * self,int nbytes)5523 load_counted_binbytes(UnpicklerObject *self, int nbytes)
5524 {
5525 PyObject *bytes;
5526 Py_ssize_t size;
5527 char *s;
5528
5529 if (_Unpickler_Read(self, &s, nbytes) < 0)
5530 return -1;
5531
5532 size = calc_binsize(s, nbytes);
5533 if (size < 0) {
5534 PyErr_Format(PyExc_OverflowError,
5535 "BINBYTES exceeds system's maximum size of %zd bytes",
5536 PY_SSIZE_T_MAX);
5537 return -1;
5538 }
5539
5540 bytes = PyBytes_FromStringAndSize(NULL, size);
5541 if (bytes == NULL)
5542 return -1;
5543 if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
5544 Py_DECREF(bytes);
5545 return -1;
5546 }
5547
5548 PDATA_PUSH(self->stack, bytes, -1);
5549 return 0;
5550 }
5551
5552 static int
load_counted_bytearray(UnpicklerObject * self)5553 load_counted_bytearray(UnpicklerObject *self)
5554 {
5555 PyObject *bytearray;
5556 Py_ssize_t size;
5557 char *s;
5558
5559 if (_Unpickler_Read(self, &s, 8) < 0) {
5560 return -1;
5561 }
5562
5563 size = calc_binsize(s, 8);
5564 if (size < 0) {
5565 PyErr_Format(PyExc_OverflowError,
5566 "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5567 PY_SSIZE_T_MAX);
5568 return -1;
5569 }
5570
5571 bytearray = PyByteArray_FromStringAndSize(NULL, size);
5572 if (bytearray == NULL) {
5573 return -1;
5574 }
5575 if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
5576 Py_DECREF(bytearray);
5577 return -1;
5578 }
5579
5580 PDATA_PUSH(self->stack, bytearray, -1);
5581 return 0;
5582 }
5583
5584 static int
load_next_buffer(UnpicklerObject * self)5585 load_next_buffer(UnpicklerObject *self)
5586 {
5587 if (self->buffers == NULL) {
5588 PickleState *st = _Pickle_GetGlobalState();
5589 PyErr_SetString(st->UnpicklingError,
5590 "pickle stream refers to out-of-band data "
5591 "but no *buffers* argument was given");
5592 return -1;
5593 }
5594 PyObject *buf = PyIter_Next(self->buffers);
5595 if (buf == NULL) {
5596 if (!PyErr_Occurred()) {
5597 PickleState *st = _Pickle_GetGlobalState();
5598 PyErr_SetString(st->UnpicklingError,
5599 "not enough out-of-band buffers");
5600 }
5601 return -1;
5602 }
5603
5604 PDATA_PUSH(self->stack, buf, -1);
5605 return 0;
5606 }
5607
5608 static int
load_readonly_buffer(UnpicklerObject * self)5609 load_readonly_buffer(UnpicklerObject *self)
5610 {
5611 Py_ssize_t len = Py_SIZE(self->stack);
5612 if (len <= self->stack->fence) {
5613 return Pdata_stack_underflow(self->stack);
5614 }
5615
5616 PyObject *obj = self->stack->data[len - 1];
5617 PyObject *view = PyMemoryView_FromObject(obj);
5618 if (view == NULL) {
5619 return -1;
5620 }
5621 if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5622 /* Original object is writable */
5623 PyMemoryView_GET_BUFFER(view)->readonly = 1;
5624 self->stack->data[len - 1] = view;
5625 Py_DECREF(obj);
5626 }
5627 else {
5628 /* Original object is read-only, no need to replace it */
5629 Py_DECREF(view);
5630 }
5631 return 0;
5632 }
5633
5634 static int
load_unicode(UnpicklerObject * self)5635 load_unicode(UnpicklerObject *self)
5636 {
5637 PyObject *str;
5638 Py_ssize_t len;
5639 char *s = NULL;
5640
5641 if ((len = _Unpickler_Readline(self, &s)) < 0)
5642 return -1;
5643 if (len < 1)
5644 return bad_readline();
5645
5646 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5647 if (str == NULL)
5648 return -1;
5649
5650 PDATA_PUSH(self->stack, str, -1);
5651 return 0;
5652 }
5653
5654 static int
load_counted_binunicode(UnpicklerObject * self,int nbytes)5655 load_counted_binunicode(UnpicklerObject *self, int nbytes)
5656 {
5657 PyObject *str;
5658 Py_ssize_t size;
5659 char *s;
5660
5661 if (_Unpickler_Read(self, &s, nbytes) < 0)
5662 return -1;
5663
5664 size = calc_binsize(s, nbytes);
5665 if (size < 0) {
5666 PyErr_Format(PyExc_OverflowError,
5667 "BINUNICODE exceeds system's maximum size of %zd bytes",
5668 PY_SSIZE_T_MAX);
5669 return -1;
5670 }
5671
5672 if (_Unpickler_Read(self, &s, size) < 0)
5673 return -1;
5674
5675 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
5676 if (str == NULL)
5677 return -1;
5678
5679 PDATA_PUSH(self->stack, str, -1);
5680 return 0;
5681 }
5682
5683 static int
load_counted_tuple(UnpicklerObject * self,Py_ssize_t len)5684 load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
5685 {
5686 PyObject *tuple;
5687
5688 if (Py_SIZE(self->stack) < len)
5689 return Pdata_stack_underflow(self->stack);
5690
5691 tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
5692 if (tuple == NULL)
5693 return -1;
5694 PDATA_PUSH(self->stack, tuple, -1);
5695 return 0;
5696 }
5697
5698 static int
load_tuple(UnpicklerObject * self)5699 load_tuple(UnpicklerObject *self)
5700 {
5701 Py_ssize_t i;
5702
5703 if ((i = marker(self)) < 0)
5704 return -1;
5705
5706 return load_counted_tuple(self, Py_SIZE(self->stack) - i);
5707 }
5708
5709 static int
load_empty_list(UnpicklerObject * self)5710 load_empty_list(UnpicklerObject *self)
5711 {
5712 PyObject *list;
5713
5714 if ((list = PyList_New(0)) == NULL)
5715 return -1;
5716 PDATA_PUSH(self->stack, list, -1);
5717 return 0;
5718 }
5719
5720 static int
load_empty_dict(UnpicklerObject * self)5721 load_empty_dict(UnpicklerObject *self)
5722 {
5723 PyObject *dict;
5724
5725 if ((dict = PyDict_New()) == NULL)
5726 return -1;
5727 PDATA_PUSH(self->stack, dict, -1);
5728 return 0;
5729 }
5730
5731 static int
load_empty_set(UnpicklerObject * self)5732 load_empty_set(UnpicklerObject *self)
5733 {
5734 PyObject *set;
5735
5736 if ((set = PySet_New(NULL)) == NULL)
5737 return -1;
5738 PDATA_PUSH(self->stack, set, -1);
5739 return 0;
5740 }
5741
5742 static int
load_list(UnpicklerObject * self)5743 load_list(UnpicklerObject *self)
5744 {
5745 PyObject *list;
5746 Py_ssize_t i;
5747
5748 if ((i = marker(self)) < 0)
5749 return -1;
5750
5751 list = Pdata_poplist(self->stack, i);
5752 if (list == NULL)
5753 return -1;
5754 PDATA_PUSH(self->stack, list, -1);
5755 return 0;
5756 }
5757
5758 static int
load_dict(UnpicklerObject * self)5759 load_dict(UnpicklerObject *self)
5760 {
5761 PyObject *dict, *key, *value;
5762 Py_ssize_t i, j, k;
5763
5764 if ((i = marker(self)) < 0)
5765 return -1;
5766 j = Py_SIZE(self->stack);
5767
5768 if ((dict = PyDict_New()) == NULL)
5769 return -1;
5770
5771 if ((j - i) % 2 != 0) {
5772 PickleState *st = _Pickle_GetGlobalState();
5773 PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
5774 Py_DECREF(dict);
5775 return -1;
5776 }
5777
5778 for (k = i + 1; k < j; k += 2) {
5779 key = self->stack->data[k - 1];
5780 value = self->stack->data[k];
5781 if (PyDict_SetItem(dict, key, value) < 0) {
5782 Py_DECREF(dict);
5783 return -1;
5784 }
5785 }
5786 Pdata_clear(self->stack, i);
5787 PDATA_PUSH(self->stack, dict, -1);
5788 return 0;
5789 }
5790
5791 static int
load_frozenset(UnpicklerObject * self)5792 load_frozenset(UnpicklerObject *self)
5793 {
5794 PyObject *items;
5795 PyObject *frozenset;
5796 Py_ssize_t i;
5797
5798 if ((i = marker(self)) < 0)
5799 return -1;
5800
5801 items = Pdata_poptuple(self->stack, i);
5802 if (items == NULL)
5803 return -1;
5804
5805 frozenset = PyFrozenSet_New(items);
5806 Py_DECREF(items);
5807 if (frozenset == NULL)
5808 return -1;
5809
5810 PDATA_PUSH(self->stack, frozenset, -1);
5811 return 0;
5812 }
5813
5814 static PyObject *
instantiate(PyObject * cls,PyObject * args)5815 instantiate(PyObject *cls, PyObject *args)
5816 {
5817 /* Caller must assure args are a tuple. Normally, args come from
5818 Pdata_poptuple which packs objects from the top of the stack
5819 into a newly created tuple. */
5820 assert(PyTuple_Check(args));
5821 if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5822 PyObject *func;
5823 if (_PyObject_LookupAttr(cls, &_Py_ID(__getinitargs__), &func) < 0) {
5824 return NULL;
5825 }
5826 if (func == NULL) {
5827 return PyObject_CallMethodOneArg(cls, &_Py_ID(__new__), cls);
5828 }
5829 Py_DECREF(func);
5830 }
5831 return PyObject_CallObject(cls, args);
5832 }
5833
5834 static int
load_obj(UnpicklerObject * self)5835 load_obj(UnpicklerObject *self)
5836 {
5837 PyObject *cls, *args, *obj = NULL;
5838 Py_ssize_t i;
5839
5840 if ((i = marker(self)) < 0)
5841 return -1;
5842
5843 if (Py_SIZE(self->stack) - i < 1)
5844 return Pdata_stack_underflow(self->stack);
5845
5846 args = Pdata_poptuple(self->stack, i + 1);
5847 if (args == NULL)
5848 return -1;
5849
5850 PDATA_POP(self->stack, cls);
5851 if (cls) {
5852 obj = instantiate(cls, args);
5853 Py_DECREF(cls);
5854 }
5855 Py_DECREF(args);
5856 if (obj == NULL)
5857 return -1;
5858
5859 PDATA_PUSH(self->stack, obj, -1);
5860 return 0;
5861 }
5862
5863 static int
load_inst(UnpicklerObject * self)5864 load_inst(UnpicklerObject *self)
5865 {
5866 PyObject *cls = NULL;
5867 PyObject *args = NULL;
5868 PyObject *obj = NULL;
5869 PyObject *module_name;
5870 PyObject *class_name;
5871 Py_ssize_t len;
5872 Py_ssize_t i;
5873 char *s;
5874
5875 if ((i = marker(self)) < 0)
5876 return -1;
5877 if ((len = _Unpickler_Readline(self, &s)) < 0)
5878 return -1;
5879 if (len < 2)
5880 return bad_readline();
5881
5882 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5883 identifiers are permitted in Python 3.0, since the INST opcode is only
5884 supported by older protocols on Python 2.x. */
5885 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5886 if (module_name == NULL)
5887 return -1;
5888
5889 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
5890 if (len < 2) {
5891 Py_DECREF(module_name);
5892 return bad_readline();
5893 }
5894 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5895 if (class_name != NULL) {
5896 cls = find_class(self, module_name, class_name);
5897 Py_DECREF(class_name);
5898 }
5899 }
5900 Py_DECREF(module_name);
5901
5902 if (cls == NULL)
5903 return -1;
5904
5905 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5906 obj = instantiate(cls, args);
5907 Py_DECREF(args);
5908 }
5909 Py_DECREF(cls);
5910
5911 if (obj == NULL)
5912 return -1;
5913
5914 PDATA_PUSH(self->stack, obj, -1);
5915 return 0;
5916 }
5917
5918 static void
newobj_unpickling_error(const char * msg,int use_kwargs,PyObject * arg)5919 newobj_unpickling_error(const char * msg, int use_kwargs, PyObject *arg)
5920 {
5921 PickleState *st = _Pickle_GetGlobalState();
5922 PyErr_Format(st->UnpicklingError, msg,
5923 use_kwargs ? "NEWOBJ_EX" : "NEWOBJ",
5924 Py_TYPE(arg)->tp_name);
5925 }
5926
5927 static int
load_newobj(UnpicklerObject * self,int use_kwargs)5928 load_newobj(UnpicklerObject *self, int use_kwargs)
5929 {
5930 PyObject *cls, *args, *kwargs = NULL;
5931 PyObject *obj;
5932
5933 /* Stack is ... cls args [kwargs], and we want to call
5934 * cls.__new__(cls, *args, **kwargs).
5935 */
5936 if (use_kwargs) {
5937 PDATA_POP(self->stack, kwargs);
5938 if (kwargs == NULL) {
5939 return -1;
5940 }
5941 }
5942 PDATA_POP(self->stack, args);
5943 if (args == NULL) {
5944 Py_XDECREF(kwargs);
5945 return -1;
5946 }
5947 PDATA_POP(self->stack, cls);
5948 if (cls == NULL) {
5949 Py_XDECREF(kwargs);
5950 Py_DECREF(args);
5951 return -1;
5952 }
5953
5954 if (!PyType_Check(cls)) {
5955 newobj_unpickling_error("%s class argument must be a type, not %.200s",
5956 use_kwargs, cls);
5957 goto error;
5958 }
5959 if (((PyTypeObject *)cls)->tp_new == NULL) {
5960 newobj_unpickling_error("%s class argument '%.200s' doesn't have __new__",
5961 use_kwargs, cls);
5962 goto error;
5963 }
5964 if (!PyTuple_Check(args)) {
5965 newobj_unpickling_error("%s args argument must be a tuple, not %.200s",
5966 use_kwargs, args);
5967 goto error;
5968 }
5969 if (use_kwargs && !PyDict_Check(kwargs)) {
5970 newobj_unpickling_error("%s kwargs argument must be a dict, not %.200s",
5971 use_kwargs, kwargs);
5972 goto error;
5973 }
5974
5975 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5976 if (obj == NULL) {
5977 goto error;
5978 }
5979 Py_XDECREF(kwargs);
5980 Py_DECREF(args);
5981 Py_DECREF(cls);
5982 PDATA_PUSH(self->stack, obj, -1);
5983 return 0;
5984
5985 error:
5986 Py_XDECREF(kwargs);
5987 Py_DECREF(args);
5988 Py_DECREF(cls);
5989 return -1;
5990 }
5991
5992 static int
load_global(UnpicklerObject * self)5993 load_global(UnpicklerObject *self)
5994 {
5995 PyObject *global = NULL;
5996 PyObject *module_name;
5997 PyObject *global_name;
5998 Py_ssize_t len;
5999 char *s;
6000
6001 if ((len = _Unpickler_Readline(self, &s)) < 0)
6002 return -1;
6003 if (len < 2)
6004 return bad_readline();
6005 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6006 if (!module_name)
6007 return -1;
6008
6009 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
6010 if (len < 2) {
6011 Py_DECREF(module_name);
6012 return bad_readline();
6013 }
6014 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6015 if (global_name) {
6016 global = find_class(self, module_name, global_name);
6017 Py_DECREF(global_name);
6018 }
6019 }
6020 Py_DECREF(module_name);
6021
6022 if (global == NULL)
6023 return -1;
6024 PDATA_PUSH(self->stack, global, -1);
6025 return 0;
6026 }
6027
6028 static int
load_stack_global(UnpicklerObject * self)6029 load_stack_global(UnpicklerObject *self)
6030 {
6031 PyObject *global;
6032 PyObject *module_name;
6033 PyObject *global_name;
6034
6035 PDATA_POP(self->stack, global_name);
6036 PDATA_POP(self->stack, module_name);
6037 if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
6038 global_name == NULL || !PyUnicode_CheckExact(global_name)) {
6039 PickleState *st = _Pickle_GetGlobalState();
6040 PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
6041 Py_XDECREF(global_name);
6042 Py_XDECREF(module_name);
6043 return -1;
6044 }
6045 global = find_class(self, module_name, global_name);
6046 Py_DECREF(global_name);
6047 Py_DECREF(module_name);
6048 if (global == NULL)
6049 return -1;
6050 PDATA_PUSH(self->stack, global, -1);
6051 return 0;
6052 }
6053
6054 static int
load_persid(UnpicklerObject * self)6055 load_persid(UnpicklerObject *self)
6056 {
6057 PyObject *pid, *obj;
6058 Py_ssize_t len;
6059 char *s;
6060
6061 if (self->pers_func) {
6062 if ((len = _Unpickler_Readline(self, &s)) < 0)
6063 return -1;
6064 if (len < 1)
6065 return bad_readline();
6066
6067 pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6068 if (pid == NULL) {
6069 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6070 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
6071 "persistent IDs in protocol 0 must be "
6072 "ASCII strings");
6073 }
6074 return -1;
6075 }
6076
6077 obj = call_method(self->pers_func, self->pers_func_self, pid);
6078 Py_DECREF(pid);
6079 if (obj == NULL)
6080 return -1;
6081
6082 PDATA_PUSH(self->stack, obj, -1);
6083 return 0;
6084 }
6085 else {
6086 PickleState *st = _Pickle_GetGlobalState();
6087 PyErr_SetString(st->UnpicklingError,
6088 "A load persistent id instruction was encountered,\n"
6089 "but no persistent_load function was specified.");
6090 return -1;
6091 }
6092 }
6093
6094 static int
load_binpersid(UnpicklerObject * self)6095 load_binpersid(UnpicklerObject *self)
6096 {
6097 PyObject *pid, *obj;
6098
6099 if (self->pers_func) {
6100 PDATA_POP(self->stack, pid);
6101 if (pid == NULL)
6102 return -1;
6103
6104 obj = call_method(self->pers_func, self->pers_func_self, pid);
6105 Py_DECREF(pid);
6106 if (obj == NULL)
6107 return -1;
6108
6109 PDATA_PUSH(self->stack, obj, -1);
6110 return 0;
6111 }
6112 else {
6113 PickleState *st = _Pickle_GetGlobalState();
6114 PyErr_SetString(st->UnpicklingError,
6115 "A load persistent id instruction was encountered,\n"
6116 "but no persistent_load function was specified.");
6117 return -1;
6118 }
6119 }
6120
6121 static int
load_pop(UnpicklerObject * self)6122 load_pop(UnpicklerObject *self)
6123 {
6124 Py_ssize_t len = Py_SIZE(self->stack);
6125
6126 /* Note that we split the (pickle.py) stack into two stacks,
6127 * an object stack and a mark stack. We have to be clever and
6128 * pop the right one. We do this by looking at the top of the
6129 * mark stack first, and only signalling a stack underflow if
6130 * the object stack is empty and the mark stack doesn't match
6131 * our expectations.
6132 */
6133 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
6134 self->num_marks--;
6135 self->stack->mark_set = self->num_marks != 0;
6136 self->stack->fence = self->num_marks ?
6137 self->marks[self->num_marks - 1] : 0;
6138 } else if (len <= self->stack->fence)
6139 return Pdata_stack_underflow(self->stack);
6140 else {
6141 len--;
6142 Py_DECREF(self->stack->data[len]);
6143 Py_SET_SIZE(self->stack, len);
6144 }
6145 return 0;
6146 }
6147
6148 static int
load_pop_mark(UnpicklerObject * self)6149 load_pop_mark(UnpicklerObject *self)
6150 {
6151 Py_ssize_t i;
6152
6153 if ((i = marker(self)) < 0)
6154 return -1;
6155
6156 Pdata_clear(self->stack, i);
6157
6158 return 0;
6159 }
6160
6161 static int
load_dup(UnpicklerObject * self)6162 load_dup(UnpicklerObject *self)
6163 {
6164 PyObject *last;
6165 Py_ssize_t len = Py_SIZE(self->stack);
6166
6167 if (len <= self->stack->fence)
6168 return Pdata_stack_underflow(self->stack);
6169 last = self->stack->data[len - 1];
6170 PDATA_APPEND(self->stack, last, -1);
6171 return 0;
6172 }
6173
6174 static int
load_get(UnpicklerObject * self)6175 load_get(UnpicklerObject *self)
6176 {
6177 PyObject *key, *value;
6178 Py_ssize_t idx;
6179 Py_ssize_t len;
6180 char *s;
6181
6182 if ((len = _Unpickler_Readline(self, &s)) < 0)
6183 return -1;
6184 if (len < 2)
6185 return bad_readline();
6186
6187 key = PyLong_FromString(s, NULL, 10);
6188 if (key == NULL)
6189 return -1;
6190 idx = PyLong_AsSsize_t(key);
6191 if (idx == -1 && PyErr_Occurred()) {
6192 Py_DECREF(key);
6193 return -1;
6194 }
6195
6196 value = _Unpickler_MemoGet(self, idx);
6197 if (value == NULL) {
6198 if (!PyErr_Occurred()) {
6199 PickleState *st = _Pickle_GetGlobalState();
6200 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6201 }
6202 Py_DECREF(key);
6203 return -1;
6204 }
6205 Py_DECREF(key);
6206
6207 PDATA_APPEND(self->stack, value, -1);
6208 return 0;
6209 }
6210
6211 static int
load_binget(UnpicklerObject * self)6212 load_binget(UnpicklerObject *self)
6213 {
6214 PyObject *value;
6215 Py_ssize_t idx;
6216 char *s;
6217
6218 if (_Unpickler_Read(self, &s, 1) < 0)
6219 return -1;
6220
6221 idx = Py_CHARMASK(s[0]);
6222
6223 value = _Unpickler_MemoGet(self, idx);
6224 if (value == NULL) {
6225 PyObject *key = PyLong_FromSsize_t(idx);
6226 if (key != NULL) {
6227 PickleState *st = _Pickle_GetGlobalState();
6228 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6229 Py_DECREF(key);
6230 }
6231 return -1;
6232 }
6233
6234 PDATA_APPEND(self->stack, value, -1);
6235 return 0;
6236 }
6237
6238 static int
load_long_binget(UnpicklerObject * self)6239 load_long_binget(UnpicklerObject *self)
6240 {
6241 PyObject *value;
6242 Py_ssize_t idx;
6243 char *s;
6244
6245 if (_Unpickler_Read(self, &s, 4) < 0)
6246 return -1;
6247
6248 idx = calc_binsize(s, 4);
6249
6250 value = _Unpickler_MemoGet(self, idx);
6251 if (value == NULL) {
6252 PyObject *key = PyLong_FromSsize_t(idx);
6253 if (key != NULL) {
6254 PickleState *st = _Pickle_GetGlobalState();
6255 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6256 Py_DECREF(key);
6257 }
6258 return -1;
6259 }
6260
6261 PDATA_APPEND(self->stack, value, -1);
6262 return 0;
6263 }
6264
6265 /* Push an object from the extension registry (EXT[124]). nbytes is
6266 * the number of bytes following the opcode, holding the index (code) value.
6267 */
6268 static int
load_extension(UnpicklerObject * self,int nbytes)6269 load_extension(UnpicklerObject *self, int nbytes)
6270 {
6271 char *codebytes; /* the nbytes bytes after the opcode */
6272 long code; /* calc_binint returns long */
6273 PyObject *py_code; /* code as a Python int */
6274 PyObject *obj; /* the object to push */
6275 PyObject *pair; /* (module_name, class_name) */
6276 PyObject *module_name, *class_name;
6277 PickleState *st = _Pickle_GetGlobalState();
6278
6279 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
6280 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
6281 return -1;
6282 code = calc_binint(codebytes, nbytes);
6283 if (code <= 0) { /* note that 0 is forbidden */
6284 /* Corrupt or hostile pickle. */
6285 PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
6286 return -1;
6287 }
6288
6289 /* Look for the code in the cache. */
6290 py_code = PyLong_FromLong(code);
6291 if (py_code == NULL)
6292 return -1;
6293 obj = PyDict_GetItemWithError(st->extension_cache, py_code);
6294 if (obj != NULL) {
6295 /* Bingo. */
6296 Py_DECREF(py_code);
6297 PDATA_APPEND(self->stack, obj, -1);
6298 return 0;
6299 }
6300 if (PyErr_Occurred()) {
6301 Py_DECREF(py_code);
6302 return -1;
6303 }
6304
6305 /* Look up the (module_name, class_name) pair. */
6306 pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
6307 if (pair == NULL) {
6308 Py_DECREF(py_code);
6309 if (!PyErr_Occurred()) {
6310 PyErr_Format(PyExc_ValueError, "unregistered extension "
6311 "code %ld", code);
6312 }
6313 return -1;
6314 }
6315 /* Since the extension registry is manipulable via Python code,
6316 * confirm that pair is really a 2-tuple of strings.
6317 */
6318 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6319 goto error;
6320 }
6321
6322 module_name = PyTuple_GET_ITEM(pair, 0);
6323 if (!PyUnicode_Check(module_name)) {
6324 goto error;
6325 }
6326
6327 class_name = PyTuple_GET_ITEM(pair, 1);
6328 if (!PyUnicode_Check(class_name)) {
6329 goto error;
6330 }
6331
6332 /* Load the object. */
6333 obj = find_class(self, module_name, class_name);
6334 if (obj == NULL) {
6335 Py_DECREF(py_code);
6336 return -1;
6337 }
6338 /* Cache code -> obj. */
6339 code = PyDict_SetItem(st->extension_cache, py_code, obj);
6340 Py_DECREF(py_code);
6341 if (code < 0) {
6342 Py_DECREF(obj);
6343 return -1;
6344 }
6345 PDATA_PUSH(self->stack, obj, -1);
6346 return 0;
6347
6348 error:
6349 Py_DECREF(py_code);
6350 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6351 "isn't a 2-tuple of strings", code);
6352 return -1;
6353 }
6354
6355 static int
load_put(UnpicklerObject * self)6356 load_put(UnpicklerObject *self)
6357 {
6358 PyObject *key, *value;
6359 Py_ssize_t idx;
6360 Py_ssize_t len;
6361 char *s = NULL;
6362
6363 if ((len = _Unpickler_Readline(self, &s)) < 0)
6364 return -1;
6365 if (len < 2)
6366 return bad_readline();
6367 if (Py_SIZE(self->stack) <= self->stack->fence)
6368 return Pdata_stack_underflow(self->stack);
6369 value = self->stack->data[Py_SIZE(self->stack) - 1];
6370
6371 key = PyLong_FromString(s, NULL, 10);
6372 if (key == NULL)
6373 return -1;
6374 idx = PyLong_AsSsize_t(key);
6375 Py_DECREF(key);
6376 if (idx < 0) {
6377 if (!PyErr_Occurred())
6378 PyErr_SetString(PyExc_ValueError,
6379 "negative PUT argument");
6380 return -1;
6381 }
6382
6383 return _Unpickler_MemoPut(self, idx, value);
6384 }
6385
6386 static int
load_binput(UnpicklerObject * self)6387 load_binput(UnpicklerObject *self)
6388 {
6389 PyObject *value;
6390 Py_ssize_t idx;
6391 char *s;
6392
6393 if (_Unpickler_Read(self, &s, 1) < 0)
6394 return -1;
6395
6396 if (Py_SIZE(self->stack) <= self->stack->fence)
6397 return Pdata_stack_underflow(self->stack);
6398 value = self->stack->data[Py_SIZE(self->stack) - 1];
6399
6400 idx = Py_CHARMASK(s[0]);
6401
6402 return _Unpickler_MemoPut(self, idx, value);
6403 }
6404
6405 static int
load_long_binput(UnpicklerObject * self)6406 load_long_binput(UnpicklerObject *self)
6407 {
6408 PyObject *value;
6409 Py_ssize_t idx;
6410 char *s;
6411
6412 if (_Unpickler_Read(self, &s, 4) < 0)
6413 return -1;
6414
6415 if (Py_SIZE(self->stack) <= self->stack->fence)
6416 return Pdata_stack_underflow(self->stack);
6417 value = self->stack->data[Py_SIZE(self->stack) - 1];
6418
6419 idx = calc_binsize(s, 4);
6420 if (idx < 0) {
6421 PyErr_SetString(PyExc_ValueError,
6422 "negative LONG_BINPUT argument");
6423 return -1;
6424 }
6425
6426 return _Unpickler_MemoPut(self, idx, value);
6427 }
6428
6429 static int
load_memoize(UnpicklerObject * self)6430 load_memoize(UnpicklerObject *self)
6431 {
6432 PyObject *value;
6433
6434 if (Py_SIZE(self->stack) <= self->stack->fence)
6435 return Pdata_stack_underflow(self->stack);
6436 value = self->stack->data[Py_SIZE(self->stack) - 1];
6437
6438 return _Unpickler_MemoPut(self, self->memo_len, value);
6439 }
6440
6441 static int
do_append(UnpicklerObject * self,Py_ssize_t x)6442 do_append(UnpicklerObject *self, Py_ssize_t x)
6443 {
6444 PyObject *value;
6445 PyObject *slice;
6446 PyObject *list;
6447 PyObject *result;
6448 Py_ssize_t len, i;
6449
6450 len = Py_SIZE(self->stack);
6451 if (x > len || x <= self->stack->fence)
6452 return Pdata_stack_underflow(self->stack);
6453 if (len == x) /* nothing to do */
6454 return 0;
6455
6456 list = self->stack->data[x - 1];
6457
6458 if (PyList_CheckExact(list)) {
6459 Py_ssize_t list_len;
6460 int ret;
6461
6462 slice = Pdata_poplist(self->stack, x);
6463 if (!slice)
6464 return -1;
6465 list_len = PyList_GET_SIZE(list);
6466 ret = PyList_SetSlice(list, list_len, list_len, slice);
6467 Py_DECREF(slice);
6468 return ret;
6469 }
6470 else {
6471 PyObject *extend_func;
6472
6473 if (_PyObject_LookupAttr(list, &_Py_ID(extend), &extend_func) < 0) {
6474 return -1;
6475 }
6476 if (extend_func != NULL) {
6477 slice = Pdata_poplist(self->stack, x);
6478 if (!slice) {
6479 Py_DECREF(extend_func);
6480 return -1;
6481 }
6482 result = _Pickle_FastCall(extend_func, slice);
6483 Py_DECREF(extend_func);
6484 if (result == NULL)
6485 return -1;
6486 Py_DECREF(result);
6487 }
6488 else {
6489 PyObject *append_func;
6490
6491 /* Even if the PEP 307 requires extend() and append() methods,
6492 fall back on append() if the object has no extend() method
6493 for backward compatibility. */
6494 append_func = PyObject_GetAttr(list, &_Py_ID(append));
6495 if (append_func == NULL)
6496 return -1;
6497 for (i = x; i < len; i++) {
6498 value = self->stack->data[i];
6499 result = _Pickle_FastCall(append_func, value);
6500 if (result == NULL) {
6501 Pdata_clear(self->stack, i + 1);
6502 Py_SET_SIZE(self->stack, x);
6503 Py_DECREF(append_func);
6504 return -1;
6505 }
6506 Py_DECREF(result);
6507 }
6508 Py_SET_SIZE(self->stack, x);
6509 Py_DECREF(append_func);
6510 }
6511 }
6512
6513 return 0;
6514 }
6515
6516 static int
load_append(UnpicklerObject * self)6517 load_append(UnpicklerObject *self)
6518 {
6519 if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6520 return Pdata_stack_underflow(self->stack);
6521 return do_append(self, Py_SIZE(self->stack) - 1);
6522 }
6523
6524 static int
load_appends(UnpicklerObject * self)6525 load_appends(UnpicklerObject *self)
6526 {
6527 Py_ssize_t i = marker(self);
6528 if (i < 0)
6529 return -1;
6530 return do_append(self, i);
6531 }
6532
6533 static int
do_setitems(UnpicklerObject * self,Py_ssize_t x)6534 do_setitems(UnpicklerObject *self, Py_ssize_t x)
6535 {
6536 PyObject *value, *key;
6537 PyObject *dict;
6538 Py_ssize_t len, i;
6539 int status = 0;
6540
6541 len = Py_SIZE(self->stack);
6542 if (x > len || x <= self->stack->fence)
6543 return Pdata_stack_underflow(self->stack);
6544 if (len == x) /* nothing to do */
6545 return 0;
6546 if ((len - x) % 2 != 0) {
6547 PickleState *st = _Pickle_GetGlobalState();
6548 /* Corrupt or hostile pickle -- we never write one like this. */
6549 PyErr_SetString(st->UnpicklingError,
6550 "odd number of items for SETITEMS");
6551 return -1;
6552 }
6553
6554 /* Here, dict does not actually need to be a PyDict; it could be anything
6555 that supports the __setitem__ attribute. */
6556 dict = self->stack->data[x - 1];
6557
6558 for (i = x + 1; i < len; i += 2) {
6559 key = self->stack->data[i - 1];
6560 value = self->stack->data[i];
6561 if (PyObject_SetItem(dict, key, value) < 0) {
6562 status = -1;
6563 break;
6564 }
6565 }
6566
6567 Pdata_clear(self->stack, x);
6568 return status;
6569 }
6570
6571 static int
load_setitem(UnpicklerObject * self)6572 load_setitem(UnpicklerObject *self)
6573 {
6574 return do_setitems(self, Py_SIZE(self->stack) - 2);
6575 }
6576
6577 static int
load_setitems(UnpicklerObject * self)6578 load_setitems(UnpicklerObject *self)
6579 {
6580 Py_ssize_t i = marker(self);
6581 if (i < 0)
6582 return -1;
6583 return do_setitems(self, i);
6584 }
6585
6586 static int
load_additems(UnpicklerObject * self)6587 load_additems(UnpicklerObject *self)
6588 {
6589 PyObject *set;
6590 Py_ssize_t mark, len, i;
6591
6592 mark = marker(self);
6593 if (mark < 0)
6594 return -1;
6595 len = Py_SIZE(self->stack);
6596 if (mark > len || mark <= self->stack->fence)
6597 return Pdata_stack_underflow(self->stack);
6598 if (len == mark) /* nothing to do */
6599 return 0;
6600
6601 set = self->stack->data[mark - 1];
6602
6603 if (PySet_Check(set)) {
6604 PyObject *items;
6605 int status;
6606
6607 items = Pdata_poptuple(self->stack, mark);
6608 if (items == NULL)
6609 return -1;
6610
6611 status = _PySet_Update(set, items);
6612 Py_DECREF(items);
6613 return status;
6614 }
6615 else {
6616 PyObject *add_func;
6617
6618 add_func = PyObject_GetAttr(set, &_Py_ID(add));
6619 if (add_func == NULL)
6620 return -1;
6621 for (i = mark; i < len; i++) {
6622 PyObject *result;
6623 PyObject *item;
6624
6625 item = self->stack->data[i];
6626 result = _Pickle_FastCall(add_func, item);
6627 if (result == NULL) {
6628 Pdata_clear(self->stack, i + 1);
6629 Py_SET_SIZE(self->stack, mark);
6630 return -1;
6631 }
6632 Py_DECREF(result);
6633 }
6634 Py_SET_SIZE(self->stack, mark);
6635 }
6636
6637 return 0;
6638 }
6639
6640 static int
load_build(UnpicklerObject * self)6641 load_build(UnpicklerObject *self)
6642 {
6643 PyObject *state, *inst, *slotstate;
6644 PyObject *setstate;
6645 int status = 0;
6646
6647 /* Stack is ... instance, state. We want to leave instance at
6648 * the stack top, possibly mutated via instance.__setstate__(state).
6649 */
6650 if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6651 return Pdata_stack_underflow(self->stack);
6652
6653 PDATA_POP(self->stack, state);
6654 if (state == NULL)
6655 return -1;
6656
6657 inst = self->stack->data[Py_SIZE(self->stack) - 1];
6658
6659 if (_PyObject_LookupAttr(inst, &_Py_ID(__setstate__), &setstate) < 0) {
6660 Py_DECREF(state);
6661 return -1;
6662 }
6663 if (setstate != NULL) {
6664 PyObject *result;
6665
6666 /* The explicit __setstate__ is responsible for everything. */
6667 result = _Pickle_FastCall(setstate, state);
6668 Py_DECREF(setstate);
6669 if (result == NULL)
6670 return -1;
6671 Py_DECREF(result);
6672 return 0;
6673 }
6674
6675 /* A default __setstate__. First see whether state embeds a
6676 * slot state dict too (a proto 2 addition).
6677 */
6678 if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
6679 PyObject *tmp = state;
6680
6681 state = PyTuple_GET_ITEM(tmp, 0);
6682 slotstate = PyTuple_GET_ITEM(tmp, 1);
6683 Py_INCREF(state);
6684 Py_INCREF(slotstate);
6685 Py_DECREF(tmp);
6686 }
6687 else
6688 slotstate = NULL;
6689
6690 /* Set inst.__dict__ from the state dict (if any). */
6691 if (state != Py_None) {
6692 PyObject *dict;
6693 PyObject *d_key, *d_value;
6694 Py_ssize_t i;
6695
6696 if (!PyDict_Check(state)) {
6697 PickleState *st = _Pickle_GetGlobalState();
6698 PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
6699 goto error;
6700 }
6701 dict = PyObject_GetAttr(inst, &_Py_ID(__dict__));
6702 if (dict == NULL)
6703 goto error;
6704
6705 i = 0;
6706 while (PyDict_Next(state, &i, &d_key, &d_value)) {
6707 /* normally the keys for instance attributes are
6708 interned. we should try to do that here. */
6709 Py_INCREF(d_key);
6710 if (PyUnicode_CheckExact(d_key))
6711 PyUnicode_InternInPlace(&d_key);
6712 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6713 Py_DECREF(d_key);
6714 goto error;
6715 }
6716 Py_DECREF(d_key);
6717 }
6718 Py_DECREF(dict);
6719 }
6720
6721 /* Also set instance attributes from the slotstate dict (if any). */
6722 if (slotstate != NULL) {
6723 PyObject *d_key, *d_value;
6724 Py_ssize_t i;
6725
6726 if (!PyDict_Check(slotstate)) {
6727 PickleState *st = _Pickle_GetGlobalState();
6728 PyErr_SetString(st->UnpicklingError,
6729 "slot state is not a dictionary");
6730 goto error;
6731 }
6732 i = 0;
6733 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6734 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6735 goto error;
6736 }
6737 }
6738
6739 if (0) {
6740 error:
6741 status = -1;
6742 }
6743
6744 Py_DECREF(state);
6745 Py_XDECREF(slotstate);
6746 return status;
6747 }
6748
6749 static int
load_mark(UnpicklerObject * self)6750 load_mark(UnpicklerObject *self)
6751 {
6752
6753 /* Note that we split the (pickle.py) stack into two stacks, an
6754 * object stack and a mark stack. Here we push a mark onto the
6755 * mark stack.
6756 */
6757
6758 if (self->num_marks >= self->marks_size) {
6759 size_t alloc = ((size_t)self->num_marks << 1) + 20;
6760 Py_ssize_t *marks_new = self->marks;
6761 PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6762 if (marks_new == NULL) {
6763 PyErr_NoMemory();
6764 return -1;
6765 }
6766 self->marks = marks_new;
6767 self->marks_size = (Py_ssize_t)alloc;
6768 }
6769
6770 self->stack->mark_set = 1;
6771 self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
6772
6773 return 0;
6774 }
6775
6776 static int
load_reduce(UnpicklerObject * self)6777 load_reduce(UnpicklerObject *self)
6778 {
6779 PyObject *callable = NULL;
6780 PyObject *argtup = NULL;
6781 PyObject *obj = NULL;
6782
6783 PDATA_POP(self->stack, argtup);
6784 if (argtup == NULL)
6785 return -1;
6786 PDATA_POP(self->stack, callable);
6787 if (callable) {
6788 obj = PyObject_CallObject(callable, argtup);
6789 Py_DECREF(callable);
6790 }
6791 Py_DECREF(argtup);
6792
6793 if (obj == NULL)
6794 return -1;
6795
6796 PDATA_PUSH(self->stack, obj, -1);
6797 return 0;
6798 }
6799
6800 /* Just raises an error if we don't know the protocol specified. PROTO
6801 * is the first opcode for protocols >= 2.
6802 */
6803 static int
load_proto(UnpicklerObject * self)6804 load_proto(UnpicklerObject *self)
6805 {
6806 char *s;
6807 int i;
6808
6809 if (_Unpickler_Read(self, &s, 1) < 0)
6810 return -1;
6811
6812 i = (unsigned char)s[0];
6813 if (i <= HIGHEST_PROTOCOL) {
6814 self->proto = i;
6815 return 0;
6816 }
6817
6818 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6819 return -1;
6820 }
6821
6822 static int
load_frame(UnpicklerObject * self)6823 load_frame(UnpicklerObject *self)
6824 {
6825 char *s;
6826 Py_ssize_t frame_len;
6827
6828 if (_Unpickler_Read(self, &s, 8) < 0)
6829 return -1;
6830
6831 frame_len = calc_binsize(s, 8);
6832 if (frame_len < 0) {
6833 PyErr_Format(PyExc_OverflowError,
6834 "FRAME length exceeds system's maximum of %zd bytes",
6835 PY_SSIZE_T_MAX);
6836 return -1;
6837 }
6838
6839 if (_Unpickler_Read(self, &s, frame_len) < 0)
6840 return -1;
6841
6842 /* Rewind to start of frame */
6843 self->next_read_idx -= frame_len;
6844 return 0;
6845 }
6846
/* Main unpickling loop.
 *
 * Resets the per-load state (mark stack, fence, protocol, object stack),
 * then reads one opcode byte at a time and dispatches to the matching
 * load_* handler until STOP is seen or a handler fails.
 *
 * Returns the object popped from the top of the stack after STOP, or NULL
 * with an exception set.
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *value = NULL;
    char *s = NULL;

    self->num_marks = 0;
    self->stack->mark_set = 0;
    self->stack->fence = 0;
    self->proto = 0;
    if (Py_SIZE(self->stack))
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below.
       In both macros a failing handler does `break`, which leaves the
       switch and reaches the `break` that ends the while loop; a
       successful handler does `continue`, which fetches the next opcode. */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        if (_Unpickler_Read(self, &s, 1) < 0) {
            /* An UnpicklingError raised while fetching the opcode byte is
               replaced by EOFError; any other exception is left as is. */
            PickleState *st = _Pickle_GetGlobalState();
            if (PyErr_ExceptionMatches(st->UnpicklingError)) {
                PyErr_Format(PyExc_EOFError, "Ran out of input");
            }
            return NULL;
        }

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
        OP_ARG(BINBYTES, load_counted_binbytes, 4)
        OP_ARG(BINBYTES8, load_counted_binbytes, 8)
        OP(BYTEARRAY8, load_counted_bytearray)
        OP(NEXT_BUFFER, load_next_buffer)
        OP(READONLY_BUFFER, load_readonly_buffer)
        OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
        OP_ARG(BINSTRING, load_counted_binstring, 4)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
        OP_ARG(BINUNICODE, load_counted_binunicode, 4)
        OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(EMPTY_SET, load_empty_set)
        OP(ADDITEMS, load_additems)
        OP(FROZENSET, load_frozenset)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP_ARG(NEWOBJ, load_newobj, 0)
        OP_ARG(NEWOBJ_EX, load_newobj, 1)
        OP(GLOBAL, load_global)
        OP(STACK_GLOBAL, load_stack_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(MEMOIZE, load_memoize)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP(FRAME, load_frame)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        case STOP:
            break;

        default:
            {
                /* Unknown opcode: report it as a printable character when
                   it is one (other than quote and backslash), else as a
                   hex escape. */
                PickleState *st = _Pickle_GetGlobalState();
                unsigned char c = (unsigned char) *s;
                if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '%c'.", c);
                }
                else {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '\\x%02x'.", c);
                }
                return NULL;
            }
        }

        break;                  /* and we are done! */
    }

    /* Reached both after STOP and after a failed handler; a pending
       exception tells the two apart. */
    if (PyErr_Occurred()) {
        return NULL;
    }

    if (_Unpickler_SkipConsumed(self) < 0)
        return NULL;

    /* The result is whatever is left on top of the stack. */
    PDATA_POP(self->stack, value);
    return value;
}
6977
6978 /*[clinic input]
6979
6980 _pickle.Unpickler.load
6981
6982 Load a pickle.
6983
6984 Read a pickled object representation from the open file object given
6985 in the constructor, and return the reconstituted object hierarchy
6986 specified therein.
6987 [clinic start generated code]*/
6988
6989 static PyObject *
_pickle_Unpickler_load_impl(UnpicklerObject * self)6990 _pickle_Unpickler_load_impl(UnpicklerObject *self)
6991 /*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
6992 {
6993 UnpicklerObject *unpickler = (UnpicklerObject*)self;
6994
6995 /* Check whether the Unpickler was initialized correctly. This prevents
6996 segfaulting if a subclass overridden __init__ with a function that does
6997 not call Unpickler.__init__(). Here, we simply ensure that self->read
6998 is not NULL. */
6999 if (unpickler->read == NULL) {
7000 PickleState *st = _Pickle_GetGlobalState();
7001 PyErr_Format(st->UnpicklingError,
7002 "Unpickler.__init__() was not called by %s.__init__()",
7003 Py_TYPE(unpickler)->tp_name);
7004 return NULL;
7005 }
7006
7007 return load(unpickler);
7008 }
7009
7010 /* The name of find_class() is misleading. In newer pickle protocols, this
7011 function is used for loading any global (i.e., functions), not just
7012 classes. The name is kept only for backward compatibility. */
7013
7014 /*[clinic input]
7015
7016 _pickle.Unpickler.find_class
7017
7018 module_name: object
7019 global_name: object
7020 /
7021
7022 Return an object from a specified module.
7023
7024 If necessary, the module will be imported. Subclasses may override
7025 this method (e.g. to restrict unpickling of arbitrary classes and
7026 functions).
7027
7028 This method is called whenever a class or a function object is
7029 needed. Both arguments passed are str objects.
7030 [clinic start generated code]*/
7031
7032 static PyObject *
_pickle_Unpickler_find_class_impl(UnpicklerObject * self,PyObject * module_name,PyObject * global_name)7033 _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
7034 PyObject *module_name,
7035 PyObject *global_name)
7036 /*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
7037 {
7038 PyObject *global;
7039 PyObject *module;
7040
7041 if (PySys_Audit("pickle.find_class", "OO",
7042 module_name, global_name) < 0) {
7043 return NULL;
7044 }
7045
7046 /* Try to map the old names used in Python 2.x to the new ones used in
7047 Python 3.x. We do this only with old pickle protocols and when the
7048 user has not disabled the feature. */
7049 if (self->proto < 3 && self->fix_imports) {
7050 PyObject *key;
7051 PyObject *item;
7052 PickleState *st = _Pickle_GetGlobalState();
7053
7054 /* Check if the global (i.e., a function or a class) was renamed
7055 or moved to another module. */
7056 key = PyTuple_Pack(2, module_name, global_name);
7057 if (key == NULL)
7058 return NULL;
7059 item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
7060 Py_DECREF(key);
7061 if (item) {
7062 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7063 PyErr_Format(PyExc_RuntimeError,
7064 "_compat_pickle.NAME_MAPPING values should be "
7065 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7066 return NULL;
7067 }
7068 module_name = PyTuple_GET_ITEM(item, 0);
7069 global_name = PyTuple_GET_ITEM(item, 1);
7070 if (!PyUnicode_Check(module_name) ||
7071 !PyUnicode_Check(global_name)) {
7072 PyErr_Format(PyExc_RuntimeError,
7073 "_compat_pickle.NAME_MAPPING values should be "
7074 "pairs of str, not (%.200s, %.200s)",
7075 Py_TYPE(module_name)->tp_name,
7076 Py_TYPE(global_name)->tp_name);
7077 return NULL;
7078 }
7079 }
7080 else if (PyErr_Occurred()) {
7081 return NULL;
7082 }
7083 else {
7084 /* Check if the module was renamed. */
7085 item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7086 if (item) {
7087 if (!PyUnicode_Check(item)) {
7088 PyErr_Format(PyExc_RuntimeError,
7089 "_compat_pickle.IMPORT_MAPPING values should be "
7090 "strings, not %.200s", Py_TYPE(item)->tp_name);
7091 return NULL;
7092 }
7093 module_name = item;
7094 }
7095 else if (PyErr_Occurred()) {
7096 return NULL;
7097 }
7098 }
7099 }
7100
7101 /*
7102 * we don't use PyImport_GetModule here, because it can return partially-
7103 * initialised modules, which then cause the getattribute to fail.
7104 */
7105 module = PyImport_Import(module_name);
7106 if (module == NULL) {
7107 return NULL;
7108 }
7109 global = getattribute(module, global_name, self->proto >= 4);
7110 Py_DECREF(module);
7111 return global;
7112 }
7113
7114 /*[clinic input]
7115
7116 _pickle.Unpickler.__sizeof__ -> Py_ssize_t
7117
7118 Returns size in memory, in bytes.
7119 [clinic start generated code]*/
7120
7121 static Py_ssize_t
_pickle_Unpickler___sizeof___impl(UnpicklerObject * self)7122 _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7123 /*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
7124 {
7125 Py_ssize_t res;
7126
7127 res = _PyObject_SIZE(Py_TYPE(self));
7128 if (self->memo != NULL)
7129 res += self->memo_size * sizeof(PyObject *);
7130 if (self->marks != NULL)
7131 res += self->marks_size * sizeof(Py_ssize_t);
7132 if (self->input_line != NULL)
7133 res += strlen(self->input_line) + 1;
7134 if (self->encoding != NULL)
7135 res += strlen(self->encoding) + 1;
7136 if (self->errors != NULL)
7137 res += strlen(self->errors) + 1;
7138 return res;
7139 }
7140
/* Method table for the Unpickler type.  The *_METHODDEF entries are macros
   defined outside this part of the file (presumably by Argument Clinic for
   the matching *_impl functions above). */
static struct PyMethodDef Unpickler_methods[] = {
    _PICKLE_UNPICKLER_LOAD_METHODDEF
    _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
    _PICKLE_UNPICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
7147
/* tp_dealloc for Unpickler: untrack the object from the GC, release every
   object reference and buffer it holds, free its C-level arrays and
   strings, then free the object itself. */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Untrack first so the collector never sees a half-destroyed object. */
    PyObject_GC_UnTrack((PyObject *)self);
    Py_XDECREF(self->readline);
    Py_XDECREF(self->readinto);
    Py_XDECREF(self->read);
    Py_XDECREF(self->peek);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->buffers);
    /* Release the input buffer view, if one is held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    /* Release the memo and the heap-allocated C members. */
    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    PyMem_Free(self->input_line);
    PyMem_Free(self->encoding);
    PyMem_Free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
7172
/* tp_traverse for Unpickler: report every object reference held directly
   in the struct to the cyclic garbage collector.  Py_VISIT returns from
   this function early if the visit callback returns non-zero. */
static int
Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->readline);
    Py_VISIT(self->readinto);
    Py_VISIT(self->read);
    Py_VISIT(self->peek);
    Py_VISIT(self->stack);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->buffers);
    return 0;
}
7185
/* tp_clear for Unpickler, also called by __init__() to reset an already
   initialized object.  Drops every owned reference and frees the C-level
   members, leaving each pointer NULL so the object can be re-initialized
   or deallocated safely.  Always returns 0. */
static int
Unpickler_clear(UnpicklerObject *self)
{
    Py_CLEAR(self->readline);
    Py_CLEAR(self->readinto);
    Py_CLEAR(self->read);
    Py_CLEAR(self->peek);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->buffers);
    /* Release the input buffer view, if one is held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    /* Free the memo and the heap-allocated members; each pointer is reset
       so a later clear or dealloc does not free it twice. */
    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    self->marks = NULL;
    PyMem_Free(self->input_line);
    self->input_line = NULL;
    PyMem_Free(self->encoding);
    self->encoding = NULL;
    PyMem_Free(self->errors);
    self->errors = NULL;

    return 0;
}
7213
7214 /*[clinic input]
7215
7216 _pickle.Unpickler.__init__
7217
7218 file: object
7219 *
7220 fix_imports: bool = True
7221 encoding: str = 'ASCII'
7222 errors: str = 'strict'
7223 buffers: object(c_default="NULL") = ()
7224
7225 This takes a binary file for reading a pickle data stream.
7226
7227 The protocol version of the pickle is detected automatically, so no
7228 protocol argument is needed. Bytes past the pickled object's
7229 representation are ignored.
7230
7231 The argument *file* must have two methods, a read() method that takes
7232 an integer argument, and a readline() method that requires no
7233 arguments. Both methods should return bytes. Thus *file* can be a
7234 binary file object opened for reading, an io.BytesIO object, or any
7235 other custom object that meets this interface.
7236
7237 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7238 which are used to control compatibility support for pickle stream
7239 generated by Python 2. If *fix_imports* is True, pickle will try to
7240 map the old Python 2 names to the new names used in Python 3. The
7241 *encoding* and *errors* tell pickle how to decode 8-bit string
7242 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7243 respectively. The *encoding* can be 'bytes' to read these 8-bit
7244 string instances as bytes objects.
7245 [clinic start generated code]*/
7246
7247 static int
_pickle_Unpickler___init___impl(UnpicklerObject * self,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7248 _pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7249 int fix_imports, const char *encoding,
7250 const char *errors, PyObject *buffers)
7251 /*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
7252 {
7253 /* In case of multiple __init__() calls, clear previous content. */
7254 if (self->read != NULL)
7255 (void)Unpickler_clear(self);
7256
7257 if (_Unpickler_SetInputStream(self, file) < 0)
7258 return -1;
7259
7260 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
7261 return -1;
7262
7263 if (_Unpickler_SetBuffers(self, buffers) < 0)
7264 return -1;
7265
7266 self->fix_imports = fix_imports;
7267
7268 if (init_method_ref((PyObject *)self, &_Py_ID(persistent_load),
7269 &self->pers_func, &self->pers_func_self) < 0)
7270 {
7271 return -1;
7272 }
7273
7274 self->stack = (Pdata *)Pdata_New();
7275 if (self->stack == NULL)
7276 return -1;
7277
7278 self->memo_size = 32;
7279 self->memo = _Unpickler_NewMemo(self->memo_size);
7280 if (self->memo == NULL)
7281 return -1;
7282
7283 self->proto = 0;
7284
7285 return 0;
7286 }
7287
7288
7289 /* Define a proxy object for the Unpickler's internal memo object. This is to
7290 * avoid breaking code like:
7291 * unpickler.memo.clear()
7292 * and
7293 * unpickler.memo = saved_memo
7294 * Is this a good idea? Not really, but we don't want to break code that uses
7295 * it. Note that we don't implement the entire mapping API here. This is
7296 * intentional, as these should be treated as black-box implementation details.
7297 *
7298 * We do, however, have to implement pickling/unpickling support because of
7299 * real-world code like cvs2svn.
7300 */
7301
7302 /*[clinic input]
7303 _pickle.UnpicklerMemoProxy.clear
7304
7305 Remove all items from memo.
7306 [clinic start generated code]*/
7307
7308 static PyObject *
_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject * self)7309 _pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
7310 /*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
7311 {
7312 _Unpickler_MemoCleanup(self->unpickler);
7313 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7314 if (self->unpickler->memo == NULL)
7315 return NULL;
7316 Py_RETURN_NONE;
7317 }
7318
7319 /*[clinic input]
7320 _pickle.UnpicklerMemoProxy.copy
7321
7322 Copy the memo to a new object.
7323 [clinic start generated code]*/
7324
7325 static PyObject *
_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject * self)7326 _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
7327 /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
7328 {
7329 size_t i;
7330 PyObject *new_memo = PyDict_New();
7331 if (new_memo == NULL)
7332 return NULL;
7333
7334 for (i = 0; i < self->unpickler->memo_size; i++) {
7335 int status;
7336 PyObject *key, *value;
7337
7338 value = self->unpickler->memo[i];
7339 if (value == NULL)
7340 continue;
7341
7342 key = PyLong_FromSsize_t(i);
7343 if (key == NULL)
7344 goto error;
7345 status = PyDict_SetItem(new_memo, key, value);
7346 Py_DECREF(key);
7347 if (status < 0)
7348 goto error;
7349 }
7350 return new_memo;
7351
7352 error:
7353 Py_DECREF(new_memo);
7354 return NULL;
7355 }
7356
7357 /*[clinic input]
7358 _pickle.UnpicklerMemoProxy.__reduce__
7359
7360 Implement pickling support.
7361 [clinic start generated code]*/
7362
7363 static PyObject *
_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject * self)7364 _pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
7365 /*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
7366 {
7367 PyObject *reduce_value;
7368 PyObject *constructor_args;
7369 PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
7370 if (contents == NULL)
7371 return NULL;
7372
7373 reduce_value = PyTuple_New(2);
7374 if (reduce_value == NULL) {
7375 Py_DECREF(contents);
7376 return NULL;
7377 }
7378 constructor_args = PyTuple_New(1);
7379 if (constructor_args == NULL) {
7380 Py_DECREF(contents);
7381 Py_DECREF(reduce_value);
7382 return NULL;
7383 }
7384 PyTuple_SET_ITEM(constructor_args, 0, contents);
7385 Py_INCREF((PyObject *)&PyDict_Type);
7386 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
7387 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7388 return reduce_value;
7389 }
7390
/* Method table for UnpicklerMemoProxy: clear(), copy() and __reduce__().
   The *_METHODDEF entries are macros defined outside this part of the
   file. */
static PyMethodDef unpicklerproxy_methods[] = {
    _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL}    /* sentinel */
};
7397
/* tp_dealloc for UnpicklerMemoProxy: untrack from the GC, drop the
   reference to the proxied unpickler and free the proxy. */
static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->unpickler);
    PyObject_GC_Del((PyObject *)self);
}
7405
/* tp_traverse for UnpicklerMemoProxy: the proxied unpickler is the only
   object reference the proxy holds. */
static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
                            visitproc visit, void *arg)
{
    Py_VISIT(self->unpickler);
    return 0;
}
7413
/* tp_clear for UnpicklerMemoProxy: drop the reference to the proxied
   unpickler so reference cycles through the proxy can be broken.
   Always returns 0. */
static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
{
    Py_CLEAR(self->unpickler);
    return 0;
}
7420
7421 static PyTypeObject UnpicklerMemoProxyType = {
7422 PyVarObject_HEAD_INIT(NULL, 0)
7423 "_pickle.UnpicklerMemoProxy", /*tp_name*/
7424 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
7425 0,
7426 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
7427 0, /* tp_vectorcall_offset */
7428 0, /* tp_getattr */
7429 0, /* tp_setattr */
7430 0, /* tp_as_async */
7431 0, /* tp_repr */
7432 0, /* tp_as_number */
7433 0, /* tp_as_sequence */
7434 0, /* tp_as_mapping */
7435 PyObject_HashNotImplemented, /* tp_hash */
7436 0, /* tp_call */
7437 0, /* tp_str */
7438 PyObject_GenericGetAttr, /* tp_getattro */
7439 PyObject_GenericSetAttr, /* tp_setattro */
7440 0, /* tp_as_buffer */
7441 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7442 0, /* tp_doc */
7443 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
7444 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
7445 0, /* tp_richcompare */
7446 0, /* tp_weaklistoffset */
7447 0, /* tp_iter */
7448 0, /* tp_iternext */
7449 unpicklerproxy_methods, /* tp_methods */
7450 };
7451
7452 static PyObject *
UnpicklerMemoProxy_New(UnpicklerObject * unpickler)7453 UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7454 {
7455 UnpicklerMemoProxyObject *self;
7456
7457 self = PyObject_GC_New(UnpicklerMemoProxyObject,
7458 &UnpicklerMemoProxyType);
7459 if (self == NULL)
7460 return NULL;
7461 Py_INCREF(unpickler);
7462 self->unpickler = unpickler;
7463 PyObject_GC_Track(self);
7464 return (PyObject *)self;
7465 }
7466
7467 /*****************************************************************************/
7468
7469
/* Getter for Unpickler.memo: returns a new UnpicklerMemoProxy wrapping
   this unpickler, not the memo array itself.  A fresh proxy object is
   created on every access. */
static PyObject *
Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
{
    return UnpicklerMemoProxy_New(self);
}
7475
7476 static int
Unpickler_set_memo(UnpicklerObject * self,PyObject * obj,void * Py_UNUSED (ignored))7477 Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
7478 {
7479 PyObject **new_memo;
7480 size_t new_memo_size = 0;
7481
7482 if (obj == NULL) {
7483 PyErr_SetString(PyExc_TypeError,
7484 "attribute deletion is not supported");
7485 return -1;
7486 }
7487
7488 if (Py_IS_TYPE(obj, &UnpicklerMemoProxyType)) {
7489 UnpicklerObject *unpickler =
7490 ((UnpicklerMemoProxyObject *)obj)->unpickler;
7491
7492 new_memo_size = unpickler->memo_size;
7493 new_memo = _Unpickler_NewMemo(new_memo_size);
7494 if (new_memo == NULL)
7495 return -1;
7496
7497 for (size_t i = 0; i < new_memo_size; i++) {
7498 Py_XINCREF(unpickler->memo[i]);
7499 new_memo[i] = unpickler->memo[i];
7500 }
7501 }
7502 else if (PyDict_Check(obj)) {
7503 Py_ssize_t i = 0;
7504 PyObject *key, *value;
7505
7506 new_memo_size = PyDict_GET_SIZE(obj);
7507 new_memo = _Unpickler_NewMemo(new_memo_size);
7508 if (new_memo == NULL)
7509 return -1;
7510
7511 while (PyDict_Next(obj, &i, &key, &value)) {
7512 Py_ssize_t idx;
7513 if (!PyLong_Check(key)) {
7514 PyErr_SetString(PyExc_TypeError,
7515 "memo key must be integers");
7516 goto error;
7517 }
7518 idx = PyLong_AsSsize_t(key);
7519 if (idx == -1 && PyErr_Occurred())
7520 goto error;
7521 if (idx < 0) {
7522 PyErr_SetString(PyExc_ValueError,
7523 "memo key must be positive integers.");
7524 goto error;
7525 }
7526 if (_Unpickler_MemoPut(self, idx, value) < 0)
7527 goto error;
7528 }
7529 }
7530 else {
7531 PyErr_Format(PyExc_TypeError,
7532 "'memo' attribute must be an UnpicklerMemoProxy object "
7533 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
7534 return -1;
7535 }
7536
7537 _Unpickler_MemoCleanup(self);
7538 self->memo_size = new_memo_size;
7539 self->memo = new_memo;
7540
7541 return 0;
7542
7543 error:
7544 if (new_memo_size) {
7545 for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
7546 Py_XDECREF(new_memo[i]);
7547 }
7548 PyMem_Free(new_memo);
7549 }
7550 return -1;
7551 }
7552
7553 static PyObject *
Unpickler_get_persload(UnpicklerObject * self,void * Py_UNUSED (ignored))7554 Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
7555 {
7556 if (self->pers_func == NULL) {
7557 PyErr_SetString(PyExc_AttributeError, "persistent_load");
7558 return NULL;
7559 }
7560 return reconstruct_method(self->pers_func, self->pers_func_self);
7561 }
7562
/* Setter for Unpickler.persistent_load.  The value must be callable and
   the attribute cannot be deleted; both violations raise TypeError.
   Returns 0 on success, -1 with an exception set on failure. */
static int
Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
{
    /* A NULL value means "del unpickler.persistent_load". */
    if (value == NULL) {
        PyErr_SetString(PyExc_TypeError,
                        "attribute deletion is not supported");
        return -1;
    }
    if (!PyCallable_Check(value)) {
        PyErr_SetString(PyExc_TypeError,
                        "persistent_load must be a callable taking "
                        "one argument");
        return -1;
    }

    /* NOTE(review): pers_func_self is reset without a decref, so it is
       presumably a borrowed pointer used only when pers_func came from a
       method found on self -- confirm against init_method_ref(). */
    self->pers_func_self = NULL;
    Py_INCREF(value);
    Py_XSETREF(self->pers_func, value);

    return 0;
}
7584
/* Computed attributes of Unpickler: `memo` (exposed through a proxy
   object) and `persistent_load`.  Both are readable and writable. */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
                        (setter)Unpickler_set_persload},
    {NULL}    /* sentinel */
};
7591
7592 static PyTypeObject Unpickler_Type = {
7593 PyVarObject_HEAD_INIT(NULL, 0)
7594 "_pickle.Unpickler", /*tp_name*/
7595 sizeof(UnpicklerObject), /*tp_basicsize*/
7596 0, /*tp_itemsize*/
7597 (destructor)Unpickler_dealloc, /*tp_dealloc*/
7598 0, /*tp_vectorcall_offset*/
7599 0, /*tp_getattr*/
7600 0, /*tp_setattr*/
7601 0, /*tp_as_async*/
7602 0, /*tp_repr*/
7603 0, /*tp_as_number*/
7604 0, /*tp_as_sequence*/
7605 0, /*tp_as_mapping*/
7606 0, /*tp_hash*/
7607 0, /*tp_call*/
7608 0, /*tp_str*/
7609 0, /*tp_getattro*/
7610 0, /*tp_setattro*/
7611 0, /*tp_as_buffer*/
7612 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7613 _pickle_Unpickler___init____doc__, /*tp_doc*/
7614 (traverseproc)Unpickler_traverse, /*tp_traverse*/
7615 (inquiry)Unpickler_clear, /*tp_clear*/
7616 0, /*tp_richcompare*/
7617 0, /*tp_weaklistoffset*/
7618 0, /*tp_iter*/
7619 0, /*tp_iternext*/
7620 Unpickler_methods, /*tp_methods*/
7621 0, /*tp_members*/
7622 Unpickler_getsets, /*tp_getset*/
7623 0, /*tp_base*/
7624 0, /*tp_dict*/
7625 0, /*tp_descr_get*/
7626 0, /*tp_descr_set*/
7627 0, /*tp_dictoffset*/
7628 _pickle_Unpickler___init__, /*tp_init*/
7629 PyType_GenericAlloc, /*tp_alloc*/
7630 PyType_GenericNew, /*tp_new*/
7631 PyObject_GC_Del, /*tp_free*/
7632 0, /*tp_is_gc*/
7633 };
7634
7635 /*[clinic input]
7636
7637 _pickle.dump
7638
7639 obj: object
7640 file: object
7641 protocol: object = None
7642 *
7643 fix_imports: bool = True
7644 buffer_callback: object = None
7645
7646 Write a pickled representation of obj to the open file object file.
7647
7648 This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7649 be more efficient.
7650
7651 The optional *protocol* argument tells the pickler to use the given
7652 protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7653 protocol is 4. It was introduced in Python 3.4, and is incompatible
7654 with previous versions.
7655
7656 Specifying a negative protocol version selects the highest protocol
7657 version supported. The higher the protocol used, the more recent the
7658 version of Python needed to read the pickle produced.
7659
7660 The *file* argument must have a write() method that accepts a single
7661 bytes argument. It can thus be a file object opened for binary
7662 writing, an io.BytesIO instance, or any other custom object that meets
7663 this interface.
7664
7665 If *fix_imports* is True and protocol is less than 3, pickle will try
7666 to map the new Python 3 names to the old module names used in Python
7667 2, so that the pickle data stream is readable with Python 2.
7668
7669 If *buffer_callback* is None (the default), buffer views are serialized
7670 into *file* as part of the pickle stream. It is an error if
7671 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7672
7673 [clinic start generated code]*/
7674
7675 static PyObject *
_pickle_dump_impl(PyObject * module,PyObject * obj,PyObject * file,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7676 _pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
7677 PyObject *protocol, int fix_imports,
7678 PyObject *buffer_callback)
7679 /*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/
7680 {
7681 PicklerObject *pickler = _Pickler_New();
7682
7683 if (pickler == NULL)
7684 return NULL;
7685
7686 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7687 goto error;
7688
7689 if (_Pickler_SetOutputStream(pickler, file) < 0)
7690 goto error;
7691
7692 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7693 goto error;
7694
7695 if (dump(pickler, obj) < 0)
7696 goto error;
7697
7698 if (_Pickler_FlushToFile(pickler) < 0)
7699 goto error;
7700
7701 Py_DECREF(pickler);
7702 Py_RETURN_NONE;
7703
7704 error:
7705 Py_XDECREF(pickler);
7706 return NULL;
7707 }
7708
7709 /*[clinic input]
7710
7711 _pickle.dumps
7712
7713 obj: object
7714 protocol: object = None
7715 *
7716 fix_imports: bool = True
7717 buffer_callback: object = None
7718
7719 Return the pickled representation of the object as a bytes object.
7720
7721 The optional *protocol* argument tells the pickler to use the given
7722 protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7723 protocol is 4. It was introduced in Python 3.4, and is incompatible
7724 with previous versions.
7725
7726 Specifying a negative protocol version selects the highest protocol
7727 version supported. The higher the protocol used, the more recent the
7728 version of Python needed to read the pickle produced.
7729
7730 If *fix_imports* is True and *protocol* is less than 3, pickle will
7731 try to map the new Python 3 names to the old module names used in
7732 Python 2, so that the pickle data stream is readable with Python 2.
7733
7734 If *buffer_callback* is None (the default), buffer views are serialized
7735 into *file* as part of the pickle stream. It is an error if
7736 *buffer_callback* is not None and *protocol* is None or smaller than 5.
7737
7738 [clinic start generated code]*/
7739
7740 static PyObject *
_pickle_dumps_impl(PyObject * module,PyObject * obj,PyObject * protocol,int fix_imports,PyObject * buffer_callback)7741 _pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
7742 int fix_imports, PyObject *buffer_callback)
7743 /*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/
7744 {
7745 PyObject *result;
7746 PicklerObject *pickler = _Pickler_New();
7747
7748 if (pickler == NULL)
7749 return NULL;
7750
7751 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
7752 goto error;
7753
7754 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7755 goto error;
7756
7757 if (dump(pickler, obj) < 0)
7758 goto error;
7759
7760 result = _Pickler_GetString(pickler);
7761 Py_DECREF(pickler);
7762 return result;
7763
7764 error:
7765 Py_XDECREF(pickler);
7766 return NULL;
7767 }
7768
7769 /*[clinic input]
7770
7771 _pickle.load
7772
7773 file: object
7774 *
7775 fix_imports: bool = True
7776 encoding: str = 'ASCII'
7777 errors: str = 'strict'
7778 buffers: object(c_default="NULL") = ()
7779
7780 Read and return an object from the pickle data stored in a file.
7781
7782 This is equivalent to ``Unpickler(file).load()``, but may be more
7783 efficient.
7784
7785 The protocol version of the pickle is detected automatically, so no
7786 protocol argument is needed. Bytes past the pickled object's
7787 representation are ignored.
7788
7789 The argument *file* must have two methods, a read() method that takes
7790 an integer argument, and a readline() method that requires no
7791 arguments. Both methods should return bytes. Thus *file* can be a
7792 binary file object opened for reading, an io.BytesIO object, or any
7793 other custom object that meets this interface.
7794
7795 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7796 which are used to control compatibility support for pickle stream
7797 generated by Python 2. If *fix_imports* is True, pickle will try to
7798 map the old Python 2 names to the new names used in Python 3. The
7799 *encoding* and *errors* tell pickle how to decode 8-bit string
7800 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7801 respectively. The *encoding* can be 'bytes' to read these 8-bit
7802 string instances as bytes objects.
7803 [clinic start generated code]*/
7804
7805 static PyObject *
_pickle_load_impl(PyObject * module,PyObject * file,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7806 _pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
7807 const char *encoding, const char *errors,
7808 PyObject *buffers)
7809 /*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
7810 {
7811 PyObject *result;
7812 UnpicklerObject *unpickler = _Unpickler_New();
7813
7814 if (unpickler == NULL)
7815 return NULL;
7816
7817 if (_Unpickler_SetInputStream(unpickler, file) < 0)
7818 goto error;
7819
7820 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7821 goto error;
7822
7823 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7824 goto error;
7825
7826 unpickler->fix_imports = fix_imports;
7827
7828 result = load(unpickler);
7829 Py_DECREF(unpickler);
7830 return result;
7831
7832 error:
7833 Py_XDECREF(unpickler);
7834 return NULL;
7835 }
7836
7837 /*[clinic input]
7838
7839 _pickle.loads
7840
7841 data: object
7842 /
7843 *
7844 fix_imports: bool = True
7845 encoding: str = 'ASCII'
7846 errors: str = 'strict'
7847 buffers: object(c_default="NULL") = ()
7848
7849 Read and return an object from the given pickle data.
7850
7851 The protocol version of the pickle is detected automatically, so no
7852 protocol argument is needed. Bytes past the pickled object's
7853 representation are ignored.
7854
7855 Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
7856 which are used to control compatibility support for pickle stream
7857 generated by Python 2. If *fix_imports* is True, pickle will try to
7858 map the old Python 2 names to the new names used in Python 3. The
7859 *encoding* and *errors* tell pickle how to decode 8-bit string
7860 instances pickled by Python 2; these default to 'ASCII' and 'strict',
7861 respectively. The *encoding* can be 'bytes' to read these 8-bit
7862 string instances as bytes objects.
7863 [clinic start generated code]*/
7864
7865 static PyObject *
_pickle_loads_impl(PyObject * module,PyObject * data,int fix_imports,const char * encoding,const char * errors,PyObject * buffers)7866 _pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
7867 const char *encoding, const char *errors,
7868 PyObject *buffers)
7869 /*[clinic end generated code: output=82ac1e6b588e6d02 input=b3615540d0535087]*/
7870 {
7871 PyObject *result;
7872 UnpicklerObject *unpickler = _Unpickler_New();
7873
7874 if (unpickler == NULL)
7875 return NULL;
7876
7877 if (_Unpickler_SetStringInput(unpickler, data) < 0)
7878 goto error;
7879
7880 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7881 goto error;
7882
7883 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7884 goto error;
7885
7886 unpickler->fix_imports = fix_imports;
7887
7888 result = load(unpickler);
7889 Py_DECREF(unpickler);
7890 return result;
7891
7892 error:
7893 Py_XDECREF(unpickler);
7894 return NULL;
7895 }
7896
7897 static struct PyMethodDef pickle_methods[] = {
7898 _PICKLE_DUMP_METHODDEF
7899 _PICKLE_DUMPS_METHODDEF
7900 _PICKLE_LOAD_METHODDEF
7901 _PICKLE_LOADS_METHODDEF
7902 {NULL, NULL} /* sentinel */
7903 };
7904
7905 static int
pickle_clear(PyObject * m)7906 pickle_clear(PyObject *m)
7907 {
7908 _Pickle_ClearState(_Pickle_GetState(m));
7909 return 0;
7910 }
7911
7912 static void
pickle_free(PyObject * m)7913 pickle_free(PyObject *m)
7914 {
7915 _Pickle_ClearState(_Pickle_GetState(m));
7916 }
7917
7918 static int
pickle_traverse(PyObject * m,visitproc visit,void * arg)7919 pickle_traverse(PyObject *m, visitproc visit, void *arg)
7920 {
7921 PickleState *st = _Pickle_GetState(m);
7922 Py_VISIT(st->PickleError);
7923 Py_VISIT(st->PicklingError);
7924 Py_VISIT(st->UnpicklingError);
7925 Py_VISIT(st->dispatch_table);
7926 Py_VISIT(st->extension_registry);
7927 Py_VISIT(st->extension_cache);
7928 Py_VISIT(st->inverted_registry);
7929 Py_VISIT(st->name_mapping_2to3);
7930 Py_VISIT(st->import_mapping_2to3);
7931 Py_VISIT(st->name_mapping_3to2);
7932 Py_VISIT(st->import_mapping_3to2);
7933 Py_VISIT(st->codecs_encode);
7934 Py_VISIT(st->getattr);
7935 Py_VISIT(st->partial);
7936 return 0;
7937 }
7938
7939 static struct PyModuleDef _picklemodule = {
7940 PyModuleDef_HEAD_INIT,
7941 "_pickle", /* m_name */
7942 pickle_module_doc, /* m_doc */
7943 sizeof(PickleState), /* m_size */
7944 pickle_methods, /* m_methods */
7945 NULL, /* m_reload */
7946 pickle_traverse, /* m_traverse */
7947 pickle_clear, /* m_clear */
7948 (freefunc)pickle_free /* m_free */
7949 };
7950
7951 PyMODINIT_FUNC
PyInit__pickle(void)7952 PyInit__pickle(void)
7953 {
7954 PyObject *m;
7955 PickleState *st;
7956
7957 m = PyState_FindModule(&_picklemodule);
7958 if (m) {
7959 Py_INCREF(m);
7960 return m;
7961 }
7962
7963 if (PyType_Ready(&Pdata_Type) < 0)
7964 return NULL;
7965 if (PyType_Ready(&PicklerMemoProxyType) < 0)
7966 return NULL;
7967 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7968 return NULL;
7969
7970 /* Create the module and add the functions. */
7971 m = PyModule_Create(&_picklemodule);
7972 if (m == NULL)
7973 return NULL;
7974
7975 /* Add types */
7976 if (PyModule_AddType(m, &Pickler_Type) < 0) {
7977 return NULL;
7978 }
7979 if (PyModule_AddType(m, &Unpickler_Type) < 0) {
7980 return NULL;
7981 }
7982 if (PyModule_AddType(m, &PyPickleBuffer_Type) < 0) {
7983 return NULL;
7984 }
7985
7986 st = _Pickle_GetState(m);
7987
7988 /* Initialize the exceptions. */
7989 st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7990 if (st->PickleError == NULL)
7991 return NULL;
7992 st->PicklingError = \
7993 PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7994 if (st->PicklingError == NULL)
7995 return NULL;
7996 st->UnpicklingError = \
7997 PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7998 if (st->UnpicklingError == NULL)
7999 return NULL;
8000
8001 if (PyModule_AddObjectRef(m, "PickleError", st->PickleError) < 0) {
8002 return NULL;
8003 }
8004 if (PyModule_AddObjectRef(m, "PicklingError", st->PicklingError) < 0) {
8005 return NULL;
8006 }
8007 if (PyModule_AddObjectRef(m, "UnpicklingError", st->UnpicklingError) < 0) {
8008 return NULL;
8009 }
8010 if (_Pickle_InitState(st) < 0)
8011 return NULL;
8012
8013 return m;
8014 }
8015