blob: e7f30f226101f52f518b2ced0f36010a6242711c [file] [log] [blame]
Haibo Huangd8830302020-03-03 10:09:46 -08001"""Create portable serialized representations of Python objects.
2
3See module copyreg for a mechanism for registering custom picklers.
4See module pickletools source for extensive comments.
5
6Classes:
7
8 Pickler
9 Unpickler
10
11Functions:
12
13 dump(object, file)
14 dumps(object) -> string
15 load(file) -> object
Haibo Huang5eba2b42021-01-22 11:22:02 -080016 loads(bytes) -> object
Haibo Huangd8830302020-03-03 10:09:46 -080017
18Misc variables:
19
20 __version__
21 format_version
22 compatible_formats
23
24"""
25
26from types import FunctionType
27from copyreg import dispatch_table
28from copyreg import _extension_registry, _inverted_registry, _extension_cache
29from itertools import islice
30from functools import partial
31import sys
32from sys import maxsize
33from struct import pack, unpack
34import re
35import io
36import codecs
37import _compat_pickle
38
39__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
40 "Unpickler", "dump", "dumps", "load", "loads"]
41
42try:
43 from _pickle import PickleBuffer
44 __all__.append("PickleBuffer")
45 _HAVE_PICKLE_BUFFER = True
46except ImportError:
47 _HAVE_PICKLE_BUFFER = False
48
49
# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)

# These are purely informational; no code uses these.
format_version = "4.0" # File format version we write
compatible_formats = ["1.0", # Original protocol 0
                      "1.1", # Protocol 0 with INST added
                      "1.2", # Original protocol 1
                      "1.3", # Protocol 1 with BINFLOAT added
                      "2.0", # Protocol 2
                      "3.0", # Protocol 3
                      "4.0", # Protocol 4
                      "5.0", # Protocol 5
                      ] # Old format versions we can read

# This is the highest protocol number we know how to read.
# Protocol 5 added the out-of-band buffer opcodes (PEP 574).
HIGHEST_PROTOCOL = 5

# The protocol we write by default. May be less than HIGHEST_PROTOCOL.
# Only bump this if the oldest still supported version of Python already
# includes it.
DEFAULT_PROTOCOL = 4
72
class PickleError(Exception):
    """Root of the pickling exception hierarchy.

    Both PicklingError and UnpicklingError derive from this class, so a
    single handler can catch every pickle-related failure.
    """
76
77class PicklingError(PickleError):
78 """This exception is raised when an unpicklable object is passed to the
79 dump() method.
80
81 """
82 pass
83
84class UnpicklingError(PickleError):
85 """This exception is raised when there is a problem unpickling an object,
86 such as a security violation.
87
88 Note that other exceptions may also be raised during unpickling, including
89 (but not necessarily limited to) AttributeError, EOFError, ImportError,
90 and IndexError.
91
92 """
93 pass
94
95# An instance of _Stop is raised by Unpickler.load_stop() in response to
96# the STOP opcode, passing the object that is the result of unpickling.
97class _Stop(Exception):
98 def __init__(self, value):
99 self.value = value
100
# Jython has PyStringMap; it's a dict subclass with string keys
try:
    from org.python.core import PyStringMap
except ImportError:
    # Not running under Jython; save_dict() checks for None before use.
    PyStringMap = None
106
# Pickle opcodes. See pickletools.py for extensive docs. The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK = b'(' # push special markobject on stack
STOP = b'.' # every pickle ends with STOP
POP = b'0' # discard topmost stack item
POP_MARK = b'1' # discard stack top through topmost markobject
DUP = b'2' # duplicate top stack item
FLOAT = b'F' # push float object; decimal string argument
INT = b'I' # push integer or bool; decimal string argument
BININT = b'J' # push four-byte signed int
BININT1 = b'K' # push 1-byte unsigned int
LONG = b'L' # push long; decimal string argument
BININT2 = b'M' # push 2-byte unsigned int
NONE = b'N' # push None
PERSID = b'P' # push persistent object; id is taken from string arg
BINPERSID = b'Q' # push persistent object; id is taken from stack
REDUCE = b'R' # apply callable to argtuple, both on stack
STRING = b'S' # push string; NL-terminated string argument
BINSTRING = b'T' # push string; counted binary string argument
SHORT_BINSTRING= b'U' # push string; counted binary, < 256 bytes
UNICODE = b'V' # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE = b'X' # push Unicode string; counted UTF-8 string argument
APPEND = b'a' # append stack top to list below it
BUILD = b'b' # call __setstate__ or __dict__.update()
GLOBAL = b'c' # push self.find_class(modname, name); 2 string args
DICT = b'd' # build a dict from stack items
EMPTY_DICT = b'}' # push empty dict
APPENDS = b'e' # extend list on stack by topmost stack slice
GET = b'g' # push item from memo on stack; index is string arg
BINGET = b'h' # push item from memo on stack; index is 1-byte arg
INST = b'i' # build & push class instance
LONG_BINGET = b'j' # push item from memo on stack; index is 4-byte arg
LIST = b'l' # build list from topmost stack items
EMPTY_LIST = b']' # push empty list
OBJ = b'o' # build & push class instance
PUT = b'p' # store stack top in memo; index is string arg
BINPUT = b'q' # store stack top in memo; index is 1-byte arg
LONG_BINPUT = b'r' # store stack top in memo; index is 4-byte arg
SETITEM = b's' # add key+value pair to dict
TUPLE = b't' # build tuple from topmost stack items
EMPTY_TUPLE = b')' # push empty tuple
SETITEMS = b'u' # modify dict by adding topmost key+value pairs
BINFLOAT = b'G' # push float; arg is 8-byte float encoding

TRUE = b'I01\n' # not an opcode; see INT docs in pickletools.py
FALSE = b'I00\n' # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO = b'\x80' # identify pickle protocol
NEWOBJ = b'\x81' # build object by applying cls.__new__ to argtuple
EXT1 = b'\x82' # push object from extension registry; 1-byte index
EXT2 = b'\x83' # ditto, but 2-byte index
EXT4 = b'\x84' # ditto, but 4-byte index
TUPLE1 = b'\x85' # build 1-tuple from stack top
TUPLE2 = b'\x86' # build 2-tuple from two topmost stack items
TUPLE3 = b'\x87' # build 3-tuple from three topmost stack items
NEWTRUE = b'\x88' # push True
NEWFALSE = b'\x89' # push False
LONG1 = b'\x8a' # push long from < 256 bytes
LONG4 = b'\x8b' # push really big long

# TUPLE-building opcode indexed by tuple length (0..3).
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES = b'B' # push bytes; counted binary string argument
SHORT_BINBYTES = b'C' # push bytes; counted binary, < 256 bytes

# Protocol 4

SHORT_BINUNICODE = b'\x8c' # push short string; UTF-8 length < 256 bytes
BINUNICODE8 = b'\x8d' # push very long string
BINBYTES8 = b'\x8e' # push very long bytes string
EMPTY_SET = b'\x8f' # push empty set on the stack
ADDITEMS = b'\x90' # modify set by adding topmost stack items
FROZENSET = b'\x91' # build frozenset from topmost stack items
NEWOBJ_EX = b'\x92' # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL = b'\x93' # same as GLOBAL but using names on the stacks
MEMOIZE = b'\x94' # store top of the stack in memo
FRAME = b'\x95' # indicate the beginning of a new frame

# Protocol 5

BYTEARRAY8 = b'\x96' # push bytearray
NEXT_BUFFER = b'\x97' # push next out-of-band buffer
READONLY_BUFFER = b'\x98' # make top of stack readonly

# Re-export every all-caps identifier above (i.e. the opcode names).
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
198
199
class _Framer:
    """Buffer pickle output into protocol-4 frames.

    While framing is active, opcodes accumulate in an io.BytesIO and are
    flushed to the real file prefixed by a FRAME opcode once the buffer
    reaches _FRAME_SIZE_TARGET.  Large binary payloads bypass framing
    entirely via write_large_bytes().
    """

    # Frames shorter than this are flushed without a FRAME header.
    _FRAME_SIZE_MIN = 4
    # Commit the current frame once it grows to this many bytes.
    _FRAME_SIZE_TARGET = 64 * 1024

    def __init__(self, file_write):
        # file_write: the write method of the underlying file object.
        self.file_write = file_write
        self.current_frame = None

    def start_framing(self):
        """Begin buffering output into frames (protocol >= 4 only)."""
        self.current_frame = io.BytesIO()

    def end_framing(self):
        """Flush any pending frame data and leave framing mode."""
        if self.current_frame and self.current_frame.tell() > 0:
            self.commit_frame(force=True)
            self.current_frame = None

    def commit_frame(self, force=False):
        """Flush the buffered frame if large enough, or when *force* is true."""
        if self.current_frame:
            f = self.current_frame
            if f.tell() >= self._FRAME_SIZE_TARGET or force:
                data = f.getbuffer()
                write = self.file_write
                if len(data) >= self._FRAME_SIZE_MIN:
                    # Issue a single call to the write method of the underlying
                    # file object for the frame opcode with the size of the
                    # frame. The concatenation is expected to be less expensive
                    # than issuing an additional call to write.
                    write(FRAME + pack("<Q", len(data)))

                # Issue a separate call to write to append the frame
                # contents without concatenation to the above to avoid a
                # memory copy.
                write(data)

                # Start the new frame with a new io.BytesIO instance so that
                # the file object can have delayed access to the previous frame
                # contents via an unreleased memoryview of the previous
                # io.BytesIO instance.
                self.current_frame = io.BytesIO()

    def write(self, data):
        """Buffer *data* in the current frame, or write straight through."""
        if self.current_frame:
            return self.current_frame.write(data)
        else:
            return self.file_write(data)

    def write_large_bytes(self, header, payload):
        """Write a large binary object outside of any frame."""
        write = self.file_write
        if self.current_frame:
            # Terminate the current frame and flush it to the file.
            self.commit_frame(force=True)

        # Perform direct write of the header and payload of the large binary
        # object. Be careful not to concatenate the header and the payload
        # prior to calling 'write' as we do not want to allocate a large
        # temporary bytes object.
        # We intentionally do not insert a protocol 4 frame opcode to make
        # it possible to optimize file.read calls in the loader.
        write(header)
        write(payload)
261
262
class _Unframer:
    """Mirror of _Framer: present framed pickle input as one flat stream.

    While a frame is active, reads are satisfied from its buffer; outside
    of a frame they fall through to the underlying file callables.
    """

    def __init__(self, file_read, file_readline, file_tell=None):
        self.file_read = file_read
        self.file_readline = file_readline
        # NOTE(review): file_tell is accepted but never stored or used in
        # this class — it appears to exist only for call-site
        # compatibility; confirm against Unpickler.__init__.
        self.current_frame = None

    def readinto(self, buf):
        """Fill *buf* from the current frame or the file; return byte count."""
        if self.current_frame:
            n = self.current_frame.readinto(buf)
            if n == 0 and len(buf) != 0:
                # Frame exhausted exactly on a read boundary: drop out of
                # framing mode and satisfy the read from the file.
                self.current_frame = None
                n = len(buf)
                buf[:] = self.file_read(n)
                return n
            if n < len(buf):
                # A read must never straddle a frame boundary.
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return n
        else:
            n = len(buf)
            buf[:] = self.file_read(n)
            return n

    def read(self, n):
        """Return the next *n* bytes of pickle data."""
        if self.current_frame:
            data = self.current_frame.read(n)
            if not data and n != 0:
                # Frame exhausted exactly on a read boundary.
                self.current_frame = None
                return self.file_read(n)
            if len(data) < n:
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self.file_read(n)

    def readline(self):
        """Return the next newline-terminated line of pickle data."""
        if self.current_frame:
            data = self.current_frame.readline()
            if not data:
                # Frame exhausted exactly on a line boundary.
                self.current_frame = None
                return self.file_readline()
            if data[-1] != b'\n'[0]:
                # A frame must not end in the middle of a line.
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self.file_readline()

    def load_frame(self, frame_size):
        """Enter a new frame of *frame_size* bytes read from the file."""
        if self.current_frame and self.current_frame.read() != b'':
            raise UnpicklingError(
                "beginning of a new frame before end of current frame")
        self.current_frame = io.BytesIO(self.file_read(frame_size))
318
319
320# Tools used for pickling.
321
322def _getattribute(obj, name):
323 for subpath in name.split('.'):
324 if subpath == '<locals>':
325 raise AttributeError("Can't get local attribute {!r} on {!r}"
326 .format(name, obj))
327 try:
328 parent = obj
329 obj = getattr(obj, subpath)
330 except AttributeError:
331 raise AttributeError("Can't get attribute {!r} on {!r}"
332 .format(name, obj)) from None
333 return obj, parent
334
def whichmodule(obj, name):
    """Return the name of the module *obj* belongs to.

    Trusts ``obj.__module__`` when set; otherwise scans sys.modules for
    a module whose dotted *name* resolves back to *obj*, defaulting to
    '__main__' when nothing matches.
    """
    module_name = getattr(obj, '__module__', None)
    if module_name is not None:
        return module_name
    # Iterate over a snapshot of sys.modules: dynamic modules may trigger
    # imports of other modules upon calls to getattr.
    for module_name, module in sys.modules.copy().items():
        if module is None or module_name in ('__main__', '__mp_main__'):
            # '__mp_main__' is multiprocessing's alias of the main
            # module (bpo-42406).
            continue
        try:
            if _getattribute(module, name)[0] is obj:
                return module_name
        except AttributeError:
            pass
    return '__main__'
353
def encode_long(x):
    r"""Encode an int as a little-endian two's-complement byte string.

    Zero is special-cased to the empty string, which saves one byte in
    the LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    """
    if not x:
        return b''
    # One byte more than the minimum needed for the magnitude, so the
    # sign bit always has room.
    nbytes = (x.bit_length() >> 3) + 1
    encoded = x.to_bytes(nbytes, byteorder='little', signed=True)
    # For negatives, drop a redundant trailing 0xff sign byte when the
    # preceding byte already carries the sign bit.
    if (x < 0 and nbytes > 1
            and encoded[-1] == 0xff and encoded[-2] & 0x80):
        encoded = encoded[:-1]
    return encoded
383
def decode_long(data):
    r"""Decode an int from a little-endian two's-complement byte string.

    The empty string decodes to 0, mirroring encode_long().

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    return int.from_bytes(data, 'little', signed=True)
403
404
405# Pickling machinery
406
407class _Pickler:
408
    def __init__(self, file, protocol=None, *, fix_imports=True,
                 buffer_callback=None):
        """This takes a binary file for writing a pickle data stream.

        The optional *protocol* argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2, 3, 4 and 5.
        The default protocol is 4. It was introduced in Python 3.4, and
        is incompatible with previous versions.

        Specifying a negative protocol version selects the highest
        protocol version supported. The higher the protocol used, the
        more recent the version of Python needed to read the pickle
        produced.

        The *file* argument must have a write() method that accepts a
        single bytes argument. It can thus be a file object opened for
        binary writing, an io.BytesIO instance, or any other custom
        object that meets this interface.

        If *fix_imports* is True and *protocol* is less than 3, pickle
        will try to map the new Python 3 names to the old module names
        used in Python 2, so that the pickle data stream is readable
        with Python 2.

        If *buffer_callback* is None (the default), buffer views are
        serialized into *file* as part of the pickle stream.

        If *buffer_callback* is not None, then it can be called any number
        of times with a buffer view. If the callback returns a false value
        (such as None), the given buffer is out-of-band; otherwise the
        buffer is serialized in-band, i.e. inside the pickle stream.

        It is an error if *buffer_callback* is not None and *protocol*
        is None or smaller than 5.
        """
        # Normalize the protocol: None -> default, negative -> highest.
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        if protocol < 0:
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        # Out-of-band buffers only exist from protocol 5 onwards.
        if buffer_callback is not None and protocol < 5:
            raise ValueError("buffer_callback needs protocol >= 5")
        self._buffer_callback = buffer_callback
        try:
            self._file_write = file.write
        except AttributeError:
            raise TypeError("file must have a 'write' attribute")
        # All output is routed through the framer so protocol 4+ streams
        # can be grouped into frames; below protocol 4 it passes through.
        self.framer = _Framer(self._file_write)
        self.write = self.framer.write
        self._write_large_bytes = self.framer.write_large_bytes
        self.memo = {}
        self.proto = int(protocol)
        self.bin = protocol >= 1  # any binary (non-ASCII) protocol
        self.fast = 0             # when true, memoize() records nothing
        self.fix_imports = fix_imports and protocol < 3
465
466 def clear_memo(self):
467 """Clears the pickler's "memo".
468
469 The memo is the data structure that remembers which objects the
470 pickler has already seen, so that shared or recursive objects
471 are pickled by reference and not by value. This method is
472 useful when re-using picklers.
473 """
474 self.memo.clear()
475
    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        # Check whether Pickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Pickler.dump().
        if not hasattr(self, "_file_write"):
            raise PicklingError("Pickler.__init__() was not called by "
                                "%s.__init__()" % (self.__class__.__name__,))
        if self.proto >= 2:
            # Protocols 2+ open with a PROTO opcode announcing the version.
            self.write(PROTO + pack("<B", self.proto))
        if self.proto >= 4:
            # Protocol 4+ wraps the stream body in frames.
            self.framer.start_framing()
        self.save(obj)
        self.write(STOP)
        self.framer.end_framing()
490
491 def memoize(self, obj):
492 """Store an object in the memo."""
493
494 # The Pickler memo is a dictionary mapping object ids to 2-tuples
495 # that contain the Unpickler memo key and the object being memoized.
496 # The memo key is written to the pickle and will become
497 # the key in the Unpickler's memo. The object is stored in the
498 # Pickler memo so that transient objects are kept alive during
499 # pickling.
500
501 # The use of the Unpickler memo length as the memo key is just a
502 # convention. The only requirement is that the memo values be unique.
503 # But there appears no advantage to any other scheme, and this
504 # scheme allows the Unpickler memo to be implemented as a plain (but
505 # growable) array, indexed by memo key.
506 if self.fast:
507 return
508 assert id(obj) not in self.memo
509 idx = len(self.memo)
510 self.write(self.put(idx))
511 self.memo[id(obj)] = idx, obj
512
513 # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
514 def put(self, idx):
515 if self.proto >= 4:
516 return MEMOIZE
517 elif self.bin:
518 if idx < 256:
519 return BINPUT + pack("<B", idx)
520 else:
521 return LONG_BINPUT + pack("<I", idx)
522 else:
523 return PUT + repr(idx).encode("ascii") + b'\n'
524
525 # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
526 def get(self, i):
527 if self.bin:
528 if i < 256:
529 return BINGET + pack("<B", i)
530 else:
531 return LONG_BINGET + pack("<I", i)
532
533 return GET + repr(i).encode("ascii") + b'\n'
534
    def save(self, obj, save_persistent_id=True):
        """Serialize *obj* to the stream — the dispatching heart of the
        pickler.  Tries, in order: persistent id, the memo, a
        reducer_override hook, the per-type dispatch table, the
        (private or copyreg) dispatch_table, and finally the object's
        own __reduce_ex__/__reduce__."""
        self.framer.commit_frame()

        # Check for persistent id (defined by a subclass)
        pid = self.persistent_id(obj)
        if pid is not None and save_persistent_id:
            self.save_pers(pid)
            return

        # Check the memo
        x = self.memo.get(id(obj))
        if x is not None:
            self.write(self.get(x[0]))
            return

        rv = NotImplemented
        reduce = getattr(self, "reducer_override", None)
        if reduce is not None:
            rv = reduce(obj)

        if rv is NotImplemented:
            # Check the type dispatch table
            t = type(obj)
            f = self.dispatch.get(t)
            if f is not None:
                f(self, obj) # Call unbound method with explicit self
                return

            # Check private dispatch table if any, or else
            # copyreg.dispatch_table
            reduce = getattr(self, 'dispatch_table', dispatch_table).get(t)
            if reduce is not None:
                rv = reduce(obj)
            else:
                # Check for a class with a custom metaclass; treat as regular
                # class
                if issubclass(t, type):
                    self.save_global(obj)
                    return

                # Check for a __reduce_ex__ method, fall back to __reduce__
                reduce = getattr(obj, "__reduce_ex__", None)
                if reduce is not None:
                    rv = reduce(self.proto)
                else:
                    reduce = getattr(obj, "__reduce__", None)
                    if reduce is not None:
                        rv = reduce()
                    else:
                        raise PicklingError("Can't pickle %r object: %r" %
                                            (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if isinstance(rv, str):
            self.save_global(obj, rv)
            return

        # Assert that reduce() returned a tuple
        if not isinstance(rv, tuple):
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 6):
            raise PicklingError("Tuple returned by %s must have "
                                "two to six elements" % reduce)

        # Save the reduce() output and finally memoize the object
        self.save_reduce(obj=obj, *rv)
604
605 def persistent_id(self, obj):
606 # This exists so a subclass can override it
607 return None
608
609 def save_pers(self, pid):
610 # Save a persistent id reference
611 if self.bin:
612 self.save(pid, save_persistent_id=False)
613 self.write(BINPERSID)
614 else:
615 try:
616 self.write(PERSID + str(pid).encode("ascii") + b'\n')
617 except UnicodeEncodeError:
618 raise PicklingError(
619 "persistent IDs in protocol 0 must be ASCII strings")
620
    def save_reduce(self, func, args, state=None, listitems=None,
                    dictitems=None, state_setter=None, obj=None):
        """Emit the opcodes for a __reduce__-style tuple.

        func/args describe the reconstruction call (with special-cased
        __newobj__ / __newobj_ex__ markers); state feeds BUILD (or a
        custom state_setter); listitems/dictitems are iterators of items
        to append/set on the rebuilt object; obj, when given, is
        memoized after reconstruction.
        """
        # This API is called by some subclasses

        if not isinstance(args, tuple):
            raise PicklingError("args from save_reduce() must be a tuple")
        if not callable(func):
            raise PicklingError("func from save_reduce() must be callable")

        save = self.save
        write = self.write

        func_name = getattr(func, "__name__", "")
        if self.proto >= 2 and func_name == "__newobj_ex__":
            cls, args, kwargs = args
            if not hasattr(cls, "__new__"):
                raise PicklingError("args[0] from {} args has no __new__"
                                    .format(func_name))
            if obj is not None and cls is not obj.__class__:
                raise PicklingError("args[0] from {} args has the wrong class"
                                    .format(func_name))
            if self.proto >= 4:
                save(cls)
                save(args)
                save(kwargs)
                write(NEWOBJ_EX)
            else:
                # Protocols 2-3 lack NEWOBJ_EX; fold the kwargs into a
                # partial and fall back to a plain REDUCE.
                func = partial(cls.__new__, cls, *args, **kwargs)
                save(func)
                save(())
                write(REDUCE)
        elif self.proto >= 2 and func_name == "__newobj__":
            # A __reduce__ implementation can direct protocol 2 or newer to
            # use the more efficient NEWOBJ opcode, while still
            # allowing protocol 0 and 1 to work normally. For this to
            # work, the function returned by __reduce__ should be
            # called __newobj__, and its first argument should be a
            # class. The implementation for __newobj__
            # should be as follows, although pickle has no way to
            # verify this:
            #
            # def __newobj__(cls, *args):
            #     return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 will pickle a reference to __newobj__,
            # while protocol 2 (and above) will pickle a reference to
            # cls, the remaining args tuple, and the NEWOBJ code,
            # which calls cls.__new__(cls, *args) at unpickling time
            # (see load_newobj below). If __reduce__ returns a
            # three-tuple, the state from the third tuple item will be
            # pickled regardless of the protocol, calling __setstate__
            # at unpickling time (see load_build below).
            #
            # Note that no standard __newobj__ implementation exists;
            # you have to provide your own. This is to enforce
            # compatibility with Python 2.2 (pickles written using
            # protocol 0 or 1 in Python 2.3 should be unpicklable by
            # Python 2.2).
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]
            save(cls)
            save(args)
            write(NEWOBJ)
        else:
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            if id(obj) in self.memo:
                write(POP + self.get(self.memo[id(obj)][0]))
            else:
                self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            if state_setter is None:
                save(state)
                write(BUILD)
            else:
                # If a state_setter is specified, call it instead of load_build
                # to update obj's with its previous state.
                # First, push state_setter and its tuple of expected arguments
                # (obj, state) onto the stack.
                save(state_setter)
                save(obj) # simple BINGET opcode as obj is already memoized.
                save(state)
                write(TUPLE2)
                # Trigger a state_setter(obj, state) function call.
                write(REDUCE)
                # The purpose of state_setter is to carry-out an
                # inplace modification of obj. We do not care about what the
                # method might return, so its output is eventually removed from
                # the stack.
                write(POP)
735
736 # Methods below this point are dispatched through the dispatch table
737
738 dispatch = {}
739
740 def save_none(self, obj):
741 self.write(NONE)
742 dispatch[type(None)] = save_none
743
744 def save_bool(self, obj):
745 if self.proto >= 2:
746 self.write(NEWTRUE if obj else NEWFALSE)
747 else:
748 self.write(TRUE if obj else FALSE)
749 dispatch[bool] = save_bool
750
751 def save_long(self, obj):
752 if self.bin:
753 # If the int is small enough to fit in a signed 4-byte 2's-comp
754 # format, we can store it more efficiently than the general
755 # case.
756 # First one- and two-byte unsigned ints:
757 if obj >= 0:
758 if obj <= 0xff:
759 self.write(BININT1 + pack("<B", obj))
760 return
761 if obj <= 0xffff:
762 self.write(BININT2 + pack("<H", obj))
763 return
764 # Next check for 4-byte signed ints:
765 if -0x80000000 <= obj <= 0x7fffffff:
766 self.write(BININT + pack("<i", obj))
767 return
768 if self.proto >= 2:
769 encoded = encode_long(obj)
770 n = len(encoded)
771 if n < 256:
772 self.write(LONG1 + pack("<B", n) + encoded)
773 else:
774 self.write(LONG4 + pack("<i", n) + encoded)
775 return
776 if -0x80000000 <= obj <= 0x7fffffff:
777 self.write(INT + repr(obj).encode("ascii") + b'\n')
778 else:
779 self.write(LONG + repr(obj).encode("ascii") + b'L\n')
780 dispatch[int] = save_long
781
782 def save_float(self, obj):
783 if self.bin:
784 self.write(BINFLOAT + pack('>d', obj))
785 else:
786 self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
787 dispatch[float] = save_float
788
789 def save_bytes(self, obj):
790 if self.proto < 3:
791 if not obj: # bytes object is empty
792 self.save_reduce(bytes, (), obj=obj)
793 else:
794 self.save_reduce(codecs.encode,
795 (str(obj, 'latin1'), 'latin1'), obj=obj)
796 return
797 n = len(obj)
798 if n <= 0xff:
799 self.write(SHORT_BINBYTES + pack("<B", n) + obj)
800 elif n > 0xffffffff and self.proto >= 4:
801 self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj)
802 elif n >= self.framer._FRAME_SIZE_TARGET:
803 self._write_large_bytes(BINBYTES + pack("<I", n), obj)
804 else:
805 self.write(BINBYTES + pack("<I", n) + obj)
806 self.memoize(obj)
807 dispatch[bytes] = save_bytes
808
809 def save_bytearray(self, obj):
810 if self.proto < 5:
811 if not obj: # bytearray is empty
812 self.save_reduce(bytearray, (), obj=obj)
813 else:
814 self.save_reduce(bytearray, (bytes(obj),), obj=obj)
815 return
816 n = len(obj)
817 if n >= self.framer._FRAME_SIZE_TARGET:
818 self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
819 else:
820 self.write(BYTEARRAY8 + pack("<Q", n) + obj)
Yi Kong71199322022-08-30 15:53:45 +0800821 self.memoize(obj)
Haibo Huangd8830302020-03-03 10:09:46 -0800822 dispatch[bytearray] = save_bytearray
823
824 if _HAVE_PICKLE_BUFFER:
825 def save_picklebuffer(self, obj):
826 if self.proto < 5:
827 raise PicklingError("PickleBuffer can only pickled with "
828 "protocol >= 5")
829 with obj.raw() as m:
830 if not m.contiguous:
831 raise PicklingError("PickleBuffer can not be pickled when "
832 "pointing to a non-contiguous buffer")
833 in_band = True
834 if self._buffer_callback is not None:
835 in_band = bool(self._buffer_callback(obj))
836 if in_band:
837 # Write data in-band
838 # XXX The C implementation avoids a copy here
839 if m.readonly:
840 self.save_bytes(m.tobytes())
841 else:
842 self.save_bytearray(m.tobytes())
843 else:
844 # Write data out-of-band
845 self.write(NEXT_BUFFER)
846 if m.readonly:
847 self.write(READONLY_BUFFER)
848
849 dispatch[PickleBuffer] = save_picklebuffer
850
851 def save_str(self, obj):
852 if self.bin:
853 encoded = obj.encode('utf-8', 'surrogatepass')
854 n = len(encoded)
855 if n <= 0xff and self.proto >= 4:
856 self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
857 elif n > 0xffffffff and self.proto >= 4:
858 self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
859 elif n >= self.framer._FRAME_SIZE_TARGET:
860 self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
861 else:
862 self.write(BINUNICODE + pack("<I", n) + encoded)
863 else:
864 obj = obj.replace("\\", "\\u005c")
865 obj = obj.replace("\0", "\\u0000")
866 obj = obj.replace("\n", "\\u000a")
867 obj = obj.replace("\r", "\\u000d")
868 obj = obj.replace("\x1a", "\\u001a") # EOF on DOS
869 self.write(UNICODE + obj.encode('raw-unicode-escape') +
870 b'\n')
871 self.memoize(obj)
872 dispatch[str] = save_str
873
    def save_tuple(self, obj):
        """Pickle a tuple, handling self-referential tuples via the memo."""
        if not obj: # tuple is empty
            if self.bin:
                self.write(EMPTY_TUPLE)
            else:
                self.write(MARK + TUPLE)
            return

        n = len(obj)
        save = self.save
        memo = self.memo
        if n <= 3 and self.proto >= 2:
            # Protocol 2+ has dedicated TUPLE1/TUPLE2/TUPLE3 opcodes.
            for element in obj:
                save(element)
            # Subtle. Same as in the big comment below.
            if id(obj) in memo:
                get = self.get(memo[id(obj)][0])
                self.write(POP * n + get)
            else:
                self.write(_tuplesize2code[n])
                self.memoize(obj)
            return

        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
        # has more than 3 elements.
        write = self.write
        write(MARK)
        for element in obj:
            save(element)

        if id(obj) in memo:
            # Subtle. d was not in memo when we entered save_tuple(), so
            # the process of saving the tuple's elements must have saved
            # the tuple itself: the tuple is recursive. The proper action
            # now is to throw away everything we put on the stack, and
            # simply GET the tuple (it's already constructed). This check
            # could have been done in the "for element" loop instead, but
            # recursive tuples are a rare thing.
            get = self.get(memo[id(obj)][0])
            if self.bin:
                write(POP_MARK + get)
            else: # proto 0 -- POP_MARK not available
                write(POP * (n+1) + get)
            return

        # No recursion.
        write(TUPLE)
        self.memoize(obj)

    dispatch[tuple] = save_tuple
924
925 def save_list(self, obj):
926 if self.bin:
927 self.write(EMPTY_LIST)
928 else: # proto 0 -- can't use EMPTY_LIST
929 self.write(MARK + LIST)
930
931 self.memoize(obj)
932 self._batch_appends(obj)
933
934 dispatch[list] = save_list
935
936 _BATCHSIZE = 1000
937
938 def _batch_appends(self, items):
939 # Helper to batch up APPENDS sequences
940 save = self.save
941 write = self.write
942
943 if not self.bin:
944 for x in items:
945 save(x)
946 write(APPEND)
947 return
948
949 it = iter(items)
950 while True:
951 tmp = list(islice(it, self._BATCHSIZE))
952 n = len(tmp)
953 if n > 1:
954 write(MARK)
955 for x in tmp:
956 save(x)
957 write(APPENDS)
958 elif n:
959 save(tmp[0])
960 write(APPEND)
961 # else tmp is empty, and we're done
962 if n < self._BATCHSIZE:
963 return
964
965 def save_dict(self, obj):
966 if self.bin:
967 self.write(EMPTY_DICT)
968 else: # proto 0 -- can't use EMPTY_DICT
969 self.write(MARK + DICT)
970
971 self.memoize(obj)
972 self._batch_setitems(obj.items())
973
974 dispatch[dict] = save_dict
975 if PyStringMap is not None:
976 dispatch[PyStringMap] = save_dict
977
978 def _batch_setitems(self, items):
979 # Helper to batch up SETITEMS sequences; proto >= 1 only
980 save = self.save
981 write = self.write
982
983 if not self.bin:
984 for k, v in items:
985 save(k)
986 save(v)
987 write(SETITEM)
988 return
989
990 it = iter(items)
991 while True:
992 tmp = list(islice(it, self._BATCHSIZE))
993 n = len(tmp)
994 if n > 1:
995 write(MARK)
996 for k, v in tmp:
997 save(k)
998 save(v)
999 write(SETITEMS)
1000 elif n:
1001 k, v = tmp[0]
1002 save(k)
1003 save(v)
1004 write(SETITEM)
1005 # else tmp is empty, and we're done
1006 if n < self._BATCHSIZE:
1007 return
1008
1009 def save_set(self, obj):
1010 save = self.save
1011 write = self.write
1012
1013 if self.proto < 4:
1014 self.save_reduce(set, (list(obj),), obj=obj)
1015 return
1016
1017 write(EMPTY_SET)
1018 self.memoize(obj)
1019
1020 it = iter(obj)
1021 while True:
1022 batch = list(islice(it, self._BATCHSIZE))
1023 n = len(batch)
1024 if n > 0:
1025 write(MARK)
1026 for item in batch:
1027 save(item)
1028 write(ADDITEMS)
1029 if n < self._BATCHSIZE:
1030 return
1031 dispatch[set] = save_set
1032
1033 def save_frozenset(self, obj):
1034 save = self.save
1035 write = self.write
1036
1037 if self.proto < 4:
1038 self.save_reduce(frozenset, (list(obj),), obj=obj)
1039 return
1040
1041 write(MARK)
1042 for item in obj:
1043 save(item)
1044
1045 if id(obj) in self.memo:
1046 # If the object is already in the memo, this means it is
1047 # recursive. In this case, throw away everything we put on the
1048 # stack, and fetch the object back from the memo.
1049 write(POP_MARK + self.get(self.memo[id(obj)][0]))
1050 return
1051
1052 write(FROZENSET)
1053 self.memoize(obj)
1054 dispatch[frozenset] = save_frozenset
1055
1056 def save_global(self, obj, name=None):
1057 write = self.write
1058 memo = self.memo
1059
1060 if name is None:
1061 name = getattr(obj, '__qualname__', None)
1062 if name is None:
1063 name = obj.__name__
1064
1065 module_name = whichmodule(obj, name)
1066 try:
1067 __import__(module_name, level=0)
1068 module = sys.modules[module_name]
1069 obj2, parent = _getattribute(module, name)
1070 except (ImportError, KeyError, AttributeError):
1071 raise PicklingError(
1072 "Can't pickle %r: it's not found as %s.%s" %
1073 (obj, module_name, name)) from None
1074 else:
1075 if obj2 is not obj:
1076 raise PicklingError(
1077 "Can't pickle %r: it's not the same object as %s.%s" %
1078 (obj, module_name, name))
1079
1080 if self.proto >= 2:
1081 code = _extension_registry.get((module_name, name))
1082 if code:
1083 assert code > 0
1084 if code <= 0xff:
1085 write(EXT1 + pack("<B", code))
1086 elif code <= 0xffff:
1087 write(EXT2 + pack("<H", code))
1088 else:
1089 write(EXT4 + pack("<i", code))
1090 return
1091 lastname = name.rpartition('.')[2]
1092 if parent is module:
1093 name = lastname
1094 # Non-ASCII identifiers are supported only with protocols >= 3.
1095 if self.proto >= 4:
1096 self.save(module_name)
1097 self.save(name)
1098 write(STACK_GLOBAL)
1099 elif parent is not module:
1100 self.save_reduce(getattr, (parent, lastname))
1101 elif self.proto >= 3:
1102 write(GLOBAL + bytes(module_name, "utf-8") + b'\n' +
1103 bytes(name, "utf-8") + b'\n')
1104 else:
1105 if self.fix_imports:
1106 r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING
1107 r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING
1108 if (module_name, name) in r_name_mapping:
1109 module_name, name = r_name_mapping[(module_name, name)]
1110 elif module_name in r_import_mapping:
1111 module_name = r_import_mapping[module_name]
1112 try:
1113 write(GLOBAL + bytes(module_name, "ascii") + b'\n' +
1114 bytes(name, "ascii") + b'\n')
1115 except UnicodeEncodeError:
1116 raise PicklingError(
1117 "can't pickle global identifier '%s.%s' using "
1118 "pickle protocol %i" % (module, name, self.proto)) from None
1119
1120 self.memoize(obj)
1121
1122 def save_type(self, obj):
1123 if obj is type(None):
1124 return self.save_reduce(type, (None,), obj=obj)
1125 elif obj is type(NotImplemented):
1126 return self.save_reduce(type, (NotImplemented,), obj=obj)
1127 elif obj is type(...):
1128 return self.save_reduce(type, (...,), obj=obj)
1129 return self.save_global(obj)
1130
    # Functions are pickled by reference just like classes; type objects
    # go through save_type for the unnamed singleton types (see above).
    dispatch[FunctionType] = save_global
    dispatch[type] = save_type
1133
1134
1135# Unpickling machinery
1136
class _Unpickler:
    """Pure-Python unpickler; mirrors the C implementation in _pickle.

    Each pickle opcode has a load_* handler registered in the class-level
    ``dispatch`` dict, keyed by the opcode's byte value.
    """

    def __init__(self, file, *, fix_imports=True,
                 encoding="ASCII", errors="strict", buffers=None):
        """This takes a binary file for reading a pickle data stream.

        The protocol version of the pickle is detected automatically, so
        no proto argument is needed.

        The argument *file* must have two methods, a read() method that
        takes an integer argument, and a readline() method that requires
        no arguments. Both methods should return bytes. Thus *file*
        can be a binary file object opened for reading, an io.BytesIO
        object, or any other custom object that meets this interface.

        If *buffers* is not None, it should be an iterable of buffer-enabled
        objects that is consumed each time the pickle stream references
        an out-of-band buffer view. Such buffers have been given in order
        to the *buffer_callback* of a Pickler object.

        If *buffers* is None (the default), then the buffers are taken
        from the pickle stream, assuming they are serialized there.
        It is an error for *buffers* to be None if the pickle stream
        was produced with a non-None *buffer_callback*.

        Other optional arguments are *fix_imports*, *encoding* and
        *errors*, which are used to control compatibility support for
        pickle stream generated by Python 2. If *fix_imports* is True,
        pickle will try to map the old Python 2 names to the new names
        used in Python 3. The *encoding* and *errors* tell pickle how
        to decode 8-bit string instances pickled by Python 2; these
        default to 'ASCII' and 'strict', respectively. *encoding* can be
        'bytes' to read these 8-bit string instances as bytes objects.
        """
        self._buffers = iter(buffers) if buffers is not None else None
        self._file_readline = file.readline
        self._file_read = file.read
        self.memo = {}
        self.encoding = encoding
        self.errors = errors
        self.proto = 0
        self.fix_imports = fix_imports

    def load(self):
        """Read a pickled object representation from the open file.

        Return the reconstituted object hierarchy specified in the file.
        """
        # Check whether Unpickler was initialized correctly. This is
        # only needed to mimic the behavior of _pickle.Unpickler.dump().
        if not hasattr(self, "_file_read"):
            raise UnpicklingError("Unpickler.__init__() was not called by "
                                  "%s.__init__()" % (self.__class__.__name__,))
        self._unframer = _Unframer(self._file_read, self._file_readline)
        self.read = self._unframer.read
        self.readinto = self._unframer.readinto
        self.readline = self._unframer.readline
        self.metastack = []
        self.stack = []
        self.append = self.stack.append
        self.proto = 0
        read = self.read
        dispatch = self.dispatch
        try:
            # Opcode loop: read one opcode byte at a time and dispatch to
            # its handler until a STOP opcode raises _Stop.
            while True:
                key = read(1)
                if not key:
                    raise EOFError
                assert isinstance(key, bytes_types)
                dispatch[key[0]](self)
        except _Stop as stopinst:
            return stopinst.value

    # Return a list of items pushed in the stack after last MARK instruction.
    def pop_mark(self):
        items = self.stack
        self.stack = self.metastack.pop()
        self.append = self.stack.append
        return items

    def persistent_load(self, pid):
        # Default implementation; subclasses override to support
        # persistent IDs.
        raise UnpicklingError("unsupported persistent id encountered")

    # Maps an opcode's byte value to its (unbound) handler method.
    dispatch = {}

    def load_proto(self):
        # PROTO: record the protocol version declared by the stream.
        proto = self.read(1)[0]
        if not 0 <= proto <= HIGHEST_PROTOCOL:
            raise ValueError("unsupported pickle protocol: %d" % proto)
        self.proto = proto
    dispatch[PROTO[0]] = load_proto

    def load_frame(self):
        # FRAME (proto 5): hand the declared frame size to the unframer.
        frame_size, = unpack('<Q', self.read(8))
        if frame_size > sys.maxsize:
            raise ValueError("frame size > sys.maxsize: %d" % frame_size)
        self._unframer.load_frame(frame_size)
    dispatch[FRAME[0]] = load_frame

    def load_persid(self):
        # PERSID (proto 0): persistent ID given as an ASCII text line.
        try:
            pid = self.readline()[:-1].decode("ascii")
        except UnicodeDecodeError:
            raise UnpicklingError(
                "persistent IDs in protocol 0 must be ASCII strings")
        self.append(self.persistent_load(pid))
    dispatch[PERSID[0]] = load_persid

    def load_binpersid(self):
        # BINPERSID: persistent ID taken from the stack.
        pid = self.stack.pop()
        self.append(self.persistent_load(pid))
    dispatch[BINPERSID[0]] = load_binpersid

    def load_none(self):
        self.append(None)
    dispatch[NONE[0]] = load_none

    def load_false(self):
        self.append(False)
    dispatch[NEWFALSE[0]] = load_false

    def load_true(self):
        self.append(True)
    dispatch[NEWTRUE[0]] = load_true

    def load_int(self):
        # INT: decimal text line; "00"/"01" encode proto-0 booleans.
        data = self.readline()
        if data == FALSE[1:]:
            val = False
        elif data == TRUE[1:]:
            val = True
        else:
            val = int(data, 0)
        self.append(val)
    dispatch[INT[0]] = load_int

    def load_binint(self):
        # BININT: 4-byte signed little-endian int.
        self.append(unpack('<i', self.read(4))[0])
    dispatch[BININT[0]] = load_binint

    def load_binint1(self):
        # BININT1: 1-byte unsigned int.
        self.append(self.read(1)[0])
    dispatch[BININT1[0]] = load_binint1

    def load_binint2(self):
        # BININT2: 2-byte unsigned little-endian int.
        self.append(unpack('<H', self.read(2))[0])
    dispatch[BININT2[0]] = load_binint2

    def load_long(self):
        # LONG: decimal text; Python 2 wrote a trailing 'L', strip it.
        val = self.readline()[:-1]
        if val and val[-1] == b'L'[0]:
            val = val[:-1]
        self.append(int(val, 0))
    dispatch[LONG[0]] = load_long

    def load_long1(self):
        # LONG1: 1-byte length, then two's-complement little-endian bytes.
        n = self.read(1)[0]
        data = self.read(n)
        self.append(decode_long(data))
    dispatch[LONG1[0]] = load_long1

    def load_long4(self):
        # LONG4: 4-byte signed length, then two's-complement bytes.
        n, = unpack('<i', self.read(4))
        if n < 0:
            # Corrupt or hostile pickle -- we never write one like this
            raise UnpicklingError("LONG pickle has negative byte count")
        data = self.read(n)
        self.append(decode_long(data))
    dispatch[LONG4[0]] = load_long4

    def load_float(self):
        # FLOAT: repr() text line.
        self.append(float(self.readline()[:-1]))
    dispatch[FLOAT[0]] = load_float

    def load_binfloat(self):
        # BINFLOAT: 8-byte big-endian IEEE-754 double.
        self.append(unpack('>d', self.read(8))[0])
    dispatch[BINFLOAT[0]] = load_binfloat

    def _decode_string(self, value):
        # Used to allow strings from Python 2 to be decoded either as
        # bytes or Unicode strings. This should be used only with the
        # STRING, BINSTRING and SHORT_BINSTRING opcodes.
        if self.encoding == "bytes":
            return value
        else:
            return value.decode(self.encoding, self.errors)

    def load_string(self):
        # STRING (proto 0): repr()-style quoted, escape-encoded line.
        data = self.readline()[:-1]
        # Strip outermost quotes
        if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'':
            data = data[1:-1]
        else:
            raise UnpicklingError("the STRING opcode argument must be quoted")
        self.append(self._decode_string(codecs.escape_decode(data)[0]))
    dispatch[STRING[0]] = load_string

    def load_binstring(self):
        # Deprecated BINSTRING uses signed 32-bit length
        len, = unpack('<i', self.read(4))
        if len < 0:
            raise UnpicklingError("BINSTRING pickle has negative byte count")
        data = self.read(len)
        self.append(self._decode_string(data))
    dispatch[BINSTRING[0]] = load_binstring

    def load_binbytes(self):
        # BINBYTES: 4-byte unsigned length, then raw bytes.
        len, = unpack('<I', self.read(4))
        if len > maxsize:
            raise UnpicklingError("BINBYTES exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        self.append(self.read(len))
    dispatch[BINBYTES[0]] = load_binbytes

    def load_unicode(self):
        # UNICODE (proto 0): raw-unicode-escape encoded text line.
        self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
    dispatch[UNICODE[0]] = load_unicode

    def load_binunicode(self):
        # BINUNICODE: 4-byte unsigned length, then UTF-8 data.
        len, = unpack('<I', self.read(4))
        if len > maxsize:
            raise UnpicklingError("BINUNICODE exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
    dispatch[BINUNICODE[0]] = load_binunicode

    def load_binunicode8(self):
        # BINUNICODE8 (proto 4): 8-byte unsigned length, then UTF-8 data.
        len, = unpack('<Q', self.read(8))
        if len > maxsize:
            raise UnpicklingError("BINUNICODE8 exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
    dispatch[BINUNICODE8[0]] = load_binunicode8

    def load_binbytes8(self):
        # BINBYTES8 (proto 4): 8-byte unsigned length, then raw bytes.
        len, = unpack('<Q', self.read(8))
        if len > maxsize:
            raise UnpicklingError("BINBYTES8 exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        self.append(self.read(len))
    dispatch[BINBYTES8[0]] = load_binbytes8

    def load_bytearray8(self):
        # BYTEARRAY8 (proto 5): readinto() fills the preallocated
        # bytearray in place, avoiding an intermediate bytes copy.
        len, = unpack('<Q', self.read(8))
        if len > maxsize:
            raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
                                  "of %d bytes" % maxsize)
        b = bytearray(len)
        self.readinto(b)
        self.append(b)
    dispatch[BYTEARRAY8[0]] = load_bytearray8

    def load_next_buffer(self):
        # NEXT_BUFFER (proto 5): take the next out-of-band buffer.
        if self._buffers is None:
            raise UnpicklingError("pickle stream refers to out-of-band data "
                                  "but no *buffers* argument was given")
        try:
            buf = next(self._buffers)
        except StopIteration:
            raise UnpicklingError("not enough out-of-band buffers")
        self.append(buf)
    dispatch[NEXT_BUFFER[0]] = load_next_buffer

    def load_readonly_buffer(self):
        # READONLY_BUFFER (proto 5): make the top-of-stack buffer readonly.
        buf = self.stack[-1]
        with memoryview(buf) as m:
            if not m.readonly:
                self.stack[-1] = m.toreadonly()
    dispatch[READONLY_BUFFER[0]] = load_readonly_buffer

    def load_short_binstring(self):
        # SHORT_BINSTRING: 1-byte length, then Python 2 string data.
        len = self.read(1)[0]
        data = self.read(len)
        self.append(self._decode_string(data))
    dispatch[SHORT_BINSTRING[0]] = load_short_binstring

    def load_short_binbytes(self):
        # SHORT_BINBYTES: 1-byte length, then raw bytes.
        len = self.read(1)[0]
        self.append(self.read(len))
    dispatch[SHORT_BINBYTES[0]] = load_short_binbytes

    def load_short_binunicode(self):
        # SHORT_BINUNICODE (proto 4): 1-byte length, then UTF-8 data.
        len = self.read(1)[0]
        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
    dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode

    def load_tuple(self):
        # TUPLE: everything since the last MARK becomes a tuple.
        items = self.pop_mark()
        self.append(tuple(items))
    dispatch[TUPLE[0]] = load_tuple

    def load_empty_tuple(self):
        self.append(())
    dispatch[EMPTY_TUPLE[0]] = load_empty_tuple

    def load_tuple1(self):
        # TUPLE1/2/3: build small tuples without a MARK.
        self.stack[-1] = (self.stack[-1],)
    dispatch[TUPLE1[0]] = load_tuple1

    def load_tuple2(self):
        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
    dispatch[TUPLE2[0]] = load_tuple2

    def load_tuple3(self):
        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
    dispatch[TUPLE3[0]] = load_tuple3

    def load_empty_list(self):
        self.append([])
    dispatch[EMPTY_LIST[0]] = load_empty_list

    def load_empty_dictionary(self):
        self.append({})
    dispatch[EMPTY_DICT[0]] = load_empty_dictionary

    def load_empty_set(self):
        self.append(set())
    dispatch[EMPTY_SET[0]] = load_empty_set

    def load_frozenset(self):
        # FROZENSET (proto 4): items since the last MARK form the set.
        items = self.pop_mark()
        self.append(frozenset(items))
    dispatch[FROZENSET[0]] = load_frozenset

    def load_list(self):
        # LIST (proto 0): items since the last MARK form the list.
        items = self.pop_mark()
        self.append(items)
    dispatch[LIST[0]] = load_list

    def load_dict(self):
        # DICT (proto 0): items since the last MARK are alternating
        # key/value pairs.
        items = self.pop_mark()
        d = {items[i]: items[i+1]
             for i in range(0, len(items), 2)}
        self.append(d)
    dispatch[DICT[0]] = load_dict

    # INST and OBJ differ only in how they get a class object. It's not
    # only sensible to do the rest in a common routine, the two routines
    # previously diverged and grew different bugs.
    # klass is the class to instantiate, and k points to the topmost mark
    # object, following which are the arguments for klass.__init__.
    def _instantiate(self, klass, args):
        # Call the constructor only when args were given, the class is
        # old-style-ish, or it opts in via __getinitargs__; otherwise
        # create the instance without running __init__.
        if (args or not isinstance(klass, type) or
            hasattr(klass, "__getinitargs__")):
            try:
                value = klass(*args)
            except TypeError as err:
                # NOTE(review): the traceback is passed as a second
                # TypeError argument -- looks like a Python 2
                # `raise E, v, tb` remnant; confirm before changing.
                raise TypeError("in constructor for %s: %s" %
                                (klass.__name__, str(err)), sys.exc_info()[2])
        else:
            value = klass.__new__(klass)
        self.append(value)

    def load_inst(self):
        # INST (proto 0): module and class names come as text lines.
        module = self.readline()[:-1].decode("ascii")
        name = self.readline()[:-1].decode("ascii")
        klass = self.find_class(module, name)
        self._instantiate(klass, self.pop_mark())
    dispatch[INST[0]] = load_inst

    def load_obj(self):
        # Stack is ... markobject classobject arg1 arg2 ...
        args = self.pop_mark()
        cls = args.pop(0)
        self._instantiate(cls, args)
    dispatch[OBJ[0]] = load_obj

    def load_newobj(self):
        # NEWOBJ (proto 2): cls.__new__(cls, *args) -- __init__ not run.
        args = self.stack.pop()
        cls = self.stack.pop()
        obj = cls.__new__(cls, *args)
        self.append(obj)
    dispatch[NEWOBJ[0]] = load_newobj

    def load_newobj_ex(self):
        # NEWOBJ_EX (proto 4): like NEWOBJ but with keyword arguments.
        kwargs = self.stack.pop()
        args = self.stack.pop()
        cls = self.stack.pop()
        obj = cls.__new__(cls, *args, **kwargs)
        self.append(obj)
    dispatch[NEWOBJ_EX[0]] = load_newobj_ex

    def load_global(self):
        # GLOBAL: module and qualified name come as text lines.
        module = self.readline()[:-1].decode("utf-8")
        name = self.readline()[:-1].decode("utf-8")
        klass = self.find_class(module, name)
        self.append(klass)
    dispatch[GLOBAL[0]] = load_global

    def load_stack_global(self):
        # STACK_GLOBAL (proto 4): module and name come from the stack.
        name = self.stack.pop()
        module = self.stack.pop()
        if type(name) is not str or type(module) is not str:
            raise UnpicklingError("STACK_GLOBAL requires str")
        self.append(self.find_class(module, name))
    dispatch[STACK_GLOBAL[0]] = load_stack_global

    def load_ext1(self):
        # EXT1/EXT2/EXT4: look up a registered extension code.
        code = self.read(1)[0]
        self.get_extension(code)
    dispatch[EXT1[0]] = load_ext1

    def load_ext2(self):
        code, = unpack('<H', self.read(2))
        self.get_extension(code)
    dispatch[EXT2[0]] = load_ext2

    def load_ext4(self):
        code, = unpack('<i', self.read(4))
        self.get_extension(code)
    dispatch[EXT4[0]] = load_ext4

    def get_extension(self, code):
        # Resolve *code* through the copyreg extension registry, caching
        # the result for subsequent lookups.
        nil = []
        obj = _extension_cache.get(code, nil)
        if obj is not nil:
            self.append(obj)
            return
        key = _inverted_registry.get(code)
        if not key:
            if code <= 0:  # note that 0 is forbidden
                # Corrupt or hostile pickle.
                raise UnpicklingError("EXT specifies code <= 0")
            raise ValueError("unregistered extension code %d" % code)
        obj = self.find_class(*key)
        _extension_cache[code] = obj
        self.append(obj)

    def find_class(self, module, name):
        # Subclasses may override this.
        sys.audit('pickle.find_class', module, name)
        # For protocols < 3, optionally translate Python 2 module/class
        # names to their Python 3 equivalents.
        if self.proto < 3 and self.fix_imports:
            if (module, name) in _compat_pickle.NAME_MAPPING:
                module, name = _compat_pickle.NAME_MAPPING[(module, name)]
            elif module in _compat_pickle.IMPORT_MAPPING:
                module = _compat_pickle.IMPORT_MAPPING[module]
        __import__(module, level=0)
        if self.proto >= 4:
            # Proto 4+ allows dotted (nested) names.
            return _getattribute(sys.modules[module], name)[0]
        else:
            return getattr(sys.modules[module], name)

    def load_reduce(self):
        # REDUCE: call func(*args) with both taken from the stack.
        stack = self.stack
        args = stack.pop()
        func = stack[-1]
        stack[-1] = func(*args)
    dispatch[REDUCE[0]] = load_reduce

    def load_pop(self):
        # POP: discard the top of stack (or an empty MARK frame).
        if self.stack:
            del self.stack[-1]
        else:
            self.pop_mark()
    dispatch[POP[0]] = load_pop

    def load_pop_mark(self):
        self.pop_mark()
    dispatch[POP_MARK[0]] = load_pop_mark

    def load_dup(self):
        self.append(self.stack[-1])
    dispatch[DUP[0]] = load_dup

    def load_get(self):
        # GET (proto 0): memo index given as a decimal text line.
        i = int(self.readline()[:-1])
        try:
            self.append(self.memo[i])
        except KeyError:
            msg = f'Memo value not found at index {i}'
            raise UnpicklingError(msg) from None
    dispatch[GET[0]] = load_get

    def load_binget(self):
        # BINGET: 1-byte memo index.
        i = self.read(1)[0]
        try:
            self.append(self.memo[i])
        except KeyError as exc:
            msg = f'Memo value not found at index {i}'
            raise UnpicklingError(msg) from None
    dispatch[BINGET[0]] = load_binget

    def load_long_binget(self):
        # LONG_BINGET: 4-byte unsigned memo index.
        i, = unpack('<I', self.read(4))
        try:
            self.append(self.memo[i])
        except KeyError as exc:
            msg = f'Memo value not found at index {i}'
            raise UnpicklingError(msg) from None
    dispatch[LONG_BINGET[0]] = load_long_binget

    def load_put(self):
        # PUT (proto 0): store top of stack at a text memo index.
        i = int(self.readline()[:-1])
        if i < 0:
            raise ValueError("negative PUT argument")
        self.memo[i] = self.stack[-1]
    dispatch[PUT[0]] = load_put

    def load_binput(self):
        # BINPUT: 1-byte memo index (read(1)[0] is always 0..255, so the
        # negative check can never fire; kept for symmetry with PUT).
        i = self.read(1)[0]
        if i < 0:
            raise ValueError("negative BINPUT argument")
        self.memo[i] = self.stack[-1]
    dispatch[BINPUT[0]] = load_binput

    def load_long_binput(self):
        # LONG_BINPUT: 4-byte unsigned memo index.
        i, = unpack('<I', self.read(4))
        if i > maxsize:
            # NOTE(review): the message says "negative" but the check is
            # i > maxsize -- message and condition disagree.
            raise ValueError("negative LONG_BINPUT argument")
        self.memo[i] = self.stack[-1]
    dispatch[LONG_BINPUT[0]] = load_long_binput

    def load_memoize(self):
        # MEMOIZE (proto 4): store top of stack at the next memo index.
        memo = self.memo
        memo[len(memo)] = self.stack[-1]
    dispatch[MEMOIZE[0]] = load_memoize

    def load_append(self):
        # APPEND: pop a value and append it to the list below it.
        stack = self.stack
        value = stack.pop()
        list = stack[-1]
        list.append(value)
    dispatch[APPEND[0]] = load_append

    def load_appends(self):
        # APPENDS: extend the list below the MARK with all items above it.
        items = self.pop_mark()
        list_obj = self.stack[-1]
        try:
            extend = list_obj.extend
        except AttributeError:
            pass
        else:
            extend(items)
            return
        # Even if the PEP 307 requires extend() and append() methods,
        # fall back on append() if the object has no extend() method
        # for backward compatibility.
        append = list_obj.append
        for item in items:
            append(item)
    dispatch[APPENDS[0]] = load_appends

    def load_setitem(self):
        # SETITEM: pop value and key, set them on the dict below.
        stack = self.stack
        value = stack.pop()
        key = stack.pop()
        dict = stack[-1]
        dict[key] = value
    dispatch[SETITEM[0]] = load_setitem

    def load_setitems(self):
        # SETITEMS: items above the MARK are alternating key/value pairs
        # for the dict below the MARK.
        items = self.pop_mark()
        dict = self.stack[-1]
        for i in range(0, len(items), 2):
            dict[items[i]] = items[i + 1]
    dispatch[SETITEMS[0]] = load_setitems

    def load_additems(self):
        # ADDITEMS (proto 4): add items above the MARK to the set below.
        items = self.pop_mark()
        set_obj = self.stack[-1]
        if isinstance(set_obj, set):
            set_obj.update(items)
        else:
            # set subclasses may override add(); honor it item by item.
            add = set_obj.add
            for item in items:
                add(item)
    dispatch[ADDITEMS[0]] = load_additems

    def load_build(self):
        # BUILD: apply state to the instance below it, via __setstate__
        # if defined, else by updating __dict__ (and slots, if the state
        # is a (dict_state, slots_state) pair).
        stack = self.stack
        state = stack.pop()
        inst = stack[-1]
        setstate = getattr(inst, "__setstate__", None)
        if setstate is not None:
            setstate(state)
            return
        slotstate = None
        if isinstance(state, tuple) and len(state) == 2:
            state, slotstate = state
        if state:
            inst_dict = inst.__dict__
            # Interning str keys lets attribute lookups share key objects.
            intern = sys.intern
            for k, v in state.items():
                if type(k) is str:
                    inst_dict[intern(k)] = v
                else:
                    inst_dict[k] = v
        if slotstate:
            for k, v in slotstate.items():
                setattr(inst, k, v)
    dispatch[BUILD[0]] = load_build

    def load_mark(self):
        # MARK: start a new stack frame; the old one goes on metastack.
        self.metastack.append(self.stack)
        self.stack = []
        self.append = self.stack.append
    dispatch[MARK[0]] = load_mark

    def load_stop(self):
        # STOP: the top of stack is the final result; unwind via _Stop.
        value = self.stack.pop()
        raise _Stop(value)
    dispatch[STOP[0]] = load_stop
1746
1747
1748# Shorthands
1749
def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Write a pickled representation of *obj* to the open file object."""
    pickler = _Pickler(file, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
1753
def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Return the pickled representation of *obj* as a bytes object."""
    buf = io.BytesIO()
    pickler = _Pickler(buf, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
    result = buf.getvalue()
    assert isinstance(result, bytes_types)
    return result
1761
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict",
          buffers=None):
    """Read and return one pickled object from the open file object."""
    unpickler = _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
                           encoding=encoding, errors=errors)
    return unpickler.load()
1766
Haibo Huang5eba2b42021-01-22 11:22:02 -08001767def _loads(s, /, *, fix_imports=True, encoding="ASCII", errors="strict",
Haibo Huangd8830302020-03-03 10:09:46 -08001768 buffers=None):
1769 if isinstance(s, str):
1770 raise TypeError("Can't load pickle from unicode string")
1771 file = io.BytesIO(s)
1772 return _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
1773 encoding=encoding, errors=errors).load()
1774
# Use the faster _pickle if possible
try:
    from _pickle import (PickleError, PicklingError, UnpicklingError,
                         Pickler, Unpickler, dump, dumps, load, loads)
except ImportError:
    # Fall back on the pure-Python implementations defined above.
    Pickler, Unpickler = _Pickler, _Unpickler
    dump, dumps, load, loads = _dump, _dumps, _load, _loads
1791
1792# Doctest
1793def _test():
1794 import doctest
1795 return doctest.testmod()
1796
if __name__ == "__main__":
    # Command-line mode: pretty-print the contents of pickle files, or
    # run the self-test suite with -t.
    import argparse
    parser = argparse.ArgumentParser(
        description='display contents of the pickle files')
    parser.add_argument(
        'pickle_file', type=argparse.FileType('br'),
        nargs='*', help='the pickle file')
    parser.add_argument(
        '-t', '--test', action='store_true',
        help='run self-test suite')
    parser.add_argument(
        '-v', action='store_true',
        help='run verbosely; only affects self-test run')
    args = parser.parse_args()
    if args.test:
        _test()
    elif not args.pickle_file:
        parser.print_help()
    else:
        import pprint
        for pickle_fh in args.pickle_file:
            pprint.pprint(load(pickle_fh))