Blame - clang-r487747c/python3/Lib/pickletools.py - platform/prebuilts/clang/host/windows-x86

blob: 95706e746c9870c92996a622ef56a25c41aa88cd [file] [log] [blame]

Stephen Hines	c6ca60f	2023-05-09 02:19:22 -0700	[diff] [blame^]	1	'''"Executable documentation" for the pickle module.
				2
				3	Extensive comments about the pickle protocols and pickle-machine opcodes
				4	can be found here. Some functions meant for external use:
				5
				6	genops(pickle)
				7	Generate all the opcodes in a pickle, as (opcode, arg, position) triples.
				8
				9	dis(pickle, out=None, memo=None, indentlevel=4)
				10	Print a symbolic disassembly of a pickle.
				11	'''
				12
				13	import codecs
				14	import io
				15	import pickle
				16	import re
				17	import sys
				18
				19	__all__ = ['dis', 'genops', 'optimize']
				20
				21	bytes_types = pickle.bytes_types
				22
				23	# Other ideas:
				24	#
				25	# - A pickle verifier: read a pickle and check it exhaustively for
				26	# well-formedness. dis() does a lot of this already.
				27	#
				28	# - A protocol identifier: examine a pickle and return its protocol number
				29	# (== the highest .proto attr value among all the opcodes in the pickle).
				30	# dis() already prints this info at the end.
				31	#
				32	# - A pickle optimizer: for example, tuple-building code is sometimes more
				33	# elaborate than necessary, catering for the possibility that the tuple
				34	# is recursive. Or lots of times a PUT is generated that's never accessed
				35	# by a later GET.
				36
				37
				38	# "A pickle" is a program for a virtual pickle machine (PM, but more accurately
				39	# called an unpickling machine). It's a sequence of opcodes, interpreted by the
				40	# PM, building an arbitrarily complex Python object.
				41	#
				42	# For the most part, the PM is very simple: there are no looping, testing, or
				43	# conditional instructions, no arithmetic and no function calls. Opcodes are
				44	# executed once each, from first to last, until a STOP opcode is reached.
				45	#
				46	# The PM has two data areas, "the stack" and "the memo".
				47	#
				48	# Many opcodes push Python objects onto the stack; e.g., INT pushes a Python
				49	# integer object on the stack, whose value is gotten from a decimal string
				50	# literal immediately following the INT opcode in the pickle bytestream. Other
				51	# opcodes take Python objects off the stack. The result of unpickling is
				52	# whatever object is left on the stack when the final STOP opcode is executed.
				53	#
				54	# The memo is simply an array of objects, or it can be implemented as a dict
				55	# mapping little integers to objects. The memo serves as the PM's "long term
				56	# memory", and the little integers indexing the memo are akin to variable
				57	# names. Some opcodes pop a stack object into the memo at a given index,
				58	# and others push a memo object at a given index onto the stack again.
				59	#
				60	# At heart, that's all the PM has. Subtleties arise for these reasons:
				61	#
				62	# + Object identity. Objects can be arbitrarily complex, and subobjects
				63	# may be shared (for example, the list [a, a] refers to the same object a
				64	# twice). It can be vital that unpickling recreate an isomorphic object
				65	# graph, faithfully reproducing sharing.
				66	#
				67	# + Recursive objects. For example, after "L = []; L.append(L)", L is a
				68	# list, and L[0] is the same list. This is related to the object identity
				69	# point, and some sequences of pickle opcodes are subtle in order to
				70	# get the right result in all cases.
				71	#
				72	# + Things pickle doesn't know everything about. Examples of things pickle
				73	# does know everything about are Python's builtin scalar and container
				74	# types, like ints and tuples. They generally have opcodes dedicated to
				75	# them. For things like module references and instances of user-defined
				76	# classes, pickle's knowledge is limited. Historically, many enhancements
				77	# have been made to the pickle protocol in order to do a better (faster,
				78	# and/or more compact) job on those.
				79	#
				80	# + Backward compatibility and micro-optimization. As explained below,
				81	# pickle opcodes never go away, not even when better ways to do a thing
				82	# get invented. The repertoire of the PM just keeps growing over time.
				83	# For example, protocol 0 had two opcodes for building Python integers (INT
				84	# and LONG), protocol 1 added three more for more-efficient pickling of short
				85	# integers, and protocol 2 added two more for more-efficient pickling of
				86	# long integers (before protocol 2, the only ways to pickle a Python long
				87	# took time quadratic in the number of digits, for both pickling and
				88	# unpickling). "Opcode bloat" isn't so much a subtlety as a source of
				89	# wearying complication.
				90	#
				91	#
				92	# Pickle protocols:
				93	#
				94	# For compatibility, the meaning of a pickle opcode never changes. Instead new
				95	# pickle opcodes get added, and each version's unpickler can handle all the
				96	# pickle opcodes in all protocol versions to date. So old pickles continue to
				97	# be readable forever. The pickler can generally be told to restrict itself to
				98	# the subset of opcodes available under previous protocol versions too, so that
				99	# users can create pickles under the current version readable by older
				100	# versions. However, a pickle does not contain its version number embedded
				101	# within it. If an older unpickler tries to read a pickle using a later
				102	# protocol, the result is most likely an exception due to seeing an unknown (in
				103	# the older unpickler) opcode.
				104	#
				105	# The original pickle used what's now called "protocol 0", and what was called
				106	# "text mode" before Python 2.3. The entire pickle bytestream is made up of
				107	# printable 7-bit ASCII characters, plus the newline character, in protocol 0.
				108	# That's why it was called text mode. Protocol 0 is small and elegant, but
				109	# sometimes painfully inefficient.
				110	#
				111	# The second major set of additions is now called "protocol 1", and was called
				112	# "binary mode" before Python 2.3. This added many opcodes with arguments
				113	# consisting of arbitrary bytes, including NUL bytes and unprintable "high bit"
				114	# bytes. Binary mode pickles can be substantially smaller than equivalent
				115	# text mode pickles, and sometimes faster too; e.g., BININT represents a 4-byte
				116	# int as 4 bytes following the opcode, which is cheaper to unpickle than the
				117	# (perhaps) 11-character decimal string attached to INT. Protocol 1 also added
				118	# a number of opcodes that operate on many stack elements at once (like APPENDS
				119	# and SETITEMS), and "shortcut" opcodes (like EMPTY_DICT and EMPTY_TUPLE).
				120	#
				121	# The third major set of additions came in Python 2.3, and is called "protocol
				122	# 2". This added:
				123	#
				124	# - A better way to pickle instances of new-style classes (NEWOBJ).
				125	#
				126	# - A way for a pickle to identify its protocol (PROTO).
				127	#
				128	# - Time- and space- efficient pickling of long ints (LONG{1,4}).
				129	#
				130	# - Shortcuts for small tuples (TUPLE{1,2,3}).
				131	#
				132	# - Dedicated opcodes for bools (NEWTRUE, NEWFALSE).
				133	#
				134	# - The "extension registry", a vector of popular objects that can be pushed
				135	# efficiently by index (EXT{1,2,4}). This is akin to the memo and GET, but
				136	# the registry contents are predefined (there's nothing akin to the memo's
				137	# PUT).
				138	#
				139	# Another independent change with Python 2.3 is the abandonment of any
				140	# pretense that it might be safe to load pickles received from untrusted
				141	# parties -- no sufficient security analysis has been done to guarantee
				142	# this and there isn't a use case that warrants the expense of such an
				143	# analysis.
				144	#
				145	# To this end, all tests for __safe_for_unpickling__ or for
				146	# copyreg.safe_constructors are removed from the unpickling code.
				147	# References to these variables in the descriptions below are to be seen
				148	# as describing unpickling in Python 2.2 and before.
				149
				150
				151	# Meta-rule: Descriptions are stored in instances of descriptor objects,
				152	# with plain constructors. No meta-language is defined from which
				153	# descriptors could be constructed. If you want, e.g., XML, write a little
				154	# program to generate XML from the objects.
				155
				156	##############################################################################
				157	# Some pickle opcodes have an argument, following the opcode in the
				158	# bytestream. An argument is of a specific type, described by an instance
				159	# of ArgumentDescriptor. These are not to be confused with arguments taken
				160	# off the stack -- ArgumentDescriptor applies only to arguments embedded in
				161	# the opcode stream, immediately following an opcode.
				162
# Sentinel "length" for an argument that is delimited by the next newline
# character rather than having a fixed size.
UP_TO_NEWLINE = -1

# Sentinel "lengths" for two-part arguments, where the first part is a count
# giving the number of bytes in the second part.  All sentinels are negative
# so they can never be mistaken for a real (>= 0) byte count.
TAKEN_FROM_ARGUMENT1 = -2 # num bytes is 1-byte unsigned int
TAKEN_FROM_ARGUMENT4 = -3 # num bytes is 4-byte signed little-endian int
TAKEN_FROM_ARGUMENT4U = -4 # num bytes is 4-byte unsigned little-endian int
TAKEN_FROM_ARGUMENT8U = -5 # num bytes is 8-byte unsigned little-endian int
				173
				174	class ArgumentDescriptor(object):
				175	__slots__ = (
				176	# name of descriptor record, also a module global name; a string
				177	'name',
				178
				179	# length of argument, in bytes; an int; UP_TO_NEWLINE and
				180	# TAKEN_FROM_ARGUMENT{1,4,8} are negative values for variable-length
				181	# cases
				182	'n',
				183
				184	# a function taking a file-like object, reading this kind of argument
				185	# from the object at the current position, advancing the current
				186	# position by n bytes, and returning the value of the argument
				187	'reader',
				188
				189	# human-readable docs for this arg descriptor; a string
				190	'doc',
				191	)
				192
				193	def __init__(self, name, n, reader, doc):
				194	assert isinstance(name, str)
				195	self.name = name
				196
				197	assert isinstance(n, int) and (n >= 0 or
				198	n in (UP_TO_NEWLINE,
				199	TAKEN_FROM_ARGUMENT1,
				200	TAKEN_FROM_ARGUMENT4,
				201	TAKEN_FROM_ARGUMENT4U,
				202	TAKEN_FROM_ARGUMENT8U))
				203	self.n = n
				204
				205	self.reader = reader
				206
				207	assert isinstance(doc, str)
				208	self.doc = doc
				209
				210	from struct import unpack as _unpack
				211
				212	def read_uint1(f):
				213	r"""
				214	>>> import io
				215	>>> read_uint1(io.BytesIO(b'\xff'))
				216	255
				217	"""
				218
				219	data = f.read(1)
				220	if data:
				221	return data[0]
				222	raise ValueError("not enough data in stream to read uint1")
				223
				224	uint1 = ArgumentDescriptor(
				225	name='uint1',
				226	n=1,
				227	reader=read_uint1,
				228	doc="One-byte unsigned integer.")
				229
				230
				231	def read_uint2(f):
				232	r"""
				233	>>> import io
				234	>>> read_uint2(io.BytesIO(b'\xff\x00'))
				235	255
				236	>>> read_uint2(io.BytesIO(b'\xff\xff'))
				237	65535
				238	"""
				239
				240	data = f.read(2)
				241	if len(data) == 2:
				242	return _unpack("<H", data)[0]
				243	raise ValueError("not enough data in stream to read uint2")
				244
				245	uint2 = ArgumentDescriptor(
				246	name='uint2',
				247	n=2,
				248	reader=read_uint2,
				249	doc="Two-byte unsigned integer, little-endian.")
				250
				251
				252	def read_int4(f):
				253	r"""
				254	>>> import io
				255	>>> read_int4(io.BytesIO(b'\xff\x00\x00\x00'))
				256	255
				257	>>> read_int4(io.BytesIO(b'\x00\x00\x00\x80')) == -(2**31)
				258	True
				259	"""
				260
				261	data = f.read(4)
				262	if len(data) == 4:
				263	return _unpack("<i", data)[0]
				264	raise ValueError("not enough data in stream to read int4")
				265
				266	int4 = ArgumentDescriptor(
				267	name='int4',
				268	n=4,
				269	reader=read_int4,
				270	doc="Four-byte signed integer, little-endian, 2's complement.")
				271
				272
				273	def read_uint4(f):
				274	r"""
				275	>>> import io
				276	>>> read_uint4(io.BytesIO(b'\xff\x00\x00\x00'))
				277	255
				278	>>> read_uint4(io.BytesIO(b'\x00\x00\x00\x80')) == 2**31
				279	True
				280	"""
				281
				282	data = f.read(4)
				283	if len(data) == 4:
				284	return _unpack("<I", data)[0]
				285	raise ValueError("not enough data in stream to read uint4")
				286
				287	uint4 = ArgumentDescriptor(
				288	name='uint4',
				289	n=4,
				290	reader=read_uint4,
				291	doc="Four-byte unsigned integer, little-endian.")
				292
				293
				294	def read_uint8(f):
				295	r"""
				296	>>> import io
				297	>>> read_uint8(io.BytesIO(b'\xff\x00\x00\x00\x00\x00\x00\x00'))
				298	255
				299	>>> read_uint8(io.BytesIO(b'\xff' * 8)) == 2**64-1
				300	True
				301	"""
				302
				303	data = f.read(8)
				304	if len(data) == 8:
				305	return _unpack("<Q", data)[0]
				306	raise ValueError("not enough data in stream to read uint8")
				307
				308	uint8 = ArgumentDescriptor(
				309	name='uint8',
				310	n=8,
				311	reader=read_uint8,
				312	doc="Eight-byte unsigned integer, little-endian.")
				313
				314
				315	def read_stringnl(f, decode=True, stripquotes=True):
				316	r"""
				317	>>> import io
				318	>>> read_stringnl(io.BytesIO(b"'abcd'\nefg\n"))
				319	'abcd'
				320
				321	>>> read_stringnl(io.BytesIO(b"\n"))
				322	Traceback (most recent call last):
				323	...
				324	ValueError: no string quotes around b''
				325
				326	>>> read_stringnl(io.BytesIO(b"\n"), stripquotes=False)
				327	''
				328
				329	>>> read_stringnl(io.BytesIO(b"''\n"))
				330	''
				331
				332	>>> read_stringnl(io.BytesIO(b'"abcd"'))
				333	Traceback (most recent call last):
				334	...
				335	ValueError: no newline found when trying to read stringnl
				336
				337	Embedded escapes are undone in the result.
				338	>>> read_stringnl(io.BytesIO(br"'a\n\\b\x00c\td'" + b"\n'e'"))
				339	'a\n\\b\x00c\td'
				340	"""
				341
				342	data = f.readline()
				343	if not data.endswith(b'\n'):
				344	raise ValueError("no newline found when trying to read stringnl")
				345	data = data[:-1] # lose the newline
				346
				347	if stripquotes:
				348	for q in (b'"', b"'"):
				349	if data.startswith(q):
				350	if not data.endswith(q):
				351	raise ValueError("strinq quote %r not found at both "
				352	"ends of %r" % (q, data))
				353	data = data[1:-1]
				354	break
				355	else:
				356	raise ValueError("no string quotes around %r" % data)
				357
				358	if decode:
				359	data = codecs.escape_decode(data)[0].decode("ascii")
				360	return data
				361
				362	stringnl = ArgumentDescriptor(
				363	name='stringnl',
				364	n=UP_TO_NEWLINE,
				365	reader=read_stringnl,
				366	doc="""A newline-terminated string.
				367
				368	This is a repr-style string, with embedded escapes, and
				369	bracketing quotes.
				370	""")
				371
				372	def read_stringnl_noescape(f):
				373	return read_stringnl(f, stripquotes=False)
				374
				375	stringnl_noescape = ArgumentDescriptor(
				376	name='stringnl_noescape',
				377	n=UP_TO_NEWLINE,
				378	reader=read_stringnl_noescape,
				379	doc="""A newline-terminated string.
				380
				381	This is a str-style string, without embedded escapes,
				382	or bracketing quotes. It should consist solely of
				383	printable ASCII characters.
				384	""")
				385
				386	def read_stringnl_noescape_pair(f):
				387	r"""
				388	>>> import io
				389	>>> read_stringnl_noescape_pair(io.BytesIO(b"Queue\nEmpty\njunk"))
				390	'Queue Empty'
				391	"""
				392
				393	return "%s %s" % (read_stringnl_noescape(f), read_stringnl_noescape(f))
				394
				395	stringnl_noescape_pair = ArgumentDescriptor(
				396	name='stringnl_noescape_pair',
				397	n=UP_TO_NEWLINE,
				398	reader=read_stringnl_noescape_pair,
				399	doc="""A pair of newline-terminated strings.
				400
				401	These are str-style strings, without embedded
				402	escapes, or bracketing quotes. They should
				403	consist solely of printable ASCII characters.
				404	The pair is returned as a single string, with
				405	a single blank separating the two strings.
				406	""")
				407
				408
				409	def read_string1(f):
				410	r"""
				411	>>> import io
				412	>>> read_string1(io.BytesIO(b"\x00"))
				413	''
				414	>>> read_string1(io.BytesIO(b"\x03abcdef"))
				415	'abc'
				416	"""
				417
				418	n = read_uint1(f)
				419	assert n >= 0
				420	data = f.read(n)
				421	if len(data) == n:
				422	return data.decode("latin-1")
				423	raise ValueError("expected %d bytes in a string1, but only %d remain" %
				424	(n, len(data)))
				425
				426	string1 = ArgumentDescriptor(
				427	name="string1",
				428	n=TAKEN_FROM_ARGUMENT1,
				429	reader=read_string1,
				430	doc="""A counted string.
				431
				432	The first argument is a 1-byte unsigned int giving the number
				433	of bytes in the string, and the second argument is that many
				434	bytes.
				435	""")
				436
				437
				438	def read_string4(f):
				439	r"""
				440	>>> import io
				441	>>> read_string4(io.BytesIO(b"\x00\x00\x00\x00abc"))
				442	''
				443	>>> read_string4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
				444	'abc'
				445	>>> read_string4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
				446	Traceback (most recent call last):
				447	...
				448	ValueError: expected 50331648 bytes in a string4, but only 6 remain
				449	"""
				450
				451	n = read_int4(f)
				452	if n < 0:
				453	raise ValueError("string4 byte count < 0: %d" % n)
				454	data = f.read(n)
				455	if len(data) == n:
				456	return data.decode("latin-1")
				457	raise ValueError("expected %d bytes in a string4, but only %d remain" %
				458	(n, len(data)))
				459
				460	string4 = ArgumentDescriptor(
				461	name="string4",
				462	n=TAKEN_FROM_ARGUMENT4,
				463	reader=read_string4,
				464	doc="""A counted string.
				465
				466	The first argument is a 4-byte little-endian signed int giving
				467	the number of bytes in the string, and the second argument is
				468	that many bytes.
				469	""")
				470
				471
				472	def read_bytes1(f):
				473	r"""
				474	>>> import io
				475	>>> read_bytes1(io.BytesIO(b"\x00"))
				476	b''
				477	>>> read_bytes1(io.BytesIO(b"\x03abcdef"))
				478	b'abc'
				479	"""
				480
				481	n = read_uint1(f)
				482	assert n >= 0
				483	data = f.read(n)
				484	if len(data) == n:
				485	return data
				486	raise ValueError("expected %d bytes in a bytes1, but only %d remain" %
				487	(n, len(data)))
				488
				489	bytes1 = ArgumentDescriptor(
				490	name="bytes1",
				491	n=TAKEN_FROM_ARGUMENT1,
				492	reader=read_bytes1,
				493	doc="""A counted bytes string.
				494
				495	The first argument is a 1-byte unsigned int giving the number
				496	of bytes, and the second argument is that many bytes.
				497	""")
				498
				499
				500	def read_bytes4(f):
				501	r"""
				502	>>> import io
				503	>>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x00abc"))
				504	b''
				505	>>> read_bytes4(io.BytesIO(b"\x03\x00\x00\x00abcdef"))
				506	b'abc'
				507	>>> read_bytes4(io.BytesIO(b"\x00\x00\x00\x03abcdef"))
				508	Traceback (most recent call last):
				509	...
				510	ValueError: expected 50331648 bytes in a bytes4, but only 6 remain
				511	"""
				512
				513	n = read_uint4(f)
				514	assert n >= 0
				515	if n > sys.maxsize:
				516	raise ValueError("bytes4 byte count > sys.maxsize: %d" % n)
				517	data = f.read(n)
				518	if len(data) == n:
				519	return data
				520	raise ValueError("expected %d bytes in a bytes4, but only %d remain" %
				521	(n, len(data)))
				522
				523	bytes4 = ArgumentDescriptor(
				524	name="bytes4",
				525	n=TAKEN_FROM_ARGUMENT4U,
				526	reader=read_bytes4,
				527	doc="""A counted bytes string.
				528
				529	The first argument is a 4-byte little-endian unsigned int giving
				530	the number of bytes, and the second argument is that many bytes.
				531	""")
				532
				533
				534	def read_bytes8(f):
				535	r"""
				536	>>> import io, struct, sys
				537	>>> read_bytes8(io.BytesIO(b"\x00\x00\x00\x00\x00\x00\x00\x00abc"))
				538	b''
				539	>>> read_bytes8(io.BytesIO(b"\x03\x00\x00\x00\x00\x00\x00\x00abcdef"))
				540	b'abc'
				541	>>> bigsize8 = struct.pack("<Q", sys.maxsize//3)
				542	>>> read_bytes8(io.BytesIO(bigsize8 + b"abcdef")) #doctest: +ELLIPSIS
				543	Traceback (most recent call last):
				544	...
				545	ValueError: expected ... bytes in a bytes8, but only 6 remain
				546	"""
				547
				548	n = read_uint8(f)
				549	assert n >= 0
				550	if n > sys.maxsize:
				551	raise ValueError("bytes8 byte count > sys.maxsize: %d" % n)
				552	data = f.read(n)
				553	if len(data) == n:
				554	return data
				555	raise ValueError("expected %d bytes in a bytes8, but only %d remain" %
				556	(n, len(data)))
				557
				558	bytes8 = ArgumentDescriptor(
				559	name="bytes8",
				560	n=TAKEN_FROM_ARGUMENT8U,
				561	reader=read_bytes8,
				562	doc="""A counted bytes string.
				563
				564	The first argument is an 8-byte little-endian unsigned int giving
				565	the number of bytes, and the second argument is that many bytes.
				566	""")
				567
				568
				569	def read_bytearray8(f):
				570	r"""
				571	>>> import io, struct, sys
				572	>>> read_bytearray8(io.BytesIO(b"\x00\x00\x00\x00\x00\x00\x00\x00abc"))
				573	bytearray(b'')
				574	>>> read_bytearray8(io.BytesIO(b"\x03\x00\x00\x00\x00\x00\x00\x00abcdef"))
				575	bytearray(b'abc')
				576	>>> bigsize8 = struct.pack("<Q", sys.maxsize//3)
				577	>>> read_bytearray8(io.BytesIO(bigsize8 + b"abcdef")) #doctest: +ELLIPSIS
				578	Traceback (most recent call last):
				579	...
				580	ValueError: expected ... bytes in a bytearray8, but only 6 remain
				581	"""
				582
				583	n = read_uint8(f)
				584	assert n >= 0
				585	if n > sys.maxsize:
				586	raise ValueError("bytearray8 byte count > sys.maxsize: %d" % n)
				587	data = f.read(n)
				588	if len(data) == n:
				589	return bytearray(data)
				590	raise ValueError("expected %d bytes in a bytearray8, but only %d remain" %
				591	(n, len(data)))
				592
				593	bytearray8 = ArgumentDescriptor(
				594	name="bytearray8",
				595	n=TAKEN_FROM_ARGUMENT8U,
				596	reader=read_bytearray8,
				597	doc="""A counted bytearray.
				598
				599	The first argument is an 8-byte little-endian unsigned int giving
				600	the number of bytes, and the second argument is that many bytes.
				601	""")
				602
				603	def read_unicodestringnl(f):
				604	r"""
				605	>>> import io
				606	>>> read_unicodestringnl(io.BytesIO(b"abc\\uabcd\njunk")) == 'abc\uabcd'
				607	True
				608	"""
				609
				610	data = f.readline()
				611	if not data.endswith(b'\n'):
				612	raise ValueError("no newline found when trying to read "
				613	"unicodestringnl")
				614	data = data[:-1] # lose the newline
				615	return str(data, 'raw-unicode-escape')
				616
				617	unicodestringnl = ArgumentDescriptor(
				618	name='unicodestringnl',
				619	n=UP_TO_NEWLINE,
				620	reader=read_unicodestringnl,
				621	doc="""A newline-terminated Unicode string.
				622
				623	This is raw-unicode-escape encoded, so consists of
				624	printable ASCII characters, and may contain embedded
				625	escape sequences.
				626	""")
				627
				628
				629	def read_unicodestring1(f):
				630	r"""
				631	>>> import io
				632	>>> s = 'abcd\uabcd'
				633	>>> enc = s.encode('utf-8')
				634	>>> enc
				635	b'abcd\xea\xaf\x8d'
				636	>>> n = bytes([len(enc)]) # little-endian 1-byte length
				637	>>> t = read_unicodestring1(io.BytesIO(n + enc + b'junk'))
				638	>>> s == t
				639	True
				640
				641	>>> read_unicodestring1(io.BytesIO(n + enc[:-1]))
				642	Traceback (most recent call last):
				643	...
				644	ValueError: expected 7 bytes in a unicodestring1, but only 6 remain
				645	"""
				646
				647	n = read_uint1(f)
				648	assert n >= 0
				649	data = f.read(n)
				650	if len(data) == n:
				651	return str(data, 'utf-8', 'surrogatepass')
				652	raise ValueError("expected %d bytes in a unicodestring1, but only %d "
				653	"remain" % (n, len(data)))
				654
				655	unicodestring1 = ArgumentDescriptor(
				656	name="unicodestring1",
				657	n=TAKEN_FROM_ARGUMENT1,
				658	reader=read_unicodestring1,
				659	doc="""A counted Unicode string.
				660
				661	The first argument is a 1-byte little-endian signed int
				662	giving the number of bytes in the string, and the second
				663	argument-- the UTF-8 encoding of the Unicode string --
				664	contains that many bytes.
				665	""")
				666
				667
				668	def read_unicodestring4(f):
				669	r"""
				670	>>> import io
				671	>>> s = 'abcd\uabcd'
				672	>>> enc = s.encode('utf-8')
				673	>>> enc
				674	b'abcd\xea\xaf\x8d'
				675	>>> n = bytes([len(enc), 0, 0, 0]) # little-endian 4-byte length
				676	>>> t = read_unicodestring4(io.BytesIO(n + enc + b'junk'))
				677	>>> s == t
				678	True
				679
				680	>>> read_unicodestring4(io.BytesIO(n + enc[:-1]))
				681	Traceback (most recent call last):
				682	...
				683	ValueError: expected 7 bytes in a unicodestring4, but only 6 remain
				684	"""
				685
				686	n = read_uint4(f)
				687	assert n >= 0
				688	if n > sys.maxsize:
				689	raise ValueError("unicodestring4 byte count > sys.maxsize: %d" % n)
				690	data = f.read(n)
				691	if len(data) == n:
				692	return str(data, 'utf-8', 'surrogatepass')
				693	raise ValueError("expected %d bytes in a unicodestring4, but only %d "
				694	"remain" % (n, len(data)))
				695
				696	unicodestring4 = ArgumentDescriptor(
				697	name="unicodestring4",
				698	n=TAKEN_FROM_ARGUMENT4U,
				699	reader=read_unicodestring4,
				700	doc="""A counted Unicode string.
				701
				702	The first argument is a 4-byte little-endian signed int
				703	giving the number of bytes in the string, and the second
				704	argument-- the UTF-8 encoding of the Unicode string --
				705	contains that many bytes.
				706	""")
				707
				708
				709	def read_unicodestring8(f):
				710	r"""
				711	>>> import io
				712	>>> s = 'abcd\uabcd'
				713	>>> enc = s.encode('utf-8')
				714	>>> enc
				715	b'abcd\xea\xaf\x8d'
				716	>>> n = bytes([len(enc)]) + b'\0' * 7 # little-endian 8-byte length
				717	>>> t = read_unicodestring8(io.BytesIO(n + enc + b'junk'))
				718	>>> s == t
				719	True
				720
				721	>>> read_unicodestring8(io.BytesIO(n + enc[:-1]))
				722	Traceback (most recent call last):
				723	...
				724	ValueError: expected 7 bytes in a unicodestring8, but only 6 remain
				725	"""
				726
				727	n = read_uint8(f)
				728	assert n >= 0
				729	if n > sys.maxsize:
				730	raise ValueError("unicodestring8 byte count > sys.maxsize: %d" % n)
				731	data = f.read(n)
				732	if len(data) == n:
				733	return str(data, 'utf-8', 'surrogatepass')
				734	raise ValueError("expected %d bytes in a unicodestring8, but only %d "
				735	"remain" % (n, len(data)))
				736
				737	unicodestring8 = ArgumentDescriptor(
				738	name="unicodestring8",
				739	n=TAKEN_FROM_ARGUMENT8U,
				740	reader=read_unicodestring8,
				741	doc="""A counted Unicode string.
				742
				743	The first argument is an 8-byte little-endian signed int
				744	giving the number of bytes in the string, and the second
				745	argument-- the UTF-8 encoding of the Unicode string --
				746	contains that many bytes.
				747	""")
				748
				749
				750	def read_decimalnl_short(f):
				751	r"""
				752	>>> import io
				753	>>> read_decimalnl_short(io.BytesIO(b"1234\n56"))
				754	1234
				755
				756	>>> read_decimalnl_short(io.BytesIO(b"1234L\n56"))
				757	Traceback (most recent call last):
				758	...
				759	ValueError: invalid literal for int() with base 10: b'1234L'
				760	"""
				761
				762	s = read_stringnl(f, decode=False, stripquotes=False)
				763
				764	# There's a hack for True and False here.
				765	if s == b"00":
				766	return False
				767	elif s == b"01":
				768	return True
				769
				770	return int(s)
				771
				772	def read_decimalnl_long(f):
				773	r"""
				774	>>> import io
				775
				776	>>> read_decimalnl_long(io.BytesIO(b"1234L\n56"))
				777	1234
				778
				779	>>> read_decimalnl_long(io.BytesIO(b"123456789012345678901234L\n6"))
				780	123456789012345678901234
				781	"""
				782
				783	s = read_stringnl(f, decode=False, stripquotes=False)
				784	if s[-1:] == b'L':
				785	s = s[:-1]
				786	return int(s)
				787
				788
				789	decimalnl_short = ArgumentDescriptor(
				790	name='decimalnl_short',
				791	n=UP_TO_NEWLINE,
				792	reader=read_decimalnl_short,
				793	doc="""A newline-terminated decimal integer literal.
				794
				795	This never has a trailing 'L', and the integer fit
				796	in a short Python int on the box where the pickle
				797	was written -- but there's no guarantee it will fit
				798	in a short Python int on the box where the pickle
				799	is read.
				800	""")
				801
				802	decimalnl_long = ArgumentDescriptor(
				803	name='decimalnl_long',
				804	n=UP_TO_NEWLINE,
				805	reader=read_decimalnl_long,
				806	doc="""A newline-terminated decimal integer literal.
				807
				808	This has a trailing 'L', and can represent integers
				809	of any size.
				810	""")
				811
				812
				813	def read_floatnl(f):
				814	r"""
				815	>>> import io
				816	>>> read_floatnl(io.BytesIO(b"-1.25\n6"))
				817	-1.25
				818	"""
				819	s = read_stringnl(f, decode=False, stripquotes=False)
				820	return float(s)
				821
				822	floatnl = ArgumentDescriptor(
				823	name='floatnl',
				824	n=UP_TO_NEWLINE,
				825	reader=read_floatnl,
				826	doc="""A newline-terminated decimal floating literal.
				827
				828	In general this requires 17 significant digits for roundtrip
				829	identity, and pickling then unpickling infinities, NaNs, and
				830	minus zero doesn't work across boxes, or on some boxes even
				831	on itself (e.g., Windows can't read the strings it produces
				832	for infinities or NaNs).
				833	""")
				834
				835	def read_float8(f):
				836	r"""
				837	>>> import io, struct
				838	>>> raw = struct.pack(">d", -1.25)
				839	>>> raw
				840	b'\xbf\xf4\x00\x00\x00\x00\x00\x00'
				841	>>> read_float8(io.BytesIO(raw + b"\n"))
				842	-1.25
				843	"""
				844
				845	data = f.read(8)
				846	if len(data) == 8:
				847	return _unpack(">d", data)[0]
				848	raise ValueError("not enough data in stream to read float8")
				849
				850
				851	float8 = ArgumentDescriptor(
				852	name='float8',
				853	n=8,
				854	reader=read_float8,
				855	doc="""An 8-byte binary representation of a float, big-endian.
				856
				857	The format is unique to Python, and shared with the struct
				858	module (format string '>d') "in theory" (the struct and pickle
				859	implementations don't share the code -- they should). It's
				860	strongly related to the IEEE-754 double format, and, in normal
				861	cases, is in fact identical to the big-endian 754 double format.
				862	On other boxes the dynamic range is limited to that of a 754
				863	double, and "add a half and chop" rounding is used to reduce
				864	the precision to 53 bits. However, even on a 754 box,
				865	infinities, NaNs, and minus zero may not be handled correctly
				866	(may not survive roundtrip pickling intact).
				867	""")
				868
				869	# Protocol 2 formats
				870
				871	from pickle import decode_long
				872
				873	def read_long1(f):
				874	r"""
				875	>>> import io
				876	>>> read_long1(io.BytesIO(b"\x00"))
				877	0
				878	>>> read_long1(io.BytesIO(b"\x02\xff\x00"))
				879	255
				880	>>> read_long1(io.BytesIO(b"\x02\xff\x7f"))
				881	32767
				882	>>> read_long1(io.BytesIO(b"\x02\x00\xff"))
				883	-256
				884	>>> read_long1(io.BytesIO(b"\x02\x00\x80"))
				885	-32768
				886	"""
				887
				888	n = read_uint1(f)
				889	data = f.read(n)
				890	if len(data) != n:
				891	raise ValueError("not enough data in stream to read long1")
				892	return decode_long(data)
				893
# Descriptor for a variable-length integer argument whose byte count is
# carried in a leading 1-byte size field; decoding is done by read_long1.
# NOTE(review): TAKEN_FROM_ARGUMENT1 is defined elsewhere in this file and
# presumably flags "length comes from a 1-byte prefix" -- confirm there.
long1 = ArgumentDescriptor(
    name="long1",
    n=TAKEN_FROM_ARGUMENT1,
    reader=read_long1,
    doc="""A binary long, little-endian, using 1-byte size.

This first reads one byte as an unsigned size, then reads that
many bytes and interprets them as a little-endian 2's-complement long.
If the size is 0, that's taken as a shortcut for the long 0L.
""")
				904
				905	def read_long4(f):
				906	r"""
				907	>>> import io
				908	>>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\xff\x00"))
				909	255
				910	>>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\xff\x7f"))
				911	32767
				912	>>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\x00\xff"))
				913	-256
				914	>>> read_long4(io.BytesIO(b"\x02\x00\x00\x00\x00\x80"))
				915	-32768
				916	>>> read_long1(io.BytesIO(b"\x00\x00\x00\x00"))
				917	0
				918	"""
				919
				920	n = read_int4(f)
				921	if n < 0:
				922	raise ValueError("long4 byte count < 0: %d" % n)
				923	data = f.read(n)
				924	if len(data) != n:
				925	raise ValueError("not enough data in stream to read long4")
				926	return decode_long(data)
				927
# Descriptor for a variable-length integer argument whose byte count is
# carried in a leading 4-byte size field; decoding is done by read_long4,
# which rejects negative sizes.
# NOTE(review): TAKEN_FROM_ARGUMENT4 is defined elsewhere in this file and
# presumably flags "length comes from a 4-byte prefix" -- confirm there.
long4 = ArgumentDescriptor(
    name="long4",
    n=TAKEN_FROM_ARGUMENT4,
    reader=read_long4,
    doc="""A binary representation of a long, little-endian.

This first reads four bytes as a signed size (but requires the
size to be >= 0), then reads that many bytes and interprets them
as a little-endian 2's-complement long. If the size is 0, that's taken
as a shortcut for the int 0, although LONG1 should really be used
then instead (and in any case where # of bytes < 256).
""")
				940
				941
				942	##############################################################################
				943	# Object descriptors. The stack used by the pickle machine holds objects,
				944	# and in the stack_before and stack_after attributes of OpcodeInfo
				945	# descriptors we need names to describe the various types of objects that can
				946	# appear on the stack.
				947
				948	class StackObject(object):
				949	__slots__ = (
				950	# name of descriptor record, for info only
				951	'name',
				952
				953	# type of object, or tuple of type objects (meaning the object can
				954	# be of any type in the tuple)
				955	'obtype',
				956
				957	# human-readable docs for this kind of stack object; a string
				958	'doc',
				959	)
				960
				961	def __init__(self, name, obtype, doc):
				962	assert isinstance(name, str)
				963	self.name = name
				964
				965	assert isinstance(obtype, type) or isinstance(obtype, tuple)
				966	if isinstance(obtype, tuple):
				967	for contained in obtype:
				968	assert isinstance(contained, type)
				969	self.obtype = obtype
				970
				971	assert isinstance(doc, str)
				972	self.doc = doc
				973
				974	def __repr__(self):
				975	return self.name
				976
				977
				978	pyint = pylong = StackObject(
				979	name='int',
				980	obtype=int,
				981	doc="A Python integer object.")
				982
				983	pyinteger_or_bool = StackObject(
				984	name='int_or_bool',
				985	obtype=(int, bool),
				986	doc="A Python integer or boolean object.")
				987
				988	pybool = StackObject(
				989	name='bool',
				990	obtype=bool,
				991	doc="A Python boolean object.")
				992
				993	pyfloat = StackObject(
				994	name='float',
				995	obtype=float,
				996	doc="A Python float object.")
				997
				998	pybytes_or_str = pystring = StackObject(
				999	name='bytes_or_str',
				1000	obtype=(bytes, str),
				1001	doc="A Python bytes or (Unicode) string object.")
				1002
				1003	pybytes = StackObject(
				1004	name='bytes',
				1005	obtype=bytes,
				1006	doc="A Python bytes object.")
				1007
				1008	pybytearray = StackObject(
				1009	name='bytearray',
				1010	obtype=bytearray,
				1011	doc="A Python bytearray object.")
				1012
				1013	pyunicode = StackObject(
				1014	name='str',
				1015	obtype=str,
				1016	doc="A Python (Unicode) string object.")
				1017
				1018	pynone = StackObject(
				1019	name="None",
				1020	obtype=type(None),
				1021	doc="The Python None object.")
				1022
				1023	pytuple = StackObject(
				1024	name="tuple",
				1025	obtype=tuple,
				1026	doc="A Python tuple object.")
				1027
				1028	pylist = StackObject(
				1029	name="list",
				1030	obtype=list,
				1031	doc="A Python list object.")
				1032
				1033	pydict = StackObject(
				1034	name="dict",
				1035	obtype=dict,
				1036	doc="A Python dict object.")
				1037
				1038	pyset = StackObject(
				1039	name="set",
				1040	obtype=set,
				1041	doc="A Python set object.")
				1042
				1043	pyfrozenset = StackObject(
				1044	name="frozenset",
				1045	obtype=set,
				1046	doc="A Python frozenset object.")
				1047
				1048	pybuffer = StackObject(
				1049	name='buffer',
				1050	obtype=object,
				1051	doc="A Python buffer-like object.")
				1052
				1053	anyobject = StackObject(
				1054	name='any',
				1055	obtype=object,
				1056	doc="Any kind of object whatsoever.")
				1057
				1058	markobject = StackObject(
				1059	name="mark",
				1060	obtype=StackObject,
				1061	doc="""'The mark' is a unique object.
				1062
				1063	Opcodes that operate on a variable number of objects
				1064	generally don't embed the count of objects in the opcode,
				1065	or pull it off the stack. Instead the MARK opcode is used
				1066	to push a special marker object on the stack, and then
				1067	some other opcodes grab all the objects from the top of
				1068	the stack down to (but not including) the topmost marker
				1069	object.
				1070	""")
				1071
				1072	stackslice = StackObject(
				1073	name="stackslice",
				1074	obtype=StackObject,
				1075	doc="""An object representing a contiguous slice of the stack.
				1076
				1077	This is used in conjunction with markobject, to represent all
				1078	of the stack following the topmost markobject. For example,
				1079	the POP_MARK opcode changes the stack from
				1080
				1081	[..., markobject, stackslice]
				1082	to
				1083	[...]
				1084
				1085	No matter how many object are on the stack after the topmost
				1086	markobject, POP_MARK gets rid of all of them (including the
				1087	topmost markobject too).
				1088	""")
				1089
				1090	##############################################################################
				1091	# Descriptors for pickle opcodes.
				1092
				1093	class OpcodeInfo(object):
				1094
				1095	__slots__ = (
				1096	# symbolic name of opcode; a string
				1097	'name',
				1098
				1099	# the code used in a bytestream to represent the opcode; a
				1100	# one-character string
				1101	'code',
				1102
				1103	# If the opcode has an argument embedded in the byte string, an
				1104	# instance of ArgumentDescriptor specifying its type. Note that
				1105	# arg.reader(s) can be used to read and decode the argument from
				1106	# the bytestream s, and arg.doc documents the format of the raw
				1107	# argument bytes. If the opcode doesn't have an argument embedded
				1108	# in the bytestream, arg should be None.
				1109	'arg',
				1110
				1111	# what the stack looks like before this opcode runs; a list
				1112	'stack_before',
				1113
				1114	# what the stack looks like after this opcode runs; a list
				1115	'stack_after',
				1116
				1117	# the protocol number in which this opcode was introduced; an int
				1118	'proto',
				1119
				1120	# human-readable docs for this opcode; a string
				1121	'doc',
				1122	)
				1123
				1124	def __init__(self, name, code, arg,
				1125	stack_before, stack_after, proto, doc):
				1126	assert isinstance(name, str)
				1127	self.name = name
				1128
				1129	assert isinstance(code, str)
				1130	assert len(code) == 1
				1131	self.code = code
				1132
				1133	assert arg is None or isinstance(arg, ArgumentDescriptor)
				1134	self.arg = arg
				1135
				1136	assert isinstance(stack_before, list)
				1137	for x in stack_before:
				1138	assert isinstance(x, StackObject)
				1139	self.stack_before = stack_before
				1140
				1141	assert isinstance(stack_after, list)
				1142	for x in stack_after:
				1143	assert isinstance(x, StackObject)
				1144	self.stack_after = stack_after
				1145
				1146	assert isinstance(proto, int) and 0 <= proto <= pickle.HIGHEST_PROTOCOL
				1147	self.proto = proto
				1148
				1149	assert isinstance(doc, str)
				1150	self.doc = doc
				1151
				1152	I = OpcodeInfo
				1153	opcodes = [
				1154
				1155	# Ways to spell integers.
				1156
				1157	I(name='INT',
				1158	code='I',
				1159	arg=decimalnl_short,
				1160	stack_before=[],
				1161	stack_after=[pyinteger_or_bool],
				1162	proto=0,
				1163	doc="""Push an integer or bool.
				1164
				1165	The argument is a newline-terminated decimal literal string.
				1166
				1167	The intent may have been that this always fit in a short Python int,
				1168	but INT can be generated in pickles written on a 64-bit box that
				1169	require a Python long on a 32-bit box. The difference between this
				1170	and LONG then is that INT skips a trailing 'L', and produces a short
				1171	int whenever possible.
				1172
				1173	Another difference is due to that, when bool was introduced as a
				1174	distinct type in 2.3, builtin names True and False were also added to
				1175	2.2.2, mapping to ints 1 and 0. For compatibility in both directions,
				1176	True gets pickled as INT + "I01\\n", and False as INT + "I00\\n".
				1177	Leading zeroes are never produced for a genuine integer. The 2.3
				1178	(and later) unpicklers special-case these and return bool instead;
				1179	earlier unpicklers ignore the leading "0" and return the int.
				1180	"""),
				1181
				1182	I(name='BININT',
				1183	code='J',
				1184	arg=int4,
				1185	stack_before=[],
				1186	stack_after=[pyint],
				1187	proto=1,
				1188	doc="""Push a four-byte signed integer.
				1189
				1190	This handles the full range of Python (short) integers on a 32-bit
				1191	box, directly as binary bytes (1 for the opcode and 4 for the integer).
				1192	If the integer is non-negative and fits in 1 or 2 bytes, pickling via
				1193	BININT1 or BININT2 saves space.
				1194	"""),
				1195
				1196	I(name='BININT1',
				1197	code='K',
				1198	arg=uint1,
				1199	stack_before=[],
				1200	stack_after=[pyint],
				1201	proto=1,
				1202	doc="""Push a one-byte unsigned integer.
				1203
				1204	This is a space optimization for pickling very small non-negative ints,
				1205	in range(256).
				1206	"""),
				1207
				1208	I(name='BININT2',
				1209	code='M',
				1210	arg=uint2,
				1211	stack_before=[],
				1212	stack_after=[pyint],
				1213	proto=1,
				1214	doc="""Push a two-byte unsigned integer.
				1215
				1216	This is a space optimization for pickling small positive ints, in
				1217	range(256, 2**16). Integers in range(256) can also be pickled via
				1218	BININT2, but BININT1 instead saves a byte.
				1219	"""),
				1220
				1221	I(name='LONG',
				1222	code='L',
				1223	arg=decimalnl_long,
				1224	stack_before=[],
				1225	stack_after=[pyint],
				1226	proto=0,
				1227	doc="""Push a long integer.
				1228
				1229	The same as INT, except that the literal ends with 'L', and always
				1230	unpickles to a Python long. There doesn't seem a real purpose to the
				1231	trailing 'L'.
				1232
				1233	Note that LONG takes time quadratic in the number of digits when
				1234	unpickling (this is simply due to the nature of decimal->binary
				1235	conversion). Proto 2 added linear-time (in C; still quadratic-time
				1236	in Python) LONG1 and LONG4 opcodes.
				1237	"""),
				1238
				1239	I(name="LONG1",
				1240	code='\x8a',
				1241	arg=long1,
				1242	stack_before=[],
				1243	stack_after=[pyint],
				1244	proto=2,
				1245	doc="""Long integer using one-byte length.
				1246
				1247	A more efficient encoding of a Python long; the long1 encoding
				1248	says it all."""),
				1249
				1250	I(name="LONG4",
				1251	code='\x8b',
				1252	arg=long4,
				1253	stack_before=[],
				1254	stack_after=[pyint],
				1255	proto=2,
				1256	doc="""Long integer using found-byte length.
				1257
				1258	A more efficient encoding of a Python long; the long4 encoding
				1259	says it all."""),
				1260
				1261	# Ways to spell strings (8-bit, not Unicode).
				1262
				1263	I(name='STRING',
				1264	code='S',
				1265	arg=stringnl,
				1266	stack_before=[],
				1267	stack_after=[pybytes_or_str],
				1268	proto=0,
				1269	doc="""Push a Python string object.
				1270
				1271	The argument is a repr-style string, with bracketing quote characters,
				1272	and perhaps embedded escapes. The argument extends until the next
				1273	newline character. These are usually decoded into a str instance
				1274	using the encoding given to the Unpickler constructor. or the default,
				1275	'ASCII'. If the encoding given was 'bytes' however, they will be
				1276	decoded as bytes object instead.
				1277	"""),
				1278
				1279	I(name='BINSTRING',
				1280	code='T',
				1281	arg=string4,
				1282	stack_before=[],
				1283	stack_after=[pybytes_or_str],
				1284	proto=1,
				1285	doc="""Push a Python string object.
				1286
				1287	There are two arguments: the first is a 4-byte little-endian
				1288	signed int giving the number of bytes in the string, and the
				1289	second is that many bytes, which are taken literally as the string
				1290	content. These are usually decoded into a str instance using the
				1291	encoding given to the Unpickler constructor. or the default,
				1292	'ASCII'. If the encoding given was 'bytes' however, they will be
				1293	decoded as bytes object instead.
				1294	"""),
				1295
				1296	I(name='SHORT_BINSTRING',
				1297	code='U',
				1298	arg=string1,
				1299	stack_before=[],
				1300	stack_after=[pybytes_or_str],
				1301	proto=1,
				1302	doc="""Push a Python string object.
				1303
				1304	There are two arguments: the first is a 1-byte unsigned int giving
				1305	the number of bytes in the string, and the second is that many
				1306	bytes, which are taken literally as the string content. These are
				1307	usually decoded into a str instance using the encoding given to
				1308	the Unpickler constructor. or the default, 'ASCII'. If the
				1309	encoding given was 'bytes' however, they will be decoded as bytes
				1310	object instead.
				1311	"""),
				1312
				1313	# Bytes (protocol 3 and higher)
				1314
				1315	I(name='BINBYTES',
				1316	code='B',
				1317	arg=bytes4,
				1318	stack_before=[],
				1319	stack_after=[pybytes],
				1320	proto=3,
				1321	doc="""Push a Python bytes object.
				1322
				1323	There are two arguments: the first is a 4-byte little-endian unsigned int
				1324	giving the number of bytes, and the second is that many bytes, which are
				1325	taken literally as the bytes content.
				1326	"""),
				1327
				1328	I(name='SHORT_BINBYTES',
				1329	code='C',
				1330	arg=bytes1,
				1331	stack_before=[],
				1332	stack_after=[pybytes],
				1333	proto=3,
				1334	doc="""Push a Python bytes object.
				1335
				1336	There are two arguments: the first is a 1-byte unsigned int giving
				1337	the number of bytes, and the second is that many bytes, which are taken
				1338	literally as the string content.
				1339	"""),
				1340
				1341	I(name='BINBYTES8',
				1342	code='\x8e',
				1343	arg=bytes8,
				1344	stack_before=[],
				1345	stack_after=[pybytes],
				1346	proto=4,
				1347	doc="""Push a Python bytes object.
				1348
				1349	There are two arguments: the first is an 8-byte unsigned int giving
				1350	the number of bytes in the string, and the second is that many bytes,
				1351	which are taken literally as the string content.
				1352	"""),
				1353
				1354	# Bytearray (protocol 5 and higher)
				1355
				1356	I(name='BYTEARRAY8',
				1357	code='\x96',
				1358	arg=bytearray8,
				1359	stack_before=[],
				1360	stack_after=[pybytearray],
				1361	proto=5,
				1362	doc="""Push a Python bytearray object.
				1363
				1364	There are two arguments: the first is an 8-byte unsigned int giving
				1365	the number of bytes in the bytearray, and the second is that many bytes,
				1366	which are taken literally as the bytearray content.
				1367	"""),
				1368
				1369	# Out-of-band buffer (protocol 5 and higher)
				1370
				1371	I(name='NEXT_BUFFER',
				1372	code='\x97',
				1373	arg=None,
				1374	stack_before=[],
				1375	stack_after=[pybuffer],
				1376	proto=5,
				1377	doc="Push an out-of-band buffer object."),
				1378
				1379	I(name='READONLY_BUFFER',
				1380	code='\x98',
				1381	arg=None,
				1382	stack_before=[pybuffer],
				1383	stack_after=[pybuffer],
				1384	proto=5,
				1385	doc="Make an out-of-band buffer object read-only."),
				1386
				1387	# Ways to spell None.
				1388
				1389	I(name='NONE',
				1390	code='N',
				1391	arg=None,
				1392	stack_before=[],
				1393	stack_after=[pynone],
				1394	proto=0,
				1395	doc="Push None on the stack."),
				1396
				1397	# Ways to spell bools, starting with proto 2. See INT for how this was
				1398	# done before proto 2.
				1399
				1400	I(name='NEWTRUE',
				1401	code='\x88',
				1402	arg=None,
				1403	stack_before=[],
				1404	stack_after=[pybool],
				1405	proto=2,
				1406	doc="Push True onto the stack."),
				1407
				1408	I(name='NEWFALSE',
				1409	code='\x89',
				1410	arg=None,
				1411	stack_before=[],
				1412	stack_after=[pybool],
				1413	proto=2,
				1414	doc="Push False onto the stack."),
				1415
				1416	# Ways to spell Unicode strings.
				1417
				1418	I(name='UNICODE',
				1419	code='V',
				1420	arg=unicodestringnl,
				1421	stack_before=[],
				1422	stack_after=[pyunicode],
				1423	proto=0, # this may be pure-text, but it's a later addition
				1424	doc="""Push a Python Unicode string object.
				1425
				1426	The argument is a raw-unicode-escape encoding of a Unicode string,
				1427	and so may contain embedded escape sequences. The argument extends
				1428	until the next newline character.
				1429	"""),
				1430
				1431	I(name='SHORT_BINUNICODE',
				1432	code='\x8c',
				1433	arg=unicodestring1,
				1434	stack_before=[],
				1435	stack_after=[pyunicode],
				1436	proto=4,
				1437	doc="""Push a Python Unicode string object.
				1438
				1439	There are two arguments: the first is a 1-byte little-endian signed int
				1440	giving the number of bytes in the string. The second is that many
				1441	bytes, and is the UTF-8 encoding of the Unicode string.
				1442	"""),
				1443
				1444	I(name='BINUNICODE',
				1445	code='X',
				1446	arg=unicodestring4,
				1447	stack_before=[],
				1448	stack_after=[pyunicode],
				1449	proto=1,
				1450	doc="""Push a Python Unicode string object.
				1451
				1452	There are two arguments: the first is a 4-byte little-endian unsigned int
				1453	giving the number of bytes in the string. The second is that many
				1454	bytes, and is the UTF-8 encoding of the Unicode string.
				1455	"""),
				1456
				1457	I(name='BINUNICODE8',
				1458	code='\x8d',
				1459	arg=unicodestring8,
				1460	stack_before=[],
				1461	stack_after=[pyunicode],
				1462	proto=4,
				1463	doc="""Push a Python Unicode string object.
				1464
				1465	There are two arguments: the first is an 8-byte little-endian signed int
				1466	giving the number of bytes in the string. The second is that many
				1467	bytes, and is the UTF-8 encoding of the Unicode string.
				1468	"""),
				1469
				1470	# Ways to spell floats.
				1471
				1472	I(name='FLOAT',
				1473	code='F',
				1474	arg=floatnl,
				1475	stack_before=[],
				1476	stack_after=[pyfloat],
				1477	proto=0,
				1478	doc="""Newline-terminated decimal float literal.
				1479
				1480	The argument is repr(a_float), and in general requires 17 significant
				1481	digits for roundtrip conversion to be an identity (this is so for
				1482	IEEE-754 double precision values, which is what Python float maps to
				1483	on most boxes).
				1484
				1485	In general, FLOAT cannot be used to transport infinities, NaNs, or
				1486	minus zero across boxes (or even on a single box, if the platform C
				1487	library can't read the strings it produces for such things -- Windows
				1488	is like that), but may do less damage than BINFLOAT on boxes with
				1489	greater precision or dynamic range than IEEE-754 double.
				1490	"""),
				1491
				1492	I(name='BINFLOAT',
				1493	code='G',
				1494	arg=float8,
				1495	stack_before=[],
				1496	stack_after=[pyfloat],
				1497	proto=1,
				1498	doc="""Float stored in binary form, with 8 bytes of data.
				1499
				1500	This generally requires less than half the space of FLOAT encoding.
				1501	In general, BINFLOAT cannot be used to transport infinities, NaNs, or
				1502	minus zero, raises an exception if the exponent exceeds the range of
				1503	an IEEE-754 double, and retains no more than 53 bits of precision (if
				1504	there are more than that, "add a half and chop" rounding is used to
				1505	cut it back to 53 significant bits).
				1506	"""),
				1507
				1508	# Ways to build lists.
				1509
				1510	I(name='EMPTY_LIST',
				1511	code=']',
				1512	arg=None,
				1513	stack_before=[],
				1514	stack_after=[pylist],
				1515	proto=1,
				1516	doc="Push an empty list."),
				1517
				1518	I(name='APPEND',
				1519	code='a',
				1520	arg=None,
				1521	stack_before=[pylist, anyobject],
				1522	stack_after=[pylist],
				1523	proto=0,
				1524	doc="""Append an object to a list.
				1525
				1526	Stack before: ... pylist anyobject
				1527	Stack after: ... pylist+[anyobject]
				1528
				1529	although pylist is really extended in-place.
				1530	"""),
				1531
				1532	I(name='APPENDS',
				1533	code='e',
				1534	arg=None,
				1535	stack_before=[pylist, markobject, stackslice],
				1536	stack_after=[pylist],
				1537	proto=1,
				1538	doc="""Extend a list by a slice of stack objects.
				1539
				1540	Stack before: ... pylist markobject stackslice
				1541	Stack after: ... pylist+stackslice
				1542
				1543	although pylist is really extended in-place.
				1544	"""),
				1545
				1546	I(name='LIST',
				1547	code='l',
				1548	arg=None,
				1549	stack_before=[markobject, stackslice],
				1550	stack_after=[pylist],
				1551	proto=0,
				1552	doc="""Build a list out of the topmost stack slice, after markobject.
				1553
				1554	All the stack entries following the topmost markobject are placed into
				1555	a single Python list, which single list object replaces all of the
				1556	stack from the topmost markobject onward. For example,
				1557
				1558	Stack before: ... markobject 1 2 3 'abc'
				1559	Stack after: ... [1, 2, 3, 'abc']
				1560	"""),
				1561
				1562	# Ways to build tuples.
				1563
				1564	I(name='EMPTY_TUPLE',
				1565	code=')',
				1566	arg=None,
				1567	stack_before=[],
				1568	stack_after=[pytuple],
				1569	proto=1,
				1570	doc="Push an empty tuple."),
				1571
				1572	I(name='TUPLE',
				1573	code='t',
				1574	arg=None,
				1575	stack_before=[markobject, stackslice],
				1576	stack_after=[pytuple],
				1577	proto=0,
				1578	doc="""Build a tuple out of the topmost stack slice, after markobject.
				1579
				1580	All the stack entries following the topmost markobject are placed into
				1581	a single Python tuple, which single tuple object replaces all of the
				1582	stack from the topmost markobject onward. For example,
				1583
				1584	Stack before: ... markobject 1 2 3 'abc'
				1585	Stack after: ... (1, 2, 3, 'abc')
				1586	"""),
				1587
				1588	I(name='TUPLE1',
				1589	code='\x85',
				1590	arg=None,
				1591	stack_before=[anyobject],
				1592	stack_after=[pytuple],
				1593	proto=2,
				1594	doc="""Build a one-tuple out of the topmost item on the stack.
				1595
				1596	This code pops one value off the stack and pushes a tuple of
				1597	length 1 whose one item is that value back onto it. In other
				1598	words:
				1599
				1600	stack[-1] = tuple(stack[-1:])
				1601	"""),
				1602
				1603	I(name='TUPLE2',
				1604	code='\x86',
				1605	arg=None,
				1606	stack_before=[anyobject, anyobject],
				1607	stack_after=[pytuple],
				1608	proto=2,
				1609	doc="""Build a two-tuple out of the top two items on the stack.
				1610
				1611	This code pops two values off the stack and pushes a tuple of
				1612	length 2 whose items are those values back onto it. In other
				1613	words:
				1614
				1615	stack[-2:] = [tuple(stack[-2:])]
				1616	"""),
				1617
				1618	I(name='TUPLE3',
				1619	code='\x87',
				1620	arg=None,
				1621	stack_before=[anyobject, anyobject, anyobject],
				1622	stack_after=[pytuple],
				1623	proto=2,
				1624	doc="""Build a three-tuple out of the top three items on the stack.
				1625
				1626	This code pops three values off the stack and pushes a tuple of
				1627	length 3 whose items are those values back onto it. In other
				1628	words:
				1629
				1630	stack[-3:] = [tuple(stack[-3:])]
				1631	"""),
				1632
				1633	# Ways to build dicts.
				1634
				1635	I(name='EMPTY_DICT',
				1636	code='}',
				1637	arg=None,
				1638	stack_before=[],
				1639	stack_after=[pydict],
				1640	proto=1,
				1641	doc="Push an empty dict."),
				1642
				1643	I(name='DICT',
				1644	code='d',
				1645	arg=None,
				1646	stack_before=[markobject, stackslice],
				1647	stack_after=[pydict],
				1648	proto=0,
				1649	doc="""Build a dict out of the topmost stack slice, after markobject.
				1650
				1651	All the stack entries following the topmost markobject are placed into
				1652	a single Python dict, which single dict object replaces all of the
				1653	stack from the topmost markobject onward. The stack slice alternates
				1654	key, value, key, value, .... For example,
				1655
				1656	Stack before: ... markobject 1 2 3 'abc'
				1657	Stack after: ... {1: 2, 3: 'abc'}
				1658	"""),
				1659
				1660	I(name='SETITEM',
				1661	code='s',
				1662	arg=None,
				1663	stack_before=[pydict, anyobject, anyobject],
				1664	stack_after=[pydict],
				1665	proto=0,
				1666	doc="""Add a key+value pair to an existing dict.
				1667
				1668	Stack before: ... pydict key value
				1669	Stack after: ... pydict
				1670
				1671	where pydict has been modified via pydict[key] = value.
				1672	"""),
				1673
				1674	I(name='SETITEMS',
				1675	code='u',
				1676	arg=None,
				1677	stack_before=[pydict, markobject, stackslice],
				1678	stack_after=[pydict],
				1679	proto=1,
				1680	doc="""Add an arbitrary number of key+value pairs to an existing dict.
				1681
				1682	The slice of the stack following the topmost markobject is taken as
				1683	an alternating sequence of keys and values, added to the dict
				1684	immediately under the topmost markobject. Everything at and after the
				1685	topmost markobject is popped, leaving the mutated dict at the top
				1686	of the stack.
				1687
				1688	Stack before: ... pydict markobject key_1 value_1 ... key_n value_n
				1689	Stack after: ... pydict
				1690
				1691	where pydict has been modified via pydict[key_i] = value_i for i in
				1692	1, 2, ..., n, and in that order.
				1693	"""),
				1694
				1695	# Ways to build sets
				1696
				1697	I(name='EMPTY_SET',
				1698	code='\x8f',
				1699	arg=None,
				1700	stack_before=[],
				1701	stack_after=[pyset],
				1702	proto=4,
				1703	doc="Push an empty set."),
				1704
				1705	I(name='ADDITEMS',
				1706	code='\x90',
				1707	arg=None,
				1708	stack_before=[pyset, markobject, stackslice],
				1709	stack_after=[pyset],
				1710	proto=4,
				1711	doc="""Add an arbitrary number of items to an existing set.
				1712
				1713	The slice of the stack following the topmost markobject is taken as
				1714	a sequence of items, added to the set immediately under the topmost
				1715	markobject. Everything at and after the topmost markobject is popped,
				1716	leaving the mutated set at the top of the stack.
				1717
				1718	Stack before: ... pyset markobject item_1 ... item_n
				1719	Stack after: ... pyset
				1720
				1721	where pyset has been modified via pyset.add(item_i) = item_i for i in
				1722	1, 2, ..., n, and in that order.
				1723	"""),
				1724
				1725	# Way to build frozensets
				1726
				1727	I(name='FROZENSET',
				1728	code='\x91',
				1729	arg=None,
				1730	stack_before=[markobject, stackslice],
				1731	stack_after=[pyfrozenset],
				1732	proto=4,
				1733	doc="""Build a frozenset out of the topmost slice, after markobject.
				1734
				1735	All the stack entries following the topmost markobject are placed into
				1736	a single Python frozenset, which single frozenset object replaces all
				1737	of the stack from the topmost markobject onward. For example,
				1738
				1739	Stack before: ... markobject 1 2 3
				1740	Stack after: ... frozenset({1, 2, 3})
				1741	"""),
				1742
				1743	# Stack manipulation.
				1744
				1745	I(name='POP',
				1746	code='0',
				1747	arg=None,
				1748	stack_before=[anyobject],
				1749	stack_after=[],
				1750	proto=0,
				1751	doc="Discard the top stack item, shrinking the stack by one item."),
				1752
				1753	I(name='DUP',
				1754	code='2',
				1755	arg=None,
				1756	stack_before=[anyobject],
				1757	stack_after=[anyobject, anyobject],
				1758	proto=0,
				1759	doc="Push the top stack item onto the stack again, duplicating it."),
				1760
				1761	I(name='MARK',
				1762	code='(',
				1763	arg=None,
				1764	stack_before=[],
				1765	stack_after=[markobject],
				1766	proto=0,
				1767	doc="""Push markobject onto the stack.
				1768
				1769	markobject is a unique object, used by other opcodes to identify a
				1770	region of the stack containing a variable number of objects for them
				1771	to work on. See markobject.doc for more detail.
				1772	"""),
				1773
				1774	I(name='POP_MARK',
				1775	code='1',
				1776	arg=None,
				1777	stack_before=[markobject, stackslice],
				1778	stack_after=[],
				1779	proto=1,
				1780	doc="""Pop all the stack objects at and above the topmost markobject.
				1781
				1782	When an opcode using a variable number of stack objects is done,
				1783	POP_MARK is used to remove those objects, and to remove the markobject
				1784	that delimited their starting position on the stack.
				1785	"""),
				1786
				1787	# Memo manipulation. There are really only two operations (get and put),
				1788	# each in all-text, "short binary", and "long binary" flavors.
				1789
				1790	I(name='GET',
				1791	code='g',
				1792	arg=decimalnl_short,
				1793	stack_before=[],
				1794	stack_after=[anyobject],
				1795	proto=0,
				1796	doc="""Read an object from the memo and push it on the stack.
				1797
				1798	The index of the memo object to push is given by the newline-terminated
				1799	decimal string following. BINGET and LONG_BINGET are space-optimized
				1800	versions.
				1801	"""),
				1802
				1803	I(name='BINGET',
				1804	code='h',
				1805	arg=uint1,
				1806	stack_before=[],
				1807	stack_after=[anyobject],
				1808	proto=1,
				1809	doc="""Read an object from the memo and push it on the stack.
				1810
				1811	The index of the memo object to push is given by the 1-byte unsigned
				1812	integer following.
				1813	"""),
				1814
				1815	I(name='LONG_BINGET',
				1816	code='j',
				1817	arg=uint4,
				1818	stack_before=[],
				1819	stack_after=[anyobject],
				1820	proto=1,
				1821	doc="""Read an object from the memo and push it on the stack.
				1822
				1823	The index of the memo object to push is given by the 4-byte unsigned
				1824	little-endian integer following.
				1825	"""),
				1826
				1827	I(name='PUT',
				1828	code='p',
				1829	arg=decimalnl_short,
				1830	stack_before=[],
				1831	stack_after=[],
				1832	proto=0,
				1833	doc="""Store the stack top into the memo. The stack is not popped.
				1834
				1835	The index of the memo location to write into is given by the newline-
				1836	terminated decimal string following. BINPUT and LONG_BINPUT are
				1837	space-optimized versions.
				1838	"""),
				1839
				1840	I(name='BINPUT',
				1841	code='q',
				1842	arg=uint1,
				1843	stack_before=[],
				1844	stack_after=[],
				1845	proto=1,
				1846	doc="""Store the stack top into the memo. The stack is not popped.
				1847
				1848	The index of the memo location to write into is given by the 1-byte
				1849	unsigned integer following.
				1850	"""),
				1851
				1852	I(name='LONG_BINPUT',
				1853	code='r',
				1854	arg=uint4,
				1855	stack_before=[],
				1856	stack_after=[],
				1857	proto=1,
				1858	doc="""Store the stack top into the memo. The stack is not popped.
				1859
				1860	The index of the memo location to write into is given by the 4-byte
				1861	unsigned little-endian integer following.
				1862	"""),
				1863
				1864	I(name='MEMOIZE',
				1865	code='\x94',
				1866	arg=None,
				1867	stack_before=[anyobject],
				1868	stack_after=[anyobject],
				1869	proto=4,
				1870	doc="""Store the stack top into the memo. The stack is not popped.
				1871
				1872	The index of the memo location to write is the number of
				1873	elements currently present in the memo.
				1874	"""),
				1875
				1876	# Access the extension registry (predefined objects). Akin to the GET
				1877	# family.
				1878
				1879	I(name='EXT1',
				1880	code='\x82',
				1881	arg=uint1,
				1882	stack_before=[],
				1883	stack_after=[anyobject],
				1884	proto=2,
				1885	doc="""Extension code.
				1886
				1887	This code and the similar EXT2 and EXT4 allow using a registry
				1888	of popular objects that are pickled by name, typically classes.
				1889	It is envisioned that through a global negotiation and
				1890	registration process, third parties can set up a mapping between
				1891	ints and object names.
				1892
				1893	In order to guarantee pickle interchangeability, the extension
				1894	code registry ought to be global, although a range of codes may
				1895	be reserved for private use.
				1896
				1897	EXT1 has a 1-byte integer argument. This is used to index into the
				1898	extension registry, and the object at that index is pushed on the stack.
				1899	"""),
				1900
				1901	I(name='EXT2',
				1902	code='\x83',
				1903	arg=uint2,
				1904	stack_before=[],
				1905	stack_after=[anyobject],
				1906	proto=2,
				1907	doc="""Extension code.
				1908
				1909	See EXT1. EXT2 has a two-byte integer argument.
				1910	"""),
				1911
				1912	I(name='EXT4',
				1913	code='\x84',
				1914	arg=int4,
				1915	stack_before=[],
				1916	stack_after=[anyobject],
				1917	proto=2,
				1918	doc="""Extension code.
				1919
				1920	See EXT1. EXT4 has a four-byte integer argument.
				1921	"""),
				1922
				1923	# Push a class object, or module function, on the stack, via its module
				1924	# and name.
				1925
				1926	I(name='GLOBAL',
				1927	code='c',
				1928	arg=stringnl_noescape_pair,
				1929	stack_before=[],
				1930	stack_after=[anyobject],
				1931	proto=0,
				1932	doc="""Push a global object (module.attr) on the stack.
				1933
				1934	Two newline-terminated strings follow the GLOBAL opcode. The first is
				1935	taken as a module name, and the second as a class name. The class
				1936	object module.class is pushed on the stack. More accurately, the
				1937	object returned by self.find_class(module, class) is pushed on the
				1938	stack, so unpickling subclasses can override this form of lookup.
				1939	"""),
				1940
				1941	I(name='STACK_GLOBAL',
				1942	code='\x93',
				1943	arg=None,
				1944	stack_before=[pyunicode, pyunicode],
				1945	stack_after=[anyobject],
				1946	proto=4,
				1947	doc="""Push a global object (module.attr) on the stack.
				1948	"""),
				1949
				1950	# Ways to build objects of classes pickle doesn't know about directly
				1951	# (user-defined classes). I despair of documenting this accurately
				1952	# and comprehensibly -- you really have to read the pickle code to
				1953	# find all the special cases.
				1954
				1955	I(name='REDUCE',
				1956	code='R',
				1957	arg=None,
				1958	stack_before=[anyobject, anyobject],
				1959	stack_after=[anyobject],
				1960	proto=0,
				1961	doc="""Push an object built from a callable and an argument tuple.
				1962
				1963	The opcode is named to remind of the __reduce__() method.
				1964
				1965	Stack before: ... callable pytuple
				1966	Stack after: ... callable(*pytuple)
				1967
				1968	The callable and the argument tuple are the first two items returned
				1969	by a __reduce__ method. Applying the callable to the argtuple is
				1970	supposed to reproduce the original object, or at least get it started.
				1971	If the __reduce__ method returns a 3-tuple, the last component is an
				1972	argument to be passed to the object's __setstate__, and then the REDUCE
				1973	opcode is followed by code to create setstate's argument, and then a
				1974	BUILD opcode to apply __setstate__ to that argument.
				1975
				1976	If not isinstance(callable, type), REDUCE complains unless the
				1977	callable has been registered with the copyreg module's
				1978	safe_constructors dict, or the callable has a magic
				1979	'__safe_for_unpickling__' attribute with a true value. I'm not sure
				1980	why it does this, but I've sure seen this complaint often enough when
				1981	I didn't want to <wink>.
				1982	"""),
				1983
				1984	I(name='BUILD',
				1985	code='b',
				1986	arg=None,
				1987	stack_before=[anyobject, anyobject],
				1988	stack_after=[anyobject],
				1989	proto=0,
				1990	doc="""Finish building an object, via __setstate__ or dict update.
				1991
				1992	Stack before: ... anyobject argument
				1993	Stack after: ... anyobject
				1994
				1995	where anyobject may have been mutated, as follows:
				1996
				1997	If the object has a __setstate__ method,
				1998
				1999	anyobject.__setstate__(argument)
				2000
				2001	is called.
				2002
				2003	Else the argument must be a dict, the object must have a __dict__, and
				2004	the object is updated via
				2005
				2006	anyobject.__dict__.update(argument)
				2007	"""),
				2008
				2009	I(name='INST',
				2010	code='i',
				2011	arg=stringnl_noescape_pair,
				2012	stack_before=[markobject, stackslice],
				2013	stack_after=[anyobject],
				2014	proto=0,
				2015	doc="""Build a class instance.
				2016
				2017	This is the protocol 0 version of protocol 1's OBJ opcode.
				2018	INST is followed by two newline-terminated strings, giving a
				2019	module and class name, just as for the GLOBAL opcode (and see
				2020	GLOBAL for more details about that). self.find_class(module, name)
				2021	is used to get a class object.
				2022
				2023	In addition, all the objects on the stack following the topmost
				2024	markobject are gathered into a tuple and popped (along with the
				2025	topmost markobject), just as for the TUPLE opcode.
				2026
				2027	Now it gets complicated. If all of these are true:
				2028
				2029	+ The argtuple is empty (markobject was at the top of the stack
				2030	at the start).
				2031
				2032	+ The class object does not have a __getinitargs__ attribute.
				2033
				2034	then we want to create an old-style class instance without invoking
				2035	its __init__() method (pickle has waffled on this over the years; not
				2036	calling __init__() is current wisdom). In this case, an instance of
				2037	an old-style dummy class is created, and then we try to rebind its
				2038	__class__ attribute to the desired class object. If this succeeds,
				2039	the new instance object is pushed on the stack, and we're done.
				2040
				2041	Else (the argtuple is not empty, it's not an old-style class object,
				2042	or the class object does have a __getinitargs__ attribute), the code
				2043	first insists that the class object have a __safe_for_unpickling__
				2044	attribute. Unlike as for the __safe_for_unpickling__ check in REDUCE,
				2045	it doesn't matter whether this attribute has a true or false value, it
				2046	only matters whether it exists (XXX this is a bug). If
				2047	__safe_for_unpickling__ doesn't exist, UnpicklingError is raised.
				2048
				2049	Else (the class object does have a __safe_for_unpickling__ attr),
				2050	the class object obtained from INST's arguments is applied to the
				2051	argtuple obtained from the stack, and the resulting instance object
				2052	is pushed on the stack.
				2053
				2054	NOTE: checks for __safe_for_unpickling__ went away in Python 2.3.
				2055	NOTE: the distinction between old-style and new-style classes does
				2056	not make sense in Python 3.
				2057	"""),
				2058
				2059	I(name='OBJ',
				2060	code='o',
				2061	arg=None,
				2062	stack_before=[markobject, anyobject, stackslice],
				2063	stack_after=[anyobject],
				2064	proto=1,
				2065	doc="""Build a class instance.
				2066
				2067	This is the protocol 1 version of protocol 0's INST opcode, and is
				2068	very much like it. The major difference is that the class object
				2069	is taken off the stack, allowing it to be retrieved from the memo
				2070	repeatedly if several instances of the same class are created. This
				2071	can be much more efficient (in both time and space) than repeatedly
				2072	embedding the module and class names in INST opcodes.
				2073
				2074	Unlike INST, OBJ takes no arguments from the opcode stream. Instead
				2075	the class object is taken off the stack, immediately above the
				2076	topmost markobject:
				2077
				2078	Stack before: ... markobject classobject stackslice
				2079	Stack after: ... new_instance_object
				2080
				2081	As for INST, the remainder of the stack above the markobject is
				2082	gathered into an argument tuple, and then the logic seems identical,
				2083	except that no __safe_for_unpickling__ check is done (XXX this is
				2084	a bug). See INST for the gory details.
				2085
				2086	NOTE: In Python 2.3, INST and OBJ are identical except for how they
				2087	get the class object. That was always the intent; the implementations
				2088	had diverged for accidental reasons.
				2089	"""),
				2090
				2091	I(name='NEWOBJ',
				2092	code='\x81',
				2093	arg=None,
				2094	stack_before=[anyobject, anyobject],
				2095	stack_after=[anyobject],
				2096	proto=2,
				2097	doc="""Build an object instance.
				2098
				2099	The stack before should be thought of as containing a class
				2100	object followed by an argument tuple (the tuple being the stack
				2101	top). Call these cls and args. They are popped off the stack,
				2102	and the value returned by cls.__new__(cls, *args) is pushed back
				2103	onto the stack.
				2104	"""),
				2105
				2106	I(name='NEWOBJ_EX',
				2107	code='\x92',
				2108	arg=None,
				2109	stack_before=[anyobject, anyobject, anyobject],
				2110	stack_after=[anyobject],
				2111	proto=4,
				2112	doc="""Build an object instance.
				2113
				2114	The stack before should be thought of as containing a class
				2115	object followed by an argument tuple and by a keyword argument dict
				2116	(the dict being the stack top). Call these cls and args. They are
				2117	popped off the stack, and the value returned by
				2118	cls.__new__(cls, args, kwargs) is pushed back onto the stack.
				2119	"""),
				2120
				2121	# Machine control.
				2122
				2123	I(name='PROTO',
				2124	code='\x80',
				2125	arg=uint1,
				2126	stack_before=[],
				2127	stack_after=[],
				2128	proto=2,
				2129	doc="""Protocol version indicator.
				2130
				2131	For protocol 2 and above, a pickle must start with this opcode.
				2132	The argument is the protocol version, an int in range(2, 256).
				2133	"""),
				2134
				2135	I(name='STOP',
				2136	code='.',
				2137	arg=None,
				2138	stack_before=[anyobject],
				2139	stack_after=[],
				2140	proto=0,
				2141	doc="""Stop the unpickling machine.
				2142
				2143	Every pickle ends with this opcode. The object at the top of the stack
				2144	is popped, and that's the result of unpickling. The stack should be
				2145	empty then.
				2146	"""),
				2147
				2148	# Framing support.
				2149
				2150	I(name='FRAME',
				2151	code='\x95',
				2152	arg=uint8,
				2153	stack_before=[],
				2154	stack_after=[],
				2155	proto=4,
				2156	doc="""Indicate the beginning of a new frame.
				2157
				2158	The unpickler may use this opcode to safely prefetch data from its
				2159	underlying stream.
				2160	"""),
				2161
				2162	# Ways to deal with persistent IDs.
				2163
				2164	I(name='PERSID',
				2165	code='P',
				2166	arg=stringnl_noescape,
				2167	stack_before=[],
				2168	stack_after=[anyobject],
				2169	proto=0,
				2170	doc="""Push an object identified by a persistent ID.
				2171
				2172	The pickle module doesn't define what a persistent ID means. PERSID's
				2173	argument is a newline-terminated str-style (no embedded escapes, no
				2174	bracketing quote characters) string, which is "the persistent ID".
				2175	The unpickler passes this string to self.persistent_load(). Whatever
				2176	object that returns is pushed on the stack. There is no implementation
				2177	of persistent_load() in Python's unpickler: it must be supplied by an
				2178	unpickler subclass.
				2179	"""),
				2180
				2181	I(name='BINPERSID',
				2182	code='Q',
				2183	arg=None,
				2184	stack_before=[anyobject],
				2185	stack_after=[anyobject],
				2186	proto=1,
				2187	doc="""Push an object identified by a persistent ID.
				2188
				2189	Like PERSID, except the persistent ID is popped off the stack (instead
				2190	of being a string embedded in the opcode bytestream). The persistent
				2191	ID is passed to self.persistent_load(), and whatever object that
				2192	returns is pushed on the stack. See PERSID for more detail.
				2193	"""),
				2194	]
				2195	del I
				2196
				2197	# Verify uniqueness of .name and .code members.
				2198	name2i = {}
				2199	code2i = {}
				2200
				2201	for i, d in enumerate(opcodes):
				2202	if d.name in name2i:
				2203	raise ValueError("repeated name %r at indices %d and %d" %
				2204	(d.name, name2i[d.name], i))
				2205	if d.code in code2i:
				2206	raise ValueError("repeated code %r at indices %d and %d" %
				2207	(d.code, code2i[d.code], i))
				2208
				2209	name2i[d.name] = i
				2210	code2i[d.code] = i
				2211
				2212	del name2i, code2i, i, d
				2213
				2214	##############################################################################
				2215	# Build a code2op dict, mapping opcode characters to OpcodeInfo records.
				2216	# Also ensure we've got the same stuff as pickle.py, although the
				2217	# introspection here is dicey.
				2218
				2219	code2op = {}
				2220	for d in opcodes:
				2221	code2op[d.code] = d
				2222	del d
				2223
				2224	def assure_pickle_consistency(verbose=False):
				2225
				2226	copy = code2op.copy()
				2227	for name in pickle.__all__:
				2228	if not re.match("[A-Z][A-Z0-9_]+$", name):
				2229	if verbose:
				2230	print("skipping %r: it doesn't look like an opcode name" % name)
				2231	continue
				2232	picklecode = getattr(pickle, name)
				2233	if not isinstance(picklecode, bytes) or len(picklecode) != 1:
				2234	if verbose:
				2235	print(("skipping %r: value %r doesn't look like a pickle "
				2236	"code" % (name, picklecode)))
				2237	continue
				2238	picklecode = picklecode.decode("latin-1")
				2239	if picklecode in copy:
				2240	if verbose:
				2241	print("checking name %r w/ code %r for consistency" % (
				2242	name, picklecode))
				2243	d = copy[picklecode]
				2244	if d.name != name:
				2245	raise ValueError("for pickle code %r, pickle.py uses name %r "
				2246	"but we're using name %r" % (picklecode,
				2247	name,
				2248	d.name))
				2249	# Forget this one. Any left over in copy at the end are a problem
				2250	# of a different kind.
				2251	del copy[picklecode]
				2252	else:
				2253	raise ValueError("pickle.py appears to have a pickle opcode with "
				2254	"name %r and code %r, but we don't" %
				2255	(name, picklecode))
				2256	if copy:
				2257	msg = ["we appear to have pickle opcodes that pickle.py doesn't have:"]
				2258	for code, d in copy.items():
				2259	msg.append(" name %r with code %r" % (d.name, code))
				2260	raise ValueError("\n".join(msg))
				2261
				2262	assure_pickle_consistency()
				2263	del assure_pickle_consistency
				2264
				2265	##############################################################################
				2266	# A pickle opcode generator.
				2267
				2268	def _genops(data, yield_end_pos=False):
				2269	if isinstance(data, bytes_types):
				2270	data = io.BytesIO(data)
				2271
				2272	if hasattr(data, "tell"):
				2273	getpos = data.tell
				2274	else:
				2275	getpos = lambda: None
				2276
				2277	while True:
				2278	pos = getpos()
				2279	code = data.read(1)
				2280	opcode = code2op.get(code.decode("latin-1"))
				2281	if opcode is None:
				2282	if code == b"":
				2283	raise ValueError("pickle exhausted before seeing STOP")
				2284	else:
				2285	raise ValueError("at position %s, opcode %r unknown" % (
				2286	"<unknown>" if pos is None else pos,
				2287	code))
				2288	if opcode.arg is None:
				2289	arg = None
				2290	else:
				2291	arg = opcode.arg.reader(data)
				2292	if yield_end_pos:
				2293	yield opcode, arg, pos, getpos()
				2294	else:
				2295	yield opcode, arg, pos
				2296	if code == b'.':
				2297	assert opcode.name == 'STOP'
				2298	break
				2299
				2300	def genops(pickle):
				2301	"""Generate all the opcodes in a pickle.
				2302
				2303	'pickle' is a file-like object, or string, containing the pickle.
				2304
				2305	Each opcode in the pickle is generated, from the current pickle position,
				2306	stopping after a STOP opcode is delivered. A triple is generated for
				2307	each opcode:
				2308
				2309	opcode, arg, pos
				2310
				2311	opcode is an OpcodeInfo record, describing the current opcode.
				2312
				2313	If the opcode has an argument embedded in the pickle, arg is its decoded
				2314	value, as a Python object. If the opcode doesn't have an argument, arg
				2315	is None.
				2316
				2317	If the pickle has a tell() method, pos was the value of pickle.tell()
				2318	before reading the current opcode. If the pickle is a bytes object,
				2319	it's wrapped in a BytesIO object, and the latter's tell() result is
				2320	used. Else (the pickle doesn't have a tell(), and it's not obvious how
				2321	to query its current position) pos is None.
				2322	"""
				2323	return _genops(pickle)
				2324
				2325	##############################################################################
				2326	# A pickle optimizer.
				2327
				2328	def optimize(p):
				2329	'Optimize a pickle string by removing unused PUT opcodes'
				2330	put = 'PUT'
				2331	get = 'GET'
				2332	oldids = set() # set of all PUT ids
				2333	newids = {} # set of ids used by a GET opcode
				2334	opcodes = [] # (op, idx) or (pos, end_pos)
				2335	proto = 0
				2336	protoheader = b''
				2337	for opcode, arg, pos, end_pos in _genops(p, yield_end_pos=True):
				2338	if 'PUT' in opcode.name:
				2339	oldids.add(arg)
				2340	opcodes.append((put, arg))
				2341	elif opcode.name == 'MEMOIZE':
				2342	idx = len(oldids)
				2343	oldids.add(idx)
				2344	opcodes.append((put, idx))
				2345	elif 'FRAME' in opcode.name:
				2346	pass
				2347	elif 'GET' in opcode.name:
				2348	if opcode.proto > proto:
				2349	proto = opcode.proto
				2350	newids[arg] = None
				2351	opcodes.append((get, arg))
				2352	elif opcode.name == 'PROTO':
				2353	if arg > proto:
				2354	proto = arg
				2355	if pos == 0:
				2356	protoheader = p[pos:end_pos]
				2357	else:
				2358	opcodes.append((pos, end_pos))
				2359	else:
				2360	opcodes.append((pos, end_pos))
				2361	del oldids
				2362
				2363	# Copy the opcodes except for PUTS without a corresponding GET
				2364	out = io.BytesIO()
				2365	# Write the PROTO header before any framing
				2366	out.write(protoheader)
				2367	pickler = pickle._Pickler(out, proto)
				2368	if proto >= 4:
				2369	pickler.framer.start_framing()
				2370	idx = 0
				2371	for op, arg in opcodes:
				2372	frameless = False
				2373	if op is put:
				2374	if arg not in newids:
				2375	continue
				2376	data = pickler.put(idx)
				2377	newids[arg] = idx
				2378	idx += 1
				2379	elif op is get:
				2380	data = pickler.get(newids[arg])
				2381	else:
				2382	data = p[op:arg]
				2383	frameless = len(data) > pickler.framer._FRAME_SIZE_TARGET
				2384	pickler.framer.commit_frame(force=frameless)
				2385	if frameless:
				2386	pickler.framer.file_write(data)
				2387	else:
				2388	pickler.write(data)
				2389	pickler.framer.end_framing()
				2390	return out.getvalue()
				2391
				2392	##############################################################################
				2393	# A symbolic pickle disassembler.
				2394
				2395	def dis(pickle, out=None, memo=None, indentlevel=4, annotate=0):
				2396	"""Produce a symbolic disassembly of a pickle.
				2397
				2398	'pickle' is a file-like object, or string, containing a (at least one)
				2399	pickle. The pickle is disassembled from the current position, through
				2400	the first STOP opcode encountered.
				2401
				2402	Optional arg 'out' is a file-like object to which the disassembly is
				2403	printed. It defaults to sys.stdout.
				2404
				2405	Optional arg 'memo' is a Python dict, used as the pickle's memo. It
				2406	may be mutated by dis(), if the pickle contains PUT or BINPUT opcodes.
				2407	Passing the same memo object to another dis() call then allows disassembly
				2408	to proceed across multiple pickles that were all created by the same
				2409	pickler with the same memo. Ordinarily you don't need to worry about this.
				2410
				2411	Optional arg 'indentlevel' is the number of blanks by which to indent
				2412	a new MARK level. It defaults to 4.
				2413
				2414	Optional arg 'annotate' if nonzero instructs dis() to add short
				2415	description of the opcode on each line of disassembled output.
				2416	The value given to 'annotate' must be an integer and is used as a
				2417	hint for the column where annotation should start. The default
				2418	value is 0, meaning no annotations.
				2419
				2420	In addition to printing the disassembly, some sanity checks are made:
				2421
				2422	+ All embedded opcode arguments "make sense".
				2423
				2424	+ Explicit and implicit pop operations have enough items on the stack.
				2425
				2426	+ When an opcode implicitly refers to a markobject, a markobject is
				2427	actually on the stack.
				2428
				2429	+ A memo entry isn't referenced before it's defined.
				2430
				2431	+ The markobject isn't stored in the memo.
				2432
				2433	+ A memo entry isn't redefined.
				2434	"""
				2435
				2436	# Most of the hair here is for sanity checks, but most of it is needed
				2437	# anyway to detect when a protocol 0 POP takes a MARK off the stack
				2438	# (which in turn is needed to indent MARK blocks correctly).
				2439
				2440	stack = [] # crude emulation of unpickler stack
				2441	if memo is None:
				2442	memo = {} # crude emulation of unpickler memo
				2443	maxproto = -1 # max protocol number seen
				2444	markstack = [] # bytecode positions of MARK opcodes
				2445	indentchunk = ' ' * indentlevel
				2446	errormsg = None
				2447	annocol = annotate # column hint for annotations
				2448	for opcode, arg, pos in genops(pickle):
				2449	if pos is not None:
				2450	print("%5d:" % pos, end=' ', file=out)
				2451
				2452	line = "%-4s %s%s" % (repr(opcode.code)[1:-1],
				2453	indentchunk * len(markstack),
				2454	opcode.name)
				2455
				2456	maxproto = max(maxproto, opcode.proto)
				2457	before = opcode.stack_before # don't mutate
				2458	after = opcode.stack_after # don't mutate
				2459	numtopop = len(before)
				2460
				2461	# See whether a MARK should be popped.
				2462	markmsg = None
				2463	if markobject in before or (opcode.name == "POP" and
				2464	stack and
				2465	stack[-1] is markobject):
				2466	assert markobject not in after
				2467	if __debug__:
				2468	if markobject in before:
				2469	assert before[-1] is stackslice
				2470	if markstack:
				2471	markpos = markstack.pop()
				2472	if markpos is None:
				2473	markmsg = "(MARK at unknown opcode offset)"
				2474	else:
				2475	markmsg = "(MARK at %d)" % markpos
				2476	# Pop everything at and after the topmost markobject.
				2477	while stack[-1] is not markobject:
				2478	stack.pop()
				2479	stack.pop()
				2480	# Stop later code from popping too much.
				2481	try:
				2482	numtopop = before.index(markobject)
				2483	except ValueError:
				2484	assert opcode.name == "POP"
				2485	numtopop = 0
				2486	else:
				2487	errormsg = markmsg = "no MARK exists on stack"
				2488
				2489	# Check for correct memo usage.
				2490	if opcode.name in ("PUT", "BINPUT", "LONG_BINPUT", "MEMOIZE"):
				2491	if opcode.name == "MEMOIZE":
				2492	memo_idx = len(memo)
				2493	markmsg = "(as %d)" % memo_idx
				2494	else:
				2495	assert arg is not None
				2496	memo_idx = arg
				2497	if memo_idx in memo:
				2498	errormsg = "memo key %r already defined" % arg
				2499	elif not stack:
				2500	errormsg = "stack is empty -- can't store into memo"
				2501	elif stack[-1] is markobject:
				2502	errormsg = "can't store markobject in the memo"
				2503	else:
				2504	memo[memo_idx] = stack[-1]
				2505	elif opcode.name in ("GET", "BINGET", "LONG_BINGET"):
				2506	if arg in memo:
				2507	assert len(after) == 1
				2508	after = [memo[arg]] # for better stack emulation
				2509	else:
				2510	errormsg = "memo key %r has never been stored into" % arg
				2511
				2512	if arg is not None or markmsg:
				2513	# make a mild effort to align arguments
				2514	line += ' ' * (10 - len(opcode.name))
				2515	if arg is not None:
				2516	line += ' ' + repr(arg)
				2517	if markmsg:
				2518	line += ' ' + markmsg
				2519	if annotate:
				2520	line += ' ' * (annocol - len(line))
				2521	# make a mild effort to align annotations
				2522	annocol = len(line)
				2523	if annocol > 50:
				2524	annocol = annotate
				2525	line += ' ' + opcode.doc.split('\n', 1)[0]
				2526	print(line, file=out)
				2527
				2528	if errormsg:
				2529	# Note that we delayed complaining until the offending opcode
				2530	# was printed.
				2531	raise ValueError(errormsg)
				2532
				2533	# Emulate the stack effects.
				2534	if len(stack) < numtopop:
				2535	raise ValueError("tries to pop %d items from stack with "
				2536	"only %d items" % (numtopop, len(stack)))
				2537	if numtopop:
				2538	del stack[-numtopop:]
				2539	if markobject in after:
				2540	assert markobject not in before
				2541	markstack.append(pos)
				2542
				2543	stack.extend(after)
				2544
				2545	print("highest protocol among opcodes =", maxproto, file=out)
				2546	if stack:
				2547	raise ValueError("stack not empty after STOP: %r" % stack)
				2548
				2549	# For use in the doctest, simply as an example of a class to pickle.
				2550	class _Example:
				2551	def __init__(self, value):
				2552	self.value = value
				2553
				2554	_dis_test = r"""
				2555	>>> import pickle
				2556	>>> x = [1, 2, (3, 4), {b'abc': "def"}]
				2557	>>> pkl0 = pickle.dumps(x, 0)
				2558	>>> dis(pkl0)
				2559	0: ( MARK
				2560	1: l LIST (MARK at 0)
				2561	2: p PUT 0
				2562	5: I INT 1
				2563	8: a APPEND
				2564	9: I INT 2
				2565	12: a APPEND
				2566	13: ( MARK
				2567	14: I INT 3
				2568	17: I INT 4
				2569	20: t TUPLE (MARK at 13)
				2570	21: p PUT 1
				2571	24: a APPEND
				2572	25: ( MARK
				2573	26: d DICT (MARK at 25)
				2574	27: p PUT 2
				2575	30: c GLOBAL '_codecs encode'
				2576	46: p PUT 3
				2577	49: ( MARK
				2578	50: V UNICODE 'abc'
				2579	55: p PUT 4
				2580	58: V UNICODE 'latin1'
				2581	66: p PUT 5
				2582	69: t TUPLE (MARK at 49)
				2583	70: p PUT 6
				2584	73: R REDUCE
				2585	74: p PUT 7
				2586	77: V UNICODE 'def'
				2587	82: p PUT 8
				2588	85: s SETITEM
				2589	86: a APPEND
				2590	87: . STOP
				2591	highest protocol among opcodes = 0
				2592
				2593	Try again with a "binary" pickle.
				2594
				2595	>>> pkl1 = pickle.dumps(x, 1)
				2596	>>> dis(pkl1)
				2597	0: ] EMPTY_LIST
				2598	1: q BINPUT 0
				2599	3: ( MARK
				2600	4: K BININT1 1
				2601	6: K BININT1 2
				2602	8: ( MARK
				2603	9: K BININT1 3
				2604	11: K BININT1 4
				2605	13: t TUPLE (MARK at 8)
				2606	14: q BINPUT 1
				2607	16: } EMPTY_DICT
				2608	17: q BINPUT 2
				2609	19: c GLOBAL '_codecs encode'
				2610	35: q BINPUT 3
				2611	37: ( MARK
				2612	38: X BINUNICODE 'abc'
				2613	46: q BINPUT 4
				2614	48: X BINUNICODE 'latin1'
				2615	59: q BINPUT 5
				2616	61: t TUPLE (MARK at 37)
				2617	62: q BINPUT 6
				2618	64: R REDUCE
				2619	65: q BINPUT 7
				2620	67: X BINUNICODE 'def'
				2621	75: q BINPUT 8
				2622	77: s SETITEM
				2623	78: e APPENDS (MARK at 3)
				2624	79: . STOP
				2625	highest protocol among opcodes = 1
				2626
				2627	Exercise the INST/OBJ/BUILD family.
				2628
				2629	>>> import pickletools
				2630	>>> dis(pickle.dumps(pickletools.dis, 0))
				2631	0: c GLOBAL 'pickletools dis'
				2632	17: p PUT 0
				2633	20: . STOP
				2634	highest protocol among opcodes = 0
				2635
				2636	>>> from pickletools import _Example
				2637	>>> x = [_Example(42)] * 2
				2638	>>> dis(pickle.dumps(x, 0))
				2639	0: ( MARK
				2640	1: l LIST (MARK at 0)
				2641	2: p PUT 0
				2642	5: c GLOBAL 'copy_reg _reconstructor'
				2643	30: p PUT 1
				2644	33: ( MARK
				2645	34: c GLOBAL 'pickletools _Example'
				2646	56: p PUT 2
				2647	59: c GLOBAL '__builtin__ object'
				2648	79: p PUT 3
				2649	82: N NONE
				2650	83: t TUPLE (MARK at 33)
				2651	84: p PUT 4
				2652	87: R REDUCE
				2653	88: p PUT 5
				2654	91: ( MARK
				2655	92: d DICT (MARK at 91)
				2656	93: p PUT 6
				2657	96: V UNICODE 'value'
				2658	103: p PUT 7
				2659	106: I INT 42
				2660	110: s SETITEM
				2661	111: b BUILD
				2662	112: a APPEND
				2663	113: g GET 5
				2664	116: a APPEND
				2665	117: . STOP
				2666	highest protocol among opcodes = 0
				2667
				2668	>>> dis(pickle.dumps(x, 1))
				2669	0: ] EMPTY_LIST
				2670	1: q BINPUT 0
				2671	3: ( MARK
				2672	4: c GLOBAL 'copy_reg _reconstructor'
				2673	29: q BINPUT 1
				2674	31: ( MARK
				2675	32: c GLOBAL 'pickletools _Example'
				2676	54: q BINPUT 2
				2677	56: c GLOBAL '__builtin__ object'
				2678	76: q BINPUT 3
				2679	78: N NONE
				2680	79: t TUPLE (MARK at 31)
				2681	80: q BINPUT 4
				2682	82: R REDUCE
				2683	83: q BINPUT 5
				2684	85: } EMPTY_DICT
				2685	86: q BINPUT 6
				2686	88: X BINUNICODE 'value'
				2687	98: q BINPUT 7
				2688	100: K BININT1 42
				2689	102: s SETITEM
				2690	103: b BUILD
				2691	104: h BINGET 5
				2692	106: e APPENDS (MARK at 3)
				2693	107: . STOP
				2694	highest protocol among opcodes = 1
				2695
				2696	Try "the canonical" recursive-object test.
				2697
				2698	>>> L = []
				2699	>>> T = L,
				2700	>>> L.append(T)
				2701	>>> L[0] is T
				2702	True
				2703	>>> T[0] is L
				2704	True
				2705	>>> L[0][0] is L
				2706	True
				2707	>>> T[0][0] is T
				2708	True
				2709	>>> dis(pickle.dumps(L, 0))
				2710	0: ( MARK
				2711	1: l LIST (MARK at 0)
				2712	2: p PUT 0
				2713	5: ( MARK
				2714	6: g GET 0
				2715	9: t TUPLE (MARK at 5)
				2716	10: p PUT 1
				2717	13: a APPEND
				2718	14: . STOP
				2719	highest protocol among opcodes = 0
				2720
				2721	>>> dis(pickle.dumps(L, 1))
				2722	0: ] EMPTY_LIST
				2723	1: q BINPUT 0
				2724	3: ( MARK
				2725	4: h BINGET 0
				2726	6: t TUPLE (MARK at 3)
				2727	7: q BINPUT 1
				2728	9: a APPEND
				2729	10: . STOP
				2730	highest protocol among opcodes = 1
				2731
				2732	Note that, in the protocol 0 pickle of the recursive tuple, the disassembler
				2733	has to emulate the stack in order to realize that the POP opcode at 16 gets
				2734	rid of the MARK at 0.
				2735
				2736	>>> dis(pickle.dumps(T, 0))
				2737	0: ( MARK
				2738	1: ( MARK
				2739	2: l LIST (MARK at 1)
				2740	3: p PUT 0
				2741	6: ( MARK
				2742	7: g GET 0
				2743	10: t TUPLE (MARK at 6)
				2744	11: p PUT 1
				2745	14: a APPEND
				2746	15: 0 POP
				2747	16: 0 POP (MARK at 0)
				2748	17: g GET 1
				2749	20: . STOP
				2750	highest protocol among opcodes = 0
				2751
				2752	>>> dis(pickle.dumps(T, 1))
				2753	0: ( MARK
				2754	1: ] EMPTY_LIST
				2755	2: q BINPUT 0
				2756	4: ( MARK
				2757	5: h BINGET 0
				2758	7: t TUPLE (MARK at 4)
				2759	8: q BINPUT 1
				2760	10: a APPEND
				2761	11: 1 POP_MARK (MARK at 0)
				2762	12: h BINGET 1
				2763	14: . STOP
				2764	highest protocol among opcodes = 1
				2765
				2766	Try protocol 2.
				2767
				2768	>>> dis(pickle.dumps(L, 2))
				2769	0: \x80 PROTO 2
				2770	2: ] EMPTY_LIST
				2771	3: q BINPUT 0
				2772	5: h BINGET 0
				2773	7: \x85 TUPLE1
				2774	8: q BINPUT 1
				2775	10: a APPEND
				2776	11: . STOP
				2777	highest protocol among opcodes = 2
				2778
				2779	>>> dis(pickle.dumps(T, 2))
				2780	0: \x80 PROTO 2
				2781	2: ] EMPTY_LIST
				2782	3: q BINPUT 0
				2783	5: h BINGET 0
				2784	7: \x85 TUPLE1
				2785	8: q BINPUT 1
				2786	10: a APPEND
				2787	11: 0 POP
				2788	12: h BINGET 1
				2789	14: . STOP
				2790	highest protocol among opcodes = 2
				2791
				2792	Try protocol 3 with annotations:
				2793
				2794	>>> dis(pickle.dumps(T, 3), annotate=1)
				2795	0: \x80 PROTO 3 Protocol version indicator.
				2796	2: ] EMPTY_LIST Push an empty list.
				2797	3: q BINPUT 0 Store the stack top into the memo. The stack is not popped.
				2798	5: h BINGET 0 Read an object from the memo and push it on the stack.
				2799	7: \x85 TUPLE1 Build a one-tuple out of the topmost item on the stack.
				2800	8: q BINPUT 1 Store the stack top into the memo. The stack is not popped.
				2801	10: a APPEND Append an object to a list.
				2802	11: 0 POP Discard the top stack item, shrinking the stack by one item.
				2803	12: h BINGET 1 Read an object from the memo and push it on the stack.
				2804	14: . STOP Stop the unpickling machine.
				2805	highest protocol among opcodes = 2
				2806
				2807	"""
				2808
# Doctest exercising the memo= argument of dis().  One Pickler dumps the same
# list twice into a single stream, so the second pickle consists only of a
# BINGET of memo slot 0.  Both dis() calls below are handed the same `memo`
# dict, and the second call starts at offset 14, where the first one stopped.
_memo_test = r"""
>>> import pickle
>>> import io
>>> f = io.BytesIO()
>>> p = pickle.Pickler(f, 2)
>>> x = [1, 2, 3]
>>> p.dump(x)
>>> p.dump(x)
>>> f.seek(0)
0
>>> memo = {}
>>> dis(f, memo=memo)
0: \x80 PROTO 2
2: ] EMPTY_LIST
3: q BINPUT 0
5: ( MARK
6: K BININT1 1
8: K BININT1 2
10: K BININT1 3
12: e APPENDS (MARK at 5)
13: . STOP
highest protocol among opcodes = 2
>>> dis(f, memo=memo)
14: \x80 PROTO 2
16: h BINGET 0
18: . STOP
highest protocol among opcodes = 2
"""
				2837
				2838	__test__ = {'disassembler_test': _dis_test,
				2839	'disassembler_memo_test': _memo_test,
				2840	}
				2841
				2842	def _test():
				2843	import doctest
				2844	return doctest.testmod()
				2845
				2846	if __name__ == "__main__":
				2847	import argparse
				2848	parser = argparse.ArgumentParser(
				2849	description='disassemble one or more pickle files')
				2850	parser.add_argument(
				2851	'pickle_file', type=argparse.FileType('br'),
				2852	nargs='*', help='the pickle file')
				2853	parser.add_argument(
				2854	'-o', '--output', default=sys.stdout, type=argparse.FileType('w'),
				2855	help='the file where the output should be written')
				2856	parser.add_argument(
				2857	'-m', '--memo', action='store_true',
				2858	help='preserve memo between disassemblies')
				2859	parser.add_argument(
				2860	'-l', '--indentlevel', default=4, type=int,
				2861	help='the number of blanks by which to indent a new MARK level')
				2862	parser.add_argument(
				2863	'-a', '--annotate', action='store_true',
				2864	help='annotate each line with a short opcode description')
				2865	parser.add_argument(
				2866	'-p', '--preamble', default="==> {name} <==",
				2867	help='if more than one pickle file is specified, print this before'
				2868	' each disassembly')
				2869	parser.add_argument(
				2870	'-t', '--test', action='store_true',
				2871	help='run self-test suite')
				2872	parser.add_argument(
				2873	'-v', action='store_true',
				2874	help='run verbosely; only affects self-test run')
				2875	args = parser.parse_args()
				2876	if args.test:
				2877	_test()
				2878	else:
				2879	annotate = 30 if args.annotate else 0
				2880	if not args.pickle_file:
				2881	parser.print_help()
				2882	elif len(args.pickle_file) == 1:
				2883	dis(args.pickle_file[0], args.output, None,
				2884	args.indentlevel, annotate)
				2885	else:
				2886	memo = {} if args.memo else None
				2887	for f in args.pickle_file:
				2888	preamble = args.preamble.format(name=f.name)
				2889	args.output.write(preamble + '\n')
				2890	dis(f, args.output, memo, args.indentlevel, annotate)