"""Fallback pure Python implementation of msgpack"""

from datetime import datetime as _DateTime
import sys
import struct


PY2 = sys.version_info[0] == 2
if PY2:
    int_types = (int, long)

    def dict_iteritems(d):
        return d.iteritems()


else:
    int_types = int
    unicode = str
    xrange = range

    def dict_iteritems(d):
        return d.items()


if sys.version_info < (3, 5):
    # Ugly hack: RecursionError does not exist before Python 3.5.
    RecursionError = RuntimeError

    def _is_recursionerror(e):
        return (
            len(e.args) == 1
            and isinstance(e.args[0], str)
            and e.args[0].startswith("maximum recursion depth exceeded")
        )


else:

    def _is_recursionerror(e):
        return True


if hasattr(sys, "pypy_version_info"):
    # BytesIO is slow on PyPy; StringIO is faster. However, PyPy's own
    # StringBuilder is fastest.
    from __pypy__ import newlist_hint

    try:
        from __pypy__.builders import BytesBuilder as StringBuilder
    except ImportError:
        from __pypy__.builders import StringBuilder
    USING_STRINGBUILDER = True

    class StringIO(object):
        def __init__(self, s=b""):
            if s:
                self.builder = StringBuilder(len(s))
                self.builder.append(s)
            else:
                self.builder = StringBuilder()

        def write(self, s):
            if isinstance(s, memoryview):
                s = s.tobytes()
            elif isinstance(s, bytearray):
                s = bytes(s)
            self.builder.append(s)

        def getvalue(self):
            return self.builder.build()


else:
    USING_STRINGBUILDER = False
    from io import BytesIO as StringIO

    newlist_hint = lambda size: []


from .exceptions import BufferFull, OutOfData, ExtraData, FormatError, StackError

from .ext import ExtType, Timestamp


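# Execution modes for Unpacker._unpack(): EX_SKIP discards a value without
# constructing it, EX_CONSTRUCT builds the Python object, and the two
# *_HEADER modes only read a container header and return its length.
# The TYPE_* constants tag the kind of value _read_header() just parsed.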
EX_SKIP = 0
EX_CONSTRUCT = 1
EX_READ_ARRAY_HEADER = 2
EX_READ_MAP_HEADER = 3

TYPE_IMMEDIATE = 0
TYPE_ARRAY = 1
TYPE_MAP = 2
TYPE_RAW = 3
TYPE_BIN = 4
TYPE_EXT = 5

DEFAULT_RECURSE_LIMIT = 511


def _check_type_strict(obj, t, type=type, tuple=tuple):
    # Exact type check: instances of subclasses of t do not count as matches.
    if type(t) is tuple:
        return type(obj) in t
    else:
        return type(obj) is t


def _get_data_from_buffer(obj):
    view = memoryview(obj)
    if view.itemsize != 1:
        raise ValueError("cannot unpack from multi-byte object")
    return view


def unpackb(packed, **kwargs):
    """
    Unpack an object from `packed`.

    Raises ``ExtraData`` when *packed* contains extra bytes.
    Raises ``ValueError`` when *packed* is incomplete.
    Raises ``FormatError`` when *packed* is not valid msgpack.
    Raises ``StackError`` when *packed* is too deeply nested.
    Other exceptions can be raised during unpacking.

    See :class:`Unpacker` for options.
    """
    unpacker = Unpacker(None, max_buffer_size=len(packed), **kwargs)
    unpacker.feed(packed)
    try:
        ret = unpacker._unpack()
    except OutOfData:
        raise ValueError("Unpack failed: incomplete input")
    except RecursionError as e:
        if _is_recursionerror(e):
            raise StackError
        raise
    if unpacker._got_extradata():
        raise ExtraData(ret, unpacker._get_extradata())
    return ret

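# Example for unpackb() above (illustrative sketch only): a fixarray of three
# positive fixints decodes to a Python list by default, or a tuple when
# use_list=False.
#
#     >>> unpackb(b"\x93\x01\x02\x03")
#     [1, 2, 3]
#     >>> unpackb(b"\x93\x01\x02\x03", use_list=False)
#     (1, 2, 3)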

if sys.version_info < (2, 7, 6):

    def _unpack_from(f, b, o=0):
        """Explicit type cast for legacy struct.unpack_from"""
        return struct.unpack_from(f, bytes(b), o)


else:
    _unpack_from = struct.unpack_from


class Unpacker(object):
    """Streaming unpacker.

    Arguments:

    :param file_like:
        File-like object having `.read(n)` method.
        If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable.

    :param int read_size:
        Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`)

    :param bool use_list:
        If true, unpack msgpack array to Python list.
        Otherwise, unpack to Python tuple. (default: True)

    :param bool raw:
        If true, unpack msgpack raw to Python bytes.
        Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).

    :param int timestamp:
        Control how the timestamp type is unpacked:

            0 - Timestamp
            1 - float (seconds from the Epoch)
            2 - int (nanoseconds from the Epoch)
            3 - datetime.datetime (UTC). Python 2 is not supported.

    :param bool strict_map_key:
        If true (default), only str or bytes are accepted for map (dict) keys.

    :param callable object_hook:
        When specified, it should be callable.
        Unpacker calls it with a dict argument after unpacking msgpack map.
        (See also simplejson)

    :param callable object_pairs_hook:
        When specified, it should be callable.
        Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
        (See also simplejson)

    :param str unicode_errors:
        The error handler for decoding unicode. (default: 'strict')
        This option should be used only when you have msgpack data which
        contains invalid UTF-8 strings.

    :param int max_buffer_size:
        Limits the size of data waiting to be unpacked. 0 means 2**31-1.
        The default value is 100*1024*1024 (100MiB).
        Raises `BufferFull` exception when it is insufficient.
        You should set this parameter when unpacking data from an untrusted source.

    :param int max_str_len:
        Deprecated, use *max_buffer_size* instead.
        Limits max length of str. (default: max_buffer_size)

    :param int max_bin_len:
        Deprecated, use *max_buffer_size* instead.
        Limits max length of bin. (default: max_buffer_size)

    :param int max_array_len:
        Limits max length of array.
        (default: max_buffer_size)

    :param int max_map_len:
        Limits max length of map.
        (default: max_buffer_size//2)

    :param int max_ext_len:
        Deprecated, use *max_buffer_size* instead.
        Limits max size of ext type. (default: max_buffer_size)

    Example of streaming deserialize from file-like object::

        unpacker = Unpacker(file_like)
        for o in unpacker:
            process(o)

    Example of streaming deserialize from socket::

        unpacker = Unpacker(max_buffer_size=max_buffer_size)
        while True:
            buf = sock.recv(1024**2)
            if not buf:
                break
            unpacker.feed(buf)
            for o in unpacker:
                process(o)

    Raises ``ExtraData`` when *packed* contains extra bytes.
    Raises ``OutOfData`` when *packed* is incomplete.
    Raises ``FormatError`` when *packed* is not valid msgpack.
    Raises ``StackError`` when *packed* is too deeply nested.
    Other exceptions can be raised during unpacking.
    """

    def __init__(
        self,
        file_like=None,
        read_size=0,
        use_list=True,
        raw=False,
        timestamp=0,
        strict_map_key=True,
        object_hook=None,
        object_pairs_hook=None,
        list_hook=None,
        unicode_errors=None,
        max_buffer_size=100 * 1024 * 1024,
        ext_hook=ExtType,
        max_str_len=-1,
        max_bin_len=-1,
        max_array_len=-1,
        max_map_len=-1,
        max_ext_len=-1,
    ):
        if unicode_errors is None:
            unicode_errors = "strict"

        if file_like is None:
            self._feeding = True
        else:
            if not callable(file_like.read):
                raise TypeError("`file_like.read` must be callable")
            self.file_like = file_like
            self._feeding = False

        #: array of bytes fed.
        self._buffer = bytearray()
        #: Position we are currently reading from.
        self._buff_i = 0

        # When Unpacker is used as an iterable, between the calls to next(),
        # the buffer is not "consumed" completely, for efficiency's sake.
        # Instead, it is done sloppily. To make sure we raise BufferFull at
        # the correct moments, we have to keep track of how sloppy we were.
        # Furthermore, when the buffer is incomplete (that is: in the case
        # we raise an OutOfData) we need to roll back the buffer to the correct
        # state, which _buf_checkpoint records.
        self._buf_checkpoint = 0

        if not max_buffer_size:
            max_buffer_size = 2 ** 31 - 1
        if max_str_len == -1:
            max_str_len = max_buffer_size
        if max_bin_len == -1:
            max_bin_len = max_buffer_size
        if max_array_len == -1:
            max_array_len = max_buffer_size
        if max_map_len == -1:
            max_map_len = max_buffer_size // 2
        if max_ext_len == -1:
            max_ext_len = max_buffer_size

        self._max_buffer_size = max_buffer_size
        if read_size > self._max_buffer_size:
            raise ValueError("read_size must be smaller than max_buffer_size")
        self._read_size = read_size or min(self._max_buffer_size, 16 * 1024)
        self._raw = bool(raw)
        self._strict_map_key = bool(strict_map_key)
        self._unicode_errors = unicode_errors
        self._use_list = use_list
        if not (0 <= timestamp <= 3):
            raise ValueError("timestamp must be 0..3")
        self._timestamp = timestamp
        self._list_hook = list_hook
        self._object_hook = object_hook
        self._object_pairs_hook = object_pairs_hook
        self._ext_hook = ext_hook
        self._max_str_len = max_str_len
        self._max_bin_len = max_bin_len
        self._max_array_len = max_array_len
        self._max_map_len = max_map_len
        self._max_ext_len = max_ext_len
        self._stream_offset = 0

        if list_hook is not None and not callable(list_hook):
            raise TypeError("`list_hook` is not callable")
        if object_hook is not None and not callable(object_hook):
            raise TypeError("`object_hook` is not callable")
        if object_pairs_hook is not None and not callable(object_pairs_hook):
            raise TypeError("`object_pairs_hook` is not callable")
        if object_hook is not None and object_pairs_hook is not None:
            raise TypeError("object_pairs_hook and object_hook are mutually exclusive")
        if not callable(ext_hook):
            raise TypeError("`ext_hook` is not callable")

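    # feed() appends raw msgpack bytes to the internal buffer; objects are
    # then pulled out by iterating the Unpacker or calling unpack()/skip().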
    def feed(self, next_bytes):
        assert self._feeding
        view = _get_data_from_buffer(next_bytes)
        if len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size:
            raise BufferFull

        # Strip already-consumed data (before the checkpoint) to free space.
        if self._buf_checkpoint > 0:
            del self._buffer[: self._buf_checkpoint]
            self._buff_i -= self._buf_checkpoint
            self._buf_checkpoint = 0

        # Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython
        self._buffer.extend(view)

    def _consume(self):
        """Gets rid of the used parts of the buffer."""
        self._stream_offset += self._buff_i - self._buf_checkpoint
        self._buf_checkpoint = self._buff_i

    def _got_extradata(self):
        return self._buff_i < len(self._buffer)

    def _get_extradata(self):
        return self._buffer[self._buff_i :]

    def read_bytes(self, n):
        ret = self._read(n)
        self._consume()
        return ret

    def _read(self, n):
        # (int) -> bytearray
        self._reserve(n)
        i = self._buff_i
        self._buff_i = i + n
        return self._buffer[i : i + n]

    def _reserve(self, n):
        remain_bytes = len(self._buffer) - self._buff_i - n

        # Fast path: buffer has n bytes already
        if remain_bytes >= 0:
            return

        if self._feeding:
            self._buff_i = self._buf_checkpoint
            raise OutOfData

        # Strip buffer before checkpoint before reading file.
        if self._buf_checkpoint > 0:
            del self._buffer[: self._buf_checkpoint]
            self._buff_i -= self._buf_checkpoint
            self._buf_checkpoint = 0

        # Read from file
        remain_bytes = -remain_bytes
        while remain_bytes > 0:
            to_read_bytes = max(self._read_size, remain_bytes)
            read_data = self.file_like.read(to_read_bytes)
            if not read_data:
                break
            assert isinstance(read_data, bytes)
            self._buffer += read_data
            remain_bytes -= len(read_data)

        if len(self._buffer) < n + self._buff_i:
            self._buff_i = 0  # rollback
            raise OutOfData

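    # _read_header dispatches on the first byte of a msgpack value: the
    # fix-types encode their payload or length directly in that byte, while
    # the remaining codes (0xC0-0xDF) select a fixed-size or length-prefixed
    # encoding.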
    def _read_header(self, execute=EX_CONSTRUCT):
        typ = TYPE_IMMEDIATE
        n = 0
        obj = None
        self._reserve(1)
        b = self._buffer[self._buff_i]
        self._buff_i += 1
        if b & 0b10000000 == 0:  # positive fixint
            obj = b
        elif b & 0b11100000 == 0b11100000:  # negative fixint
            obj = -1 - (b ^ 0xFF)
        elif b & 0b11100000 == 0b10100000:  # fixstr
            n = b & 0b00011111
            typ = TYPE_RAW
            if n > self._max_str_len:
                raise ValueError("%s exceeds max_str_len(%s)" % (n, self._max_str_len))
            obj = self._read(n)
        elif b & 0b11110000 == 0b10010000:  # fixarray
            n = b & 0b00001111
            typ = TYPE_ARRAY
            if n > self._max_array_len:
                raise ValueError(
                    "%s exceeds max_array_len(%s)" % (n, self._max_array_len)
                )
        elif b & 0b11110000 == 0b10000000:  # fixmap
            n = b & 0b00001111
            typ = TYPE_MAP
            if n > self._max_map_len:
                raise ValueError("%s exceeds max_map_len(%s)" % (n, self._max_map_len))
        elif b == 0xC0:  # nil
            obj = None
        elif b == 0xC2:  # false
            obj = False
        elif b == 0xC3:  # true
            obj = True
        elif b == 0xC4:  # bin 8
            typ = TYPE_BIN
            self._reserve(1)
            n = self._buffer[self._buff_i]
            self._buff_i += 1
            if n > self._max_bin_len:
                raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len))
            obj = self._read(n)
        elif b == 0xC5:  # bin 16
            typ = TYPE_BIN
            self._reserve(2)
            n = _unpack_from(">H", self._buffer, self._buff_i)[0]
            self._buff_i += 2
            if n > self._max_bin_len:
                raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len))
            obj = self._read(n)
        elif b == 0xC6:  # bin 32
            typ = TYPE_BIN
            self._reserve(4)
            n = _unpack_from(">I", self._buffer, self._buff_i)[0]
            self._buff_i += 4
            if n > self._max_bin_len:
                raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len))
            obj = self._read(n)
        elif b == 0xC7:  # ext 8
            typ = TYPE_EXT
            self._reserve(2)
            L, n = _unpack_from("Bb", self._buffer, self._buff_i)
            self._buff_i += 2
            if L > self._max_ext_len:
                raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len))
            obj = self._read(L)
        elif b == 0xC8:  # ext 16
            typ = TYPE_EXT
            self._reserve(3)
            L, n = _unpack_from(">Hb", self._buffer, self._buff_i)
            self._buff_i += 3
            if L > self._max_ext_len:
                raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len))
            obj = self._read(L)
        elif b == 0xC9:  # ext 32
            typ = TYPE_EXT
            self._reserve(5)
            L, n = _unpack_from(">Ib", self._buffer, self._buff_i)
            self._buff_i += 5
            if L > self._max_ext_len:
                raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len))
            obj = self._read(L)
        elif b == 0xCA:  # float 32
            self._reserve(4)
            obj = _unpack_from(">f", self._buffer, self._buff_i)[0]
            self._buff_i += 4
        elif b == 0xCB:  # float 64
            self._reserve(8)
            obj = _unpack_from(">d", self._buffer, self._buff_i)[0]
            self._buff_i += 8
        elif b == 0xCC:  # uint 8
            self._reserve(1)
            obj = self._buffer[self._buff_i]
            self._buff_i += 1
        elif b == 0xCD:  # uint 16
            self._reserve(2)
            obj = _unpack_from(">H", self._buffer, self._buff_i)[0]
            self._buff_i += 2
        elif b == 0xCE:  # uint 32
            self._reserve(4)
            obj = _unpack_from(">I", self._buffer, self._buff_i)[0]
            self._buff_i += 4
        elif b == 0xCF:  # uint 64
            self._reserve(8)
            obj = _unpack_from(">Q", self._buffer, self._buff_i)[0]
            self._buff_i += 8
        elif b == 0xD0:  # int 8
            self._reserve(1)
            obj = _unpack_from("b", self._buffer, self._buff_i)[0]
            self._buff_i += 1
        elif b == 0xD1:  # int 16
            self._reserve(2)
            obj = _unpack_from(">h", self._buffer, self._buff_i)[0]
            self._buff_i += 2
        elif b == 0xD2:  # int 32
            self._reserve(4)
            obj = _unpack_from(">i", self._buffer, self._buff_i)[0]
            self._buff_i += 4
        elif b == 0xD3:  # int 64
            self._reserve(8)
            obj = _unpack_from(">q", self._buffer, self._buff_i)[0]
            self._buff_i += 8
        elif b == 0xD4:  # fixext 1
            typ = TYPE_EXT
            if self._max_ext_len < 1:
                raise ValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len))
            self._reserve(2)
            n, obj = _unpack_from("b1s", self._buffer, self._buff_i)
            self._buff_i += 2
        elif b == 0xD5:  # fixext 2
            typ = TYPE_EXT
            if self._max_ext_len < 2:
                raise ValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len))
            self._reserve(3)
            n, obj = _unpack_from("b2s", self._buffer, self._buff_i)
            self._buff_i += 3
        elif b == 0xD6:  # fixext 4
            typ = TYPE_EXT
            if self._max_ext_len < 4:
                raise ValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len))
            self._reserve(5)
            n, obj = _unpack_from("b4s", self._buffer, self._buff_i)
            self._buff_i += 5
        elif b == 0xD7:  # fixext 8
            typ = TYPE_EXT
            if self._max_ext_len < 8:
                raise ValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len))
            self._reserve(9)
            n, obj = _unpack_from("b8s", self._buffer, self._buff_i)
            self._buff_i += 9
        elif b == 0xD8:  # fixext 16
            typ = TYPE_EXT
            if self._max_ext_len < 16:
                raise ValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len))
            self._reserve(17)
            n, obj = _unpack_from("b16s", self._buffer, self._buff_i)
            self._buff_i += 17
        elif b == 0xD9:  # str 8
            typ = TYPE_RAW
            self._reserve(1)
            n = self._buffer[self._buff_i]
            self._buff_i += 1
            if n > self._max_str_len:
                raise ValueError("%s exceeds max_str_len(%s)" % (n, self._max_str_len))
            obj = self._read(n)
        elif b == 0xDA:  # str 16
            typ = TYPE_RAW
            self._reserve(2)
            (n,) = _unpack_from(">H", self._buffer, self._buff_i)
            self._buff_i += 2
            if n > self._max_str_len:
                raise ValueError("%s exceeds max_str_len(%s)" % (n, self._max_str_len))
            obj = self._read(n)
        elif b == 0xDB:  # str 32
            typ = TYPE_RAW
            self._reserve(4)
            (n,) = _unpack_from(">I", self._buffer, self._buff_i)
            self._buff_i += 4
            if n > self._max_str_len:
                raise ValueError("%s exceeds max_str_len(%s)" % (n, self._max_str_len))
            obj = self._read(n)
        elif b == 0xDC:  # array 16
            typ = TYPE_ARRAY
            self._reserve(2)
            (n,) = _unpack_from(">H", self._buffer, self._buff_i)
            self._buff_i += 2
            if n > self._max_array_len:
                raise ValueError(
                    "%s exceeds max_array_len(%s)" % (n, self._max_array_len)
                )
        elif b == 0xDD:  # array 32
            typ = TYPE_ARRAY
            self._reserve(4)
            (n,) = _unpack_from(">I", self._buffer, self._buff_i)
            self._buff_i += 4
            if n > self._max_array_len:
                raise ValueError(
                    "%s exceeds max_array_len(%s)" % (n, self._max_array_len)
                )
        elif b == 0xDE:  # map 16
            self._reserve(2)
            (n,) = _unpack_from(">H", self._buffer, self._buff_i)
            self._buff_i += 2
            if n > self._max_map_len:
                raise ValueError("%s exceeds max_map_len(%s)" % (n, self._max_map_len))
            typ = TYPE_MAP
        elif b == 0xDF:  # map 32
            self._reserve(4)
            (n,) = _unpack_from(">I", self._buffer, self._buff_i)
            self._buff_i += 4
            if n > self._max_map_len:
                raise ValueError("%s exceeds max_map_len(%s)" % (n, self._max_map_len))
            typ = TYPE_MAP
        else:
            raise FormatError("Unknown header: 0x%x" % b)
        return typ, n, obj

    def _unpack(self, execute=EX_CONSTRUCT):
        typ, n, obj = self._read_header(execute)

        if execute == EX_READ_ARRAY_HEADER:
            if typ != TYPE_ARRAY:
                raise ValueError("Expected array")
            return n
        if execute == EX_READ_MAP_HEADER:
            if typ != TYPE_MAP:
                raise ValueError("Expected map")
            return n
        # TODO should we eliminate the recursion?
        if typ == TYPE_ARRAY:
            if execute == EX_SKIP:
                for i in xrange(n):
                    # TODO check whether we need to call `list_hook`
                    self._unpack(EX_SKIP)
                return
            ret = newlist_hint(n)
            for i in xrange(n):
                ret.append(self._unpack(EX_CONSTRUCT))
            if self._list_hook is not None:
                ret = self._list_hook(ret)
            # TODO is the interaction between `list_hook` and `use_list` ok?
            return ret if self._use_list else tuple(ret)
        if typ == TYPE_MAP:
            if execute == EX_SKIP:
                for i in xrange(n):
                    # TODO check whether we need to call hooks
                    self._unpack(EX_SKIP)
                    self._unpack(EX_SKIP)
                return
            if self._object_pairs_hook is not None:
                ret = self._object_pairs_hook(
                    (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT))
                    for _ in xrange(n)
                )
            else:
                ret = {}
                for _ in xrange(n):
                    key = self._unpack(EX_CONSTRUCT)
                    if self._strict_map_key and type(key) not in (unicode, bytes):
                        raise ValueError(
                            "%s is not allowed for map key" % str(type(key))
                        )
                    if not PY2 and type(key) is str:
                        key = sys.intern(key)
                    ret[key] = self._unpack(EX_CONSTRUCT)
                if self._object_hook is not None:
                    ret = self._object_hook(ret)
            return ret
        if execute == EX_SKIP:
            return
        if typ == TYPE_RAW:
            if self._raw:
                obj = bytes(obj)
            else:
                obj = obj.decode("utf_8", self._unicode_errors)
            return obj
        if typ == TYPE_BIN:
            return bytes(obj)
        if typ == TYPE_EXT:
            if n == -1:  # timestamp
                ts = Timestamp.from_bytes(bytes(obj))
                if self._timestamp == 1:
                    return ts.to_unix()
                elif self._timestamp == 2:
                    return ts.to_unix_nano()
                elif self._timestamp == 3:
                    return ts.to_datetime()
                else:
                    return ts
            else:
                return self._ext_hook(n, bytes(obj))
        assert typ == TYPE_IMMEDIATE
        return obj

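    # Iterator protocol: each next() yields one unpacked object; when the
    # buffered data runs out, OutOfData is translated into StopIteration.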
    def __iter__(self):
        return self

    def __next__(self):
        try:
            ret = self._unpack(EX_CONSTRUCT)
            self._consume()
            return ret
        except OutOfData:
            self._consume()
            raise StopIteration
        except RecursionError:
            raise StackError

    next = __next__

    def skip(self):
        self._unpack(EX_SKIP)
        self._consume()

    def unpack(self):
        try:
            ret = self._unpack(EX_CONSTRUCT)
        except RecursionError:
            raise StackError
        self._consume()
        return ret

    def read_array_header(self):
        ret = self._unpack(EX_READ_ARRAY_HEADER)
        self._consume()
        return ret

    def read_map_header(self):
        ret = self._unpack(EX_READ_MAP_HEADER)
        self._consume()
        return ret

    def tell(self):
        return self._stream_offset


class Packer(object):
    """
    MessagePack Packer

    Usage:

        packer = Packer()
        astream.write(packer.pack(a))
        astream.write(packer.pack(b))

    Packer's constructor has some keyword arguments:

    :param callable default:
        Convert a user type to a builtin type that Packer supports.
        See also simplejson's documentation.

    :param bool use_single_float:
        Use single precision float type for float. (default: False)

    :param bool autoreset:
        Reset buffer after each pack and return its content as `bytes`. (default: True).
        If set to false, use `bytes()` to get the content and `.reset()` to clear the buffer.

    :param bool use_bin_type:
        Use bin type introduced in msgpack spec 2.0 for bytes.
        It also enables str8 type for unicode. (default: True)

    :param bool strict_types:
        If set to true, types will be checked to be exact. Classes derived
        from serializable types will not be serialized; they will be
        treated as unsupported types and forwarded to `default`.
        Additionally, tuples will not be serialized as lists.
        This is useful when trying to implement accurate serialization
        for Python types.

    :param bool datetime:
        If set to true, datetime with tzinfo is packed into Timestamp type.
        Note that the tzinfo is stripped in the timestamp.
        You can get a UTC datetime with the `timestamp=3` option of the Unpacker.
        (Python 2 is not supported).

    :param str unicode_errors:
        The error handler for encoding unicode. (default: 'strict')
        DO NOT USE THIS!! This option is kept for very specific usage.
    """

    def __init__(
        self,
        default=None,
        use_single_float=False,
        autoreset=True,
        use_bin_type=True,
        strict_types=False,
        datetime=False,
        unicode_errors=None,
    ):
        self._strict_types = strict_types
        self._use_float = use_single_float
        self._autoreset = autoreset
        self._use_bin_type = use_bin_type
        self._buffer = StringIO()
        if PY2 and datetime:
            raise ValueError("datetime is not supported in Python 2")
        self._datetime = bool(datetime)
        self._unicode_errors = unicode_errors or "strict"
        if default is not None:
            if not callable(default):
                raise TypeError("default must be callable")
        self._default = default

    def _pack(
        self,
        obj,
        nest_limit=DEFAULT_RECURSE_LIMIT,
        check=isinstance,
        check_type_strict=_check_type_strict,
    ):
        default_used = False
        if self._strict_types:
            check = check_type_strict
            list_types = list
        else:
            list_types = (list, tuple)
        while True:
            if nest_limit < 0:
                raise ValueError("recursion limit exceeded")
            if obj is None:
                return self._buffer.write(b"\xc0")
            if check(obj, bool):
                if obj:
                    return self._buffer.write(b"\xc3")
                return self._buffer.write(b"\xc2")
            if check(obj, int_types):
                # Pick the smallest msgpack integer encoding that fits obj.
                if 0 <= obj < 0x80:
                    return self._buffer.write(struct.pack("B", obj))
                if -0x20 <= obj < 0:
                    return self._buffer.write(struct.pack("b", obj))
                if 0x80 <= obj <= 0xFF:
                    return self._buffer.write(struct.pack("BB", 0xCC, obj))
                if -0x80 <= obj < 0:
                    return self._buffer.write(struct.pack(">Bb", 0xD0, obj))
                if 0xFF < obj <= 0xFFFF:
                    return self._buffer.write(struct.pack(">BH", 0xCD, obj))
                if -0x8000 <= obj < -0x80:
                    return self._buffer.write(struct.pack(">Bh", 0xD1, obj))
                if 0xFFFF < obj <= 0xFFFFFFFF:
                    return self._buffer.write(struct.pack(">BI", 0xCE, obj))
                if -0x80000000 <= obj < -0x8000:
                    return self._buffer.write(struct.pack(">Bi", 0xD2, obj))
                if 0xFFFFFFFF < obj <= 0xFFFFFFFFFFFFFFFF:
                    return self._buffer.write(struct.pack(">BQ", 0xCF, obj))
                if -0x8000000000000000 <= obj < -0x80000000:
                    return self._buffer.write(struct.pack(">Bq", 0xD3, obj))
                if not default_used and self._default is not None:
                    obj = self._default(obj)
                    default_used = True
                    continue
                raise OverflowError("Integer value out of range")
            if check(obj, (bytes, bytearray)):
                n = len(obj)
                if n >= 2 ** 32:
                    raise ValueError("%s is too large" % type(obj).__name__)
                self._pack_bin_header(n)
                return self._buffer.write(obj)
            if check(obj, unicode):
                obj = obj.encode("utf-8", self._unicode_errors)
                n = len(obj)
                if n >= 2 ** 32:
                    raise ValueError("String is too large")
                self._pack_raw_header(n)
                return self._buffer.write(obj)
            if check(obj, memoryview):
                n = len(obj) * obj.itemsize
                if n >= 2 ** 32:
                    raise ValueError("Memoryview is too large")
                self._pack_bin_header(n)
                return self._buffer.write(obj)
            if check(obj, float):
                if self._use_float:
                    return self._buffer.write(struct.pack(">Bf", 0xCA, obj))
                return self._buffer.write(struct.pack(">Bd", 0xCB, obj))
            if check(obj, (ExtType, Timestamp)):
                if check(obj, Timestamp):
                    code = -1
                    data = obj.to_bytes()
                else:
                    code = obj.code
                    data = obj.data
                assert isinstance(code, int)
                assert isinstance(data, bytes)
                L = len(data)
                if L == 1:
                    self._buffer.write(b"\xd4")
                elif L == 2:
                    self._buffer.write(b"\xd5")
                elif L == 4:
                    self._buffer.write(b"\xd6")
                elif L == 8:
                    self._buffer.write(b"\xd7")
                elif L == 16:
                    self._buffer.write(b"\xd8")
                elif L <= 0xFF:
                    self._buffer.write(struct.pack(">BB", 0xC7, L))
                elif L <= 0xFFFF:
                    self._buffer.write(struct.pack(">BH", 0xC8, L))
                else:
                    self._buffer.write(struct.pack(">BI", 0xC9, L))
                self._buffer.write(struct.pack("b", code))
                self._buffer.write(data)
                return
            if check(obj, list_types):
                n = len(obj)
                self._pack_array_header(n)
                for i in xrange(n):
                    self._pack(obj[i], nest_limit - 1)
                return
            if check(obj, dict):
                return self._pack_map_pairs(
                    len(obj), dict_iteritems(obj), nest_limit - 1
                )

            if self._datetime and check(obj, _DateTime):
                obj = Timestamp.from_datetime(obj)
                default_used = True
                continue

            if not default_used and self._default is not None:
                obj = self._default(obj)
                default_used = True
                continue
            raise TypeError("Cannot serialize %r" % (obj,))

    def pack(self, obj):
        try:
            self._pack(obj)
        except:
            self._buffer = StringIO()  # force reset
            raise
        if self._autoreset:
            ret = self._buffer.getvalue()
            self._buffer = StringIO()
            return ret

    def pack_map_pairs(self, pairs):
        self._pack_map_pairs(len(pairs), pairs)
        if self._autoreset:
            ret = self._buffer.getvalue()
            self._buffer = StringIO()
            return ret

    def pack_array_header(self, n):
        if n >= 2 ** 32:
            raise ValueError
        self._pack_array_header(n)
        if self._autoreset:
            ret = self._buffer.getvalue()
            self._buffer = StringIO()
            return ret

    def pack_map_header(self, n):
        if n >= 2 ** 32:
            raise ValueError
        self._pack_map_header(n)
        if self._autoreset:
            ret = self._buffer.getvalue()
            self._buffer = StringIO()
            return ret

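    # Illustrative sketch for pack_ext_type() below (typecode 42 is an
    # arbitrary application-defined example, not something this module
    # defines):
    #
    #     packer = Packer(autoreset=False)
    #     packer.pack_ext_type(42, b"payload")
    #     data = packer.bytes()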
    def pack_ext_type(self, typecode, data):
        if not isinstance(typecode, int):
            raise TypeError("typecode must have int type.")
        if not 0 <= typecode <= 127:
            raise ValueError("typecode should be 0-127")
        if not isinstance(data, bytes):
            raise TypeError("data must have bytes type")
        L = len(data)
        if L > 0xFFFFFFFF:
            raise ValueError("Too large data")
        if L == 1:
            self._buffer.write(b"\xd4")
        elif L == 2:
            self._buffer.write(b"\xd5")
        elif L == 4:
            self._buffer.write(b"\xd6")
        elif L == 8:
            self._buffer.write(b"\xd7")
        elif L == 16:
            self._buffer.write(b"\xd8")
        elif L <= 0xFF:
            self._buffer.write(b"\xc7" + struct.pack("B", L))
        elif L <= 0xFFFF:
            self._buffer.write(b"\xc8" + struct.pack(">H", L))
        else:
            self._buffer.write(b"\xc9" + struct.pack(">I", L))
        self._buffer.write(struct.pack("B", typecode))
        self._buffer.write(data)

    def _pack_array_header(self, n):
        if n <= 0x0F:
            return self._buffer.write(struct.pack("B", 0x90 + n))
        if n <= 0xFFFF:
            return self._buffer.write(struct.pack(">BH", 0xDC, n))
        if n <= 0xFFFFFFFF:
            return self._buffer.write(struct.pack(">BI", 0xDD, n))
        raise ValueError("Array is too large")

    def _pack_map_header(self, n):
        if n <= 0x0F:
            return self._buffer.write(struct.pack("B", 0x80 + n))
        if n <= 0xFFFF:
            return self._buffer.write(struct.pack(">BH", 0xDE, n))
        if n <= 0xFFFFFFFF:
            return self._buffer.write(struct.pack(">BI", 0xDF, n))
        raise ValueError("Dict is too large")

    def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT):
        self._pack_map_header(n)
        for (k, v) in pairs:
            self._pack(k, nest_limit - 1)
            self._pack(v, nest_limit - 1)

    def _pack_raw_header(self, n):
        if n <= 0x1F:
            self._buffer.write(struct.pack("B", 0xA0 + n))
        elif self._use_bin_type and n <= 0xFF:
            self._buffer.write(struct.pack(">BB", 0xD9, n))
        elif n <= 0xFFFF:
            self._buffer.write(struct.pack(">BH", 0xDA, n))
        elif n <= 0xFFFFFFFF:
            self._buffer.write(struct.pack(">BI", 0xDB, n))
        else:
            raise ValueError("Raw is too large")

    def _pack_bin_header(self, n):
        if not self._use_bin_type:
            return self._pack_raw_header(n)
        elif n <= 0xFF:
            return self._buffer.write(struct.pack(">BB", 0xC4, n))
        elif n <= 0xFFFF:
            return self._buffer.write(struct.pack(">BH", 0xC5, n))
        elif n <= 0xFFFFFFFF:
            return self._buffer.write(struct.pack(">BI", 0xC6, n))
        else:
            raise ValueError("Bin is too large")

    def bytes(self):
        """Return internal buffer contents as bytes object"""
        return self._buffer.getvalue()

    def reset(self):
        """Reset internal buffer.

        This method is useful only when autoreset=False.
        """
        self._buffer = StringIO()

    def getbuffer(self):
        """Return view of internal buffer."""
        if USING_STRINGBUILDER or PY2:
            return memoryview(self.bytes())
        else:
            return self._buffer.getbuffer()
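

# Round-trip sketch (illustrative only, not part of the module):
#
#     packed = Packer().pack({"compact": True, "schema": 0})
#     assert unpackb(packed) == {"compact": True, "schema": 0}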