""" codecs -- Python Codec Registry, API and helpers.


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""

import builtins
import sys

### Registry and builtin stateless codec functions

try:
    from _codecs import *
except ImportError as why:
    raise SystemError('Failed to load the builtin codecs: %s' % why)

__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
           "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
           "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
           "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
           "CodecInfo", "Codec", "IncrementalEncoder", "IncrementalDecoder",
           "StreamReader", "StreamWriter",
           "StreamReaderWriter", "StreamRecoder",
           "getencoder", "getdecoder", "getincrementalencoder",
           "getincrementaldecoder", "getreader", "getwriter",
           "encode", "decode", "iterencode", "iterdecode",
           "strict_errors", "ignore_errors", "replace_errors",
           "xmlcharrefreplace_errors",
           "backslashreplace_errors", "namereplace_errors",
           "register_error", "lookup_error"]

### Constants

#
# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)
# and its possible byte string values
# for UTF8/UTF16/UTF32 output and little/big endian machines
#

# UTF-8
BOM_UTF8 = b'\xef\xbb\xbf'

# UTF-16, little endian
BOM_LE = BOM_UTF16_LE = b'\xff\xfe'

# UTF-16, big endian
BOM_BE = BOM_UTF16_BE = b'\xfe\xff'

# UTF-32, little endian
BOM_UTF32_LE = b'\xff\xfe\x00\x00'

# UTF-32, big endian
BOM_UTF32_BE = b'\x00\x00\xfe\xff'

if sys.byteorder == 'little':

    # UTF-16, native endianness
    BOM = BOM_UTF16 = BOM_UTF16_LE

    # UTF-32, native endianness
    BOM_UTF32 = BOM_UTF32_LE

else:

    # UTF-16, native endianness
    BOM = BOM_UTF16 = BOM_UTF16_BE

    # UTF-32, native endianness
    BOM_UTF32 = BOM_UTF32_BE

# Old broken names (don't use in new code)
BOM32_LE = BOM_UTF16_LE
BOM32_BE = BOM_UTF16_BE
BOM64_LE = BOM_UTF32_LE
BOM64_BE = BOM_UTF32_BE
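
# Illustrative note (added commentary, not part of the original module): the
# BOM constants above are plain bytes objects, so callers can detect and strip
# a signature manually when a codec does not do it for them.  A minimal sketch:
#
#   >>> data = BOM_UTF8 + 'text'.encode('utf-8')
#   >>> data.startswith(BOM_UTF8)
#   True
#   >>> data[len(BOM_UTF8):].decode('utf-8')
#   'text'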


### Codec base classes (defining the API)

class CodecInfo(tuple):
    """Codec details when looking up the codec registry"""

    # Private API to allow Python 3.4 to denylist the known non-Unicode
    # codecs in the standard library. A more general mechanism to
    # reliably distinguish test encodings from other codecs will hopefully
    # be defined for Python 3.5
    #
    # See http://bugs.python.org/issue19619
    _is_text_encoding = True  # Assume codecs are text encodings by default

    def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
                incrementalencoder=None, incrementaldecoder=None, name=None,
                *, _is_text_encoding=None):
        self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
        self.name = name
        self.encode = encode
        self.decode = decode
        self.incrementalencoder = incrementalencoder
        self.incrementaldecoder = incrementaldecoder
        self.streamwriter = streamwriter
        self.streamreader = streamreader
        if _is_text_encoding is not None:
            self._is_text_encoding = _is_text_encoding
        return self

    def __repr__(self):
        return "<%s.%s object for encoding %s at %#x>" % \
                (self.__class__.__module__, self.__class__.__qualname__,
                 self.name, id(self))
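
# Illustrative example (added commentary, not part of the original module):
# lookup() returns a CodecInfo, which behaves both as a 4-tuple and as an
# object with named attributes.  'utf-8' is used here purely as an example.
#
#   >>> info = lookup('utf-8')
#   >>> info.name
#   'utf-8'
#   >>> info.decode(b'abc')       # the same callable as info[1]
#   ('abc', 3)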

class Codec:

    """ Defines the interface for stateless encoders/decoders.

        The .encode()/.decode() methods may use different error
        handling schemes by providing the errors argument. These
        string values are predefined:

         'strict' - raise a ValueError (or a subclass)
         'ignore' - ignore the character and continue with the next
         'replace' - replace with a suitable replacement character;
                     Python will use the official U+FFFD REPLACEMENT
                     CHARACTER for the builtin Unicode codecs on
                     decoding and '?' on encoding.
         'surrogateescape' - replace with private code points U+DCnn.
         'xmlcharrefreplace' - Replace with the appropriate XML
                               character reference (only for encoding).
         'backslashreplace' - Replace with backslashed escape sequences.
         'namereplace' - Replace with \\N{...} escape sequences
                         (only for encoding).

        The set of allowed values can be extended via register_error.

    """
    def encode(self, input, errors='strict'):

        """ Encodes the object input and returns a tuple (output
            object, length consumed).

            errors defines the error handling to apply. It defaults to
            'strict' handling.

            The method may not store state in the Codec instance. Use
            StreamWriter for codecs which have to keep state in order to
            make encoding efficient.

            The encoder must be able to handle zero length input and
            return an empty object of the output object type in this
            situation.

        """
        raise NotImplementedError

    def decode(self, input, errors='strict'):

        """ Decodes the object input and returns a tuple (output
            object, length consumed).

            input must be a bytes-like object, i.e. one that exposes the
            buffer protocol. Bytes objects, bytearrays and memory-mapped
            files are examples of objects providing this protocol.

            errors defines the error handling to apply. It defaults to
            'strict' handling.

            The method may not store state in the Codec instance. Use
            StreamReader for codecs which have to keep state in order to
            make decoding efficient.

            The decoder must be able to handle zero length input and
            return an empty object of the output object type in this
            situation.

        """
        raise NotImplementedError
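
# Illustrative sketch (added commentary, not part of the original module):
# a minimal stateless codec that simply delegates to the UTF-8 helpers
# exported by _codecs, showing the (output, length consumed) contract the
# Codec interface expects.  The class name is hypothetical.
#
#   class _Utf8PassThrough(Codec):
#       def encode(self, input, errors='strict'):
#           return utf_8_encode(input, errors)        # -> (bytes, consumed)
#       def decode(self, input, errors='strict'):
#           return utf_8_decode(input, errors, True)  # -> (str, consumed)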

class IncrementalEncoder(object):
    """
    An IncrementalEncoder encodes an input in multiple steps. The input can
    be passed piece by piece to the encode() method. The IncrementalEncoder
    remembers the state of the encoding process between calls to encode().
    """
    def __init__(self, errors='strict'):
        """
        Creates an IncrementalEncoder instance.

        The IncrementalEncoder may use different error handling schemes by
        providing the errors keyword argument. See the module docstring
        for a list of possible values.
        """
        self.errors = errors
        self.buffer = ""

    def encode(self, input, final=False):
        """
        Encodes input and returns the resulting object.
        """
        raise NotImplementedError

    def reset(self):
        """
        Resets the encoder to the initial state.
        """

    def getstate(self):
        """
        Return the current state of the encoder.
        """
        return 0

    def setstate(self, state):
        """
        Set the current state of the encoder. state must have been
        returned by getstate().
        """

class BufferedIncrementalEncoder(IncrementalEncoder):
    """
    This subclass of IncrementalEncoder can be used as the baseclass for an
    incremental encoder if the encoder must keep some of the output in a
    buffer between calls to encode().
    """
    def __init__(self, errors='strict'):
        IncrementalEncoder.__init__(self, errors)
        # unencoded input that is kept between calls to encode()
        self.buffer = ""

    def _buffer_encode(self, input, errors, final):
        # Override this method in subclasses: It must encode input
        # and return an (output, length consumed) tuple
        raise NotImplementedError

    def encode(self, input, final=False):
        # encode input (taking the buffer into account)
        data = self.buffer + input
        (result, consumed) = self._buffer_encode(data, self.errors, final)
        # keep unencoded input until the next call
        self.buffer = data[consumed:]
        return result

    def reset(self):
        IncrementalEncoder.reset(self)
        self.buffer = ""

    def getstate(self):
        return self.buffer or 0

    def setstate(self, state):
        self.buffer = state or ""

class IncrementalDecoder(object):
    """
    An IncrementalDecoder decodes an input in multiple steps. The input can
    be passed piece by piece to the decode() method. The IncrementalDecoder
    remembers the state of the decoding process between calls to decode().
    """
    def __init__(self, errors='strict'):
        """
        Create an IncrementalDecoder instance.

        The IncrementalDecoder may use different error handling schemes by
        providing the errors keyword argument. See the module docstring
        for a list of possible values.
        """
        self.errors = errors

    def decode(self, input, final=False):
        """
        Decode input and return the resulting object.
        """
        raise NotImplementedError

    def reset(self):
        """
        Reset the decoder to the initial state.
        """

    def getstate(self):
        """
        Return the current state of the decoder.

        This must be a (buffered_input, additional_state_info) tuple.
        buffered_input must be a bytes object containing bytes that
        were passed to decode() that have not yet been converted.
        additional_state_info must be a non-negative integer
        representing the state of the decoder WITHOUT yet having
        processed the contents of buffered_input.  In the initial state
        and after reset(), getstate() must return (b"", 0).
        """
        return (b"", 0)

    def setstate(self, state):
        """
        Set the current state of the decoder.

        state must have been returned by getstate().  The effect of
        setstate((b"", 0)) must be equivalent to reset().
        """

class BufferedIncrementalDecoder(IncrementalDecoder):
    """
    This subclass of IncrementalDecoder can be used as the baseclass for an
    incremental decoder if the decoder must be able to handle incomplete
    byte sequences.
    """
    def __init__(self, errors='strict'):
        IncrementalDecoder.__init__(self, errors)
        # undecoded input that is kept between calls to decode()
        self.buffer = b""

    def _buffer_decode(self, input, errors, final):
        # Override this method in subclasses: It must decode input
        # and return an (output, length consumed) tuple
        raise NotImplementedError

    def decode(self, input, final=False):
        # decode input (taking the buffer into account)
        data = self.buffer + input
        (result, consumed) = self._buffer_decode(data, self.errors, final)
        # keep undecoded input until the next call
        self.buffer = data[consumed:]
        return result

    def reset(self):
        IncrementalDecoder.reset(self)
        self.buffer = b""

    def getstate(self):
        # additional state info is always 0
        return (self.buffer, 0)

    def setstate(self, state):
        # ignore additional state info
        self.buffer = state[0]
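
# Illustrative example (added commentary, not part of the original module):
# decoding a byte stream that arrives in arbitrary chunks with the
# incremental API above; getincrementaldecoder() is defined further below
# and 'utf-8' is chosen purely as an example codec.
#
#   >>> dec = getincrementaldecoder('utf-8')()
#   >>> dec.decode(b'\xc3')              # first byte of a two-byte sequence
#   ''
#   >>> dec.decode(b'\xa9', final=True)  # completes U+00E9
#   'é'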

#
# The StreamWriter and StreamReader classes provide generic working
# interfaces which can be used to implement new encoding submodules
# very easily. See encodings/utf_8.py for an example on how this is
# done.
#

class StreamWriter(Codec):

    def __init__(self, stream, errors='strict'):

        """ Creates a StreamWriter instance.

            stream must be a file-like object open for writing.

            The StreamWriter may use different error handling
            schemes by providing the errors keyword argument. These
            parameters are predefined:

             'strict' - raise a ValueError (or a subclass)
             'ignore' - ignore the character and continue with the next
             'replace' - replace with a suitable replacement character
             'xmlcharrefreplace' - Replace with the appropriate XML
                                   character reference.
             'backslashreplace' - Replace with backslashed escape
                                  sequences.
             'namereplace' - Replace with \\N{...} escape sequences.

            The set of allowed parameter values can be extended via
            register_error.
        """
        self.stream = stream
        self.errors = errors

    def write(self, object):

        """ Writes the object's contents encoded to self.stream.
        """
        data, consumed = self.encode(object, self.errors)
        self.stream.write(data)

    def writelines(self, list):

        """ Writes the concatenated list of strings to the stream
            using .write().
        """
        self.write(''.join(list))

    def reset(self):

        """ Resets the codec buffers used for keeping internal state.

            Calling this method should ensure that the data on the
            output is put into a clean state, that allows appending
            of new fresh data without having to rescan the whole
            stream to recover state.

        """
        pass

    def seek(self, offset, whence=0):
        self.stream.seek(offset, whence)
        if whence == 0 and offset == 0:
            self.reset()

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.stream.close()
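
# Illustrative example (added commentary, not part of the original module):
# wrapping a binary stream with a StreamWriter obtained from the codec
# registry.  getwriter() is defined further below; 'utf-8' and io.BytesIO
# are just convenient stand-ins for a real codec and stream.
#
#   >>> import io
#   >>> raw = io.BytesIO()
#   >>> writer = getwriter('utf-8')(raw)
#   >>> writer.write('hällo')
#   >>> raw.getvalue()
#   b'h\xc3\xa4llo'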

###

class StreamReader(Codec):

    charbuffertype = str

    def __init__(self, stream, errors='strict'):

        """ Creates a StreamReader instance.

            stream must be a file-like object open for reading.

            The StreamReader may use different error handling
            schemes by providing the errors keyword argument. These
            parameters are predefined:

             'strict' - raise a ValueError (or a subclass)
             'ignore' - ignore the character and continue with the next
             'replace' - replace with a suitable replacement character
             'backslashreplace' - Replace with backslashed escape sequences.

            The set of allowed parameter values can be extended via
            register_error.
        """
        self.stream = stream
        self.errors = errors
        self.bytebuffer = b""
        self._empty_charbuffer = self.charbuffertype()
        self.charbuffer = self._empty_charbuffer
        self.linebuffer = None

    def decode(self, input, errors='strict'):
        raise NotImplementedError

    def read(self, size=-1, chars=-1, firstline=False):

        """ Decodes data from the stream self.stream and returns the
            resulting object.

            chars indicates the number of decoded code points or bytes to
            return. read() will never return more data than requested,
            but it might return less, if there is not enough available.

            size indicates the approximate maximum number of decoded
            bytes or code points to read for decoding. The decoder
            can modify this setting as appropriate. The default value
            -1 indicates to read and decode as much as possible.  size
            is intended to prevent having to decode huge files in one
            step.

            If firstline is true, and a UnicodeDecodeError happens
            after the first line terminator in the input, only the first
            line will be returned; the rest of the input will be kept
            until the next call to read().

            The method should use a greedy read strategy, meaning that
            it should read as much data as is allowed within the
            definition of the encoding and the given size, e.g. if
            optional encoding endings or state markers are available
            on the stream, these should be read too.
        """
        # If we have lines cached, first merge them back into characters
        if self.linebuffer:
            self.charbuffer = self._empty_charbuffer.join(self.linebuffer)
            self.linebuffer = None

        if chars < 0:
            # For compatibility with other read() methods that take a
            # single argument
            chars = size

        # read until we get the required number of characters (if available)
        while True:
            # can the request be satisfied from the character buffer?
            if chars >= 0:
                if len(self.charbuffer) >= chars:
                    break
            # we need more data
            if size < 0:
                newdata = self.stream.read()
            else:
                newdata = self.stream.read(size)
            # decode bytes (those remaining from the last call included)
            data = self.bytebuffer + newdata
            if not data:
                break
            try:
                newchars, decodedbytes = self.decode(data, self.errors)
            except UnicodeDecodeError as exc:
                if firstline:
                    newchars, decodedbytes = \
                        self.decode(data[:exc.start], self.errors)
                    lines = newchars.splitlines(keepends=True)
                    if len(lines) <= 1:
                        raise
                else:
                    raise
            # keep undecoded bytes until the next call
            self.bytebuffer = data[decodedbytes:]
            # put new characters in the character buffer
            self.charbuffer += newchars
            # there was no data available
            if not newdata:
                break
        if chars < 0:
            # Return everything we've got
            result = self.charbuffer
            self.charbuffer = self._empty_charbuffer
        else:
            # Return the first chars characters
            result = self.charbuffer[:chars]
            self.charbuffer = self.charbuffer[chars:]
        return result

    def readline(self, size=None, keepends=True):

        """ Read one line from the input stream and return the
            decoded data.

            size, if given, is passed as size argument to the
            read() method.

        """
        # If we have lines cached from an earlier read, return
        # them unconditionally
        if self.linebuffer:
            line = self.linebuffer[0]
            del self.linebuffer[0]
            if len(self.linebuffer) == 1:
                # revert to charbuffer mode; we might need more data
                # next time
                self.charbuffer = self.linebuffer[0]
                self.linebuffer = None
            if not keepends:
                line = line.splitlines(keepends=False)[0]
            return line

        readsize = size or 72
        line = self._empty_charbuffer
        # If size is given, we call read() only once
        while True:
            data = self.read(readsize, firstline=True)
            if data:
                # If we're at a "\r" read one extra character (which might
                # be a "\n") to get a proper line ending. If the stream is
                # temporarily exhausted we return the wrong line ending.
                if (isinstance(data, str) and data.endswith("\r")) or \
                   (isinstance(data, bytes) and data.endswith(b"\r")):
                    data += self.read(size=1, chars=1)

            line += data
            lines = line.splitlines(keepends=True)
            if lines:
                if len(lines) > 1:
                    # More than one line result; the first line is a full line
                    # to return
                    line = lines[0]
                    del lines[0]
                    if len(lines) > 1:
                        # cache the remaining lines
                        lines[-1] += self.charbuffer
                        self.linebuffer = lines
                        self.charbuffer = None
                    else:
                        # only one remaining line, put it back into charbuffer
                        self.charbuffer = lines[0] + self.charbuffer
                    if not keepends:
                        line = line.splitlines(keepends=False)[0]
                    break
                line0withend = lines[0]
                line0withoutend = lines[0].splitlines(keepends=False)[0]
                if line0withend != line0withoutend:  # We really have a line end
                    # Put the rest back together and keep it until the next call
                    self.charbuffer = self._empty_charbuffer.join(lines[1:]) + \
                                      self.charbuffer
                    if keepends:
                        line = line0withend
                    else:
                        line = line0withoutend
                    break
            # we didn't get anything or this was our only try
            if not data or size is not None:
                if line and not keepends:
                    line = line.splitlines(keepends=False)[0]
                break
            if readsize < 8000:
                readsize *= 2
        return line

    def readlines(self, sizehint=None, keepends=True):

        """ Read all lines available on the input stream
            and return them as a list.

            Line breaks are implemented using the codec's decoder
            method and are included in the list entries.

            sizehint, if given, is ignored since there is no efficient
            way to find the true end-of-line.

        """
        data = self.read()
        return data.splitlines(keepends)

    def reset(self):

        """ Resets the codec buffers used for keeping internal state.

            Note that no stream repositioning should take place.
            This method is primarily intended to be able to recover
            from decoding errors.

        """
        self.bytebuffer = b""
        self.charbuffer = self._empty_charbuffer
        self.linebuffer = None

    def seek(self, offset, whence=0):
        """ Set the input stream's current position.

            Resets the codec buffers used for keeping state.
        """
        self.stream.seek(offset, whence)
        self.reset()

    def __next__(self):

        """ Return the next decoded line from the input stream."""
        line = self.readline()
        if line:
            return line
        raise StopIteration

    def __iter__(self):
        return self

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.stream.close()
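
# Illustrative example (added commentary, not part of the original module):
# reading decoded text from a binary stream through a StreamReader.
# getreader() is defined further below; io.BytesIO and 'utf-8' are only
# stand-ins chosen for the sketch.
#
#   >>> import io
#   >>> raw = io.BytesIO('line one\nline two\n'.encode('utf-8'))
#   >>> reader = getreader('utf-8')(raw)
#   >>> reader.readline()
#   'line one\n'
#   >>> reader.read()
#   'line two\n'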

###

class StreamReaderWriter:

    """ StreamReaderWriter instances allow wrapping streams which
        work in both read and write modes.

        The design is such that one can use the factory functions
        returned by the codecs.lookup() function to construct the
        instance.

    """
    # Optional attributes set by the file wrappers below
    encoding = 'unknown'

    def __init__(self, stream, Reader, Writer, errors='strict'):

        """ Creates a StreamReaderWriter instance.

            stream must be a Stream-like object.

            Reader, Writer must be factory functions or classes
            providing the StreamReader and StreamWriter interfaces,
            respectively.

            Error handling is done in the same way as defined for the
            StreamWriter/Readers.

        """
        self.stream = stream
        self.reader = Reader(stream, errors)
        self.writer = Writer(stream, errors)
        self.errors = errors

    def read(self, size=-1):

        return self.reader.read(size)

    def readline(self, size=None):

        return self.reader.readline(size)

    def readlines(self, sizehint=None):

        return self.reader.readlines(sizehint)

    def __next__(self):

        """ Return the next decoded line from the input stream."""
        return next(self.reader)

    def __iter__(self):
        return self

    def write(self, data):

        return self.writer.write(data)

    def writelines(self, list):

        return self.writer.writelines(list)

    def reset(self):

        self.reader.reset()
        self.writer.reset()

    def seek(self, offset, whence=0):
        self.stream.seek(offset, whence)
        self.reader.reset()
        if whence == 0 and offset == 0:
            self.writer.reset()

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    # these are needed to make "with StreamReaderWriter(...)" work properly

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.stream.close()

###

class StreamRecoder:

    """ StreamRecoder instances translate data from one encoding to another.

        They use the complete set of APIs returned by the
        codecs.lookup() function to implement their task.

        Data written to the StreamRecoder is first decoded into an
        intermediate format (depending on the "decode" codec) and then
        written to the underlying stream using an instance of the provided
        Writer class.

        In the other direction, data is read from the underlying stream using
        a Reader instance and then encoded and returned to the caller.

    """
    # Optional attributes set by the file wrappers below
    data_encoding = 'unknown'
    file_encoding = 'unknown'

    def __init__(self, stream, encode, decode, Reader, Writer,
                 errors='strict'):

        """ Creates a StreamRecoder instance which implements a two-way
            conversion: encode and decode work on the frontend (the
            data visible to .read() and .write()) while Reader and Writer
            work on the backend (the data in stream).

            You can use these objects to do transparent
            transcodings from e.g. latin-1 to utf-8 and back.

            stream must be a file-like object.

            encode and decode must adhere to the Codec interface; Reader and
            Writer must be factory functions or classes providing the
            StreamReader and StreamWriter interfaces, respectively.

            Error handling is done in the same way as defined for the
            StreamWriter/Readers.

        """
        self.stream = stream
        self.encode = encode
        self.decode = decode
        self.reader = Reader(stream, errors)
        self.writer = Writer(stream, errors)
        self.errors = errors

    def read(self, size=-1):

        data = self.reader.read(size)
        data, bytesencoded = self.encode(data, self.errors)
        return data

    def readline(self, size=None):

        if size is None:
            data = self.reader.readline()
        else:
            data = self.reader.readline(size)
        data, bytesencoded = self.encode(data, self.errors)
        return data

    def readlines(self, sizehint=None):

        data = self.reader.read()
        data, bytesencoded = self.encode(data, self.errors)
        return data.splitlines(keepends=True)

    def __next__(self):

        """ Return the next decoded line from the input stream."""
        data = next(self.reader)
        data, bytesencoded = self.encode(data, self.errors)
        return data

    def __iter__(self):
        return self

    def write(self, data):

        data, bytesdecoded = self.decode(data, self.errors)
        return self.writer.write(data)

    def writelines(self, list):

        data = b''.join(list)
        data, bytesdecoded = self.decode(data, self.errors)
        return self.writer.write(data)

    def reset(self):

        self.reader.reset()
        self.writer.reset()

    def seek(self, offset, whence=0):
        # Seeks must be propagated to both the readers and writers
        # as they might need to reset their internal buffers.
        self.reader.seek(offset, whence)
        self.writer.seek(offset, whence)

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.stream.close()

### Shortcuts

def open(filename, mode='r', encoding=None, errors='strict', buffering=-1):

    """ Open an encoded file using the given mode and return
        a wrapped version providing transparent encoding/decoding.

        Note: The wrapped version will only accept the object format
        defined by the codecs, i.e. Unicode objects for most builtin
        codecs. Output is also codec dependent and will usually be
        Unicode as well.

        Underlying encoded files are always opened in binary mode.
        The default file mode is 'r', meaning to open the file in read mode.

        encoding specifies the encoding which is to be used for the
        file.

        errors may be given to define the error handling. It defaults
        to 'strict' which causes ValueErrors to be raised in case an
        encoding error occurs.

        buffering has the same meaning as for the builtin open() API.
        It defaults to -1 which means that the default buffer size will
        be used.

        The returned wrapped file object provides an extra attribute
        .encoding which allows querying the used encoding. This
        attribute is only available if an encoding was specified as
        parameter.

    """
    if encoding is not None and \
       'b' not in mode:
        # Force opening of the file in binary mode
        mode = mode + 'b'
    file = builtins.open(filename, mode, buffering)
    if encoding is None:
        return file

    try:
        info = lookup(encoding)
        srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors)
        # Add attributes to simplify introspection
        srw.encoding = encoding
        return srw
    except:
        file.close()
        raise
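
# Illustrative example (added commentary, not part of the original module):
# round-tripping text through the wrapper returned by open().  The file
# name and the choice of 'utf-8' are hypothetical.
#
#   >>> with open('example.txt', 'w', encoding='utf-8') as f:
#   ...     f.write('grüße\n')
#   >>> with open('example.txt', encoding='utf-8') as f:
#   ...     f.read()
#   'grüße\n'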

def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):

    """ Return a wrapped version of file which provides transparent
        encoding translation.

        Data written to the wrapped file is decoded according
        to the given data_encoding and then encoded to the underlying
        file using file_encoding. The intermediate data type
        will usually be Unicode but depends on the specified codecs.

        Bytes read from the file are decoded using file_encoding and then
        passed back to the caller encoded using data_encoding.

        If file_encoding is not given, it defaults to data_encoding.

        errors may be given to define the error handling. It defaults
        to 'strict' which causes ValueErrors to be raised in case an
        encoding error occurs.

        The returned wrapped file object provides two extra attributes
        .data_encoding and .file_encoding which reflect the given
        parameters of the same name. The attributes can be used for
        introspection by Python programs.

    """
    if file_encoding is None:
        file_encoding = data_encoding
    data_info = lookup(data_encoding)
    file_info = lookup(file_encoding)
    sr = StreamRecoder(file, data_info.encode, data_info.decode,
                       file_info.streamreader, file_info.streamwriter, errors)
    # Add attributes to simplify introspection
    sr.data_encoding = data_encoding
    sr.file_encoding = file_encoding
    return sr
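
# Illustrative example (added commentary, not part of the original module):
# transcoding writes from utf-8 on the caller side to latin-1 on the file
# side; io.BytesIO stands in for a real binary file.
#
#   >>> import io
#   >>> raw = io.BytesIO()
#   >>> ef = EncodedFile(raw, data_encoding='utf-8', file_encoding='latin-1')
#   >>> ef.write('café'.encode('utf-8'))
#   >>> raw.getvalue()
#   b'caf\xe9'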

### Helpers for codec lookup

def getencoder(encoding):

    """ Look up the codec for the given encoding and return
        its encoder function.

        Raises a LookupError in case the encoding cannot be found.

    """
    return lookup(encoding).encode

def getdecoder(encoding):

    """ Look up the codec for the given encoding and return
        its decoder function.

        Raises a LookupError in case the encoding cannot be found.

    """
    return lookup(encoding).decode

def getincrementalencoder(encoding):

    """ Look up the codec for the given encoding and return
        its IncrementalEncoder class or factory function.

        Raises a LookupError in case the encoding cannot be found
        or the codec doesn't provide an incremental encoder.

    """
    encoder = lookup(encoding).incrementalencoder
    if encoder is None:
        raise LookupError(encoding)
    return encoder

def getincrementaldecoder(encoding):

    """ Look up the codec for the given encoding and return
        its IncrementalDecoder class or factory function.

        Raises a LookupError in case the encoding cannot be found
        or the codec doesn't provide an incremental decoder.

    """
    decoder = lookup(encoding).incrementaldecoder
    if decoder is None:
        raise LookupError(encoding)
    return decoder

def getreader(encoding):

    """ Look up the codec for the given encoding and return
        its StreamReader class or factory function.

        Raises a LookupError in case the encoding cannot be found.

    """
    return lookup(encoding).streamreader

def getwriter(encoding):

    """ Look up the codec for the given encoding and return
        its StreamWriter class or factory function.

        Raises a LookupError in case the encoding cannot be found.

    """
    return lookup(encoding).streamwriter

def iterencode(iterator, encoding, errors='strict', **kwargs):
    """
    Encoding iterator.

    Encodes the input strings from the iterator using an IncrementalEncoder.

    errors and kwargs are passed through to the IncrementalEncoder
    constructor.
    """
    encoder = getincrementalencoder(encoding)(errors, **kwargs)
    for input in iterator:
        output = encoder.encode(input)
        if output:
            yield output
    output = encoder.encode("", True)
    if output:
        yield output

def iterdecode(iterator, encoding, errors='strict', **kwargs):
    """
    Decoding iterator.

    Decodes the input from the iterator using an IncrementalDecoder.

    errors and kwargs are passed through to the IncrementalDecoder
    constructor.
    """
    decoder = getincrementaldecoder(encoding)(errors, **kwargs)
    for input in iterator:
        output = decoder.decode(input)
        if output:
            yield output
    output = decoder.decode(b"", True)
    if output:
        yield output
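
# Illustrative example (added commentary, not part of the original module):
# a chunked encode/decode round trip through the two generators above,
# using 'utf-8' purely as an example codec.
#
#   >>> list(iterdecode(iterencode(['ab', 'cd'], 'utf-8'), 'utf-8'))
#   ['ab', 'cd']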

### Helpers for charmap-based codecs

def make_identity_dict(rng):

    """ make_identity_dict(rng) -> dict

        Return a dictionary where elements of the rng sequence are
        mapped to themselves.

    """
    return {i: i for i in rng}

def make_encoding_map(decoding_map):

    """ Creates an encoding map from a decoding map.

        If a target mapping in the decoding map occurs multiple
        times, then that target is mapped to None (undefined mapping),
        causing an exception when encountered by the charmap codec
        during translation.

        One example where this happens is cp875.py which decodes
        multiple characters to \\u001a.

    """
    m = {}
    for k, v in decoding_map.items():
        if v not in m:
            m[v] = k
        else:
            m[v] = None
    return m
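
# Illustrative example (added commentary, not part of the original module):
# how the two charmap helpers behave, including the None mapping produced
# for duplicate targets.
#
#   >>> make_identity_dict(range(3))
#   {0: 0, 1: 1, 2: 2}
#   >>> make_encoding_map({0x00: 'a', 0x01: 'a'})   # duplicate target 'a'
#   {'a': None}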

### error handlers

try:
    strict_errors = lookup_error("strict")
    ignore_errors = lookup_error("ignore")
    replace_errors = lookup_error("replace")
    xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace")
    backslashreplace_errors = lookup_error("backslashreplace")
    namereplace_errors = lookup_error("namereplace")
except LookupError:
    # In --disable-unicode builds, these error handlers are missing
    strict_errors = None
    ignore_errors = None
    replace_errors = None
    xmlcharrefreplace_errors = None
    backslashreplace_errors = None
    namereplace_errors = None

# Tell modulefinder that using codecs probably needs the encodings
# package
_false = 0
if _false:
    import encodings

### Tests

if __name__ == '__main__':

    # Make stdout translate Latin-1 output into UTF-8 output
    sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8')

    # Have stdin translate Latin-1 input into UTF-8 input
    sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1')