Blame - libcpp/lex.c - toolchain/gcc-aarch64

blob: 23809bc4b0aafea79d5ccf645d0fe23da66198df [file] [log] [blame]

Bernhard Rosenkraenzer	c83ebe5	2012-09-18 21:38:03 +0159	[diff] [blame]	1	/* CPP Library - lexical analysis.
				2	Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010,
				3	2011, 2012 Free Software Foundation, Inc.
				4	Contributed by Per Bothner, 1994-95.
				5	Based on CCCP program by Paul Rubin, June 1986
				6	Adapted to ANSI C, Richard Stallman, Jan 1987
				7	Broken out to separate file, Zack Weinberg, Mar 2000
				8
				9	This program is free software; you can redistribute it and/or modify it
				10	under the terms of the GNU General Public License as published by the
				11	Free Software Foundation; either version 3, or (at your option) any
				12	later version.
				13
				14	This program is distributed in the hope that it will be useful,
				15	but WITHOUT ANY WARRANTY; without even the implied warranty of
				16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				17	GNU General Public License for more details.
				18
				19	You should have received a copy of the GNU General Public License
				20	along with this program; see the file COPYING3. If not see
				21	<http://www.gnu.org/licenses/>. */
				22
				23	#include "config.h"
				24	#include "system.h"
				25	#include "cpplib.h"
				26	#include "internal.h"
				27
				28	enum spell_type
				29	{
				30	SPELL_OPERATOR = 0,
				31	SPELL_IDENT,
				32	SPELL_LITERAL,
				33	SPELL_NONE
				34	};
				35
				36	struct token_spelling
				37	{
				38	enum spell_type category;
				39	const unsigned char *name;
				40	};
				41
				42	static const unsigned char *const digraph_spellings[] =
				43	{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
				44
				45	#define OP(e, s) { SPELL_OPERATOR, UC s },
				46	#define TK(e, s) { SPELL_ ## s, UC #e },
				47	static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
				48	#undef OP
				49	#undef TK
				50
				51	#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
				52	#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
				53
				54	static void add_line_note (cpp_buffer , const uchar , unsigned int);
				55	static int skip_line_comment (cpp_reader *);
				56	static void skip_whitespace (cpp_reader *, cppchar_t);
				57	static void lex_string (cpp_reader , cpp_token , const uchar *);
				58	static void save_comment (cpp_reader , cpp_token , const uchar *, cppchar_t);
				59	static void store_comment (cpp_reader , cpp_token );
				60	static void create_literal (cpp_reader , cpp_token , const uchar *,
				61	unsigned int, enum cpp_ttype);
				62	static bool warn_in_comment (cpp_reader , _cpp_line_note );
				63	static int name_p (cpp_reader , const cpp_string );
				64	static tokenrun next_tokenrun (tokenrun );
				65
				66	static _cpp_buff *new_buff (size_t);
				67
				68
				69	/* Utility routine:
				70
				71	Compares, the token TOKEN to the NUL-terminated string STRING.
				72	TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
				73	int
				74	cpp_ideq (const cpp_token token, const char string)
				75	{
				76	if (token->type != CPP_NAME)
				77	return 0;
				78
				79	return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
				80	}
				81
				82	/* Record a note TYPE at byte POS into the current cleaned logical
				83	line. */
				84	static void
				85	add_line_note (cpp_buffer buffer, const uchar pos, unsigned int type)
				86	{
				87	if (buffer->notes_used == buffer->notes_cap)
				88	{
				89	buffer->notes_cap = buffer->notes_cap * 2 + 200;
				90	buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
				91	buffer->notes_cap);
				92	}
				93
				94	buffer->notes[buffer->notes_used].pos = pos;
				95	buffer->notes[buffer->notes_used].type = type;
				96	buffer->notes_used++;
				97	}
				98
				99
				100	/* Fast path to find line special characters using optimized character
				101	scanning algorithms. Anything complicated falls back to the slow
				102	path below. Since this loop is very hot it's worth doing these kinds
				103	of optimizations.
				104
				105	One of the paths through the ifdefs should provide
				106
				107	const uchar *search_line_fast (const uchar *s, const uchar *end);
				108
				109	Between S and END, search for \n, \r, \\, ?. Return a pointer to
				110	the found character.
				111
				112	Note that the last character of the buffer is always a newline,
				113	as forced by _cpp_convert_input. This fact can be used to avoid
				114	explicitly looking for the end of the buffer. */
				115
				116	/* Configure gives us an ifdef test. */
				117	#ifndef WORDS_BIGENDIAN
				118	#define WORDS_BIGENDIAN 0
				119	#endif
				120
				121	/* We'd like the largest integer that fits into a register. There's nothing
				122	in <stdint.h> that gives us that. For most hosts this is unsigned long,
				123	but MS decided on an LLP64 model. Thankfully when building with GCC we
				124	can get the "real" word size. */
				125	#ifdef __GNUC__
				126	typedef unsigned int word_type __attribute__((__mode__(__word__)));
				127	#else
				128	typedef unsigned long word_type;
				129	#endif
				130
				131	/* The code below is only expecting sizes 4 or 8.
				132	Die at compile-time if this expectation is violated. */
				133	typedef char check_word_type_size
				134	[(sizeof(word_type) == 8 \|\| sizeof(word_type) == 4) * 2 - 1];
				135
				136	/* Return X with the first N bytes forced to values that won't match one
				137	of the interesting characters. Note that NUL is not interesting. */
				138
				139	static inline word_type
				140	acc_char_mask_misalign (word_type val, unsigned int n)
				141	{
				142	word_type mask = -1;
				143	if (WORDS_BIGENDIAN)
				144	mask >>= n * 8;
				145	else
				146	mask <<= n * 8;
				147	return val & mask;
				148	}
				149
				150	/* Return X replicated to all byte positions within WORD_TYPE. */
				151
				152	static inline word_type
				153	acc_char_replicate (uchar x)
				154	{
				155	word_type ret;
				156
				157	ret = (x << 24) \| (x << 16) \| (x << 8) \| x;
				158	if (sizeof(word_type) == 8)
				159	ret = (ret << 16 << 16) \| ret;
				160	return ret;
				161	}
				162
				163	/* Return non-zero if some byte of VAL is (probably) C. */
				164
				165	static inline word_type
				166	acc_char_cmp (word_type val, word_type c)
				167	{
				168	#if defined(__GNUC__) && defined(__alpha__)
				169	/* We can get exact results using a compare-bytes instruction.
				170	Get (val == c) via (0 >= (val ^ c)). */
				171	return __builtin_alpha_cmpbge (0, val ^ c);
				172	#else
				173	word_type magic = 0x7efefefeU;
				174	if (sizeof(word_type) == 8)
				175	magic = (magic << 16 << 16) \| 0xfefefefeU;
				176	magic \|= 1;
				177
				178	val ^= c;
				179	return ((val + magic) ^ ~val) & ~magic;
				180	#endif
				181	}
				182
				183	/* Given the result of acc_char_cmp is non-zero, return the index of
				184	the found character. If this was a false positive, return -1. */
				185
				186	static inline int
				187	acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
				188	word_type val ATTRIBUTE_UNUSED)
				189	{
				190	#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
				191	/* The cmpbge instruction sets bits of the result corresponding to
				192	matches in the bytes with no false positives. */
				193	return __builtin_ctzl (cmp);
				194	#else
				195	unsigned int i;
				196
				197	/* ??? It would be nice to force unrolling here,
				198	and have all of these constants folded. */
				199	for (i = 0; i < sizeof(word_type); ++i)
				200	{
				201	uchar c;
				202	if (WORDS_BIGENDIAN)
				203	c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
				204	else
				205	c = (val >> i * 8) & 0xff;
				206
				207	if (c == '\n' \|\| c == '\r' \|\| c == '\\' \|\| c == '?')
				208	return i;
				209	}
				210
				211	return -1;
				212	#endif
				213	}
				214
				215	/* A version of the fast scanner using bit fiddling techniques.
				216
				217	For 32-bit words, one would normally perform 16 comparisons and
				218	16 branches. With this algorithm one performs 24 arithmetic
				219	operations and one branch. Whether this is faster with a 32-bit
				220	word size is going to be somewhat system dependent.
				221
				222	For 64-bit words, we eliminate twice the number of comparisons
				223	and branches without increasing the number of arithmetic operations.
				224	It's almost certainly going to be a win with 64-bit word size. */
				225
				226	static const uchar * search_line_acc_char (const uchar , const uchar )
				227	ATTRIBUTE_UNUSED;
				228
				229	static const uchar *
				230	search_line_acc_char (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				231	{
				232	const word_type repl_nl = acc_char_replicate ('\n');
				233	const word_type repl_cr = acc_char_replicate ('\r');
				234	const word_type repl_bs = acc_char_replicate ('\\');
				235	const word_type repl_qm = acc_char_replicate ('?');
				236
				237	unsigned int misalign;
				238	const word_type *p;
				239	word_type val, t;
				240
				241	/* Align the buffer. Mask out any bytes from before the beginning. */
				242	p = (word_type *)((uintptr_t)s & -sizeof(word_type));
				243	val = *p;
				244	misalign = (uintptr_t)s & (sizeof(word_type) - 1);
				245	if (misalign)
				246	val = acc_char_mask_misalign (val, misalign);
				247
				248	/* Main loop. */
				249	while (1)
				250	{
				251	t = acc_char_cmp (val, repl_nl);
				252	t \|= acc_char_cmp (val, repl_cr);
				253	t \|= acc_char_cmp (val, repl_bs);
				254	t \|= acc_char_cmp (val, repl_qm);
				255
				256	if (__builtin_expect (t != 0, 0))
				257	{
				258	int i = acc_char_index (t, val);
				259	if (i >= 0)
				260	return (const uchar *)p + i;
				261	}
				262
				263	val = *++p;
				264	}
				265	}
				266
				267	/* Disable on Solaris 2/x86 until the following problems can be properly
				268	autoconfed:
				269
				270	The Solaris 9 assembler cannot assemble SSE4.2 insns.
				271	Before Solaris 9 Update 6, SSE insns cannot be executed.
				272	The Solaris 10+ assembler tags objects with the instruction set
				273	extensions used, so SSE4.2 executables cannot run on machines that
				274	don't support that extension. */
				275
				276	#if (GCC_VERSION >= 4005) && (defined(__i386__) \|\| defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
				277
				278	/* Replicated character data to be shared between implementations.
				279	Recall that outside of a context with vector support we can't
				280	define compatible vector types, therefore these are all defined
				281	in terms of raw characters. */
				282	static const char repl_chars[4][16] __attribute__((aligned(16))) = {
				283	{ '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
				284	'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
				285	{ '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
				286	'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
				287	{ '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
				288	'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
				289	{ '?', '?', '?', '?', '?', '?', '?', '?',
				290	'?', '?', '?', '?', '?', '?', '?', '?' },
				291	};
				292
				293	/* A version of the fast scanner using MMX vectorized byte compare insns.
				294
				295	This uses the PMOVMSKB instruction which was introduced with "MMX2",
				296	which was packaged into SSE1; it is also present in the AMD MMX
				297	extension. Mark the function as using "sse" so that we emit a real
				298	"emms" instruction, rather than the 3dNOW "femms" instruction. */
				299
				300	static const uchar *
				301	#ifndef __SSE__
				302	__attribute__((__target__("sse")))
				303	#endif
				304	search_line_mmx (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				305	{
				306	typedef char v8qi __attribute__ ((__vector_size__ (8)));
				307	typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
				308
				309	const v8qi repl_nl = (const v8qi )repl_chars[0];
				310	const v8qi repl_cr = (const v8qi )repl_chars[1];
				311	const v8qi repl_bs = (const v8qi )repl_chars[2];
				312	const v8qi repl_qm = (const v8qi )repl_chars[3];
				313
				314	unsigned int misalign, found, mask;
				315	const v8qi *p;
				316	v8qi data, t, c;
				317
				318	/* Align the source pointer. While MMX doesn't generate unaligned data
				319	faults, this allows us to safely scan to the end of the buffer without
				320	reading beyond the end of the last page. */
				321	misalign = (uintptr_t)s & 7;
				322	p = (const v8qi *)((uintptr_t)s & -8);
				323	data = *p;
				324
				325	/* Create a mask for the bytes that are valid within the first
				326	16-byte block. The Idea here is that the AND with the mask
				327	within the loop is "free", since we need some AND or TEST
				328	insn in order to set the flags for the branch anyway. */
				329	mask = -1u << misalign;
				330
				331	/* Main loop processing 8 bytes at a time. */
				332	goto start;
				333	do
				334	{
				335	data = *++p;
				336	mask = -1;
				337
				338	start:
				339	t = __builtin_ia32_pcmpeqb(data, repl_nl);
				340	c = __builtin_ia32_pcmpeqb(data, repl_cr);
				341	t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
				342	c = __builtin_ia32_pcmpeqb(data, repl_bs);
				343	t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
				344	c = __builtin_ia32_pcmpeqb(data, repl_qm);
				345	t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
				346	found = __builtin_ia32_pmovmskb (t);
				347	found &= mask;
				348	}
				349	while (!found);
				350
				351	__builtin_ia32_emms ();
				352
				353	/* FOUND contains 1 in bits for which we matched a relevant
				354	character. Conversion to the byte index is trivial. */
				355	found = __builtin_ctz(found);
				356	return (const uchar *)p + found;
				357	}
				358
				359	/* A version of the fast scanner using SSE2 vectorized byte compare insns. */
				360
				361	static const uchar *
				362	#ifndef __SSE2__
				363	__attribute__((__target__("sse2")))
				364	#endif
				365	search_line_sse2 (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				366	{
				367	typedef char v16qi __attribute__ ((__vector_size__ (16)));
				368
				369	const v16qi repl_nl = (const v16qi )repl_chars[0];
				370	const v16qi repl_cr = (const v16qi )repl_chars[1];
				371	const v16qi repl_bs = (const v16qi )repl_chars[2];
				372	const v16qi repl_qm = (const v16qi )repl_chars[3];
				373
				374	unsigned int misalign, found, mask;
				375	const v16qi *p;
				376	v16qi data, t;
				377
				378	/* Align the source pointer. */
				379	misalign = (uintptr_t)s & 15;
				380	p = (const v16qi *)((uintptr_t)s & -16);
				381	data = *p;
				382
				383	/* Create a mask for the bytes that are valid within the first
				384	16-byte block. The Idea here is that the AND with the mask
				385	within the loop is "free", since we need some AND or TEST
				386	insn in order to set the flags for the branch anyway. */
				387	mask = -1u << misalign;
				388
				389	/* Main loop processing 16 bytes at a time. */
				390	goto start;
				391	do
				392	{
				393	data = *++p;
				394	mask = -1;
				395
				396	start:
				397	t = __builtin_ia32_pcmpeqb128(data, repl_nl);
				398	t \|= __builtin_ia32_pcmpeqb128(data, repl_cr);
				399	t \|= __builtin_ia32_pcmpeqb128(data, repl_bs);
				400	t \|= __builtin_ia32_pcmpeqb128(data, repl_qm);
				401	found = __builtin_ia32_pmovmskb128 (t);
				402	found &= mask;
				403	}
				404	while (!found);
				405
				406	/* FOUND contains 1 in bits for which we matched a relevant
				407	character. Conversion to the byte index is trivial. */
				408	found = __builtin_ctz(found);
				409	return (const uchar *)p + found;
				410	}
				411
				412	#ifdef HAVE_SSE4
				413	/* A version of the fast scanner using SSE 4.2 vectorized string insns. */
				414
				415	static const uchar *
				416	#ifndef __SSE4_2__
				417	__attribute__((__target__("sse4.2")))
				418	#endif
				419	search_line_sse42 (const uchar s, const uchar end)
				420	{
				421	typedef char v16qi __attribute__ ((__vector_size__ (16)));
				422	static const v16qi search = { '\n', '\r', '?', '\\' };
				423
				424	uintptr_t si = (uintptr_t)s;
				425	uintptr_t index;
				426
				427	/* Check for unaligned input. */
				428	if (si & 15)
				429	{
				430	v16qi sv;
				431
				432	if (__builtin_expect (end - s < 16, 0)
				433	&& __builtin_expect ((si & 0xfff) > 0xff0, 0))
				434	{
				435	/* There are less than 16 bytes left in the buffer, and less
				436	than 16 bytes left on the page. Reading 16 bytes at this
				437	point might generate a spurious page fault. Defer to the
				438	SSE2 implementation, which already handles alignment. */
				439	return search_line_sse2 (s, end);
				440	}
				441
				442	/* ??? The builtin doesn't understand that the PCMPESTRI read from
				443	memory need not be aligned. */
				444	sv = __builtin_ia32_loaddqu ((const char *) s);
				445	index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);
				446
				447	if (__builtin_expect (index < 16, 0))
				448	goto found;
				449
				450	/* Advance the pointer to an aligned address. We will re-scan a
				451	few bytes, but we no longer need care for reading past the
				452	end of a page, since we're guaranteed a match. */
				453	s = (const uchar *)((si + 16) & -16);
				454	}
				455
				456	/* Main loop, processing 16 bytes at a time. By doing the whole loop
				457	in inline assembly, we can make proper use of the flags set. */
				458	__asm ( "sub $16, %1\n"
				459	" .balign 16\n"
				460	"0: add $16, %1\n"
				461	" %vpcmpestri $0, (%1), %2\n"
				462	" jnc 0b"
				463	: "=&c"(index), "+r"(s)
				464	: "x"(search), "a"(4), "d"(16));
				465
				466	found:
				467	return s + index;
				468	}
				469
				470	#else
				471	/* Work around out-dated assemblers without sse4 support. */
				472	#define search_line_sse42 search_line_sse2
				473	#endif
				474
				475	/* Check the CPU capabilities. */
				476
				477	#include "../gcc/config/i386/cpuid.h"
				478
				479	typedef const uchar * (search_line_fast_type) (const uchar , const uchar *);
				480	static search_line_fast_type search_line_fast;
				481
				482	#define HAVE_init_vectorized_lexer 1
				483	static inline void
				484	init_vectorized_lexer (void)
				485	{
				486	unsigned dummy, ecx = 0, edx = 0;
				487	search_line_fast_type impl = search_line_acc_char;
				488	int minimum = 0;
				489
				490	#if defined(__SSE4_2__)
				491	minimum = 3;
				492	#elif defined(__SSE2__)
				493	minimum = 2;
				494	#elif defined(__SSE__)
				495	minimum = 1;
				496	#endif
				497
				498	if (minimum == 3)
				499	impl = search_line_sse42;
				500	else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) \|\| minimum == 2)
				501	{
				502	if (minimum == 3 \|\| (ecx & bit_SSE4_2))
				503	impl = search_line_sse42;
				504	else if (minimum == 2 \|\| (edx & bit_SSE2))
				505	impl = search_line_sse2;
				506	else if (minimum == 1 \|\| (edx & bit_SSE))
				507	impl = search_line_mmx;
				508	}
				509	else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
				510	{
				511	if (minimum == 1
				512	\|\| (edx & (bit_MMXEXT \| bit_CMOV)) == (bit_MMXEXT \| bit_CMOV))
				513	impl = search_line_mmx;
				514	}
				515
				516	search_line_fast = impl;
				517	}
				518
				519	#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__)
				520
				521	/* A vection of the fast scanner using AltiVec vectorized byte compares. */
				522	/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
				523	so we can't compile this function without -maltivec on the command line
				524	(or implied by some other switch). */
				525
				526	static const uchar *
				527	search_line_fast (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				528	{
				529	typedef __attribute__((altivec(vector))) unsigned char vc;
				530
				531	const vc repl_nl = {
				532	'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
				533	'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
				534	};
				535	const vc repl_cr = {
				536	'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
				537	'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
				538	};
				539	const vc repl_bs = {
				540	'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
				541	'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
				542	};
				543	const vc repl_qm = {
				544	'?', '?', '?', '?', '?', '?', '?', '?',
				545	'?', '?', '?', '?', '?', '?', '?', '?',
				546	};
				547	const vc ones = {
				548	-1, -1, -1, -1, -1, -1, -1, -1,
				549	-1, -1, -1, -1, -1, -1, -1, -1,
				550	};
				551	const vc zero = { 0 };
				552
				553	vc data, mask, t;
				554
				555	/* Altivec loads automatically mask addresses with -16. This lets us
				556	issue the first load as early as possible. */
				557	data = __builtin_vec_ld(0, (const vc *)s);
				558
				559	/* Discard bytes before the beginning of the buffer. Do this by
				560	beginning with all ones and shifting in zeros according to the
				561	mis-alignment. The LVSR instruction pulls the exact shift we
				562	want from the address. */
				563	mask = __builtin_vec_lvsr(0, s);
				564	mask = __builtin_vec_perm(zero, ones, mask);
				565	data &= mask;
				566
				567	/* While altivec loads mask addresses, we still need to align S so
				568	that the offset we compute at the end is correct. */
				569	s = (const uchar *)((uintptr_t)s & -16);
				570
				571	/* Main loop processing 16 bytes at a time. */
				572	goto start;
				573	do
				574	{
				575	vc m_nl, m_cr, m_bs, m_qm;
				576
				577	s += 16;
				578	data = __builtin_vec_ld(0, (const vc *)s);
				579
				580	start:
				581	m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
				582	m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
				583	m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
				584	m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
				585	t = (m_nl \| m_cr) \| (m_bs \| m_qm);
				586
				587	/* T now contains 0xff in bytes for which we matched one of the relevant
				588	characters. We want to exit the loop if any byte in T is non-zero.
				589	Below is the expansion of vec_any_ne(t, zero). */
				590	}
				591	while (!__builtin_vec_vcmpeq_p(/__CR6_LT_REV/3, t, zero));
				592
				593	{
				594	#define N (sizeof(vc) / sizeof(long))
				595
				596	union {
				597	vc v;
				598	/* Statically assert that N is 2 or 4. */
				599	unsigned long l[(N == 2 \|\| N == 4) ? N : -1];
				600	} u;
				601	unsigned long l, i = 0;
				602
				603	u.v = t;
				604
				605	/* Find the first word of T that is non-zero. */
				606	switch (N)
				607	{
				608	case 4:
				609	l = u.l[i++];
				610	if (l != 0)
				611	break;
				612	s += sizeof(unsigned long);
				613	l = u.l[i++];
				614	if (l != 0)
				615	break;
				616	s += sizeof(unsigned long);
				617	case 2:
				618	l = u.l[i++];
				619	if (l != 0)
				620	break;
				621	s += sizeof(unsigned long);
				622	l = u.l[i];
				623	}
				624
				625	/* L now contains 0xff in bytes for which we matched one of the
				626	relevant characters. We can find the byte index by finding
				627	its bit index and dividing by 8. */
				628	l = __builtin_clzl(l) >> 3;
				629	return s + l;
				630
				631	#undef N
				632	}
				633	}
				634
				635	#elif defined (__ARM_NEON__)
				636	#include "arm_neon.h"
				637
				638	static const uchar *
				639	search_line_fast (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				640	{
				641	const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
				642	const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
				643	const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
				644	const uint8x16_t repl_qm = vdupq_n_u8 ('?');
				645	const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
				646
				647	unsigned int misalign, found, mask;
				648	const uint8_t *p;
				649	uint8x16_t data;
				650
				651	/* Align the source pointer. */
				652	misalign = (uintptr_t)s & 15;
				653	p = (const uint8_t *)((uintptr_t)s & -16);
				654	data = vld1q_u8 (p);
				655
				656	/* Create a mask for the bytes that are valid within the first
				657	16-byte block. The Idea here is that the AND with the mask
				658	within the loop is "free", since we need some AND or TEST
				659	insn in order to set the flags for the branch anyway. */
				660	mask = (-1u << misalign) & 0xffff;
				661
				662	/* Main loop, processing 16 bytes at a time. */
				663	goto start;
				664
				665	do
				666	{
				667	uint8x8_t l;
				668	uint16x4_t m;
				669	uint32x2_t n;
				670	uint8x16_t t, u, v, w;
				671
				672	p += 16;
				673	data = vld1q_u8 (p);
				674	mask = 0xffff;
				675
				676	start:
				677	t = vceqq_u8 (data, repl_nl);
				678	u = vceqq_u8 (data, repl_cr);
				679	v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
				680	w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
				681	t = vandq_u8 (vorrq_u8 (v, w), xmask);
				682	l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
				683	m = vpaddl_u8 (l);
				684	n = vpaddl_u16 (m);
				685
				686	found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
				687	vshr_n_u64 ((uint64x1_t) n, 24)), 0);
				688	found &= mask;
				689	}
				690	while (!found);
				691
				692	/* FOUND contains 1 in bits for which we matched a relevant
				693	character. Conversion to the byte index is trivial. */
				694	found = __builtin_ctz (found);
				695	return (const uchar *)p + found;
				696	}
				697
				698	#else
				699
				700	/* We only have one accelerated alternative. Use a direct call so that
				701	we encourage inlining. */
				702
				703	#define search_line_fast search_line_acc_char
				704
				705	#endif
				706
				707	/* Initialize the lexer if needed. */
				708
				709	void
				710	_cpp_init_lexer (void)
				711	{
				712	#ifdef HAVE_init_vectorized_lexer
				713	init_vectorized_lexer ();
				714	#endif
				715	}
				716
				717	/* Returns with a logical line that contains no escaped newlines or
				718	trigraphs. This is a time-critical inner loop. */
				719	void
				720	_cpp_clean_line (cpp_reader *pfile)
				721	{
				722	cpp_buffer *buffer;
				723	const uchar *s;
				724	uchar c, d, p;
				725
				726	buffer = pfile->buffer;
				727	buffer->cur_note = buffer->notes_used = 0;
				728	buffer->cur = buffer->line_base = buffer->next_line;
				729	buffer->need_line = false;
				730	s = buffer->next_line;
				731
				732	if (!buffer->from_stage3)
				733	{
				734	const uchar *pbackslash = NULL;
				735
				736	/* Fast path. This is the common case of an un-escaped line with
				737	no trigraphs. The primary win here is by not writing any
				738	data back to memory until we have to. */
				739	while (1)
				740	{
				741	/* Perform an optimized search for \n, \r, \\, ?. */
				742	s = search_line_fast (s, buffer->rlimit);
				743
				744	c = *s;
				745	if (c == '\\')
				746	{
				747	/* Record the location of the backslash and continue. */
				748	pbackslash = s++;
				749	}
				750	else if (__builtin_expect (c == '?', 0))
				751	{
				752	if (__builtin_expect (s[1] == '?', false)
				753	&& _cpp_trigraph_map[s[2]])
				754	{
				755	/* Have a trigraph. We may or may not have to convert
				756	it. Add a line note regardless, for -Wtrigraphs. */
				757	add_line_note (buffer, s, s[2]);
				758	if (CPP_OPTION (pfile, trigraphs))
				759	{
				760	/* We do, and that means we have to switch to the
				761	slow path. */
				762	d = (uchar *) s;
				763	*d = _cpp_trigraph_map[s[2]];
				764	s += 2;
				765	goto slow_path;
				766	}
				767	}
				768	/* Not a trigraph. Continue on fast-path. */
				769	s++;
				770	}
				771	else
				772	break;
				773	}
				774
				775	/* This must be \r or \n. We're either done, or we'll be forced
				776	to write back to the buffer and continue on the slow path. */
				777	d = (uchar *) s;
				778
				779	if (__builtin_expect (s == buffer->rlimit, false))
				780	goto done;
				781
				782	/* DOS line ending? */
				783	if (__builtin_expect (c == '\r', false) && s[1] == '\n')
				784	{
				785	s++;
				786	if (s == buffer->rlimit)
				787	goto done;
				788	}
				789
				790	if (__builtin_expect (pbackslash == NULL, true))
				791	goto done;
				792
				793	/* Check for escaped newline. */
				794	p = d;
				795	while (is_nvspace (p[-1]))
				796	p--;
				797	if (p - 1 != pbackslash)
				798	goto done;
				799
				800	/* Have an escaped newline; process it and proceed to
				801	the slow path. */
				802	add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
				803	d = p - 2;
				804	buffer->next_line = p - 1;
				805
				806	slow_path:
				807	while (1)
				808	{
				809	c = *++s;
				810	*++d = c;
				811
				812	if (c == '\n' \|\| c == '\r')
				813	{
				814	/* Handle DOS line endings. */
				815	if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
				816	s++;
				817	if (s == buffer->rlimit)
				818	break;
				819
				820	/* Escaped? */
				821	p = d;
				822	while (p != buffer->next_line && is_nvspace (p[-1]))
				823	p--;
				824	if (p == buffer->next_line \|\| p[-1] != '\\')
				825	break;
				826
				827	add_line_note (buffer, p - 1, p != d ? ' ': '\\');
				828	d = p - 2;
				829	buffer->next_line = p - 1;
				830	}
				831	else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
				832	{
				833	/* Add a note regardless, for the benefit of -Wtrigraphs. */
				834	add_line_note (buffer, d, s[2]);
				835	if (CPP_OPTION (pfile, trigraphs))
				836	{
				837	*d = _cpp_trigraph_map[s[2]];
				838	s += 2;
				839	}
				840	}
				841	}
				842	}
				843	else
				844	{
				845	while (s != '\n' && s != '\r')
				846	s++;
				847	d = (uchar *) s;
				848
				849	/* Handle DOS line endings. */
				850	if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
				851	s++;
				852	}
				853
				854	done:
				855	*d = '\n';
				856	/* A sentinel note that should never be processed. */
				857	add_line_note (buffer, d + 1, '\n');
				858	buffer->next_line = s + 1;
				859	}
				860
				861	/* Return true if the trigraph indicated by NOTE should be warned
				862	about in a comment. */
				863	static bool
				864	warn_in_comment (cpp_reader pfile, _cpp_line_note note)
				865	{
				866	const uchar *p;
				867
				868	/* Within comments we don't warn about trigraphs, unless the
				869	trigraph forms an escaped newline, as that may change
				870	behavior. */
				871	if (note->type != '/')
				872	return false;
				873
				874	/* If -trigraphs, then this was an escaped newline iff the next note
				875	is coincident. */
				876	if (CPP_OPTION (pfile, trigraphs))
				877	return note[1].pos == note->pos;
				878
				879	/* Otherwise, see if this forms an escaped newline. */
				880	p = note->pos + 3;
				881	while (is_nvspace (*p))
				882	p++;
				883
				884	/* There might have been escaped newlines between the trigraph and the
				885	newline we found. Hence the position test. */
				886	return (*p == '\n' && p < note[1].pos);
				887	}
				888
				889	/* Process the notes created by add_line_note as far as the current
				890	location. */
				891	void
				892	_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
				893	{
				894	cpp_buffer *buffer = pfile->buffer;
				895
				896	for (;;)
				897	{
				898	_cpp_line_note *note = &buffer->notes[buffer->cur_note];
				899	unsigned int col;
				900
				901	if (note->pos > buffer->cur)
				902	break;
				903
				904	buffer->cur_note++;
				905	col = CPP_BUF_COLUMN (buffer, note->pos + 1);
				906
				907	if (note->type == '\\' \|\| note->type == ' ')
				908	{
				909	if (note->type == ' ' && !in_comment)
				910	cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
				911	"backslash and newline separated by space");
				912
				913	if (buffer->next_line > buffer->rlimit)
				914	{
				915	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
				916	"backslash-newline at end of file");
				917	/* Prevent "no newline at end of file" warning. */
				918	buffer->next_line = buffer->rlimit;
				919	}
				920
				921	buffer->line_base = note->pos;
				922	CPP_INCREMENT_LINE (pfile, 0);
				923	}
				924	else if (_cpp_trigraph_map[note->type])
				925	{
				926	if (CPP_OPTION (pfile, warn_trigraphs)
				927	&& (!in_comment \|\| warn_in_comment (pfile, note)))
				928	{
				929	if (CPP_OPTION (pfile, trigraphs))
				930	cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
				931	pfile->line_table->highest_line, col,
				932	"trigraph ??%c converted to %c",
				933	note->type,
				934	(int) _cpp_trigraph_map[note->type]);
				935	else
				936	{
				937	cpp_warning_with_line
				938	(pfile, CPP_W_TRIGRAPHS,
				939	pfile->line_table->highest_line, col,
				940	"trigraph ??%c ignored, use -trigraphs to enable",
				941	note->type);
				942	}
				943	}
				944	}
				945	else if (note->type == 0)
				946	/* Already processed in lex_raw_string. */;
				947	else
				948	abort ();
				949	}
				950	}
				951
				952	/* Skip a C-style block comment. We find the end of the comment by
				953	seeing if an asterisk is before every '/' we encounter. Returns
				954	nonzero if comment terminated by EOF, zero otherwise.
				955
				956	Buffer->cur points to the initial asterisk of the comment. */
				957	bool
				958	_cpp_skip_block_comment (cpp_reader *pfile)
				959	{
				960	cpp_buffer *buffer = pfile->buffer;
				961	const uchar *cur = buffer->cur;
				962	uchar c;
				963
				964	cur++;
				965	if (*cur == '/')
				966	cur++;
				967
				968	for (;;)
				969	{
				970	/* People like decorating comments with '*', so check for '/'
				971	instead for efficiency. */
				972	c = *cur++;
				973
				974	if (c == '/')
				975	{
				976	if (cur[-2] == '*')
				977	break;
				978
				979	/* Warn about potential nested comments, but not if the '/'
				980	comes immediately before the true comment delimiter.
				981	Don't bother to get it right across escaped newlines. */
				982	if (CPP_OPTION (pfile, warn_comments)
				983	&& cur[0] == '*' && cur[1] != '/')
				984	{
				985	buffer->cur = cur;
				986	cpp_warning_with_line (pfile, CPP_W_COMMENTS,
				987	pfile->line_table->highest_line,
				988	CPP_BUF_COL (buffer),
				989	"\"/*\" within comment");
				990	}
				991	}
				992	else if (c == '\n')
				993	{
				994	unsigned int cols;
				995	buffer->cur = cur - 1;
				996	_cpp_process_line_notes (pfile, true);
				997	if (buffer->next_line >= buffer->rlimit)
				998	return true;
				999	_cpp_clean_line (pfile);
				1000
				1001	cols = buffer->next_line - buffer->line_base;
				1002	CPP_INCREMENT_LINE (pfile, cols);
				1003
				1004	cur = buffer->cur;
				1005	}
				1006	}
				1007
				1008	buffer->cur = cur;
				1009	_cpp_process_line_notes (pfile, true);
				1010	return false;
				1011	}
				1012
				1013	/* Skip a C++ line comment, leaving buffer->cur pointing to the
				1014	terminating newline. Handles escaped newlines. Returns nonzero
				1015	if a multiline comment. */
				1016	static int
				1017	skip_line_comment (cpp_reader *pfile)
				1018	{
				1019	cpp_buffer *buffer = pfile->buffer;
				1020	source_location orig_line = pfile->line_table->highest_line;
				1021
				1022	while (*buffer->cur != '\n')
				1023	buffer->cur++;
				1024
				1025	_cpp_process_line_notes (pfile, true);
				1026	return orig_line != pfile->line_table->highest_line;
				1027	}
				1028
				1029	/* Skips whitespace, saving the next non-whitespace character. */
				1030	static void
				1031	skip_whitespace (cpp_reader *pfile, cppchar_t c)
				1032	{
				1033	cpp_buffer *buffer = pfile->buffer;
				1034	bool saw_NUL = false;
				1035
				1036	do
				1037	{
				1038	/* Horizontal space always OK. */
				1039	if (c == ' ' \|\| c == '\t')
				1040	;
				1041	/* Just \f \v or \0 left. */
				1042	else if (c == '\0')
				1043	saw_NUL = true;
				1044	else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
				1045	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
				1046	CPP_BUF_COL (buffer),
				1047	"%s in preprocessing directive",
				1048	c == '\f' ? "form feed" : "vertical tab");
				1049
				1050	c = *buffer->cur++;
				1051	}
				1052	/* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
				1053	while (is_nvspace (c));
				1054
				1055	if (saw_NUL)
				1056	cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
				1057
				1058	buffer->cur--;
				1059	}
				1060
				1061	/* See if the characters of a number token are valid in a name (no
				1062	'.', '+' or '-'). */
				1063	static int
				1064	name_p (cpp_reader pfile, const cpp_string string)
				1065	{
				1066	unsigned int i;
				1067
				1068	for (i = 0; i < string->len; i++)
				1069	if (!is_idchar (string->text[i]))
				1070	return 0;
				1071
				1072	return 1;
				1073	}
				1074
				1075	/* After parsing an identifier or other sequence, produce a warning about
				1076	sequences not in NFC/NFKC. */
				1077	static void
				1078	warn_about_normalization (cpp_reader *pfile,
				1079	const cpp_token *token,
				1080	const struct normalize_state *s)
				1081	{
				1082	if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
				1083	&& !pfile->state.skipping)
				1084	{
				1085	/* Make sure that the token is printed using UCNs, even
				1086	if we'd otherwise happily print UTF-8. */
				1087	unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
				1088	size_t sz;
				1089
				1090	sz = cpp_spell_token (pfile, token, buf, false) - buf;
				1091	if (NORMALIZE_STATE_RESULT (s) == normalized_C)
				1092	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
				1093	"`%.*s' is not in NFKC", (int) sz, buf);
				1094	else
				1095	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
				1096	"`%.*s' is not in NFC", (int) sz, buf);
Bernhard Rosenkraenzer	7d3ad0b	2012-10-23 01:39:53 +0159	[diff] [blame^]	1097	free (buf);
Bernhard Rosenkraenzer	c83ebe5	2012-09-18 21:38:03 +0159	[diff] [blame]	1098	}
				1099	}
				1100
				1101	/* Returns TRUE if the sequence starting at buffer->cur is invalid in
				1102	an identifier. FIRST is TRUE if this starts an identifier. */
				1103	static bool
				1104	forms_identifier_p (cpp_reader *pfile, int first,
				1105	struct normalize_state *state)
				1106	{
				1107	cpp_buffer *buffer = pfile->buffer;
				1108
				1109	if (*buffer->cur == '$')
				1110	{
				1111	if (!CPP_OPTION (pfile, dollars_in_ident))
				1112	return false;
				1113
				1114	buffer->cur++;
				1115	if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
				1116	{
				1117	CPP_OPTION (pfile, warn_dollars) = 0;
				1118	cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
				1119	}
				1120
				1121	return true;
				1122	}
				1123
				1124	/* Is this a syntactically valid UCN? */
				1125	if (CPP_OPTION (pfile, extended_identifiers)
				1126	&& *buffer->cur == '\\'
				1127	&& (buffer->cur[1] == 'u' \|\| buffer->cur[1] == 'U'))
				1128	{
				1129	buffer->cur += 2;
				1130	if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
				1131	state))
				1132	return true;
				1133	buffer->cur -= 2;
				1134	}
				1135
				1136	return false;
				1137	}
				1138
				1139	/* Helper function to get the cpp_hashnode of the identifier BASE. */
				1140	static cpp_hashnode *
				1141	lex_identifier_intern (cpp_reader pfile, const uchar base)
				1142	{
				1143	cpp_hashnode *result;
				1144	const uchar *cur;
				1145	unsigned int len;
				1146	unsigned int hash = HT_HASHSTEP (0, *base);
				1147
				1148	cur = base + 1;
				1149	while (ISIDNUM (*cur))
				1150	{
				1151	hash = HT_HASHSTEP (hash, *cur);
				1152	cur++;
				1153	}
				1154	len = cur - base;
				1155	hash = HT_HASHFINISH (hash, len);
				1156	result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
				1157	base, len, hash, HT_ALLOC));
				1158
				1159	/* Rarely, identifiers require diagnostics when lexed. */
				1160	if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
				1161	&& !pfile->state.skipping, 0))
				1162	{
				1163	/* It is allowed to poison the same identifier twice. */
				1164	if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
				1165	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
				1166	NODE_NAME (result));
				1167
				1168	/* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
				1169	replacement list of a variadic macro. */
				1170	if (result == pfile->spec_nodes.n__VA_ARGS__
				1171	&& !pfile->state.va_args_ok)
				1172	cpp_error (pfile, CPP_DL_PEDWARN,
				1173	"__VA_ARGS__ can only appear in the expansion"
				1174	" of a C99 variadic macro");
				1175
				1176	/* For -Wc++-compat, warn about use of C++ named operators. */
				1177	if (result->flags & NODE_WARN_OPERATOR)
				1178	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
				1179	"identifier \"%s\" is a special operator name in C++",
				1180	NODE_NAME (result));
				1181	}
				1182
				1183	return result;
				1184	}
				1185
				1186	/* Get the cpp_hashnode of an identifier specified by NAME in
				1187	the current cpp_reader object. If none is found, NULL is returned. */
				1188	cpp_hashnode *
				1189	_cpp_lex_identifier (cpp_reader pfile, const char name)
				1190	{
				1191	cpp_hashnode *result;
				1192	result = lex_identifier_intern (pfile, (uchar *) name);
				1193	return result;
				1194	}
				1195
				1196	/* Lex an identifier starting at BUFFER->CUR - 1. */
				1197	static cpp_hashnode *
				1198	lex_identifier (cpp_reader pfile, const uchar base, bool starts_ucn,
				1199	struct normalize_state *nst)
				1200	{
				1201	cpp_hashnode *result;
				1202	const uchar *cur;
				1203	unsigned int len;
				1204	unsigned int hash = HT_HASHSTEP (0, *base);
				1205
				1206	cur = pfile->buffer->cur;
				1207	if (! starts_ucn)
				1208	while (ISIDNUM (*cur))
				1209	{
				1210	hash = HT_HASHSTEP (hash, *cur);
				1211	cur++;
				1212	}
				1213	pfile->buffer->cur = cur;
				1214	if (starts_ucn \|\| forms_identifier_p (pfile, false, nst))
				1215	{
				1216	/* Slower version for identifiers containing UCNs (or $). */
				1217	do {
				1218	while (ISIDNUM (*pfile->buffer->cur))
				1219	{
				1220	pfile->buffer->cur++;
				1221	NORMALIZE_STATE_UPDATE_IDNUM (nst);
				1222	}
				1223	} while (forms_identifier_p (pfile, false, nst));
				1224	result = _cpp_interpret_identifier (pfile, base,
				1225	pfile->buffer->cur - base);
				1226	}
				1227	else
				1228	{
				1229	len = cur - base;
				1230	hash = HT_HASHFINISH (hash, len);
				1231
				1232	result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
				1233	base, len, hash, HT_ALLOC));
				1234	}
				1235
				1236	/* Rarely, identifiers require diagnostics when lexed. */
				1237	if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
				1238	&& !pfile->state.skipping, 0))
				1239	{
				1240	/* It is allowed to poison the same identifier twice. */
				1241	if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
				1242	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
				1243	NODE_NAME (result));
				1244
				1245	/* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
				1246	replacement list of a variadic macro. */
				1247	if (result == pfile->spec_nodes.n__VA_ARGS__
				1248	&& !pfile->state.va_args_ok)
				1249	cpp_error (pfile, CPP_DL_PEDWARN,
				1250	"__VA_ARGS__ can only appear in the expansion"
				1251	" of a C99 variadic macro");
				1252
				1253	/* For -Wc++-compat, warn about use of C++ named operators. */
				1254	if (result->flags & NODE_WARN_OPERATOR)
				1255	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
				1256	"identifier \"%s\" is a special operator name in C++",
				1257	NODE_NAME (result));
				1258	}
				1259
				1260	return result;
				1261	}
				1262
				1263	/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
				1264	static void
				1265	lex_number (cpp_reader pfile, cpp_string number,
				1266	struct normalize_state *nst)
				1267	{
				1268	const uchar *cur;
				1269	const uchar *base;
				1270	uchar *dest;
				1271
				1272	base = pfile->buffer->cur - 1;
				1273	do
				1274	{
				1275	cur = pfile->buffer->cur;
				1276
				1277	/* N.B. ISIDNUM does not include $. */
				1278	while (ISIDNUM (cur) \|\| cur == '.' \|\| VALID_SIGN (*cur, cur[-1]))
				1279	{
				1280	cur++;
				1281	NORMALIZE_STATE_UPDATE_IDNUM (nst);
				1282	}
				1283
				1284	pfile->buffer->cur = cur;
				1285	}
				1286	while (forms_identifier_p (pfile, false, nst));
				1287
				1288	number->len = cur - base;
				1289	dest = _cpp_unaligned_alloc (pfile, number->len + 1);
				1290	memcpy (dest, base, number->len);
				1291	dest[number->len] = '\0';
				1292	number->text = dest;
				1293	}
				1294
				1295	/* Create a token of type TYPE with a literal spelling. */
				1296	static void
				1297	create_literal (cpp_reader pfile, cpp_token token, const uchar *base,
				1298	unsigned int len, enum cpp_ttype type)
				1299	{
				1300	uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
				1301
				1302	memcpy (dest, base, len);
				1303	dest[len] = '\0';
				1304	token->type = type;
				1305	token->val.str.len = len;
				1306	token->val.str.text = dest;
				1307	}
				1308
				1309	/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
				1310	sequence from FIRST_BUFF_P to LAST_BUFF_P. /
				1311
				1312	static void
				1313	bufring_append (cpp_reader pfile, const uchar base, size_t len,
				1314	_cpp_buff first_buff_p, _cpp_buff last_buff_p)
				1315	{
				1316	_cpp_buff first_buff = first_buff_p;
				1317	_cpp_buff last_buff = last_buff_p;
				1318
				1319	if (first_buff == NULL)
				1320	first_buff = last_buff = _cpp_get_buff (pfile, len);
				1321	else if (len > BUFF_ROOM (last_buff))
				1322	{
				1323	size_t room = BUFF_ROOM (last_buff);
				1324	memcpy (BUFF_FRONT (last_buff), base, room);
				1325	BUFF_FRONT (last_buff) += room;
				1326	base += room;
				1327	len -= room;
				1328	last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
				1329	}
				1330
				1331	memcpy (BUFF_FRONT (last_buff), base, len);
				1332	BUFF_FRONT (last_buff) += len;
				1333
				1334	*first_buff_p = first_buff;
				1335	*last_buff_p = last_buff;
				1336	}
				1337
				1338	/* Lexes a raw string. The stored string contains the spelling, including
				1339	double quotes, delimiter string, '(' and ')', any leading
				1340	'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
				1341	literal, or CPP_OTHER if it was not properly terminated.
				1342
				1343	The spelling is NUL-terminated, but it is not guaranteed that this
				1344	is the first NUL since embedded NULs are preserved. */
				1345
				1346	static void
				1347	lex_raw_string (cpp_reader pfile, cpp_token token, const uchar *base,
				1348	const uchar *cur)
				1349	{
				1350	const uchar *raw_prefix;
				1351	unsigned int raw_prefix_len = 0;
				1352	enum cpp_ttype type;
				1353	size_t total_len = 0;
				1354	_cpp_buff first_buff = NULL, last_buff = NULL;
				1355	_cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
				1356
				1357	type = (*base == 'L' ? CPP_WSTRING :
				1358	*base == 'U' ? CPP_STRING32 :
				1359	*base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
				1360	: CPP_STRING);
				1361
				1362	raw_prefix = cur + 1;
				1363	while (raw_prefix_len < 16)
				1364	{
				1365	switch (raw_prefix[raw_prefix_len])
				1366	{
				1367	case ' ': case '(': case ')': case '\\': case '\t':
				1368	case '\v': case '\f': case '\n': default:
				1369	break;
				1370	/* Basic source charset except the above chars. */
				1371	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
				1372	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
				1373	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
				1374	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
				1375	case 'y': case 'z':
				1376	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
				1377	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
				1378	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
				1379	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
				1380	case 'Y': case 'Z':
				1381	case '0': case '1': case '2': case '3': case '4': case '5':
				1382	case '6': case '7': case '8': case '9':
				1383	case '_': case '{': case '}': case '#': case '[': case ']':
				1384	case '<': case '>': case '%': case ':': case ';': case '.':
				1385	case '?': case '*': case '+': case '-': case '/': case '^':
				1386	case '&': case '\|': case '~': case '!': case '=': case ',':
				1387	case '"': case '\'':
				1388	raw_prefix_len++;
				1389	continue;
				1390	}
				1391	break;
				1392	}
				1393
				1394	if (raw_prefix[raw_prefix_len] != '(')
				1395	{
				1396	int col = CPP_BUF_COLUMN (pfile->buffer, raw_prefix + raw_prefix_len)
				1397	+ 1;
				1398	if (raw_prefix_len == 16)
				1399	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
				1400	"raw string delimiter longer than 16 characters");
				1401	else
				1402	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, col,
				1403	"invalid character '%c' in raw string delimiter",
				1404	(int) raw_prefix[raw_prefix_len]);
				1405	pfile->buffer->cur = raw_prefix - 1;
				1406	create_literal (pfile, token, base, raw_prefix - 1 - base, CPP_OTHER);
				1407	return;
				1408	}
				1409
				1410	cur = raw_prefix + raw_prefix_len + 1;
				1411	for (;;)
				1412	{
				1413	#define BUF_APPEND(STR,LEN) \
				1414	do { \
				1415	bufring_append (pfile, (const uchar *)(STR), (LEN), \
				1416	&first_buff, &last_buff); \
				1417	total_len += (LEN); \
				1418	} while (0);
				1419
				1420	cppchar_t c;
				1421
				1422	/* If we previously performed any trigraph or line splicing
				1423	transformations, undo them within the body of the raw string. */
				1424	while (note->pos < cur)
				1425	++note;
				1426	for (; note->pos == cur; ++note)
				1427	{
				1428	switch (note->type)
				1429	{
				1430	case '\\':
				1431	case ' ':
				1432	/* Restore backslash followed by newline. */
				1433	BUF_APPEND (base, cur - base);
				1434	base = cur;
				1435	BUF_APPEND ("\\", 1);
				1436	after_backslash:
				1437	if (note->type == ' ')
				1438	{
				1439	/* GNU backslash whitespace newline extension. FIXME
				1440	could be any sequence of non-vertical space. When we
				1441	can properly restore any such sequence, we should mark
				1442	this note as handled so _cpp_process_line_notes
				1443	doesn't warn. */
				1444	BUF_APPEND (" ", 1);
				1445	}
				1446
				1447	BUF_APPEND ("\n", 1);
				1448	break;
				1449
				1450	case 0:
				1451	/* Already handled. */
				1452	break;
				1453
				1454	default:
				1455	if (_cpp_trigraph_map[note->type])
				1456	{
				1457	/* Don't warn about this trigraph in
				1458	_cpp_process_line_notes, since trigraphs show up as
				1459	trigraphs in raw strings. */
				1460	uchar type = note->type;
				1461	note->type = 0;
				1462
				1463	if (!CPP_OPTION (pfile, trigraphs))
				1464	/* If we didn't convert the trigraph in the first
				1465	place, don't do anything now either. */
				1466	break;
				1467
				1468	BUF_APPEND (base, cur - base);
				1469	base = cur;
				1470	BUF_APPEND ("??", 2);
				1471
				1472	/* ??/ followed by newline gets two line notes, one for
				1473	the trigraph and one for the backslash/newline. */
				1474	if (type == '/' && note[1].pos == cur)
				1475	{
				1476	if (note[1].type != '\\'
				1477	&& note[1].type != ' ')
				1478	abort ();
				1479	BUF_APPEND ("/", 1);
				1480	++note;
				1481	goto after_backslash;
				1482	}
				1483	/* The ) from ??) could be part of the suffix. */
				1484	else if (type == ')'
				1485	&& strncmp ((const char *) cur+1,
				1486	(const char *) raw_prefix,
				1487	raw_prefix_len) == 0
				1488	&& cur[raw_prefix_len+1] == '"')
				1489	{
				1490	BUF_APPEND (")", 1);
				1491	base++;
				1492	cur += raw_prefix_len + 2;
				1493	goto break_outer_loop;
				1494	}
				1495	else
				1496	{
				1497	/* Skip the replacement character. */
				1498	base = ++cur;
				1499	BUF_APPEND (&type, 1);
				1500	}
				1501	}
				1502	else
				1503	abort ();
				1504	break;
				1505	}
				1506	}
				1507	c = *cur++;
				1508
				1509	if (c == ')'
				1510	&& strncmp ((const char ) cur, (const char ) raw_prefix,
				1511	raw_prefix_len) == 0
				1512	&& cur[raw_prefix_len] == '"')
				1513	{
				1514	cur += raw_prefix_len + 1;
				1515	break;
				1516	}
				1517	else if (c == '\n')
				1518	{
				1519	if (pfile->state.in_directive
				1520	\|\| pfile->state.parsing_args
				1521	\|\| pfile->state.in_deferred_pragma)
				1522	{
				1523	cur--;
				1524	type = CPP_OTHER;
				1525	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
				1526	"unterminated raw string");
				1527	break;
				1528	}
				1529
				1530	BUF_APPEND (base, cur - base);
				1531
				1532	if (pfile->buffer->cur < pfile->buffer->rlimit)
				1533	CPP_INCREMENT_LINE (pfile, 0);
				1534	pfile->buffer->need_line = true;
				1535
				1536	pfile->buffer->cur = cur-1;
				1537	_cpp_process_line_notes (pfile, false);
				1538	if (!_cpp_get_fresh_line (pfile))
				1539	{
				1540	source_location src_loc = token->src_loc;
				1541	token->type = CPP_EOF;
				1542	/* Tell the compiler the line number of the EOF token. */
				1543	token->src_loc = pfile->line_table->highest_line;
				1544	token->flags = BOL;
				1545	if (first_buff != NULL)
				1546	_cpp_release_buff (pfile, first_buff);
				1547	cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
				1548	"unterminated raw string");
				1549	return;
				1550	}
				1551
				1552	cur = base = pfile->buffer->cur;
				1553	note = &pfile->buffer->notes[pfile->buffer->cur_note];
				1554	}
				1555	}
				1556	break_outer_loop:
				1557
				1558	if (CPP_OPTION (pfile, user_literals))
				1559	{
				1560	/* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
				1561	underscore is ill-formed. Since this breaks programs using macros
				1562	from inttypes.h, we generate a warning and treat the ud-suffix as a
				1563	separate preprocessing token. This approach is under discussion by
				1564	the standards committee, and has been adopted as a conforming
				1565	extension by other front ends such as clang. */
				1566	if (ISALPHA (*cur))
				1567	{
				1568	/* Raise a warning, but do not consume subsequent tokens. */
				1569	if (CPP_OPTION (pfile, warn_literal_suffix))
				1570	cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
				1571	token->src_loc, 0,
				1572	"invalid suffix on literal; C++11 requires "
				1573	"a space between literal and identifier");
				1574	}
				1575	/* Grab user defined literal suffix. */
				1576	else if (*cur == '_')
				1577	{
				1578	type = cpp_userdef_string_add_type (type);
				1579	++cur;
				1580
				1581	while (ISIDNUM (*cur))
				1582	++cur;
				1583	}
				1584	}
				1585
				1586	pfile->buffer->cur = cur;
				1587	if (first_buff == NULL)
				1588	create_literal (pfile, token, base, cur - base, type);
				1589	else
				1590	{
				1591	uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
				1592
				1593	token->type = type;
				1594	token->val.str.len = total_len + (cur - base);
				1595	token->val.str.text = dest;
				1596	last_buff = first_buff;
				1597	while (last_buff != NULL)
				1598	{
				1599	memcpy (dest, last_buff->base,
				1600	BUFF_FRONT (last_buff) - last_buff->base);
				1601	dest += BUFF_FRONT (last_buff) - last_buff->base;
				1602	last_buff = last_buff->next;
				1603	}
				1604	_cpp_release_buff (pfile, first_buff);
				1605	memcpy (dest, base, cur - base);
				1606	dest[cur - base] = '\0';
				1607	}
				1608	}
				1609
				1610	/* Lexes a string, character constant, or angle-bracketed header file
				1611	name. The stored string contains the spelling, including opening
				1612	quote and any leading 'L', 'u', 'U' or 'u8' and optional
				1613	'R' modifier. It returns the type of the literal, or CPP_OTHER
				1614	if it was not properly terminated, or CPP_LESS for an unterminated
				1615	header name which must be relexed as normal tokens.
				1616
				1617	The spelling is NUL-terminated, but it is not guaranteed that this
				1618	is the first NUL since embedded NULs are preserved. */
				1619	static void
				1620	lex_string (cpp_reader pfile, cpp_token token, const uchar *base)
				1621	{
				1622	bool saw_NUL = false;
				1623	const uchar *cur;
				1624	cppchar_t terminator;
				1625	enum cpp_ttype type;
				1626
				1627	cur = base;
				1628	terminator = *cur++;
				1629	if (terminator == 'L' \|\| terminator == 'U')
				1630	terminator = *cur++;
				1631	else if (terminator == 'u')
				1632	{
				1633	terminator = *cur++;
				1634	if (terminator == '8')
				1635	terminator = *cur++;
				1636	}
				1637	if (terminator == 'R')
				1638	{
				1639	lex_raw_string (pfile, token, base, cur);
				1640	return;
				1641	}
				1642	if (terminator == '"')
				1643	type = (*base == 'L' ? CPP_WSTRING :
				1644	*base == 'U' ? CPP_STRING32 :
				1645	*base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
				1646	: CPP_STRING);
				1647	else if (terminator == '\'')
				1648	type = (*base == 'L' ? CPP_WCHAR :
				1649	*base == 'U' ? CPP_CHAR32 :
				1650	*base == 'u' ? CPP_CHAR16 : CPP_CHAR);
				1651	else
				1652	terminator = '>', type = CPP_HEADER_NAME;
				1653
				1654	for (;;)
				1655	{
				1656	cppchar_t c = *cur++;
				1657
				1658	/* In #include-style directives, terminators are not escapable. */
				1659	if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
				1660	cur++;
				1661	else if (c == terminator)
				1662	break;
				1663	else if (c == '\n')
				1664	{
				1665	cur--;
				1666	/* Unmatched quotes always yield undefined behavior, but
				1667	greedy lexing means that what appears to be an unterminated
				1668	header name may actually be a legitimate sequence of tokens. */
				1669	if (terminator == '>')
				1670	{
				1671	token->type = CPP_LESS;
				1672	return;
				1673	}
				1674	type = CPP_OTHER;
				1675	break;
				1676	}
				1677	else if (c == '\0')
				1678	saw_NUL = true;
				1679	}
				1680
				1681	if (saw_NUL && !pfile->state.skipping)
				1682	cpp_error (pfile, CPP_DL_WARNING,
				1683	"null character(s) preserved in literal");
				1684
				1685	if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
				1686	cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
				1687	(int) terminator);
				1688
				1689	if (CPP_OPTION (pfile, user_literals))
				1690	{
				1691	/* According to C++11 [lex.ext]p10, a ud-suffix not starting with an
				1692	underscore is ill-formed. Since this breaks programs using macros
				1693	from inttypes.h, we generate a warning and treat the ud-suffix as a
				1694	separate preprocessing token. This approach is under discussion by
				1695	the standards committee, and has been adopted as a conforming
				1696	extension by other front ends such as clang. */
				1697	if (ISALPHA (*cur))
				1698	{
				1699	/* Raise a warning, but do not consume subsequent tokens. */
				1700	if (CPP_OPTION (pfile, warn_literal_suffix))
				1701	cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
				1702	token->src_loc, 0,
				1703	"invalid suffix on literal; C++11 requires "
				1704	"a space between literal and identifier");
				1705	}
				1706	/* Grab user defined literal suffix. */
				1707	else if (*cur == '_')
				1708	{
				1709	type = cpp_userdef_char_add_type (type);
				1710	type = cpp_userdef_string_add_type (type);
				1711	++cur;
				1712
				1713	while (ISIDNUM (*cur))
				1714	++cur;
				1715	}
				1716	}
				1717
				1718	pfile->buffer->cur = cur;
				1719	create_literal (pfile, token, base, cur - base, type);
				1720	}
				1721
				1722	/* Return the comment table. The client may not make any assumption
				1723	about the ordering of the table. */
				1724	cpp_comment_table *
				1725	cpp_get_comments (cpp_reader *pfile)
				1726	{
				1727	return &pfile->comments;
				1728	}
				1729
				1730	/* Append a comment to the end of the comment table. */
				1731	static void
				1732	store_comment (cpp_reader pfile, cpp_token token)
				1733	{
				1734	int len;
				1735
				1736	if (pfile->comments.allocated == 0)
				1737	{
				1738	pfile->comments.allocated = 256;
				1739	pfile->comments.entries = (cpp_comment *) xmalloc
				1740	(pfile->comments.allocated * sizeof (cpp_comment));
				1741	}
				1742
				1743	if (pfile->comments.count == pfile->comments.allocated)
				1744	{
				1745	pfile->comments.allocated *= 2;
				1746	pfile->comments.entries = (cpp_comment *) xrealloc
				1747	(pfile->comments.entries,
				1748	pfile->comments.allocated * sizeof (cpp_comment));
				1749	}
				1750
				1751	len = token->val.str.len;
				1752
				1753	/* Copy comment. Note, token may not be NULL terminated. */
				1754	pfile->comments.entries[pfile->comments.count].comment =
				1755	(char ) xmalloc (sizeof (char) (len + 1));
				1756	memcpy (pfile->comments.entries[pfile->comments.count].comment,
				1757	token->val.str.text, len);
				1758	pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
				1759
				1760	/* Set source location. */
				1761	pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
				1762
				1763	/* Increment the count of entries in the comment table. */
				1764	pfile->comments.count++;
				1765	}
				1766
				1767	/* The stored comment includes the comment start and any terminator. */
				1768	static void
				1769	save_comment (cpp_reader pfile, cpp_token token, const unsigned char *from,
				1770	cppchar_t type)
				1771	{
				1772	unsigned char *buffer;
				1773	unsigned int len, clen, i;
				1774
				1775	len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
				1776
				1777	/* C++ comments probably (not definitely) have moved past a new
				1778	line, which we don't want to save in the comment. */
				1779	if (is_vspace (pfile->buffer->cur[-1]))
				1780	len--;
				1781
				1782	/* If we are currently in a directive or in argument parsing, then
				1783	we need to store all C++ comments as C comments internally, and
				1784	so we need to allocate a little extra space in that case.
				1785
				1786	Note that the only time we encounter a directive here is
				1787	when we are saving comments in a "#define". */
				1788	clen = ((pfile->state.in_directive \|\| pfile->state.parsing_args)
				1789	&& type == '/') ? len + 2 : len;
				1790
				1791	buffer = _cpp_unaligned_alloc (pfile, clen);
				1792
				1793	token->type = CPP_COMMENT;
				1794	token->val.str.len = clen;
				1795	token->val.str.text = buffer;
				1796
				1797	buffer[0] = '/';
				1798	memcpy (buffer + 1, from, len - 1);
				1799
				1800	/* Finish conversion to a C comment, if necessary. */
				1801	if ((pfile->state.in_directive \|\| pfile->state.parsing_args) && type == '/')
				1802	{
				1803	buffer[1] = '*';
				1804	buffer[clen - 2] = '*';
				1805	buffer[clen - 1] = '/';
				1806	/* As there can be in a C++ comments illegal sequences for C comments
				1807	we need to filter them out. */
				1808	for (i = 2; i < (clen - 2); i++)
				1809	if (buffer[i] == '/' && (buffer[i - 1] == '' \|\| buffer[i + 1] == ''))
				1810	buffer[i] = '\|';
				1811	}
				1812
				1813	/* Finally store this comment for use by clients of libcpp. */
				1814	store_comment (pfile, token);
				1815	}
				1816
				1817	/* Allocate COUNT tokens for RUN. */
				1818	void
				1819	_cpp_init_tokenrun (tokenrun *run, unsigned int count)
				1820	{
				1821	run->base = XNEWVEC (cpp_token, count);
				1822	run->limit = run->base + count;
				1823	run->next = NULL;
				1824	}
				1825
				1826	/* Returns the next tokenrun, or creates one if there is none. */
				1827	static tokenrun *
				1828	next_tokenrun (tokenrun *run)
				1829	{
				1830	if (run->next == NULL)
				1831	{
				1832	run->next = XNEW (tokenrun);
				1833	run->next->prev = run;
				1834	_cpp_init_tokenrun (run->next, 250);
				1835	}
				1836
				1837	return run->next;
				1838	}
				1839
				1840	/* Return the number of not yet processed token in a given
				1841	context. */
				1842	int
				1843	_cpp_remaining_tokens_num_in_context (cpp_context *context)
				1844	{
				1845	if (context->tokens_kind == TOKENS_KIND_DIRECT)
				1846	return (LAST (context).token - FIRST (context).token);
				1847	else if (context->tokens_kind == TOKENS_KIND_INDIRECT
				1848	\|\| context->tokens_kind == TOKENS_KIND_EXTENDED)
				1849	return (LAST (context).ptoken - FIRST (context).ptoken);
				1850	else
				1851	abort ();
				1852	}
				1853
				1854	/* Returns the token present at index INDEX in a given context. If
				1855	INDEX is zero, the next token to be processed is returned. */
				1856	static const cpp_token*
				1857	_cpp_token_from_context_at (cpp_context *context, int index)
				1858	{
				1859	if (context->tokens_kind == TOKENS_KIND_DIRECT)
				1860	return &(FIRST (context).token[index]);
				1861	else if (context->tokens_kind == TOKENS_KIND_INDIRECT
				1862	\|\| context->tokens_kind == TOKENS_KIND_EXTENDED)
				1863	return FIRST (context).ptoken[index];
				1864	else
				1865	abort ();
				1866	}
				1867
				1868	/* Look ahead in the input stream. */
				1869	const cpp_token *
				1870	cpp_peek_token (cpp_reader *pfile, int index)
				1871	{
				1872	cpp_context *context = pfile->context;
				1873	const cpp_token *peektok;
				1874	int count;
				1875
				1876	/* First, scan through any pending cpp_context objects. */
				1877	while (context->prev)
				1878	{
				1879	ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
				1880
				1881	if (index < (int) sz)
				1882	return _cpp_token_from_context_at (context, index);
				1883	index -= (int) sz;
				1884	context = context->prev;
				1885	}
				1886
				1887	/* We will have to read some new tokens after all (and do so
				1888	without invalidating preceding tokens). */
				1889	count = index;
				1890	pfile->keep_tokens++;
				1891
				1892	do
				1893	{
				1894	peektok = _cpp_lex_token (pfile);
				1895	if (peektok->type == CPP_EOF)
				1896	return peektok;
				1897	}
				1898	while (index--);
				1899
				1900	_cpp_backup_tokens_direct (pfile, count + 1);
				1901	pfile->keep_tokens--;
				1902
				1903	return peektok;
				1904	}
				1905
				1906	/* Allocate a single token that is invalidated at the same time as the
				1907	rest of the tokens on the line. Has its line and col set to the
				1908	same as the last lexed token, so that diagnostics appear in the
				1909	right place. */
				1910	cpp_token *
				1911	_cpp_temp_token (cpp_reader *pfile)
				1912	{
				1913	cpp_token old, result;
				1914	ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
				1915	ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
				1916
				1917	old = pfile->cur_token - 1;
				1918	/* Any pre-existing lookaheads must not be clobbered. */
				1919	if (la)
				1920	{
				1921	if (sz <= la)
				1922	{
				1923	tokenrun *next = next_tokenrun (pfile->cur_run);
				1924
				1925	if (sz < la)
				1926	memmove (next->base + 1, next->base,
				1927	(la - sz) * sizeof (cpp_token));
				1928
				1929	next->base[0] = pfile->cur_run->limit[-1];
				1930	}
				1931
				1932	if (sz > 1)
				1933	memmove (pfile->cur_token + 1, pfile->cur_token,
				1934	MIN (la, sz - 1) * sizeof (cpp_token));
				1935	}
				1936
				1937	if (!sz && pfile->cur_token == pfile->cur_run->limit)
				1938	{
				1939	pfile->cur_run = next_tokenrun (pfile->cur_run);
				1940	pfile->cur_token = pfile->cur_run->base;
				1941	}
				1942
				1943	result = pfile->cur_token++;
				1944	result->src_loc = old->src_loc;
				1945	return result;
				1946	}
				1947
				1948	/* Lex a token into RESULT (external interface). Takes care of issues
				1949	like directive handling, token lookahead, multiple include
				1950	optimization and skipping. */
				1951	const cpp_token *
				1952	_cpp_lex_token (cpp_reader *pfile)
				1953	{
				1954	cpp_token *result;
				1955
				1956	for (;;)
				1957	{
				1958	if (pfile->cur_token == pfile->cur_run->limit)
				1959	{
				1960	pfile->cur_run = next_tokenrun (pfile->cur_run);
				1961	pfile->cur_token = pfile->cur_run->base;
				1962	}
				1963	/* We assume that the current token is somewhere in the current
				1964	run. */
				1965	if (pfile->cur_token < pfile->cur_run->base
				1966	\|\| pfile->cur_token >= pfile->cur_run->limit)
				1967	abort ();
				1968
				1969	if (pfile->lookaheads)
				1970	{
				1971	pfile->lookaheads--;
				1972	result = pfile->cur_token++;
				1973	}
				1974	else
				1975	result = _cpp_lex_direct (pfile);
				1976
				1977	if (result->flags & BOL)
				1978	{
				1979	/* Is this a directive. If _cpp_handle_directive returns
				1980	false, it is an assembler #. */
				1981	if (result->type == CPP_HASH
				1982	/* 6.10.3 p 11: Directives in a list of macro arguments
				1983	gives undefined behavior. This implementation
				1984	handles the directive as normal. */
				1985	&& pfile->state.parsing_args != 1)
				1986	{
				1987	if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
				1988	{
				1989	if (pfile->directive_result.type == CPP_PADDING)
				1990	continue;
				1991	result = &pfile->directive_result;
				1992	}
				1993	}
				1994	else if (pfile->state.in_deferred_pragma)
				1995	result = &pfile->directive_result;
				1996
				1997	if (pfile->cb.line_change && !pfile->state.skipping)
				1998	pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
				1999	}
				2000
				2001	/* We don't skip tokens in directives. */
				2002	if (pfile->state.in_directive \|\| pfile->state.in_deferred_pragma)
				2003	break;
				2004
				2005	/* Outside a directive, invalidate controlling macros. At file
				2006	EOF, _cpp_lex_direct takes care of popping the buffer, so we never
				2007	get here and MI optimization works. */
				2008	pfile->mi_valid = false;
				2009
				2010	if (!pfile->state.skipping \|\| result->type == CPP_EOF)
				2011	break;
				2012	}
				2013
				2014	return result;
				2015	}
				2016
				2017	/* Returns true if a fresh line has been loaded. */
				2018	bool
				2019	_cpp_get_fresh_line (cpp_reader *pfile)
				2020	{
				2021	int return_at_eof;
				2022
				2023	/* We can't get a new line until we leave the current directive. */
				2024	if (pfile->state.in_directive)
				2025	return false;
				2026
				2027	for (;;)
				2028	{
				2029	cpp_buffer *buffer = pfile->buffer;
				2030
				2031	if (!buffer->need_line)
				2032	return true;
				2033
				2034	if (buffer->next_line < buffer->rlimit)
				2035	{
				2036	_cpp_clean_line (pfile);
				2037	return true;
				2038	}
				2039
				2040	/* First, get out of parsing arguments state. */
				2041	if (pfile->state.parsing_args)
				2042	return false;
				2043
				2044	/* End of buffer. Non-empty files should end in a newline. */
				2045	if (buffer->buf != buffer->rlimit
				2046	&& buffer->next_line > buffer->rlimit
				2047	&& !buffer->from_stage3)
				2048	{
				2049	/* Clip to buffer size. */
				2050	buffer->next_line = buffer->rlimit;
				2051	}
				2052
				2053	return_at_eof = buffer->return_at_eof;
				2054	_cpp_pop_buffer (pfile);
				2055	if (pfile->buffer == NULL \|\| return_at_eof)
				2056	return false;
				2057	}
				2058	}
				2059
				2060	#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
				2061	do \
				2062	{ \
				2063	result->type = ELSE_TYPE; \
				2064	if (*buffer->cur == CHAR) \
				2065	buffer->cur++, result->type = THEN_TYPE; \
				2066	} \
				2067	while (0)
				2068
				2069	/* Lex a token into pfile->cur_token, which is also incremented, to
				2070	get diagnostics pointing to the correct location.
				2071
				2072	Does not handle issues such as token lookahead, multiple-include
				2073	optimization, directives, skipping etc. This function is only
				2074	suitable for use by _cpp_lex_token, and in special cases like
				2075	lex_expansion_token which doesn't care for any of these issues.
				2076
				2077	When meeting a newline, returns CPP_EOF if parsing a directive,
				2078	otherwise returns to the start of the token buffer if permissible.
				2079	Returns the location of the lexed token. */
				2080	cpp_token *
				2081	_cpp_lex_direct (cpp_reader *pfile)
				2082	{
				2083	cppchar_t c;
				2084	cpp_buffer *buffer;
				2085	const unsigned char *comment_start;
				2086	cpp_token *result = pfile->cur_token++;
				2087
				2088	fresh_line:
				2089	result->flags = 0;
				2090	buffer = pfile->buffer;
				2091	if (buffer->need_line)
				2092	{
				2093	if (pfile->state.in_deferred_pragma)
				2094	{
				2095	result->type = CPP_PRAGMA_EOL;
				2096	pfile->state.in_deferred_pragma = false;
				2097	if (!pfile->state.pragma_allow_expansion)
				2098	pfile->state.prevent_expansion--;
				2099	return result;
				2100	}
				2101	if (!_cpp_get_fresh_line (pfile))
				2102	{
				2103	result->type = CPP_EOF;
				2104	if (!pfile->state.in_directive)
				2105	{
				2106	/* Tell the compiler the line number of the EOF token. */
				2107	result->src_loc = pfile->line_table->highest_line;
				2108	result->flags = BOL;
				2109	}
				2110	return result;
				2111	}
				2112	if (!pfile->keep_tokens)
				2113	{
				2114	pfile->cur_run = &pfile->base_run;
				2115	result = pfile->base_run.base;
				2116	pfile->cur_token = result + 1;
				2117	}
				2118	result->flags = BOL;
				2119	if (pfile->state.parsing_args == 2)
				2120	result->flags \|= PREV_WHITE;
				2121	}
				2122	buffer = pfile->buffer;
				2123	update_tokens_line:
				2124	result->src_loc = pfile->line_table->highest_line;
				2125
				2126	skipped_white:
				2127	if (buffer->cur >= buffer->notes[buffer->cur_note].pos
				2128	&& !pfile->overlaid_buffer)
				2129	{
				2130	_cpp_process_line_notes (pfile, false);
				2131	result->src_loc = pfile->line_table->highest_line;
				2132	}
				2133	c = *buffer->cur++;
				2134
				2135	if (pfile->forced_token_location_p)
				2136	result->src_loc = *pfile->forced_token_location_p;
				2137	else
				2138	result->src_loc = linemap_position_for_column (pfile->line_table,
				2139	CPP_BUF_COLUMN (buffer, buffer->cur));
				2140
				2141	switch (c)
				2142	{
				2143	case ' ': case '\t': case '\f': case '\v': case '\0':
				2144	result->flags \|= PREV_WHITE;
				2145	skip_whitespace (pfile, c);
				2146	goto skipped_white;
				2147
				2148	case '\n':
				2149	if (buffer->cur < buffer->rlimit)
				2150	CPP_INCREMENT_LINE (pfile, 0);
				2151	buffer->need_line = true;
				2152	goto fresh_line;
				2153
				2154	case '0': case '1': case '2': case '3': case '4':
				2155	case '5': case '6': case '7': case '8': case '9':
				2156	{
				2157	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
				2158	result->type = CPP_NUMBER;
				2159	lex_number (pfile, &result->val.str, &nst);
				2160	warn_about_normalization (pfile, result, &nst);
				2161	break;
				2162	}
				2163
				2164	case 'L':
				2165	case 'u':
				2166	case 'U':
				2167	case 'R':
				2168	/* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
				2169	wide strings or raw strings. */
				2170	if (c == 'L' \|\| CPP_OPTION (pfile, rliterals)
				2171	\|\| (c != 'R' && CPP_OPTION (pfile, uliterals)))
				2172	{
				2173	if ((*buffer->cur == '\'' && c != 'R')
				2174	\|\| *buffer->cur == '"'
				2175	\|\| (*buffer->cur == 'R'
				2176	&& c != 'R'
				2177	&& buffer->cur[1] == '"'
				2178	&& CPP_OPTION (pfile, rliterals))
				2179	\|\| (*buffer->cur == '8'
				2180	&& c == 'u'
				2181	&& (buffer->cur[1] == '"'
				2182	\|\| (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
				2183	&& CPP_OPTION (pfile, rliterals)))))
				2184	{
				2185	lex_string (pfile, result, buffer->cur - 1);
				2186	break;
				2187	}
				2188	}
				2189	/* Fall through. */
				2190
				2191	case '_':
				2192	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
				2193	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
				2194	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
				2195	case 's': case 't': case 'v': case 'w': case 'x':
				2196	case 'y': case 'z':
				2197	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
				2198	case 'G': case 'H': case 'I': case 'J': case 'K':
				2199	case 'M': case 'N': case 'O': case 'P': case 'Q':
				2200	case 'S': case 'T': case 'V': case 'W': case 'X':
				2201	case 'Y': case 'Z':
				2202	result->type = CPP_NAME;
				2203	{
				2204	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
				2205	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
				2206	&nst);
				2207	warn_about_normalization (pfile, result, &nst);
				2208	}
				2209
				2210	/* Convert named operators to their proper types. */
				2211	if (result->val.node.node->flags & NODE_OPERATOR)
				2212	{
				2213	result->flags \|= NAMED_OP;
				2214	result->type = (enum cpp_ttype) result->val.node.node->directive_index;
				2215	}
				2216	break;
				2217
				2218	case '\'':
				2219	case '"':
				2220	lex_string (pfile, result, buffer->cur - 1);
				2221	break;
				2222
				2223	case '/':
				2224	/* A potential block or line comment. */
				2225	comment_start = buffer->cur;
				2226	c = *buffer->cur;
				2227
				2228	if (c == '*')
				2229	{
				2230	if (_cpp_skip_block_comment (pfile))
				2231	cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
				2232	}
				2233	else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
				2234	\|\| cpp_in_system_header (pfile)))
				2235	{
				2236	/* Warn about comments only if pedantically GNUC89, and not
				2237	in system headers. */
				2238	if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
				2239	&& ! buffer->warned_cplusplus_comments)
				2240	{
				2241	cpp_error (pfile, CPP_DL_PEDWARN,
				2242	"C++ style comments are not allowed in ISO C90");
				2243	cpp_error (pfile, CPP_DL_PEDWARN,
				2244	"(this will be reported only once per input file)");
				2245	buffer->warned_cplusplus_comments = 1;
				2246	}
				2247
				2248	if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
				2249	cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
				2250	}
				2251	else if (c == '=')
				2252	{
				2253	buffer->cur++;
				2254	result->type = CPP_DIV_EQ;
				2255	break;
				2256	}
				2257	else
				2258	{
				2259	result->type = CPP_DIV;
				2260	break;
				2261	}
				2262
				2263	if (!pfile->state.save_comments)
				2264	{
				2265	result->flags \|= PREV_WHITE;
				2266	goto update_tokens_line;
				2267	}
				2268
				2269	/* Save the comment as a token in its own right. */
				2270	save_comment (pfile, result, comment_start, c);
				2271	break;
				2272
				2273	case '<':
				2274	if (pfile->state.angled_headers)
				2275	{
				2276	lex_string (pfile, result, buffer->cur - 1);
				2277	if (result->type != CPP_LESS)
				2278	break;
				2279	}
				2280
				2281	result->type = CPP_LESS;
				2282	if (*buffer->cur == '=')
				2283	buffer->cur++, result->type = CPP_LESS_EQ;
				2284	else if (*buffer->cur == '<')
				2285	{
				2286	buffer->cur++;
				2287	IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
				2288	}
				2289	else if (CPP_OPTION (pfile, digraphs))
				2290	{
				2291	if (*buffer->cur == ':')
				2292	{
				2293	buffer->cur++;
				2294	result->flags \|= DIGRAPH;
				2295	result->type = CPP_OPEN_SQUARE;
				2296	}
				2297	else if (*buffer->cur == '%')
				2298	{
				2299	buffer->cur++;
				2300	result->flags \|= DIGRAPH;
				2301	result->type = CPP_OPEN_BRACE;
				2302	}
				2303	}
				2304	break;
				2305
				2306	case '>':
				2307	result->type = CPP_GREATER;
				2308	if (*buffer->cur == '=')
				2309	buffer->cur++, result->type = CPP_GREATER_EQ;
				2310	else if (*buffer->cur == '>')
				2311	{
				2312	buffer->cur++;
				2313	IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
				2314	}
				2315	break;
				2316
				2317	case '%':
				2318	result->type = CPP_MOD;
				2319	if (*buffer->cur == '=')
				2320	buffer->cur++, result->type = CPP_MOD_EQ;
				2321	else if (CPP_OPTION (pfile, digraphs))
				2322	{
				2323	if (*buffer->cur == ':')
				2324	{
				2325	buffer->cur++;
				2326	result->flags \|= DIGRAPH;
				2327	result->type = CPP_HASH;
				2328	if (*buffer->cur == '%' && buffer->cur[1] == ':')
				2329	buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
				2330	}
				2331	else if (*buffer->cur == '>')
				2332	{
				2333	buffer->cur++;
				2334	result->flags \|= DIGRAPH;
				2335	result->type = CPP_CLOSE_BRACE;
				2336	}
				2337	}
				2338	break;
				2339
				2340	case '.':
				2341	result->type = CPP_DOT;
				2342	if (ISDIGIT (*buffer->cur))
				2343	{
				2344	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
				2345	result->type = CPP_NUMBER;
				2346	lex_number (pfile, &result->val.str, &nst);
				2347	warn_about_normalization (pfile, result, &nst);
				2348	}
				2349	else if (*buffer->cur == '.' && buffer->cur[1] == '.')
				2350	buffer->cur += 2, result->type = CPP_ELLIPSIS;
				2351	else if (buffer->cur == '' && CPP_OPTION (pfile, cplusplus))
				2352	buffer->cur++, result->type = CPP_DOT_STAR;
				2353	break;
				2354
				2355	case '+':
				2356	result->type = CPP_PLUS;
				2357	if (*buffer->cur == '+')
				2358	buffer->cur++, result->type = CPP_PLUS_PLUS;
				2359	else if (*buffer->cur == '=')
				2360	buffer->cur++, result->type = CPP_PLUS_EQ;
				2361	break;
				2362
				2363	case '-':
				2364	result->type = CPP_MINUS;
				2365	if (*buffer->cur == '>')
				2366	{
				2367	buffer->cur++;
				2368	result->type = CPP_DEREF;
				2369	if (buffer->cur == '' && CPP_OPTION (pfile, cplusplus))
				2370	buffer->cur++, result->type = CPP_DEREF_STAR;
				2371	}
				2372	else if (*buffer->cur == '-')
				2373	buffer->cur++, result->type = CPP_MINUS_MINUS;
				2374	else if (*buffer->cur == '=')
				2375	buffer->cur++, result->type = CPP_MINUS_EQ;
				2376	break;
				2377
				2378	case '&':
				2379	result->type = CPP_AND;
				2380	if (*buffer->cur == '&')
				2381	buffer->cur++, result->type = CPP_AND_AND;
				2382	else if (*buffer->cur == '=')
				2383	buffer->cur++, result->type = CPP_AND_EQ;
				2384	break;
				2385
				2386	case '\|':
				2387	result->type = CPP_OR;
				2388	if (*buffer->cur == '\|')
				2389	buffer->cur++, result->type = CPP_OR_OR;
				2390	else if (*buffer->cur == '=')
				2391	buffer->cur++, result->type = CPP_OR_EQ;
				2392	break;
				2393
				2394	case ':':
				2395	result->type = CPP_COLON;
				2396	if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
				2397	buffer->cur++, result->type = CPP_SCOPE;
				2398	else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
				2399	{
				2400	buffer->cur++;
				2401	result->flags \|= DIGRAPH;
				2402	result->type = CPP_CLOSE_SQUARE;
				2403	}
				2404	break;
				2405
				2406	case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
				2407	case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
				2408	case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
				2409	case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
				2410	case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
				2411
				2412	case '?': result->type = CPP_QUERY; break;
				2413	case '~': result->type = CPP_COMPL; break;
				2414	case ',': result->type = CPP_COMMA; break;
				2415	case '(': result->type = CPP_OPEN_PAREN; break;
				2416	case ')': result->type = CPP_CLOSE_PAREN; break;
				2417	case '[': result->type = CPP_OPEN_SQUARE; break;
				2418	case ']': result->type = CPP_CLOSE_SQUARE; break;
				2419	case '{': result->type = CPP_OPEN_BRACE; break;
				2420	case '}': result->type = CPP_CLOSE_BRACE; break;
				2421	case ';': result->type = CPP_SEMICOLON; break;
				2422
				2423	/* @ is a punctuator in Objective-C. */
				2424	case '@': result->type = CPP_ATSIGN; break;
				2425
				2426	case '$':
				2427	case '\\':
				2428	{
				2429	const uchar *base = --buffer->cur;
				2430	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
				2431
				2432	if (forms_identifier_p (pfile, true, &nst))
				2433	{
				2434	result->type = CPP_NAME;
				2435	result->val.node.node = lex_identifier (pfile, base, true, &nst);
				2436	warn_about_normalization (pfile, result, &nst);
				2437	break;
				2438	}
				2439	buffer->cur++;
				2440	}
				2441
				2442	default:
				2443	create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
				2444	break;
				2445	}
				2446
				2447	return result;
				2448	}
				2449
				2450	/* An upper bound on the number of bytes needed to spell TOKEN.
				2451	Does not include preceding whitespace. */
				2452	unsigned int
				2453	cpp_token_len (const cpp_token *token)
				2454	{
				2455	unsigned int len;
				2456
				2457	switch (TOKEN_SPELL (token))
				2458	{
				2459	default: len = 6; break;
				2460	case SPELL_LITERAL: len = token->val.str.len; break;
				2461	case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * 10; break;
				2462	}
				2463
				2464	return len;
				2465	}
				2466
				2467	/* Parse UTF-8 out of NAMEP and place a \U escape in BUFFER.
				2468	Return the number of bytes read out of NAME. (There are always
				2469	10 bytes written to BUFFER.) */
				2470
				2471	static size_t
				2472	utf8_to_ucn (unsigned char buffer, const unsigned char name)
				2473	{
				2474	int j;
				2475	int ucn_len = 0;
				2476	int ucn_len_c;
				2477	unsigned t;
				2478	unsigned long utf32;
				2479
				2480	/* Compute the length of the UTF-8 sequence. */
				2481	for (t = *name; t & 0x80; t <<= 1)
				2482	ucn_len++;
				2483
				2484	utf32 = *name & (0x7F >> ucn_len);
				2485	for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
				2486	{
				2487	utf32 = (utf32 << 6) \| (*++name & 0x3F);
				2488
				2489	/* Ill-formed UTF-8. */
				2490	if ((*name & ~0x3F) != 0x80)
				2491	abort ();
				2492	}
				2493
				2494	*buffer++ = '\\';
				2495	*buffer++ = 'U';
				2496	for (j = 7; j >= 0; j--)
				2497	buffer++ = "0123456789abcdef"[(utf32 >> (4 j)) & 0xF];
				2498	return ucn_len;
				2499	}
				2500
				2501	/* Given a token TYPE corresponding to a digraph, return a pointer to
				2502	the spelling of the digraph. */
				2503	static const unsigned char *
				2504	cpp_digraph2name (enum cpp_ttype type)
				2505	{
				2506	return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
				2507	}
				2508
				2509	/* Write the spelling of a token TOKEN to BUFFER. The buffer must
				2510	already contain the enough space to hold the token's spelling.
				2511	Returns a pointer to the character after the last character written.
				2512	FORSTRING is true if this is to be the spelling after translation
				2513	phase 1 (this is different for UCNs).
				2514	FIXME: Would be nice if we didn't need the PFILE argument. */
				2515	unsigned char *
				2516	cpp_spell_token (cpp_reader pfile, const cpp_token token,
				2517	unsigned char *buffer, bool forstring)
				2518	{
				2519	switch (TOKEN_SPELL (token))
				2520	{
				2521	case SPELL_OPERATOR:
				2522	{
				2523	const unsigned char *spelling;
				2524	unsigned char c;
				2525
				2526	if (token->flags & DIGRAPH)
				2527	spelling = cpp_digraph2name (token->type);
				2528	else if (token->flags & NAMED_OP)
				2529	goto spell_ident;
				2530	else
				2531	spelling = TOKEN_NAME (token);
				2532
				2533	while ((c = *spelling++) != '\0')
				2534	*buffer++ = c;
				2535	}
				2536	break;
				2537
				2538	spell_ident:
				2539	case SPELL_IDENT:
				2540	if (forstring)
				2541	{
				2542	memcpy (buffer, NODE_NAME (token->val.node.node),
				2543	NODE_LEN (token->val.node.node));
				2544	buffer += NODE_LEN (token->val.node.node);
				2545	}
				2546	else
				2547	{
				2548	size_t i;
				2549	const unsigned char * name = NODE_NAME (token->val.node.node);
				2550
				2551	for (i = 0; i < NODE_LEN (token->val.node.node); i++)
				2552	if (name[i] & ~0x7F)
				2553	{
				2554	i += utf8_to_ucn (buffer, name + i) - 1;
				2555	buffer += 10;
				2556	}
				2557	else
				2558	*buffer++ = NODE_NAME (token->val.node.node)[i];
				2559	}
				2560	break;
				2561
				2562	case SPELL_LITERAL:
				2563	memcpy (buffer, token->val.str.text, token->val.str.len);
				2564	buffer += token->val.str.len;
				2565	break;
				2566
				2567	case SPELL_NONE:
				2568	cpp_error (pfile, CPP_DL_ICE,
				2569	"unspellable token %s", TOKEN_NAME (token));
				2570	break;
				2571	}
				2572
				2573	return buffer;
				2574	}
				2575
				2576	/* Returns TOKEN spelt as a null-terminated string. The string is
				2577	freed when the reader is destroyed. Useful for diagnostics. */
				2578	unsigned char *
				2579	cpp_token_as_text (cpp_reader pfile, const cpp_token token)
				2580	{
				2581	unsigned int len = cpp_token_len (token) + 1;
				2582	unsigned char start = _cpp_unaligned_alloc (pfile, len), end;
				2583
				2584	end = cpp_spell_token (pfile, token, start, false);
				2585	end[0] = '\0';
				2586
				2587	return start;
				2588	}
				2589
				2590	/* Returns a pointer to a string which spells the token defined by
				2591	TYPE and FLAGS. Used by C front ends, which really should move to
				2592	using cpp_token_as_text. */
				2593	const char *
				2594	cpp_type2name (enum cpp_ttype type, unsigned char flags)
				2595	{
				2596	if (flags & DIGRAPH)
				2597	return (const char *) cpp_digraph2name (type);
				2598	else if (flags & NAMED_OP)
				2599	return cpp_named_operator2name (type);
				2600
				2601	return (const char *) token_spellings[type].name;
				2602	}
				2603
				2604	/* Writes the spelling of token to FP, without any preceding space.
				2605	Separated from cpp_spell_token for efficiency - to avoid stdio
				2606	double-buffering. */
				2607	void
				2608	cpp_output_token (const cpp_token token, FILE fp)
				2609	{
				2610	switch (TOKEN_SPELL (token))
				2611	{
				2612	case SPELL_OPERATOR:
				2613	{
				2614	const unsigned char *spelling;
				2615	int c;
				2616
				2617	if (token->flags & DIGRAPH)
				2618	spelling = cpp_digraph2name (token->type);
				2619	else if (token->flags & NAMED_OP)
				2620	goto spell_ident;
				2621	else
				2622	spelling = TOKEN_NAME (token);
				2623
				2624	c = *spelling;
				2625	do
				2626	putc (c, fp);
				2627	while ((c = *++spelling) != '\0');
				2628	}
				2629	break;
				2630
				2631	spell_ident:
				2632	case SPELL_IDENT:
				2633	{
				2634	size_t i;
				2635	const unsigned char * name = NODE_NAME (token->val.node.node);
				2636
				2637	for (i = 0; i < NODE_LEN (token->val.node.node); i++)
				2638	if (name[i] & ~0x7F)
				2639	{
				2640	unsigned char buffer[10];
				2641	i += utf8_to_ucn (buffer, name + i) - 1;
				2642	fwrite (buffer, 1, 10, fp);
				2643	}
				2644	else
				2645	fputc (NODE_NAME (token->val.node.node)[i], fp);
				2646	}
				2647	break;
				2648
				2649	case SPELL_LITERAL:
				2650	fwrite (token->val.str.text, 1, token->val.str.len, fp);
				2651	break;
				2652
				2653	case SPELL_NONE:
				2654	/* An error, most probably. */
				2655	break;
				2656	}
				2657	}
				2658
				2659	/* Compare two tokens. */
				2660	int
				2661	_cpp_equiv_tokens (const cpp_token a, const cpp_token b)
				2662	{
				2663	if (a->type == b->type && a->flags == b->flags)
				2664	switch (TOKEN_SPELL (a))
				2665	{
				2666	default: /* Keep compiler happy. */
				2667	case SPELL_OPERATOR:
				2668	/* token_no is used to track where multiple consecutive ##
				2669	tokens were originally located. */
				2670	return (a->type != CPP_PASTE \|\| a->val.token_no == b->val.token_no);
				2671	case SPELL_NONE:
				2672	return (a->type != CPP_MACRO_ARG
				2673	\|\| a->val.macro_arg.arg_no == b->val.macro_arg.arg_no);
				2674	case SPELL_IDENT:
				2675	return a->val.node.node == b->val.node.node;
				2676	case SPELL_LITERAL:
				2677	return (a->val.str.len == b->val.str.len
				2678	&& !memcmp (a->val.str.text, b->val.str.text,
				2679	a->val.str.len));
				2680	}
				2681
				2682	return 0;
				2683	}
				2684
				2685	/* Returns nonzero if a space should be inserted to avoid an
				2686	accidental token paste for output. For simplicity, it is
				2687	conservative, and occasionally advises a space where one is not
				2688	needed, e.g. "." and ".2". */
				2689	int
				2690	cpp_avoid_paste (cpp_reader pfile, const cpp_token token1,
				2691	const cpp_token *token2)
				2692	{
				2693	enum cpp_ttype a = token1->type, b = token2->type;
				2694	cppchar_t c;
				2695
				2696	if (token1->flags & NAMED_OP)
				2697	a = CPP_NAME;
				2698	if (token2->flags & NAMED_OP)
				2699	b = CPP_NAME;
				2700
				2701	c = EOF;
				2702	if (token2->flags & DIGRAPH)
				2703	c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
				2704	else if (token_spellings[b].category == SPELL_OPERATOR)
				2705	c = token_spellings[b].name[0];
				2706
				2707	/* Quickly get everything that can paste with an '='. */
				2708	if ((int) a <= (int) CPP_LAST_EQ && c == '=')
				2709	return 1;
				2710
				2711	switch (a)
				2712	{
				2713	case CPP_GREATER: return c == '>';
				2714	case CPP_LESS: return c == '<' \|\| c == '%' \|\| c == ':';
				2715	case CPP_PLUS: return c == '+';
				2716	case CPP_MINUS: return c == '-' \|\| c == '>';
				2717	case CPP_DIV: return c == '/' \|\| c == ''; / Comments. */
				2718	case CPP_MOD: return c == ':' \|\| c == '>';
				2719	case CPP_AND: return c == '&';
				2720	case CPP_OR: return c == '\|';
				2721	case CPP_COLON: return c == ':' \|\| c == '>';
				2722	case CPP_DEREF: return c == '*';
				2723	case CPP_DOT: return c == '.' \|\| c == '%' \|\| b == CPP_NUMBER;
				2724	case CPP_HASH: return c == '#' \|\| c == '%'; /* Digraph form. */
				2725	case CPP_NAME: return ((b == CPP_NUMBER
				2726	&& name_p (pfile, &token2->val.str))
				2727	\|\| b == CPP_NAME
				2728	\|\| b == CPP_CHAR \|\| b == CPP_STRING); /* L */
				2729	case CPP_NUMBER: return (b == CPP_NUMBER \|\| b == CPP_NAME
				2730	\|\| c == '.' \|\| c == '+' \|\| c == '-');
				2731	/* UCNs */
				2732	case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
				2733	&& b == CPP_NAME)
				2734	\|\| (CPP_OPTION (pfile, objc)
				2735	&& token1->val.str.text[0] == '@'
				2736	&& (b == CPP_NAME \|\| b == CPP_STRING)));
				2737	default: break;
				2738	}
				2739
				2740	return 0;
				2741	}
				2742
				2743	/* Output all the remaining tokens on the current line, and a newline
				2744	character, to FP. Leading whitespace is removed. If there are
				2745	macros, special token padding is not performed. */
				2746	void
				2747	cpp_output_line (cpp_reader pfile, FILE fp)
				2748	{
				2749	const cpp_token *token;
				2750
				2751	token = cpp_get_token (pfile);
				2752	while (token->type != CPP_EOF)
				2753	{
				2754	cpp_output_token (token, fp);
				2755	token = cpp_get_token (pfile);
				2756	if (token->flags & PREV_WHITE)
				2757	putc (' ', fp);
				2758	}
				2759
				2760	putc ('\n', fp);
				2761	}
				2762
				2763	/* Return a string representation of all the remaining tokens on the
				2764	current line. The result is allocated using xmalloc and must be
				2765	freed by the caller. */
				2766	unsigned char *
				2767	cpp_output_line_to_string (cpp_reader pfile, const unsigned char dir_name)
				2768	{
				2769	const cpp_token *token;
				2770	unsigned int out = dir_name ? ustrlen (dir_name) : 0;
				2771	unsigned int alloced = 120 + out;
				2772	unsigned char result = (unsigned char ) xmalloc (alloced);
				2773
				2774	/* If DIR_NAME is empty, there are no initial contents. */
				2775	if (dir_name)
				2776	{
				2777	sprintf ((char *) result, "#%s ", dir_name);
				2778	out += 2;
				2779	}
				2780
				2781	token = cpp_get_token (pfile);
				2782	while (token->type != CPP_EOF)
				2783	{
				2784	unsigned char *last;
				2785	/* Include room for a possible space and the terminating nul. */
				2786	unsigned int len = cpp_token_len (token) + 2;
				2787
				2788	if (out + len > alloced)
				2789	{
				2790	alloced *= 2;
				2791	if (out + len > alloced)
				2792	alloced = out + len;
				2793	result = (unsigned char *) xrealloc (result, alloced);
				2794	}
				2795
				2796	last = cpp_spell_token (pfile, token, &result[out], 0);
				2797	out = last - result;
				2798
				2799	token = cpp_get_token (pfile);
				2800	if (token->flags & PREV_WHITE)
				2801	result[out++] = ' ';
				2802	}
				2803
				2804	result[out] = '\0';
				2805	return result;
				2806	}
				2807
				2808	/* Memory buffers. Changing these three constants can have a dramatic
				2809	effect on performance. The values here are reasonable defaults,
				2810	but might be tuned. If you adjust them, be sure to test across a
				2811	range of uses of cpplib, including heavy nested function-like macro
				2812	expansion. Also check the change in peak memory usage (NJAMD is a
				2813	good tool for this). */
				2814	#define MIN_BUFF_SIZE 8000
				2815	#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
				2816	#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
				2817	(MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
				2818
				2819	#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
				2820	#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
				2821	#endif
				2822
				2823	/* Create a new allocation buffer. Place the control block at the end
				2824	of the buffer, so that buffer overflows will cause immediate chaos. */
				2825	static _cpp_buff *
				2826	new_buff (size_t len)
				2827	{
				2828	_cpp_buff *result;
				2829	unsigned char *base;
				2830
				2831	if (len < MIN_BUFF_SIZE)
				2832	len = MIN_BUFF_SIZE;
				2833	len = CPP_ALIGN (len);
				2834
				2835	base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
				2836	result = (_cpp_buff *) (base + len);
				2837	result->base = base;
				2838	result->cur = base;
				2839	result->limit = base + len;
				2840	result->next = NULL;
				2841	return result;
				2842	}
				2843
				2844	/* Place a chain of unwanted allocation buffers on the free list. */
				2845	void
				2846	_cpp_release_buff (cpp_reader pfile, _cpp_buff buff)
				2847	{
				2848	_cpp_buff *end = buff;
				2849
				2850	while (end->next)
				2851	end = end->next;
				2852	end->next = pfile->free_buffs;
				2853	pfile->free_buffs = buff;
				2854	}
				2855
				2856	/* Return a free buffer of size at least MIN_SIZE. */
				2857	_cpp_buff *
				2858	_cpp_get_buff (cpp_reader *pfile, size_t min_size)
				2859	{
				2860	_cpp_buff result, *p;
				2861
				2862	for (p = &pfile->free_buffs;; p = &(*p)->next)
				2863	{
				2864	size_t size;
				2865
				2866	if (*p == NULL)
				2867	return new_buff (min_size);
				2868	result = *p;
				2869	size = result->limit - result->base;
				2870	/* Return a buffer that's big enough, but don't waste one that's
				2871	way too big. */
				2872	if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
				2873	break;
				2874	}
				2875
				2876	*p = result->next;
				2877	result->next = NULL;
				2878	result->cur = result->base;
				2879	return result;
				2880	}
				2881
				2882	/* Creates a new buffer with enough space to hold the uncommitted
				2883	remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
				2884	the excess bytes to the new buffer. Chains the new buffer after
				2885	BUFF, and returns the new buffer. */
				2886	_cpp_buff *
				2887	_cpp_append_extend_buff (cpp_reader pfile, _cpp_buff buff, size_t min_extra)
				2888	{
				2889	size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
				2890	_cpp_buff *new_buff = _cpp_get_buff (pfile, size);
				2891
				2892	buff->next = new_buff;
				2893	memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
				2894	return new_buff;
				2895	}
				2896
				2897	/* Creates a new buffer with enough space to hold the uncommitted
				2898	remaining bytes of the buffer pointed to by BUFF, and at least
				2899	MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
				2900	Chains the new buffer before the buffer pointed to by BUFF, and
				2901	updates the pointer to point to the new buffer. */
				2902	void
				2903	_cpp_extend_buff (cpp_reader pfile, _cpp_buff *pbuff, size_t min_extra)
				2904	{
				2905	_cpp_buff new_buff, old_buff = *pbuff;
				2906	size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
				2907
				2908	new_buff = _cpp_get_buff (pfile, size);
				2909	memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
				2910	new_buff->next = old_buff;
				2911	*pbuff = new_buff;
				2912	}
				2913
				2914	/* Free a chain of buffers starting at BUFF. */
				2915	void
				2916	_cpp_free_buff (_cpp_buff *buff)
				2917	{
				2918	_cpp_buff *next;
				2919
				2920	for (; buff; buff = next)
				2921	{
				2922	next = buff->next;
				2923	free (buff->base);
				2924	}
				2925	}
				2926
				2927	/* Allocate permanent, unaligned storage of length LEN. */
				2928	unsigned char *
				2929	_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
				2930	{
				2931	_cpp_buff *buff = pfile->u_buff;
				2932	unsigned char *result = buff->cur;
				2933
				2934	if (len > (size_t) (buff->limit - result))
				2935	{
				2936	buff = _cpp_get_buff (pfile, len);
				2937	buff->next = pfile->u_buff;
				2938	pfile->u_buff = buff;
				2939	result = buff->cur;
				2940	}
				2941
				2942	buff->cur = result + len;
				2943	return result;
				2944	}
				2945
				2946	/* Allocate permanent, unaligned storage of length LEN from a_buff.
				2947	That buffer is used for growing allocations when saving macro
				2948	replacement lists in a #define, and when parsing an answer to an
				2949	assertion in #assert, #unassert or #if (and therefore possibly
				2950	whilst expanding macros). It therefore must not be used by any
				2951	code that they might call: specifically the lexer and the guts of
				2952	the macro expander.
				2953
				2954	All existing other uses clearly fit this restriction: storing
				2955	registered pragmas during initialization. */
				2956	unsigned char *
				2957	_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
				2958	{
				2959	_cpp_buff *buff = pfile->a_buff;
				2960	unsigned char *result = buff->cur;
				2961
				2962	if (len > (size_t) (buff->limit - result))
				2963	{
				2964	buff = _cpp_get_buff (pfile, len);
				2965	buff->next = pfile->a_buff;
				2966	pfile->a_buff = buff;
				2967	result = buff->cur;
				2968	}
				2969
				2970	buff->cur = result + len;
				2971	return result;
				2972	}
				2973
				2974	/* Say which field of TOK is in use. */
				2975
				2976	enum cpp_token_fld_kind
				2977	cpp_token_val_index (cpp_token *tok)
				2978	{
				2979	switch (TOKEN_SPELL (tok))
				2980	{
				2981	case SPELL_IDENT:
				2982	return CPP_TOKEN_FLD_NODE;
				2983	case SPELL_LITERAL:
				2984	return CPP_TOKEN_FLD_STR;
				2985	case SPELL_OPERATOR:
				2986	if (tok->type == CPP_PASTE)
				2987	return CPP_TOKEN_FLD_TOKEN_NO;
				2988	else
				2989	return CPP_TOKEN_FLD_NONE;
				2990	case SPELL_NONE:
				2991	if (tok->type == CPP_MACRO_ARG)
				2992	return CPP_TOKEN_FLD_ARG_NO;
				2993	else if (tok->type == CPP_PADDING)
				2994	return CPP_TOKEN_FLD_SOURCE;
				2995	else if (tok->type == CPP_PRAGMA)
				2996	return CPP_TOKEN_FLD_PRAGMA;
				2997	/* else fall through */
				2998	default:
				2999	return CPP_TOKEN_FLD_NONE;
				3000	}
				3001	}
				3002
				3003	/* All tokens lexed in R after calling this function will be forced to have
				3004	their source_location the same as the location referenced by P, until
				3005	cpp_stop_forcing_token_locations is called for R. */
				3006
				3007	void
				3008	cpp_force_token_locations (cpp_reader r, source_location p)
				3009	{
				3010	r->forced_token_location_p = p;
				3011	}
				3012
				3013	/* Go back to assigning locations naturally for lexed tokens. */
				3014
				3015	void
				3016	cpp_stop_forcing_token_locations (cpp_reader *r)
				3017	{
				3018	r->forced_token_location_p = NULL;
				3019	}