Blame - renderscript/clang-include/mmintrin.h - platform/prebuilts/fullsdk-darwin/build-tools/30.0.3

blob: cefd6053aa804747ea7ca7c670ad8b31de188147 [file] [log] [blame]

Chris Warrington	17cc286	2021-06-28 17:43:40 +0100	[diff] [blame]	1	/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
				2	*
				3	* Permission is hereby granted, free of charge, to any person obtaining a copy
				4	* of this software and associated documentation files (the "Software"), to deal
				5	* in the Software without restriction, including without limitation the rights
				6	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
				7	* copies of the Software, and to permit persons to whom the Software is
				8	* furnished to do so, subject to the following conditions:
				9	*
				10	* The above copyright notice and this permission notice shall be included in
				11	* all copies or substantial portions of the Software.
				12	*
				13	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				14	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				15	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				16	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				17	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				18	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
				19	* THE SOFTWARE.
				20	*
				21	*===-----------------------------------------------------------------------===
				22	*/
				23
				24	#ifndef __MMINTRIN_H
				25	#define __MMINTRIN_H
				26
				27	typedef long long __m64 __attribute__((__vector_size__(8)));
				28
				29	typedef long long __v1di __attribute__((__vector_size__(8)));
				30	typedef int __v2si __attribute__((__vector_size__(8)));
				31	typedef short __v4hi __attribute__((__vector_size__(8)));
				32	typedef char __v8qi __attribute__((__vector_size__(8)));
				33
				34	/* Define the default attributes for the functions in this file. */
				35	#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
				36
				37	/// \brief Clears the MMX state by setting the state of the x87 stack registers
				38	/// to empty.
				39	///
				40	/// \headerfile <x86intrin.h>
				41	///
				42	/// This intrinsic corresponds to the \c EMMS instruction.
				43	///
				44	static __inline__ void __DEFAULT_FN_ATTRS
				45	_mm_empty(void)
				46	{
				47	__builtin_ia32_emms();
				48	}
				49
				50	/// \brief Constructs a 64-bit integer vector, setting the lower 32 bits to the
				51	/// value of the 32-bit integer parameter and setting the upper 32 bits to 0.
				52	///
				53	/// \headerfile <x86intrin.h>
				54	///
				55	/// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
				56	///
				57	/// \param __i
				58	/// A 32-bit integer value.
				59	/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
				60	/// parameter. The upper 32 bits are set to 0.
				61	static __inline__ __m64 __DEFAULT_FN_ATTRS
				62	_mm_cvtsi32_si64(int __i)
				63	{
				64	return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
				65	}
				66
				67	/// \brief Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
				68	/// signed integer.
				69	///
				70	/// \headerfile <x86intrin.h>
				71	///
				72	/// This intrinsic corresponds to the \c VMOVD / MOVD instruction.
				73	///
				74	/// \param __m
				75	/// A 64-bit integer vector.
				76	/// \returns A 32-bit signed integer value containing the lower 32 bits of the
				77	/// parameter.
				78	static __inline__ int __DEFAULT_FN_ATTRS
				79	_mm_cvtsi64_si32(__m64 __m)
				80	{
				81	return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
				82	}
				83
				84	/// \brief Casts a 64-bit signed integer value into a 64-bit integer vector.
				85	///
				86	/// \headerfile <x86intrin.h>
				87	///
				88	/// This intrinsic corresponds to the \c VMOVQ / MOVD instruction.
				89	///
				90	/// \param __i
				91	/// A 64-bit signed integer.
				92	/// \returns A 64-bit integer vector containing the same bitwise pattern as the
				93	/// parameter.
				94	static __inline__ __m64 __DEFAULT_FN_ATTRS
				95	_mm_cvtsi64_m64(long long __i)
				96	{
				97	return (__m64)__i;
				98	}
				99
				100	/// \brief Casts a 64-bit integer vector into a 64-bit signed integer value.
				101	///
				102	/// \headerfile <x86intrin.h>
				103	///
				104	/// This intrinsic corresponds to the \c VMOVQ / MOVD instruction.
				105	///
				106	/// \param __m
				107	/// A 64-bit integer vector.
				108	/// \returns A 64-bit signed integer containing the same bitwise pattern as the
				109	/// parameter.
				110	static __inline__ long long __DEFAULT_FN_ATTRS
				111	_mm_cvtm64_si64(__m64 __m)
				112	{
				113	return (long long)__m;
				114	}
				115
				116	/// \brief Converts 16-bit signed integers from both 64-bit integer vector
				117	/// parameters of [4 x i16] into 8-bit signed integer values, and constructs
				118	/// a 64-bit integer vector of [8 x i8] as the result. Positive values
				119	/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
				120	/// are saturated to 0x80.
				121	///
				122	/// \headerfile <x86intrin.h>
				123	///
				124	/// This intrinsic corresponds to the \c PACKSSWB instruction.
				125	///
				126	/// \param __m1
				127	/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
				128	/// 16-bit signed integer and is converted to an 8-bit signed integer with
				129	/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
				130	/// Negative values less than 0x80 are saturated to 0x80. The converted
				131	/// [4 x i8] values are written to the lower 32 bits of the result.
				132	/// \param __m2
				133	/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
				134	/// 16-bit signed integer and is converted to an 8-bit signed integer with
				135	/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
				136	/// Negative values less than 0x80 are saturated to 0x80. The converted
				137	/// [4 x i8] values are written to the upper 32 bits of the result.
				138	/// \returns A 64-bit integer vector of [8 x i8] containing the converted
				139	/// values.
				140	static __inline__ __m64 __DEFAULT_FN_ATTRS
				141	_mm_packs_pi16(__m64 __m1, __m64 __m2)
				142	{
				143	return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
				144	}
				145
				146	/// \brief Converts 32-bit signed integers from both 64-bit integer vector
				147	/// parameters of [2 x i32] into 16-bit signed integer values, and constructs
				148	/// a 64-bit integer vector of [4 x i16] as the result. Positive values
				149	/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
				150	/// 0x8000 are saturated to 0x8000.
				151	///
				152	/// \headerfile <x86intrin.h>
				153	///
				154	/// This intrinsic corresponds to the \c PACKSSDW instruction.
				155	///
				156	/// \param __m1
				157	/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
				158	/// 32-bit signed integer and is converted to a 16-bit signed integer with
				159	/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
				160	/// Negative values less than 0x8000 are saturated to 0x8000. The converted
				161	/// [2 x i16] values are written to the lower 32 bits of the result.
				162	/// \param __m2
				163	/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
				164	/// 32-bit signed integer and is converted to a 16-bit signed integer with
				165	/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
				166	/// Negative values less than 0x8000 are saturated to 0x8000. The converted
				167	/// [2 x i16] values are written to the upper 32 bits of the result.
				168	/// \returns A 64-bit integer vector of [4 x i16] containing the converted
				169	/// values.
				170	static __inline__ __m64 __DEFAULT_FN_ATTRS
				171	_mm_packs_pi32(__m64 __m1, __m64 __m2)
				172	{
				173	return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
				174	}
				175
				176	/// \brief Converts 16-bit signed integers from both 64-bit integer vector
				177	/// parameters of [4 x i16] into 8-bit unsigned integer values, and
				178	/// constructs a 64-bit integer vector of [8 x i8] as the result. Values
				179	/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
				180	/// to 0.
				181	///
				182	/// \headerfile <x86intrin.h>
				183	///
				184	/// This intrinsic corresponds to the \c PACKUSWB instruction.
				185	///
				186	/// \param __m1
				187	/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
				188	/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
				189	/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
				190	/// than 0 are saturated to 0. The converted [4 x i8] values are written to
				191	/// the lower 32 bits of the result.
				192	/// \param __m2
				193	/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
				194	/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
				195	/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
				196	/// than 0 are saturated to 0. The converted [4 x i8] values are written to
				197	/// the upper 32 bits of the result.
				198	/// \returns A 64-bit integer vector of [8 x i8] containing the converted
				199	/// values.
				200	static __inline__ __m64 __DEFAULT_FN_ATTRS
				201	_mm_packs_pu16(__m64 __m1, __m64 __m2)
				202	{
				203	return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
				204	}
				205
				206	/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
				207	/// and interleaves them into a 64-bit integer vector of [8 x i8].
				208	///
				209	/// \headerfile <x86intrin.h>
				210	///
				211	/// This intrinsic corresponds to the \c PUNPCKHBW instruction.
				212	///
				213	/// \param __m1
				214	/// A 64-bit integer vector of [8 x i8].
				215	/// Bits [39:32] are written to bits [7:0] of the result.
				216	/// Bits [47:40] are written to bits [23:16] of the result.
				217	/// Bits [55:48] are written to bits [39:32] of the result.
				218	/// Bits [63:56] are written to bits [55:48] of the result.
				219	/// \param __m2
				220	/// A 64-bit integer vector of [8 x i8].
				221	/// Bits [39:32] are written to bits [15:8] of the result.
				222	/// Bits [47:40] are written to bits [31:24] of the result.
				223	/// Bits [55:48] are written to bits [47:40] of the result.
				224	/// Bits [63:56] are written to bits [63:56] of the result.
				225	/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
				226	/// values.
				227	static __inline__ __m64 __DEFAULT_FN_ATTRS
				228	_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
				229	{
				230	return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
				231	}
				232
				233	/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
				234	/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
				235	///
				236	/// \headerfile <x86intrin.h>
				237	///
				238	/// This intrinsic corresponds to the \c PUNPCKHWD instruction.
				239	///
				240	/// \param __m1
				241	/// A 64-bit integer vector of [4 x i16].
				242	/// Bits [47:32] are written to bits [15:0] of the result.
				243	/// Bits [63:48] are written to bits [47:32] of the result.
				244	/// \param __m2
				245	/// A 64-bit integer vector of [4 x i16].
				246	/// Bits [47:32] are written to bits [31:16] of the result.
				247	/// Bits [63:48] are written to bits [63:48] of the result.
				248	/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
				249	/// values.
				250	static __inline__ __m64 __DEFAULT_FN_ATTRS
				251	_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
				252	{
				253	return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
				254	}
				255
				256	/// \brief Unpacks the upper 32 bits from two 64-bit integer vectors of
				257	/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
				258	///
				259	/// \headerfile <x86intrin.h>
				260	///
				261	/// This intrinsic corresponds to the \c PUNPCKHDQ instruction.
				262	///
				263	/// \param __m1
				264	/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
				265	/// the lower 32 bits of the result.
				266	/// \param __m2
				267	/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
				268	/// the upper 32 bits of the result.
				269	/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
				270	/// values.
				271	static __inline__ __m64 __DEFAULT_FN_ATTRS
				272	_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
				273	{
				274	return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
				275	}
				276
				277	/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
				278	/// and interleaves them into a 64-bit integer vector of [8 x i8].
				279	///
				280	/// \headerfile <x86intrin.h>
				281	///
				282	/// This intrinsic corresponds to the \c PUNPCKLBW instruction.
				283	///
				284	/// \param __m1
				285	/// A 64-bit integer vector of [8 x i8].
				286	/// Bits [7:0] are written to bits [7:0] of the result.
				287	/// Bits [15:8] are written to bits [23:16] of the result.
				288	/// Bits [23:16] are written to bits [39:32] of the result.
				289	/// Bits [31:24] are written to bits [55:48] of the result.
				290	/// \param __m2
				291	/// A 64-bit integer vector of [8 x i8].
				292	/// Bits [7:0] are written to bits [15:8] of the result.
				293	/// Bits [15:8] are written to bits [31:24] of the result.
				294	/// Bits [23:16] are written to bits [47:40] of the result.
				295	/// Bits [31:24] are written to bits [63:56] of the result.
				296	/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
				297	/// values.
				298	static __inline__ __m64 __DEFAULT_FN_ATTRS
				299	_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
				300	{
				301	return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
				302	}
				303
				304	/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
				305	/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
				306	///
				307	/// \headerfile <x86intrin.h>
				308	///
				309	/// This intrinsic corresponds to the \c PUNPCKLWD instruction.
				310	///
				311	/// \param __m1
				312	/// A 64-bit integer vector of [4 x i16].
				313	/// Bits [15:0] are written to bits [15:0] of the result.
				314	/// Bits [31:16] are written to bits [47:32] of the result.
				315	/// \param __m2
				316	/// A 64-bit integer vector of [4 x i16].
				317	/// Bits [15:0] are written to bits [31:16] of the result.
				318	/// Bits [31:16] are written to bits [63:48] of the result.
				319	/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
				320	/// values.
				321	static __inline__ __m64 __DEFAULT_FN_ATTRS
				322	_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
				323	{
				324	return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
				325	}
				326
				327	/// \brief Unpacks the lower 32 bits from two 64-bit integer vectors of
				328	/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
				329	///
				330	/// \headerfile <x86intrin.h>
				331	///
				332	/// This intrinsic corresponds to the \c PUNPCKLDQ instruction.
				333	///
				334	/// \param __m1
				335	/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
				336	/// the lower 32 bits of the result.
				337	/// \param __m2
				338	/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
				339	/// the upper 32 bits of the result.
				340	/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
				341	/// values.
				342	static __inline__ __m64 __DEFAULT_FN_ATTRS
				343	_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
				344	{
				345	return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
				346	}
				347
				348	/// \brief Adds each 8-bit integer element of the first 64-bit integer vector
				349	/// of [8 x i8] to the corresponding 8-bit integer element of the second
				350	/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
				351	/// packed into a 64-bit integer vector of [8 x i8].
				352	///
				353	/// \headerfile <x86intrin.h>
				354	///
				355	/// This intrinsic corresponds to the \c PADDB instruction.
				356	///
				357	/// \param __m1
				358	/// A 64-bit integer vector of [8 x i8].
				359	/// \param __m2
				360	/// A 64-bit integer vector of [8 x i8].
				361	/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
				362	/// parameters.
				363	static __inline__ __m64 __DEFAULT_FN_ATTRS
				364	_mm_add_pi8(__m64 __m1, __m64 __m2)
				365	{
				366	return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
				367	}
				368
				369	/// \brief Adds each 16-bit integer element of the first 64-bit integer vector
				370	/// of [4 x i16] to the corresponding 16-bit integer element of the second
				371	/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
				372	/// packed into a 64-bit integer vector of [4 x i16].
				373	///
				374	/// \headerfile <x86intrin.h>
				375	///
				376	/// This intrinsic corresponds to the \c PADDW instruction.
				377	///
				378	/// \param __m1
				379	/// A 64-bit integer vector of [4 x i16].
				380	/// \param __m2
				381	/// A 64-bit integer vector of [4 x i16].
				382	/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
				383	/// parameters.
				384	static __inline__ __m64 __DEFAULT_FN_ATTRS
				385	_mm_add_pi16(__m64 __m1, __m64 __m2)
				386	{
				387	return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
				388	}
				389
				390	/// \brief Adds each 32-bit integer element of the first 64-bit integer vector
				391	/// of [2 x i32] to the corresponding 32-bit integer element of the second
				392	/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
				393	/// packed into a 64-bit integer vector of [2 x i32].
				394	///
				395	/// \headerfile <x86intrin.h>
				396	///
				397	/// This intrinsic corresponds to the \c PADDD instruction.
				398	///
				399	/// \param __m1
				400	/// A 64-bit integer vector of [2 x i32].
				401	/// \param __m2
				402	/// A 64-bit integer vector of [2 x i32].
				403	/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
				404	/// parameters.
				405	static __inline__ __m64 __DEFAULT_FN_ATTRS
				406	_mm_add_pi32(__m64 __m1, __m64 __m2)
				407	{
				408	return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
				409	}
				410
				411	/// \brief Adds each 8-bit signed integer element of the first 64-bit integer
				412	/// vector of [8 x i8] to the corresponding 8-bit signed integer element of
				413	/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than
				414	/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
				415	/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].
				416	///
				417	/// \headerfile <x86intrin.h>
				418	///
				419	/// This intrinsic corresponds to the \c PADDSB instruction.
				420	///
				421	/// \param __m1
				422	/// A 64-bit integer vector of [8 x i8].
				423	/// \param __m2
				424	/// A 64-bit integer vector of [8 x i8].
				425	/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
				426	/// of both parameters.
				427	static __inline__ __m64 __DEFAULT_FN_ATTRS
				428	_mm_adds_pi8(__m64 __m1, __m64 __m2)
				429	{
				430	return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
				431	}
				432
				433	/// \brief Adds each 16-bit signed integer element of the first 64-bit integer
				434	/// vector of [4 x i16] to the corresponding 16-bit signed integer element of
				435	/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than
				436	/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
				437	/// saturated to 0x8000. The results are packed into a 64-bit integer vector
				438	/// of [4 x i16].
				439	///
				440	/// \headerfile <x86intrin.h>
				441	///
				442	/// This intrinsic corresponds to the \c PADDSW instruction.
				443	///
				444	/// \param __m1
				445	/// A 64-bit integer vector of [4 x i16].
				446	/// \param __m2
				447	/// A 64-bit integer vector of [4 x i16].
				448	/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
				449	/// of both parameters.
				450	static __inline__ __m64 __DEFAULT_FN_ATTRS
				451	_mm_adds_pi16(__m64 __m1, __m64 __m2)
				452	{
				453	return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
				454	}
				455
				456	/// \brief Adds each 8-bit unsigned integer element of the first 64-bit integer
				457	/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
				458	/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
				459	/// saturated to 0xFF. The results are packed into a 64-bit integer vector of
				460	/// [8 x i8].
				461	///
				462	/// \headerfile <x86intrin.h>
				463	///
				464	/// This intrinsic corresponds to the \c PADDUSB instruction.
				465	///
				466	/// \param __m1
				467	/// A 64-bit integer vector of [8 x i8].
				468	/// \param __m2
				469	/// A 64-bit integer vector of [8 x i8].
				470	/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
				471	/// unsigned sums of both parameters.
				472	static __inline__ __m64 __DEFAULT_FN_ATTRS
				473	_mm_adds_pu8(__m64 __m1, __m64 __m2)
				474	{
				475	return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
				476	}
				477
				478	/// \brief Adds each 16-bit unsigned integer element of the first 64-bit integer
				479	/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element
				480	/// of the second 64-bit integer vector of [4 x i16]. Sums greater than
				481	/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
				482	/// integer vector of [4 x i16].
				483	///
				484	/// \headerfile <x86intrin.h>
				485	///
				486	/// This intrinsic corresponds to the \c PADDUSW instruction.
				487	///
				488	/// \param __m1
				489	/// A 64-bit integer vector of [4 x i16].
				490	/// \param __m2
				491	/// A 64-bit integer vector of [4 x i16].
				492	/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
				493	/// unsigned sums of both parameters.
				494	static __inline__ __m64 __DEFAULT_FN_ATTRS
				495	_mm_adds_pu16(__m64 __m1, __m64 __m2)
				496	{
				497	return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
				498	}
				499
				500	/// \brief Subtracts each 8-bit integer element of the second 64-bit integer
				501	/// vector of [8 x i8] from the corresponding 8-bit integer element of the
				502	/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
				503	/// are packed into a 64-bit integer vector of [8 x i8].
				504	///
				505	/// \headerfile <x86intrin.h>
				506	///
				507	/// This intrinsic corresponds to the \c PSUBB instruction.
				508	///
				509	/// \param __m1
				510	/// A 64-bit integer vector of [8 x i8] containing the minuends.
				511	/// \param __m2
				512	/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
				513	/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
				514	/// both parameters.
				515	static __inline__ __m64 __DEFAULT_FN_ATTRS
				516	_mm_sub_pi8(__m64 __m1, __m64 __m2)
				517	{
				518	return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
				519	}
				520
				521	/// \brief Subtracts each 16-bit integer element of the second 64-bit integer
				522	/// vector of [4 x i16] from the corresponding 16-bit integer element of the
				523	/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
				524	/// results are packed into a 64-bit integer vector of [4 x i16].
				525	///
				526	/// \headerfile <x86intrin.h>
				527	///
				528	/// This intrinsic corresponds to the \c PSUBW instruction.
				529	///
				530	/// \param __m1
				531	/// A 64-bit integer vector of [4 x i16] containing the minuends.
				532	/// \param __m2
				533	/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
				534	/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
				535	/// both parameters.
				536	static __inline__ __m64 __DEFAULT_FN_ATTRS
				537	_mm_sub_pi16(__m64 __m1, __m64 __m2)
				538	{
				539	return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
				540	}
				541
				542	/// \brief Subtracts each 32-bit integer element of the second 64-bit integer
				543	/// vector of [2 x i32] from the corresponding 32-bit integer element of the
				544	/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
				545	/// results are packed into a 64-bit integer vector of [2 x i32].
				546	///
				547	/// \headerfile <x86intrin.h>
				548	///
				549	/// This intrinsic corresponds to the \c PSUBD instruction.
				550	///
				551	/// \param __m1
				552	/// A 64-bit integer vector of [2 x i32] containing the minuends.
				553	/// \param __m2
				554	/// A 64-bit integer vector of [2 x i32] containing the subtrahends.
				555	/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
				556	/// both parameters.
				557	static __inline__ __m64 __DEFAULT_FN_ATTRS
				558	_mm_sub_pi32(__m64 __m1, __m64 __m2)
				559	{
				560	return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
				561	}
				562
				563	/// \brief Subtracts each 8-bit signed integer element of the second 64-bit
				564	/// integer vector of [8 x i8] from the corresponding 8-bit signed integer
				565	/// element of the first 64-bit integer vector of [8 x i8]. Positive results
				566	/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
				567	/// are saturated to 0x80. The results are packed into a 64-bit integer
				568	/// vector of [8 x i8].
				569	///
				570	/// \headerfile <x86intrin.h>
				571	///
				572	/// This intrinsic corresponds to the \c PSUBSB instruction.
				573	///
				574	/// \param __m1
				575	/// A 64-bit integer vector of [8 x i8] containing the minuends.
				576	/// \param __m2
				577	/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
				578	/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
				579	/// differences of both parameters.
				580	static __inline__ __m64 __DEFAULT_FN_ATTRS
				581	_mm_subs_pi8(__m64 __m1, __m64 __m2)
				582	{
				583	return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
				584	}
				585
				586	/// \brief Subtracts each 16-bit signed integer element of the second 64-bit
				587	/// integer vector of [4 x i16] from the corresponding 16-bit signed integer
				588	/// element of the first 64-bit integer vector of [4 x i16]. Positive results
				589	/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
				590	/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit
				591	/// integer vector of [4 x i16].
				592	///
				593	/// \headerfile <x86intrin.h>
				594	///
				595	/// This intrinsic corresponds to the \c PSUBSW instruction.
				596	///
				597	/// \param __m1
				598	/// A 64-bit integer vector of [4 x i16] containing the minuends.
				599	/// \param __m2
				600	/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
				601	/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
				602	/// differences of both parameters.
				603	static __inline__ __m64 __DEFAULT_FN_ATTRS
				604	_mm_subs_pi16(__m64 __m1, __m64 __m2)
				605	{
				606	return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
				607	}
				608
				609	/// \brief Subtracts each 8-bit unsigned integer element of the second 64-bit
				610	/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
				611	/// element of the first 64-bit integer vector of [8 x i8]. If an element of
				612	/// the first vector is less than the corresponding element of the second
				613	/// vector, the result is saturated to 0. The results are packed into a
				614	/// 64-bit integer vector of [8 x i8].
				615	///
				616	/// \headerfile <x86intrin.h>
				617	///
				618	/// This intrinsic corresponds to the \c PSUBUSB instruction.
				619	///
				620	/// \param __m1
				621	/// A 64-bit integer vector of [8 x i8] containing the minuends.
				622	/// \param __m2
				623	/// A 64-bit integer vector of [8 x i8] containing the subtrahends.
				624	/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
				625	/// differences of both parameters.
				626	static __inline__ __m64 __DEFAULT_FN_ATTRS
				627	_mm_subs_pu8(__m64 __m1, __m64 __m2)
				628	{
				629	return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
				630	}
				631
				632	/// \brief Subtracts each 16-bit unsigned integer element of the second 64-bit
				633	/// integer vector of [4 x i16] from the corresponding 16-bit unsigned
				634	/// integer element of the first 64-bit integer vector of [4 x i16]. If an
				635	/// element of the first vector is less than the corresponding element of the
				636	/// second vector, the result is saturated to 0. The results are packed into
				637	/// a 64-bit integer vector of [4 x i16].
				638	///
				639	/// \headerfile <x86intrin.h>
				640	///
				641	/// This intrinsic corresponds to the \c PSUBUSW instruction.
				642	///
				643	/// \param __m1
				644	/// A 64-bit integer vector of [4 x i16] containing the minuends.
				645	/// \param __m2
				646	/// A 64-bit integer vector of [4 x i16] containing the subtrahends.
				647	/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
				648	/// differences of both parameters.
				649	static __inline__ __m64 __DEFAULT_FN_ATTRS
				650	_mm_subs_pu16(__m64 __m1, __m64 __m2)
				651	{
				652	return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
				653	}
				654
				655	/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
				656	/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
				657	/// element of the second 64-bit integer vector of [4 x i16] and get four
				658	/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
				659	/// The lower 32 bits of these two sums are packed into a 64-bit integer
				660	/// vector of [2 x i32]. For example, bits [15:0] of both parameters are
				661	/// multiplied, bits [31:16] of both parameters are multiplied, and the sum
				662	/// of both results is written to bits [31:0] of the result.
				663	///
				664	/// \headerfile <x86intrin.h>
				665	///
				666	/// This intrinsic corresponds to the \c PMADDWD instruction.
				667	///
				668	/// \param __m1
				669	/// A 64-bit integer vector of [4 x i16].
				670	/// \param __m2
				671	/// A 64-bit integer vector of [4 x i16].
				672	/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
				673	/// products of both parameters.
				674	static __inline__ __m64 __DEFAULT_FN_ATTRS
				675	_mm_madd_pi16(__m64 __m1, __m64 __m2)
				676	{
				677	return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
				678	}
				679
				680	/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
				681	/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
				682	/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper
				683	/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
				684	///
				685	/// \headerfile <x86intrin.h>
				686	///
				687	/// This intrinsic corresponds to the \c PMULHW instruction.
				688	///
				689	/// \param __m1
				690	/// A 64-bit integer vector of [4 x i16].
				691	/// \param __m2
				692	/// A 64-bit integer vector of [4 x i16].
				693	/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
				694	/// of the products of both parameters.
				695	static __inline__ __m64 __DEFAULT_FN_ATTRS
				696	_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
				697	{
				698	return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
				699	}
				700
				701	/// \brief Multiplies each 16-bit signed integer element of the first 64-bit
				702	/// integer vector of [4 x i16] by the corresponding 16-bit signed integer
				703	/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower
				704	/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
				705	///
				706	/// \headerfile <x86intrin.h>
				707	///
				708	/// This intrinsic corresponds to the \c PMULLW instruction.
				709	///
				710	/// \param __m1
				711	/// A 64-bit integer vector of [4 x i16].
				712	/// \param __m2
				713	/// A 64-bit integer vector of [4 x i16].
				714	/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
				715	/// of the products of both parameters.
				716	static __inline__ __m64 __DEFAULT_FN_ATTRS
				717	_mm_mullo_pi16(__m64 __m1, __m64 __m2)
				718	{
				719	return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
				720	}
				721
				722	/// \brief Left-shifts each 16-bit signed integer element of the first
				723	/// parameter, which is a 64-bit integer vector of [4 x i16], by the number
				724	/// of bits specified by the second parameter, which is a 64-bit integer. The
				725	/// lower 16 bits of the results are packed into a 64-bit integer vector of
				726	/// [4 x i16].
				727	///
				728	/// \headerfile <x86intrin.h>
				729	///
				730	/// This intrinsic corresponds to the \c PSLLW instruction.
				731	///
				732	/// \param __m
				733	/// A 64-bit integer vector of [4 x i16].
				734	/// \param __count
				735	/// A 64-bit integer vector interpreted as a single 64-bit integer.
				736	/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
				737	/// values. If __count is greater than or equal to 16, the result is set to all 0.
				738	static __inline__ __m64 __DEFAULT_FN_ATTRS
				739	_mm_sll_pi16(__m64 __m, __m64 __count)
				740	{
				741	return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
				742	}
				743
				744	/// \brief Left-shifts each 16-bit signed integer element of a 64-bit integer
				745	/// vector of [4 x i16] by the number of bits specified by a 32-bit integer.
				746	/// The lower 16 bits of the results are packed into a 64-bit integer vector
				747	/// of [4 x i16].
				748	///
				749	/// \headerfile <x86intrin.h>
				750	///
				751	/// This intrinsic corresponds to the \c PSLLW instruction.
				752	///
				753	/// \param __m
				754	/// A 64-bit integer vector of [4 x i16].
				755	/// \param __count
				756	/// A 32-bit integer value.
				757	/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
				758	/// values. If __count is greater than or equal to 16, the result is set to all 0.
				759	static __inline__ __m64 __DEFAULT_FN_ATTRS
				760	_mm_slli_pi16(__m64 __m, int __count)
				761	{
				762	return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
				763	}
				764
				765	/// \brief Left-shifts each 32-bit signed integer element of the first
				766	/// parameter, which is a 64-bit integer vector of [2 x i32], by the number
				767	/// of bits specified by the second parameter, which is a 64-bit integer. The
				768	/// lower 32 bits of the results are packed into a 64-bit integer vector of
				769	/// [2 x i32].
				770	///
				771	/// \headerfile <x86intrin.h>
				772	///
				773	/// This intrinsic corresponds to the \c PSLLD instruction.
				774	///
				775	/// \param __m
				776	/// A 64-bit integer vector of [2 x i32].
				777	/// \param __count
				778	/// A 64-bit integer vector interpreted as a single 64-bit integer.
				779	/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
				780	/// values. If __count is greater than or equal to 32, the result is set to all 0.
				781	static __inline__ __m64 __DEFAULT_FN_ATTRS
				782	_mm_sll_pi32(__m64 __m, __m64 __count)
				783	{
				784	return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
				785	}
				786
				787	/// \brief Left-shifts each 32-bit signed integer element of a 64-bit integer
				788	/// vector of [2 x i32] by the number of bits specified by a 32-bit integer.
				789	/// The lower 32 bits of the results are packed into a 64-bit integer vector
				790	/// of [2 x i32].
				791	///
				792	/// \headerfile <x86intrin.h>
				793	///
				794	/// This intrinsic corresponds to the \c PSLLD instruction.
				795	///
				796	/// \param __m
				797	/// A 64-bit integer vector of [2 x i32].
				798	/// \param __count
				799	/// A 32-bit integer value.
				800	/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
				801	/// values. If __count is greater than or equal to 32, the result is set to all 0.
				802	static __inline__ __m64 __DEFAULT_FN_ATTRS
				803	_mm_slli_pi32(__m64 __m, int __count)
				804	{
				805	return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
				806	}
				807
				808	/// \brief Left-shifts the first 64-bit integer parameter by the number of bits
				809	/// specified by the second 64-bit integer parameter. The lower 64 bits of
				810	/// the result are returned.
				811	///
				812	/// \headerfile <x86intrin.h>
				813	///
				814	/// This intrinsic corresponds to the \c PSLLQ instruction.
				815	///
				816	/// \param __m
				817	/// A 64-bit integer vector interpreted as a single 64-bit integer.
				818	/// \param __count
				819	/// A 64-bit integer vector interpreted as a single 64-bit integer.
				820	/// \returns A 64-bit integer vector containing the left-shifted value. If
				821	/// __count is greater than or equal to 64, the result is set to 0.
				822	static __inline__ __m64 __DEFAULT_FN_ATTRS
				823	_mm_sll_si64(__m64 __m, __m64 __count)
				824	{
				825	return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
				826	}
				827
				828	/// \brief Left-shifts the first parameter, which is a 64-bit integer, by the
				829	/// number of bits specified by the second parameter, which is a 32-bit
				830	/// integer. The lower 64 bits of the result are returned.
				831	///
				832	/// \headerfile <x86intrin.h>
				833	///
				834	/// This intrinsic corresponds to the \c PSLLQ instruction.
				835	///
				836	/// \param __m
				837	/// A 64-bit integer vector interpreted as a single 64-bit integer.
				838	/// \param __count
				839	/// A 32-bit integer value.
				840	/// \returns A 64-bit integer vector containing the left-shifted value. If
				841	/// __count is greater than or equal to 64, the result is set to 0.
				842	static __inline__ __m64 __DEFAULT_FN_ATTRS
				843	_mm_slli_si64(__m64 __m, int __count)
				844	{
				845	return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
				846	}
				847
				848	/// \brief Right-shifts each 16-bit integer element of the first parameter,
				849	/// which is a 64-bit integer vector of [4 x i16], by the number of bits
				850	/// specified by the second parameter, which is a 64-bit integer. High-order
				851	/// bits are filled with the sign bit of the initial value of each 16-bit
				852	/// element. The 16-bit results are packed into a 64-bit integer vector of
				853	/// [4 x i16].
				854	///
				855	/// \headerfile <x86intrin.h>
				856	///
				857	/// This intrinsic corresponds to the \c PSRAW instruction.
				858	///
				859	/// \param __m
				860	/// A 64-bit integer vector of [4 x i16].
				861	/// \param __count
				862	/// A 64-bit integer vector interpreted as a single 64-bit integer.
				863	/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
				864	/// values.
				865	static __inline__ __m64 __DEFAULT_FN_ATTRS
				866	_mm_sra_pi16(__m64 __m, __m64 __count)
				867	{
				868	return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
				869	}
				870
				871	/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
				872	/// of [4 x i16] by the number of bits specified by a 32-bit integer.
				873	/// High-order bits are filled with the sign bit of the initial value of each
				874	/// 16-bit element. The 16-bit results are packed into a 64-bit integer
				875	/// vector of [4 x i16].
				876	///
				877	/// \headerfile <x86intrin.h>
				878	///
				879	/// This intrinsic corresponds to the \c PSRAW instruction.
				880	///
				881	/// \param __m
				882	/// A 64-bit integer vector of [4 x i16].
				883	/// \param __count
				884	/// A 32-bit integer value.
				885	/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
				886	/// values.
				887	static __inline__ __m64 __DEFAULT_FN_ATTRS
				888	_mm_srai_pi16(__m64 __m, int __count)
				889	{
				890	return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
				891	}
				892
				893	/// \brief Right-shifts each 32-bit integer element of the first parameter,
				894	/// which is a 64-bit integer vector of [2 x i32], by the number of bits
				895	/// specified by the second parameter, which is a 64-bit integer. High-order
				896	/// bits are filled with the sign bit of the initial value of each 32-bit
				897	/// element. The 32-bit results are packed into a 64-bit integer vector of
				898	/// [2 x i32].
				899	///
				900	/// \headerfile <x86intrin.h>
				901	///
				902	/// This intrinsic corresponds to the \c PSRAD instruction.
				903	///
				904	/// \param __m
				905	/// A 64-bit integer vector of [2 x i32].
				906	/// \param __count
				907	/// A 64-bit integer vector interpreted as a single 64-bit integer.
				908	/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
				909	/// values.
				910	static __inline__ __m64 __DEFAULT_FN_ATTRS
				911	_mm_sra_pi32(__m64 __m, __m64 __count)
				912	{
				913	return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
				914	}
				915
				916	/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
				917	/// of [2 x i32] by the number of bits specified by a 32-bit integer.
				918	/// High-order bits are filled with the sign bit of the initial value of each
				919	/// 32-bit element. The 32-bit results are packed into a 64-bit integer
				920	/// vector of [2 x i32].
				921	///
				922	/// \headerfile <x86intrin.h>
				923	///
				924	/// This intrinsic corresponds to the \c PSRAD instruction.
				925	///
				926	/// \param __m
				927	/// A 64-bit integer vector of [2 x i32].
				928	/// \param __count
				929	/// A 32-bit integer value.
				930	/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
				931	/// values.
				932	static __inline__ __m64 __DEFAULT_FN_ATTRS
				933	_mm_srai_pi32(__m64 __m, int __count)
				934	{
				935	return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
				936	}
				937
				938	/// \brief Right-shifts each 16-bit integer element of the first parameter,
				939	/// which is a 64-bit integer vector of [4 x i16], by the number of bits
				940	/// specified by the second parameter, which is a 64-bit integer. High-order
				941	/// bits are cleared. The 16-bit results are packed into a 64-bit integer
				942	/// vector of [4 x i16].
				943	///
				944	/// \headerfile <x86intrin.h>
				945	///
				946	/// This intrinsic corresponds to the \c PSRLW instruction.
				947	///
				948	/// \param __m
				949	/// A 64-bit integer vector of [4 x i16].
				950	/// \param __count
				951	/// A 64-bit integer vector interpreted as a single 64-bit integer.
				952	/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
				953	/// values.
				954	static __inline__ __m64 __DEFAULT_FN_ATTRS
				955	_mm_srl_pi16(__m64 __m, __m64 __count)
				956	{
				957	return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
				958	}
				959
				960	/// \brief Right-shifts each 16-bit integer element of a 64-bit integer vector
				961	/// of [4 x i16] by the number of bits specified by a 32-bit integer.
				962	/// High-order bits are cleared. The 16-bit results are packed into a 64-bit
				963	/// integer vector of [4 x i16].
				964	///
				965	/// \headerfile <x86intrin.h>
				966	///
				967	/// This intrinsic corresponds to the \c PSRLW instruction.
				968	///
				969	/// \param __m
				970	/// A 64-bit integer vector of [4 x i16].
				971	/// \param __count
				972	/// A 32-bit integer value.
				973	/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
				974	/// values.
				975	static __inline__ __m64 __DEFAULT_FN_ATTRS
				976	_mm_srli_pi16(__m64 __m, int __count)
				977	{
				978	return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
				979	}
				980
				981	/// \brief Right-shifts each 32-bit integer element of the first parameter,
				982	/// which is a 64-bit integer vector of [2 x i32], by the number of bits
				983	/// specified by the second parameter, which is a 64-bit integer. High-order
				984	/// bits are cleared. The 32-bit results are packed into a 64-bit integer
				985	/// vector of [2 x i32].
				986	///
				987	/// \headerfile <x86intrin.h>
				988	///
				989	/// This intrinsic corresponds to the \c PSRLD instruction.
				990	///
				991	/// \param __m
				992	/// A 64-bit integer vector of [2 x i32].
				993	/// \param __count
				994	/// A 64-bit integer vector interpreted as a single 64-bit integer.
				995	/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
				996	/// values.
				997	static __inline__ __m64 __DEFAULT_FN_ATTRS
				998	_mm_srl_pi32(__m64 __m, __m64 __count)
				999	{
				1000	return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
				1001	}
				1002
				1003	/// \brief Right-shifts each 32-bit integer element of a 64-bit integer vector
				1004	/// of [2 x i32] by the number of bits specified by a 32-bit integer.
				1005	/// High-order bits are cleared. The 32-bit results are packed into a 64-bit
				1006	/// integer vector of [2 x i32].
				1007	///
				1008	/// \headerfile <x86intrin.h>
				1009	///
				1010	/// This intrinsic corresponds to the \c PSRLD instruction.
				1011	///
				1012	/// \param __m
				1013	/// A 64-bit integer vector of [2 x i32].
				1014	/// \param __count
				1015	/// A 32-bit integer value.
				1016	/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
				1017	/// values.
				1018	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1019	_mm_srli_pi32(__m64 __m, int __count)
				1020	{
				1021	return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
				1022	}
				1023
				1024	/// \brief Right-shifts the first 64-bit integer parameter by the number of bits
				1025	/// specified by the second 64-bit integer parameter. High-order bits are
				1026	/// cleared.
				1027	///
				1028	/// \headerfile <x86intrin.h>
				1029	///
				1030	/// This intrinsic corresponds to the \c PSRLQ instruction.
				1031	///
				1032	/// \param __m
				1033	/// A 64-bit integer vector interpreted as a single 64-bit integer.
				1034	/// \param __count
				1035	/// A 64-bit integer vector interpreted as a single 64-bit integer.
				1036	/// \returns A 64-bit integer vector containing the right-shifted value.
				1037	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1038	_mm_srl_si64(__m64 __m, __m64 __count)
				1039	{
				1040	return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
				1041	}
				1042
				1043	/// \brief Right-shifts the first parameter, which is a 64-bit integer, by the
				1044	/// number of bits specified by the second parameter, which is a 32-bit
				1045	/// integer. High-order bits are cleared.
				1046	///
				1047	/// \headerfile <x86intrin.h>
				1048	///
				1049	/// This intrinsic corresponds to the \c PSRLQ instruction.
				1050	///
				1051	/// \param __m
				1052	/// A 64-bit integer vector interpreted as a single 64-bit integer.
				1053	/// \param __count
				1054	/// A 32-bit integer value.
				1055	/// \returns A 64-bit integer vector containing the right-shifted value.
				1056	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1057	_mm_srli_si64(__m64 __m, int __count)
				1058	{
				1059	return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
				1060	}
				1061
				1062	/// \brief Performs a bitwise AND of two 64-bit integer vectors.
				1063	///
				1064	/// \headerfile <x86intrin.h>
				1065	///
				1066	/// This intrinsic corresponds to the \c PAND instruction.
				1067	///
				1068	/// \param __m1
				1069	/// A 64-bit integer vector.
				1070	/// \param __m2
				1071	/// A 64-bit integer vector.
				1072	/// \returns A 64-bit integer vector containing the bitwise AND of both
				1073	/// parameters.
				1074	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1075	_mm_and_si64(__m64 __m1, __m64 __m2)
				1076	{
				1077	return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
				1078	}
				1079
				1080	/// \brief Performs a bitwise NOT of the first 64-bit integer vector, and then
				1081	/// performs a bitwise AND of the intermediate result and the second 64-bit
				1082	/// integer vector.
				1083	///
				1084	/// \headerfile <x86intrin.h>
				1085	///
				1086	/// This intrinsic corresponds to the \c PANDN instruction.
				1087	///
				1088	/// \param __m1
				1089	/// A 64-bit integer vector. The one's complement of this parameter is used
				1090	/// in the bitwise AND.
				1091	/// \param __m2
				1092	/// A 64-bit integer vector.
				1093	/// \returns A 64-bit integer vector containing the bitwise AND of the second
				1094	/// parameter and the one's complement of the first parameter.
				1095	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1096	_mm_andnot_si64(__m64 __m1, __m64 __m2)
				1097	{
				1098	return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
				1099	}
				1100
				1101	/// \brief Performs a bitwise OR of two 64-bit integer vectors.
				1102	///
				1103	/// \headerfile <x86intrin.h>
				1104	///
				1105	/// This intrinsic corresponds to the \c POR instruction.
				1106	///
				1107	/// \param __m1
				1108	/// A 64-bit integer vector.
				1109	/// \param __m2
				1110	/// A 64-bit integer vector.
				1111	/// \returns A 64-bit integer vector containing the bitwise OR of both
				1112	/// parameters.
				1113	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1114	_mm_or_si64(__m64 __m1, __m64 __m2)
				1115	{
				1116	return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
				1117	}
				1118
				1119	/// \brief Performs a bitwise exclusive OR of two 64-bit integer vectors.
				1120	///
				1121	/// \headerfile <x86intrin.h>
				1122	///
				1123	/// This intrinsic corresponds to the \c PXOR instruction.
				1124	///
				1125	/// \param __m1
				1126	/// A 64-bit integer vector.
				1127	/// \param __m2
				1128	/// A 64-bit integer vector.
				1129	/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
				1130	/// parameters.
				1131	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1132	_mm_xor_si64(__m64 __m1, __m64 __m2)
				1133	{
				1134	return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
				1135	}
				1136
				1137	/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
				1138	/// [8 x i8] to determine if the element of the first vector is equal to the
				1139	/// corresponding element of the second vector. The comparison yields 0 for
				1140	/// false, 0xFF for true.
				1141	///
				1142	/// \headerfile <x86intrin.h>
				1143	///
				1144	/// This intrinsic corresponds to the \c PCMPEQB instruction.
				1145	///
				1146	/// \param __m1
				1147	/// A 64-bit integer vector of [8 x i8].
				1148	/// \param __m2
				1149	/// A 64-bit integer vector of [8 x i8].
				1150	/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
				1151	/// results.
				1152	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1153	_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
				1154	{
				1155	return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
				1156	}
				1157
				1158	/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
				1159	/// [4 x i16] to determine if the element of the first vector is equal to the
				1160	/// corresponding element of the second vector. The comparison yields 0 for
				1161	/// false, 0xFFFF for true.
				1162	///
				1163	/// \headerfile <x86intrin.h>
				1164	///
				1165	/// This intrinsic corresponds to the \c PCMPEQW instruction.
				1166	///
				1167	/// \param __m1
				1168	/// A 64-bit integer vector of [4 x i16].
				1169	/// \param __m2
				1170	/// A 64-bit integer vector of [4 x i16].
				1171	/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
				1172	/// results.
				1173	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1174	_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
				1175	{
				1176	return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
				1177	}
				1178
				1179	/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
				1180	/// [2 x i32] to determine if the element of the first vector is equal to the
				1181	/// corresponding element of the second vector. The comparison yields 0 for
				1182	/// false, 0xFFFFFFFF for true.
				1183	///
				1184	/// \headerfile <x86intrin.h>
				1185	///
				1186	/// This intrinsic corresponds to the \c PCMPEQD instruction.
				1187	///
				1188	/// \param __m1
				1189	/// A 64-bit integer vector of [2 x i32].
				1190	/// \param __m2
				1191	/// A 64-bit integer vector of [2 x i32].
				1192	/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
				1193	/// results.
				1194	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1195	_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
				1196	{
				1197	return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
				1198	}
				1199
				1200	/// \brief Compares the 8-bit integer elements of two 64-bit integer vectors of
				1201	/// [8 x i8] to determine if the element of the first vector is greater than
				1202	/// the corresponding element of the second vector. The comparison yields 0
				1203	/// for false, 0xFF for true.
				1204	///
				1205	/// \headerfile <x86intrin.h>
				1206	///
				1207	/// This intrinsic corresponds to the \c PCMPGTB instruction.
				1208	///
				1209	/// \param __m1
				1210	/// A 64-bit integer vector of [8 x i8].
				1211	/// \param __m2
				1212	/// A 64-bit integer vector of [8 x i8].
				1213	/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
				1214	/// results.
				1215	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1216	_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
				1217	{
				1218	return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
				1219	}
				1220
				1221	/// \brief Compares the 16-bit integer elements of two 64-bit integer vectors of
				1222	/// [4 x i16] to determine if the element of the first vector is greater than
				1223	/// the corresponding element of the second vector. The comparison yields 0
				1224	/// for false, 0xFFFF for true.
				1225	///
				1226	/// \headerfile <x86intrin.h>
				1227	///
				1228	/// This intrinsic corresponds to the \c PCMPGTW instruction.
				1229	///
				1230	/// \param __m1
				1231	/// A 64-bit integer vector of [4 x i16].
				1232	/// \param __m2
				1233	/// A 64-bit integer vector of [4 x i16].
				1234	/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
				1235	/// results.
				1236	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1237	_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
				1238	{
				1239	return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
				1240	}
				1241
				1242	/// \brief Compares the 32-bit integer elements of two 64-bit integer vectors of
				1243	/// [2 x i32] to determine if the element of the first vector is greater than
				1244	/// the corresponding element of the second vector. The comparison yields 0
				1245	/// for false, 0xFFFFFFFF for true.
				1246	///
				1247	/// \headerfile <x86intrin.h>
				1248	///
				1249	/// This intrinsic corresponds to the \c PCMPGTD instruction.
				1250	///
				1251	/// \param __m1
				1252	/// A 64-bit integer vector of [2 x i32].
				1253	/// \param __m2
				1254	/// A 64-bit integer vector of [2 x i32].
				1255	/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
				1256	/// results.
				1257	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1258	_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
				1259	{
				1260	return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
				1261	}
				1262
				1263	/// \brief Constructs a 64-bit integer vector initialized to zero.
				1264	///
				1265	/// \headerfile <x86intrin.h>
				1266	///
				1267	/// This intrinsic corresponds to the \c VXORPS / XORPS instruction.
				1268	///
				1269	/// \returns An initialized 64-bit integer vector with all elements set to zero.
				1270	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1271	_mm_setzero_si64(void)
				1272	{
				1273	return (__m64){ 0LL };
				1274	}
				1275
				1276	/// \brief Constructs a 64-bit integer vector initialized with the specified
				1277	/// 32-bit integer values.
				1278	///
				1279	/// \headerfile <x86intrin.h>
				1280	///
				1281	/// This intrinsic is a utility function and does not correspond to a specific
				1282	/// instruction.
				1283	///
				1284	/// \param __i1
				1285	/// A 32-bit integer value used to initialize the upper 32 bits of the
				1286	/// result.
				1287	/// \param __i0
				1288	/// A 32-bit integer value used to initialize the lower 32 bits of the
				1289	/// result.
				1290	/// \returns An initialized 64-bit integer vector.
				1291	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1292	_mm_set_pi32(int __i1, int __i0)
				1293	{
				1294	return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
				1295	}
				1296
				1297	/// \brief Constructs a 64-bit integer vector initialized with the specified
				1298	/// 16-bit integer values.
				1299	///
				1300	/// \headerfile <x86intrin.h>
				1301	///
				1302	/// This intrinsic is a utility function and does not correspond to a specific
				1303	/// instruction.
				1304	///
				1305	/// \param __s3
				1306	/// A 16-bit integer value used to initialize bits [63:48] of the result.
				1307	/// \param __s2
				1308	/// A 16-bit integer value used to initialize bits [47:32] of the result.
				1309	/// \param __s1
				1310	/// A 16-bit integer value used to initialize bits [31:16] of the result.
				1311	/// \param __s0
				1312	/// A 16-bit integer value used to initialize bits [15:0] of the result.
				1313	/// \returns An initialized 64-bit integer vector.
				1314	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1315	_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
				1316	{
				1317	return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
				1318	}
				1319
				1320	/// \brief Constructs a 64-bit integer vector initialized with the specified
				1321	/// 8-bit integer values.
				1322	///
				1323	/// \headerfile <x86intrin.h>
				1324	///
				1325	/// This intrinsic is a utility function and does not correspond to a specific
				1326	/// instruction.
				1327	///
				1328	/// \param __b7
				1329	/// An 8-bit integer value used to initialize bits [63:56] of the result.
				1330	/// \param __b6
				1331	/// An 8-bit integer value used to initialize bits [55:48] of the result.
				1332	/// \param __b5
				1333	/// An 8-bit integer value used to initialize bits [47:40] of the result.
				1334	/// \param __b4
				1335	/// An 8-bit integer value used to initialize bits [39:32] of the result.
				1336	/// \param __b3
				1337	/// An 8-bit integer value used to initialize bits [31:24] of the result.
				1338	/// \param __b2
				1339	/// An 8-bit integer value used to initialize bits [23:16] of the result.
				1340	/// \param __b1
				1341	/// An 8-bit integer value used to initialize bits [15:8] of the result.
				1342	/// \param __b0
				1343	/// An 8-bit integer value used to initialize bits [7:0] of the result.
				1344	/// \returns An initialized 64-bit integer vector.
				1345	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1346	_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
				1347	char __b1, char __b0)
				1348	{
				1349	return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
				1350	__b4, __b5, __b6, __b7);
				1351	}
				1352
				1353	/// \brief Constructs a 64-bit integer vector of [2 x i32], with each of the
				1354	/// 32-bit integer vector elements set to the specified 32-bit integer
				1355	/// value.
				1356	///
				1357	/// \headerfile <x86intrin.h>
				1358	///
				1359	/// This intrinsic corresponds to the \c VPSHUFD / PSHUFD instruction.
				1360	///
				1361	/// \param __i
				1362	/// A 32-bit integer value used to initialize each vector element of the
				1363	/// result.
				1364	/// \returns An initialized 64-bit integer vector of [2 x i32].
				1365	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1366	_mm_set1_pi32(int __i)
				1367	{
				1368	return _mm_set_pi32(__i, __i);
				1369	}
				1370
				1371	/// \brief Constructs a 64-bit integer vector of [4 x i16], with each of the
				1372	/// 16-bit integer vector elements set to the specified 16-bit integer
				1373	/// value.
				1374	///
				1375	/// \headerfile <x86intrin.h>
				1376	///
				1377	/// This intrinsic corresponds to the \c VPSHUFLW / PSHUFLW instruction.
				1378	///
				1379	/// \param __w
				1380	/// A 16-bit integer value used to initialize each vector element of the
				1381	/// result.
				1382	/// \returns An initialized 64-bit integer vector of [4 x i16].
				1383	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1384	_mm_set1_pi16(short __w)
				1385	{
				1386	return _mm_set_pi16(__w, __w, __w, __w);
				1387	}
				1388
				1389	/// \brief Constructs a 64-bit integer vector of [8 x i8], with each of the
				1390	/// 8-bit integer vector elements set to the specified 8-bit integer value.
				1391	///
				1392	/// \headerfile <x86intrin.h>
				1393	///
				1394	/// This intrinsic corresponds to the \c VPUNPCKLBW + VPSHUFLW / \c PUNPCKLBW +
				1395	/// PSHUFLW instruction.
				1396	///
				1397	/// \param __b
				1398	/// An 8-bit integer value used to initialize each vector element of the
				1399	/// result.
				1400	/// \returns An initialized 64-bit integer vector of [8 x i8].
				1401	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1402	_mm_set1_pi8(char __b)
				1403	{
				1404	return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
				1405	}
				1406
				1407	/// \brief Constructs a 64-bit integer vector from the specified 32-bit integer
				1408	/// values, which are given in the reverse order of _mm_set_pi32()'s arguments.
				1409	///
				1410	/// \headerfile <x86intrin.h>
				1411	///
				1412	/// This intrinsic is a utility function and does not correspond to a specific
				1413	/// instruction.
				1414	///
				1415	/// \param __i0
				1416	/// A 32-bit integer value used to initialize the lower 32 bits of the
				1417	/// result.
				1418	/// \param __i1
				1419	/// A 32-bit integer value used to initialize the upper 32 bits of the
				1420	/// result.
				1421	/// \returns An initialized 64-bit integer vector.
				1422	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1423	_mm_setr_pi32(int __i0, int __i1)
				1424	{
				1425	return _mm_set_pi32(__i1, __i0); /* forwarded with the two arguments swapped */
				1426	}
				1427
				1428	/// \brief Constructs a 64-bit integer vector from the specified 16-bit integer
				1429	/// values, which are given in the reverse order of _mm_set_pi16()'s arguments.
				1430	///
				1431	/// \headerfile <x86intrin.h>
				1432	///
				1433	/// This intrinsic is a utility function and does not correspond to a specific
				1434	/// instruction.
				1435	///
				1436	/// \param __w0
				1437	/// A 16-bit integer value used to initialize bits [15:0] of the result.
				1438	/// \param __w1
				1439	/// A 16-bit integer value used to initialize bits [31:16] of the result.
				1440	/// \param __w2
				1441	/// A 16-bit integer value used to initialize bits [47:32] of the result.
				1442	/// \param __w3
				1443	/// A 16-bit integer value used to initialize bits [63:48] of the result.
				1444	/// \returns An initialized 64-bit integer vector.
				1445	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1446	_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
				1447	{
				1448	return _mm_set_pi16(__w3, __w2, __w1, __w0); /* forwarded last-to-first */
				1449	}
				1450
				1451	/// \brief Constructs a 64-bit integer vector from the specified 8-bit integer
				1452	/// values, which are given in the reverse order of _mm_set_pi8()'s arguments.
				1453	///
				1454	/// \headerfile <x86intrin.h>
				1455	///
				1456	/// This intrinsic is a utility function and does not correspond to a specific
				1457	/// instruction.
				1458	///
				1459	/// \param __b0
				1460	/// An 8-bit integer value used to initialize bits [7:0] of the result.
				1461	/// \param __b1
				1462	/// An 8-bit integer value used to initialize bits [15:8] of the result.
				1463	/// \param __b2
				1464	/// An 8-bit integer value used to initialize bits [23:16] of the result.
				1465	/// \param __b3
				1466	/// An 8-bit integer value used to initialize bits [31:24] of the result.
				1467	/// \param __b4
				1468	/// An 8-bit integer value used to initialize bits [39:32] of the result.
				1469	/// \param __b5
				1470	/// An 8-bit integer value used to initialize bits [47:40] of the result.
				1471	/// \param __b6
				1472	/// An 8-bit integer value used to initialize bits [55:48] of the result.
				1473	/// \param __b7
				1474	/// An 8-bit integer value used to initialize bits [63:56] of the result.
				1475	/// \returns An initialized 64-bit integer vector.
				1476	static __inline__ __m64 __DEFAULT_FN_ATTRS
				1477	_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
				1478	char __b6, char __b7)
				1479	{
				1480	return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); /* forwarded last-to-first */
				1481	}
				1482
				1483	#undef __DEFAULT_FN_ATTRS /* no further uses below this point */
				1484
				1485	/* Aliases for compatibility: each _m_* macro expands to the _mm_* name beside it. */
				1486	#define _m_empty _mm_empty
				1487	#define _m_from_int _mm_cvtsi32_si64
				1488	#define _m_from_int64 _mm_cvtsi64_m64
				1489	#define _m_to_int _mm_cvtsi64_si32
				1490	#define _m_to_int64 _mm_cvtm64_si64
				1491	#define _m_packsswb _mm_packs_pi16
				1492	#define _m_packssdw _mm_packs_pi32
				1493	#define _m_packuswb _mm_packs_pu16
				1494	#define _m_punpckhbw _mm_unpackhi_pi8
				1495	#define _m_punpckhwd _mm_unpackhi_pi16
				1496	#define _m_punpckhdq _mm_unpackhi_pi32
				1497	#define _m_punpcklbw _mm_unpacklo_pi8
				1498	#define _m_punpcklwd _mm_unpacklo_pi16
				1499	#define _m_punpckldq _mm_unpacklo_pi32
				1500	#define _m_paddb _mm_add_pi8
				1501	#define _m_paddw _mm_add_pi16
				1502	#define _m_paddd _mm_add_pi32
				1503	#define _m_paddsb _mm_adds_pi8
				1504	#define _m_paddsw _mm_adds_pi16
				1505	#define _m_paddusb _mm_adds_pu8
				1506	#define _m_paddusw _mm_adds_pu16
				1507	#define _m_psubb _mm_sub_pi8
				1508	#define _m_psubw _mm_sub_pi16
				1509	#define _m_psubd _mm_sub_pi32
				1510	#define _m_psubsb _mm_subs_pi8
				1511	#define _m_psubsw _mm_subs_pi16
				1512	#define _m_psubusb _mm_subs_pu8
				1513	#define _m_psubusw _mm_subs_pu16
				1514	#define _m_pmaddwd _mm_madd_pi16
				1515	#define _m_pmulhw _mm_mulhi_pi16
				1516	#define _m_pmullw _mm_mullo_pi16
				1517	#define _m_psllw _mm_sll_pi16
				1518	#define _m_psllwi _mm_slli_pi16
				1519	#define _m_pslld _mm_sll_pi32
				1520	#define _m_pslldi _mm_slli_pi32
				1521	#define _m_psllq _mm_sll_si64
				1522	#define _m_psllqi _mm_slli_si64
				1523	#define _m_psraw _mm_sra_pi16
				1524	#define _m_psrawi _mm_srai_pi16
				1525	#define _m_psrad _mm_sra_pi32
				1526	#define _m_psradi _mm_srai_pi32
				1527	#define _m_psrlw _mm_srl_pi16
				1528	#define _m_psrlwi _mm_srli_pi16
				1529	#define _m_psrld _mm_srl_pi32
				1530	#define _m_psrldi _mm_srli_pi32
				1531	#define _m_psrlq _mm_srl_si64
				1532	#define _m_psrlqi _mm_srli_si64
				1533	#define _m_pand _mm_and_si64
				1534	#define _m_pandn _mm_andnot_si64
				1535	#define _m_por _mm_or_si64
				1536	#define _m_pxor _mm_xor_si64
				1537	#define _m_pcmpeqb _mm_cmpeq_pi8
				1538	#define _m_pcmpeqw _mm_cmpeq_pi16
				1539	#define _m_pcmpeqd _mm_cmpeq_pi32
				1540	#define _m_pcmpgtb _mm_cmpgt_pi8
				1541	#define _m_pcmpgtw _mm_cmpgt_pi16
				1542	#define _m_pcmpgtd _mm_cmpgt_pi32
				1543
				1544	#endif /* __MMINTRIN_H */
				1545