Blame - src/harfbuzz-khmer.c - platform/external/harfbuzz

blob: 958069e8669d41c8f2bad225f72b05e0060d3b41 [file] [log] [blame]

claireho	5569331	2010-04-26 13:43:16 -0700	[diff] [blame]	1	/*
				2	* Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
				3	*
				4	* This is part of HarfBuzz, an OpenType Layout engine library.
				5	*
				6	* Permission is hereby granted, without written agreement and without
				7	* license or royalty fees, to use, copy, modify, and distribute this
				8	* software and its documentation for any purpose, provided that the
				9	* above copyright notice and the following two paragraphs appear in
				10	* all copies of this software.
				11	*
				12	* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
				13	* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
				14	* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
				15	* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
				16	* DAMAGE.
				17	*
				18	* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
				19	* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
				20	* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
				21	* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
				22	* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
				23	*/
				24
				25	#include "harfbuzz-shaper.h"
				26	#include "harfbuzz-shaper-private.h"
				27
				28	#include <assert.h>
				29	#include <stdio.h>
				30
				31	/*
				32	// Vocabulary
				33	// Base -> A consonant or an independent vowel in its full (not subscript) form. It is the
				34	// center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,
				35	// split vowels, signs... but there is only one base in a syllable, it has to be coded as
				36	// the first character of the syllable.
				37	// split vowel --> vowel that has two parts placed separately (e.g. Before and after the consonant).
				38	// Khmer language has five of them. Khmer split vowels either have one part before the
				39	// base and one after the base or they have a part before the base and a part above the base.
				40	// The first part of all Khmer split vowels is the same character, identical to
				41	// the glyph of Khmer dependent vowel SRA EI
				42	// coeng --> modifier used in Khmer to construct coeng (subscript) consonants
				43	// Differently than indian languages, the coeng modifies the consonant that follows it,
				44	// not the one preceding it Each consonant has two forms, the base form and the subscript form
				45	// the base form is the normal one (using the consonants code-point), the subscript form is
				46	// displayed when the combination coeng + consonant is encountered.
				47	// Consonant of type 1 -> A consonant which has a subscript form that only occupies space under a base consonant
				48	// Consonant of type 2.-> Its subscript form occupies space under and before the base (only one, RO)
				49	// Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
				50	// Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
				51	// if it is attached to a consonant of the first series or a consonant of the second series.
				52	// Most consonants have an equivalent in the other series, but some of them exist only in
				53	// one series (for example SA). If we want to use the consonant SA with a vowel sound that
				54	// corresponds to a vowel accompanying a consonant
				55	// of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
				56	// (x17C9 and x17CA). TRIISAP changes a first series consonant to second series sound and
				57	// MUSIKATOAN a second series consonant to have a first series vowel sound.
				58	// Consonant shifters are both normally superscript marks, but, when they are followed by a
				59	// superscript, they change shape and take the form of subscript dependent vowel SRA U.
				60	// If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
				61	// should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
				62	// be placed after the coeng consonant.
				63	// Dependent vowel -> In khmer dependent vowels can be placed above, below, before or after the base
				64	// Each vowel has its own position. Only one vowel per syllable is allowed.
				65	// Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign are
				66	// Allowed in a syllable.
				67	//
				68	//
				69	// order is important here! This order must be the same that is found in each horizontal
				70	// line in the statetable for Khmer (see khmerStateTable) .
				71	*/
				72	enum KhmerCharClassValues {
				73	CC_RESERVED = 0,
				74	CC_CONSONANT = 1, /* Consonant of type 1 or independent vowel */
				75	CC_CONSONANT2 = 2, /* Consonant of type 2 */
				76	CC_CONSONANT3 = 3, /* Consonant of type 3 */
				77	CC_ZERO_WIDTH_NJ_MARK = 4, /* Zero Width non joiner character (0x200C) */
				78	CC_CONSONANT_SHIFTER = 5,
				79	CC_ROBAT = 6, /* Khmer special diacritic accent -treated differently in state table */
				80	CC_COENG = 7, /* Subscript consonant combining character */
				81	CC_DEPENDENT_VOWEL = 8,
				82	CC_SIGN_ABOVE = 9,
				83	CC_SIGN_AFTER = 10,
				84	CC_ZERO_WIDTH_J_MARK = 11, /* Zero width joiner character */
				85	CC_COUNT = 12 /* This is the number of character classes */
				86	};
				87
				88
				89	enum KhmerCharClassFlags {
				90	CF_CLASS_MASK = 0x0000FFFF,
				91
				92	CF_CONSONANT = 0x01000000, /* flag to speed up comparing */
				93	CF_SPLIT_VOWEL = 0x02000000, /* flag for a split vowel -> the first part is added in front of the syllable */
				94	CF_DOTTED_CIRCLE = 0x04000000, /* add a dotted circle if a character with this flag is the first in a syllable */
				95	CF_COENG = 0x08000000, /* flag to speed up comparing */
				96	CF_SHIFTER = 0x10000000, /* flag to speed up comparing */
				97	CF_ABOVE_VOWEL = 0x20000000, /* flag to speed up comparing */
				98
				99	/* position flags */
				100	CF_POS_BEFORE = 0x00080000,
				101	CF_POS_BELOW = 0x00040000,
				102	CF_POS_ABOVE = 0x00020000,
				103	CF_POS_AFTER = 0x00010000,
				104	CF_POS_MASK = 0x000f0000
				105	};
				106
				107
				108	/* Characters that get referred to by name */
				109	enum KhmerChar {
				110	C_SIGN_ZWNJ = 0x200C,
				111	C_SIGN_ZWJ = 0x200D,
				112	C_RO = 0x179A,
				113	C_VOWEL_AA = 0x17B6,
				114	C_SIGN_NIKAHIT = 0x17C6,
				115	C_VOWEL_E = 0x17C1,
				116	C_COENG = 0x17D2
				117	};
				118
				119
				120	/*
				121	// simple classes, they are used in the statetable (in this file) to control the length of a syllable
				122	// they are also used to know where a character should be placed (location in reference to the base character)
				123	// and also to know if a character, when independently displayed, should be displayed with a dotted-circle to
				124	// indicate error in syllable construction
				125	*/
				126	enum {
				127	_xx = CC_RESERVED,
				128	_sa = CC_SIGN_ABOVE \| CF_DOTTED_CIRCLE \| CF_POS_ABOVE,
				129	_sp = CC_SIGN_AFTER \| CF_DOTTED_CIRCLE\| CF_POS_AFTER,
				130	_c1 = CC_CONSONANT \| CF_CONSONANT,
				131	_c2 = CC_CONSONANT2 \| CF_CONSONANT,
				132	_c3 = CC_CONSONANT3 \| CF_CONSONANT,
				133	_rb = CC_ROBAT \| CF_POS_ABOVE \| CF_DOTTED_CIRCLE,
				134	_cs = CC_CONSONANT_SHIFTER \| CF_DOTTED_CIRCLE \| CF_SHIFTER,
				135	_dl = CC_DEPENDENT_VOWEL \| CF_POS_BEFORE \| CF_DOTTED_CIRCLE,
				136	_db = CC_DEPENDENT_VOWEL \| CF_POS_BELOW \| CF_DOTTED_CIRCLE,
				137	_da = CC_DEPENDENT_VOWEL \| CF_POS_ABOVE \| CF_DOTTED_CIRCLE \| CF_ABOVE_VOWEL,
				138	_dr = CC_DEPENDENT_VOWEL \| CF_POS_AFTER \| CF_DOTTED_CIRCLE,
				139	_co = CC_COENG \| CF_COENG \| CF_DOTTED_CIRCLE,
				140
				141	/* split vowel */
				142	_va = _da \| CF_SPLIT_VOWEL,
				143	_vr = _dr \| CF_SPLIT_VOWEL
				144	};
				145
				146
				147	/*
				148	// Character class: a character class value
				149	// ORed with character class flags.
				150	*/
				151	typedef unsigned long KhmerCharClass;
				152
				153
				154	/*
				155	// Character class tables
				156	// _xx character does not combine into syllable, such as numbers, puntuation marks, non-Khmer signs...
				157	// _sa Sign placed above the base
				158	// _sp Sign placed after the base
				159	// _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants)
				160	// _c2 Consonant of type 2 (only RO)
				161	// _c3 Consonant of type 3
				162	// _rb Khmer sign robat u17CC. combining mark for subscript consonants
				163	// _cd Consonant-shifter
				164	// _dl Dependent vowel placed before the base (left of the base)
				165	// _db Dependent vowel placed below the base
				166	// _da Dependent vowel placed above the base
				167	// _dr Dependent vowel placed behind the base (right of the base)
				168	// _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following
				169	// it to create a subscript consonant or independent vowel
				170	// _va Khmer split vowel in which the first part is before the base and the second one above the base
				171	// _vr Khmer split vowel in which the first part is before the base and the second one behind (right of) the base
				172	*/
				173	static const KhmerCharClass khmerCharClasses[] = {
				174	_c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, /* 1780 - 178F */
				175	_c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, /* 1790 - 179F */
				176	_c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, /* 17A0 - 17AF */
				177	_c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, /* 17B0 - 17BF */
				178	_vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, /* 17C0 - 17CF */
				179	_sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx /* 17D0 - 17DF */
				180	};
				181
				182	/* this enum must reflect the range of khmerCharClasses */
				183	enum KhmerCharClassesRange {
				184	KhmerFirstChar = 0x1780,
				185	KhmerLastChar = 0x17df
				186	};
				187
				188	/*
				189	// Below we define how a character in the input string is either in the khmerCharClasses table
				190	// (in which case we get its type back), a ZWJ or ZWNJ (two characters that may appear
				191	// within the syllable, but are not in the table) we also get their type back, or an unknown object
				192	// in which case we get _xx (CC_RESERVED) back
				193	*/
				194	static KhmerCharClass getKhmerCharClass(HB_UChar16 uc)
				195	{
				196	if (uc == C_SIGN_ZWJ) {
				197	return CC_ZERO_WIDTH_J_MARK;
				198	}
				199
				200	if (uc == C_SIGN_ZWNJ) {
				201	return CC_ZERO_WIDTH_NJ_MARK;
				202	}
				203
				204	if (uc < KhmerFirstChar \|\| uc > KhmerLastChar) {
				205	return CC_RESERVED;
				206	}
				207
				208	return khmerCharClasses[uc - KhmerFirstChar];
				209	}
				210
				211
				212	/*
				213	// The stateTable is used to calculate the end (the length) of a well
				214	// formed Khmer Syllable.
				215	//
				216	// Each horizontal line is ordered exactly the same way as the values in KhmerClassTable
				217	// CharClassValues. This coincidence of values allows the follow up of the table.
				218	//
				219	// Each line corresponds to a state, which does not necessarily need to be a type
				220	// of component... for example, state 2 is a base, which is always the first character
				221	// in the syllable, but the state could be produced by a consonant of any type when
				222	// it is the first character that is analysed (in ground state).
				223	//
				224	// Differentiating 3 types of consonants is necessary in order to
				225	// forbid the use of certain combinations, such as having a second
				226	// coeng after a coeng RO,
				227	// The inexistent possibility of having a type 3 after another type 3 is permitted,
				228	// eliminating it would very much complicate the table, and it does not create typing
				229	// problems, as the case above.
				230	//
				231	// The table is quite complex, in order to limit the number of coeng consonants
				232	// to 2 (by means of the table).
				233	//
				234	// There is a peculiarity, as far as Unicode is concerned:
				235	// - The consonant-shifter is considered in two possible different
				236	// locations, the one considered in Unicode 3.0 and the one considered in
				237	// Unicode 4.0. (there is a backwards compatibility problem in this standard).
				238	//
				239	//
				240	// xx independent character, such as a number, punctuation sign or non-khmer char
				241	//
				242	// c1 Khmer consonant of type 1 or an independent vowel
				243	// that is, a letter in which the subscript form is only under the
				244	// base, not taking any space to the right or to the left
				245	//
				246	// c2 Khmer consonant of type 2, the coeng form takes space under
				247	// and to the left of the base (only RO is of this type)
				248	//
				249	// c3 Khmer consonant of type 3. Its subscript form takes space under
				250	// and to the right of the base.
				251	//
				252	// cs Khmer consonant shifter
				253	//
				254	// rb Khmer robat
				255	//
				256	// co coeng character (u17D2)
				257	//
				258	// dv dependent vowel (including split vowels, they are treated in the same way).
				259	// even if dv is not defined above, the component that is really tested for is
				260	// KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels
				261	//
				262	// zwj Zero Width joiner
				263	//
				264	// zwnj Zero width non joiner
				265	//
				266	// sa above sign
				267	//
				268	// sp post sign
				269	//
				270	// there are lines with equal content but for an easier understanding
				271	// (and maybe change in the future) we did not join them
				272	*/
				273	static const signed char khmerStateTable[][CC_COUNT] =
				274	{
				275	/* xx c1 c2 c3 zwnj cs rb co dv sa sp zwj */
				276	{ 1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2}, /* 0 - ground state */
				277	{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 1 - exit state (or sign to the right of the syllable) */
				278	{-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1}, /* 2 - Base consonant */
				279	{-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1}, /* 3 - First ZWNJ before a register shifter It can only be followed by a shifter or a vowel */
				280	{-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14}, /* 4 - First register shifter */
				281	{-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1}, /* 5 - Robat */
				282	{-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, /* 6 - First Coeng */
				283	{-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 7 - First consonant of type 1 after coeng */
				284	{-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14}, /* 8 - First consonant of type 2 after coeng */
				285	{-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, /* 9 - First consonant or type 3 after ceong */
				286	{-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, /* 10 - Second Coeng (no register shifter before) */
				287	{-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 11 - Second coeng consonant (or ind. vowel) no register shifter before */
				288	{-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, /* 12 - Second ZWNJ before a register shifter */
				289	{-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, /* 13 - Second register shifter */
				290	{-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 14 - ZWJ before vowel */
				291	{-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, /* 15 - ZWNJ before vowel */
				292	{-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18}, /* 16 - dependent vowel */
				293	{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18}, /* 17 - sign above */
				294	{-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, /* 18 - ZWJ after vowel */
				295	{-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1}, /* 19 - Third coeng */
				296	{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}, /* 20 - dependent vowel after a Robat */
				297	};
				298
				299
				300	/* #define KHMER_DEBUG */
				301	#ifdef KHMER_DEBUG
				302	#define KHDEBUG qDebug
				303	#else
				304	#define KHDEBUG if(0) printf
				305	#endif
				306
				307	/*
				308	// Given an input string of characters and a location in which to start looking
				309	// calculate, using the state table, which one is the last character of the syllable
				310	// that starts in the starting position.
				311	*/
				312	static int khmer_nextSyllableBoundary(const HB_UChar16 s, int start, int end, HB_Bool invalid)
				313	{
				314	const HB_UChar16 *uc = s + start;
				315	int state = 0;
				316	int pos = start;
				317	*invalid = FALSE;
				318
				319	while (pos < end) {
				320	KhmerCharClass charClass = getKhmerCharClass(*uc);
				321	if (pos == start) {
				322	*invalid = (charClass > 0) && ! (charClass & CF_CONSONANT);
				323	}
				324	state = khmerStateTable[state][charClass & CF_CLASS_MASK];
				325
				326	KHDEBUG("state[%d]=%d class=%8lx (uc=%4x)", pos - start, state,
				327	charClass, *uc );
				328
				329	if (state < 0) {
				330	break;
				331	}
				332	++uc;
				333	++pos;
				334	}
				335	return pos;
				336	}
				337
				338	#ifndef NO_OPENTYPE
				339	static const HB_OpenTypeFeature khmer_features[] = {
				340	{ HB_MAKE_TAG( 'p', 'r', 'e', 'f' ), PreFormProperty },
				341	{ HB_MAKE_TAG( 'b', 'l', 'w', 'f' ), BelowFormProperty },
				342	{ HB_MAKE_TAG( 'a', 'b', 'v', 'f' ), AboveFormProperty },
				343	{ HB_MAKE_TAG( 'p', 's', 't', 'f' ), PostFormProperty },
				344	{ HB_MAKE_TAG( 'p', 'r', 'e', 's' ), PreSubstProperty },
				345	{ HB_MAKE_TAG( 'b', 'l', 'w', 's' ), BelowSubstProperty },
				346	{ HB_MAKE_TAG( 'a', 'b', 'v', 's' ), AboveSubstProperty },
				347	{ HB_MAKE_TAG( 'p', 's', 't', 's' ), PostSubstProperty },
				348	{ HB_MAKE_TAG( 'c', 'l', 'i', 'g' ), CligProperty },
				349	{ 0, 0 }
				350	};
				351	#endif
				352
				353
				354	static HB_Bool khmer_shape_syllable(HB_Bool openType, HB_ShaperItem *item)
				355	{
				356	/* KHDEBUG("syllable from %d len %d, str='%s'", item->from, item->length,
				357	item->string->mid(item->from, item->length).toUtf8().data()); */
				358
				359	int len = 0;
				360	int syllableEnd = item->item.pos + item->item.length;
				361	unsigned short reordered[16];
				362	unsigned char properties[16];
				363	enum {
				364	AboveForm = 0x01,
				365	PreForm = 0x02,
				366	PostForm = 0x04,
				367	BelowForm = 0x08
				368	};
				369	#ifndef NO_OPENTYPE
				370	const int availableGlyphs = item->num_glyphs;
				371	#endif
				372	int coengRo;
				373	int i;
				374
				375	/* according to the specs this is the max length one can get
				376	### the real value should be smaller */
				377	assert(item->item.length < 13);
				378
				379	memset(properties, 0, 16*sizeof(unsigned char));
				380
				381	#ifdef KHMER_DEBUG
				382	qDebug("original:");
				383	for (int i = from; i < syllableEnd; i++) {
				384	qDebug(" %d: %4x", i, string[i]);
				385	}
				386	#endif
				387
				388	/*
				389	// write a pre vowel or the pre part of a split vowel first
				390	// and look out for coeng + ro. RO is the only vowel of type 2, and
				391	// therefore the only one that requires saving space before the base.
				392	*/
				393	coengRo = -1; /* There is no Coeng Ro, if found this value will change */
				394	for (i = item->item.pos; i < syllableEnd; i += 1) {
				395	KhmerCharClass charClass = getKhmerCharClass(item->string[i]);
				396
				397	/* if a split vowel, write the pre part. In Khmer the pre part
				398	is the same for all split vowels, same glyph as pre vowel C_VOWEL_E */
				399	if (charClass & CF_SPLIT_VOWEL) {
				400	reordered[len] = C_VOWEL_E;
				401	properties[len] = PreForm;
				402	++len;
				403	break; /* there can be only one vowel */
				404	}
				405	/* if a vowel with pos before write it out */
				406	if (charClass & CF_POS_BEFORE) {
				407	reordered[len] = item->string[i];
				408	properties[len] = PreForm;
				409	++len;
				410	break; /* there can be only one vowel */
				411	}
				412	/* look for coeng + ro and remember position
				413	works because coeng + ro is always in front of a vowel (if there is a vowel)
				414	and because CC_CONSONANT2 is enough to identify it, as it is the only consonant
				415	with this flag */
				416	if ( (charClass & CF_COENG) && (i + 1 < syllableEnd) &&
				417	( (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CONSONANT2) ) {
				418	coengRo = i;
				419	}
				420	}
				421
				422	/* write coeng + ro if found */
				423	if (coengRo > -1) {
				424	reordered[len] = C_COENG;
				425	properties[len] = PreForm;
				426	++len;
				427	reordered[len] = C_RO;
				428	properties[len] = PreForm;
				429	++len;
				430	}
				431
				432	/*
				433	shall we add a dotted circle?
				434	If in the position in which the base should be (first char in the string) there is
				435	a character that has the Dotted circle flag (a character that cannot be a base)
				436	then write a dotted circle */
				437	if (getKhmerCharClass(item->string[item->item.pos]) & CF_DOTTED_CIRCLE) {
				438	reordered[len] = C_DOTTED_CIRCLE;
				439	++len;
				440	}
				441
				442	/* copy what is left to the output, skipping before vowels and
				443	coeng Ro if they are present */
				444	for (i = item->item.pos; i < syllableEnd; i += 1) {
				445	HB_UChar16 uc = item->string[i];
				446	KhmerCharClass charClass = getKhmerCharClass(uc);
				447
				448	/* skip a before vowel, it was already processed */
				449	if (charClass & CF_POS_BEFORE) {
				450	continue;
				451	}
				452
				453	/* skip coeng + ro, it was already processed */
				454	if (i == coengRo) {
				455	i += 1;
				456	continue;
				457	}
				458
				459	switch (charClass & CF_POS_MASK)
				460	{
				461	case CF_POS_ABOVE :
				462	reordered[len] = uc;
				463	properties[len] = AboveForm;
				464	++len;
				465	break;
				466
				467	case CF_POS_AFTER :
				468	reordered[len] = uc;
				469	properties[len] = PostForm;
				470	++len;
				471	break;
				472
				473	case CF_POS_BELOW :
				474	reordered[len] = uc;
				475	properties[len] = BelowForm;
				476	++len;
				477	break;
				478
				479	default:
				480	/* assign the correct flags to a coeng consonant
				481	Consonants of type 3 are taged as Post forms and those type 1 as below forms */
				482	if ( (charClass & CF_COENG) && i + 1 < syllableEnd ) {
				483	unsigned char property = (getKhmerCharClass(item->string[i+1]) & CF_CLASS_MASK) == CC_CONSONANT3 ?
				484	PostForm : BelowForm;
				485	reordered[len] = uc;
				486	properties[len] = property;
				487	++len;
				488	i += 1;
				489	reordered[len] = item->string[i];
				490	properties[len] = property;
				491	++len;
				492	break;
				493	}
				494
				495	/* if a shifter is followed by an above vowel change the shifter to below form,
				496	an above vowel can have two possible positions i + 1 or i + 3
				497	(position i+1 corresponds to unicode 3, position i+3 to Unicode 4)
				498	and there is an extra rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two
				499	different positions, right after the shifter or after a vowel (Unicode 4) */
				500	if ( (charClass & CF_SHIFTER) && (i + 1 < syllableEnd) ) {
				501	if (getKhmerCharClass(item->string[i+1]) & CF_ABOVE_VOWEL ) {
				502	reordered[len] = uc;
				503	properties[len] = BelowForm;
				504	++len;
				505	break;
				506	}
				507	if (i + 2 < syllableEnd &&
				508	(item->string[i+1] == C_VOWEL_AA) &&
				509	(item->string[i+2] == C_SIGN_NIKAHIT) )
				510	{
				511	reordered[len] = uc;
				512	properties[len] = BelowForm;
				513	++len;
				514	break;
				515	}
				516	if (i + 3 < syllableEnd && (getKhmerCharClass(item->string[i+3]) & CF_ABOVE_VOWEL) ) {
				517	reordered[len] = uc;
				518	properties[len] = BelowForm;
				519	++len;
				520	break;
				521	}
				522	if (i + 4 < syllableEnd &&
				523	(item->string[i+3] == C_VOWEL_AA) &&
				524	(item->string[i+4] == C_SIGN_NIKAHIT) )
				525	{
				526	reordered[len] = uc;
				527	properties[len] = BelowForm;
				528	++len;
				529	break;
				530	}
				531	}
				532
				533	/* default - any other characters */
				534	reordered[len] = uc;
				535	++len;
				536	break;
				537	} /* switch */
				538	} /* for */
				539
				540	if (!item->font->klass->convertStringToGlyphIndices(item->font,
				541	reordered, len,
				542	item->glyphs, &item->num_glyphs,
				543	item->item.bidiLevel % 2))
				544	return FALSE;
				545
				546
				547	KHDEBUG("after shaping: len=%d", len);
				548	for (i = 0; i < len; i++) {
				549	item->attributes[i].mark = FALSE;
				550	item->attributes[i].clusterStart = FALSE;
				551	item->attributes[i].justification = 0;
				552	item->attributes[i].zeroWidth = FALSE;
				553	KHDEBUG(" %d: %4x property=%x", i, reordered[i], properties[i]);
				554	}
				555
				556	/* now we have the syllable in the right order, and can start running it through open type. */
				557
				558	#ifndef NO_OPENTYPE
				559	if (openType) {
				560	hb_uint32 where[16];
				561	for (i = 0; i < len; ++i) {
				562	where[i] = ~(PreSubstProperty
				563	\| BelowSubstProperty
				564	\| AboveSubstProperty
				565	\| PostSubstProperty
				566	\| CligProperty
				567	\| PositioningProperties);
				568	if (properties[i] == PreForm)
				569	where[i] &= ~PreFormProperty;
				570	else if (properties[i] == BelowForm)
				571	where[i] &= ~BelowFormProperty;
				572	else if (properties[i] == AboveForm)
				573	where[i] &= ~AboveFormProperty;
				574	else if (properties[i] == PostForm)
				575	where[i] &= ~PostFormProperty;
				576	}
				577
				578	HB_OpenTypeShape(item, where);
				579	if (!HB_OpenTypePosition(item, availableGlyphs, /doLogClusters/FALSE))
				580	return FALSE;
				581	} else
				582	#endif
				583	{
				584	KHDEBUG("Not using openType");
				585	HB_HeuristicPosition(item);
				586	}
				587
				588	item->attributes[0].clusterStart = TRUE;
				589	return TRUE;
				590	}
				591
				592	HB_Bool HB_KhmerShape(HB_ShaperItem *item)
				593	{
				594	HB_Bool openType = FALSE;
				595	unsigned short *logClusters = item->log_clusters;
				596	int i;
				597
				598	HB_ShaperItem syllable = *item;
				599	int first_glyph = 0;
				600
				601	int sstart = item->item.pos;
				602	int end = sstart + item->item.length;
				603
				604	assert(item->item.script == HB_Script_Khmer);
				605
				606	#ifndef NO_OPENTYPE
				607	openType = HB_SelectScript(item, khmer_features);
				608	#endif
				609
				610	KHDEBUG("khmer_shape: from %d length %d", item->item.pos, item->item.length);
				611	while (sstart < end) {
				612	HB_Bool invalid;
				613	int send = khmer_nextSyllableBoundary(item->string, sstart, end, &invalid);
				614	KHDEBUG("syllable from %d, length %d, invalid=%s", sstart, send-sstart,
				615	invalid ? "TRUE" : "FALSE");
				616	syllable.item.pos = sstart;
				617	syllable.item.length = send-sstart;
				618	syllable.glyphs = item->glyphs + first_glyph;
				619	syllable.attributes = item->attributes + first_glyph;
				620	syllable.offsets = item->offsets + first_glyph;
				621	syllable.advances = item->advances + first_glyph;
				622	syllable.num_glyphs = item->num_glyphs - first_glyph;
				623	if (!khmer_shape_syllable(openType, &syllable)) {
				624	KHDEBUG("syllable shaping failed, syllable requests %d glyphs", syllable.num_glyphs);
				625	item->num_glyphs += syllable.num_glyphs;
				626	return FALSE;
				627	}
				628	/* fix logcluster array */
				629	KHDEBUG("syllable:");
				630	for (i = first_glyph; i < first_glyph + (int)syllable.num_glyphs; ++i)
				631	KHDEBUG(" %d -> glyph %x", i, item->glyphs[i]);
				632	KHDEBUG(" logclusters:");
				633	for (i = sstart; i < send; ++i) {
				634	KHDEBUG(" %d -> glyph %d", i, first_glyph);
				635	logClusters[i-item->item.pos] = first_glyph;
				636	}
				637	sstart = send;
				638	first_glyph += syllable.num_glyphs;
				639	}
				640	item->num_glyphs = first_glyph;
				641	return TRUE;
				642	}
				643
				644	void HB_KhmerAttributes(HB_Script script, const HB_UChar16 text, hb_uint32 from, hb_uint32 len, HB_CharAttributes attributes)
				645	{
				646	int end = from + len;
				647	const HB_UChar16 *uc = text + from;
				648	hb_uint32 i = 0;
				649	HB_UNUSED(script);
				650	attributes += from;
				651	while ( i < len ) {
				652	HB_Bool invalid;
				653	hb_uint32 boundary = khmer_nextSyllableBoundary( text, from+i, end, &invalid ) - from;
				654
				655	attributes[i].charStop = TRUE;
				656
				657	if ( boundary > len-1 ) boundary = len;
				658	i++;
				659	while ( i < boundary ) {
				660	attributes[i].charStop = FALSE;
				661	++uc;
				662	++i;
				663	}
				664	assert( i == boundary );
				665	}
				666	}
				667