android/vendor/unicode-linebreak-0.1.2/build.rs - toolchain/cargo-vet - Gitiles

 //! Parses the rules into a state machine using a pair table. Each value in the table specifies the
 //! next state and whether it's a forced/allowed break. To handle rules such as
 //!
 //! B SP* ÷ A
 //!
 //! the extra state BSP is employed in the pair table friendly equivalent rules
 //!
 //! (B | BSP) ÷ A, Treat (B | BSP) SP as if it were BSP, Treat BSP as if it were SP
 #![recursion_limit = "512"]

 use regex::Regex;
 use std::env;
 use std::error::Error;
 use std::fs::File;
 use std::io::{BufRead, BufReader, BufWriter, Write};
 use std::iter;
 use std::path::Path;
 use std::str::FromStr;

 include!("src/shared.rs");

 /// Parses the abbreviation of a line break class (the form captured from the property column
 /// of `LineBreak.txt` in `main`) into the matching `BreakClass` variant.
 impl FromStr for BreakClass {
     type Err = &'static str;

     /// Returns the class whose abbreviation is exactly `s`; the comparison is case-sensitive.
     ///
     /// # Errors
     ///
     /// Yields `"Invalid break class"` for any string that is not one of the known abbreviations.
     fn from_str(s: &str) -> Result<Self, Self::Err> {
         let class = match s {
             "BK" => BK,
             "CR" => CR,
             "LF" => LF,
             "CM" => CM,
             "NL" => NL,
             "SG" => SG,
             "WJ" => WJ,
             "ZW" => ZW,
             "GL" => GL,
             "SP" => SP,
             "ZWJ" => ZWJ,
             "B2" => B2,
             "BA" => BA,
             "BB" => BB,
             "HY" => HY,
             "CB" => CB,
             "CL" => CL,
             "CP" => CP,
             "EX" => EX,
             "IN" => IN,
             "NS" => NS,
             "OP" => OP,
             "QU" => QU,
             "IS" => IS,
             "NU" => NU,
             "PO" => PO,
             "PR" => PR,
             "SY" => SY,
             "AI" => AI,
             "AL" => AL,
             "CJ" => CJ,
             "EB" => EB,
             "EM" => EM,
             "H2" => H2,
             "H3" => H3,
             "HL" => HL,
             "ID" => ID,
             "JL" => JL,
             "JV" => JV,
             "JT" => JT,
             "RI" => RI,
             "SA" => SA,
             "XX" => XX,
             // Anything else is not a class this build script knows about.
             _ => return Err("Invalid break class"),
         };
         Ok(class)
     }
 }

 /// Number of line break classes, i.e. the number of abbreviations in `BREAK_CLASS_TABLE`.
 const NUM_CLASSES: usize = 43;
 /// Abbreviations of the line break classes. `main` indexes this table with a `BreakClass` value
 /// cast to `usize` when it writes the property pages and the `is_safe_pair` patterns.
 // NOTE(review): the order presumably has to match the declaration order of `BreakClass` in
 // `src/shared.rs` — confirm before adding or reordering classes.
 static BREAK_CLASS_TABLE: [&str; NUM_CLASSES] = [
     "BK", "CR", "LF", "CM", "NL", "SG", "WJ", "ZW", "GL", "SP", "ZWJ", "B2", "BA", "BB", "HY",
     "CB", "CL", "CP", "EX", "IN", "NS", "OP", "QU", "IS", "NU", "PO", "PR", "SY", "AI", "AL", "CJ",
     "EB", "EM", "H2", "H3", "HL", "ID", "JL", "JV", "JT", "RI", "SA", "XX",
 ];

 /// Returns the line break class used for a code point that `LineBreak.txt` does not list.
 ///
 /// `main` calls this for every code point that falls in a gap between two listed entries.
 fn default_value(codepoint: u32) -> BreakClass {
     /// Inclusive ranges whose unlisted code points default to "ID".
     const ID_RANGES: [(u32, u32); 7] = [
         // The unassigned code points in these blocks
         (0x3400, 0x4DBF),
         (0x4E00, 0x9FFF),
         (0xF900, 0xFAFF),
         // All undesignated code points in Planes 2 and 3, whether inside or outside of
         // allocated blocks
         (0x20000, 0x2FFFD),
         (0x30000, 0x3FFFD),
         // All unassigned code points in this Plane 1 range, whether inside or outside of
         // allocated blocks
         (0x1F000, 0x1FAFF),
         (0x1FC00, 0x1FFFD),
     ];
     let within = |&(lo, hi): &(u32, u32)| lo <= codepoint && codepoint <= hi;

     if ID_RANGES.iter().any(within) {
         ID
     } else if within(&(0x20A0, 0x20CF)) {
         // The unassigned code points in this block default to "PR"
         PR
     } else {
         // All code points, assigned and unassigned, that are not listed explicitly are given
         // the value "XX"
         XX
     }
 }

 /// Additional pair-table states, numbered consecutively starting right after `sot`.
 ///
 /// They let the rules in `main` express context that a plain class pair cannot carry: each is
 /// entered by a two-pattern `Treat … as if it were …` rule and then aliased to an ordinary class
 /// by a one-pattern `Treat` rule.
 #[derive(Copy, Clone)]
 #[repr(u8)]
 enum ExtraState {
     /// `ZW` followed by spaces (LB8).
     ZWSP = sot + 1,
     /// `OP` followed by spaces (LB14).
     OPSP,
     /// `QU` followed by spaces (LB15).
     QUSP,
     /// `CL` followed by spaces (LB16).
     CLSP,
     /// `CP` followed by spaces (LB16).
     CPSP,
     /// `B2` followed by spaces (LB17).
     B2SP,
     /// `HL` followed by `HY` or `BA` (LB21a); one state serves both cases.
     HLHYBA,
     /// `RI` followed by `RI` (LB30a).
     RIRI,
 }

 use ExtraState::*;

 /// The number of classes plus the eot state. This is the width of every pair-table row, i.e.
 /// the number of values that can be looked up as the "second" element of a pair.
 const NUM_CLASSES_EOT: usize = NUM_CLASSES + 1;
 /// The number of rows (states) in the pair table.
 // NOTE(review): the 10 states beyond the classes are presumably `eot`, `sot` and the eight
 // `ExtraState` variants — confirm against `src/shared.rs` when adding a state.
 const NUM_STATES: usize = NUM_CLASSES + 10;

 /// Separate implementation to prevent infinite recursion.
 ///
 /// Token muncher behind `rules2table!`. Every invocation has the shape
 /// `(len pair_table parsed…) remaining…`: the parenthesised group accumulates what has been
 /// parsed of the current rule, and `remaining` is the unparsed input.
 ///
 /// * `len` is the bound that the next `ALL` or `[^…]` pattern expands to: `NUM_STATES` at the
 ///   start of a rule (the pattern selects rows) and `NUM_CLASSES_EOT` once an operator or a
 ///   first pattern has been consumed (the pattern selects columns).
 /// * Each class pattern expands to a local iterator named `indices` and appends that identifier
 ///   to the accumulator.
 /// * Every rule-applying arm first recurses on the rules after the comma and only then modifies
 ///   the table. Rules are therefore applied in reverse textual order, so an earlier rule
 ///   overrides a later one.
 ///
 /// Arm order matters: the generic class-pattern arms come last so that keywords such as `Treat`
 /// are not mistaken for class names.
 #[doc(hidden)]
 macro_rules! rules2table_impl {
     // Operators
     // Move the operator into the accumulator; whatever follows selects columns.
     (($len:ident $($args:tt)*) '÷' $($tt:tt)+) => {rules2table_impl! {(NUM_CLASSES_EOT $($args)* '÷') $($tt)+}};
     (($len:ident $($args:tt)*) '×' $($tt:tt)+) => {rules2table_impl! {(NUM_CLASSES_EOT $($args)* '×') $($tt)+}};
     (($len:ident $($args:tt)*) '!' $($tt:tt)+) => {rules2table_impl! {(NUM_CLASSES_EOT $($args)* '!') $($tt)+}};

     // Perform operator
     // A complete `first? operator second?` rule: apply the operator to every selected cell.
     (($len:ident $pair_table:ident $($first:ident)? $operator:literal $($second:ident)?) $(, $($tt:tt)*)?) => {
         // Apply the rules that follow first so that this rule has the final say.
         $(rules2table_impl! {(NUM_STATES $pair_table) $($tt)*})?

         #[allow(unused)] let first = 0..NUM_STATES; // Default to ALL
         $(let first = $first;)?
         #[allow(unused)] let second = 0..NUM_CLASSES_EOT; // Default to ALL
         $(let second = $second;)?
         for i in first {
             for j in second.clone() {
                 let cell = &mut $pair_table[i][j];
                 match $operator {
                     // Mandatory break: both bits are set.
                     '!' => *cell |= ALLOWED_BREAK_BIT | MANDATORY_BREAK_BIT,
                     // Break opportunity.
                     '÷' => *cell |= ALLOWED_BREAK_BIT,
                     // No break: both bits are cleared.
                     '×' => *cell &= !(ALLOWED_BREAK_BIT | MANDATORY_BREAK_BIT),
                     _ => unreachable!("Bad operator"),
                 }
             }
         }
     };

     // `Treat X <pattern>* as if it were X where X = <pattern>` is parsed in three steps: the
     // `Treat X` prefix, the literal `* as if it were X where X =` infix, and the two patterns
     // (handled by the pattern arms below).
     (($len:ident $($args:tt)*) Treat X $($tt:tt)*) => {
         rules2table_impl! {(NUM_CLASSES_EOT $($args)* treat_x) $($tt)*}
     };
     // Any other `Treat …` rule; the first pattern selects rows/states.
     (($len:ident $($args:tt)*) Treat $($tt:tt)*) => {
         rules2table_impl! {(NUM_STATES $($args)* treat) $($tt)*}
     };
     (($len:ident $($args:tt)*) * as if it were X where X = $($tt:tt)*) => {
         rules2table_impl! {(NUM_STATES $($args)* as_if_it_were_x_where_x_is) $($tt)*}
     };

     // Apply `Treat X second* as if it were X`: in every state selected by `X`, seeing a class
     // from `second` leaves the machine in that same state (the cell is overwritten with the
     // bare state number).
     (($len:ident $pair_table:ident treat_x $second:ident as_if_it_were_x_where_x_is $X:ident) $(, $($tt:tt)*)?) => {
         $(rules2table_impl! {(NUM_STATES $pair_table) $($tt)*})?

         for i in $X {
             for j in $second.clone() {
                 $pair_table[i][j] = i as u8;
             }
         }
     };
     // Apply `Treat first second as if it were cls`: the pair (first, second) moves to state
     // `cls`; the cell is overwritten with the bare state number.
     (($len:ident $pair_table:ident treat $first:ident $second:ident) as if it were $cls:ident $(, $($tt:tt)*)?) => {
         $(rules2table_impl! {(NUM_STATES $pair_table) $($tt)*})?

         let cls = $cls as u8;
         for i in $first {
             for j in $second.clone() {
                 $pair_table[i][j] = cls;
             }
         }
     };
     // Apply `Treat first as if it were cls`: make `first` an alias of `cls`.
     (($len:ident $pair_table:ident treat $first:ident) as if it were $cls:ident $(, $($tt:tt)*)?) => {
         $(rules2table_impl! {(NUM_STATES $pair_table) $($tt)*})?

         // As the second element of a pair: copy the column of `cls`. States that have no
         // column (index >= NUM_CLASSES_EOT) are skipped.
         for j in $first.clone().filter(|&j| j < NUM_CLASSES_EOT) {
             for row in $pair_table.iter_mut() {
                 row[j] = row[$cls as usize];
             }
         }
         // As the first element of a pair: copy the whole row of `cls`.
         for i in $first {
             $pair_table.copy_within($cls as usize..$cls as usize + 1, i);
         }
     };

     // All classes pattern
     (($len:ident $($args:tt)*) ALL $($tt:tt)*) => {
         let indices = 0..$len;
         rules2table_impl! {(NUM_CLASSES_EOT $($args)* indices) $($tt)*}
     };
     // Single class pattern
     (($len:ident $($args:tt)*) $cls:ident $($tt:tt)*) => {
         let indices = iter::once($cls as usize);
         rules2table_impl! {(NUM_CLASSES_EOT $($args)* indices) $($tt)*}
     };
     // Parse (X | ...) patterns
     (($len:ident $($args:tt)*) ($($cls:ident)|+) $($tt:tt)*) => {
         let indices = [$($cls as usize),+].iter().cloned();
         rules2table_impl! {(NUM_CLASSES_EOT $($args)* indices) $($tt)*}
     };
     // Parse [^ ...] patterns
     // Everything in `0..len` except the listed classes/states.
     (($len:ident $($args:tt)*) [^$($cls:ident)+] $($tt:tt)*) => {
         let excluded = [$($cls as usize),+];
         let indices = (0..$len).filter(|i| !excluded.contains(i)).collect::<Vec<_>>();
         let indices = indices.iter().cloned();
         rules2table_impl! {(NUM_CLASSES_EOT $($args)* indices) $($tt)*}
     };

     (($len:ident $pair_table:ident)) => {}; // Exit condition
 }

 /// Expands to a block expression that evaluates to the pair table described by the given rules.
 ///
 /// The rules are written in a modified subset of the notation used by Unicode Standard
 /// Annex #14 and are interpreted by `rules2table_impl!`.
 macro_rules! rules2table {
     ($($rules:tt)+) => {{
         // Every state starts from the same row: seeing class `j` leads to state `j`, with no
         // break bits set.
         let mut identity_row = [0; NUM_CLASSES_EOT];
         for (class, cell) in identity_row.iter_mut().enumerate() {
             *cell = class as u8;
         }
         let mut pair_table = [identity_row; NUM_STATES];
         rules2table_impl! {(NUM_STATES pair_table) $($rules)+}
         pair_table
     }};
 }

 /// Extension methods available on every iterator.
 trait IteratorExt: Iterator {
     /// Reports whether every item produced by the iterator compares equal to the first one.
     ///
     /// An empty iterator counts as all-equal. The iterator is advanced: it is consumed up to and
     /// including the first item that differs, or to the end if there is none.
     fn all_equal(&mut self) -> bool
     where
         <Self as Iterator>::Item: PartialEq,
         Self: Sized,
     {
         match self.next() {
             None => true,
             Some(head) => self.all(|item| item == head),
         }
     }
 }

 impl<I: Iterator> IteratorExt for I {}

 /// Build-script entry point: generates `$OUT_DIR/tables.rs`.
 ///
 /// The generated file contains, in this order:
 ///
 /// * `BREAK_PROP_DATA` – the non-uniform 256-code-point pages of break classes read from
 ///   `LineBreak.txt` (gaps between entries are filled with `default_value`),
 /// * `PAGE_COUNT` and `PAGE_INDICES` – one entry per page, either an index into
 ///   `BREAK_PROP_DATA` or, for a page with a single class, that class or-ed with `UNIFORM_PAGE`,
 /// * `PAIR_TABLE` – the state machine built from the rules below,
 /// * `is_safe_pair` – a predicate that is `false` exactly for the class pairs `(a, b)` whose
 ///   transition on `b` depends on the state in which `a` was seen.
 ///
 /// # Errors
 ///
 /// Fails if `OUT_DIR` is not set, if `LineBreak.txt` cannot be opened, if the output file
 /// cannot be created, written or flushed, or if the regex fails to compile.
 ///
 /// # Panics
 ///
 /// Panics if there are too many states, if a line of `LineBreak.txt` cannot be read, or if a
 /// data line does not have the expected `start[..end];class` form.
 fn main() -> Result<(), Box<dyn Error>> {
     println!("cargo:rerun-if-changed=LineBreak.txt");
     // A state number shares its `u8` cell with the two break bits.
     assert!(NUM_STATES <= 0x3F, "Too many states");

     // The macro applies the rules in reverse textual order, so a rule listed earlier overrides
     // the rules listed after it.
     let pair_table = rules2table! {
         // Non-tailorable Line Breaking Rules
         // LB1 Assign a line breaking class to each code point of the input. Resolve AI, CB, CJ,
         // SA, SG, and XX into other line breaking classes depending on criteria outside the scope
         // of this algorithm.
         Treat (AI | SG | XX | SA) as if it were AL, Treat CJ as if it were NS,
         // Start and end of text:
         sot '×', // LB2 Never break at the start of text.
         '!' eot, // LB3 Always break at the end of text.
         // Mandatory breaks:
         BK '!', // LB4 Always break after hard line breaks.
         // LB5 Treat CR followed by LF, as well as CR, LF, and NL as hard line breaks.
         CR '×' LF, CR '!', LF '!', NL '!',
         '×' (BK | CR | LF | NL), // LB6 Do not break before hard line breaks.
         // Explicit breaks and non-breaks:
         '×' SP, '×' ZW, // LB7 Do not break before spaces or zero width space.
         // LB8 Break before any character following a zero-width space, even if one or more spaces
         // intervene.
         (ZW | ZWSP) '÷', Treat (ZW | ZWSP) SP as if it were ZWSP, Treat ZWSP as if it were SP,
         // ZWJ '×', // XXX Handled explicitly // LB8a Do not break after a zero width joiner.
         // Combining marks:
         // LB9 Do not break a combining character sequence; treat it as if it has the line breaking
         // class of the base character in all of the following rules. Treat ZWJ as if it were CM.
         Treat X (CM | ZWJ)* as if it were X where X = [^BK CR LF NL SP ZW sot eot ZWSP OPSP QUSP CLSP CPSP B2SP],
         Treat (CM | ZWJ) as if it were AL, // LB10 Treat any remaining combining mark or ZWJ as AL.
         // Word joiner:
         '×' WJ, WJ '×', // LB11 Do not break before or after Word joiner and related characters.
         // Non-breaking characters:
         GL '×', // LB12 Do not break after NBSP and related characters.

         // Tailorable Line Breaking Rules
         [^SP BA HY sot eot ZWSP OPSP QUSP CLSP CPSP B2SP] '×' GL, // LB12a Do not break before NBSP and related characters, except after spaces and hyphens.
         // LB13 Do not break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces.
         '×' CL, '×' CP, '×' EX, '×' IS, '×' SY,
         // LB14 Do not break after ‘[’, even after spaces.
         (OP | OPSP) '×', Treat (OP | OPSP) SP as if it were OPSP, Treat OPSP as if it were SP,
         // LB15 Do not break within ‘”[’, even with intervening spaces.
         (QU | QUSP) '×' OP, Treat (QU | QUSP) SP as if it were QUSP, Treat QUSP as if it were SP,
         // LB16 Do not break between closing punctuation and a nonstarter (lb=NS), even with
         // intervening spaces.
         (CL | CLSP | CP | CPSP) '×' NS,
         Treat (CL | CLSP) SP as if it were CLSP, Treat CLSP as if it were SP,
         Treat (CP | CPSP) SP as if it were CPSP, Treat CPSP as if it were SP,
         // LB17 Do not break within ‘——’, even with intervening spaces.
         (B2 | B2SP) '×' B2, Treat (B2 | B2SP) SP as if it were B2SP, Treat B2SP as if it were SP,
         // Spaces:
         SP '÷', // LB18 Break after spaces.
         // Special case rules:
         '×' QU, QU '×', // LB19 Do not break before or after quotation marks, such as ‘”’.
         '÷' CB, CB '÷', // LB20 Break before and after unresolved CB.
         // LB21 Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana,
         // and other non-starters, or after acute accents.
         '×' BA, '×' HY, '×' NS, BB '×',
         // LB21a Don't break after Hebrew + Hyphen. // XXX Use a single state, HLHYBA, for HLHY and HLBA
         HLHYBA '×', Treat HL (HY | BA) as if it were HLHYBA, Treat HLHYBA as if it were HY,
         SY '×' HL, // LB21b Don’t break between Solidus and Hebrew letters.
         '×' IN, // LB22 Do not break before ellipses.
         // Numbers:
         (AL | HL) '×' NU, NU '×' (AL | HL), // LB23 Do not break between digits and letters.
         // LB23a Do not break between numeric prefixes and ideographs, or between ideographs and
         // numeric postfixes.
         PR '×' (ID | EB | EM), (ID | EB | EM) '×' PO,
         // LB24 Do not break between numeric prefix/postfix and letters, or between letters and
         // prefix/postfix.
         (PR | PO) '×' (AL | HL), (AL | HL) '×' (PR | PO),
         // LB25 Do not break between the following pairs of classes relevant to numbers:
         CL '×' PO, CP '×' PO, CL '×' PR, CP '×' PR, NU '×' PO, NU '×' PR, PO '×' OP, PO '×' NU, PR '×' OP, PR '×' NU, HY '×' NU, IS '×' NU, NU '×' NU, SY '×' NU,
         // Korean syllable blocks
         // LB26 Do not break a Korean syllable.
         JL '×' (JL | JV | H2 | H3), (JV | H2) '×' (JV | JT), (JT | H3) '×' JT,
         // LB27 Treat a Korean Syllable Block the same as ID.
         (JL | JV | JT | H2 | H3) '×' IN, (JL | JV | JT | H2 | H3) '×' PO, PR '×' (JL | JV | JT | H2 | H3),
         // Finally, join alphabetic letters into words and break everything else.
         (AL | HL) '×' (AL | HL), // LB28 Do not break between alphabetics (“at”).
         IS '×' (AL | HL), // LB29 Do not break between numeric punctuation and alphabetics (“e.g.”).
         // LB30 Do not break between letters, numbers, or ordinary symbols and opening or closing
         // parentheses.
         (AL | HL | NU) '×' OP, CP '×' (AL | HL | NU),
         // LB30a Break between two regional indicator symbols if and only if there are an even
         // number of regional indicators preceding the position of the break.
         RI '×' RI, Treat RI RI as if it were RIRI, Treat RIRI as if it were RI,
         EB '×' EM, // LB30b Do not break between an emoji base and an emoji modifier.
         '÷' ALL, ALL '÷', // LB31 Break everywhere else.
     };

     // Synthesize all non-"safe" pairs from pair table. There are generally more safe pairs.
     // The iterator is lazy; it is only evaluated when `is_safe_pair` is written below.
     let unsafe_pairs = (0..NUM_CLASSES).flat_map(|j| {
         (0..NUM_CLASSES).filter_map(move |i| {
             // All states that could have resulted from break class "i"
             let possible_states = pair_table
                 .iter()
                 .map(|row| (row[i] & !(ALLOWED_BREAK_BIT | MANDATORY_BREAK_BIT)) as usize);
             // Check if all state transitions due to "j" are the same
             if possible_states.map(|s| pair_table[s][j]).all_equal() {
                 None
             } else {
                 Some((i, j))
             }
         })
     });

     let out_dir = env::var("OUT_DIR")?;
     let dest_path = Path::new(&out_dir).join("tables.rs");
     let mut stream = BufWriter::new(File::create(&dest_path)?);

     stream.write_all(b"static BREAK_PROP_DATA: [[BreakClass; 256]; PAGE_COUNT] = [")?;

     let re = Regex::new(
         r"(?x)^
         (?P<start>[[:xdigit:]]{4,}) # Unicode code point
         (?:\.{2}(?P<end>[[:xdigit:]]{4,}))? # End range
         ;
         (?P<lb>\w{2,3}) # Line_Break property",
     )?;

     // One break class per code point, in code point order, up to the last listed code point.
     // The `scan` state is the first code point that has not been emitted yet.
     let mut values = BufReader::new(File::open("LineBreak.txt")?)
         .lines()
         .map(|line| line.expect("failed to read LineBreak.txt"))
         .filter(|l| !(l.starts_with('#') || l.is_empty()))
         .scan(0, |next, l| {
             let caps = re
                 .captures(&l)
                 .unwrap_or_else(|| panic!("malformed line in LineBreak.txt: {:?}", l));
             let start = u32::from_str_radix(&caps["start"], 16).unwrap();
             let end = caps
                 .name("end")
                 .map(|m| u32::from_str_radix(m.as_str(), 16).unwrap())
                 .unwrap_or(start);
             let lb = caps["lb"].parse().unwrap();

             // Code points skipped since the previous entry get their default class; the
             // entry's own range gets the listed class.
             let iter = (*next..=end).map(move |code| {
                 if code < start {
                     default_value(code)
                 } else {
                     lb
                 }
             });
             *next = end + 1;
             Some(iter)
         })
         .flatten();

     let mut page = Vec::with_capacity(256);
     let mut page_count = 0;
     let mut page_indices = Vec::new();
     loop {
         page.clear();
         page.extend(values.by_ref().take(256));

         if let Some(&first) = page.first() {
             page_indices.push(if page.iter().all_equal() {
                 // A uniform page is not stored; its index entry carries the class itself.
                 first as usize | UNIFORM_PAGE
             } else {
                 // A short final page is padded with XX to the full 256 entries.
                 writeln!(
                     stream,
                     "[{}],",
                     page.iter()
                         .copied()
                         .chain(iter::repeat(XX))
                         .take(256)
                         .map(|v| BREAK_CLASS_TABLE[v as usize])
                         .collect::<Vec<_>>()
                         .join(",")
                 )?;
                 let page_index = page_count;
                 page_count += 1;
                 page_index
             });
         } else {
             break;
         }
     }

     writeln!(
         stream,
         r"];

         const PAGE_COUNT: usize = {};
         static PAGE_INDICES: [usize; {}] = [",
         page_count,
         page_indices.len()
     )?;
     for page_idx in page_indices {
         write!(stream, "{},", page_idx)?;
     }
     write!(
         stream,
         r"];

         static PAIR_TABLE: [[u8; {}]; {}] = [",
         NUM_CLASSES_EOT, NUM_STATES
     )?;
     for row in &pair_table {
         write!(stream, "[")?;
         for x in row {
             write!(stream, "{},", x)?;
         }
         write!(stream, "],")?;
     }
     writeln!(
         stream,
         r"];

         fn is_safe_pair(a: BreakClass, b: BreakClass) -> bool {{
             !matches!((a, b), {})
         }}",
         unsafe_pairs
             .map(|(i, j)| format!("({}, {})", BREAK_CLASS_TABLE[i], BREAK_CLASS_TABLE[j]))
             .collect::<Vec<_>>()
             .join("|")
     )?;

     // Dropping a `BufWriter` discards any error from the final write, so flush explicitly to
     // fail the build instead of leaving a truncated `tables.rs` behind.
     stream.flush()?;

     Ok(())
 }
	//! Parses the rules into a state machine using a pair table. Each value in the table specifies the
	//! next state and whether it's an forced/allowed break. To handles rules such as
	//!
	//! B SP* ÷ A
	//!
	//! the extra state BSP is employed in the pair table friendly equivalent rules
	//!
	//! (B \| BSP) ÷ A, Treat (B \| BSP) SP as if it were BSP, Treat BSP as if it were SP
	#![recursion_limit = "512"]

	use regex::Regex;
	use std::env;
	use std::error::Error;
	use std::fs::File;
	use std::io::{BufRead, BufReader, BufWriter, Write};
	use std::iter;
	use std::path::Path;
	use std::str::FromStr;

	include!("src/shared.rs");

	impl FromStr for BreakClass {
	type Err = &'static str;

	fn from_str(s: &str) -> Result<Self, Self::Err> {
	Ok(match s {
	"BK" => BK,
	"CR" => CR,
	"LF" => LF,
	"CM" => CM,
	"NL" => NL,
	"SG" => SG,
	"WJ" => WJ,
	"ZW" => ZW,
	"GL" => GL,
	"SP" => SP,
	"ZWJ" => ZWJ,
	"B2" => B2,
	"BA" => BA,
	"BB" => BB,
	"HY" => HY,
	"CB" => CB,
	"CL" => CL,
	"CP" => CP,
	"EX" => EX,
	"IN" => IN,
	"NS" => NS,
	"OP" => OP,
	"QU" => QU,
	"IS" => IS,
	"NU" => NU,
	"PO" => PO,
	"PR" => PR,
	"SY" => SY,
	"AI" => AI,
	"AL" => AL,
	"CJ" => CJ,
	"EB" => EB,
	"EM" => EM,
	"H2" => H2,
	"H3" => H3,
	"HL" => HL,
	"ID" => ID,
	"JL" => JL,
	"JV" => JV,
	"JT" => JT,
	"RI" => RI,
	"SA" => SA,
	"XX" => XX,
	_ => return Err("Invalid break class"),
	})
	}
	}

	const NUM_CLASSES: usize = 43;
	static BREAK_CLASS_TABLE: [&str; NUM_CLASSES] = [
	"BK", "CR", "LF", "CM", "NL", "SG", "WJ", "ZW", "GL", "SP", "ZWJ", "B2", "BA", "BB", "HY",
	"CB", "CL", "CP", "EX", "IN", "NS", "OP", "QU", "IS", "NU", "PO", "PR", "SY", "AI", "AL", "CJ",
	"EB", "EM", "H2", "H3", "HL", "ID", "JL", "JV", "JT", "RI", "SA", "XX",
	];

	fn default_value(codepoint: u32) -> BreakClass {
	match codepoint {
	// The unassigned code points in the following blocks default to "ID"
	0x3400..=0x4DBF \| 0x4E00..=0x9FFF \| 0xF900..=0xFAFF => ID,
	// All undesignated code points in Planes 2 and 3, whether inside or outside of allocated blocks, default to "ID"
	0x20000..=0x2FFFD \| 0x30000..=0x3FFFD => ID,
	// All unassigned code points in the following Plane 1 range, whether inside or outside of allocated blocks, also default to "ID"
	0x1F000..=0x1FAFF \| 0x1FC00..=0x1FFFD => ID,
	// The unassigned code points in the following block default to "PR"
	0x20A0..=0x20CF => PR,
	// All code points, assigned and unassigned, that are not listed explicitly are given the value "XX"
	_ => XX,
	}
	}

	#[derive(Copy, Clone)]
	#[repr(u8)]
	enum ExtraState {
	ZWSP = sot + 1,
	OPSP,
	QUSP,
	CLSP,
	CPSP,
	B2SP,
	HLHYBA,
	RIRI,
	}

	use ExtraState::*;

	/// The number of classes plus the eot state.
	const NUM_CLASSES_EOT: usize = NUM_CLASSES + 1;
	const NUM_STATES: usize = NUM_CLASSES + 10;

	/// Separate implementation to prevent infinite recursion.
	#[doc(hidden)]
	macro_rules! rules2table_impl {
	// Operators
	(($len:ident $($args:tt)) '÷' $($tt:tt)+) => {rules2table_impl! {(NUM_CLASSES_EOT $($args) '÷') $($tt)+}};
	(($len:ident $($args:tt)) '×' $($tt:tt)+) => {rules2table_impl! {(NUM_CLASSES_EOT $($args) '×') $($tt)+}};
	(($len:ident $($args:tt)) '!' $($tt:tt)+) => {rules2table_impl! {(NUM_CLASSES_EOT $($args) '!') $($tt)+}};

	// Perform operator
	(($len:ident $pair_table:ident $($first:ident)? $operator:literal $($second:ident)?) $(, $($tt:tt)*)?) => {
	$(rules2table_impl! {(NUM_STATES $pair_table) $($tt)*})?

	#[allow(unused)] let first = 0..NUM_STATES; // Default to ALL
	$(let first = $first;)?
	#[allow(unused)] let second = 0..NUM_CLASSES_EOT; // Default to ALL
	$(let second = $second;)?
	for i in first {
	for j in second.clone() {
	let cell = &mut $pair_table[i][j];
	match $operator {
	'!' => *cell \|= ALLOWED_BREAK_BIT \| MANDATORY_BREAK_BIT,
	'÷' => *cell \|= ALLOWED_BREAK_BIT,
	'×' => *cell &= !(ALLOWED_BREAK_BIT \| MANDATORY_BREAK_BIT),
	_ => unreachable!("Bad operator"),
	}
	}
	}
	};

	(($len:ident $($args:tt)) Treat X $($tt:tt)) => {
	rules2table_impl! {(NUM_CLASSES_EOT $($args)* treat_x) $($tt)*}
	};
	(($len:ident $($args:tt)) Treat $($tt:tt)) => {
	rules2table_impl! {(NUM_STATES $($args)* treat) $($tt)*}
	};
	(($len:ident $($args:tt)) as if it were X where X = $($tt:tt)*) => {
	rules2table_impl! {(NUM_STATES $($args)* as_if_it_were_x_where_x_is) $($tt)*}
	};

	(($len:ident $pair_table:ident treat_x $second:ident as_if_it_were_x_where_x_is $X:ident) $(, $($tt:tt)*)?) => {
	$(rules2table_impl! {(NUM_STATES $pair_table) $($tt)*})?

	for i in $X {
	for j in $second.clone() {
	$pair_table[i][j] = i as u8;
	}
	}
	};
	(($len:ident $pair_table:ident treat $first:ident $second:ident) as if it were $cls:ident $(, $($tt:tt)*)?) => {
	$(rules2table_impl! {(NUM_STATES $pair_table) $($tt)*})?

	let cls = $cls as u8;
	for i in $first {
	for j in $second.clone() {
	$pair_table[i][j] = cls;
	}
	}
	};
	(($len:ident $pair_table:ident treat $first:ident) as if it were $cls:ident $(, $($tt:tt)*)?) => {
	$(rules2table_impl! {(NUM_STATES $pair_table) $($tt)*})?

	for j in $first.clone().filter(\|&j\| j < NUM_CLASSES_EOT) {
	for row in $pair_table.iter_mut() {
	row[j] = row[$cls as usize];
	}
	}
	for i in $first {
	$pair_table.copy_within($cls as usize..$cls as usize + 1, i);
	}
	};

	// All classes pattern
	(($len:ident $($args:tt)) ALL $($tt:tt)) => {
	let indices = 0..$len;
	rules2table_impl! {(NUM_CLASSES_EOT $($args)* indices) $($tt)*}
	};
	// Single class pattern
	(($len:ident $($args:tt)) $cls:ident $($tt:tt)) => {
	let indices = iter::once($cls as usize);
	rules2table_impl! {(NUM_CLASSES_EOT $($args)* indices) $($tt)*}
	};
	// Parse (X \| ...) patterns
	(($len:ident $($args:tt)) ($($cls:ident)\|+) $($tt:tt)) => {
	let indices = [$($cls as usize),+].iter().cloned();
	rules2table_impl! {(NUM_CLASSES_EOT $($args)* indices) $($tt)*}
	};
	// Parse [^ ...] patterns
	(($len:ident $($args:tt)) [^$($cls:ident)+] $($tt:tt)) => {
	let excluded = [$($cls as usize),+];
	let indices = (0..$len).filter(\|i\| !excluded.contains(i)).collect::<Vec<_>>();
	let indices = indices.iter().cloned();
	rules2table_impl! {(NUM_CLASSES_EOT $($args)* indices) $($tt)*}
	};

	(($len:ident $pair_table:ident)) => {}; // Exit condition
	}

	/// Returns a pair table conforming to the specified rules.
	///
	/// The rule syntax is a modified subset of the one in Unicode Standard Annex #14.
	macro_rules! rules2table {
	($($tt:tt)+) => {{
	let mut pair_table = [[
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
	]; NUM_STATES];
	rules2table_impl! {(NUM_STATES pair_table) $($tt)+}
	pair_table
	}};
	}

	trait IteratorExt: Iterator {
	/// Tests if all elements of the iterator are equal.
	fn all_equal(&mut self) -> bool
	where
	<Self as Iterator>::Item: PartialEq,
	Self: Sized,
	{
	if let Some(first) = self.next() {
	self.all(\|x\| x == first)
	} else {
	true
	}
	}
	}

	impl<I: Iterator> IteratorExt for I {}

	fn main() -> Result<(), Box<dyn Error>> {
	println!("cargo:rerun-if-changed=LineBreak.txt");
	assert!(NUM_STATES <= 0x3F, "Too many states");

	let pair_table = rules2table! {
	// Non-tailorable Line Breaking Rules
	// LB1 Assign a line breaking class to each code point of the input. Resolve AI, CB, CJ,
	// SA, SG, and XX into other line breaking classes depending on criteria outside the scope
	// of this algorithm.
	Treat (AI \| SG \| XX \| SA) as if it were AL, Treat CJ as if it were NS,
	// Start and end of text:
	sot '×', // LB2 Never break at the start of text.
	'!' eot, // LB3 Always break at the end of text.
	// Mandatory breaks:
	BK '!', // LB4 Always break after hard line breaks.
	// LB5 Treat CR followed by LF, as well as CR, LF, and NL as hard line breaks.
	CR '×' LF, CR '!', LF '!', NL '!',
	'×' (BK \| CR \| LF \| NL), // LB6 Do not break before hard line breaks.
	// Explicit breaks and non-breaks:
	'×' SP, '×' ZW, // LB7 Do not break before spaces or zero width space.
	// LB8 Break before any character following a zero-width space, even if one or more spaces
	// intervene.
	(ZW \| ZWSP) '÷', Treat (ZW \| ZWSP) SP as if it were ZWSP, Treat ZWSP as if it were SP,
	// ZWJ '×', // XXX Handled explicitly // LB8a Do not break after a zero width joiner.
	// Combining marks:
	// LB9 Do not break a combining character sequence; treat it as if it has the line breaking
	// class of the base character in all of the following rules. Treat ZWJ as if it were CM.
	Treat X (CM \| ZWJ)* as if it were X where X = [^BK CR LF NL SP ZW sot eot ZWSP OPSP QUSP CLSP CPSP B2SP],
	Treat (CM \| ZWJ) as if it were AL, // LB10 Treat any remaining combining mark or ZWJ as AL.
	// Word joiner:
	'×' WJ, WJ '×', // LB11 Do not break before or after Word joiner and related characters.
	// Non-breaking characters:
	GL '×', // LB12 Do not break after NBSP and related characters.

	// Tailorable Line Breaking Rules
	[^SP BA HY sot eot ZWSP OPSP QUSP CLSP CPSP B2SP] '×' GL, // LB12a Do not break before NBSP and related characters, except after spaces and hyphens.
	// LB13 Do not break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces.
	'×' CL, '×' CP, '×' EX, '×' IS, '×' SY,
	// LB14 Do not break after ‘[’, even after spaces.
	(OP \| OPSP) '×', Treat (OP \| OPSP) SP as if it were OPSP, Treat ZWSP as if it were SP,
	// LB15 Do not break within ‘”[’, even with intervening spaces.
	(QU \| QUSP) '×' OP, Treat (QU \| QUSP) SP as if it were QUSP, Treat QUSP as if it were SP,
	// LB16 Do not break between closing punctuation and a nonstarter (lb=NS), even with
	// intervening spaces.
	(CL \| CLSP \| CP \| CPSP) '×' NS,
	Treat (CL \| CLSP) SP as if it were CLSP, Treat CLSP as if it were SP,
	Treat (CP \| CPSP) SP as if it were CPSP, Treat CPSP as if it were SP,
	// LB17 Do not break within ‘——’, even with intervening spaces.
	(B2 \| B2SP) '×' B2, Treat (B2 \| B2SP) SP as if it were B2SP, Treat B2SP as if it were SP,
	// Spaces:
	SP '÷', // LB18 Break after spaces.
	// Special case rules:
	'×' QU, QU '×', // LB19 Do not break before or after quotation marks, such as ‘”’.
	'÷' CB, CB '÷', // LB20 Break before and after unresolved CB.
	// LB21 Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana,
	// and other non-starters, or after acute accents.
	'×' BA, '×' HY, '×' NS, BB '×',
	// LB21a Don't break after Hebrew + Hyphen. // XXX Use a single state, HLHYBA, for HLHY and HLBA
	HLHYBA '×', Treat HL (HY \| BA) as if it were HLHYBA, Treat HLHYBA as if it were HY,
	SY '×' HL, // LB21b Don’t break between Solidus and Hebrew letters.
	'×' IN, // LB22 Do not break before ellipses.
	// Numbers:
	(AL \| HL) '×' NU, NU '×' (AL \| HL), // LB23 Do not break between digits and letters.
	// LB23a Do not break between numeric prefixes and ideographs, or between ideographs and
	// numeric postfixes.
	PR '×' (ID \| EB \| EM), (ID \| EB \| EM) '×' PO,
	// LB24 Do not break between numeric prefix/postfix and letters, or between letters and
	// prefix/postfix.
	(PR \| PO) '×' (AL \| HL), (AL \| HL) '×' (PR \| PO),
	// LB25 Do not break between the following pairs of classes relevant to numbers:
	CL '×' PO, CP '×' PO, CL '×' PR, CP '×' PR, NU '×' PO, NU '×' PR, PO '×' OP, PO '×' NU, PR '×' OP, PR '×' NU, HY '×' NU, IS '×' NU, NU '×' NU, SY '×' NU,
	// Korean syllable blocks
	// LB26 Do not break a Korean syllable.
	JL '×' (JL \| JV \| H2 \| H3), (JV \| H2) '×' (JV \| JT), (JT \| H3) '×' JT,
	// LB27 Treat a Korean Syllable Block the same as ID.
	(JL \| JV \| JT \| H2 \| H3) '×' IN, (JL \| JV \| JT \| H2 \| H3) '×' PO, PR '×' (JL \| JV \| JT \| H2 \| H3),
	// Finally, join alphabetic letters into words and break everything else.
	(AL \| HL) '×' (AL \| HL), // LB28 Do not break between alphabetics (“at”).
	IS '×' (AL \| HL), // LB29 Do not break between numeric punctuation and alphabetics (“e.g.”).
	// LB30 Do not break between letters, numbers, or ordinary symbols and opening or closing
	// parentheses.
	(AL \| HL \| NU) '×' OP, CP '×' (AL \| HL \| NU),
	// LB30a Break between two regional indicator symbols if and only if there are an even
	// number of regional indicators preceding the position of the break.
	RI '×' RI, Treat RI RI as if it were RIRI, Treat RIRI as if it were RI,
	EB '×' EM, // LB30b Do not break between an emoji base and an emoji modifier.
	'÷' ALL, ALL '÷', // LB31 Break everywhere else.
	};

	// Synthesize all non-"safe" pairs from pair table. There are generally more safe pairs.
	let unsafe_pairs = (0..NUM_CLASSES).into_iter().flat_map(\|j\| {
	(0..NUM_CLASSES).into_iter().filter_map(move \|i\| {
	// All states that could have resulted from break class "i"
	let possible_states = pair_table
	.iter()
	.map(\|row\| (row[i] & !(ALLOWED_BREAK_BIT \| MANDATORY_BREAK_BIT)) as usize);
	// Check if all state transitions due to "j" are the same
	if possible_states.map(\|s\| pair_table[s][j]).all_equal() {
	None
	} else {
	Some((i, j))
	}
	})
	});

	let out_dir = env::var("OUT_DIR")?;
	let dest_path = Path::new(&out_dir).join("tables.rs");
	let mut stream = BufWriter::new(File::create(&dest_path)?);

	stream.write_all(b"static BREAK_PROP_DATA: [[BreakClass; 256]; PAGE_COUNT] = [")?;

	let re = Regex::new(
	r"(?x)^
	(?P<start>[[:xdigit:]]{4,}) # Unicode code point
	(?:\.{2}(?P<end>[[:xdigit:]]{4,}))? # End range
	;
	(?P<lb>\w{2,3}) # Line_Break property",
	)?;

	let mut values = BufReader::new(File::open("LineBreak.txt")?)
	.lines()
	.map(Result::unwrap)
	.filter(\|l\| !(l.starts_with('#') \|\| l.is_empty()))
	.scan(0, \|next, l\| {
	let caps = re.captures(&l).unwrap();
	let start = u32::from_str_radix(&caps["start"], 16).unwrap();
	let end = caps
	.name("end")
	.map(\|m\| u32::from_str_radix(m.as_str(), 16).unwrap())
	.unwrap_or(start);
	let lb = caps["lb"].parse().unwrap();

	let iter = (*next..=end).map(move \|code\| {
	if code < start {
	default_value(code)
	} else {
	lb
	}
	});
	*next = end + 1;
	Some(iter)
	})
	.flatten();

	let mut page = Vec::with_capacity(256);
	let mut page_count = 0;
	let mut page_indices = Vec::new();
	loop {
	page.clear();
	page.extend(values.by_ref().take(256));

	if let Some(&first) = page.first() {
	page_indices.push(if page.iter().all_equal() {
	first as usize \| UNIFORM_PAGE
	} else {
	writeln!(
	stream,
	"[{}],",
	page.iter()
	.copied()
	.chain(iter::repeat(XX))
	.take(256)
	.map(\|v\| BREAK_CLASS_TABLE[v as usize])
	.collect::<Vec<_>>()
	.join(",")
	)?;
	let page_index = page_count;
	page_count += 1;
	page_index
	});
	} else {
	break;
	}
	}

	writeln!(
	stream,
	r"];

	const PAGE_COUNT: usize = {};
	static PAGE_INDICES: [usize; {}] = [",
	page_count,
	page_indices.len()
	)?;
	for page_idx in page_indices {
	write!(stream, "{},", page_idx)?;
	}
	write!(
	stream,
	r"];

	static PAIR_TABLE: [[u8; {}]; {}] = [",
	NUM_CLASSES_EOT, NUM_STATES
	)?;
	for row in &pair_table {
	write!(stream, "[")?;
	for x in row {
	write!(stream, "{},", x)?;
	}
	write!(stream, "],")?;
	}
	writeln!(
	stream,
	r"];

	fn is_safe_pair(a: BreakClass, b: BreakClass) -> bool {{
	!matches!((a, b), {})
	}}",
	unsafe_pairs
	.map(\|(i, j)\| format!("({}, {})", BREAK_CLASS_TABLE[i], BREAK_CLASS_TABLE[j]))
	.collect::<Vec<_>>()
	.join("\|")
	)?;

	Ok(())
	}