blob: 9ef25dd0e2d777f43ae211783ab3430eef30fa4b [file] [log] [blame]
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
//! Types for the pest's abstract syntax tree.
/// A grammar rule
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Rule {
/// The name of the rule
pub name: String,
/// The rule's type (silent, atomic, ...)
pub ty: RuleType,
/// The rule's expression
pub expr: Expr,
}
/// All possible rule types
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RuleType {
/// The normal rule type
Normal,
/// Silent rules are just like normal rules
/// — when run, they function the same way —
/// except they do not produce pairs or tokens.
/// If a rule is silent, it will never appear in a parse result.
/// (their syntax is `_{ ... }`)
Silent,
/// atomic rule prevent implicit whitespace: inside an atomic rule,
/// the tilde ~ means "immediately followed by",
/// and repetition operators (asterisk * and plus sign +)
/// have no implicit separation. In addition, all other rules
/// called from an atomic rule are also treated as atomic.
/// In an atomic rule, interior matching rules are silent.
/// (their syntax is `@{ ... }`)
Atomic,
/// Compound atomic rules are similar to atomic rules,
/// but they produce inner tokens as normal.
/// (their syntax is `${ ... }`)
CompoundAtomic,
/// Non-atomic rules cancel the effect of atomic rules.
/// (their syntax is `!{ ... }`)
NonAtomic,
}
/// All possible rule expressions
///
/// # Warning: Semantic Versioning
/// There may be non-breaking changes to the meta-grammar
/// between minor versions. Those non-breaking changes, however,
/// may translate into semver-breaking changes due to the additional variants
/// propaged from the `Rule` enum. This is a known issue and will be fixed in the
/// future (e.g. by increasing MSRV and non_exhaustive annotations).
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Expr {
/// Matches an exact string, e.g. `"a"`
Str(String),
/// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
Insens(String),
/// Matches one character in the range, e.g. `'a'..'z'`
Range(String, String),
/// Matches the rule with the given name, e.g. `a`
Ident(String),
/// Matches a custom part of the stack, e.g. `PEEK[..]`
PeekSlice(i32, Option<i32>),
/// Positive lookahead; matches expression without making progress, e.g. `&e`
PosPred(Box<Expr>),
/// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
NegPred(Box<Expr>),
/// Matches a sequence of two expressions, e.g. `e1 ~ e2`
Seq(Box<Expr>, Box<Expr>),
/// Matches either of two expressions, e.g. `e1 | e2`
Choice(Box<Expr>, Box<Expr>),
/// Optionally matches an expression, e.g. `e?`
Opt(Box<Expr>),
/// Matches an expression zero or more times, e.g. `e*`
Rep(Box<Expr>),
/// Matches an expression one or more times, e.g. `e+`
RepOnce(Box<Expr>),
/// Matches an expression an exact number of times, e.g. `e{n}`
RepExact(Box<Expr>, u32),
/// Matches an expression at least a number of times, e.g. `e{n,}`
RepMin(Box<Expr>, u32),
/// Matches an expression at most a number of times, e.g. `e{,n}`
RepMax(Box<Expr>, u32),
/// Matches an expression a number of times within a range, e.g. `e{m, n}`
RepMinMax(Box<Expr>, u32, u32),
/// Continues to match expressions until one of the strings in the `Vec` is found
Skip(Vec<String>),
/// Matches an expression and pushes it to the stack, e.g. `push(e)`
Push(Box<Expr>),
/// Matches an expression and assigns a label to it, e.g. #label = exp
#[cfg(feature = "grammar-extras")]
NodeTag(Box<Expr>, String),
}
impl Expr {
/// Returns the iterator that steps the expression from top to bottom.
pub fn iter_top_down(&self) -> ExprTopDownIterator {
ExprTopDownIterator::new(self)
}
/// Applies `f` to the expression and all its children (top to bottom).
pub fn map_top_down<F>(self, mut f: F) -> Expr
where
F: FnMut(Expr) -> Expr,
{
fn map_internal<F>(expr: Expr, f: &mut F) -> Expr
where
F: FnMut(Expr) -> Expr,
{
let expr = f(expr);
match expr {
Expr::PosPred(expr) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::PosPred(mapped)
}
Expr::NegPred(expr) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::NegPred(mapped)
}
Expr::Seq(lhs, rhs) => {
let mapped_lhs = Box::new(map_internal(*lhs, f));
let mapped_rhs = Box::new(map_internal(*rhs, f));
Expr::Seq(mapped_lhs, mapped_rhs)
}
Expr::Choice(lhs, rhs) => {
let mapped_lhs = Box::new(map_internal(*lhs, f));
let mapped_rhs = Box::new(map_internal(*rhs, f));
Expr::Choice(mapped_lhs, mapped_rhs)
}
Expr::Rep(expr) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::Rep(mapped)
}
Expr::RepOnce(expr) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::RepOnce(mapped)
}
Expr::RepExact(expr, max) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::RepExact(mapped, max)
}
Expr::RepMin(expr, num) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::RepMin(mapped, num)
}
Expr::RepMax(expr, num) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::RepMax(mapped, num)
}
Expr::RepMinMax(expr, min, max) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::RepMinMax(mapped, min, max)
}
Expr::Opt(expr) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::Opt(mapped)
}
Expr::Push(expr) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::Push(mapped)
}
#[cfg(feature = "grammar-extras")]
Expr::NodeTag(expr, tag) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::NodeTag(mapped, tag)
}
expr => expr,
}
}
map_internal(self, &mut f)
}
/// Applies `f` to the expression and all its children (bottom up).
pub fn map_bottom_up<F>(self, mut f: F) -> Expr
where
F: FnMut(Expr) -> Expr,
{
fn map_internal<F>(expr: Expr, f: &mut F) -> Expr
where
F: FnMut(Expr) -> Expr,
{
let mapped = match expr {
Expr::PosPred(expr) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::PosPred(mapped)
}
Expr::NegPred(expr) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::NegPred(mapped)
}
Expr::Seq(lhs, rhs) => {
let mapped_lhs = Box::new(map_internal(*lhs, f));
let mapped_rhs = Box::new(map_internal(*rhs, f));
Expr::Seq(mapped_lhs, mapped_rhs)
}
Expr::Choice(lhs, rhs) => {
let mapped_lhs = Box::new(map_internal(*lhs, f));
let mapped_rhs = Box::new(map_internal(*rhs, f));
Expr::Choice(mapped_lhs, mapped_rhs)
}
Expr::Rep(expr) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::Rep(mapped)
}
Expr::RepOnce(expr) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::RepOnce(mapped)
}
Expr::RepExact(expr, num) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::RepExact(mapped, num)
}
Expr::RepMin(expr, max) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::RepMin(mapped, max)
}
Expr::RepMax(expr, max) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::RepMax(mapped, max)
}
Expr::RepMinMax(expr, min, max) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::RepMinMax(mapped, min, max)
}
Expr::Opt(expr) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::Opt(mapped)
}
Expr::Push(expr) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::Push(mapped)
}
#[cfg(feature = "grammar-extras")]
Expr::NodeTag(expr, tag) => {
let mapped = Box::new(map_internal(*expr, f));
Expr::NodeTag(mapped, tag)
}
expr => expr,
};
f(mapped)
}
map_internal(self, &mut f)
}
}
impl core::fmt::Display for Expr {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
Expr::Str(s) => write!(f, "{:?}", s),
Expr::Insens(s) => write!(f, "^{:?}", s),
Expr::Range(start, end) => {
let start = start.chars().next().expect("Empty range start.");
let end = end.chars().next().expect("Empty range end.");
write!(f, "({:?}..{:?})", start, end)
}
Expr::Ident(id) => write!(f, "{}", id),
Expr::PeekSlice(start, end) => match end {
Some(end) => write!(f, "PEEK[{}..{}]", start, end),
None => write!(f, "PEEK[{}..]", start),
},
Expr::PosPred(expr) => write!(f, "&{}", expr.as_ref()),
Expr::NegPred(expr) => write!(f, "!{}", expr.as_ref()),
Expr::Seq(lhs, rhs) => {
let mut nodes = Vec::new();
nodes.push(lhs);
let mut current = rhs;
while let Expr::Seq(lhs, rhs) = current.as_ref() {
nodes.push(lhs);
current = rhs;
}
nodes.push(current);
let sequence = nodes
.iter()
.map(|node| format!("{}", node))
.collect::<Vec<_>>()
.join(" ~ ");
write!(f, "({})", sequence)
}
Expr::Choice(lhs, rhs) => {
let mut nodes = Vec::new();
nodes.push(lhs);
let mut current = rhs;
while let Expr::Choice(lhs, rhs) = current.as_ref() {
nodes.push(lhs);
current = rhs;
}
nodes.push(current);
let sequence = nodes
.iter()
.map(|node| format!("{}", node))
.collect::<Vec<_>>()
.join(" | ");
write!(f, "({})", sequence)
}
Expr::Opt(expr) => write!(f, "{}?", expr),
Expr::Rep(expr) => write!(f, "{}*", expr),
Expr::RepOnce(expr) => write!(f, "{}+", expr),
Expr::RepExact(expr, n) => write!(f, "{}{{{}}}", expr, n),
Expr::RepMin(expr, min) => write!(f, "{}{{{},}}", expr, min),
Expr::RepMax(expr, max) => write!(f, "{}{{,{}}}", expr, max),
Expr::RepMinMax(expr, min, max) => write!(f, "{}{{{}, {}}}", expr, min, max),
Expr::Skip(strings) => {
let strings = strings
.iter()
.map(|s| format!("{:?}", s))
.collect::<Vec<_>>()
.join(" | ");
write!(f, "(!({}) ~ ANY)*", strings)
}
Expr::Push(expr) => write!(f, "PUSH({})", expr),
#[cfg(feature = "grammar-extras")]
Expr::NodeTag(expr, tag) => {
write!(f, "(#{} = {})", tag, expr)
}
}
}
}
/// The top down iterator for an expression.
pub struct ExprTopDownIterator {
current: Option<Expr>,
next: Option<Expr>,
right_branches: Vec<Expr>,
}
impl ExprTopDownIterator {
/// Constructs a top-down iterator from the expression.
pub fn new(expr: &Expr) -> Self {
let mut iter = ExprTopDownIterator {
current: None,
next: None,
right_branches: vec![],
};
iter.iterate_expr(expr.clone());
iter
}
fn iterate_expr(&mut self, expr: Expr) {
self.current = Some(expr.clone());
match expr {
Expr::Seq(lhs, rhs) => {
self.right_branches.push(*rhs);
self.next = Some(*lhs);
}
Expr::Choice(lhs, rhs) => {
self.right_branches.push(*rhs);
self.next = Some(*lhs);
}
Expr::PosPred(expr)
| Expr::NegPred(expr)
| Expr::Rep(expr)
| Expr::RepOnce(expr)
| Expr::RepExact(expr, _)
| Expr::RepMin(expr, _)
| Expr::RepMax(expr, _)
| Expr::RepMinMax(expr, ..)
| Expr::Opt(expr)
| Expr::Push(expr) => {
self.next = Some(*expr);
}
#[cfg(feature = "grammar-extras")]
Expr::NodeTag(expr, _) => {
self.next = Some(*expr);
}
_ => {
self.next = None;
}
}
}
}
impl Iterator for ExprTopDownIterator {
type Item = Expr;
fn next(&mut self) -> Option<Self::Item> {
let result = self.current.take();
if let Some(expr) = self.next.take() {
self.iterate_expr(expr);
} else if let Some(expr) = self.right_branches.pop() {
self.iterate_expr(expr);
}
result
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn top_down_iterator() {
let expr = Expr::Choice(
Box::new(Expr::Str(String::from("a"))),
Box::new(Expr::Str(String::from("b"))),
);
let mut top_down = expr.iter_top_down();
assert_eq!(top_down.next(), Some(expr));
assert_eq!(top_down.next(), Some(Expr::Str(String::from("a"))));
assert_eq!(top_down.next(), Some(Expr::Str(String::from("b"))));
assert_eq!(top_down.next(), None);
}
#[test]
fn identity() {
let expr = Expr::Choice(
Box::new(Expr::Seq(
Box::new(Expr::Ident("a".to_owned())),
Box::new(Expr::Str("b".to_owned())),
)),
Box::new(Expr::PosPred(Box::new(Expr::NegPred(Box::new(Expr::Rep(
Box::new(Expr::RepOnce(Box::new(Expr::Opt(Box::new(Expr::Choice(
Box::new(Expr::Insens("c".to_owned())),
Box::new(Expr::Push(Box::new(Expr::Range(
"'d'".to_owned(),
"'e'".to_owned(),
)))),
)))))),
)))))),
);
assert_eq!(
expr.clone()
.map_bottom_up(|expr| expr)
.map_top_down(|expr| expr),
expr,
);
}
mod display {
use super::super::*;
#[test]
fn string() {
assert_eq!(Expr::Str("a".to_owned()).to_string(), r#""a""#);
}
#[test]
fn insens() {
assert_eq!(Expr::Insens("a".to_owned()).to_string(), r#"^"a""#);
}
#[test]
fn range() {
assert_eq!(
Expr::Range("a".to_owned(), "z".to_owned()).to_string(),
r#"('a'..'z')"#,
);
}
#[test]
fn ident() {
assert_eq!(Expr::Ident("a".to_owned()).to_string(), "a");
}
#[test]
fn peek_slice() {
assert_eq!(Expr::PeekSlice(0, None).to_string(), "PEEK[0..]");
assert_eq!(Expr::PeekSlice(0, Some(-1)).to_string(), "PEEK[0..-1]");
}
#[test]
fn pos_pred() {
assert_eq!(
Expr::PosPred(Box::new(Expr::Ident("e".to_owned()))).to_string(),
"&e",
);
}
#[test]
fn neg_pred() {
assert_eq!(
Expr::NegPred(Box::new(Expr::Ident("e".to_owned()))).to_string(),
"!e",
);
}
#[test]
fn seq() {
assert_eq!(
Expr::Seq(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Ident("e2".to_owned())),
)
.to_string(),
"(e1 ~ e2)",
);
assert_eq!(
Expr::Seq(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Seq(
Box::new(Expr::Ident("e2".to_owned())),
Box::new(Expr::Ident("e3".to_owned())),
)),
)
.to_string(),
"(e1 ~ e2 ~ e3)",
);
assert_eq!(
Expr::Seq(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Seq(
Box::new(Expr::Ident("e2".to_owned())),
Box::new(Expr::Seq(
Box::new(Expr::Ident("e3".to_owned())),
Box::new(Expr::Ident("e4".to_owned())),
)),
)),
)
.to_string(),
"(e1 ~ e2 ~ e3 ~ e4)",
);
assert_eq!(
Expr::Seq(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Choice(
Box::new(Expr::Ident("e2".to_owned())),
Box::new(Expr::Seq(
Box::new(Expr::Ident("e3".to_owned())),
Box::new(Expr::Ident("e4".to_owned())),
)),
)),
)
.to_string(),
"(e1 ~ (e2 | (e3 ~ e4)))",
);
assert_eq!(
Expr::Seq(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Seq(
Box::new(Expr::Ident("e2".to_owned())),
Box::new(Expr::Choice(
Box::new(Expr::Ident("e3".to_owned())),
Box::new(Expr::Ident("e4".to_owned())),
)),
)),
)
.to_string(),
"(e1 ~ e2 ~ (e3 | e4))",
);
}
#[test]
fn choice() {
assert_eq!(
Expr::Choice(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Ident("e2".to_owned())),
)
.to_string(),
"(e1 | e2)",
);
assert_eq!(
Expr::Choice(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Choice(
Box::new(Expr::Ident("e2".to_owned())),
Box::new(Expr::Ident("e3".to_owned())),
)),
)
.to_string(),
"(e1 | e2 | e3)",
);
assert_eq!(
Expr::Choice(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Choice(
Box::new(Expr::Ident("e2".to_owned())),
Box::new(Expr::Choice(
Box::new(Expr::Ident("e3".to_owned())),
Box::new(Expr::Ident("e4".to_owned())),
)),
)),
)
.to_string(),
"(e1 | e2 | e3 | e4)",
);
assert_eq!(
Expr::Choice(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Seq(
Box::new(Expr::Ident("e2".to_owned())),
Box::new(Expr::Choice(
Box::new(Expr::Ident("e3".to_owned())),
Box::new(Expr::Ident("e4".to_owned())),
)),
)),
)
.to_string(),
"(e1 | (e2 ~ (e3 | e4)))",
);
}
#[test]
fn opt() {
assert_eq!(
Expr::Opt(Box::new(Expr::Ident("e".to_owned()))).to_string(),
"e?",
);
}
#[test]
fn rep() {
assert_eq!(
Expr::Rep(Box::new(Expr::Ident("e".to_owned()))).to_string(),
"e*",
);
}
#[test]
fn rep_once() {
assert_eq!(
Expr::RepOnce(Box::new(Expr::Ident("e".to_owned()))).to_string(),
"e+",
);
}
#[test]
fn rep_exact() {
assert_eq!(
Expr::RepExact(Box::new(Expr::Ident("e".to_owned())), 1).to_string(),
"e{1}",
);
}
#[test]
fn rep_min() {
assert_eq!(
Expr::RepMin(Box::new(Expr::Ident("e".to_owned())), 1).to_string(),
"e{1,}",
);
}
#[test]
fn rep_max() {
assert_eq!(
Expr::RepMax(Box::new(Expr::Ident("e".to_owned())), 1).to_string(),
"e{,1}",
);
}
#[test]
fn rep_min_max() {
assert_eq!(
Expr::RepMinMax(Box::new(Expr::Ident("e".to_owned())), 1, 2).to_string(),
"e{1, 2}",
);
}
#[test]
fn skip() {
assert_eq!(
Expr::Skip(
["a", "bc"]
.into_iter()
.map(|s| s.to_owned())
.collect::<Vec<_>>(),
)
.to_string(),
r#"(!("a" | "bc") ~ ANY)*"#,
);
}
#[test]
fn push() {
assert_eq!(
Expr::Push(Box::new(Expr::Ident("e".to_owned()))).to_string(),
"PUSH(e)",
);
}
#[test]
#[cfg(feature = "grammar-extras")]
fn node_tag() {
assert_eq!(
Expr::NodeTag(Box::new(Expr::Ident("expr".to_owned())), "label".to_owned())
.to_string(),
"(#label = expr)",
);
}
}
}