Skip to main content

winnow/combinator/
expression.rs

1use crate::combinator::empty;
2use crate::combinator::fail;
3use crate::combinator::opt;
4use crate::combinator::trace;
5use crate::error::ParserError;
6use crate::stream::Stream;
7use crate::stream::StreamIsPartial;
8use crate::Parser;
9use crate::Result;
10
11/// Parses an expression based on operator precedence.
12///
13/// It uses a Pratt parsing algorithm, where operators are
14/// associated with a binding power. The higher the power,
15/// the more tightly an operator will bind to its operands.
16///
17/// This method returns an [`Expression`], which configures
18/// the Pratt parser.
19///
20/// Each operator type is configured with [`Prefix`], [`Postfix`],
21/// and [`Infix`]. These describe the operator's binding power,
22/// a function that applies the operator to its operand, and the
23/// operator's associativity (infix only).
24///
25/// For a more full-featured example, look at the [C-style Expressions][crate::_topic::language#c-style-expressions]
26/// topic.
27///
28/// # Example
29///
30/// Parsing a simple arithmetic expression without parenthesis.
31///
32/// ```rust
33/// # #[cfg(feature = "ascii")] {
34/// # use winnow::prelude::*;
35/// # use winnow::error::ContextError;
36/// # use winnow::ascii::digit1;
37/// # use winnow::combinator::{dispatch, fail};
38/// # use winnow::token::any;
39/// use winnow::combinator::expression;
40/// use winnow::combinator::{Prefix, Postfix, Infix};
41///
42/// fn parser<'i>() -> impl Parser<&'i str, i32, ContextError> {
43///     move |i: &mut &str| {
44///         use Infix::*;
45///         expression(digit1.parse_to::<i32>()) // operands are 32-bit integers
46///             .prefix(dispatch! {any;
47///                 '-' => Prefix(12, |_, a: i32| Ok(-a)),
48///                 _ => fail,
49///             })
50///             .infix(dispatch! {any;
51///                 '+' => Left(5, |_, a, b| Ok(a + b)),
52///                 '-' => Left(5, |_, a, b| Ok(a - b)),
53///                 '*' => Left(7, |_, a, b| Ok(a * b)),
54///                 '/' => Left(7, |_, a: i32, b| Ok(a.checked_div(b).unwrap_or_default())),
55///                 _ => fail,
56///             })
57///             .postfix(dispatch! {any;
58///                 '!' => Postfix(15, |_, a| if a < 1 { Ok(1) } else { Ok((1..=a).fold(1, |acc, a| acc*a)) }),
59///                 _ => fail,
60///             })
61///             .parse_next(i)
62///     }
63/// }
64///
65/// assert_eq!(parser().parse("1+1"), Ok(2));
66/// assert_eq!(parser().parse("0!"), Ok(1));
67/// assert_eq!(parser().parse("-1*5*2*10+30/3!"), Ok(-95));
68/// # }
69/// ```
70#[doc(alias = "pratt")]
71#[doc(alias = "separated")]
72#[doc(alias = "shunting_yard")]
73#[doc(alias = "precedence_climbing")]
74#[inline(always)]
75#[allow(clippy::type_complexity)]
76pub fn expression<I, ParseOperand, O, E>(
77    parse_operand: ParseOperand,
78) -> Expression<
79    I,
80    O,
81    ParseOperand,
82    impl Parser<I, Prefix<I, O, E>, E>,
83    impl Parser<I, Postfix<I, O, E>, E>,
84    impl Parser<I, Infix<I, O, E>, E>,
85    E,
86>
87where
88    I: Stream + StreamIsPartial,
89    ParseOperand: Parser<I, O, E>,
90    E: ParserError<I>,
91{
92    Expression {
93        precedence_level: 0,
94        parse_operand,
95        parse_prefix: fail,
96        parse_postfix: fail,
97        parse_infix: fail,
98        marker: Default::default(),
99    }
100}
101
102/// A helper struct for [`expression()`].
103///
104/// Holds the configuration for the Pratt parser, including
105/// the operator and operand parsers. A precedence level can
106/// also be set, which is useful to disambiguate parse trees
107/// based on the parent operator's precedence.
108///
109/// Implements [`Parser`]. When parsing an input, it applies
110/// the Pratt parser.
111pub struct Expression<I, O, ParseOperand, Pre, Post, Pix, E>
112where
113    I: Stream + StreamIsPartial,
114    ParseOperand: Parser<I, O, E>,
115    E: ParserError<I>,
116{
117    precedence_level: i64,
118    parse_operand: ParseOperand,
119    parse_prefix: Pre,
120    parse_postfix: Post,
121    parse_infix: Pix,
122    marker: core::marker::PhantomData<(I, O, E)>,
123}
124
125impl<I, O, ParseOperand, Pre, Post, Pix, E> Expression<I, O, ParseOperand, Pre, Post, Pix, E>
126where
127    ParseOperand: Parser<I, O, E>,
128    I: Stream + StreamIsPartial,
129    E: ParserError<I>,
130{
131    /// Sets the prefix operator parser.
132    ///
133    /// The parser should parse the input to a [`Prefix`],
134    /// which contains the operator's binding power and
135    /// a fold function which applies the operator to its
136    /// operands.
137    #[inline(always)]
138    pub fn prefix<NewParsePrefix>(
139        self,
140        parser: NewParsePrefix,
141    ) -> Expression<I, O, ParseOperand, NewParsePrefix, Post, Pix, E>
142    where
143        NewParsePrefix: Parser<I, Prefix<I, O, E>, E>,
144    {
145        Expression {
146            precedence_level: self.precedence_level,
147            parse_operand: self.parse_operand,
148            parse_prefix: parser,
149            parse_postfix: self.parse_postfix,
150            parse_infix: self.parse_infix,
151            marker: Default::default(),
152        }
153    }
154
155    /// Sets the postfix operator parser.
156    ///
157    /// The parser should parse the input to a [`Postfix`],
158    /// which contains the operator's binding power and
159    /// a fold function which applies the operator to its
160    /// operands.
161    #[inline(always)]
162    pub fn postfix<NewParsePostfix>(
163        self,
164        parser: NewParsePostfix,
165    ) -> Expression<I, O, ParseOperand, Pre, NewParsePostfix, Pix, E>
166    where
167        NewParsePostfix: Parser<I, Postfix<I, O, E>, E>,
168    {
169        Expression {
170            precedence_level: self.precedence_level,
171            parse_operand: self.parse_operand,
172            parse_prefix: self.parse_prefix,
173            parse_postfix: parser,
174            parse_infix: self.parse_infix,
175            marker: Default::default(),
176        }
177    }
178
179    /// Sets the infix operator parser.
180    ///
181    /// The parser should parse the input to a [`Infix`],
182    /// which contains the operator's binding power and
183    /// a fold function which applies the operator to its
184    /// operands.
185    #[inline(always)]
186    pub fn infix<NewParseInfix>(
187        self,
188        parser: NewParseInfix,
189    ) -> Expression<I, O, ParseOperand, Pre, Post, NewParseInfix, E>
190    where
191        NewParseInfix: Parser<I, Infix<I, O, E>, E>,
192    {
193        Expression {
194            precedence_level: self.precedence_level,
195            parse_operand: self.parse_operand,
196            parse_prefix: self.parse_prefix,
197            parse_postfix: self.parse_postfix,
198            parse_infix: parser,
199            marker: Default::default(),
200        }
201    }
202
203    /// Sets the precedence level for the current instance of the parser.
204    ///
205    /// It defaults to 0, which is traditionally treated as the "lowest"
206    /// possible precedence when parsing an expression.
207    ///
208    /// This is useful to disambiguate grammars based on the parent operator's
209    /// precedence. This comes up primarily when parsing recursive expressions.
210    ///
211    /// The parsing machinery underpinning [`Expression`] assumes that a "more
212    /// tightly binding" operator is numerically large, while a "more loosely
213    /// binding" operator is numerically small. For example, `13` is a higher
214    /// precedence level than `1` because `13 > 1`.
215    ///
216    /// Other ways of describing this relationship:
217    /// - `13` has a higher precedence compared to `1`
218    /// - `13` has a higher binding power compared to `1`
219    ///
220    /// Note: Binding power and precedence both refer to the same concept and
221    /// may be used interchangeably.
222    ///
223    /// # Motivation
224    ///
225    /// If you don't understand why this is useful to have, this section tries
226    /// to explain in more detail.
227    ///
228    /// The [C-style Expressions][crate::_topic::arithmetic#c-style-expression]
229    /// example has source code for parsing the expression described below, and
230    /// can provide a clearer usage example.
231    ///
232    /// Consider the following expression in the C language:
233    ///
234    /// ```c
235    /// int x = (1 == 1 ? 0 : 1, -123); // <-- let's parse this
236    /// printf("%d\n", x); // -123
237    /// ```
238    ///
239    /// Let's look at the right-hand side of the expression on the first line,
240    /// and replace some of the sub-expressions with symbols:
241    ///
242    /// ```text
243    /// (1 == 1 ? 0 : 1, -123) // rhs
244    /// (a      ? b : c, d  )  // symbolic
245    /// (a ? b : c, d)         // remove whitespace
246    /// (, (? a b c) d)        // prefix notation
247    /// ```
248    ///
249    /// Written symbolically:
250    /// - `a` is the condition, like `1 == 1`
251    /// - `b` is the value when the condition is true
252    /// - `c` is the value when the condition is false
253    /// - `d` is a secondary expression unrelated to the ternary
254    ///
255    /// In prefix notation, it's easier to see the specific operators and what
256    /// they bind to:
257    /// - COMMA (`,`) binds to `(? a b c)` and `d`
258    /// - TERNARY (`?`) binds to `a`, `b`, and `c`
259    ///
260    /// ## Parsing `c` and `d`
261    ///
262    /// Let's focus on parsing the sub-expressions `c` and `d`, as that
263    /// motivates why a parser precedence level is necessary.
264    ///
265    /// To parse `c`, we would really like to re-use the parser produced by
266    /// [`expression()`], because `c` is really *any* valid expression that
267    /// can be parsed by `expression()` already.
268    ///
269    /// However, we can't re-use the parser naively. When parsing `c`, we need
270    /// to "escape" from the inner parser when encountering the comma separating
271    /// `c` from `d`.
272    ///
273    /// The reason we have to "escape" is because of how operator precedence is
274    /// defined in the C language: the comma operator has the lowest precedence
275    /// among all the operators. When we're parsing `c`, we're in the context of
276    /// the ternary operator. We don't want to parse any valid expression! Just
277    /// what the ternary operator captures.
278    ///
279    /// That's where the precedence level comes in: you specify the minimum
280    /// precedence this parser is willing to accept. If you come across an
281    /// expression in the top-level with a lower binding power than the starting
282    /// precedence, you know to stop parsing.
283    ///
284    /// The parsing machinery inside of [`Expression`] handles most of this for
285    /// you, but it can't determine what the precedence level should be for a
286    /// given expression. That's a language-specific detail, and it depends on
287    /// what you want to parse.
288    #[inline(always)]
289    pub fn current_precedence_level(
290        mut self,
291        level: i64,
292    ) -> Expression<I, O, ParseOperand, Pre, Post, Pix, E> {
293        self.precedence_level = level;
294        self
295    }
296}
297
298impl<I, O, Pop, Pre, Post, Pix, E> Parser<I, O, E> for Expression<I, O, Pop, Pre, Post, Pix, E>
299where
300    I: Stream + StreamIsPartial,
301    Pop: Parser<I, O, E>,
302    Pix: Parser<I, Infix<I, O, E>, E>,
303    Pre: Parser<I, Prefix<I, O, E>, E>,
304    Post: Parser<I, Postfix<I, O, E>, E>,
305    E: ParserError<I>,
306{
307    #[inline(always)]
308    fn parse_next(&mut self, input: &mut I) -> Result<O, E> {
309        trace("expression", move |i: &mut I| {
310            expression_impl(
311                i,
312                &mut self.parse_operand,
313                &mut self.parse_prefix,
314                &mut self.parse_postfix,
315                &mut self.parse_infix,
316                self.precedence_level,
317            )
318        })
319        .parse_next(input)
320    }
321}
322
323/// Opaque implementation of the Pratt parser.
324fn expression_impl<I, O, Pop, Pre, Post, Pix, E>(
325    i: &mut I,
326    parse_operand: &mut Pop,
327    prefix: &mut Pre,
328    postfix: &mut Post,
329    infix: &mut Pix,
330    min_power: i64,
331) -> Result<O, E>
332where
333    I: Stream + StreamIsPartial,
334    Pop: Parser<I, O, E>,
335    Pix: Parser<I, Infix<I, O, E>, E>,
336    Pre: Parser<I, Prefix<I, O, E>, E>,
337    Post: Parser<I, Postfix<I, O, E>, E>,
338    E: ParserError<I>,
339{
340    let operand = opt(trace("operand", parse_operand.by_ref())).parse_next(i)?;
341    let mut operand = if let Some(operand) = operand {
342        operand
343    } else {
344        // Prefix unary operators
345        let len = i.eof_offset();
346        let Prefix(power, fold_prefix) = trace("prefix", prefix.by_ref()).parse_next(i)?;
347        // infinite loop check: the parser must always consume
348        if i.eof_offset() == len {
349            return Err(E::assert(i, "`prefix` parsers must always consume"));
350        }
351        let operand = expression_impl(i, parse_operand, prefix, postfix, infix, power)?;
352        fold_prefix(i, operand)?
353    };
354
355    // A variable to stop the `'parse` loop when `Assoc::Neither` with the same
356    // precedence is encountered e.g. `a == b == c`. `Assoc::Neither` has similar
357    // associativity rules as `Assoc::Left`, but we stop parsing when the next operator
358    // is the same as the current one.
359    let mut prev_op_is_neither = None;
360    'parse: while i.eof_offset() > 0 {
361        // Postfix unary operators
362        let start = i.checkpoint();
363        if let Some(Postfix(power, fold_postfix)) =
364            opt(trace("postfix", postfix.by_ref())).parse_next(i)?
365        {
366            // control precedence over the prefix e.g.:
367            // `--(i++)` or `(--i)++`
368            if power < min_power {
369                i.reset(&start);
370                break 'parse;
371            }
372            operand = fold_postfix(i, operand)?;
373
374            continue 'parse;
375        }
376
377        // Infix binary operators
378        let start = i.checkpoint();
379        let parse_result = opt(trace("infix", infix.by_ref())).parse_next(i)?;
380        if let Some(infix_op) = parse_result {
381            let mut is_neither = None;
382            let (lpower, rpower, fold_infix) = match infix_op {
383                Infix::Right(p, f) => (p, p - 1, f),
384                Infix::Left(p, f) => (p, p + 1, f),
385                Infix::Neither(p, f) => {
386                    is_neither = Some(p);
387                    (p, p + 1, f)
388                }
389            };
390            if lpower < min_power
391                // MSRV: `is_some_and`
392                || match prev_op_is_neither {
393                    None => false,
394                    Some(p) => lpower == p,
395                }
396            {
397                i.reset(&start);
398                break 'parse;
399            }
400            prev_op_is_neither = is_neither;
401            let rhs = expression_impl(i, parse_operand, prefix, postfix, infix, rpower)?;
402            operand = fold_infix(i, operand, rhs)?;
403
404            continue 'parse;
405        }
406
407        break 'parse;
408    }
409
410    Ok(operand)
411}
412
413/// Define an [`expression()`]'s prefix operator
414///
415/// It requires an operator binding power, as well as a
416/// fold function which applies the operator.
417pub struct Prefix<I, O, E>(
418    /// Binding power
419    pub i64,
420    /// Unary operator
421    pub fn(&mut I, O) -> Result<O, E>,
422);
423
424impl<I, O, E> Clone for Prefix<I, O, E> {
425    #[inline(always)]
426    fn clone(&self) -> Self {
427        Prefix(self.0, self.1)
428    }
429}
430
431impl<I: Stream, O, E: ParserError<I>> Parser<I, Prefix<I, O, E>, E> for Prefix<I, O, E> {
432    #[inline(always)]
433    fn parse_next(&mut self, input: &mut I) -> Result<Prefix<I, O, E>, E> {
434        empty.value(self.clone()).parse_next(input)
435    }
436}
437
438/// Define an [`expression()`]'s postfix operator
439///
440/// It requires an operator binding power, as well as a
441/// fold function which applies the operator.
442pub struct Postfix<I, O, E>(
443    /// Binding power
444    pub i64,
445    /// Unary operator
446    pub fn(&mut I, O) -> Result<O, E>,
447);
448
449impl<I, O, E> Clone for Postfix<I, O, E> {
450    #[inline(always)]
451    fn clone(&self) -> Self {
452        Postfix(self.0, self.1)
453    }
454}
455
456impl<I: Stream, O, E: ParserError<I>> Parser<I, Postfix<I, O, E>, E> for Postfix<I, O, E> {
457    #[inline(always)]
458    fn parse_next(&mut self, input: &mut I) -> Result<Postfix<I, O, E>, E> {
459        empty.value(self.clone()).parse_next(input)
460    }
461}
462
463/// Define an [`expression()`]'s infix operator
464///
465/// It requires an operator binding power, as well as a
466/// fold function which applies the operator.
467pub enum Infix<I, O, E> {
468    /// Left-associative operator
469    ///
470    /// The operators will bind more tightly to their rightmost operands.
471    ///
472    /// e.g `A op B op C` -> `(A op B) op C`
473    Left(
474        /// Binding power
475        i64,
476        /// Binary operator
477        fn(&mut I, O, O) -> Result<O, E>,
478    ),
479    /// Right-associative operator
480    ///
481    /// The operators will bind more tightly to their leftmost operands.
482    ///
483    /// e.g `A op B op C` -> `A op (B op C)`
484    Right(
485        /// Binding power
486        i64,
487        /// Binary operator
488        fn(&mut I, O, O) -> Result<O, E>,
489    ),
490    /// Neither left or right associative
491    ///
492    /// `Infix::Neither` has similar associativity rules as `Assoc::Left`, but we stop
493    /// parsing when the next operator is the same as the current one.
494    ///
495    /// e.g. `a == b == c` -> `(a == b)`, fail: `(== c)`
496    Neither(
497        /// Binding power
498        i64,
499        /// Binary operator
500        fn(&mut I, O, O) -> Result<O, E>,
501    ),
502}
503
504impl<I, O, E> Clone for Infix<I, O, E> {
505    #[inline(always)]
506    fn clone(&self) -> Self {
507        match self {
508            Infix::Left(p, f) => Infix::Left(*p, *f),
509            Infix::Right(p, f) => Infix::Right(*p, *f),
510            Infix::Neither(p, f) => Infix::Neither(*p, *f),
511        }
512    }
513}
514
515impl<I: Stream, O, E: ParserError<I>> Parser<I, Infix<I, O, E>, E> for Infix<I, O, E> {
516    #[inline(always)]
517    fn parse_next(&mut self, input: &mut I) -> Result<Infix<I, O, E>, E> {
518        empty.value(self.clone()).parse_next(input)
519    }
520}
521
522#[cfg(all(test, feature = "ascii"))]
523mod tests {
524    use crate::ascii::digit1;
525    use crate::combinator::fail;
526    use crate::dispatch;
527    use crate::error::ContextError;
528    use crate::token::any;
529
530    use super::*;
531
532    fn parser<'i>() -> impl Parser<&'i str, i32, ContextError> {
533        move |i: &mut &str| {
534            use Infix::*;
535            expression(digit1.parse_to::<i32>())
536                .current_precedence_level(0)
537                .prefix(dispatch! {any;
538                    '+' => Prefix(12, |_, a| Ok(a)),
539                    '-' => Prefix(12, |_, a: i32| Ok(-a)),
540                    _ => fail
541                })
542                .infix(dispatch! {any;
543                   '+' => Left(5, |_, a, b| Ok(a + b)),
544                   '-' => Left(5, |_, a, b| Ok(a - b)),
545                   '*' => Left(7, |_, a, b| Ok(a * b)),
546                   '/' => Left(7, |_, a, b| Ok(a / b)),
547                   '%' => Left(7, |_, a, b| Ok(a % b)),
548                   '^' => Left(9, |_, a, b| Ok(a ^ b)),
549                   _ => fail
550                })
551                .parse_next(i)
552        }
553    }
554
555    #[test]
556    fn test_expression() {
557        assert_eq!(parser().parse("-3+-3*4"), Ok(-15));
558        assert_eq!(parser().parse("+2+3*4"), Ok(14));
559        assert_eq!(parser().parse("2*3+4"), Ok(10));
560    }
561}