winnow/combinator/expression.rs
1use crate::combinator::empty;
2use crate::combinator::fail;
3use crate::combinator::opt;
4use crate::combinator::trace;
5use crate::error::ParserError;
6use crate::stream::Stream;
7use crate::stream::StreamIsPartial;
8use crate::Parser;
9use crate::Result;
10
11/// Parses an expression based on operator precedence.
12///
13/// It uses a Pratt parsing algorithm, where operators are
14/// associated with a binding power. The higher the power,
15/// the more tightly an operator will bind to its operands.
16///
17/// This method returns an [`Expression`], which configures
18/// the Pratt parser.
19///
20/// Each operator type is configured with [`Prefix`], [`Postfix`],
21/// and [`Infix`]. These describe the operator's binding power,
22/// a function that applies the operator to its operand, and the
23/// operator's associativity (infix only).
24///
25/// For a more full-featured example, look at the [C-style Expressions][crate::_topic::language#c-style-expressions]
26/// topic.
27///
28/// # Example
29///
30/// Parsing a simple arithmetic expression without parenthesis.
31///
32/// ```rust
33/// # #[cfg(feature = "ascii")] {
34/// # use winnow::prelude::*;
35/// # use winnow::error::ContextError;
36/// # use winnow::ascii::digit1;
37/// # use winnow::combinator::{dispatch, fail};
38/// # use winnow::token::any;
39/// use winnow::combinator::expression;
40/// use winnow::combinator::{Prefix, Postfix, Infix};
41///
42/// fn parser<'i>() -> impl Parser<&'i str, i32, ContextError> {
43/// move |i: &mut &str| {
44/// use Infix::*;
45/// expression(digit1.parse_to::<i32>()) // operands are 32-bit integers
46/// .prefix(dispatch! {any;
47/// '-' => Prefix(12, |_, a: i32| Ok(-a)),
48/// _ => fail,
49/// })
50/// .infix(dispatch! {any;
51/// '+' => Left(5, |_, a, b| Ok(a + b)),
52/// '-' => Left(5, |_, a, b| Ok(a - b)),
53/// '*' => Left(7, |_, a, b| Ok(a * b)),
54/// '/' => Left(7, |_, a: i32, b| Ok(a.checked_div(b).unwrap_or_default())),
55/// _ => fail,
56/// })
57/// .postfix(dispatch! {any;
58/// '!' => Postfix(15, |_, a| if a < 1 { Ok(1) } else { Ok((1..=a).fold(1, |acc, a| acc*a)) }),
59/// _ => fail,
60/// })
61/// .parse_next(i)
62/// }
63/// }
64///
65/// assert_eq!(parser().parse("1+1"), Ok(2));
66/// assert_eq!(parser().parse("0!"), Ok(1));
67/// assert_eq!(parser().parse("-1*5*2*10+30/3!"), Ok(-95));
68/// # }
69/// ```
70#[doc(alias = "pratt")]
71#[doc(alias = "separated")]
72#[doc(alias = "shunting_yard")]
73#[doc(alias = "precedence_climbing")]
74#[inline(always)]
75#[allow(clippy::type_complexity)]
76pub fn expression<I, ParseOperand, O, E>(
77 parse_operand: ParseOperand,
78) -> Expression<
79 I,
80 O,
81 ParseOperand,
82 impl Parser<I, Prefix<I, O, E>, E>,
83 impl Parser<I, Postfix<I, O, E>, E>,
84 impl Parser<I, Infix<I, O, E>, E>,
85 E,
86>
87where
88 I: Stream + StreamIsPartial,
89 ParseOperand: Parser<I, O, E>,
90 E: ParserError<I>,
91{
92 Expression {
93 precedence_level: 0,
94 parse_operand,
95 parse_prefix: fail,
96 parse_postfix: fail,
97 parse_infix: fail,
98 marker: Default::default(),
99 }
100}
101
102/// A helper struct for [`expression()`].
103///
104/// Holds the configuration for the Pratt parser, including
105/// the operator and operand parsers. A precedence level can
106/// also be set, which is useful to disambiguate parse trees
107/// based on the parent operator's precedence.
108///
109/// Implements [`Parser`]. When parsing an input, it applies
110/// the Pratt parser.
111pub struct Expression<I, O, ParseOperand, Pre, Post, Pix, E>
112where
113 I: Stream + StreamIsPartial,
114 ParseOperand: Parser<I, O, E>,
115 E: ParserError<I>,
116{
117 precedence_level: i64,
118 parse_operand: ParseOperand,
119 parse_prefix: Pre,
120 parse_postfix: Post,
121 parse_infix: Pix,
122 marker: core::marker::PhantomData<(I, O, E)>,
123}
124
125impl<I, O, ParseOperand, Pre, Post, Pix, E> Expression<I, O, ParseOperand, Pre, Post, Pix, E>
126where
127 ParseOperand: Parser<I, O, E>,
128 I: Stream + StreamIsPartial,
129 E: ParserError<I>,
130{
131 /// Sets the prefix operator parser.
132 ///
133 /// The parser should parse the input to a [`Prefix`],
134 /// which contains the operator's binding power and
135 /// a fold function which applies the operator to its
136 /// operands.
137 #[inline(always)]
138 pub fn prefix<NewParsePrefix>(
139 self,
140 parser: NewParsePrefix,
141 ) -> Expression<I, O, ParseOperand, NewParsePrefix, Post, Pix, E>
142 where
143 NewParsePrefix: Parser<I, Prefix<I, O, E>, E>,
144 {
145 Expression {
146 precedence_level: self.precedence_level,
147 parse_operand: self.parse_operand,
148 parse_prefix: parser,
149 parse_postfix: self.parse_postfix,
150 parse_infix: self.parse_infix,
151 marker: Default::default(),
152 }
153 }
154
155 /// Sets the postfix operator parser.
156 ///
157 /// The parser should parse the input to a [`Postfix`],
158 /// which contains the operator's binding power and
159 /// a fold function which applies the operator to its
160 /// operands.
161 #[inline(always)]
162 pub fn postfix<NewParsePostfix>(
163 self,
164 parser: NewParsePostfix,
165 ) -> Expression<I, O, ParseOperand, Pre, NewParsePostfix, Pix, E>
166 where
167 NewParsePostfix: Parser<I, Postfix<I, O, E>, E>,
168 {
169 Expression {
170 precedence_level: self.precedence_level,
171 parse_operand: self.parse_operand,
172 parse_prefix: self.parse_prefix,
173 parse_postfix: parser,
174 parse_infix: self.parse_infix,
175 marker: Default::default(),
176 }
177 }
178
179 /// Sets the infix operator parser.
180 ///
181 /// The parser should parse the input to a [`Infix`],
182 /// which contains the operator's binding power and
183 /// a fold function which applies the operator to its
184 /// operands.
185 #[inline(always)]
186 pub fn infix<NewParseInfix>(
187 self,
188 parser: NewParseInfix,
189 ) -> Expression<I, O, ParseOperand, Pre, Post, NewParseInfix, E>
190 where
191 NewParseInfix: Parser<I, Infix<I, O, E>, E>,
192 {
193 Expression {
194 precedence_level: self.precedence_level,
195 parse_operand: self.parse_operand,
196 parse_prefix: self.parse_prefix,
197 parse_postfix: self.parse_postfix,
198 parse_infix: parser,
199 marker: Default::default(),
200 }
201 }
202
203 /// Sets the precedence level for the current instance of the parser.
204 ///
205 /// It defaults to 0, which is traditionally treated as the "lowest"
206 /// possible precedence when parsing an expression.
207 ///
208 /// This is useful to disambiguate grammars based on the parent operator's
209 /// precedence. This comes up primarily when parsing recursive expressions.
210 ///
211 /// The parsing machinery underpinning [`Expression`] assumes that a "more
212 /// tightly binding" operator is numerically large, while a "more loosely
213 /// binding" operator is numerically small. For example, `13` is a higher
214 /// precedence level than `1` because `13 > 1`.
215 ///
216 /// Other ways of describing this relationship:
217 /// - `13` has a higher precedence compared to `1`
218 /// - `13` has a higher binding power compared to `1`
219 ///
220 /// Note: Binding power and precedence both refer to the same concept and
221 /// may be used interchangeably.
222 ///
223 /// # Motivation
224 ///
225 /// If you don't understand why this is useful to have, this section tries
226 /// to explain in more detail.
227 ///
228 /// The [C-style Expressions][crate::_topic::arithmetic#c-style-expression]
229 /// example has source code for parsing the expression described below, and
230 /// can provide a clearer usage example.
231 ///
232 /// Consider the following expression in the C language:
233 ///
234 /// ```c
235 /// int x = (1 == 1 ? 0 : 1, -123); // <-- let's parse this
236 /// printf("%d\n", x); // -123
237 /// ```
238 ///
239 /// Let's look at the right-hand side of the expression on the first line,
240 /// and replace some of the sub-expressions with symbols:
241 ///
242 /// ```text
243 /// (1 == 1 ? 0 : 1, -123) // rhs
244 /// (a ? b : c, d ) // symbolic
245 /// (a ? b : c, d) // remove whitespace
246 /// (, (? a b c) d) // prefix notation
247 /// ```
248 ///
249 /// Written symbolically:
250 /// - `a` is the condition, like `1 == 1`
251 /// - `b` is the value when the condition is true
252 /// - `c` is the value when the condition is false
253 /// - `d` is a secondary expression unrelated to the ternary
254 ///
255 /// In prefix notation, it's easier to see the specific operators and what
256 /// they bind to:
257 /// - COMMA (`,`) binds to `(? a b c)` and `d`
258 /// - TERNARY (`?`) binds to `a`, `b`, and `c`
259 ///
260 /// ## Parsing `c` and `d`
261 ///
262 /// Let's focus on parsing the sub-expressions `c` and `d`, as that
263 /// motivates why a parser precedence level is necessary.
264 ///
265 /// To parse `c`, we would really like to re-use the parser produced by
266 /// [`expression()`], because `c` is really *any* valid expression that
267 /// can be parsed by `expression()` already.
268 ///
269 /// However, we can't re-use the parser naively. When parsing `c`, we need
270 /// to "escape" from the inner parser when encountering the comma separating
271 /// `c` from `d`.
272 ///
273 /// The reason we have to "escape" is because of how operator precedence is
274 /// defined in the C language: the comma operator has the lowest precedence
275 /// among all the operators. When we're parsing `c`, we're in the context of
276 /// the ternary operator. We don't want to parse any valid expression! Just
277 /// what the ternary operator captures.
278 ///
279 /// That's where the precedence level comes in: you specify the minimum
280 /// precedence this parser is willing to accept. If you come across an
281 /// expression in the top-level with a lower binding power than the starting
282 /// precedence, you know to stop parsing.
283 ///
284 /// The parsing machinery inside of [`Expression`] handles most of this for
285 /// you, but it can't determine what the precedence level should be for a
286 /// given expression. That's a language-specific detail, and it depends on
287 /// what you want to parse.
288 #[inline(always)]
289 pub fn current_precedence_level(
290 mut self,
291 level: i64,
292 ) -> Expression<I, O, ParseOperand, Pre, Post, Pix, E> {
293 self.precedence_level = level;
294 self
295 }
296}
297
298impl<I, O, Pop, Pre, Post, Pix, E> Parser<I, O, E> for Expression<I, O, Pop, Pre, Post, Pix, E>
299where
300 I: Stream + StreamIsPartial,
301 Pop: Parser<I, O, E>,
302 Pix: Parser<I, Infix<I, O, E>, E>,
303 Pre: Parser<I, Prefix<I, O, E>, E>,
304 Post: Parser<I, Postfix<I, O, E>, E>,
305 E: ParserError<I>,
306{
307 #[inline(always)]
308 fn parse_next(&mut self, input: &mut I) -> Result<O, E> {
309 trace("expression", move |i: &mut I| {
310 expression_impl(
311 i,
312 &mut self.parse_operand,
313 &mut self.parse_prefix,
314 &mut self.parse_postfix,
315 &mut self.parse_infix,
316 self.precedence_level,
317 )
318 })
319 .parse_next(input)
320 }
321}
322
323/// Opaque implementation of the Pratt parser.
324fn expression_impl<I, O, Pop, Pre, Post, Pix, E>(
325 i: &mut I,
326 parse_operand: &mut Pop,
327 prefix: &mut Pre,
328 postfix: &mut Post,
329 infix: &mut Pix,
330 min_power: i64,
331) -> Result<O, E>
332where
333 I: Stream + StreamIsPartial,
334 Pop: Parser<I, O, E>,
335 Pix: Parser<I, Infix<I, O, E>, E>,
336 Pre: Parser<I, Prefix<I, O, E>, E>,
337 Post: Parser<I, Postfix<I, O, E>, E>,
338 E: ParserError<I>,
339{
340 let operand = opt(trace("operand", parse_operand.by_ref())).parse_next(i)?;
341 let mut operand = if let Some(operand) = operand {
342 operand
343 } else {
344 // Prefix unary operators
345 let len = i.eof_offset();
346 let Prefix(power, fold_prefix) = trace("prefix", prefix.by_ref()).parse_next(i)?;
347 // infinite loop check: the parser must always consume
348 if i.eof_offset() == len {
349 return Err(E::assert(i, "`prefix` parsers must always consume"));
350 }
351 let operand = expression_impl(i, parse_operand, prefix, postfix, infix, power)?;
352 fold_prefix(i, operand)?
353 };
354
355 // A variable to stop the `'parse` loop when `Assoc::Neither` with the same
356 // precedence is encountered e.g. `a == b == c`. `Assoc::Neither` has similar
357 // associativity rules as `Assoc::Left`, but we stop parsing when the next operator
358 // is the same as the current one.
359 let mut prev_op_is_neither = None;
360 'parse: while i.eof_offset() > 0 {
361 // Postfix unary operators
362 let start = i.checkpoint();
363 if let Some(Postfix(power, fold_postfix)) =
364 opt(trace("postfix", postfix.by_ref())).parse_next(i)?
365 {
366 // control precedence over the prefix e.g.:
367 // `--(i++)` or `(--i)++`
368 if power < min_power {
369 i.reset(&start);
370 break 'parse;
371 }
372 operand = fold_postfix(i, operand)?;
373
374 continue 'parse;
375 }
376
377 // Infix binary operators
378 let start = i.checkpoint();
379 let parse_result = opt(trace("infix", infix.by_ref())).parse_next(i)?;
380 if let Some(infix_op) = parse_result {
381 let mut is_neither = None;
382 let (lpower, rpower, fold_infix) = match infix_op {
383 Infix::Right(p, f) => (p, p - 1, f),
384 Infix::Left(p, f) => (p, p + 1, f),
385 Infix::Neither(p, f) => {
386 is_neither = Some(p);
387 (p, p + 1, f)
388 }
389 };
390 if lpower < min_power
391 // MSRV: `is_some_and`
392 || match prev_op_is_neither {
393 None => false,
394 Some(p) => lpower == p,
395 }
396 {
397 i.reset(&start);
398 break 'parse;
399 }
400 prev_op_is_neither = is_neither;
401 let rhs = expression_impl(i, parse_operand, prefix, postfix, infix, rpower)?;
402 operand = fold_infix(i, operand, rhs)?;
403
404 continue 'parse;
405 }
406
407 break 'parse;
408 }
409
410 Ok(operand)
411}
412
413/// Define an [`expression()`]'s prefix operator
414///
415/// It requires an operator binding power, as well as a
416/// fold function which applies the operator.
417pub struct Prefix<I, O, E>(
418 /// Binding power
419 pub i64,
420 /// Unary operator
421 pub fn(&mut I, O) -> Result<O, E>,
422);
423
424impl<I, O, E> Clone for Prefix<I, O, E> {
425 #[inline(always)]
426 fn clone(&self) -> Self {
427 Prefix(self.0, self.1)
428 }
429}
430
431impl<I: Stream, O, E: ParserError<I>> Parser<I, Prefix<I, O, E>, E> for Prefix<I, O, E> {
432 #[inline(always)]
433 fn parse_next(&mut self, input: &mut I) -> Result<Prefix<I, O, E>, E> {
434 empty.value(self.clone()).parse_next(input)
435 }
436}
437
438/// Define an [`expression()`]'s postfix operator
439///
440/// It requires an operator binding power, as well as a
441/// fold function which applies the operator.
442pub struct Postfix<I, O, E>(
443 /// Binding power
444 pub i64,
445 /// Unary operator
446 pub fn(&mut I, O) -> Result<O, E>,
447);
448
449impl<I, O, E> Clone for Postfix<I, O, E> {
450 #[inline(always)]
451 fn clone(&self) -> Self {
452 Postfix(self.0, self.1)
453 }
454}
455
456impl<I: Stream, O, E: ParserError<I>> Parser<I, Postfix<I, O, E>, E> for Postfix<I, O, E> {
457 #[inline(always)]
458 fn parse_next(&mut self, input: &mut I) -> Result<Postfix<I, O, E>, E> {
459 empty.value(self.clone()).parse_next(input)
460 }
461}
462
463/// Define an [`expression()`]'s infix operator
464///
465/// It requires an operator binding power, as well as a
466/// fold function which applies the operator.
467pub enum Infix<I, O, E> {
468 /// Left-associative operator
469 ///
470 /// The operators will bind more tightly to their rightmost operands.
471 ///
472 /// e.g `A op B op C` -> `(A op B) op C`
473 Left(
474 /// Binding power
475 i64,
476 /// Binary operator
477 fn(&mut I, O, O) -> Result<O, E>,
478 ),
479 /// Right-associative operator
480 ///
481 /// The operators will bind more tightly to their leftmost operands.
482 ///
483 /// e.g `A op B op C` -> `A op (B op C)`
484 Right(
485 /// Binding power
486 i64,
487 /// Binary operator
488 fn(&mut I, O, O) -> Result<O, E>,
489 ),
490 /// Neither left or right associative
491 ///
492 /// `Infix::Neither` has similar associativity rules as `Assoc::Left`, but we stop
493 /// parsing when the next operator is the same as the current one.
494 ///
495 /// e.g. `a == b == c` -> `(a == b)`, fail: `(== c)`
496 Neither(
497 /// Binding power
498 i64,
499 /// Binary operator
500 fn(&mut I, O, O) -> Result<O, E>,
501 ),
502}
503
504impl<I, O, E> Clone for Infix<I, O, E> {
505 #[inline(always)]
506 fn clone(&self) -> Self {
507 match self {
508 Infix::Left(p, f) => Infix::Left(*p, *f),
509 Infix::Right(p, f) => Infix::Right(*p, *f),
510 Infix::Neither(p, f) => Infix::Neither(*p, *f),
511 }
512 }
513}
514
515impl<I: Stream, O, E: ParserError<I>> Parser<I, Infix<I, O, E>, E> for Infix<I, O, E> {
516 #[inline(always)]
517 fn parse_next(&mut self, input: &mut I) -> Result<Infix<I, O, E>, E> {
518 empty.value(self.clone()).parse_next(input)
519 }
520}
521
522#[cfg(all(test, feature = "ascii"))]
523mod tests {
524 use crate::ascii::digit1;
525 use crate::combinator::fail;
526 use crate::dispatch;
527 use crate::error::ContextError;
528 use crate::token::any;
529
530 use super::*;
531
532 fn parser<'i>() -> impl Parser<&'i str, i32, ContextError> {
533 move |i: &mut &str| {
534 use Infix::*;
535 expression(digit1.parse_to::<i32>())
536 .current_precedence_level(0)
537 .prefix(dispatch! {any;
538 '+' => Prefix(12, |_, a| Ok(a)),
539 '-' => Prefix(12, |_, a: i32| Ok(-a)),
540 _ => fail
541 })
542 .infix(dispatch! {any;
543 '+' => Left(5, |_, a, b| Ok(a + b)),
544 '-' => Left(5, |_, a, b| Ok(a - b)),
545 '*' => Left(7, |_, a, b| Ok(a * b)),
546 '/' => Left(7, |_, a, b| Ok(a / b)),
547 '%' => Left(7, |_, a, b| Ok(a % b)),
548 '^' => Left(9, |_, a, b| Ok(a ^ b)),
549 _ => fail
550 })
551 .parse_next(i)
552 }
553 }
554
555 #[test]
556 fn test_expression() {
557 assert_eq!(parser().parse("-3+-3*4"), Ok(-15));
558 assert_eq!(parser().parse("+2+3*4"), Ok(14));
559 assert_eq!(parser().parse("2*3+4"), Ok(10));
560 }
561}