winnow/stream/
mod.rs

1//! Stream capability for combinators to parse
2//!
3//! Stream types include:
4//! - `&[u8]` and [`Bytes`] for binary data
5//! - `&str` (aliased as [`Str`]) and [`BStr`] for UTF-8 data
6//! - [`LocatingSlice`] can track the location within the original buffer to report
7//!   [spans][crate::Parser::with_span]
8//! - [`Stateful`] to thread global state through your parsers
9//! - [`Partial`] can mark an input as partial buffer that is being streamed into
10//! - [Custom stream types][crate::_topic::stream]
11
12use core::hash::BuildHasher;
13use core::iter::{Cloned, Enumerate};
14use core::num::NonZeroUsize;
15use core::slice::Iter;
16use core::str::from_utf8;
17use core::str::CharIndices;
18use core::str::FromStr;
19
20#[allow(unused_imports)]
21#[cfg(any(feature = "unstable-doc", feature = "unstable-recover"))]
22use crate::error::ErrMode;
23
24#[cfg(feature = "alloc")]
25use alloc::borrow::Cow;
26#[cfg(feature = "alloc")]
27use alloc::collections::BTreeMap;
28#[cfg(feature = "alloc")]
29use alloc::collections::BTreeSet;
30#[cfg(feature = "alloc")]
31use alloc::collections::VecDeque;
32#[cfg(feature = "alloc")]
33use alloc::string::String;
34#[cfg(feature = "alloc")]
35use alloc::vec::Vec;
36#[cfg(feature = "std")]
37use std::collections::HashMap;
38#[cfg(feature = "std")]
39use std::collections::HashSet;
40
41mod bstr;
42mod bytes;
43mod locating;
44mod partial;
45mod range;
46#[cfg(feature = "unstable-recover")]
47#[cfg(feature = "std")]
48mod recoverable;
49mod stateful;
50#[cfg(test)]
51mod tests;
52mod token;
53
54pub use bstr::BStr;
55pub use bytes::Bytes;
56pub use locating::LocatingSlice;
57pub use partial::Partial;
58pub use range::Range;
59#[cfg(feature = "unstable-recover")]
60#[cfg(feature = "std")]
61pub use recoverable::Recoverable;
62pub use stateful::Stateful;
63pub use token::TokenSlice;
64
/// A UTF-8 text stream: an alias for a borrowed `&str`
pub type Str<'i> = &'i str;
67
/// Reports how long an input is
pub trait SliceLen {
    /// Returns the length of `self`
    ///
    /// The unit depends on the implementor: the number of elements for slices and arrays, the
    /// number of UTF-8 bytes for `&str` and `char`, and always `1` for a `u8`.
    fn slice_len(&self) -> usize;
}

// Slices: number of elements.
impl<T> SliceLen for &[T] {
    #[inline(always)]
    fn slice_len(&self) -> usize {
        <[T]>::len(self)
    }
}

// Arrays: the length is the const parameter itself.
impl<T, const LEN: usize> SliceLen for [T; LEN] {
    #[inline(always)]
    fn slice_len(&self) -> usize {
        LEN
    }
}

// Borrowed arrays: same as the owned array.
impl<T, const LEN: usize> SliceLen for &[T; LEN] {
    #[inline(always)]
    fn slice_len(&self) -> usize {
        LEN
    }
}

// Strings: length in bytes, not in `char`s.
impl SliceLen for &str {
    #[inline(always)]
    fn slice_len(&self) -> usize {
        str::len(self)
    }
}

// A single byte always has length one.
impl SliceLen for u8 {
    #[inline(always)]
    fn slice_len(&self) -> usize {
        1
    }
}

// A `char` is measured by its UTF-8 encoded width (1 to 4 bytes).
impl SliceLen for char {
    #[inline(always)]
    fn slice_len(&self) -> usize {
        char::len_utf8(*self)
    }
}

// NOTE(review): the `* 8` suggests this tuple is a bit-level view `(input, start, end)` with the
// two offsets counted in bits — confirm against the callers.
impl<I> SliceLen for (I, usize, usize)
where
    I: SliceLen,
{
    #[inline(always)]
    fn slice_len(&self) -> usize {
        let (input, start, end) = self;
        input.slice_len() * 8 + end - start
    }
}
126
127/// Core definition for parser input state
128pub trait Stream: Offset<<Self as Stream>::Checkpoint> + core::fmt::Debug {
129    /// The smallest unit being parsed
130    ///
131    /// Example: `u8` for `&[u8]` or `char` for `&str`
132    type Token: core::fmt::Debug;
133    /// Sequence of `Token`s
134    ///
135    /// Example: `&[u8]` for `LocatingSlice<&[u8]>` or `&str` for `LocatingSlice<&str>`
136    type Slice: core::fmt::Debug;
137
138    /// Iterate with the offset from the current location
139    type IterOffsets: Iterator<Item = (usize, Self::Token)>;
140
141    /// A parse location within the stream
142    type Checkpoint: Offset + Clone + core::fmt::Debug;
143
144    /// Iterate with the offset from the current location
145    fn iter_offsets(&self) -> Self::IterOffsets;
146
147    /// Returns the offset to the end of the input
148    fn eof_offset(&self) -> usize;
149
150    /// Split off the next token from the input
151    fn next_token(&mut self) -> Option<Self::Token>;
152    /// Split off the next token from the input
153    fn peek_token(&self) -> Option<Self::Token>;
154
155    /// Finds the offset of the next matching token
156    fn offset_for<P>(&self, predicate: P) -> Option<usize>
157    where
158        P: Fn(Self::Token) -> bool;
159    /// Get the offset for the number of `tokens` into the stream
160    ///
161    /// This means "0 tokens" will return `0` offset
162    fn offset_at(&self, tokens: usize) -> Result<usize, Needed>;
163    /// Split off a slice of tokens from the input
164    ///
165    /// <div class="warning">
166    ///
167    /// **Note:** For inputs with variable width tokens, like `&str`'s `char`, `offset` might not correspond
168    /// with the number of tokens. To get a valid offset, use:
169    /// - [`Stream::eof_offset`]
170    /// - [`Stream::iter_offsets`]
171    /// - [`Stream::offset_for`]
172    /// - [`Stream::offset_at`]
173    ///
174    /// </div>
175    ///
176    /// # Panic
177    ///
178    /// This will panic if
179    ///
180    /// * Indexes must be within bounds of the original input;
181    /// * Indexes must uphold invariants of the stream, like for `str` they must lie on UTF-8
182    ///   sequence boundaries.
183    ///
184    fn next_slice(&mut self, offset: usize) -> Self::Slice;
185    /// Split off a slice of tokens from the input
186    ///
187    /// <div class="warning">
188    ///
189    /// **Note:** For inputs with variable width tokens, like `&str`'s `char`, `offset` might not correspond
190    /// with the number of tokens. To get a valid offset, use:
191    /// - [`Stream::eof_offset`]
192    /// - [`Stream::iter_offsets`]
193    /// - [`Stream::offset_for`]
194    /// - [`Stream::offset_at`]
195    ///
196    /// </div>
197    ///
198    /// # Safety
199    ///
200    /// Callers of this function are responsible that these preconditions are satisfied:
201    ///
202    /// * Indexes must be within bounds of the original input;
203    /// * Indexes must uphold invariants of the stream, like for `str` they must lie on UTF-8
204    ///   sequence boundaries.
205    ///
206    unsafe fn next_slice_unchecked(&mut self, offset: usize) -> Self::Slice {
207        // Inherent impl to allow callers to have `unsafe`-free code
208        self.next_slice(offset)
209    }
210    /// Split off a slice of tokens from the input
211    fn peek_slice(&self, offset: usize) -> Self::Slice;
212    /// Split off a slice of tokens from the input
213    ///
214    /// # Safety
215    ///
216    /// Callers of this function are responsible that these preconditions are satisfied:
217    ///
218    /// * Indexes must be within bounds of the original input;
219    /// * Indexes must uphold invariants of the stream, like for `str` they must lie on UTF-8
220    ///   sequence boundaries.
221    unsafe fn peek_slice_unchecked(&self, offset: usize) -> Self::Slice {
222        // Inherent impl to allow callers to have `unsafe`-free code
223        self.peek_slice(offset)
224    }
225
226    /// Advance to the end of the stream
227    #[inline(always)]
228    fn finish(&mut self) -> Self::Slice {
229        self.next_slice(self.eof_offset())
230    }
231    /// Advance to the end of the stream
232    #[inline(always)]
233    fn peek_finish(&self) -> Self::Slice
234    where
235        Self: Clone,
236    {
237        self.peek_slice(self.eof_offset())
238    }
239
240    /// Save the current parse location within the stream
241    fn checkpoint(&self) -> Self::Checkpoint;
242    /// Revert the stream to a prior [`Self::Checkpoint`]
243    ///
244    /// # Panic
245    ///
246    /// May panic if an invalid [`Self::Checkpoint`] is provided
247    fn reset(&mut self, checkpoint: &Self::Checkpoint);
248
249    /// Write out a single-line summary of the current parse location
250    fn trace(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result;
251}
252
/// Describes how much more data a parser needs after reporting `Incomplete`
///
/// <div class="warning">
///
/// **Note:** This can only occur for a `Stream` that is [partial][`crate::stream::StreamIsPartial`],
/// like [`Partial`].
///
/// </div>
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Needed {
    /// More data is required, but the amount is not known
    Unknown,
    /// A lower bound on the buffer offset required to finish parsing
    ///
    /// For byte/`&str` streams, this is a number of bytes
    Size(NonZeroUsize),
}

impl Needed {
    /// Builds a `Needed` from a size, mapping zero to `Needed::Unknown`
    pub fn new(s: usize) -> Self {
        NonZeroUsize::new(s).map_or(Needed::Unknown, Needed::Size)
    }

    /// Returns `true` when the required amount is known
    pub fn is_known(&self) -> bool {
        matches!(self, Needed::Size(_))
    }

    /// Transforms the contained `Size` with `f`, leaving `Unknown` untouched
    ///
    /// A result of zero from `f` turns into `Needed::Unknown`.
    #[inline]
    pub fn map<F: Fn(NonZeroUsize) -> usize>(self, f: F) -> Needed {
        if let Needed::Size(n) = self {
            Needed::new(f(n))
        } else {
            Needed::Unknown
        }
    }
}
294
295impl<'i, T> Stream for &'i [T]
296where
297    T: Clone + core::fmt::Debug,
298{
299    type Token = T;
300    type Slice = &'i [T];
301
302    type IterOffsets = Enumerate<Cloned<Iter<'i, T>>>;
303
304    type Checkpoint = Checkpoint<Self, Self>;
305
306    #[inline(always)]
307    fn iter_offsets(&self) -> Self::IterOffsets {
308        self.iter().cloned().enumerate()
309    }
310    #[inline(always)]
311    fn eof_offset(&self) -> usize {
312        self.len()
313    }
314
315    #[inline(always)]
316    fn next_token(&mut self) -> Option<Self::Token> {
317        let (token, next) = self.split_first()?;
318        *self = next;
319        Some(token.clone())
320    }
321
322    #[inline(always)]
323    fn peek_token(&self) -> Option<Self::Token> {
324        if self.is_empty() {
325            None
326        } else {
327            Some(self[0].clone())
328        }
329    }
330
331    #[inline(always)]
332    fn offset_for<P>(&self, predicate: P) -> Option<usize>
333    where
334        P: Fn(Self::Token) -> bool,
335    {
336        self.iter().position(|b| predicate(b.clone()))
337    }
338    #[inline(always)]
339    fn offset_at(&self, tokens: usize) -> Result<usize, Needed> {
340        if let Some(needed) = tokens.checked_sub(self.len()).and_then(NonZeroUsize::new) {
341            Err(Needed::Size(needed))
342        } else {
343            Ok(tokens)
344        }
345    }
346    #[inline(always)]
347    fn next_slice(&mut self, offset: usize) -> Self::Slice {
348        let (slice, next) = self.split_at(offset);
349        *self = next;
350        slice
351    }
352    #[inline(always)]
353    unsafe fn next_slice_unchecked(&mut self, offset: usize) -> Self::Slice {
354        #[cfg(debug_assertions)]
355        self.peek_slice(offset);
356
357        // SAFETY: `Stream::next_slice_unchecked` requires `offset` to be in bounds
358        let slice = unsafe { self.get_unchecked(..offset) };
359        // SAFETY: `Stream::next_slice_unchecked` requires `offset` to be in bounds
360        let next = unsafe { self.get_unchecked(offset..) };
361        *self = next;
362        slice
363    }
364    #[inline(always)]
365    fn peek_slice(&self, offset: usize) -> Self::Slice {
366        &self[..offset]
367    }
368    #[inline(always)]
369    unsafe fn peek_slice_unchecked(&self, offset: usize) -> Self::Slice {
370        #[cfg(debug_assertions)]
371        self.peek_slice(offset);
372
373        // SAFETY: `Stream::next_slice_unchecked` requires `offset` to be in bounds
374        let slice = unsafe { self.get_unchecked(..offset) };
375        slice
376    }
377
378    #[inline(always)]
379    fn checkpoint(&self) -> Self::Checkpoint {
380        Checkpoint::<_, Self>::new(*self)
381    }
382    #[inline(always)]
383    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
384        *self = checkpoint.inner;
385    }
386
387    fn trace(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
388        write!(f, "{self:?}")
389    }
390}
391
392impl<'i> Stream for &'i str {
393    type Token = char;
394    type Slice = &'i str;
395
396    type IterOffsets = CharIndices<'i>;
397
398    type Checkpoint = Checkpoint<Self, Self>;
399
400    #[inline(always)]
401    fn iter_offsets(&self) -> Self::IterOffsets {
402        self.char_indices()
403    }
404    #[inline(always)]
405    fn eof_offset(&self) -> usize {
406        self.len()
407    }
408
409    #[inline(always)]
410    fn next_token(&mut self) -> Option<Self::Token> {
411        let mut iter = self.chars();
412        let c = iter.next()?;
413        *self = iter.as_str();
414        Some(c)
415    }
416
417    #[inline(always)]
418    fn peek_token(&self) -> Option<Self::Token> {
419        self.chars().next()
420    }
421
422    #[inline(always)]
423    fn offset_for<P>(&self, predicate: P) -> Option<usize>
424    where
425        P: Fn(Self::Token) -> bool,
426    {
427        for (o, c) in self.iter_offsets() {
428            if predicate(c) {
429                return Some(o);
430            }
431        }
432        None
433    }
434    #[inline]
435    fn offset_at(&self, tokens: usize) -> Result<usize, Needed> {
436        let mut cnt = 0;
437        for (offset, _) in self.iter_offsets() {
438            if cnt == tokens {
439                return Ok(offset);
440            }
441            cnt += 1;
442        }
443
444        if cnt == tokens {
445            Ok(self.eof_offset())
446        } else {
447            Err(Needed::Unknown)
448        }
449    }
450    #[inline(always)]
451    fn next_slice(&mut self, offset: usize) -> Self::Slice {
452        let (slice, next) = self.split_at(offset);
453        *self = next;
454        slice
455    }
456    #[inline(always)]
457    unsafe fn next_slice_unchecked(&mut self, offset: usize) -> Self::Slice {
458        #[cfg(debug_assertions)]
459        self.peek_slice(offset);
460
461        // SAFETY: `Stream::next_slice_unchecked` requires `offset` to be in bounds and on a UTF-8
462        // sequence boundary
463        let slice = unsafe { self.get_unchecked(..offset) };
464        // SAFETY: `Stream::next_slice_unchecked` requires `offset` to be in bounds and on a UTF-8
465        // sequence boundary
466        let next = unsafe { self.get_unchecked(offset..) };
467        *self = next;
468        slice
469    }
470    #[inline(always)]
471    fn peek_slice(&self, offset: usize) -> Self::Slice {
472        &self[..offset]
473    }
474    #[inline(always)]
475    unsafe fn peek_slice_unchecked(&self, offset: usize) -> Self::Slice {
476        #[cfg(debug_assertions)]
477        self.peek_slice(offset);
478
479        // SAFETY: `Stream::next_slice_unchecked` requires `offset` to be in bounds
480        let slice = unsafe { self.get_unchecked(..offset) };
481        slice
482    }
483
484    #[inline(always)]
485    fn checkpoint(&self) -> Self::Checkpoint {
486        Checkpoint::<_, Self>::new(*self)
487    }
488    #[inline(always)]
489    fn reset(&mut self, checkpoint: &Self::Checkpoint) {
490        *self = checkpoint.inner;
491    }
492
493    fn trace(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
494        write!(f, "{self:#?}")
495    }
496}
497
/// Reports offsets of the current parse location
///
/// Wrap your [`Stream`] in [`LocatingSlice`] to get location tracking
pub trait Location {
    /// Offset at which the previous token ended
    fn previous_token_end(&self) -> usize;
    /// Offset at which the current token starts
    fn current_token_start(&self) -> usize;
}
507
/// Lets a stream collect top-level errors mid-parse so that parsing can continue
///
/// Wrap your [`Stream`] in [`Recoverable`] to get error recovery tracking
#[cfg(feature = "unstable-recover")]
#[cfg(feature = "std")]
pub trait Recover<E>: Stream {
    /// Record a top-level error
    ///
    /// Hands the error back as `Err(err)` when it cannot be recorded (e.g. if
    /// [`Recover::is_recovery_supported`] returns `false`).
    fn record_err(
        &mut self,
        token_start: &Self::Checkpoint,
        err_start: &Self::Checkpoint,
        err: E,
    ) -> Result<(), E>;

    /// Report whether the [`Stream`] is able to store errors for recovery
    fn is_recovery_supported() -> bool;
}

// Plain slices have nowhere to store errors, so every error is handed straight back.
#[cfg(feature = "unstable-recover")]
#[cfg(feature = "std")]
impl<'a, T, E> Recover<E> for &'a [T]
where
    &'a [T]: Stream,
{
    #[inline(always)]
    fn record_err(
        &mut self,
        _token_start: &Self::Checkpoint,
        _err_start: &Self::Checkpoint,
        err: E,
    ) -> Result<(), E> {
        Result::Err(err)
    }

    /// Always `false`: a plain slice cannot store errors
    #[inline(always)]
    fn is_recovery_supported() -> bool {
        false
    }
}

// Plain strings have nowhere to store errors, so every error is handed straight back.
#[cfg(feature = "unstable-recover")]
#[cfg(feature = "std")]
impl<E> Recover<E> for &str {
    #[inline(always)]
    fn record_err(
        &mut self,
        _token_start: &Self::Checkpoint,
        _err_start: &Self::Checkpoint,
        err: E,
    ) -> Result<(), E> {
        Result::Err(err)
    }

    /// Always `false`: a plain string cannot store errors
    #[inline(always)]
    fn is_recovery_supported() -> bool {
        false
    }
}
571
/// Distinguishes a complete input buffer from a partial one that is still being streamed in
///
/// Wrap a buffer type that is otherwise assumed complete in [`Partial`] to treat it as streaming.
pub trait StreamIsPartial: Sized {
    /// Saved partial/complete state, as returned by [`StreamIsPartial::complete`]
    type PartialState;

    /// Mark the stream as complete, returning the state it was in before
    #[must_use]
    fn complete(&mut self) -> Self::PartialState;

    /// Put the stream back into a previously saved state
    fn restore_partial(&mut self, state: Self::PartialState);

    /// Report whether the [`Stream`] can ever be incomplete
    fn is_partial_supported() -> bool;

    /// Report whether the [`Stream`] is currently incomplete
    ///
    /// Defaults to [`StreamIsPartial::is_partial_supported`]
    #[inline(always)]
    fn is_partial(&self) -> bool {
        <Self as StreamIsPartial>::is_partial_supported()
    }
}

// A plain slice is always the whole input, so there is no state to save or restore.
impl<T> StreamIsPartial for &[T] {
    type PartialState = ();

    #[inline]
    fn complete(&mut self) -> Self::PartialState {
        // Already complete
    }

    #[inline]
    fn restore_partial(&mut self, _state: Self::PartialState) {
        // Nothing was saved
    }

    #[inline(always)]
    fn is_partial_supported() -> bool {
        false
    }
}

// A plain string is always the whole input, so there is no state to save or restore.
impl StreamIsPartial for &str {
    type PartialState = ();

    #[inline]
    fn complete(&mut self) -> Self::PartialState {
        // Already complete
    }

    #[inline]
    fn restore_partial(&mut self, _state: Self::PartialState) {
        // Nothing was saved
    }

    #[inline(always)]
    fn is_partial_supported() -> bool {
        false
    }
}
627
/// Calculates how far one position in an input is from an earlier one
pub trait Offset<Start = Self> {
    /// Offset between the first byte of `start` and the first byte of `self`
    ///
    /// <div class="warning">
    ///
    /// **Note:** This is an offset, not an index, and may point to the end of input
    /// (`start.len()`) when `self` is exhausted.
    ///
    /// </div>
    fn offset_from(&self, start: &Start) -> usize;
}

impl<T> Offset for &[T] {
    /// Returns the number of elements between the start of `start` and the start of `self`
    ///
    /// `self` is expected to begin at or after the beginning of `start`; debug builds assert
    /// this.
    #[inline]
    fn offset_from(&self, start: &Self) -> usize {
        let elem_size = core::mem::size_of::<T>();
        if elem_size == 0 {
            // Zero-sized elements all share one address, so the pointers carry no position and
            // dividing by the element size would panic. Fall back to the length difference,
            // which is exact whenever `self` is a tail of `start`.
            return start.len().saturating_sub(self.len());
        }

        let fst = (*start).as_ptr();
        let snd = (*self).as_ptr();

        debug_assert!(
            fst <= snd,
            "`Offset::offset_from({snd:?}, {fst:?})` only accepts slices of `self`"
        );
        // Byte distance between the two starts, converted to a number of elements
        (snd as usize - fst as usize) / elem_size
    }
}
654
655impl<'a, T> Offset<<&'a [T] as Stream>::Checkpoint> for &'a [T]
656where
657    T: Clone + core::fmt::Debug,
658{
659    #[inline(always)]
660    fn offset_from(&self, other: &<&'a [T] as Stream>::Checkpoint) -> usize {
661        self.checkpoint().offset_from(other)
662    }
663}
664
665impl Offset for &str {
666    #[inline(always)]
667    fn offset_from(&self, start: &Self) -> usize {
668        self.as_bytes().offset_from(&start.as_bytes())
669    }
670}
671
672impl<'a> Offset<<&'a str as Stream>::Checkpoint> for &'a str {
673    #[inline(always)]
674    fn offset_from(&self, other: &<&'a str as Stream>::Checkpoint) -> usize {
675        self.checkpoint().offset_from(other)
676    }
677}
678
679impl<I, S> Offset for Checkpoint<I, S>
680where
681    I: Offset,
682{
683    #[inline(always)]
684    fn offset_from(&self, start: &Self) -> usize {
685        self.inner.offset_from(&start.inner)
686    }
687}
688
/// Implemented by inputs that can expose their content as raw bytes
pub trait AsBytes {
    /// Borrows the input as a byte slice
    fn as_bytes(&self) -> &[u8];
}

// A byte slice is already its own byte view.
impl AsBytes for &[u8] {
    #[inline(always)]
    fn as_bytes(&self) -> &[u8] {
        *self
    }
}
701
/// Implemented by text-like inputs (byte strings and UTF-8 strings) that can expose their
/// content as raw bytes
pub trait AsBStr {
    /// Borrows the input as a byte slice
    fn as_bstr(&self) -> &[u8];
}

// A byte slice is already its own byte view.
impl AsBStr for &[u8] {
    #[inline(always)]
    fn as_bstr(&self) -> &[u8] {
        *self
    }
}

// A string is viewed through its UTF-8 encoding.
impl AsBStr for &str {
    #[inline(always)]
    fn as_bstr(&self) -> &[u8] {
        str::as_bytes(self)
    }
}
721
/// Outcome of [`Compare::compare`]
#[derive(Debug, Eq, PartialEq)]
pub enum CompareResult {
    /// The start of the buffer matched
    ///
    /// The `usize` is where the match ends within the buffer.
    /// This matters most for caseless UTF-8, where the value given to `Compare::compare` may have
    /// a different length than what it matched in the buffer.
    Ok(usize),
    /// Nothing mismatched so far, but the buffer ended before the comparison could finish
    Incomplete,
    /// The buffer does not match
    Error,
}

/// Checks whether an input starts with a given value
pub trait Compare<T> {
    /// Compares the start of `self` against `t`
    fn compare(&self, t: T) -> CompareResult;
}

impl<'b> Compare<&'b [u8]> for &[u8] {
    #[inline]
    fn compare(&self, t: &'b [u8]) -> CompareResult {
        // `zip` stops at the shorter side, so only the overlapping prefix is checked here; a
        // buffer that is too short but otherwise matching is reported as incomplete below.
        let mismatch = self.iter().zip(t).any(|(have, want)| have != want);
        if mismatch {
            CompareResult::Error
        } else if t.len() > self.len() {
            CompareResult::Incomplete
        } else {
            CompareResult::Ok(t.len())
        }
    }
}

impl<const LEN: usize> Compare<[u8; LEN]> for &[u8] {
    #[inline(always)]
    fn compare(&self, t: [u8; LEN]) -> CompareResult {
        self.compare(t.as_slice())
    }
}

impl<'b, const LEN: usize> Compare<&'b [u8; LEN]> for &[u8] {
    #[inline(always)]
    fn compare(&self, t: &'b [u8; LEN]) -> CompareResult {
        self.compare(t.as_slice())
    }
}

impl<'b> Compare<&'b str> for &[u8] {
    #[inline(always)]
    fn compare(&self, t: &'b str) -> CompareResult {
        let expected: &[u8] = t.as_bytes();
        self.compare(expected)
    }
}

impl Compare<u8> for &[u8] {
    #[inline]
    fn compare(&self, t: u8) -> CompareResult {
        match self.first() {
            None => CompareResult::Incomplete,
            // A single byte always matches exactly one byte of the buffer
            Some(&first) if first == t => CompareResult::Ok(1),
            Some(_) => CompareResult::Error,
        }
    }
}

impl Compare<char> for &[u8] {
    #[inline(always)]
    fn compare(&self, t: char) -> CompareResult {
        // Compare against the UTF-8 encoding of the `char`
        let mut utf8 = [0u8; 4];
        let encoded = t.encode_utf8(&mut utf8);
        self.compare(encoded.as_bytes())
    }
}

impl<'b> Compare<&'b str> for &str {
    #[inline(always)]
    fn compare(&self, t: &'b str) -> CompareResult {
        let haystack = self.as_bytes();
        haystack.compare(t.as_bytes())
    }
}

impl Compare<char> for &str {
    #[inline(always)]
    fn compare(&self, t: char) -> CompareResult {
        let haystack = self.as_bytes();
        haystack.compare(t)
    }
}
808
/// Searches `self` for a pattern
pub trait FindSlice<T> {
    /// Returns the range that `substr` occupies within `self`, or `None` if it is not found
    fn find_slice(&self, substr: T) -> Option<core::ops::Range<usize>>;
}
814
815impl<'s> FindSlice<&'s [u8]> for &[u8] {
816    #[inline(always)]
817    fn find_slice(&self, substr: &'s [u8]) -> Option<core::ops::Range<usize>> {
818        memmem(self, substr)
819    }
820}
821
822impl<'s> FindSlice<(&'s [u8],)> for &[u8] {
823    #[inline(always)]
824    fn find_slice(&self, substr: (&'s [u8],)) -> Option<core::ops::Range<usize>> {
825        memmem(self, substr.0)
826    }
827}
828
829impl<'s> FindSlice<(&'s [u8], &'s [u8])> for &[u8] {
830    #[inline(always)]
831    fn find_slice(&self, substr: (&'s [u8], &'s [u8])) -> Option<core::ops::Range<usize>> {
832        memmem2(self, substr)
833    }
834}
835
836impl<'s> FindSlice<(&'s [u8], &'s [u8], &'s [u8])> for &[u8] {
837    #[inline(always)]
838    fn find_slice(
839        &self,
840        substr: (&'s [u8], &'s [u8], &'s [u8]),
841    ) -> Option<core::ops::Range<usize>> {
842        memmem3(self, substr)
843    }
844}
845
846impl FindSlice<char> for &[u8] {
847    #[inline(always)]
848    fn find_slice(&self, substr: char) -> Option<core::ops::Range<usize>> {
849        let mut b = [0; 4];
850        let substr = substr.encode_utf8(&mut b);
851        self.find_slice(&*substr)
852    }
853}
854
855impl FindSlice<(char,)> for &[u8] {
856    #[inline(always)]
857    fn find_slice(&self, substr: (char,)) -> Option<core::ops::Range<usize>> {
858        let mut b = [0; 4];
859        let substr0 = substr.0.encode_utf8(&mut b);
860        self.find_slice((&*substr0,))
861    }
862}
863
864impl FindSlice<(char, char)> for &[u8] {
865    #[inline(always)]
866    fn find_slice(&self, substr: (char, char)) -> Option<core::ops::Range<usize>> {
867        let mut b = [0; 4];
868        let substr0 = substr.0.encode_utf8(&mut b);
869        let mut b = [0; 4];
870        let substr1 = substr.1.encode_utf8(&mut b);
871        self.find_slice((&*substr0, &*substr1))
872    }
873}
874
875impl FindSlice<(char, char, char)> for &[u8] {
876    #[inline(always)]
877    fn find_slice(&self, substr: (char, char, char)) -> Option<core::ops::Range<usize>> {
878        let mut b = [0; 4];
879        let substr0 = substr.0.encode_utf8(&mut b);
880        let mut b = [0; 4];
881        let substr1 = substr.1.encode_utf8(&mut b);
882        let mut b = [0; 4];
883        let substr2 = substr.2.encode_utf8(&mut b);
884        self.find_slice((&*substr0, &*substr1, &*substr2))
885    }
886}
887
888impl FindSlice<u8> for &[u8] {
889    #[inline(always)]
890    fn find_slice(&self, substr: u8) -> Option<core::ops::Range<usize>> {
891        memchr(substr, self).map(|i| i..i + 1)
892    }
893}
894
895impl FindSlice<(u8,)> for &[u8] {
896    #[inline(always)]
897    fn find_slice(&self, substr: (u8,)) -> Option<core::ops::Range<usize>> {
898        memchr(substr.0, self).map(|i| i..i + 1)
899    }
900}
901
902impl FindSlice<(u8, u8)> for &[u8] {
903    #[inline(always)]
904    fn find_slice(&self, substr: (u8, u8)) -> Option<core::ops::Range<usize>> {
905        memchr2(substr, self).map(|i| i..i + 1)
906    }
907}
908
909impl FindSlice<(u8, u8, u8)> for &[u8] {
910    #[inline(always)]
911    fn find_slice(&self, substr: (u8, u8, u8)) -> Option<core::ops::Range<usize>> {
912        memchr3(substr, self).map(|i| i..i + 1)
913    }
914}
915
916impl<'s> FindSlice<&'s str> for &[u8] {
917    #[inline(always)]
918    fn find_slice(&self, substr: &'s str) -> Option<core::ops::Range<usize>> {
919        self.find_slice(substr.as_bytes())
920    }
921}
922
923impl<'s> FindSlice<(&'s str,)> for &[u8] {
924    #[inline(always)]
925    fn find_slice(&self, substr: (&'s str,)) -> Option<core::ops::Range<usize>> {
926        memmem(self, substr.0.as_bytes())
927    }
928}
929
930impl<'s> FindSlice<(&'s str, &'s str)> for &[u8] {
931    #[inline(always)]
932    fn find_slice(&self, substr: (&'s str, &'s str)) -> Option<core::ops::Range<usize>> {
933        memmem2(self, (substr.0.as_bytes(), substr.1.as_bytes()))
934    }
935}
936
937impl<'s> FindSlice<(&'s str, &'s str, &'s str)> for &[u8] {
938    #[inline(always)]
939    fn find_slice(&self, substr: (&'s str, &'s str, &'s str)) -> Option<core::ops::Range<usize>> {
940        memmem3(
941            self,
942            (
943                substr.0.as_bytes(),
944                substr.1.as_bytes(),
945                substr.2.as_bytes(),
946            ),
947        )
948    }
949}
950
951impl<'s> FindSlice<&'s str> for &str {
952    #[inline(always)]
953    fn find_slice(&self, substr: &'s str) -> Option<core::ops::Range<usize>> {
954        self.as_bytes().find_slice(substr)
955    }
956}
957
958impl<'s> FindSlice<(&'s str,)> for &str {
959    #[inline(always)]
960    fn find_slice(&self, substr: (&'s str,)) -> Option<core::ops::Range<usize>> {
961        self.as_bytes().find_slice(substr)
962    }
963}
964
965impl<'s> FindSlice<(&'s str, &'s str)> for &str {
966    #[inline(always)]
967    fn find_slice(&self, substr: (&'s str, &'s str)) -> Option<core::ops::Range<usize>> {
968        self.as_bytes().find_slice(substr)
969    }
970}
971
972impl<'s> FindSlice<(&'s str, &'s str, &'s str)> for &str {
973    #[inline(always)]
974    fn find_slice(&self, substr: (&'s str, &'s str, &'s str)) -> Option<core::ops::Range<usize>> {
975        self.as_bytes().find_slice(substr)
976    }
977}
978
979impl FindSlice<char> for &str {
980    #[inline(always)]
981    fn find_slice(&self, substr: char) -> Option<core::ops::Range<usize>> {
982        self.as_bytes().find_slice(substr)
983    }
984}
985
986impl FindSlice<(char,)> for &str {
987    #[inline(always)]
988    fn find_slice(&self, substr: (char,)) -> Option<core::ops::Range<usize>> {
989        self.as_bytes().find_slice(substr)
990    }
991}
992
993impl FindSlice<(char, char)> for &str {
994    #[inline(always)]
995    fn find_slice(&self, substr: (char, char)) -> Option<core::ops::Range<usize>> {
996        self.as_bytes().find_slice(substr)
997    }
998}
999
1000impl FindSlice<(char, char, char)> for &str {
1001    #[inline(always)]
1002    fn find_slice(&self, substr: (char, char, char)) -> Option<core::ops::Range<usize>> {
1003        self.as_bytes().find_slice(substr)
1004    }
1005}
1006
/// Bridges an input to `str`'s `parse()` method
pub trait ParseSlice<R> {
    /// Returns the parsed value, or `None` if `parse()` failed
    ///
    /// Byte slices are first converted to a `&str`; bytes that are not valid UTF-8 also yield
    /// `None`
    fn parse_slice(&self) -> Option<R>;
}

impl<R: FromStr> ParseSlice<R> for &[u8] {
    #[inline(always)]
    fn parse_slice(&self) -> Option<R> {
        // Invalid UTF-8 and a failed parse are both reported as `None`
        let text = from_utf8(self).ok()?;
        text.parse().ok()
    }
}

impl<R: FromStr> ParseSlice<R> for &str {
    #[inline(always)]
    fn parse_slice(&self) -> Option<R> {
        R::from_str(self).ok()
    }
}
1029
1030/// Convert a `Stream` into an appropriate `Output` type
1031pub trait UpdateSlice: Stream {
1032    /// Convert an `Output` type to be used as `Stream`
1033    fn update_slice(self, inner: Self::Slice) -> Self;
1034}
1035
// For a plain `&[T]` the stream and its `Slice` type are the same type, so the supplied
// slice directly becomes the new stream.
impl<T> UpdateSlice for &[T]
where
    T: Clone + core::fmt::Debug,
{
    #[inline(always)]
    fn update_slice(self, inner: Self::Slice) -> Self {
        // `self` is discarded; `inner` is returned unchanged.
        inner
    }
}
1045
// For a plain `&str` the stream and its `Slice` type are the same type, so the supplied
// slice directly becomes the new stream.
impl UpdateSlice for &str {
    #[inline(always)]
    fn update_slice(self, inner: Self::Slice) -> Self {
        // `self` is discarded; `inner` is returned unchanged.
        inner
    }
}
1052
/// Ensure checkpoint details are kept private
///
/// `T` is the wrapped value. `S` is held only as `PhantomData`, so it appears in the type
/// without being stored. Every trait impl below is written by hand, bounds only `T`, and
/// forwards to `inner`.
pub struct Checkpoint<T, S> {
    pub(crate) inner: T,
    stream: core::marker::PhantomData<S>,
}

impl<T, S> Checkpoint<T, S> {
    /// Wraps `inner` in a checkpoint tagged with `S`.
    pub(crate) fn new(inner: T) -> Self {
        let stream = core::marker::PhantomData;
        Self { inner, stream }
    }
}

impl<T: Copy, S> Copy for Checkpoint<T, S> {}

impl<T: Clone, S> Clone for Checkpoint<T, S> {
    #[inline(always)]
    fn clone(&self) -> Self {
        // Only `inner` carries data; the marker is recreated.
        Self {
            inner: T::clone(&self.inner),
            stream: core::marker::PhantomData,
        }
    }
}

impl<T: PartialOrd, S> PartialOrd for Checkpoint<T, S> {
    #[inline(always)]
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        PartialOrd::partial_cmp(&self.inner, &other.inner)
    }
}

impl<T: Ord, S> Ord for Checkpoint<T, S> {
    #[inline(always)]
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        Ord::cmp(&self.inner, &other.inner)
    }
}

impl<T: PartialEq, S> PartialEq for Checkpoint<T, S> {
    #[inline(always)]
    fn eq(&self, other: &Self) -> bool {
        PartialEq::eq(&self.inner, &other.inner)
    }
}

impl<T: Eq, S> Eq for Checkpoint<T, S> {}

impl<T: core::fmt::Debug, S> core::fmt::Debug for Checkpoint<T, S> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Formats exactly like `inner`; the wrapper itself is not shown.
        core::fmt::Debug::fmt(&self.inner, f)
    }
}
1108
/// Abstracts something which can extend an `Extend`.
///
/// Used to build modified input slices in [`escaped`][crate::ascii::escaped].
pub trait Accumulate<T>: Sized {
    /// Create a new, empty accumulator, optionally given an expected capacity
    fn initial(capacity: Option<usize>) -> Self;
    /// Fold one more input into the accumulator
    fn accumulate(&mut self, acc: T);
}

// Accumulator that discards everything it is given.
impl<T> Accumulate<T> for () {
    #[inline(always)]
    fn initial(_hint: Option<usize>) -> Self {}
    #[inline(always)]
    fn accumulate(&mut self, _item: T) {}
}

// Accumulator that only counts how many items it was given.
impl<T> Accumulate<T> for usize {
    #[inline(always)]
    fn initial(_hint: Option<usize>) -> Self {
        // The count starts at zero; the capacity hint is not used.
        0_usize
    }
    #[inline(always)]
    fn accumulate(&mut self, _item: T) {
        *self += 1;
    }
}
1136
// Collects individual items into a `Vec`.
#[cfg(feature = "alloc")]
impl<T> Accumulate<T> for Vec<T> {
    #[inline(always)]
    fn initial(capacity: Option<usize>) -> Self {
        // A capacity hint is clamped before any memory is reserved.
        capacity.map_or_else(Vec::new, |hint| Vec::with_capacity(clamp_capacity::<T>(hint)))
    }
    #[inline(always)]
    fn accumulate(&mut self, acc: T) {
        self.push(acc);
    }
}
1151
// Concatenates borrowed slices into a `Vec`, cloning every element.
#[cfg(feature = "alloc")]
impl<'i, T: Clone> Accumulate<&'i [T]> for Vec<T> {
    #[inline(always)]
    fn initial(capacity: Option<usize>) -> Self {
        // A capacity hint is clamped before any memory is reserved.
        if let Some(hint) = capacity {
            Vec::with_capacity(clamp_capacity::<T>(hint))
        } else {
            Vec::new()
        }
    }
    #[inline(always)]
    fn accumulate(&mut self, acc: &'i [T]) {
        self.extend_from_slice(acc);
    }
}
1166
// Appends individual `char`s to a `String`.
#[cfg(feature = "alloc")]
impl Accumulate<char> for String {
    #[inline(always)]
    fn initial(capacity: Option<usize>) -> Self {
        // A capacity hint is clamped (using the size of `char`) before memory is reserved.
        capacity
            .map(|hint| String::with_capacity(clamp_capacity::<char>(hint)))
            .unwrap_or_default()
    }
    #[inline(always)]
    fn accumulate(&mut self, acc: char) {
        self.push(acc);
    }
}
1181
// Appends borrowed string slices to a `String`.
#[cfg(feature = "alloc")]
impl<'i> Accumulate<&'i str> for String {
    #[inline(always)]
    fn initial(capacity: Option<usize>) -> Self {
        // A capacity hint is clamped (using the size of `char`) before memory is reserved.
        if let Some(hint) = capacity {
            String::with_capacity(clamp_capacity::<char>(hint))
        } else {
            String::new()
        }
    }
    #[inline(always)]
    fn accumulate(&mut self, acc: &'i str) {
        self.push_str(acc);
    }
}
1196
// Appends `Cow<str>` pieces to a `String`, copying their contents.
#[cfg(feature = "alloc")]
impl<'i> Accumulate<Cow<'i, str>> for String {
    #[inline(always)]
    fn initial(capacity: Option<usize>) -> Self {
        // A capacity hint is clamped (using the size of `char`) before memory is reserved.
        capacity.map_or_else(String::new, |hint| {
            String::with_capacity(clamp_capacity::<char>(hint))
        })
    }
    #[inline(always)]
    fn accumulate(&mut self, acc: Cow<'i, str>) {
        let piece: &str = &acc;
        self.push_str(piece);
    }
}
1211
// Appends owned `String` pieces to a `String`, copying their contents.
#[cfg(feature = "alloc")]
impl Accumulate<String> for String {
    #[inline(always)]
    fn initial(capacity: Option<usize>) -> Self {
        // A capacity hint is clamped (using the size of `char`) before memory is reserved.
        match capacity {
            None => String::new(),
            Some(hint) => String::with_capacity(clamp_capacity::<char>(hint)),
        }
    }
    #[inline(always)]
    fn accumulate(&mut self, acc: String) {
        self.push_str(acc.as_str());
    }
}
1226
// Appends individual `char`s to a `Cow<str>`; the first push converts it to owned.
#[cfg(feature = "alloc")]
impl Accumulate<char> for Cow<'_, str> {
    #[inline(always)]
    fn initial(_capacity: Option<usize>) -> Self {
        // Starts as an empty borrow; the capacity hint is not used.
        Cow::Borrowed("")
    }
    #[inline(always)]
    fn accumulate(&mut self, acc: char) {
        <String as Accumulate<char>>::accumulate(self.to_mut(), acc);
    }
}
1238
// Accumulates borrowed string slices into a `Cow<str>`, staying borrowed while possible.
#[cfg(feature = "alloc")]
impl<'i> Accumulate<&'i str> for Cow<'i, str> {
    #[inline(always)]
    fn initial(_capacity: Option<usize>) -> Self {
        // Starts as an empty borrow; the capacity hint is not used.
        Cow::Borrowed("")
    }
    #[inline(always)]
    fn accumulate(&mut self, acc: &'i str) {
        let nothing_collected_yet = self.as_ref().is_empty();
        if !nothing_collected_yet {
            // There is existing content, so it has to be owned to be appended to.
            <String as Accumulate<&'i str>>::accumulate(self.to_mut(), acc);
        } else {
            // While empty, the incoming slice can simply be borrowed.
            *self = Cow::Borrowed(acc);
        }
    }
}
1254
// Accumulates `Cow<str>` pieces into a `Cow<str>`, adopting the first non-copied piece.
#[cfg(feature = "alloc")]
impl<'i> Accumulate<Cow<'i, str>> for Cow<'i, str> {
    #[inline(always)]
    fn initial(_capacity: Option<usize>) -> Self {
        // Starts as an empty borrow; the capacity hint is not used.
        Cow::Borrowed("")
    }
    #[inline(always)]
    fn accumulate(&mut self, acc: Cow<'i, str>) {
        let nothing_collected_yet = self.as_ref().is_empty();
        if !nothing_collected_yet {
            // There is existing content, so it has to be owned to be appended to.
            <String as Accumulate<Cow<'i, str>>>::accumulate(self.to_mut(), acc);
        } else {
            // While empty, the incoming piece replaces the accumulator as-is.
            *self = acc;
        }
    }
}
1270
// Appends owned `String` pieces to a `Cow<str>`; the first push converts it to owned.
#[cfg(feature = "alloc")]
impl Accumulate<String> for Cow<'_, str> {
    #[inline(always)]
    fn initial(_capacity: Option<usize>) -> Self {
        // Starts as an empty borrow; the capacity hint is not used.
        Cow::Borrowed("")
    }
    #[inline(always)]
    fn accumulate(&mut self, acc: String) {
        <String as Accumulate<String>>::accumulate(self.to_mut(), acc);
    }
}
1282
// Collects `(key, value)` pairs into a `BTreeMap`.
#[cfg(feature = "alloc")]
impl<K, V> Accumulate<(K, V)> for BTreeMap<K, V>
where
    K: core::cmp::Ord,
{
    #[inline(always)]
    fn initial(_capacity: Option<usize>) -> Self {
        // The capacity hint is not used for this collection.
        BTreeMap::new()
    }
    #[inline(always)]
    fn accumulate(&mut self, entry: (K, V)) {
        let (key, value) = entry;
        // Any value previously stored under `key` is dropped.
        self.insert(key, value);
    }
}
1297
// Collects `(key, value)` pairs into a `HashMap` built with a default hasher `S`.
#[cfg(feature = "std")]
impl<K, V, S> Accumulate<(K, V)> for HashMap<K, V, S>
where
    K: core::cmp::Eq + core::hash::Hash,
    S: BuildHasher + Default,
{
    #[inline(always)]
    fn initial(capacity: Option<usize>) -> Self {
        let hasher = S::default();
        // A capacity hint is clamped before any memory is reserved.
        if let Some(hint) = capacity {
            HashMap::with_capacity_and_hasher(clamp_capacity::<(K, V)>(hint), hasher)
        } else {
            HashMap::with_hasher(hasher)
        }
    }
    #[inline(always)]
    fn accumulate(&mut self, entry: (K, V)) {
        let (key, value) = entry;
        // Any value previously stored under `key` is dropped.
        self.insert(key, value);
    }
}
1319
// Collects keys into a `BTreeSet`.
#[cfg(feature = "alloc")]
impl<K> Accumulate<K> for BTreeSet<K>
where
    K: core::cmp::Ord,
{
    #[inline(always)]
    fn initial(_capacity: Option<usize>) -> Self {
        // The capacity hint is not used for this collection.
        BTreeSet::new()
    }
    #[inline(always)]
    fn accumulate(&mut self, key: K) {
        // Whether the key was newly inserted is deliberately not reported.
        let _newly_inserted = self.insert(key);
    }
}
1334
// Collects keys into a `HashSet` built with a default hasher `S`.
#[cfg(feature = "std")]
impl<K, S> Accumulate<K> for HashSet<K, S>
where
    K: core::cmp::Eq + core::hash::Hash,
    S: BuildHasher + Default,
{
    #[inline(always)]
    fn initial(capacity: Option<usize>) -> Self {
        let hasher = S::default();
        // A capacity hint is clamped before any memory is reserved.
        if let Some(hint) = capacity {
            HashSet::with_capacity_and_hasher(clamp_capacity::<K>(hint), hasher)
        } else {
            HashSet::with_hasher(hasher)
        }
    }
    #[inline(always)]
    fn accumulate(&mut self, key: K) {
        // Whether the key was newly inserted is deliberately not reported.
        let _newly_inserted = self.insert(key);
    }
}
1354
// Concatenates borrowed slices at the back of a `VecDeque`, cloning every element.
#[cfg(feature = "alloc")]
impl<'i, T: Clone> Accumulate<&'i [T]> for VecDeque<T> {
    #[inline(always)]
    fn initial(capacity: Option<usize>) -> Self {
        // A capacity hint is clamped before any memory is reserved.
        capacity.map_or_else(VecDeque::new, |hint| {
            VecDeque::with_capacity(clamp_capacity::<T>(hint))
        })
    }
    #[inline(always)]
    fn accumulate(&mut self, acc: &'i [T]) {
        let cloned_items = acc.iter().cloned();
        self.extend(cloned_items);
    }
}
1369
/// Limits a requested element count so that the initial allocation for elements of type
/// `T` stays within a fixed byte budget.
#[cfg(feature = "alloc")]
#[inline]
pub(crate) fn clamp_capacity<T>(capacity: usize) -> usize {
    /// Don't pre-allocate more than 64KiB when calling `Vec::with_capacity`.
    ///
    /// Pre-allocating memory is a nice optimization but count fields can't
    /// always be trusted. We should clamp initial capacities to some reasonable
    /// amount. This reduces the risk of a bogus count value triggering a panic
    /// due to an OOM error.
    ///
    /// This does not affect correctness. `winnow` will always read the full number
    /// of elements regardless of the capacity cap.
    const MAX_INITIAL_CAPACITY_BYTES: usize = 65536;

    // Zero-sized types are counted as one byte wide so the division below is always valid.
    let element_size = core::cmp::max(core::mem::size_of::<T>(), 1);
    let ceiling = MAX_INITIAL_CAPACITY_BYTES / element_size;
    core::cmp::min(capacity, ceiling)
}
1387
/// Helper trait to convert numbers to usize.
///
/// By default, `usize` implements `From<u8>` and `From<u16>` but not
/// `From<u32>` and `From<u64>` because that would be invalid on some
/// platforms. This trait also provides those wider conversions, but only on
/// targets with 32-bit or 64-bit pointers where they apply.
pub trait ToUsize {
    /// Converts `self` to `usize`
    fn to_usize(&self) -> usize;
}

impl ToUsize for u8 {
    #[inline(always)]
    fn to_usize(&self) -> usize {
        usize::from(*self)
    }
}

impl ToUsize for u16 {
    #[inline(always)]
    fn to_usize(&self) -> usize {
        usize::from(*self)
    }
}

impl ToUsize for usize {
    #[inline(always)]
    fn to_usize(&self) -> usize {
        let value: usize = *self;
        value
    }
}

// Only available where `usize` is at least 32 bits wide.
#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
impl ToUsize for u32 {
    #[inline(always)]
    fn to_usize(&self) -> usize {
        let value: u32 = *self;
        value as usize
    }
}

// Only available where `usize` is 64 bits wide.
#[cfg(target_pointer_width = "64")]
impl ToUsize for u64 {
    #[inline(always)]
    fn to_usize(&self) -> usize {
        let value: u64 = *self;
        value as usize
    }
}
1435
/// Transforms a token into a char for basic string parsing
#[allow(clippy::len_without_is_empty)]
#[allow(clippy::wrong_self_convention)]
pub trait AsChar {
    /// Makes a char from self
    ///
    /// # Example
    ///
    /// ```
    /// use winnow::prelude::*;
    ///
    /// assert_eq!('a'.as_char(), 'a');
    /// assert_eq!(u8::MAX.as_char(), std::char::from_u32(u8::MAX as u32).unwrap());
    /// ```
    fn as_char(self) -> char;

    /// Tests that self is an ASCII alphabetic character
    fn is_alpha(self) -> bool;

    /// Tests that self is an ASCII alphabetic character
    /// or a decimal digit
    fn is_alphanum(self) -> bool;
    /// Tests that self is a decimal digit
    fn is_dec_digit(self) -> bool;
    /// Tests that self is a hex digit
    fn is_hex_digit(self) -> bool;
    /// Tests that self is an octal digit
    fn is_oct_digit(self) -> bool;
    /// Gets the len in bytes for self
    fn len(self) -> usize;
    /// Tests that self is ASCII space or tab
    fn is_space(self) -> bool;
    /// Tests if self is ASCII newline: \n
    fn is_newline(self) -> bool;
}

// Bytes are classified with ASCII rules; a byte always has length 1.
impl AsChar for u8 {
    #[inline(always)]
    fn as_char(self) -> char {
        char::from(self)
    }
    #[inline]
    fn is_alpha(self) -> bool {
        matches!(self, b'A'..=b'Z' | b'a'..=b'z')
    }
    #[inline]
    fn is_alphanum(self) -> bool {
        self.is_dec_digit() || self.is_alpha()
    }
    #[inline]
    fn is_dec_digit(self) -> bool {
        matches!(self, b'0'..=b'9')
    }
    #[inline]
    fn is_hex_digit(self) -> bool {
        matches!(self, b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f')
    }
    #[inline]
    fn is_oct_digit(self) -> bool {
        matches!(self, b'0'..=b'7')
    }
    #[inline]
    fn len(self) -> usize {
        1
    }
    #[inline]
    fn is_space(self) -> bool {
        matches!(self, b' ' | b'\t')
    }
    #[inline]
    fn is_newline(self) -> bool {
        matches!(self, b'\n')
    }
}

// Every method dereferences and forwards to the `u8` implementation.
impl AsChar for &u8 {
    #[inline(always)]
    fn as_char(self) -> char {
        <u8 as AsChar>::as_char(*self)
    }
    #[inline(always)]
    fn is_alpha(self) -> bool {
        <u8 as AsChar>::is_alpha(*self)
    }
    #[inline(always)]
    fn is_alphanum(self) -> bool {
        <u8 as AsChar>::is_alphanum(*self)
    }
    #[inline(always)]
    fn is_dec_digit(self) -> bool {
        <u8 as AsChar>::is_dec_digit(*self)
    }
    #[inline(always)]
    fn is_hex_digit(self) -> bool {
        <u8 as AsChar>::is_hex_digit(*self)
    }
    #[inline(always)]
    fn is_oct_digit(self) -> bool {
        <u8 as AsChar>::is_oct_digit(*self)
    }
    #[inline(always)]
    fn len(self) -> usize {
        <u8 as AsChar>::len(*self)
    }
    #[inline(always)]
    fn is_space(self) -> bool {
        <u8 as AsChar>::is_space(*self)
    }
    #[inline(always)]
    fn is_newline(self) -> bool {
        <u8 as AsChar>::is_newline(*self)
    }
}

// Chars are classified with ASCII rules; the length is the UTF-8 encoded length.
impl AsChar for char {
    #[inline(always)]
    fn as_char(self) -> char {
        self
    }
    #[inline]
    fn is_alpha(self) -> bool {
        matches!(self, 'A'..='Z' | 'a'..='z')
    }
    #[inline]
    fn is_alphanum(self) -> bool {
        self.is_dec_digit() || self.is_alpha()
    }
    #[inline]
    fn is_dec_digit(self) -> bool {
        matches!(self, '0'..='9')
    }
    #[inline]
    fn is_hex_digit(self) -> bool {
        matches!(self, '0'..='9' | 'A'..='F' | 'a'..='f')
    }
    #[inline]
    fn is_oct_digit(self) -> bool {
        matches!(self, '0'..='7')
    }
    #[inline]
    fn len(self) -> usize {
        self.len_utf8()
    }
    #[inline]
    fn is_space(self) -> bool {
        matches!(self, ' ' | '\t')
    }
    #[inline]
    fn is_newline(self) -> bool {
        matches!(self, '\n')
    }
}

// Every method dereferences and forwards to the `char` implementation.
impl AsChar for &char {
    #[inline(always)]
    fn as_char(self) -> char {
        <char as AsChar>::as_char(*self)
    }
    #[inline(always)]
    fn is_alpha(self) -> bool {
        <char as AsChar>::is_alpha(*self)
    }
    #[inline(always)]
    fn is_alphanum(self) -> bool {
        <char as AsChar>::is_alphanum(*self)
    }
    #[inline(always)]
    fn is_dec_digit(self) -> bool {
        <char as AsChar>::is_dec_digit(*self)
    }
    #[inline(always)]
    fn is_hex_digit(self) -> bool {
        <char as AsChar>::is_hex_digit(*self)
    }
    #[inline(always)]
    fn is_oct_digit(self) -> bool {
        <char as AsChar>::is_oct_digit(*self)
    }
    #[inline(always)]
    fn len(self) -> usize {
        <char as AsChar>::len(*self)
    }
    #[inline(always)]
    fn is_space(self) -> bool {
        <char as AsChar>::is_space(*self)
    }
    #[inline(always)]
    fn is_newline(self) -> bool {
        <char as AsChar>::is_newline(*self)
    }
}
1627
/// Check if a token is in a set of possible tokens
///
/// While this can be implemented manually, you can also build up sets using:
/// - `b'c'` and `'c'`
/// - `b""`
/// - `|c| true`
/// - `b'a'..=b'z'`, `'a'..='z'` (etc for each [range type][std::ops])
/// - `(set1, set2, ...)`
///
/// # Example
///
/// For example, you could implement `hex_digit1` as:
/// ```
/// # #[cfg(feature = "parser")] {
/// # use winnow::prelude::*;
/// # use winnow::{error::ErrMode, error::ContextError};
/// # use winnow::token::take_while;
/// fn hex_digit1<'s>(input: &mut &'s str) -> ModalResult<&'s str, ContextError> {
///     take_while(1.., ('a'..='f', 'A'..='F', '0'..='9')).parse_next(input)
/// }
///
/// assert_eq!(hex_digit1.parse_peek("21cZ"), Ok(("Z", "21c")));
/// assert!(hex_digit1.parse_peek("H2").is_err());
/// assert!(hex_digit1.parse_peek("").is_err());
/// # }
/// ```
pub trait ContainsToken<T> {
    /// Returns true if self contains the token
    fn contains_token(&self, token: T) -> bool;
}
1658
1659impl ContainsToken<u8> for u8 {
1660    #[inline(always)]
1661    fn contains_token(&self, token: u8) -> bool {
1662        *self == token
1663    }
1664}
1665
1666impl ContainsToken<&u8> for u8 {
1667    #[inline(always)]
1668    fn contains_token(&self, token: &u8) -> bool {
1669        self.contains_token(*token)
1670    }
1671}
1672
1673impl ContainsToken<char> for u8 {
1674    #[inline(always)]
1675    fn contains_token(&self, token: char) -> bool {
1676        self.as_char() == token
1677    }
1678}
1679
1680impl ContainsToken<&char> for u8 {
1681    #[inline(always)]
1682    fn contains_token(&self, token: &char) -> bool {
1683        self.contains_token(*token)
1684    }
1685}
1686
1687impl<C: AsChar> ContainsToken<C> for char {
1688    #[inline(always)]
1689    fn contains_token(&self, token: C) -> bool {
1690        *self == token.as_char()
1691    }
1692}
1693
1694impl<C, F: Fn(C) -> bool> ContainsToken<C> for F {
1695    #[inline(always)]
1696    fn contains_token(&self, token: C) -> bool {
1697        self(token)
1698    }
1699}
1700
1701impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for core::ops::Range<C2> {
1702    #[inline(always)]
1703    fn contains_token(&self, token: C1) -> bool {
1704        let start = self.start.clone().as_char();
1705        let end = self.end.clone().as_char();
1706        (start..end).contains(&token.as_char())
1707    }
1708}
1709
1710impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for core::ops::RangeInclusive<C2> {
1711    #[inline(always)]
1712    fn contains_token(&self, token: C1) -> bool {
1713        let start = self.start().clone().as_char();
1714        let end = self.end().clone().as_char();
1715        (start..=end).contains(&token.as_char())
1716    }
1717}
1718
1719impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for core::ops::RangeFrom<C2> {
1720    #[inline(always)]
1721    fn contains_token(&self, token: C1) -> bool {
1722        let start = self.start.clone().as_char();
1723        (start..).contains(&token.as_char())
1724    }
1725}
1726
1727impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for core::ops::RangeTo<C2> {
1728    #[inline(always)]
1729    fn contains_token(&self, token: C1) -> bool {
1730        let end = self.end.clone().as_char();
1731        (..end).contains(&token.as_char())
1732    }
1733}
1734
1735impl<C1: AsChar, C2: AsChar + Clone> ContainsToken<C1> for core::ops::RangeToInclusive<C2> {
1736    #[inline(always)]
1737    fn contains_token(&self, token: C1) -> bool {
1738        let end = self.end.clone().as_char();
1739        (..=end).contains(&token.as_char())
1740    }
1741}
1742
1743impl<C1: AsChar> ContainsToken<C1> for core::ops::RangeFull {
1744    #[inline(always)]
1745    fn contains_token(&self, _token: C1) -> bool {
1746        true
1747    }
1748}
1749
1750impl<C: AsChar> ContainsToken<C> for &'_ [u8] {
1751    #[inline]
1752    fn contains_token(&self, token: C) -> bool {
1753        let token = token.as_char();
1754        self.iter().any(|t| t.as_char() == token)
1755    }
1756}
1757
1758impl<C: AsChar> ContainsToken<C> for &'_ [char] {
1759    #[inline]
1760    fn contains_token(&self, token: C) -> bool {
1761        let token = token.as_char();
1762        self.contains(&token)
1763    }
1764}
1765
1766impl<const LEN: usize, C: AsChar> ContainsToken<C> for &'_ [u8; LEN] {
1767    #[inline]
1768    fn contains_token(&self, token: C) -> bool {
1769        let token = token.as_char();
1770        self.iter().any(|t| t.as_char() == token)
1771    }
1772}
1773
1774impl<const LEN: usize, C: AsChar> ContainsToken<C> for &'_ [char; LEN] {
1775    #[inline]
1776    fn contains_token(&self, token: C) -> bool {
1777        let token = token.as_char();
1778        self.contains(&token)
1779    }
1780}
1781
1782impl<const LEN: usize, C: AsChar> ContainsToken<C> for [u8; LEN] {
1783    #[inline]
1784    fn contains_token(&self, token: C) -> bool {
1785        let token = token.as_char();
1786        self.iter().any(|t| t.as_char() == token)
1787    }
1788}
1789
1790impl<const LEN: usize, C: AsChar> ContainsToken<C> for [char; LEN] {
1791    #[inline]
1792    fn contains_token(&self, token: C) -> bool {
1793        let token = token.as_char();
1794        self.contains(&token)
1795    }
1796}
1797
1798impl<T> ContainsToken<T> for () {
1799    #[inline(always)]
1800    fn contains_token(&self, _token: T) -> bool {
1801        false
1802    }
1803}
1804
// Generates one `ContainsToken` impl for a tuple whose element types are the given
// identifiers. The tuple contains a token if any of its elements does.
macro_rules! impl_contains_token_for_tuple {
    ($($haystack:ident),+) => (
        // The type-parameter identifiers are reused as variable names in the body below.
        #[allow(non_snake_case)]
        impl<T, $($haystack),+> ContainsToken<T> for ($($haystack),+,)
        where
            T: Clone,
            $($haystack: ContainsToken<T>),+
        {
            #[inline]
            fn contains_token(&self, token: T) -> bool {
                // Borrow every tuple element under the name of its type parameter.
                let ($(ref $haystack),+,) = *self;
                // Each element is offered its own clone of the token; `||` stops at the
                // first element that reports a match, and the trailing `false` ends the chain.
                $($haystack.contains_token(token.clone()) || )+ false
            }
        }
    )
}
1821
// Invokes `impl_contains_token_for_tuple!` once for every non-empty prefix of the given
// identifier list, i.e. for tuple lengths 1 up to the number of identifiers.
macro_rules! impl_contains_token_for_tuples {
    // Entry point: start with the first identifier as the prefix and the rest as pending.
    ($haystack1:ident, $($haystack:ident),+) => {
        impl_contains_token_for_tuples!(__impl $haystack1; $($haystack),+);
    };
    // Emit the impl for the current prefix, then move one pending identifier into it.
    (__impl $($haystack:ident),+; $haystack1:ident $(,$haystack2:ident)*) => {
        impl_contains_token_for_tuple!($($haystack),+);
        impl_contains_token_for_tuples!(__impl $($haystack),+, $haystack1; $($haystack2),*);
    };
    // Nothing pending: emit the impl for the full list and stop.
    (__impl $($haystack:ident),+;) => {
        impl_contains_token_for_tuple!($($haystack),+);
    }
}

// Tuples of one through ten elements.
impl_contains_token_for_tuples!(F1, F2, F3, F4, F5, F6, F7, F8, F9, F10);
1836
/// `simd` build: forwards the single-byte search to the `memchr` crate.
#[cfg(feature = "simd")]
#[inline(always)]
fn memchr(token: u8, slice: &[u8]) -> Option<usize> {
    memchr::memchr(token, slice)
}
1842
/// `simd` build: forwards the search for either of two bytes to the `memchr` crate.
#[cfg(feature = "simd")]
#[inline(always)]
fn memchr2(token: (u8, u8), slice: &[u8]) -> Option<usize> {
    let (first, second) = token;
    memchr::memchr2(first, second, slice)
}
1848
/// `simd` build: forwards the search for any of three bytes to the `memchr` crate.
#[cfg(feature = "simd")]
#[inline(always)]
fn memchr3(token: (u8, u8, u8), slice: &[u8]) -> Option<usize> {
    let (first, second, third) = token;
    memchr::memchr3(first, second, third, slice)
}
1854
/// Fallback without `simd`: index of the first byte in `slice` equal to `token`, if any.
#[cfg(not(feature = "simd"))]
#[inline(always)]
fn memchr(token: u8, slice: &[u8]) -> Option<usize> {
    slice.iter().position(|&byte| byte == token)
}
1860
/// Fallback without `simd`: index of the first byte in `slice` equal to either byte of
/// `token`, if any.
#[cfg(not(feature = "simd"))]
#[inline(always)]
fn memchr2(token: (u8, u8), slice: &[u8]) -> Option<usize> {
    let (first, second) = token;
    slice
        .iter()
        .position(|&byte| byte == first || byte == second)
}
1866
/// Fallback without `simd`: index of the first byte in `slice` equal to any of the three
/// bytes of `token`, if any.
#[cfg(not(feature = "simd"))]
#[inline(always)]
fn memchr3(token: (u8, u8, u8), slice: &[u8]) -> Option<usize> {
    let (first, second, third) = token;
    slice
        .iter()
        .position(|&byte| byte == first || byte == second || byte == third)
}
1874
1875#[inline(always)]
1876fn memmem(slice: &[u8], literal: &[u8]) -> Option<core::ops::Range<usize>> {
1877    match literal.len() {
1878        0 => Some(0..0),
1879        1 => memchr(literal[0], slice).map(|i| i..i + 1),
1880        _ => memmem_(slice, literal),
1881    }
1882}
1883
1884#[inline(always)]
1885fn memmem2(slice: &[u8], literal: (&[u8], &[u8])) -> Option<core::ops::Range<usize>> {
1886    match (literal.0.len(), literal.1.len()) {
1887        (0, _) | (_, 0) => Some(0..0),
1888        (1, 1) => memchr2((literal.0[0], literal.1[0]), slice).map(|i| i..i + 1),
1889        _ => memmem2_(slice, literal),
1890    }
1891}
1892
1893#[inline(always)]
1894fn memmem3(slice: &[u8], literal: (&[u8], &[u8], &[u8])) -> Option<core::ops::Range<usize>> {
1895    match (literal.0.len(), literal.1.len(), literal.2.len()) {
1896        (0, _, _) | (_, 0, _) | (_, _, 0) => Some(0..0),
1897        (1, 1, 1) => memchr3((literal.0[0], literal.1[0], literal.2[0]), slice).map(|i| i..i + 1),
1898        _ => memmem3_(slice, literal),
1899    }
1900}
1901
/// `simd` build: finds `literal` inside `slice`, returning the index range of the match.
///
/// Candidate positions are the occurrences of the literal's first byte as yielded by
/// `memchr::memchr_iter`; the first candidate at which the whole literal matches wins.
/// An empty literal yields `Some(0..0)`.
#[cfg(feature = "simd")]
#[inline(always)]
fn memmem_(slice: &[u8], literal: &[u8]) -> Option<core::ops::Range<usize>> {
    // The first byte of the literal is the key used to locate candidates.
    let &prefix = match literal.first() {
        Some(x) => x,
        None => return Some(0..0),
    };
    #[allow(clippy::manual_find)] // faster this way
    for i in memchr::memchr_iter(prefix, slice) {
        // Confirm the candidate by comparing the full literal at this position.
        if slice[i..].starts_with(literal) {
            let i_end = i + literal.len();
            return Some(i..i_end);
        }
    }
    None
}
1918
/// `simd` build: finds either of two literals inside `slice`, returning the index range of
/// the match.
///
/// Candidate positions are the occurrences of either literal's first byte as yielded by
/// `memchr::memchr2_iter`. At each candidate `literal.0` is tried before `literal.1`.
/// If either literal is empty the result is `Some(0..0)`.
#[cfg(feature = "simd")]
fn memmem2_(slice: &[u8], literal: (&[u8], &[u8])) -> Option<core::ops::Range<usize>> {
    // The first byte of each literal is the key used to locate candidates.
    let prefix = match (literal.0.first(), literal.1.first()) {
        (Some(&a), Some(&b)) => (a, b),
        _ => return Some(0..0),
    };
    #[allow(clippy::manual_find)] // faster this way
    for i in memchr::memchr2_iter(prefix.0, prefix.1, slice) {
        let subslice = &slice[i..];
        // Confirm the candidate against each full literal, in tuple order.
        if subslice.starts_with(literal.0) {
            let i_end = i + literal.0.len();
            return Some(i..i_end);
        }
        if subslice.starts_with(literal.1) {
            let i_end = i + literal.1.len();
            return Some(i..i_end);
        }
    }
    None
}
1939
/// `simd` build: finds any of three literals inside `slice`, returning the index range of
/// the match.
///
/// Candidate positions are the occurrences of any literal's first byte as yielded by
/// `memchr::memchr3_iter`. At each candidate the literals are tried in tuple order.
/// If any literal is empty the result is `Some(0..0)`.
#[cfg(feature = "simd")]
fn memmem3_(slice: &[u8], literal: (&[u8], &[u8], &[u8])) -> Option<core::ops::Range<usize>> {
    // The first byte of each literal is the key used to locate candidates.
    let prefix = match (literal.0.first(), literal.1.first(), literal.2.first()) {
        (Some(&a), Some(&b), Some(&c)) => (a, b, c),
        _ => return Some(0..0),
    };
    #[allow(clippy::manual_find)] // faster this way
    for i in memchr::memchr3_iter(prefix.0, prefix.1, prefix.2, slice) {
        let subslice = &slice[i..];
        // Confirm the candidate against each full literal, in tuple order.
        if subslice.starts_with(literal.0) {
            let i_end = i + literal.0.len();
            return Some(i..i_end);
        }
        if subslice.starts_with(literal.1) {
            let i_end = i + literal.1.len();
            return Some(i..i_end);
        }
        if subslice.starts_with(literal.2) {
            let i_end = i + literal.2.len();
            return Some(i..i_end);
        }
    }
    None
}
1964
/// Fallback without `simd`: finds the first position in `slice` at which `literal` occurs
/// and returns the index range it covers, or `None` if there is no such position.
#[cfg(not(feature = "simd"))]
fn memmem_(slice: &[u8], literal: &[u8]) -> Option<core::ops::Range<usize>> {
    // Every start offset is tried in increasing order; the first hit wins.
    (0..slice.len())
        .find(|&start| slice[start..].starts_with(literal))
        .map(|start| start..start + literal.len())
}
1976
/// Fallback without `simd`: finds the first position in `slice` at which either literal
/// occurs and returns the index range of that match, or `None` if neither occurs.
///
/// When both literals match at the same position, `literal.0` takes precedence.
#[cfg(not(feature = "simd"))]
fn memmem2_(slice: &[u8], literal: (&[u8], &[u8])) -> Option<core::ops::Range<usize>> {
    let (first, second) = literal;
    // Every start offset is tried in increasing order; the first hit wins.
    (0..slice.len()).find_map(|start| {
        let rest = &slice[start..];
        if rest.starts_with(first) {
            Some(start..start + first.len())
        } else if rest.starts_with(second) {
            Some(start..start + second.len())
        } else {
            None
        }
    })
}
1992
/// Fallback without `simd`: finds the first position in `slice` at which any of the three
/// literals occurs and returns the index range of that match, or `None` if none occurs.
///
/// When several literals match at the same position, the earliest one in the tuple takes
/// precedence.
#[cfg(not(feature = "simd"))]
fn memmem3_(slice: &[u8], literal: (&[u8], &[u8], &[u8])) -> Option<core::ops::Range<usize>> {
    let (first, second, third) = literal;
    // Every start offset is tried in increasing order; the first hit wins.
    (0..slice.len()).find_map(|start| {
        let rest = &slice[start..];
        if rest.starts_with(first) {
            Some(start..start + first.len())
        } else if rest.starts_with(second) {
            Some(start..start + second.len())
        } else if rest.starts_with(third) {
            Some(start..start + third.len())
        } else {
            None
        }
    })
}
winnow/stream/mod.rs

winnow/stream/
mod.rs