wahgex_core/
lib.rs

1//! `wahgex-core` is a library for compiling regular expressions into
2//! WebAssembly modules that can be executed efficiently.
3
4#![deny(missing_docs, missing_debug_implementations)]
5#![warn(missing_debug_implementations)]
6
7use compile::{compile_from_nfa, CompiledRegex};
8
9pub use crate::{
10    compile::input::{InputOpts, PrepareInputResult},
11    error::BuildError,
12};
13
14mod compile;
15mod error;
16mod runtime;
17mod util;
18
/// Options that control how a [`PikeVM`] is built.
///
/// Every option is stored as an `Option` so that an unset value can be told
/// apart from an explicit one when two configurations are merged.
#[derive(Debug, Clone, Copy, Default)]
pub struct Config {
    /// Whether the internal state memory is exported (test builds only).
    #[cfg(test)]
    export_state: Option<bool>,
    /// Whether every internal function is exported (test builds only).
    #[cfg(test)]
    export_all_functions: Option<bool>,
}

impl Config {
    /// Size in bytes of one memory page: 64 KiB.
    pub const DEFAULT_PAGE_SIZE: usize = 64 * 1024;

    /// Returns a configuration with no option set.
    pub fn new() -> Self {
        Default::default()
    }

    /// Sets whether the internal state memory should be exported.
    ///
    /// Intended for testing and debugging.
    #[cfg(test)]
    pub fn export_state(self, export_state: bool) -> Self {
        Self {
            export_state: Some(export_state),
            ..self
        }
    }

    /// Reports whether the internal state memory is exported.
    ///
    /// An unset option counts as `false`.
    #[cfg(test)]
    pub fn get_export_state(&self) -> bool {
        matches!(self.export_state, Some(true))
    }

    /// Sets whether all internal functions should be exported.
    ///
    /// Intended for testing and debugging.
    #[cfg(test)]
    pub fn export_all_functions(self, export_all_functions: bool) -> Self {
        Self {
            export_all_functions: Some(export_all_functions),
            ..self
        }
    }

    /// Reports whether all internal functions are exported.
    ///
    /// An unset option counts as `false`.
    #[cfg(test)]
    pub fn get_export_all_functions(&self) -> bool {
        matches!(self.export_all_functions, Some(true))
    }

    /// Returns the memory page size in bytes.
    ///
    /// This is currently always [`Config::DEFAULT_PAGE_SIZE`].
    pub fn get_page_size(&self) -> usize {
        Self::DEFAULT_PAGE_SIZE
    }

    /// Merges `other` on top of `self` and returns the result.
    ///
    /// An option explicitly set in `other` wins; otherwise the value from
    /// `self` is kept.
    // Outside of test builds the struct has no fields, which leaves `other`
    // unused; the `expect` documents that this is intentional.
    #[cfg_attr(not(test), expect(unused_variables))]
    fn overwrite(self, other: Self) -> Self {
        Self {
            #[cfg(test)]
            export_state: match other.export_state {
                explicit @ Some(_) => explicit,
                None => self.export_state,
            },
            #[cfg(test)]
            export_all_functions: match other.export_all_functions {
                explicit @ Some(_) => explicit,
                None => self.export_all_functions,
            },
        }
    }
}
86
87/// A builder for compiling regular expressions into a [`PikeVM`].
88#[derive(Clone, Debug)]
89pub struct Builder {
90    config: Config,
91    thompson: regex_automata::nfa::thompson::Compiler,
92}
93
94impl Default for Builder {
95    fn default() -> Self {
96        let default_nfa_config = regex_automata::nfa::thompson::Config::new().shrink(false);
97        let mut thompson = regex_automata::nfa::thompson::Compiler::new();
98        thompson.configure(default_nfa_config);
99
100        Builder {
101            config: Config::default(),
102            thompson,
103        }
104    }
105}
106
107impl Builder {
108    /// Creates a new PikeVM builder with its default configuration.
109    pub fn new() -> Builder {
110        Self::default()
111    }
112
113    /// Compiles a single regular expression pattern into a [`PikeVM`].
114    pub fn build(&self, pattern: &str) -> Result<PikeVM, BuildError> {
115        self.build_many(&[pattern])
116    }
117
118    /// Compiles multiple regular expression patterns into a single [`PikeVM`].
119    pub fn build_many<P: AsRef<str>>(&self, patterns: &[P]) -> Result<PikeVM, BuildError> {
120        let nfa = self.thompson.build_many(patterns)?;
121        self.build_from_nfa(nfa)
122    }
123
124    /// Compiles a Thompson NFA into a [`PikeVM`].
125    pub fn build_from_nfa(
126        &self,
127        nfa: regex_automata::nfa::thompson::NFA,
128    ) -> Result<PikeVM, BuildError> {
129        nfa.look_set_any().available()?;
130        let wasm = compile_from_nfa(nfa.clone(), self.config)?;
131        Ok(PikeVM {
132            config: self.config,
133            nfa,
134            wasm,
135        })
136    }
137
138    /// Configures the builder with the given [`Config`].
139    pub fn configure(&mut self, config: Config) -> &mut Builder {
140        self.config = self.config.overwrite(config);
141        self
142    }
143
144    /// Configures the syntax options for the underlying regex compiler.
145    pub fn syntax(&mut self, config: regex_automata::util::syntax::Config) -> &mut Builder {
146        self.thompson.syntax(config);
147        self
148    }
149
150    /// Configures the Thompson NFA compiler options.
151    pub fn thompson(&mut self, config: regex_automata::nfa::thompson::Config) -> &mut Builder {
152        self.thompson.configure(config);
153        self
154    }
155}
156
157/// A compiled regular expression represented as a Pike VM, ready for matching.
158#[derive(Debug)]
159pub struct PikeVM {
160    config: Config,
161    nfa: regex_automata::nfa::thompson::NFA,
162    wasm: CompiledRegex,
163}
164
165impl PikeVM {
166    /// Compiles a single regular expression pattern into a new [`PikeVM`] using
167    /// the default builder.
168    pub fn new(pattern: &str) -> Result<PikeVM, BuildError> {
169        PikeVM::builder().build(pattern)
170    }
171
172    /// Compiles multiple regular expression patterns into a single new
173    /// [`PikeVM`] using the default builder.
174    pub fn new_many<P: AsRef<str>>(patterns: &[P]) -> Result<PikeVM, BuildError> {
175        PikeVM::builder().build_many(patterns)
176    }
177
178    /// Creates a new [`PikeVM`] directly from a Thompson NFA using the default
179    /// builder.
180    pub fn new_from_nfa(nfa: regex_automata::nfa::thompson::NFA) -> Result<PikeVM, BuildError> {
181        PikeVM::builder().build_from_nfa(nfa)
182    }
183
184    /// Creates a [`PikeVM`] that always matches the empty string at any
185    /// position.
186    pub fn always_match() -> Result<PikeVM, BuildError> {
187        let nfa = regex_automata::nfa::thompson::NFA::always_match();
188        PikeVM::new_from_nfa(nfa)
189    }
190
191    /// Creates a [`PikeVM`] that never matches.
192    pub fn never_match() -> Result<PikeVM, BuildError> {
193        let nfa = regex_automata::nfa::thompson::NFA::never_match();
194        PikeVM::new_from_nfa(nfa)
195    }
196
197    /// Returns a new default [`Config`] for configuring a [`Builder`].
198    pub fn config() -> Config {
199        Config::new()
200    }
201
202    /// Returns a new default [`Builder`] for compiling regular expressions.
203    pub fn builder() -> Builder {
204        Builder::new()
205    }
206
207    /// Returns the number of patterns compiled into this PikeVM.
208    pub fn pattern_len(&self) -> usize {
209        self.nfa.pattern_len()
210    }
211
212    /// Return the config for this `PikeVM`.
213    ///
214    /// Note that this is the configuration used to *build* the PikeVM,
215    /// not necessarily the configuration used for a specific match operation.
216    #[inline]
217    pub fn get_config(&self) -> &Config {
218        &self.config
219    }
220
221    /// Returns a reference to the underlying NFA.
222    ///
223    /// This is the NFA that was compiled into the PikeVM.
224    #[inline]
225    pub fn get_nfa(&self) -> &regex_automata::nfa::thompson::NFA {
226        &self.nfa
227    }
228
229    /// Returns a reference to the compiled WASM bytes.
230    ///
231    /// These bytes represent the compiled PikeVM logic.
232    #[inline]
233    pub fn get_wasm(&self) -> &[u8] {
234        self.wasm.as_ref()
235    }
236}
237
238impl PikeVM {
239    // TODO: Need to implement `is_match`, `find`, `captures`, `find_iter`,
240    // `captures_iter`
241}