1use input::{InputFunctions, InputLayout};
5use matching::MatchingFunctions;
6use state::{StateFunctions, StateLayout};
7
8pub use crate::error::BuildError;
9
10use self::context::CompileContext;
11
12mod context;
13mod epsilon_closure;
14pub mod input;
15mod instructions;
16mod lookaround;
17mod matching;
18mod pattern;
19mod sparse_set;
20mod state;
21mod transition;
22
23pub fn compile_from_nfa(
26 nfa: regex_automata::nfa::thompson::NFA,
27 config: super::Config,
28) -> Result<CompiledRegex, BuildError> {
29 let mut ctx = CompileContext::new(nfa, config);
30 let state_layout = StateLayout::new(&mut ctx)?;
31 let state_funcs = StateFunctions::new(&mut ctx, &state_layout)?;
32 let input_layout = InputLayout::new(&mut ctx)?;
33 let input_funcs =
34 InputFunctions::new(&mut ctx, &input_layout, state_funcs.pattern.lookup_start);
35 let _matching_funcs = MatchingFunctions::new(
36 &mut ctx,
37 &state_layout,
38 &state_funcs,
39 &input_layout,
40 &input_funcs,
41 );
42 let module: wasm_encoder::Module = ctx.compile(&state_layout.overall);
43
44 Ok(CompiledRegex {
45 wasm_bytes: module.finish(),
46 })
47}
48
/// A compiled regex, held as the serialized bytes of its WebAssembly module.
///
/// The bytes are available read-only through the `AsRef<[u8]>` implementation.
#[derive(Debug)]
pub struct CompiledRegex {
    /// Serialized module bytes.
    wasm_bytes: Vec<u8>,
}

impl AsRef<[u8]> for CompiledRegex {
    /// Borrow the serialized module as a byte slice.
    fn as_ref(&self) -> &[u8] {
        self.wasm_bytes.as_slice()
    }
}
61
62#[cfg(test)]
63mod tests {
64 use super::*;
65
66 pub fn setup_interpreter(
67 module_bytes: impl AsRef<[u8]>,
68 ) -> (
69 wasmi::Engine,
70 wasmi::Module,
71 wasmi::Store<()>,
72 wasmi::Instance,
73 ) {
74 let engine = wasmi::Engine::default();
75 let module = wasmi::Module::new(&engine, module_bytes).unwrap();
76 let mut store = wasmi::Store::new(&engine, ());
77 let linker = wasmi::Linker::<()>::new(&engine);
78 let instance = linker
79 .instantiate(&mut store, &module)
80 .unwrap()
81 .start(&mut store)
82 .unwrap();
83
84 (engine, module, store, instance)
85 }
86
87 #[track_caller]
88 pub fn wasm_print_module(module_bytes: impl AsRef<[u8]>) -> String {
89 let module_bytes = module_bytes.as_ref();
90 let wasm_text = wasmprinter::print_bytes(module_bytes);
91 if let Err(err) = wasmparser::validate(module_bytes) {
92 let mut wasm_text_with_offsets = String::new();
93 let print = wasmprinter::Config::new().print_offsets(true).print(
94 module_bytes,
95 &mut wasmprinter::PrintFmtWrite(&mut wasm_text_with_offsets),
96 );
97
98 match print {
99 Ok(()) => {
100 panic!("{err}:\n{wasm_text_with_offsets}")
101 },
102 Err(print_err) => panic!("{err}:\nUnable to print WAT: {print_err}"),
103 }
104 }
105 wasm_text.expect("should be able to print WASM module in WAT format")
106 }
107
108 fn compile(pattern: &str) -> Result<CompiledRegex, Box<dyn std::error::Error>> {
111 let nfa = regex_automata::nfa::thompson::NFA::new(pattern)?;
112
113 Ok(compile_from_nfa(nfa, crate::Config::new())?)
114 }
115
116 #[test]
117 fn empty_regex() {
118 let compiled = compile("").unwrap();
119 let pretty = wasm_print_module(&compiled);
120 insta::assert_snapshot!(pretty);
121 }
122
123 #[test]
124 fn simple_repetition() {
125 let compiled = compile("(?:abc)+").unwrap();
126 let pretty = wasm_print_module(&compiled);
127 insta::assert_snapshot!(pretty);
128 }
129
130 #[test]
131 fn sparse_transitions() {
132 let compiled = compile("a|b|d|e|g").unwrap();
133 let pretty = wasm_print_module(&compiled);
134 insta::assert_snapshot!(pretty);
135 }
136
137 #[test]
138 fn simple_lookaround() {
139 let compiled = compile("^hell worm$").unwrap();
140 let pretty = wasm_print_module(&compiled);
141 insta::assert_snapshot!(pretty);
142 }
143
144 #[test]
145 fn repeated_lookaround() {
146 let compiled = compile("(?:^|$)+").unwrap();
147 let pretty = wasm_print_module(&compiled);
148 insta::assert_snapshot!(pretty);
149 }
150
151 #[test]
152 fn lookaround_crlf() {
153 let compiled = compile("(?mR)^[a-z]+$").unwrap();
154 let pretty = wasm_print_module(&compiled);
155 insta::assert_snapshot!(pretty);
156 }
157
158 #[test]
159 fn lookaround_lf() {
160 let compiled = compile("(?m)^$").unwrap();
161 let pretty = wasm_print_module(&compiled);
162 insta::assert_snapshot!(pretty);
163 }
164
165 #[test]
166 fn lookaround_is_ascii_word() {
167 let compiled = compile(r"(?-u)hello\B").unwrap();
168 let pretty = wasm_print_module(&compiled);
169 insta::assert_snapshot!(pretty);
170 }
171
172 #[test]
173 fn lookaround_is_ascii_start_end() {
174 let compiled = compile(r"(?-u:\b{start}hello\b{end})").unwrap();
175 let pretty = wasm_print_module(&compiled);
176 insta::assert_snapshot!(pretty);
177 }
178
179 #[test]
180 fn lookaround_is_ascii_half_start_end() {
181 let compiled = compile(r"(?-u:\b{start-half}hello\b{end-half})").unwrap();
182 let pretty = wasm_print_module(&compiled);
183 insta::assert_snapshot!(pretty);
184 }
185}