wahgex_core/compile/
pattern.rs

1//! This module contains functions and lookup tables relating to multiple
2//! patterns in one compiled regex.
3
4use std::alloc::{Layout, LayoutError};
5
6use regex_automata::nfa::thompson::NFA;
7use wasm_encoder::{NameMap, ValType};
8
9use crate::util::repeat;
10
11use super::{
12    context::{
13        ActiveDataSegment, CompileContext, Function, FunctionDefinition, FunctionIdx,
14        FunctionSignature,
15    },
16    instructions::InstructionSinkExt,
17};
18
19/// TODO: Write docs for item
20#[derive(Debug)]
21pub struct PatternLayout {
22    pattern_start_table_pos: usize,
23    pattern_start_stride: usize,
24}
25
26impl PatternLayout {
27    /// TODO: Write docs for item
28    pub fn new(ctx: &mut CompileContext, overall: Layout) -> Result<(Layout, Self), LayoutError> {
29        let pattern_start_table_data = ctx
30            .nfa
31            .patterns()
32            // WASM assumes little endian byte ordering: https://webassembly.org/docs/portability/
33            .flat_map(|pid| ctx.nfa.start_pattern(pid).unwrap().as_u32().to_le_bytes())
34            .collect::<Vec<_>>();
35
36        let (pattern_start_table, pattern_start_stride) =
37            repeat(ctx.state_id_layout(), ctx.nfa.pattern_len())?;
38        let (overall, pattern_start_table_pos) = overall.extend(pattern_start_table)?;
39
40        ctx.sections.add_active_data_segment(ActiveDataSegment {
41            name: "pattern_start_table".into(),
42            position: pattern_start_table_pos,
43            data: pattern_start_table_data,
44        });
45
46        Ok((
47            overall,
48            Self {
49                pattern_start_table_pos,
50                pattern_start_stride,
51            },
52        ))
53    }
54}
55
56/// TODO: Write docs for item
57#[derive(Debug)]
58pub struct PatternFunctions {
59    pub lookup_start: FunctionIdx,
60}
61
62impl PatternFunctions {
63    pub fn new(ctx: &mut CompileContext, layout: &PatternLayout) -> Self {
64        let start_id = ctx.add_function(Self::lookup_start_fn(
65            &ctx.nfa,
66            layout,
67            ctx.state_id_layout(),
68        ));
69
70        Self {
71            lookup_start: start_id,
72        }
73    }
74
75    fn lookup_start_fn(nfa: &NFA, layout: &PatternLayout, state_id_layout: &Layout) -> Function {
76        let mut locals_name_map = NameMap::new();
77        // Parameters
78        locals_name_map.append(0, "pattern_id");
79
80        // Sketch:
81        // ```rust
82        // if pattern_id >= nfa.patterns_len() {
83        //     return (0, false);
84        // }
85        //
86        // start_state_id = pattern_start_table[pattern_id];
87        // return (start_state_id, true);
88        // ```
89
90        let mut body = wasm_encoder::Function::new([]);
91        body.instructions()
92            // if pattern_id >= nfa.patterns_len() {
93            .local_get(0)
94            .i32_const(i32::from_ne_bytes(
95                u32::try_from(nfa.pattern_len()).unwrap().to_ne_bytes(),
96            ))
97            .i32_ge_u()
98            .if_(wasm_encoder::BlockType::Empty)
99            // return (0, false);
100            .i32_const(0)
101            .i32_const(false as i32)
102            .return_()
103            .end()
104            // start_state_id = pattern_start_table[pattern_id];
105            .local_get(0)
106            .i64_extend_i32_u()
107            .i64_const(i64::from_ne_bytes(
108                u64::try_from(layout.pattern_start_stride)
109                    .unwrap()
110                    .to_ne_bytes(),
111            ))
112            .i64_mul()
113            .state_id_load(
114                u64::try_from(layout.pattern_start_table_pos).unwrap(),
115                // state memory
116                state_id_layout,
117            )
118            .i32_const(true as i32)
119            .end();
120
121        Function {
122            sig: FunctionSignature {
123                name: "lookup_start_id".into(),
124                // [pattern_id]
125                params_ty: &[ValType::I32],
126                // [start_state_id, is_some]
127                results_ty: &[ValType::I32, ValType::I32],
128                export: false,
129            },
130            def: FunctionDefinition {
131                body,
132                locals_name_map,
133                labels_name_map: None,
134                branch_hints: None,
135            },
136        }
137    }
138}