wahgex_core/compile/
lookaround.rs

1//! This module contains types and functions related to the implementation of
2//! [`Look`]s in WASM.
3
4use std::alloc::{Layout, LayoutError};
5
6use regex_automata::{
7    nfa::thompson::NFA,
8    util::look::{Look, LookMatcher, LookSet},
9};
10use wasm_encoder::{BlockType, InstructionSink, MemArg, NameMap, ValType};
11
12use crate::util::repeat;
13
14use super::{
15    context::{
16        ActiveDataSegment, CompileContext, FunctionDefinition, FunctionIdx, FunctionSignature,
17    },
18    BuildError,
19};
20
21/// TODO: Write docs for this item
22#[derive(Debug)]
23pub struct LookLayout {
24    is_word_byte_table: Option<IsWordByteTable>,
25}
26
27#[derive(Debug)]
28struct IsWordByteTable {
29    position: u64,
30}
31
32impl LookLayout {
33    /// This lookup table is true for bytes which are considered "word" unicode
34    /// characters.
35    ///
36    /// The logic is copied directly from <https://github.com/rust-lang/regex/blob/master/regex-automata/src/util/utf8.rs#L17-L37>
37    /// As the comment on the function (in the link) mentions, no bit-rot
38    /// because this will not change.
39    const UTF8_IS_WORD_BYTE_LUT: [bool; 256] = {
40        let mut set = [false; 256];
41        set[b'_' as usize] = true;
42
43        let mut byte = b'0';
44        while byte <= b'9' {
45            set[byte as usize] = true;
46            byte += 1;
47        }
48        byte = b'A';
49        while byte <= b'Z' {
50            set[byte as usize] = true;
51            byte += 1;
52        }
53        byte = b'a';
54        while byte <= b'z' {
55            set[byte as usize] = true;
56            byte += 1;
57        }
58        set
59    };
60
61    /// TODO: Write docs for item
62    pub fn new(
63        ctx: &mut CompileContext,
64        mut overall: Layout,
65    ) -> Result<(Layout, Self), LayoutError> {
66        let look_set = modified_lookset_for_dependencies(&ctx.nfa);
67        let is_word_byte_table = if needs_is_word_byte_lut(look_set) {
68            let (table_layout, _table_stride) = repeat(&Layout::new::<u8>(), 256)?;
69
70            let (new_overall, table_pos) = overall.extend(table_layout)?;
71            overall = new_overall;
72
73            ctx.sections.add_active_data_segment(ActiveDataSegment {
74                name: "utf8_is_word_byte_table".into(),
75                data: Self::UTF8_IS_WORD_BYTE_LUT
76                    .into_iter()
77                    .map(|b| b as u8)
78                    .collect(),
79                position: table_pos,
80            });
81
82            Some(IsWordByteTable {
83                position: table_pos.try_into().expect("position should fit in u64"),
84            })
85        } else {
86            None
87        };
88
89        Ok((overall, Self { is_word_byte_table }))
90    }
91}
92
93/// TODO: Write docs for this item
94#[derive(Debug)]
95pub struct LookFunctions {
96    look_matches: [Option<FunctionIdx>; Self::NUM_LOOKS],
97}
98
99impl LookFunctions {
100    // Look::WordEndHalfUnicode.as_repr() is 1 << 17. Its ilog2() is 17.
101    // This means indices for look_matches range from 0 to 17, requiring an array of
102    // size 18.
103    const NUM_LOOKS: usize = (Look::WordEndHalfUnicode.as_repr().ilog2() as usize) + 1;
104
105    /// TODO: Write docs for this item
106    pub fn new(ctx: &mut CompileContext, layout: &LookLayout) -> Result<Self, BuildError> {
107        let mut look_matches = [None; Self::NUM_LOOKS];
108        let look_set = modified_lookset_for_dependencies(&ctx.nfa);
109
110        if look_set.is_empty() {
111            return Ok(Self { look_matches });
112        }
113
114        for look in look_set.iter() {
115            if is_unsupported_lookaround(look) {
116                // TODO: Need to implement the rest of the lookaround assertions
117                return Err(BuildError::unsupported(format!(
118                    "{look:?}/{} is not yet implemented",
119                    look.as_char()
120                )));
121            }
122
123            let sig = lookaround_fn_signature(lookaround_fn_name(look));
124
125            let func = ctx.declare_function(sig);
126            look_matches[look.as_repr().ilog2() as usize] = Some(func);
127        }
128
129        let look_matcher = ctx.nfa.look_matcher().clone();
130        for (look, func_idx) in look_set
131            .iter()
132            .map(|look| (look, look_matches[look.as_repr().ilog2() as usize]))
133        {
134            let func_def = match look {
135                Look::Start => Self::is_start_fn(),
136                Look::End => Self::is_end_fn(),
137                Look::StartLF => Self::is_start_lf_fn(&look_matcher),
138                Look::EndLF => Self::is_end_lf_fn(&look_matcher),
139                Look::StartCRLF => Self::is_start_crlf_fn(),
140                Look::EndCRLF => Self::is_end_crlf_fn(),
141                Look::WordAscii => Self::is_word_ascii_fn(
142                    layout
143                        .is_word_byte_table
144                        .as_ref()
145                        .expect("should have generated table"),
146                ),
147                Look::WordAsciiNegate => Self::is_word_ascii_negate_fn(
148                    look_matches[Look::WordAscii.as_repr().ilog2() as usize]
149                        // See dependency in `modified_lookset_for_dependencies`
150                        .expect("should have generated `look_is_word_ascii` function"),
151                ),
152                Look::WordStartAscii => Self::is_word_start_ascii_fn(
153                    layout
154                        .is_word_byte_table
155                        .as_ref()
156                        .expect("should have generated table"),
157                ),
158                Look::WordEndAscii => Self::is_word_end_ascii_fn(
159                    layout
160                        .is_word_byte_table
161                        .as_ref()
162                        .expect("should have generated table"),
163                ),
164                Look::WordStartHalfAscii => Self::is_word_start_half_ascii_fn(
165                    layout
166                        .is_word_byte_table
167                        .as_ref()
168                        .expect("should have generated table"),
169                ),
170                Look::WordEndHalfAscii => Self::is_word_end_half_ascii_fn(
171                    layout
172                        .is_word_byte_table
173                        .as_ref()
174                        .expect("should have generated table"),
175                ),
176                _ => unreachable!("Should be unreachable due to first loop through lookset"),
177            };
178
179            ctx.define_function(
180                func_idx.expect("should have generated function for supported lookaround"),
181                func_def,
182            );
183        }
184
185        Ok(Self { look_matches })
186    }
187
188    /// TODO: Write docs for item
189    pub fn look_matcher(&self, look: Look) -> Option<FunctionIdx> {
190        self.look_matches[look.as_repr().ilog2() as usize]
191    }
192
193    fn is_start_fn() -> FunctionDefinition {
194        let locals_name_map = lookaround_fn_common_name_map();
195
196        // Sketch:
197        // ```rust
198        // return at_offset == 0;
199        // ```
200
201        let mut body = wasm_encoder::Function::new([]);
202        // TODO(opt): Need to figure out a inlining strategy because this function is
203        // tiny and wasmtime doesn't do inlining.
204        body.instructions()
205            // at_offset == 0
206            .local_get(2)
207            .i64_eqz()
208            .end();
209
210        FunctionDefinition {
211            body,
212            locals_name_map,
213            labels_name_map: None,
214            branch_hints: None,
215        }
216    }
217
218    fn is_end_fn() -> FunctionDefinition {
219        let locals_name_map = lookaround_fn_common_name_map();
220
221        // Sketch:
222        // ```rust
223        // return at_offset == haystack_len;
224        // ```
225
226        let mut body = wasm_encoder::Function::new([]);
227        body.instructions()
228            // at_offset == haystack_len
229            .local_get(1)
230            .local_get(2)
231            .i64_eq()
232            .end();
233
234        FunctionDefinition {
235            body,
236            locals_name_map,
237            labels_name_map: None,
238            branch_hints: None,
239        }
240    }
241
242    fn is_start_lf_fn(look_matcher: &LookMatcher) -> FunctionDefinition {
243        let locals_name_map = lookaround_fn_common_name_map();
244
245        // Sketch:
246        // ```rust
247        // return at_offset == 0 || haystack[at_offset - 1] == lineterm
248        // ```
249
250        let mut body = wasm_encoder::Function::new([]);
251        body.instructions()
252            // at_offset == 0
253            .local_get(2)
254            .i64_eqz()
255            .if_(BlockType::Empty)
256            // TODO(opt): is the branch better here? Or should it just be an unconditional i32.or
257            .i32_const(true as i32)
258            .return_()
259            .end()
260            // haystack[at_offset - 1] == lineterm
261            .local_get(2)
262            .i64_const(1)
263            .i64_sub()
264            .local_get(0)
265            .i64_add()
266            .i32_load8_u(MemArg {
267                offset: 0,
268                align: 0,        // loading single byte
269                memory_index: 0, // haystack
270            })
271            .i32_const(look_matcher.get_line_terminator() as i32)
272            .i32_eq()
273            .end();
274
275        FunctionDefinition {
276            body,
277            locals_name_map,
278            labels_name_map: None,
279            branch_hints: None,
280        }
281    }
282
283    fn is_end_lf_fn(look_matcher: &LookMatcher) -> FunctionDefinition {
284        let locals_name_map = lookaround_fn_common_name_map();
285
286        // Sketch:
287        // ```rust
288        // return at_offset == haystack_len || haystack[at] == self.lineterm.0
289        // ```
290
291        let mut body = wasm_encoder::Function::new([]);
292        body.instructions()
293            // at_offset == haystack_len
294            .local_get(2)
295            .local_get(1)
296            .i64_eq()
297            .if_(BlockType::Empty)
298            // TODO(opt): is the branch better here? Or should it just be an unconditional i32.or
299            .i32_const(true as i32)
300            .return_()
301            .end()
302            // haystack[at_offset] == lineterm
303            .local_get(2)
304            .local_get(0)
305            .i64_add()
306            .i32_load8_u(MemArg {
307                offset: 0,
308                align: 0,        // loading single byte
309                memory_index: 0, // haystack
310            })
311            .i32_const(look_matcher.get_line_terminator() as i32)
312            .i32_eq()
313            .end();
314
315        FunctionDefinition {
316            body,
317            locals_name_map,
318            labels_name_map: None,
319            branch_hints: None,
320        }
321    }
322
323    fn is_start_crlf_fn() -> FunctionDefinition {
324        let locals_name_map = lookaround_fn_common_name_map();
325
326        // Sketch:
327        // ```rust
328        // if at_offset == 0 {
329        //     return true;
330        // }
331        // if haystack[at_offset - 1] == b'\n' {
332        //     return true;
333        // }
334        // if haystack[at_offset - 1] != b'\r' {
335        //     return false;
336        // }
337        // if at_offset >= haystack_len {
338        //     return true;
339        // }
340        // return haystack[at_offset] != b'\n';
341        // ```
342
343        let mut body = wasm_encoder::Function::new([]);
344        body.instructions()
345            // at == 0
346            .local_get(2)
347            .i64_eqz()
348            .if_(BlockType::Empty)
349            .i32_const(true as i32)
350            .return_()
351            .end()
352            // haystack[at - 1] == b'\n'
353            .local_get(2)
354            .i64_const(1)
355            .i64_sub()
356            .local_get(0)
357            .i64_add()
358            .i32_load8_u(MemArg {
359                offset: 0,
360                align: 0,        // byte alignment
361                memory_index: 0, // haystack
362            })
363            .i32_const(b'\n' as i32)
364            .i32_eq()
365            .if_(BlockType::Empty)
366            .i32_const(true as i32)
367            .return_()
368            .end()
369            // haystack[at - 1] != b'\r'
370            .local_get(2)
371            .i64_const(1)
372            .i64_sub()
373            .local_get(0)
374            .i64_add()
375            .i32_load8_u(MemArg {
376                offset: 0,
377                align: 0,        // byte alignment
378                memory_index: 0, // haystack
379            })
380            .i32_const(b'\r' as i32)
381            .i32_ne()
382            .if_(BlockType::Empty)
383            .i32_const(false as i32)
384            .return_()
385            .end()
386            // at >= haystack_len
387            .local_get(2)
388            .local_get(1)
389            .i64_ge_u()
390            .if_(BlockType::Empty)
391            .i32_const(true as i32)
392            .return_()
393            .end()
394            // haystack[at] != b'\n'
395            .local_get(2)
396            .local_get(0)
397            .i64_add()
398            .i32_load8_u(MemArg {
399                offset: 0,
400                align: 0,        // byte alignment
401                memory_index: 0, // haystack
402            })
403            .i32_const(b'\n' as i32)
404            .i32_ne()
405            .end();
406
407        FunctionDefinition {
408            body,
409            locals_name_map,
410            labels_name_map: None,
411            branch_hints: None,
412        }
413    }
414
415    fn is_end_crlf_fn() -> FunctionDefinition {
416        let locals_name_map = lookaround_fn_common_name_map();
417
418        // Sketch:
419        // ```rust
420        // at == haystack.len()
421        //     || haystack[at] == b'\r'
422        //     || (haystack[at] == b'\n'
423        //         && (at == 0 || haystack[at - 1] != b'\r'))
424        // if at == haystack.len() {
425        //     return true;
426        // }
427        // if haystack[at] == b'\r' {
428        //     return true;
429        // }
430        // if haystack[at] != b'\n' {
431        //     return false;
432        // }
433        // if at == 0 {
434        //     return true;
435        // }
436        // return haystack[at - 1] != b'\r';
437        // ```
438
439        let mut body = wasm_encoder::Function::new([]);
440
441        body.instructions()
442            // at == haystack.len()
443            .local_get(2)
444            .local_get(1)
445            .i64_eq()
446            .if_(BlockType::Empty)
447            .i32_const(true as i32)
448            .return_()
449            .end()
450            // haystack[at] == b'\r'
451            .local_get(2)
452            .local_get(0)
453            .i64_add()
454            .i32_load8_u(MemArg {
455                offset: 0,
456                align: 0,        // byte alignment
457                memory_index: 0, // haystack
458            })
459            .i32_const(b'\r' as i32)
460            .i32_eq()
461            .if_(BlockType::Empty)
462            .i32_const(true as i32)
463            .return_()
464            .end()
465            // haystack[at] != b'\n'
466            .local_get(2)
467            .local_get(0)
468            .i64_add()
469            .i32_load8_u(MemArg {
470                offset: 0,
471                align: 0,        // byte alignment
472                memory_index: 0, // haystack
473            })
474            .i32_const(b'\n' as i32)
475            .i32_ne()
476            .if_(BlockType::Empty)
477            .i32_const(false as i32)
478            .return_()
479            .end()
480            // at == 0
481            .local_get(2)
482            .i64_eqz()
483            .if_(BlockType::Empty)
484            .i32_const(true as i32)
485            .return_()
486            .end()
487            // haystack[at - 1] != b'\r'
488            .local_get(2)
489            .i64_const(1)
490            .i64_sub()
491            .local_get(0)
492            .i64_add()
493            .i32_load8_u(MemArg {
494                offset: 0,
495                align: 0,        // byte alignment
496                memory_index: 0, // haystack
497            })
498            .i32_const(b'\r' as i32)
499            .i32_ne()
500            .end();
501
502        FunctionDefinition {
503            body,
504            locals_name_map,
505            labels_name_map: None,
506            branch_hints: None,
507        }
508    }
509
510    fn is_word_ascii_fn(is_word_byte_table: &IsWordByteTable) -> FunctionDefinition {
511        let mut locals_name_map = lookaround_fn_common_name_map();
512        // Locals
513        locals_name_map.append(3, "word_before");
514
515        // Sketch:
516        // ```rust
517        // ...
518        // return word_before != word_after;
519        // ```
520
521        let mut body = wasm_encoder::Function::new([(2, ValType::I32)]);
522
523        let mut instructions = body.instructions();
524        Self::word_before_ascii_instructions(&mut instructions, is_word_byte_table);
525        instructions.local_set(3);
526
527        Self::word_after_ascii_instructions(&mut instructions, is_word_byte_table);
528        instructions
529            // return word_before != word_after;
530            .local_get(3)
531            .i32_ne()
532            .end();
533
534        FunctionDefinition {
535            body,
536            locals_name_map,
537            labels_name_map: None,
538            branch_hints: None,
539        }
540    }
541
542    fn is_word_ascii_negate_fn(is_word_ascii: FunctionIdx) -> FunctionDefinition {
543        let locals_name_map = lookaround_fn_common_name_map();
544
545        // Sketch:
546        // ```rust
547        // return !look_is_word_ascii(haystack_ptr, haystack_len, at_offset);
548        // ```
549
550        let mut body = wasm_encoder::Function::new([]);
551        body.instructions()
552            .local_get(0)
553            .local_get(1)
554            .local_get(2)
555            .call(is_word_ascii.into())
556            .i32_const(1)
557            .i32_xor()
558            .end();
559
560        FunctionDefinition {
561            body,
562            locals_name_map,
563            labels_name_map: None,
564            branch_hints: None,
565        }
566    }
567
568    fn is_word_start_ascii_fn(is_word_byte_table: &IsWordByteTable) -> FunctionDefinition {
569        let mut locals_name_map = lookaround_fn_common_name_map();
570        // Locals
571        locals_name_map.append(3, "word_before");
572
573        // Sketch:
574        // ```rust
575        // ...
576        // return !word_before && word_after;
577        // ```
578
579        let mut body = wasm_encoder::Function::new([(2, ValType::I32)]);
580
581        let mut instructions = body.instructions();
582        Self::word_before_ascii_instructions(&mut instructions, is_word_byte_table);
583        instructions.local_set(3);
584
585        Self::word_after_ascii_instructions(&mut instructions, is_word_byte_table);
586        instructions
587            // return !word_before && word_after;
588            .local_get(3)
589            .i32_const(1)
590            .i32_xor()
591            .i32_and()
592            .end();
593
594        FunctionDefinition {
595            body,
596            locals_name_map,
597            labels_name_map: None,
598            branch_hints: None,
599        }
600    }
601
602    fn is_word_end_ascii_fn(is_word_byte_table: &IsWordByteTable) -> FunctionDefinition {
603        let mut locals_name_map = lookaround_fn_common_name_map();
604        // Locals
605        locals_name_map.append(3, "word_before");
606
607        // Sketch:
608        // ```rust
609        // ...
610        // return word_before && !word_after;
611        // ```
612
613        let mut body = wasm_encoder::Function::new([(2, ValType::I32)]);
614
615        let mut instructions = body.instructions();
616        Self::word_before_ascii_instructions(&mut instructions, is_word_byte_table);
617        instructions.local_set(3);
618
619        Self::word_after_ascii_instructions(&mut instructions, is_word_byte_table);
620        instructions
621            // return word_before && !word_after;
622            .i32_const(1)
623            .i32_xor()
624            .local_get(3)
625            .i32_and()
626            .end();
627
628        FunctionDefinition {
629            body,
630            locals_name_map,
631            labels_name_map: None,
632            branch_hints: None,
633        }
634    }
635
636    fn is_word_start_half_ascii_fn(is_word_byte_table: &IsWordByteTable) -> FunctionDefinition {
637        let locals_name_map = lookaround_fn_common_name_map();
638
639        // Sketch:
640        // ```rust
641        // ...
642        // return !word_before;
643        // ```
644
645        let mut body = wasm_encoder::Function::new([]);
646
647        let mut instructions = body.instructions();
648        Self::word_before_ascii_instructions(&mut instructions, is_word_byte_table);
649        instructions
650            // return !word_before;
651            .i32_const(1)
652            .i32_xor()
653            .end();
654
655        FunctionDefinition {
656            body,
657            locals_name_map,
658            labels_name_map: None,
659            branch_hints: None,
660        }
661    }
662
663    fn is_word_end_half_ascii_fn(is_word_byte_table: &IsWordByteTable) -> FunctionDefinition {
664        let locals_name_map = lookaround_fn_common_name_map();
665
666        // Sketch:
667        // ```rust
668        // ...
669        // return !word_after;
670        // ```
671
672        let mut body = wasm_encoder::Function::new([]);
673        let mut instructions = body.instructions();
674
675        Self::word_after_ascii_instructions(&mut instructions, is_word_byte_table);
676        instructions
677            // return !word_after;
678            .i32_const(1)
679            .i32_xor()
680            .end();
681
682        FunctionDefinition {
683            body,
684            locals_name_map,
685            labels_name_map: None,
686            branch_hints: None,
687        }
688    }
689
690    fn word_before_ascii_instructions(
691        instructions: &mut InstructionSink,
692        is_word_byte_table: &IsWordByteTable,
693    ) {
694        // Sketch:
695        // ```rust
696        // if at_offset == 0 {
697        //    false
698        // } else {
699        //    is_word_byte_table[haystack_ptr[at_offset - 1]]
700        // }
701        // ```
702
703        instructions
704            // if at_offset == 0 {
705            .local_get(2)
706            .i64_eqz()
707            .if_(BlockType::Result(ValType::I32))
708            .i32_const(false as i32)
709            .else_()
710            // word_before = is_word_byte_table[haystack_ptr[at_offset - 1]];
711            .local_get(2)
712            .i64_const(1)
713            .i64_sub()
714            .local_get(0)
715            .i64_add()
716            .i32_load8_u(MemArg {
717                offset: 0,
718                align: 0,        // byte alignement
719                memory_index: 0, // haystack
720            })
721            .i64_extend_i32_u()
722            .i32_load8_u(MemArg {
723                offset: is_word_byte_table.position,
724                align: 0, // byte alignment
725                memory_index: 1,
726            })
727            .end();
728    }
729
730    fn word_after_ascii_instructions(
731        instructions: &mut InstructionSink,
732        is_word_byte_table: &IsWordByteTable,
733    ) {
734        // Sketch:
735        // ```rust
736        // if at_offset >= haystack_len {
737        //    false
738        // } else {
739        //    is_word_byte_table[haystack_ptr[at_offset]]
740        // }
741        // ```
742
743        instructions
744            // let word_after;
745            // if at_offset >= haystack_len {
746            .local_get(2)
747            .local_get(1)
748            .i64_ge_u()
749            .if_(BlockType::Result(ValType::I32))
750            .i32_const(false as i32)
751            .else_()
752            // word_after = is_word_byte_table[haystack_ptr[at_offset]];
753            .local_get(2)
754            .local_get(0)
755            .i64_add()
756            .i32_load8_u(MemArg {
757                offset: 0,
758                align: 0,        // byte alignment
759                memory_index: 0, // haystack
760            })
761            .i64_extend_i32_u()
762            .i32_load8_u(MemArg {
763                offset: is_word_byte_table.position,
764                align: 0, // byte alignment
765                memory_index: 1,
766            })
767            .end();
768    }
769}
770
771fn lookaround_fn_signature(name: &str) -> FunctionSignature {
772    FunctionSignature {
773        name: name.into(),
774        // [haystack_ptr, haystack_len, at_offset]
775        params_ty: &[ValType::I64, ValType::I64, ValType::I64],
776        // [is_match]
777        results_ty: &[ValType::I32],
778        export: false,
779    }
780}
781
782fn lookaround_fn_common_name_map() -> NameMap {
783    let mut locals_name_map = NameMap::new();
784    locals_name_map.append(0, "haystack_ptr");
785    locals_name_map.append(1, "haystack_len");
786    locals_name_map.append(2, "at_offset");
787
788    locals_name_map
789}
790
791fn lookaround_fn_name(look: Look) -> &'static str {
792    match look {
793        Look::Start => "look_is_start",
794        Look::End => "look_is_end",
795        Look::StartLF => "look_is_start_lf",
796        Look::EndLF => "look_is_end_lf",
797        Look::StartCRLF => "look_is_start_crlf",
798        Look::EndCRLF => "look_is_end_crlf",
799        Look::WordAscii => "look_is_word_ascii",
800        Look::WordAsciiNegate => "look_is_word_ascii_negate",
801        Look::WordUnicode => "look_is_word_unicode",
802        Look::WordUnicodeNegate => "look_is_word_unicode_negate",
803        Look::WordStartAscii => "look_is_word_start_ascii",
804        Look::WordEndAscii => "look_is_word_end_ascii",
805        Look::WordStartUnicode => "look_is_word_start_unicode",
806        Look::WordEndUnicode => "look_is_word_end_unicode",
807        Look::WordStartHalfAscii => "look_is_word_start_half_ascii",
808        Look::WordEndHalfAscii => "look_is_word_end_half_ascii",
809        Look::WordStartHalfUnicode => "look_is_word_start_half_unicode",
810        Look::WordEndHalfUnicode => "look_is_word_end_half_unicode",
811    }
812}
813
814fn is_unsupported_lookaround(look: Look) -> bool {
815    matches!(
816        look,
817        Look::WordUnicode
818            | Look::WordUnicodeNegate
819            | Look::WordStartUnicode
820            | Look::WordEndUnicode
821            | Look::WordStartHalfUnicode
822            | Look::WordEndHalfUnicode
823    )
824}
825
826fn needs_is_word_byte_lut(look_set: LookSet) -> bool {
827    look_set.contains(Look::WordAscii)
828        || look_set.contains(Look::WordAsciiNegate)
829        || look_set.contains(Look::WordStartAscii)
830        || look_set.contains(Look::WordEndAscii)
831        || look_set.contains(Look::WordStartHalfAscii)
832        || look_set.contains(Look::WordEndHalfAscii)
833}
834
835fn modified_lookset_for_dependencies(nfa: &NFA) -> LookSet {
836    let mut look_set = nfa.look_set_any();
837
838    // This dependency exists because `look_is_word_ascii_negate` directly calls
839    // `look_is_word_ascii`
840    if look_set.contains(Look::WordAsciiNegate) {
841        look_set = look_set.insert(Look::WordAscii);
842    }
843
844    look_set
845}