正则表达式转 NFA

实现

结合 上一篇文章 的代码,实现了正则表达式转 NFA,主要做了三件小事

  1. 转义字符
  2. 将隐式 concatenation 转为显式运算符&
  3. 表达式转 NFA

re.go

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package main

func re_to_nfa(exp string) *nfa {
priority := map[byte]int{
'(': 1,
'|': 2,
'&': 3,
}
var (
opstack []byte
nfastack []*nfa
)
apply_op := func(op byte) {
switch op {
case '*':
nfastack[len(nfastack)-1] = nfa_star(nfastack[len(nfastack)-1])
case '&':
nfastack[len(nfastack)-2] = nfa_cat(nfastack[len(nfastack)-2], nfastack[len(nfastack)-1])
nfastack = nfastack[:len(nfastack)-1]
case '|':
nfastack[len(nfastack)-2] = nfa_or(nfastack[len(nfastack)-2], nfastack[len(nfastack)-1])
nfastack = nfastack[:len(nfastack)-1]
}
}
add_op := func(op byte) {
switch op {
case '*':
apply_op('*')
case ')':
for len(opstack) > 0 {
top := opstack[len(opstack)-1]
opstack = opstack[:len(opstack)-1]
if top == '(' {
break
}
apply_op(top)
}
case '&', '|':
if len(opstack) > 0 {
prio := priority[op]
for len(opstack) > 0 {
top := opstack[len(opstack)-1]
if priority[top] < prio {
break
}
opstack = opstack[:len(opstack)-1]
apply_op(op)
}
}
opstack = append(opstack, op)
case '(':
opstack = append(opstack, op)
}
}
cat := false
escape := false
for i := 0; i < len(exp); i++ {
c := exp[i]
if escape {
switch c {
case 'n':
c = '\n'
case 't':
c = '\t'
case 'r':
c = '\r'
}
escape = false
} else {
switch c {
case '\\':
escape = true
continue
case '(':
if cat {
add_op('&')
}
fallthrough
case '*', '|', ')':
cat = c != '|' && c != '('
add_op(c)
continue
}
}
if cat {
add_op('&')
}
nfastack = append(nfastack, nfa_char(c))
cat = true
}
for i := len(opstack) - 1; i >= 0; i-- {
apply_op(opstack[i])
}
return nfastack[0]
}

写了一个玩具 re0