NFA to DFA

关于实现

将 NFA 构造成有向图,nfa_state 为顶点,state 到下一个 state 之间,通过接受的 nfa_edge 转移。nfa_edge 接受字符或空串(epsilion)。
NFA 转 DFA 过程中,要把 e_closure(T) 算作一个 DFA 状态,所以在 nfa_to_dfa 之前要先 nfa_alloc_id,为每个 nfa_state 标记 id,以便后续 e_closure(T) 的结果方便比较。在我的实现里,我在把 e_closure(T) 返回的 set 构造成一个按 id 升序排序的状态数组,算 hash 加速查找(我用加法是偷懒)。主要算法可以看龙书不再多说。

实现

fa.go

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
package main

type dfa struct {
tran [][128]int
accept []bool
}

type dfa_state struct {
states []*nfa_state
hash int
i int
accept bool
}

type nfa_edge struct {
next *nfa_state
epsilion bool
ch byte
}

type nfa_state struct {
edges []*nfa_edge
i int
accept bool
}

func (s *nfa_state) add_e(next *nfa_state) {
s.edges = append(s.edges, &nfa_edge{epsilion: true, next: next})
}

func (s *nfa_state) add_char(c byte, next *nfa_state) {
s.edges = append(s.edges, &nfa_edge{ch: c, next: next})
}

type nfa struct {
start *nfa_state
end *nfa_state
}

func nfa_e() *nfa {
start := &nfa_state{}
end := &nfa_state{}
start.add_e(end)
return &nfa{start, end}
}

func nfa_char(c byte) *nfa {
start := &nfa_state{}
end := &nfa_state{}
start.add_char(c, end)
return &nfa{start, end}
}

func nfa_cat(a *nfa, b *nfa) *nfa {
*a.end = *b.start
return &nfa{a.start, b.end}
}

func nfa_or(a *nfa, b *nfa) *nfa {
start := &nfa_state{}
end := &nfa_state{}
start.add_e(a.start)
a.end.add_e(end)
start.add_e(b.start)
b.end.add_e(end)
return &nfa{start, end}
}

func nfa_star(a *nfa) *nfa {
start := &nfa_state{}
end := &nfa_state{}
start.add_e(a.start)
start.add_e(end)
a.end.add_e(end)
a.end.add_e(a.start)
return &nfa{start, end}
}

func nfa_alloc_id(n *nfa_state, id int) int {
if n.i != 0 {
return id
}
n.i = id
id++
for _, e := range n.edges {
id = nfa_alloc_id(e.next, id)
}
return id
}

func nfa_move(states []*nfa_state, c byte) []*nfa_state {
set := make(map[*nfa_state]struct{})
for _, st := range states {
for _, e := range st.edges {
if !e.epsilion && e.ch == c {
set[e.next] = struct{}{}
}
}
}
next := make([]*nfa_state, 0, len(set))
for k, _ := range set {
next = append(next, k)
}
return next
}

func e_closure(t []*nfa_state) *dfa_state {
stack := make([]*nfa_state, len(t))
set := make(map[*nfa_state]struct{}, len(t))
for i := 0; i < len(t); i++ {
set[t[i]] = struct{}{}
stack[i] = t[i]
}
for len(stack) > 0 {
t := stack[len(stack)-1]
stack = stack[:len(stack)-1]
for _, e := range t.edges {
if !e.epsilion {
continue
}
if _, ok := set[e.next]; !ok {
set[e.next] = struct{}{}
stack = append(stack, e.next)
}
}
}
hash := 0
ec := []*nfa_state{}
accept := false
for st, _ := range set {
if !accept {
accept = st.accept
}
ec = append(ec, st)
hash += st.i
for i := len(ec) - 2; i >= 0; i-- {
if ec[i].i <= st.i {
break
}
t := ec[i+1]
ec[i+1] = ec[i]
ec[i] = t
}
}
return &dfa_state{hash: hash, states: ec, accept: accept}
}

func in_states(states []*dfa_state, u *dfa_state) int {
for i, st := range states {
if u.hash != st.hash || len(u.states) != len(st.states) {
continue
}
diff := false
for j := 0; j < len(u.states); j++ {
if u.states[j] != st.states[j] {
diff = true
break
}
}
if !diff {
return i
}
}
return -1
}

func nfa_to_dfa(n *nfa) *dfa {
dstates := []*dfa_state{e_closure([]*nfa_state{n.start})}
dtran := make([][128]int, 1)
daccept := []bool{false}
stack := []int{0}
for len(stack) > 0 {
t := dstates[stack[len(stack)-1]]
stack = stack[:len(stack)-1]
charset := [128]bool{}
for _, st := range t.states {
for _, e := range st.edges {
if e.epsilion {
continue
}
charset[e.ch] = true
}
}
for a := range charset {
if !charset[a] {
continue
}
u := e_closure(nfa_move(t.states, byte(a)))
i := in_states(dstates, u)
if i == -1 {
u.i = len(dstates)
dstates = append(dstates, u)
dtran = append(dtran, [128]int{})
daccept = append(daccept, u.accept)
stack = append(stack, u.i)
} else {
u = dstates[i]
}
dtran[t.i][a] = u.i
}
}
return &dfa{tran: dtran, accept: daccept}
}

main.go

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
package main

func main() {
// (a|b)*abb
nfa := nfa_cat(
nfa_cat(
nfa_cat(
nfa_star(
nfa_or(
nfa_char(('a')),
nfa_char(('b')))),
nfa_char(('a'))),
nfa_char(('b'))),
nfa_char(('b')))
nfa_alloc_id(nfa.start, 1)
dfa := nfa_to_dfa(nfa)
for _, ln := range dfa.tran {
println(ln['a'], ln['b'])
}
}