Skip to content

Commit ba6346e

Browse files
authored
Merge pull request cch123#56 from wziww/master
compilers
2 parents cfe57fd + 6c789c6 commit ba6346e

File tree

1 file changed

+228
-1
lines changed

1 file changed

+228
-1
lines changed

compilers.md

Lines changed: 228 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -609,7 +609,7 @@ func main() {
609609
ast.Print(fset, f)
610610
}
611611
```
612-
output:
612+
#### AST_output:
613613
```shell
614614
0 *ast.File {
615615
1 . Package: 2:2
@@ -1012,6 +1012,233 @@ ok json1/sql 0.006s
10121012
```
10131013
10141014
## 类型检查
1015+
类型检查这一步大致可以分为`变量声明及作用域`和`表达式`两块内容。在遍历 AST 的过程中, 每遇到一个被使用的标识符, 都会先在当前作用域中查找是否有对应的声明, 如果没有找到, 则沿着作用域链逐级向上到父级作用域中查找
1016+
1017+
### 声明及作用域
1018+
作用域可大致分为几类
1019+
1. 全局
1020+
2. 对应函数体内
1021+
3. 代码块内 {...} // 单独一对花括号包裹的范围
1022+
```go
1023+
// 作用域结构体定义
1024+
type Scope struct {
1025+
parent *Scope
1026+
children []*Scope
1027+
elems map[string]Object // lazily allocated
1028+
pos, end token.Pos // scope extent; may be invalid
1029+
comment string // for debugging only
1030+
isFunc bool // set if this is a function scope (internal use only)
1031+
}
1032+
func (s *Scope) LookupParent(name string, pos token.Pos) (*Scope, Object) {
1033+
for ; s != nil; s = s.parent {
1034+
if obj := s.elems[name]; obj != nil && (!pos.IsValid() || obj.scopePos() <= pos) {
1035+
return s, obj
1036+
}
1037+
}
1038+
return nil, nil
1039+
}
1040+
```
1041+
回顾下 `AST file` 的数据结构
1042+
```go
1043+
type File struct {
1044+
Doc *CommentGroup // associated documentation; or nil
1045+
Package token.Pos // position of "package" keyword
1046+
Name *Ident // package name
1047+
Decls []Decl // top-level declarations; or nil
1048+
Scope *Scope // package scope (this file only)
1049+
Imports []*ImportSpec // imports in this file
1050+
Unresolved []*Ident // unresolved identifiers in this file
1051+
Comments []*CommentGroup // list of all comments in the source file
1052+
}
1053+
```
1054+
再结合之前输出的 [`AST`](#ast_output), 我们可以看到在生成 `AST` 的过程中, 大部分标识符 (`identifier`) 都能够直接确定其对应的声明, 例如函数声明中的函数名, 这类 `token` 会被成功解析为语法树中对应类型的一个节点
1055+
1056+
但是依旧存在一些在 `AST` 初步生成阶段无法被成功解析的标识符, 它们会被存放在 `Unresolved` 字段中, 比如上面输出中的 `int`。在类型检查阶段, 会从当前作用域开始逐级向父级作用域查找, 如果最终能够找到对应的定义, 则检查通过, 否则报出未声明 (undeclared) 错误
1057+
1058+
例:
1059+
```go
1060+
package main
1061+
1062+
import (
1063+
"go/ast"
1064+
"go/parser"
1065+
"go/token"
1066+
"go/types"
1067+
"log"
1068+
)
1069+
1070+
func main() {
1071+
src := `
1072+
1073+
package main
1074+
1075+
func main() {
1076+
var num1, num2 int
1077+
num1 += num2
1078+
_ = num1
1079+
testval++
1080+
return
1081+
}
1082+
`
1083+
1084+
// Initialize the parser.
1085+
fset := token.NewFileSet() // positions are relative to fset
1086+
f, err := parser.ParseFile(fset, "", src, parser.AllErrors|parser.ParseComments)
1087+
if err != nil {
1088+
log.Fatalln(err)
1089+
}
1090+
pkg, err := new(types.Config).Check("test.go", fset, []*ast.File{f}, nil)
1091+
if err != nil {
1092+
log.Fatal(err)
1093+
}
1094+
1095+
_ = pkg
1096+
}
1097+
1098+
```
1099+
output:
1100+
```shell
1101+
2021/09/20 15:19:01 9:3: undeclared name: testval
1102+
```
1103+
### 表达式检查
1104+
截取之前生成的`AST`中的一小段
1105+
`num1 += num2`
1106+
```shell
1107+
65 . . . . . 1: *ast.AssignStmt {
1108+
66 . . . . . . Lhs: []ast.Expr (len = 1) {
1109+
67 . . . . . . . 0: *ast.Ident {
1110+
68 . . . . . . . . NamePos: 6:3
1111+
69 . . . . . . . . Name: "num1"
1112+
70 . . . . . . . . Obj: *(obj @ 38)
1113+
71 . . . . . . . }
1114+
72 . . . . . . }
1115+
73 . . . . . . TokPos: 6:8
1116+
74 . . . . . . Tok: +=
1117+
75 . . . . . . Rhs: []ast.Expr (len = 1) {
1118+
76 . . . . . . . 0: *ast.Ident {
1119+
77 . . . . . . . . NamePos: 6:11
1120+
78 . . . . . . . . Name: "num2"
1121+
79 . . . . . . . . Obj: *(obj @ 48)
1122+
80 . . . . . . . }
1123+
81 . . . . . . }
1124+
82 . . . . . }
1125+
```
1126+
先看下这个简单的赋值表达式生成的树形结构
1127+
```mermaid
1128+
graph TB
1129+
1130+
A((op: +=))
1131+
B((exprL: num1))
1132+
C((exprR: num2))
1133+
A-->B
1134+
A-->C
1135+
```
1136+
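`num1 += num2` 在 `AST` 中是一个 `AssignStmt`, `go/types` 在检查这类复合赋值时, 会先把它当作二元运算 `num1 + num2` 交给下面的 `binary` 方法检查 (此时参数 `e` 为 nil), 再把结果赋值给左侧。对于当前这部分表达式检查, 需要进行的步骤为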
1137+
1. 确认当前操作符(+=)
1138+
2. 左子树表达式递归, 并确认表达式最终类型
1139+
3. 右子树表达式递归, 并确认表达式最终类型
1140+
4. 左右 expr 类型校验, 如符合当前操作符规则, 成功, 反之失败
1141+
1142+
```go
1143+
// The binary expression e may be nil. It's passed in for better error messages only.
1144+
func (check *Checker) binary(x *operand, e *ast.BinaryExpr, lhs, rhs ast.Expr, op token.Token) {
1145+
var y operand
1146+
1147+
check.expr(x, lhs) // 左子树表达式递归
1148+
check.expr(&y, rhs) // 右子树表达式递归
1149+
/* 先判断下特殊的操作类型 */
1150+
if x.mode == invalid {
1151+
return
1152+
}
1153+
if y.mode == invalid {
1154+
x.mode = invalid
1155+
x.expr = y.expr
1156+
return
1157+
}
1158+
1159+
if isShift(op) {
1160+
check.shift(x, &y, e, op)
1161+
return
1162+
}
1163+
1164+
check.convertUntyped(x, y.typ)
1165+
if x.mode == invalid {
1166+
return
1167+
}
1168+
check.convertUntyped(&y, x.typ)
1169+
if y.mode == invalid {
1170+
x.mode = invalid
1171+
return
1172+
}
1173+
1174+
if isComparison(op) {
1175+
check.comparison(x, &y, op)
1176+
return
1177+
}
1178+
/* 默认要求 x y 类型一致 */
1179+
if !check.identical(x.typ, y.typ) { // 类型校验
1180+
// only report an error if we have valid types
1181+
// (otherwise we had an error reported elsewhere already)
1182+
if x.typ != Typ[Invalid] && y.typ != Typ[Invalid] {
1183+
check.invalidOp(x.pos(), "mismatched types %s and %s", x.typ, y.typ)
1184+
}
1185+
x.mode = invalid
1186+
return
1187+
}
1188+
1189+
if !check.op(binaryOpPredicates, x, op) {
1190+
x.mode = invalid
1191+
return
1192+
}
1193+
1194+
if op == token.QUO || op == token.REM {
1195+
// check for zero divisor
1196+
if (x.mode == constant_ || isInteger(x.typ)) && y.mode == constant_ && constant.Sign(y.val) == 0 {
1197+
check.invalidOp(y.pos(), "division by zero")
1198+
x.mode = invalid
1199+
return
1200+
}
1201+
1202+
// check for divisor underflow in complex division (see issue 20227)
1203+
if x.mode == constant_ && y.mode == constant_ && isComplex(x.typ) {
1204+
re, im := constant.Real(y.val), constant.Imag(y.val)
1205+
re2, im2 := constant.BinaryOp(re, token.MUL, re), constant.BinaryOp(im, token.MUL, im)
1206+
if constant.Sign(re2) == 0 && constant.Sign(im2) == 0 {
1207+
check.invalidOp(y.pos(), "division by zero")
1208+
x.mode = invalid
1209+
return
1210+
}
1211+
}
1212+
}
1213+
1214+
if x.mode == constant_ && y.mode == constant_ {
1215+
xval := x.val
1216+
yval := y.val
1217+
typ := x.typ.Underlying().(*Basic)
1218+
// force integer division of integer operands
1219+
if op == token.QUO && isInteger(typ) {
1220+
op = token.QUO_ASSIGN
1221+
}
1222+
x.val = constant.BinaryOp(xval, op, yval)
1223+
// Typed constants must be representable in
1224+
// their type after each constant operation.
1225+
if isTyped(typ) {
1226+
if e != nil {
1227+
x.expr = e // for better error message
1228+
}
1229+
check.representable(x, typ)
1230+
}
1231+
return
1232+
}
1233+
1234+
x.mode = value
1235+
// x.typ is unchanged
1236+
}
1237+
```
1238+
> 这边以 `go/types` 标准库的类型检查作为案例, 编译器整体流程大同小异
1239+
1240+
以上, 通过标识符声明及其作用域的维护与查找, 再结合各操作符下表达式的递归检查, 就可以完成对一棵语法树的类型检查了
1241+
10151242
## 中间代码生成
10161243
## 机器码生成
10171244
## 参考资料

0 commit comments

Comments
 (0)