Go实现词法分析器

警告
本文最后更新于 2024-05-15,文中内容可能已过时。
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
package main

import (
	"bufio"
	"fmt"
	"os"
	"unicode"
)

// keywords maps each reserved word of the analyzed language to the category
// code that lexicalAnalysis prints for it.
var keywords = map[string]int{
	"begin": 1, "if": 2, "then": 3,
	"while": 4, "do": 5, "end": 6,
}

// lexicalAnalysis splits input into tokens and prints each one to standard
// output as "(syn,token) ", where syn is the token's category code:
//
//	1-6     keywords, as listed in the keywords table
//	10      identifier: a letter followed by letters and digits
//	11      number: a run of one or more digits
//	13-16   + - * /
//	17, 18  : :=
//	20-22   < <> <=
//	23, 24  > >=
//	25-28   = ; ( )
//	0       #
//
// Spaces between tokens are skipped. When a character that starts no token is
// met, an error line naming that character is printed and scanning stops.
func lexicalAnalysis(input string) {
	// Scan runes rather than bytes so that a multi-byte UTF-8 character is
	// classified and reported as one character.
	runes := []rune(input)

	for p := 0; p < len(runes); {
		// Skipping spaces here, under the loop's bounds check, means trailing
		// spaces can never push the scanner past the end of the input.
		if runes[p] == ' ' {
			p++
			continue
		}

		syn, token, next := scanToken(runes, p)
		if syn == -1 {
			fmt.Println("错误:非法字符", token)
			break
		}
		fmt.Printf("(%d,%s) ", syn, token)
		p = next
	}
}

// scanToken reads the single token that starts at src[pos]. It returns the
// token's category code, its text, and the index of the first rune after it.
// The code is -1 when src[pos] cannot start any token. The caller must ensure
// that pos < len(src).
func scanToken(src []rune, pos int) (syn int, token string, next int) {
	c := src[pos]

	switch {
	case unicode.IsLetter(c):
		end := pos + 1
		for end < len(src) && (unicode.IsLetter(src[end]) || unicode.IsDigit(src[end])) {
			end++
		}
		token = string(src[pos:end])
		if code, ok := keywords[token]; ok {
			return code, token, end
		}
		return 10, token, end

	case unicode.IsDigit(c):
		// Consume the whole digit run so "123" is one number token.
		end := pos + 1
		for end < len(src) && unicode.IsDigit(src[end]) {
			end++
		}
		return 11, string(src[pos:end]), end
	}

	// followedBy reports whether the rune right after pos is want.
	followedBy := func(want rune) bool {
		return pos+1 < len(src) && src[pos+1] == want
	}

	switch c {
	case '+':
		return 13, "+", pos + 1
	case '-':
		return 14, "-", pos + 1
	case '*':
		return 15, "*", pos + 1
	case '/':
		return 16, "/", pos + 1
	case ':':
		if followedBy('=') {
			return 18, ":=", pos + 2
		}
		return 17, ":", pos + 1
	case '<':
		switch {
		case followedBy('>'):
			return 21, "<>", pos + 2
		case followedBy('='):
			return 22, "<=", pos + 2
		}
		return 20, "<", pos + 1
	case '>':
		if followedBy('=') {
			return 24, ">=", pos + 2
		}
		return 23, ">", pos + 1
	case '=':
		return 25, "=", pos + 1
	case ';':
		return 26, ";", pos + 1
	case '(':
		return 27, "(", pos + 1
	case ')':
		return 28, ")", pos + 1
	case '#':
		return 0, "#", pos + 1
	}

	return -1, string(c), pos + 1
}

// main prompts for one line of source text on standard input and prints the
// tokens found in it by calling lexicalAnalysis.
func main() {
	fmt.Printf("请输入代码段\n")

	// A Scanner returns the whole line without its line ending and reports
	// read failures, including an over-long line, through Err instead of
	// silently handing back a truncated prefix.
	sc := bufio.NewScanner(os.Stdin)
	if sc.Scan() {
		// Example input: begin x:=9;if x>0 then x:=2*x+1/3;end#
		lexicalAnalysis(sc.Text())
	}
	if err := sc.Err(); err != nil {
		fmt.Fprintln(os.Stderr, "read input:", err)
		os.Exit(1)
	}
}

相关内容

0%