900字范文,内容丰富有趣,生活中的好帮手!
900字范文 > 手工编程实现词法分析器java_编译原理——词法分析器实现

手工编程实现词法分析器java_编译原理——词法分析器实现

时间:2018-12-23 20:15:56

相关推荐

手工编程实现词法分析器java_编译原理——词法分析器实现

// Lexical_Analysis.cpp : entry point of the console application.
//
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
#include "iostream"

using namespace std;

// Lexical analysis program.
// The token category codes are defined first.

/*
第一类:标识符 letter(letter | digit)* 无穷集
第二类:常数 (digit)+ 无穷集
第三类:保留字(32)
auto break case char const continue
default do double else enum extern
float for goto if int long
register return short signed sizeof static
struct switch typedef union unsigned void
volatile while

第四类:界符 ‘/*’、‘//’、 () { } [ ] " " '
第五类:运算符 <、<=、>、>=、=、+、-、*、/、^ 等

对所有可数符号进行编码:
<$,0>
<auto,1>
...
<while,32>
<+,33>
<-,34>
<*,35>
</,36>
<<,37>
<<=,38>
<>,39>
<>=,40>
<=,41>
<==,42>
<!=,43>
<;,44>
<(,45>
<),46>
<^,47>
<,,48>
<",49>
<',50>
<#,51>
<&,52>
<&&,53>
<|,54>
<||,55>
<%,56>
<~,57>
<<<,58>左移
<>>,59>右移
<[,60>
<],61>
<{,62>
<},63>
<\,64>
<.,65>
<?,66>
<:,67>
<!,68>
"[","]","{","}"
<常数,99>
<标识符,100>

*/

/****************************************************************************************/
// Global reserved-word table.
// A keyword's category code is its index + 1 (that is what searchReserve
// returns), so the order of the entries is significant.
static char reserveWord[32][20] = {
    "auto", "break", "case", "char", "const", "continue",
    "default", "do", "double", "else", "enum", "extern",
    "float", "for", "goto", "if", "int", "long",
    "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union", "unsigned", "void",
    "volatile", "while"
};

// Operator / delimiter table; extend as needed.
// Entry i has category code 33 + i. Scanner hard-codes several of these codes
// (37 "<", 38 "<=", 39 ">", 40 ">=", 41 "=", 42 "==", 43 "!=", 52 "&",
// 53 "&&", 54 "|", 55 "||", 58 "<<", 59 ">>", 68 "!"), so entries must not
// be reordered or removed.
static char operatorOrDelimiter[36][10] = {
    "+", "-", "*", "/", "<", "<=", ">", ">=", "=", "==",
    "!=", ";", "(", ")", "^", ",", "\"", "\'", "#", "&",
    "&&", "|", "||", "%", "~", "<<", ">>", "[", "]", "{",
    "}", "\\", ".", "\?", ":", "!"
};

// Identifier table; an empty string marks a free slot.
static char IDentifierTbl[1000][50] = { "" };
/****************************************************************************************/

/******** Reserved-word lookup *****************/
/*
 * Looks s up in a reserved-word table.
 *
 * reserveWord: table of 32 NUL-terminated keywords (each row is 20 bytes).
 * s:           NUL-terminated candidate word; it is only read.
 *
 * Returns the keyword's category code (its index + 1, i.e. 1..32) when s is
 * in the table, or -1 when it is not, in which case the caller treats the
 * word as an identifier.
 */
int searchReserve(char reserveWord[][20], const char s[])
{
    for (int i = 0; i < 32; i++) {
        if (strcmp(reserveWord[i], s) == 0) {
            return i + 1; /* category code */
        }
    }
    return -1; /* not a reserved word */
}
/******** Reserved-word lookup *****************/

/********************* Letter test ********************/
/*
 * Returns true when letter is an ASCII letter (a-z, A-Z) or an underscore.
 * The underscore counts as a letter because C allows it anywhere in an
 * identifier, including the first position.
 */
bool IsLetter(char letter)
{
    return (letter >= 'a' && letter <= 'z')
        || (letter >= 'A' && letter <= 'Z')
        || letter == '_';
}
/********************* Letter test ********************/

/***************** Digit test ************************/
/*
 * Returns true when digit is one of the ASCII characters '0'..'9'.
 */
bool IsDigit(char digit)
{
    return digit >= '0' && digit <= '9';
}
/***************** Digit test ************************/

/******************** Pre-processing: strip comments and layout characters **********************/
/*
 * Cleans the source text in r in place.
 *
 * r:        NUL-terminated source text; overwritten with the cleaned text.
 * pProject: index of the last position of r to examine (the caller passes
 *           the index of the terminating NUL).
 *
 * - A "//" comment is dropped up to, but not including, the next newline.
 *   It also stops at the '$' end-of-program marker or at the end of the
 *   text, so a comment on the last line cannot run past the buffer.
 * - A block comment is replaced by a single space.
 * - Newline, tab, vertical tab and carriage return become a space.
 *
 * Layout characters and comments are turned into a space rather than deleted
 * so that the tokens on either side stay separate ("int\na" must not become
 * "inta"). The result is never longer than the input, so no scratch buffer
 * is needed.
 *
 * If a block comment is not closed before '$' or the end of the text, an
 * error message is printed and the program exits.
 */
void filterResource(char r[], int pProject)
{
    int out = 0; /* write position; never ahead of the read position */
    int i = 0;   /* read position */

    while (i <= pProject && r[i] != '\0') {
        if (r[i] == '/' && r[i + 1] == '/') {
            /* Line comment: skip to the newline, which is handled below. */
            while (i <= pProject && r[i] != '\0' && r[i] != '\n' && r[i] != '$') {
                i++;
            }
            continue;
        }

        if (r[i] == '/' && r[i + 1] == '*') {
            i += 2;
            for (;;) {
                if (i > pProject || r[i] == '\0' || r[i] == '$') {
                    printf("注释出错,没有找到 */,程序结束!!!\n");
                    exit(0);
                }
                if (r[i] == '*' && r[i + 1] == '/') {
                    break;
                }
                i++;
            }
            i += 2; /* step over the closing marker */
            r[out++] = ' ';
            continue; /* re-examine: another comment may start right here */
        }

        char c = r[i++];
        if (c == '\n' || c == '\t' || c == '\v' || c == '\r') {
            c = ' ';
        }
        r[out++] = c;
    }
    r[out] = '\0';
}
/******************** Pre-processing: strip comments and layout characters **********************/

176 /****************************分析子程序,算法核心***********************/

177 void Scanner(int &syn, char resourceProject[], char token[], int &pProject)178 {//根据DFA的状态转换图设计

179 int i, count = 0;//count用来做token[]的指示器,收集有用字符

180 char ch;//作为判断使用

181 ch =resourceProject[pProject];182 while (ch == ' ')183 {//过滤空格,防止程序因识别不了空格而结束

184 pProject++;185 ch =resourceProject[pProject];186 }187 for (i = 0; i<20; i++)188 {//每次收集前先清零

189 token[i] = '\0';190 }191 if(IsLetter(resourceProject[pProject]))192 {//开头为字母

193 token[count++] = resourceProject[pProject];//收集

194 pProject++;//下移

195 while (IsLetter(resourceProject[pProject]) ||IsDigit(resourceProject[pProject]))196 {//后跟字母或数字

197 token[count++] = resourceProject[pProject];//收集

198 pProject++;//下移

199 }//多读了一个字符既是下次将要开始的指针位置

200 token[count] = '\0';201 syn = searchReserve(reserveWord, token);//查表找到种别码

202 if (syn == -1)203 {//若不是保留字则是标识符

204 syn = 100;//标识符种别码

205 }206 return;207 }208 else if(IsDigit(resourceProject[pProject]))209 {//首字符为数字

210 while(IsDigit(resourceProject[pProject]))211 {//后跟数字

212 token[count++] = resourceProject[pProject];//收集

213 pProject++;214 }//多读了一个字符既是下次将要开始的指针位置

215 token[count] = '\0';216 syn = 99;//常数种别码

217 }218 else if (ch == '+' || ch == '-' || ch == '*' || ch == '/' || ch == ';' || ch == '(' || ch == ')' || ch == '^'

219 || ch == ',' || ch == '\"' || ch == '\'' || ch == '~' || ch == '#' || ch == '%' || ch == '['

220 || ch == ']' || ch == '{' || ch == '}' || ch == '\\' || ch == '.' || ch == '\?' || ch == ':')221 {//若为运算符或者界符,查表得到结果

222 token[0] =resourceProject[pProject];223 token[1] = '\0';//形成单字符串

224 for (i = 0; i<36; i++)225 {//查运算符界符表

226 if (strcmp(token, operatorOrDelimiter[i]) == 0)227 {228 syn = 33 + i;//获得种别码,使用了一点技巧,使之呈线性映射

229 break;//查到即推出

230 }231 }232 pProject++;//指针下移,为下一扫描做准备

233 return;234 }235 else if (resourceProject[pProject] == '

237 pProject++;//后移,超前搜索

238 if (resourceProject[pProject] == '=')239 {240 syn = 38;241 }242 else if (resourceProject[pProject] == '

244 pProject--;245 syn = 58;246 }247 else

248 {249 pProject--;250 syn = 37;251 }252 pProject++;//指针下移

253 return;254 }255 else if (resourceProject[pProject] == '>')256 {//>,>=,>>

257 pProject++;258 if (resourceProject[pProject] == '=')259 {260 syn = 40;261 }262 else if (resourceProject[pProject] == '>')263 {264 syn = 59;265 }266 else

267 {268 pProject--;269 syn = 39;270 }271 pProject++;272 return;273 }274 else if (resourceProject[pProject] == '=')275 {//=.==

276 pProject++;277 if (resourceProject[pProject] == '=')278 {279 syn = 42;280 }281 else

282 {283 pProject--;284 syn = 41;285 }286 pProject++;287 return;288 }289 else if (resourceProject[pProject] == '!')290 {//!,!=

291 pProject++;292 if (resourceProject[pProject] == '=')293 {294 syn = 43;295 }296 else

297 {298 syn = 68;299 pProject--;300 }301 pProject++;302 return;303 }304 else if (resourceProject[pProject] == '&')305 {//&,&&

306 pProject++;307 if (resourceProject[pProject] == '&')308 {309 syn = 53;310 }311 else

312 {313 pProject--;314 syn = 52;315 }316 pProject++;317 return;318 }319 else if (resourceProject[pProject] == '|')320 {//|,||

321 pProject++;322 if (resourceProject[pProject] == '|')323 {324 syn = 55;325 }326 else

327 {328 pProject--;329 syn = 54;330 }331 pProject++;332 return;333 }334 else if (resourceProject[pProject] == '$')335 {//结束符

336 syn = 0;//种别码为0

337 }338 else

339 {//不能被以上词法分析识别,则出错。

340 printf("error:there is no exist %c \n", ch);341 exit(0);342 }343 }344

345

346 intmain()347 {348 //打开一个文件,读取其中的源程序

349 char resourceProject[10000];350 char token[20] = { 0};351 int syn = -1, i;//初始化

352 int pProject = 0;//源程序指针

353 FILE *fp, *fp1;354 if ((fp = fopen("D:\\zyr_rc.txt", "r")) ==NULL)355 {//打开源程序

356 cout << "can't open this file";357 exit(0);358 }359 resourceProject[pProject] =fgetc(fp);360 while (resourceProject[pProject] != '$')361 {//将源程序读入resourceProject[]数组

362 pProject++;363 resourceProject[pProject] =fgetc(fp);364 }365 resourceProject[++pProject] = '\0';366 fclose(fp);367 cout << endl << "源程序为:" <

370 filterResource(resourceProject, pProject);371 cout << endl << "过滤之后的程序:" <

374

375 if ((fp1 = fopen("D:\\zyr_compile.txt", "w+")) ==NULL)376 {//打开源程序

377 cout << "can't open this file";378 exit(0);379 }380 while (syn != 0)381 {382 //启动扫描

383 Scanner(syn, resourceProject, token, pProject);384 if (syn == 100)385 {//标识符

386 for (i = 0; i<1000; i++)387 {//插入标识符表中

388 if (strcmp(IDentifierTbl[i], token) == 0)389 {//已在表中

390 break;391 }392 if (strcmp(IDentifierTbl[i], "") == 0)393 {//查找空间

394 strcpy(IDentifierTbl[i], token);395 break;396 }397 }398 printf("(标识符 ,%s)\n", token);399 fprintf(fp1, "(标识符 ,%s)\n", token);400 }401 else if (syn >= 1 && syn <= 32)402 {//保留字

403 printf("(%s , --)\n", reserveWord[syn - 1]);404 fprintf(fp1, "(%s , --)\n", reserveWord[syn - 1]);405 }406 else if (syn == 99)407 {//const 常数

408 printf("(常数 , %s)\n", token);409 fprintf(fp1, "(常数 , %s)\n", token);410 }411 else if (syn >= 33 && syn <= 68)412 {413 printf("(%s , --)\n", operatorOrDelimiter[syn - 33]);414 fprintf(fp1, "(%s , --)\n", operatorOrDelimiter[syn - 33]);415 }416 }417 for (i = 0; i<100; i++)418 {//插入标识符表中

419 printf("第%d个标识符: %s\n", i + 1, IDentifierTbl[i]);420 fprintf(fp1, "第%d个标识符: %s\n", i + 1, IDentifierTbl[i]);421 }422 fclose(fp1);423 return 0;424 }

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。