python代码词频分析(python实现词法分析器)
类别:脚本大全 浏览量:623
时间:2022-03-31 12:17:33 python代码词频分析
Python实现词法分析器:一个简单的Python词法分析器实现,供大家参考,具体内容如下。
词法分析器状态转换图:
词法分析器总流程图:
预处理程序:
词法分析器:
词法分析器程序详细设计
详细代码实现:
|
#!/usr/bin/env python3.4
# coding=utf-8
"""A small lexical analyser for Python source files.

The script runs in two passes:

1. ``pretreatment`` strips comments and collapses whitespace, writing the
   result to ``file.tmp`` in the current directory.
2. ``recognition`` reads that file character by character and records every
   distinct token, together with its category code, in ``signlist``.

Category codes: 1xx keywords, 2xx symbols, 301 identifier, 401 constant,
501 lexical error.
"""
import sys
import string

# Token table: keyword / symbol text -> category code.
# (The misspelt name ``keywards`` is kept because it is the module's
# public name.)
keywards = {
    # Keywords.  Python keywords are case-sensitive, so the three
    # capitalised ones must be spelt exactly like this.
    'False': 101,
    'class': 102,
    'finally': 103,
    'is': 104,
    'return': 105,
    'None': 106,
    'continue': 107,
    'for': 108,
    'lambda': 109,
    'try': 110,
    'True': 111,
    'def': 112,
    'from': 113,
    'nonlocal': 114,
    'while': 115,
    'and': 116,
    'del': 117,
    'global': 118,
    'not': 119,
    'with': 120,
    'as': 121,
    'elif': 122,
    'if': 123,
    'or': 124,
    'yield': 125,
    'assert': 126,
    'else': 127,
    'import': 128,
    'pass': 129,
    'break': 130,
    'except': 131,
    'in': 132,
    'raise': 133,
    # Symbols.
    '+': 201,
    '-': 202,
    '*': 203,
    '/': 204,
    '=': 205,
    ':': 206,
    '<': 207,
    '>': 208,
    '%': 209,
    '&': 210,
    '!': 211,
    '(': 212,
    ')': 213,
    '[': 214,
    ']': 215,
    '{': 216,
    '}': 217,
    '#': 218,
    '|': 219,
    ',': 220,
}

# Every token seen so far -> its category code.
signlist = {}

# Single characters that end the current token and are tokens themselves.
# Other symbols in ``keywards`` (e.g. '-') are deliberately NOT listed so
# that signed numbers such as "-1" stay in one piece.
_DELIMITERS = frozenset('()[]{}<>,:+=')


def pretreatment(file_name):
    """Write a cleaned copy of *file_name* to ``file.tmp``.

    ``#`` comments are removed, runs of spaces/tabs/newlines are collapsed
    into a single space, and quoted strings are copied verbatim (escaped
    quotes are not recognised; a string ends at the next matching quote or
    at the end of the line).

    If the input cannot be read, a message is printed and the function
    returns normally.
    """
    try:
        with open(file_name, 'r') as fp_read, \
                open('file.tmp', 'w') as fp_write:
            # sign: 0 = nothing written yet, 1 = last thing written was a
            # separator space, 3 = last thing written was an ordinary char.
            sign = 0
            for read in fp_read:
                length = len(read)
                i = -1
                while i < length - 1:
                    i += 1
                    ch = read[i]
                    if sign == 0 and ch == ' ':
                        # Skip leading spaces at the very start of the file.
                        continue
                    if ch == '#':
                        # The rest of the line is a comment.  Emit the
                        # separator the dropped newline would have
                        # produced, otherwise the token before the comment
                        # merges with the first token of the next line.
                        if sign == 3:
                            fp_write.write(' ')
                            sign = 1
                        break
                    elif ch == ' ' or ch == '\t' or ch == '\n':
                        if sign == 1:
                            continue
                        sign = 1
                        fp_write.write(' ')
                    elif ch == '"' or ch == "'":
                        # Copy the string literal through untouched.
                        fp_write.write(ch)
                        i += 1
                        while i < length and read[i] != ch:
                            fp_write.write(read[i])
                            i += 1
                        if i >= length:
                            # Unterminated on this line.
                            break
                        fp_write.write(read[i])
                    else:
                        sign = 3
                        fp_write.write(ch)
    except (OSError, UnicodeError):
        print(file_name, ': this filename not found!')


def save(string):
    """Classify token *string* and record it in ``signlist``.

    Keywords and symbols get their code from ``keywards``; identifiers get
    301; anything ``float()`` accepts gets 401; everything else non-blank
    gets 501.
    """
    if string in keywards:
        signlist.setdefault(string, keywards[string])
    elif is_signal(string) == 1:
        # Check identifiers before trying float(): names such as "inf" and
        # "nan" are valid identifiers that float() would happily parse.
        save_var(string)
    else:
        try:
            float(string)
        except ValueError:
            save_var(string)
        else:
            save_const(string)


def save_var(string):
    """Record *string* as an identifier (301) or a lexical error (501).

    Blank tokens and tokens already recorded are ignored.
    """
    if string in signlist or len(string.strip()) < 1:
        return
    if is_signal(string) == 1:
        signlist[string] = 301
    else:
        signlist[string] = 501


def save_const(string):
    """Record *string* as a constant (401) unless it is already recorded."""
    if string not in signlist:
        signlist[string] = 401


def save_error(string):
    """Record *string* as a lexical error (501) unless already recorded."""
    if string not in signlist:
        signlist[string] = 501


def is_signal(s):
    """Return 1 if *s* is an ASCII identifier, otherwise 0.

    An identifier starts with a letter or underscore and continues with
    letters, digits or underscores.  The empty string is not an identifier.
    """
    if not s:
        return 0
    first_chars = string.ascii_letters + '_'
    if s[0] not in first_chars:
        return 0
    body_chars = first_chars + string.digits
    for ch in s:
        if ch not in body_chars:
            return 0
    return 1


def recognition(filename):
    """Tokenise the pre-processed file *filename* into ``signlist``.

    Tokens are separated by single spaces and by the characters in
    ``_DELIMITERS``.  A quoted string (including any prefix characters
    directly before the opening quote) is recorded as one constant;
    separators inside the quotes do not split it.

    If the file cannot be read, the error is printed and nothing is
    recorded.
    """
    try:
        with open(filename, 'r') as fp_read:
            text = fp_read.read()
    except (OSError, UnicodeError) as e:
        print(e)
        return

    token = ""
    quote = ""  # the quote character of the open string literal, or ""
    for ch in text:
        if quote:
            token += ch
            if ch == quote:
                save_const(token)
                token = ""
                quote = ""
        elif ch == "'" or ch == '"':
            token += ch
            quote = ch
        elif ch == ' ':
            save(token)
            token = ""
        elif ch in _DELIMITERS:
            save(token)
            token = ""
            save(ch)
        else:
            token += ch
    # Flush whatever is left: input that does not end with a separator
    # would otherwise lose its final token.
    save(token)


def main():
    """Analyse the file named on the command line and print its tokens."""
    if len(sys.argv) < 2:
        print("please input filename")
    else:
        pretreatment(sys.argv[1])
        recognition('file.tmp')
        for i in signlist:
            print("(", signlist[i], ",", i, ")")


if __name__ == '__main__':
    main()
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持开心学习网。
原文链接:https://blog.csdn.net/IT_DREAM_ER/article/details/53485583
您可能感兴趣
- python函数使用方法高级用法(Python骚操作之动态定义函数)
- python编写程序读写数据库(详解js文件通过python访问数据库方法)
- python列表和条件组合的处理(一篇文章带你弄懂Python基础之列表相关操作和嵌套)
- python利用空列表进行数字排序(python实现计数排序与桶排序实例代码)
- python冒泡排序代码通俗理解(详解python算法之冒泡排序)
- python中的数据类型和数字类型(Python数据类型之Number数字操作实例详解)
- pythonrequest包设置编码(解决python3中的requests解析中文页面出现乱码问题)
- pyqt一键部署python环境(python使用PyQt5的简单方法)
- java集成钉钉发送消息(Python实现钉钉发送报警消息的方法)
- python数据分割教程(python3对拉勾数据进行可视化分析的方法详解)
- python对字典值排序(Python实现字典按key或者value进行排序操作示例sorted)
- python数字图像处理入门(python图像处理入门一)
- python中的变量与变量类型(Python变量类型知识点总结)
- python udp通信(Python socket模块实现的udp通信功能示例)
- python学生管理系统设计代码(python学生管理系统开发)
- python3列表的使用教程(对Python3 pyc 文件的使用详解)
- 八一节,说说中国女兵(八一节说说中国女兵)
- 王治郅菜鸟赛季已让八一带入正轨,大郅七大经典语录或是成功秘诀(王治郅菜鸟赛季已让八一带入正轨)
- 庆八一,重读经典红色语录,感悟互联网发展硬道理(重读经典红色语录)
- TVB新剧《黯夜守护者》将播,陈展鹏陈炜首次合作探讨人性(TVB新剧黯夜守护者将播)
- 新晋小花被称女版吴卓羲 将取代滕丽名成为TVB新一代御用女警(新晋小花被称女版吴卓羲)
- 艺人吴卓羲10年警察生涯,演足10年阿Sir,系咩玩法(艺人吴卓羲10年警察生涯)
热门推荐
- 数据库创建索引的注意事项
- 哪里有免费的云服务器价格实惠(安全的美国云服务器哪里比较便宜?)
- ci框架怎样修改为https协议(CI框架教程之优化验证码机制详解验证码辅助函数)
- css不显示滚动条设置(css隐藏移动端滚动条并且ios上平滑滚动的方法)
- 云服务选什么系统(云服务器用什么系统好)
- docker启动sqlserver(用docker运行postgreSQL的方法步骤)
- zabbix基本监控配置(zabbix使用教程+受监控服务器的添加安装教程)
- python高级面试题及答案(python面试题小结附答案实例代码)
- python pandas 匹配值(python 使用pandas计算累积求和的方法)
- tomcat处理乱码(Tomcat中catalina.bat设置为UTF-8控制台出现乱码)
排行榜
- 1
- 2
- 3
- 4
- 5
- 6
- 7
- 8
- 9