python3词法分析(一)词法单元
一、词法单元
词法单元(token):由词法单元名和可选的属性值组成,即"词法单元名 + 可选属性值"。
所有的token都在Grammar/Tokens
ENDMARKER
NAME
NUMBER
STRING
NEWLINE
INDENT
DEDENT

LPAR '('
RPAR ')'
LSQB '['
RSQB ']'
COLON ':'
COMMA ','
SEMI ';'
PLUS '+'
MINUS '-'
STAR '*'
SLASH '/'
VBAR '|'
AMPER '&'
LESS '<'
GREATER '>'
EQUAL '='
DOT '.'
PERCENT '%'
LBRACE '{'
RBRACE '}'
EQEQUAL '=='
NOTEQUAL '!='
LESSEQUAL '<='
GREATEREQUAL '>='
TILDE '~'
CIRCUMFLEX '^'
LEFTSHIFT '<<'
RIGHTSHIFT '>>'
DOUBLESTAR '**'
PLUSEQUAL '+='
MINEQUAL '-='
STAREQUAL '*='
SLASHEQUAL '/='
PERCENTEQUAL '%='
AMPEREQUAL '&='
VBAREQUAL '|='
CIRCUMFLEXEQUAL '^='
LEFTSHIFTEQUAL '<<='
RIGHTSHIFTEQUAL '>>='
DOUBLESTAREQUAL '**='
DOUBLESLASH '//'
DOUBLESLASHEQUAL '//='
AT '@'
ATEQUAL '@='
RARROW '->'
ELLIPSIS '...'
COLONEQUAL ':='

OP
AWAIT
ASYNC
TYPE_IGNORE
TYPE_COMMENT
SOFT_KEYWORD
ERRORTOKEN

# These aren't used by the C tokenizer but are needed for tokenize.py
COMMENT
NL
ENCODING
二、自动生成代码
执行 ./configure 时,会以 Makefile.pre.in 为模板生成 Makefile。
Makefile.pre.in中包含如下规则
# Regenerate every token-related file from the single source of truth,
# Grammar/Tokens. Each command runs Tools/scripts/generate_token.py with a
# mode (rst, h, c, py), the input file, and the output file to overwrite.
.PHONY: regen-token
regen-token:
	# Regenerate Doc/library/token-list.inc from Grammar/Tokens
	# using Tools/scripts/generate_token.py
	$(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py rst \
		$(srcdir)/Grammar/Tokens \
		$(srcdir)/Doc/library/token-list.inc
	# Regenerate Include/token.h from Grammar/Tokens
	# using Tools/scripts/generate_token.py
	$(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py h \
		$(srcdir)/Grammar/Tokens \
		$(srcdir)/Include/token.h
	# Regenerate Parser/token.c from Grammar/Tokens
	# using Tools/scripts/generate_token.py
	$(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py c \
		$(srcdir)/Grammar/Tokens \
		$(srcdir)/Parser/token.c
	# Regenerate Lib/token.py from Grammar/Tokens
	# using Tools/scripts/generate_token.py
	$(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/generate_token.py py \
		$(srcdir)/Grammar/Tokens \
		$(srcdir)/Lib/token.py
执行 make regen-token 时,会调用 Tools/scripts/generate_token.py,根据 Grammar/Tokens 生成 token 相关的文档和代码。
2.1 Include/token.h
/* Auto-generated by Tools/scripts/generate_token.py *//* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {#endif#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */#define ENDMARKER 0
#define NAME 1
#define NUMBER 2
#define STRING 3
#define NEWLINE 4
#define INDENT 5
#define DEDENT 6
#define LPAR 7
#define RPAR 8
#define LSQB 9
#define RSQB 10
#define COLON 11
#define COMMA 12
#define SEMI 13
#define PLUS 14
#define MINUS 15
#define STAR 16
#define SLASH 17
#define VBAR 18
#define AMPER 19
#define LESS 20
#define GREATER 21
#define EQUAL 22
#define DOT 23
#define PERCENT 24
#define LBRACE 25
#define RBRACE 26
#define EQEQUAL 27
#define NOTEQUAL 28
#define LESSEQUAL 29
#define GREATEREQUAL 30
#define TILDE 31
#define CIRCUMFLEX 32
#define LEFTSHIFT 33
#define RIGHTSHIFT 34
#define DOUBLESTAR 35
#define PLUSEQUAL 36
#define MINEQUAL 37
#define STAREQUAL 38
#define SLASHEQUAL 39
#define PERCENTEQUAL 40
#define AMPEREQUAL 41
#define VBAREQUAL 42
#define CIRCUMFLEXEQUAL 43
#define LEFTSHIFTEQUAL 44
#define RIGHTSHIFTEQUAL 45
#define DOUBLESTAREQUAL 46
#define DOUBLESLASH 47
#define DOUBLESLASHEQUAL 48
#define AT 49
#define ATEQUAL 50
#define RARROW 51
#define ELLIPSIS 52
#define COLONEQUAL 53
#define OP 54
#define AWAIT 55
#define ASYNC 56
#define TYPE_IGNORE 57
#define TYPE_COMMENT 58
#define SOFT_KEYWORD 59
#define ERRORTOKEN 60
#define N_TOKENS 64
#define NT_OFFSET 256/* Special definitions for cooperation with parser */#define ISTERMINAL(x) ((x) < NT_OFFSET)
#define ISNONTERMINAL(x) ((x) >= NT_OFFSET)
#define ISEOF(x) ((x) == ENDMARKER)
#define ISWHITESPACE(x) ((x) == ENDMARKER || \(x) == NEWLINE || \(x) == INDENT || \(x) == DEDENT)PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
2.2 Parser/token.c
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

/* Printable name for each token type, indexed by the token's numeric code
   (ENDMARKER == 0 ... "<N_TOKENS>" at index 64).  Entries wrapped in angle
   brackets are the ones from ERRORTOKEN onwards. */
const char * const _PyParser_TokenNames[] = {
    "ENDMARKER",
    "NAME",
    "NUMBER",
    "STRING",
    "NEWLINE",
    "INDENT",
    "DEDENT",
    "LPAR",
    "RPAR",
    "LSQB",
    "RSQB",
    "COLON",
    "COMMA",
    "SEMI",
    "PLUS",
    "MINUS",
    "STAR",
    "SLASH",
    "VBAR",
    "AMPER",
    "LESS",
    "GREATER",
    "EQUAL",
    "DOT",
    "PERCENT",
    "LBRACE",
    "RBRACE",
    "EQEQUAL",
    "NOTEQUAL",
    "LESSEQUAL",
    "GREATEREQUAL",
    "TILDE",
    "CIRCUMFLEX",
    "LEFTSHIFT",
    "RIGHTSHIFT",
    "DOUBLESTAR",
    "PLUSEQUAL",
    "MINEQUAL",
    "STAREQUAL",
    "SLASHEQUAL",
    "PERCENTEQUAL",
    "AMPEREQUAL",
    "VBAREQUAL",
    "CIRCUMFLEXEQUAL",
    "LEFTSHIFTEQUAL",
    "RIGHTSHIFTEQUAL",
    "DOUBLESTAREQUAL",
    "DOUBLESLASH",
    "DOUBLESLASHEQUAL",
    "AT",
    "ATEQUAL",
    "RARROW",
    "ELLIPSIS",
    "COLONEQUAL",
    "OP",
    "AWAIT",
    "ASYNC",
    "TYPE_IGNORE",
    "TYPE_COMMENT",
    "SOFT_KEYWORD",
    "<ERRORTOKEN>",
    "<COMMENT>",
    "<NL>",
    "<ENCODING>",
    "<N_TOKENS>",
};

/* Return the token corresponding to a single character */

/* c1: the character to classify.
   Returns the matching single-character token type, or OP when c1 is not
   one of the characters listed below. */
int
PyToken_OneChar(int c1)
{
    switch (c1) {
    case '%': return PERCENT;
    case '&': return AMPER;
    case '(': return LPAR;
    case ')': return RPAR;
    case '*': return STAR;
    case '+': return PLUS;
    case ',': return COMMA;
    case '-': return MINUS;
    case '.': return DOT;
    case '/': return SLASH;
    case ':': return COLON;
    case ';': return SEMI;
    case '<': return LESS;
    case '=': return EQUAL;
    case '>': return GREATER;
    case '@': return AT;
    case '[': return LSQB;
    case ']': return RSQB;
    case '^': return CIRCUMFLEX;
    case '{': return LBRACE;
    case '|': return VBAR;
    case '}': return RBRACE;
    case '~': return TILDE;
    }
    return OP;
}

/* Return the token corresponding to the two-character sequence c1 c2,
   or OP when the pair is not a known two-character operator. */
int
PyToken_TwoChars(int c1, int c2)
{
    switch (c1) {
    case '!':
        switch (c2) {
        case '=': return NOTEQUAL;
        }
        break;
    case '%':
        switch (c2) {
        case '=': return PERCENTEQUAL;
        }
        break;
    case '&':
        switch (c2) {
        case '=': return AMPEREQUAL;
        }
        break;
    case '*':
        switch (c2) {
        case '*': return DOUBLESTAR;
        case '=': return STAREQUAL;
        }
        break;
    case '+':
        switch (c2) {
        case '=': return PLUSEQUAL;
        }
        break;
    case '-':
        switch (c2) {
        case '=': return MINEQUAL;
        case '>': return RARROW;
        }
        break;
    case '/':
        switch (c2) {
        case '/': return DOUBLESLASH;
        case '=': return SLASHEQUAL;
        }
        break;
    case ':':
        switch (c2) {
        case '=': return COLONEQUAL;
        }
        break;
    case '<':
        switch (c2) {
        case '<': return LEFTSHIFT;
        case '=': return LESSEQUAL;
        /* "<>" maps to the same token type as "!=". */
        case '>': return NOTEQUAL;
        }
        break;
    case '=':
        switch (c2) {
        case '=': return EQEQUAL;
        }
        break;
    case '>':
        switch (c2) {
        case '=': return GREATEREQUAL;
        case '>': return RIGHTSHIFT;
        }
        break;
    case '@':
        switch (c2) {
        case '=': return ATEQUAL;
        }
        break;
    case '^':
        switch (c2) {
        case '=': return CIRCUMFLEXEQUAL;
        }
        break;
    case '|':
        switch (c2) {
        case '=': return VBAREQUAL;
        }
        break;
    }
    return OP;
}

/* Return the token corresponding to the three-character sequence c1 c2 c3,
   or OP when the triple is not a known three-character operator. */
int
PyToken_ThreeChars(int c1, int c2, int c3)
{
    switch (c1) {
    case '*':
        switch (c2) {
        case '*':
            switch (c3) {
            case '=': return DOUBLESTAREQUAL;
            }
            break;
        }
        break;
    case '.':
        switch (c2) {
        case '.':
            switch (c3) {
            case '.': return ELLIPSIS;
            }
            break;
        }
        break;
    case '/':
        switch (c2) {
        case '/':
            switch (c3) {
            case '=': return DOUBLESLASHEQUAL;
            }
            break;
        }
        break;
    case '<':
        switch (c2) {
        case '<':
            switch (c3) {
            case '=': return LEFTSHIFTEQUAL;
            }
            break;
        }
        break;
    case '>':
        switch (c2) {
        case '>':
            switch (c3) {
            case '=': return RIGHTSHIFTEQUAL;
            }
            break;
        }
        break;
    }
    return OP;
}
python3词法分析(一)词法单元相关推荐
- python3 %%time 表示执行单元格时间 时间指的是CPU时间
python3 %%time 表示执行单元格时间 时间指的是CPU时间 举例 %%time from sagemaker.pytorch import PyTorch from sagemaker.p ...
- ebnf范式_使用Scala基于词法单元的解析器定制EBNF范式文法解析
前言 近期在做Oracle迁移到Spark平台的项目上遇到了一些平台公式翻译为SparkSQL(on Hive)的需求,而Spark采用亲妈语言Scala进行开发.下面是个意外,被论文查重了,移步至我 ...
- 让解析器可以快速处理词法单元之间的空格
2019独角兽企业重金招聘Python工程师标准>>> 空格在字符串中时必要的字符,如果在字符串解析中,空格时必须要处理的. rules / tokens 分别定义区分空格的 rul ...
- python翻译matlab,如何在python3中翻译MATLAB单元?
使用我在链接文章中演示的Octave/scipy save/loadmat: 在八度音阶中>> num_nodes=3 num_nodes = 3 >> num_nodes=3 ...
- python3词法分析(三)识别token
python3.10.2 主要分析Parser/tokenizer.c的tok_get函数 一.预处理行首 1.跳过空白字符 1.1.1 空格.Tab.\014 static int tok_get( ...
- 编译原理 - 词法分析
词法分析 词法分析器 作用 编译过程划分为词法分析和语法分析两个阶段的原因 语法分析中的三个概念 词法分析的实现 如何区分兼容性的标识符 词法分析算法 词法单元 词法单元例子 词法单元的模式 正则表达 ...
- Google V8引擎浅析
前端开发人员都会遇到一个流行词:V8.它的流行程度很大一部分是因为它将JavaScript的性能提升到了一个新的水平.是的,V8很快.但它是如何发挥它的魔力? 前言 源代码:https://sourc ...
- go string 换行_从词法分析角度聊 Go 代码组成
之前的 Go 笔记系列,已经完成到了开发环境搭建,原本接下来的计划就是到语法部分了,但后来一直没有前进.主要是因为当时的工作比较忙,分散了精力,于是就暂时放下了. 最近,准备重新把之前计划捡起来. 第 ...
- php 词法分析,【PHP7源码学习】2019-03-20 PHP词法分析
baiyan 基本概念 在PHP7中,当一个脚本运行请求或到来时,PHP代码首先会被加载到内存中,随后进行词法分析和语法分析并生成抽象语法树(AST),然后进行深度优先遍历并生成opcodes,并在z ...
最新文章
- ios 如何改变UISegmentedControl文本的字体大小?
- C#字符串截取学习总结
- c#设置图片的dpi_被忽视的鼠标设置,影响瞄准的关键因素就是它!
- vue子组件改变父组件的值
- 【翻译】.NET 5 Preview8发布
- AHK 中的字符串拼接和遍历操作
- java 夏令时标志_夏令时随绝对日期而变化
- 江津2021年高考成绩查询,刚刚,重庆市2020年高考分数线出炉!
- 华为在推荐系统中的前沿技术研究与落地(附PPT下载链接)
- 女同志50岁退休可以早领养老金,55岁退休拿的养老金比较高,哪个更划算?
- 如何学习嵌入式开发必备技能
- centos linux 使用无线网卡,CentOS环境下安装无线网卡
- 2021年北京大学软件与微电子学院考研信息
- off-by-one error
- 如何创建自己的Cydia源
- 策略路由(本地策略和接口策略)
- [编程题] 头条校招
- 星巴克利用地理位置应用发布咖啡速递手机应用
- 广州穗雅医院健康汇:为什么口腔溃疡会反反复复?
- 小程序抢购页面倒计时定时器