-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexical_analyze.py
123 lines (97 loc) · 4.16 KB
/
lexical_analyze.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Project : Compilateur (Python) #
# #
# File : lexical_analyze.py #
# #
# Description : Lexical analyze file and functions. #
# #
# Contributors : Corentin TROADEC & Anthony Vuillemin #
# #
# Date : September 2018 #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - #
# IMPORT #
# - - - - - - - - - - - - - - - - - #
# PROJECT MODULES
from conf import *
from token import *
from utils import *
# SYSTEM MODULES
import re
# - - - - - - - - - - - - - - - - - #
# FUNCTIONS #
# - - - - - - - - - - - - - - - - - #
# Drive the lexical pass over the whole source, one line at a time.
def lexique_analyze(fullCode) :
    """Tokenize every line of *fullCode* into the global tab_token.

    First injects the built-in pow_system function, then feeds each
    source line to lexique_analyze_line, and finally appends an
    end-of-program token so the parser knows where input stops.
    """
    # Pre-load the system pow implementation into the token stream.
    add_pow_function(True)
    # Source lines are numbered from 1.
    for line_no, source_line in enumerate(fullCode, start=1):
        DEBUG_MSG(f"CURRENT CODE (Line : {line_no}) : {source_line[:-1]}", "LINE") #WARNING -1 test
        # Read the line and build its tokens.
        lexique_analyze_line(source_line, line_no)
    # End-of-program marker.
    tab_token.append(Token("toke_eop", 0, 0))
# Fill the token tab
def lexique_analyze_line(code, num_line) :
    """Tokenize one source line, appending tokens to the global tab_token.

    code     -- the raw text of the line
    num_line -- 1-based line number recorded in each token (None for
                system-injected code, see add_pow_function)
    """
    i = 0
    while i < len(code) :  # for each char in expression
        current_char = code[i]
        # '#' starts a comment: ignore the rest of the line.
        if(current_char == '#'):
            break
        # Whitespace produces a placeholder token that is never appended.
        if(current_char == " " or current_char == "\n" or current_char == "\t") :
            current_toke = Token(None, None, None)
        # Single-char token recognized directly in the keyword map.
        elif(current_char in hashmap_toke) :
            current_toke = Token(hashmap_toke[current_char], num_line, i)
        # Anything else: read a whole identifier-like word from here.
        else :
            # Get the complete expression (a word, a number or a keyword).
            word, rank = check_identifier(code[i:])
            # Multi-char keyword.
            if word in hashmap_toke :
                current_toke = Token(hashmap_toke[word], num_line, i)
            # Integer constant.
            elif RepresentsInt(word) :
                current_toke = Token("toke_const", num_line, i)
                current_toke.val = int(word)
            # Unknown char --> COMPILATION ERROR.
            # BUGFIX: the original compared `re.match(...) == False`, which is
            # always False because re.match returns a Match object or None.
            # `is None` expresses the intended "word is not identifier-shaped"
            # test (in practice check_identifier only yields [A-Za-z0-9_]*
            # prefixes, so the empty-word test is what actually fires).
            elif re.match(r"[a-zA-Z0-9_]*", word) is None or word == "" :
                # Only expressions made of letters, digits and underscores pass.
                # NOTE(review): error_compilation presumably aborts; if it ever
                # returns, current_toke would be unbound below -- confirm.
                error_compilation(Token("ERR", num_line, i + 1), "Incoherent char detected : " + code[i + rank] + ".")
            # Plain identifier.
            else :
                current_toke = Token("toke_id", num_line, i)
                current_toke.val = word
            # Skip the consumed word (the +1 at loop end completes the move).
            i = i + rank - 1
        # Two-char operator check: the current char plus the next one may
        # form a longer keyword (e.g. "==") that overrides the single token.
        if(i + 1 < len(code)):
            if(str(current_char + code[i+1]) in hashmap_toke) :
                current_toke = Token(hashmap_toke[str(current_char + code[i+1])], num_line, i)
                i = i + 1
        # Append the token unless it is a whitespace placeholder.
        if(current_toke.token != None) :
            tab_token.append(current_toke)
        # Go to the next char.
        i = i + 1
# Return an identifier (composed of letters, digits and/or underscores)
def check_identifier(text) :
    """Return (word, length) for the longest [A-Za-z0-9_]* prefix of *text*.

    word   -- the identifier-shaped prefix (possibly empty)
    length -- how many characters it spans, i.e. the caller's advance
    """
    # One match over the whole prefix replaces the original per-character
    # regex loop: same result, a single C-level pass instead of N matches.
    match = re.match(r"[a-zA-Z0-9_]*", text)
    return match.group(), match.end()
# Register the system function pow in the lexical analysis when *add* is true.
def add_pow_function(add) :
    """Tokenize the built-in pow_system source when *add* is true.

    Returns the injected source text (empty string when add is false).
    The tokenizer call is harmless on an empty string.
    """
    text = "pow_system(a,b){if(b==0){return 1;}var res;var i;res = a;for(i = 0;i < b-1;i = i + 1){res = res * a;}return res;}" if add else ""
    lexique_analyze_line(text, None)
    return text