[ New messages · Members · Forum rules · Search · RSS ]
  • Page 1 of 1
  • 1
Forum » Test category » Test forum » pyQBTokenizer
pyQBTokenizer
ZlatkoDate: Sunday, 2018-03-18, 1:31 PM | Message # 1
Sergeant
Group: Administrators
Messages: 24
Reputation: 0
Status: Offline
Here is the start of the mod to o2:


Code
# Lexical tables consulted by tokenizer().
# These are plain Python assignments: the `string NAME[] = {...}` declaration
# form is not valid Python and prevents the module from being imported.

# Reserved words; tokenizer() compares the upper-cased word against this list.
KEYWORDS = [
    "CLS", "PRINT", "IF", "ELSE", "FOR", "TO", "END", "ENDIF",
    "WHILE", "WEND", "UNTIL", "DO", "LOOP", "THEN",
]

# Single-character operators and punctuation.
SYMBOLS = [";", "=", "(", ")", "+", "-", "*", "/", ",", "<", ">"]

# Characters that may start a word; '$' and '#' are included alongside letters.
ALPHABETS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz$#"

# Characters that may start a numeric literal.
NUMBERS = "0123456789"

# Characters that may continue a numeric literal. The spelling of the name is
# kept as-is because tokenizer() refers to it by this exact name.
NUMBERS_WITH_DECMALPOINT = NUMBERS + "."

# Digits plus word characters, and the same set extended with underscore.
ALPHANUMBERS = NUMBERS + ALPHABETS
ALPHANUMBERS_WITH_UNDERSCORE = ALPHANUMBERS + '_'

def tokenizer(codeFilePath):
    """Split a BASIC-style source file into a flat list of tokens.

    The file at ``codeFilePath`` is read in full and scanned left to right.
    Each token is a ``(text, kind)`` tuple, where ``kind`` is one of:

    * ``"KEYWORD"``        - a word whose upper-cased form is in ``KEYWORDS``
                             (the text is stored upper-cased)
    * ``"IDENTIFIER"``     - any other word
    * ``"SYMBOL"``         - a single character listed in ``SYMBOLS``
    * ``"NUMBER"``         - a digit followed by digits and/or ``.``
    * ``"STRING-LITERAL"`` - the text between two double quotes (quotes are
                             not included); an unterminated literal runs to
                             the end of the input
    * ``"WHITESPACE"``     - a run of spaces and/or tabs
    * ``"NEWLINE"``        - a single ``"\\n"``
    * ``"UNINDENTIFIED"``  - any other single character (the spelling of this
                             tag is kept so existing consumers keep matching)

    Args:
        codeFilePath: Path of the source file to tokenize.

    Returns:
        list of ``(text, kind)`` tuples in source order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the file; no explicit close() is needed.
    with open(codeFilePath, 'r') as codeFile:
        code = codeFile.read()

    # A tab must be spelled "\t": a multi-space string can never equal the
    # single character being examined, so tabs would otherwise go unrecognised.
    blanks = " \t"

    length = len(code)
    tokens = []
    i = 0
    while i < length:
        ch = code[i]

        if ch in ALPHABETS:
            # A word starts with an ALPHABETS character and may continue with
            # digits or '_' so that names such as ``x1`` or ``my_var`` stay
            # in one piece instead of being split into several tokens.
            j = i + 1
            while j < length and code[j] in ALPHANUMBERS_WITH_UNDERSCORE:
                j += 1
            word = code[i:j]
            if word.upper() in KEYWORDS:
                tokens.append((word.upper(), "KEYWORD"))
            else:
                tokens.append((word, "IDENTIFIER"))
            i = j

        elif ch in SYMBOLS:
            tokens.append((ch, "SYMBOL"))
            i += 1

        elif ch in NUMBERS:
            j = i + 1
            while j < length and code[j] in NUMBERS_WITH_DECMALPOINT:
                j += 1
            tokens.append((code[i:j], "NUMBER"))
            i = j

        elif ch == '"':
            # The literal body starts immediately after the opening quote.
            # Starting one character later would drop the first character
            # and mis-handle the empty literal "".
            start = i + 1
            end = code.find('"', start)
            if end == -1:
                # Unterminated literal: take everything up to end of input.
                end = length
            tokens.append((code[start:end], "STRING-LITERAL"))
            i = end + 1  # step over the closing quote

        elif ch in blanks:
            j = i + 1
            while j < length and code[j] in blanks:
                j += 1
            tokens.append((code[i:j], "WHITESPACE"))
            i = j

        elif ch == "\n":
            tokens.append((ch, "NEWLINE"))
            i += 1

        else:
            tokens.append((ch, "UNINDENTIFIED"))
            i += 1

    return tokens
 
Forum » Test category » Test forum » pyQBTokenizer
  • Page 1 of 1
  • 1
Search: