/*
* JLex.java
*
* Created on Wed Dec 15 02:42:58 IST 2004
*/
package compiler;
import java.io.*;
import compiler.Token;
import compiler.TokenConstants;
/**
* @author Sarangan Rajamanickam
*/
public class JLex implements TokenConstants{
private FileReader inputfile;
private PushbackReader input;
private int location;
private int temp;
/**
* Constructor for the class JLex.
*
* @param filename Name of the file to be seperated
into tokens.
* @exception FileNotFoundException.
*/
public JLex(String filename){
try{
inputfile=new
FileReader(filename);
input=new
PushbackReader(inputfile);
}catch(FileNotFoundException filenotfoundexception){
System.out.println(filenotfoundexception);
}
location=0;
temp=1;
location=temp*1000;
}
/**
* Actual function that generates tokens.
*
* @exception IOException.
*/
public Token getToken() throws IOException{
int ch;
String temp_word="";
/*
* Ignore Whitespaces and
Tabstops and Newline
*
*/
do{
ch=input.read();
if(ch=='\n'){
temp++;
location=temp*1000;
}else{
location++;
}
}while(Character.isWhitespace((char)ch)
|| ch=='\n');
/*
* Identifiers should start
only with alphabets.Then they may have numbers or alphabets.
* '_','$' is the only special
character allowed.
*
*/
if(Character.isJavaIdentifierStart((char)ch)){
temp_word+=(char)ch;
location++;
for(ch=input.read();Character.isJavaIdentifierPart((char)ch);ch=input.read()){
temp_word+=(char)ch;
location++;
}
input.unread(ch);
int kind;
kind=this.isKeyWord(temp_word);
if(temp_word.equals("true")){
return(new Token(kind,temp_word,1,location-temp_word.length()));
}else{
if(temp_word.equals("false")){
return(new Token(kind,temp_word,0,location-temp_word.length()));
}else{
return(new Token(kind,temp_word,location-temp_word.length()));
}
}
}
/*
* Numbers must be identified
seperately.
*
* Only Integer Literals
are allowed in our grammar.
*
*/
if(ch>=48 && ch<=57){
temp_word+=(char)ch;
location++;
while((ch=input.read())>=48
&& ch<=57){
temp_word+=(char)ch;
location++;
}
input.unread(ch);
return(new
Token(INTEGERLITERAL,temp_word,Integer.parseInt(temp_word),location-temp_word.length()));
}
/*
* Symbols are identified
seperately in this section.
* 14 Symbols are used in
the grammar.
*/
if(ch=='{'){// LEFTSETBRACKET
location++;
temp_word+=(char)ch;
return(new
Token(LEFTSETBRACKET,temp_word,location));
}
if(ch=='}'){// RIGHTSETBRACKET
location++;
temp_word+=(char)ch;
return(new
Token(RIGHTSETBRACKET,temp_word,location));
}
if(ch==';'){// SEMICOLON
location++;
temp_word+=(char)ch;
return(new
Token(SEMICOLON,temp_word,location));
}
if(ch==','){// COMMA
location++;
temp_word+=(char)ch;
return(new
Token(COMMA,temp_word,location));
}
if(ch=='='){// EQUALS
location++;
temp_word+=(char)ch;
if((ch=input.read())=='='){//EQUIVALENT
location++;
temp_word+=(char)ch;
return(new Token(EQUIVALENT,temp_word,location));
}
input.unread(ch);
return(new
Token(EQUALS,temp_word,location));
}
if(ch=='('){// LEFTBRACES
location++;
temp_word+=(char)ch;
return(new
Token(LEFTBRACES,temp_word,location));
}
if(ch==')'){// RIGHTBRACES
location++;
temp_word+=(char)ch;
return(new
Token(RIGHTBRACES,temp_word,location));
}
if(ch=='.'){// DOT
location++;
temp_word+=(char)ch;
return(new
Token(DOT,temp_word,location));
}
if(ch=='+'){// PLUS
location++;
temp_word+=(char)ch;
return(new
Token(PLUS,temp_word,location));
}
if(ch=='-'){// MINUS
location++;
temp_word+=(char)ch;
return(new
Token(MINUS,temp_word,location));
}
if(ch=='*'){// ASTRIEK
location++;
temp_word+=(char)ch;
return(new
Token(ASTRIEK,temp_word,location));
}
if(ch=='/'){// DIVIDES
location++;
temp_word+=(char)ch;
return(new
Token(DIVIDES,temp_word,location));
}
if(ch=='%'){//MODULO
location++;
temp_word+=(char)ch;
return(new
Token(MODULO,temp_word,location));
}
if(ch=='<'){// LESSTHAN
location++;
temp_word+=(char)ch;
if((ch=input.read())=='='){//LESSEQUALS
location++;
temp_word+=(char)ch;
return(new Token(LESSEQUALS,temp_word,location));
}
input.unread(ch);
return(new
Token(LESSTHAN,temp_word,location));
}
if(ch=='>'){// GREATERTHAN
location++;
temp_word+=(char)ch;
if((ch=input.read())=='='){//GREATEREQUALS
location++;
temp_word+=(char)ch;
return(new Token(GREATEREQUALS,temp_word,location));
}
input.unread(ch);
return(new
Token(GREATERTHAN,temp_word,location));
}
if(ch=='!'){//NOT
location++;
temp_word+=(char)ch;
if((ch=input.read())=='='){//NOTEQUIVALENT
location++;
temp_word+=(char)ch;
return(new Token(NOTEQUIVALENT,temp_word,location));
}
input.unread(ch);
return(new
Token(NOT,temp_word,location));
}
if(ch=='&'){//AND start
location++;
temp_word+=(char)ch;
if((ch=input.read())=='&'){//AND
end
location++;
temp_word+=(char)ch;
return(new Token(AND,temp_word,location));
}
input.unread(ch);
return(new
Token(ERROR,temp_word,location));
}
if(ch=='|'){//OR start
location++;
temp_word+=(char)ch;
if((ch=input.read())=='|'){//OR
end
location++;
temp_word+=(char)ch;
return(new Token(OR,temp_word,location));
}
input.unread(ch);
return(new
Token(ERROR,temp_word,location));
}
/* -1 indicates the end of the
file */
if(ch==-1){
location++;
temp_word+=(char)ch;
return(new
Token(EOF,"eof",location));
}
location++;
temp_word+=(char)ch;
return(new Token(ERROR,temp_word,location));
}
/**
* Checks that the tokenized string is a Keyword
or Identifier.
*
* @return Returns true if it is a Keyword. Returns
false if it is a identifier.
*
* @param word the actual token to be checked.
*/
public int isKeyWord(String word){
if(word.equals("public")) return
PUBLIC;
if(word.equals("private"))return
PRIVATE;
if(word.equals("protected"))return
PROTECTED;
if(word.equals("class"))return
CLASS;
if(word.equals("static"))return
STATIC;
if(word.equals("new"))return NEW;
if(word.equals("void"))return
VOID;
if(word.equals("this"))return
THIS;
if(word.equals("return"))return
RETURN;
if(word.equals("final"))return
FINAL;
if(word.equals("int"))return INT;
//if(word.equals("char"))return
CHAR;
if(word.equals("boolean"))return
BOOLEAN;
if(word.equals("if"))return IF;
if(word.equals("else"))return
ELSE;
if(word.equals("while"))return
WHILE;
if(word.equals("true"))return
BOOLEANLITERAL;
if(word.equals("false"))return
BOOLEANLITERAL;
if(word.equals("null"))return
NULLLITERAL;
return IDENTIFIER;
}
}