/*
 * JLex.java
 *
 * Created on Wed Dec 15 02:42:58 IST 2004
 */

package compiler;

import java.io.*;
import compiler.Token;
import compiler.TokenConstants;

/**
  * @author Sarangan Rajamanickam
  */


public class JLex implements TokenConstants{

    private FileReader inputfile;
    private PushbackReader input;
    private int location;
    private int temp;


    /**
     * Constructor for the class JLex.
     *
     * @param filename Name of the file to be seperated into tokens.
     * @exception FileNotFoundException.
     */    
    public JLex(String filename){
        try{
            inputfile=new FileReader(filename);
            input=new PushbackReader(inputfile);
        }catch(FileNotFoundException filenotfoundexception){
            System.out.println(filenotfoundexception);
        }
        location=0;
        temp=1;
        location=temp*1000;
    }

    /**
     * Actual function that generates tokens.
     *
     * @exception IOException.
     */

    public Token getToken() throws IOException{
        int ch;
        String temp_word="";

        /*
         * Ignore Whitespaces and Tabstops and Newline
         *
         */
        do{
            ch=input.read();
            if(ch=='\n'){
                temp++;
                location=temp*1000;
            }else{
                location++;
            }
        }while(Character.isWhitespace((char)ch) || ch=='\n');

        /*
         * Identifiers should start only with alphabets.Then they may have numbers or alphabets.
         * '_','$' is the only special character allowed.
         *
         */
        if(Character.isJavaIdentifierStart((char)ch)){
            temp_word+=(char)ch;
            location++;
            for(ch=input.read();Character.isJavaIdentifierPart((char)ch);ch=input.read()){
                temp_word+=(char)ch;
                location++;
            }
            input.unread(ch);
            int kind;
            kind=this.isKeyWord(temp_word);
            if(temp_word.equals("true")){
                return(new Token(kind,temp_word,1,location-temp_word.length()));
            }else{
                if(temp_word.equals("false")){
                    return(new Token(kind,temp_word,0,location-temp_word.length()));
                }else{
                    return(new Token(kind,temp_word,location-temp_word.length()));
                }
            }
        }

        /*
         * Numbers must be identified seperately.
         *
         * Only Integer Literals are allowed in our grammar.
         *
         */

        if(ch>=48 && ch<=57){
            temp_word+=(char)ch;
            location++;
            while((ch=input.read())>=48 && ch<=57){
                temp_word+=(char)ch;
                location++;
            }
            input.unread(ch);
            return(new Token(INTEGERLITERAL,temp_word,Integer.parseInt(temp_word),location-temp_word.length()));
        }

        /*
         * Symbols are identified seperately in this section.
         * 14 Symbols are used in the grammar.
         */
        if(ch=='{'){// LEFTSETBRACKET
            location++;
            temp_word+=(char)ch;
            return(new Token(LEFTSETBRACKET,temp_word,location));
        }    
        if(ch=='}'){// RIGHTSETBRACKET
            location++;
            temp_word+=(char)ch;
            return(new Token(RIGHTSETBRACKET,temp_word,location));
        }
        if(ch==';'){// SEMICOLON
            location++;
            temp_word+=(char)ch;
            return(new Token(SEMICOLON,temp_word,location));
        }
        if(ch==','){// COMMA
            location++;
            temp_word+=(char)ch;
            return(new Token(COMMA,temp_word,location));
        }
        if(ch=='='){// EQUALS
            location++;
            temp_word+=(char)ch;
            if((ch=input.read())=='='){//EQUIVALENT
                location++;
                temp_word+=(char)ch;
                return(new Token(EQUIVALENT,temp_word,location));
            }
            input.unread(ch);
            return(new Token(EQUALS,temp_word,location));
        }
        if(ch=='('){// LEFTBRACES
            location++;
            temp_word+=(char)ch;
            return(new Token(LEFTBRACES,temp_word,location));
        }
        if(ch==')'){// RIGHTBRACES
            location++;
            temp_word+=(char)ch;
            return(new Token(RIGHTBRACES,temp_word,location));
        }
        if(ch=='.'){// DOT
            location++;
            temp_word+=(char)ch;
            return(new Token(DOT,temp_word,location));
        }
        if(ch=='+'){// PLUS
            location++;
            temp_word+=(char)ch;
            return(new Token(PLUS,temp_word,location));
        }
        if(ch=='-'){// MINUS
            location++;
            temp_word+=(char)ch;
            return(new Token(MINUS,temp_word,location));
        }
        if(ch=='*'){// ASTRIEK
            location++;
            temp_word+=(char)ch;
            return(new Token(ASTRIEK,temp_word,location));
        }
        if(ch=='/'){// DIVIDES
            location++;
            temp_word+=(char)ch;
            return(new Token(DIVIDES,temp_word,location));            
        }
        if(ch=='%'){//MODULO
            location++;
            temp_word+=(char)ch;
            return(new Token(MODULO,temp_word,location));
        }
        if(ch=='<'){// LESSTHAN
            location++;
            temp_word+=(char)ch;
            if((ch=input.read())=='='){//LESSEQUALS
                location++;
                temp_word+=(char)ch;
                return(new Token(LESSEQUALS,temp_word,location));
            }
            input.unread(ch);
            return(new Token(LESSTHAN,temp_word,location));
        }
        if(ch=='>'){// GREATERTHAN
            location++;
            temp_word+=(char)ch;
            if((ch=input.read())=='='){//GREATEREQUALS
                location++;
                temp_word+=(char)ch;
                return(new Token(GREATEREQUALS,temp_word,location));
            }
            input.unread(ch);
            return(new Token(GREATERTHAN,temp_word,location));
        }
        if(ch=='!'){//NOT
            location++;
            temp_word+=(char)ch;
            if((ch=input.read())=='='){//NOTEQUIVALENT
                location++;
                temp_word+=(char)ch;
                return(new Token(NOTEQUIVALENT,temp_word,location));
            }
            input.unread(ch);
            return(new Token(NOT,temp_word,location));
        }
        if(ch=='&'){//AND start
            location++;
            temp_word+=(char)ch;
            if((ch=input.read())=='&'){//AND end
                location++;
                temp_word+=(char)ch;
                return(new Token(AND,temp_word,location));
            }
            input.unread(ch);
            return(new Token(ERROR,temp_word,location));
        }
        if(ch=='|'){//OR start
            location++;
            temp_word+=(char)ch;
            if((ch=input.read())=='|'){//OR end
                location++;
                temp_word+=(char)ch;
                return(new Token(OR,temp_word,location));
            }
            input.unread(ch);
            return(new Token(ERROR,temp_word,location));
        }

        
        /* -1 indicates the end of the file */
        if(ch==-1){
            location++;
            temp_word+=(char)ch;
            return(new Token(EOF,"eof",location));
        }

        location++;
        temp_word+=(char)ch;
        return(new Token(ERROR,temp_word,location));
    }

    /**
     * Checks that the tokenized string is a Keyword or Identifier.
     *
     * @return Returns true if it is a Keyword. Returns false if it is a identifier.
     *
     * @param word the actual token to be checked.
     */
    public int isKeyWord(String word){
        if(word.equals("public")) return PUBLIC;
        if(word.equals("private"))return PRIVATE;
        if(word.equals("protected"))return PROTECTED;
        if(word.equals("class"))return CLASS;
        if(word.equals("static"))return STATIC;
        if(word.equals("new"))return NEW;
        if(word.equals("void"))return VOID;
        if(word.equals("this"))return THIS;
        if(word.equals("return"))return RETURN;
        if(word.equals("final"))return FINAL;
        if(word.equals("int"))return INT;
        //if(word.equals("char"))return CHAR;
        if(word.equals("boolean"))return BOOLEAN;
        if(word.equals("if"))return IF;
        if(word.equals("else"))return ELSE;
        if(word.equals("while"))return WHILE;
        if(word.equals("true"))return BOOLEANLITERAL;
        if(word.equals("false"))return BOOLEANLITERAL;
        if(word.equals("null"))return NULLLITERAL;
        return IDENTIFIER;
    }
}
// Hosted by www.Geocities.ws