用Java語言實現簡單的詞法分析器

whatthe89 9年前發布 | 17K 次閱讀 Java Java開發

詞法分析是編譯原理中很重要的一個部分,原理比較簡單,不過網上的實現大部分都是用C語言或者C++編寫的。筆者近期在學習Java,故用Java語言實現了一個簡單的詞法分析器。

要分析的代碼段如下:

輸入文件.PNG

輸出結果如下:

輸出結果(a).PNG

輸出結果(b).PNG

輸出結果(c).PNG

括號裡是一個二元式:(單詞類別編碼,單詞位置編號)。其中位置編號僅對標識符有意義(即該標識符在符號表中的編號),其餘單詞的位置編號均為0。

代碼如下:

package Yue.LexicalAnalyzer;

import java.io.*;

/**
 * Program entry point.
 */
public class Main {
    /**
     * Creates a {@code Lexer}, prints the token sequence it produces and then
     * prints the symbol table collected while scanning.
     *
     * @param args command-line arguments (not used)
     * @throws IOException propagated from the lexer's output methods
     */
    public static void main(String[] args) throws IOException {
        final Lexer analyzer = new Lexer();
        analyzer.printToken();
        analyzer.printSymbolsTable();
    }
}
package Yue.LexicalAnalyzer;

import java.io.*;
import java.util.*;

/**
 * Lexical analyzer: reads the input file character by character, groups the
 * characters into tokens and writes the results to the console and to files.
 *
 * <p>Input and output locations are hard-coded ({@code E:\輸入.txt},
 * {@code E:\lex.txt}, {@code E:\symtab1.txt}, {@code E:\error.txt}).
 *
 * <p>Keyword, operator, delimiter and end-marker tokens are shared static
 * instances whose {@code line} field is overwritten each time they are
 * recognised, so the line stored in a returned token is only reliable until
 * the same kind of token is scanned again.
 */
public class Lexer {
    /** Value held by {@link #character} once the input is exhausted. */
    private static final char EOF_CHAR = '\uffff';

    /** Current line number, starting at 1 (static, hence shared by all instances). */
    public static int line = 1;
    /** Most recently read character; acts as a one-character lookahead. */
    char character = ' ';

    /** Reserved words, keyed by spelling. */
    Hashtable<String, KeyWord> keywords = new Hashtable<String, KeyWord>();
    /** Every token recognised so far, in order. */
    private final List<Token> tokens = new ArrayList<Token>();
    /** Symbol table: one entry per distinct identifier, in order of first appearance. */
    private final List<Symbol> symtable = new ArrayList<Symbol>();

    /** Source of input characters; stays {@code null} if the input file could not be opened. */
    BufferedReader reader = null;
    /** Whether the end of the input has been reached. */
    private boolean isEnd = false;

    /** Whether the error file has already been created (and its header written) by this lexer. */
    private boolean errorLogStarted = false;
    /** Set by {@link #isSign()} when a two-character operator lacked its {@code '='}. */
    private boolean operatorRejected = false;
    /** First character of the operator that was rejected; valid while {@link #operatorRejected} is set. */
    private char rejectedChar = ' ';

    /**
     * Reports whether the end of the input has been reached.
     *
     * @return {@code true} once no more characters can be read
     */
    public Boolean getReaderState() {
        return isEnd;
    }

    /**
     * Scans the whole input and writes one line per token to the console and
     * to {@code E:\lex.txt}.
     *
     * @throws IOException if reading the input or writing the output fails
     */
    public void printToken() throws IOException {
        FileWriter writer = new FileWriter("E:\\lex.txt");
        try {
            System.out.println("詞法分析結果如下:");
            System.out.print("杜悅-2015220201031\r\n\n");
            writer.write("杜悅-2015220201031\r\n\r\n");
            while (true) {
                // Skip blanks first so that trailing blanks before the end of
                // the input do not produce a spurious token.
                skipBlanks();
                if (isEnd) {
                    break;
                }
                String str = describe(scan());
                writer.write(str);
                System.out.print(str);
            }
            writer.flush();
        } finally {
            writer.close();
        }
    }

    /**
     * Writes the symbol table (number, line, name) to the console and to
     * {@code E:\symtab1.txt}.
     *
     * @throws IOException if writing the output file fails
     */
    public void printSymbolsTable() throws IOException {
        FileWriter writer = new FileWriter("E:\\symtab1.txt");
        try {
            System.out.print("\r\n\r\n符號表\r\n");
            System.out.print("編號\t行號\t名稱\r\n");
            writer.write("符號表\r\n");
            writer.write("編號 " + "\t行號 " + "\t名稱 \r\n");
            for (Symbol symbol : symtable) {
                String desc = symbol.pos + "\t" + symbol.line + "\t" + symbol.toString();
                System.out.print(desc + "\r\n");
                writer.write(desc + "\r\n");
            }
            writer.flush();
        } finally {
            writer.close();
        }
    }

    /**
     * Records a lexical error in {@code E:\error.txt} and announces it on the
     * console. The first call made on this lexer recreates the file and writes
     * the header; later calls append, so every error is kept. The writer is
     * closed before returning so the text actually reaches the file.
     *
     * @param tok the token describing the offending character
     * @throws IOException if the error file cannot be written
     */
    public void printError(Token tok) throws IOException {
        FileWriter writer = new FileWriter("E:\\error.txt", errorLogStarted);
        try {
            System.out.print("\r\n\r\n錯誤詞法如下:\r\n");
            if (!errorLogStarted) {
                writer.write("錯誤詞法如下:\r\n");
                errorLogStarted = true;
            }
            writer.write(describe(tok));
        } finally {
            writer.close();
        }
    }

    /**
     * Registers a reserved word.
     *
     * @param w the keyword to add to {@link #keywords}
     */
    void reserve(KeyWord w) {
        keywords.put(w.lexme, w);
    }

    /**
     * Opens the input file and registers the reserved words. If the file
     * cannot be opened the exception is printed and the lexer starts out at
     * end of input, so scanning produces nothing instead of failing later.
     */
    public Lexer() {
        try {
            reader = new BufferedReader(new FileReader("E:\\輸入.txt"));
        } catch (IOException e) {
            System.out.print(e);
            isEnd = true;
        }

        this.reserve(KeyWord.begin);
        this.reserve(KeyWord.end);
        this.reserve(KeyWord.integer);
        this.reserve(KeyWord.function);
        this.reserve(KeyWord.read);
        this.reserve(KeyWord.write);
        this.reserve(KeyWord.aIf);
        this.reserve(KeyWord.aThen);
        this.reserve(KeyWord.aElse);
    }

    /**
     * Reads the next character into {@link #character}. When the input is
     * exhausted the reader is closed, the end flag is set and
     * {@link #character} becomes U+FFFF; further calls do not touch the reader.
     *
     * @throws IOException if reading or closing the input fails
     */
    public void readch() throws IOException {
        if (isEnd || reader == null) {
            isEnd = true;
            character = EOF_CHAR;
            return;
        }
        int next = reader.read();
        character = (char) next;
        if (next == -1) {
            isEnd = true;
            reader.close();
        }
    }

    /**
     * Reads the next character and tests whether it is {@code ch}.
     *
     * @param ch the expected character
     * @return {@code true} if it matched, in which case {@link #character} is
     *         reset to a blank; {@code false} otherwise, in which case
     *         {@link #character} holds the character that was read
     * @throws IOException if reading the input fails
     */
    public Boolean readch(char ch) throws IOException {
        readch();
        if (this.character != ch) {
            return false;
        }

        this.character = ' ';
        return true;
    }

    /**
     * Recognises an unsigned integer constant starting at the current character.
     *
     * <p>NOTE: the value is accumulated in an {@code int}; constants above
     * {@code Integer.MAX_VALUE} wrap around silently.
     *
     * @return {@code true} if a constant token was added to the token list
     * @throws IOException if reading the input fails
     */
    public Boolean isDigit() throws IOException {
        if (!Character.isDigit(character)) {
            return false;
        }
        int value = 0;
        while (Character.isDigit(character)) {
            value = 10 * value + Character.digit(character, 10);
            readch();
        }

        Num n = new Num(value);
        n.line = line;
        tokens.add(n);
        return true;
    }

    /**
     * Recognises a reserved word or an identifier starting at the current
     * character. A new identifier is appended to the symbol table and numbered
     * from 1; a repeated identifier reuses the number of its first occurrence.
     *
     * @return {@code true} if a keyword or identifier token was added to the token list
     * @throws IOException if reading the input fails
     */
    public Boolean isLetter() throws IOException {
        if (!Character.isLetter(character)) {
            return false;
        }

        // Collect the whole run of letters and digits.
        StringBuilder sb = new StringBuilder();
        while (Character.isLetterOrDigit(character)) {
            sb.append(character);
            readch();
        }
        String s = sb.toString();

        KeyWord w = keywords.get(s);
        if (w != null) {
            return emit(w);
        }

        Symbol sy = new Symbol(s);
        sy.line = line;
        Symbol known = null;
        for (Symbol candidate : symtable) {
            if (sy.toString().equals(candidate.toString())) {
                known = candidate;
                break;
            }
        }
        if (known == null) {
            sy.pos = symtable.size() + 1;
            symtable.add(sy);
        } else {
            sy.pos = known.pos;
        }
        tokens.add(sy);
        return true;
    }

    /**
     * Recognises an end marker, line end, delimiter or operator starting at
     * the current character. {@code "\r\n"}, a lone {@code '\r'} and a lone
     * {@code '\n'} are all accepted as a line end.
     *
     * @return {@code true} if a token was added to the token list;
     *         {@code false} if the current character starts no sign, or if it
     *         starts a two-character operator whose {@code '='} is missing (in
     *         that case the following character has already been read into
     *         {@link #character})
     * @throws IOException if reading the input fails
     */
    public Boolean isSign() throws IOException {
        switch (character) {
            case '#':
                readch();
                return emit(AllEnd.allEnd);
            case '\r':
                // On a match the '\n' is consumed and the lookahead must be
                // refilled; on a mismatch the lookahead is already in place.
                if (readch('\n')) {
                    readch();
                }
                return emitLineEnd();
            case '\n':
                readch();
                return emitLineEnd();
            case '(':
                readch();
                return emit(Delimiter.lpar);
            case ')':
                readch();
                return emit(Delimiter.rpar);
            case ';':
                readch();
                return emit(Delimiter.sem);
            case '+':
                readch();
                return emit(CalcWord.add);
            case '-':
                readch();
                return emit(CalcWord.sub);
            case '*':
                readch();
                return emit(CalcWord.mul);
            case '/':
                readch();
                return emit(CalcWord.div);
            case ':':
                return equalsOperator(CalcWord.assign);
            case '>':
                return equalsOperator(CalcWord.ge);
            case '<':
                return equalsOperator(CalcWord.le);
            case '!':
                return equalsOperator(CalcWord.ne);
            default:
                return false;
        }
    }

    /**
     * Returns the next token of the input.
     *
     * <p>A character that starts no token is reported through
     * {@link #printError(Token)} and returned as a plain {@link Token} whose
     * tag is that character; scanning then continues with the character after
     * it. If the input is already exhausted, a plain token for U+FFFF is
     * returned and no error is reported.
     *
     * @return the recognised token, or a plain token for an unrecognised character
     * @throws IOException if reading the input or writing the error file fails
     */
    public Token scan() throws IOException {
        skipBlanks();
        if (isEnd) {
            Token eof = new Token(character);
            eof.line = line;
            return eof;
        }

        operatorRejected = false;
        if (isDigit() || isSign()) {
            return lastToken();
        }
        if (operatorRejected) {
            // The lookahead already holds the character after the bad operator,
            // so nothing more must be consumed here.
            return reportError(rejectedChar);
        }
        if (isLetter()) {
            return lastToken();
        }

        char bad = character;
        readch(); // step past the offending character so scanning can progress
        return reportError(bad);
    }

    /** Skips blanks and tabs, leaving the first other character in {@link #character}. */
    private void skipBlanks() throws IOException {
        while (character == ' ' || character == '\t') {
            readch();
        }
    }

    /** Stamps {@code tok} with the current line and appends it to the token list. */
    private boolean emit(Token tok) {
        tok.line = line;
        tokens.add(tok);
        return true;
    }

    /** Emits the line-end token and advances the line counter. */
    private boolean emitLineEnd() {
        emit(LineEnd.lineEnd);
        line++;
        return true;
    }

    /** Recognises the two-character operator {@code op}, whose second character is {@code '='}. */
    private boolean equalsOperator(CalcWord op) throws IOException {
        char first = character;
        if (readch('=')) {
            readch();
            return emit(op);
        }
        rejectedChar = first;
        operatorRejected = true;
        return false;
    }

    /** Builds a plain token for {@code bad} on the current line and reports it as an error. */
    private Token reportError(char bad) throws IOException {
        Token tok = new Token(bad);
        tok.line = line;
        printError(tok);
        return tok;
    }

    /** Returns the token most recently appended to the token list. */
    private Token lastToken() {
        return tokens.get(tokens.size() - 1);
    }

    /** Formats the one-line report used for both the token listing and the error file. */
    private static String describe(Token tok) {
        return "line " + tok.line + "\t(" + tok.tag + "," + tok.pos + ")\t\t"
                + tok.name + ": " + tok.toString() + "\r\n";
    }
}
package Yue.LexicalAnalyzer;

/**
 * Base class of all tokens.
 */
public class Token {
    /** Category code of the token. */
    public final int tag;
    /** Line on which the token was found; defaults to 1. */
    public int line = 1;
    /** Display name of the token category; empty by default. */
    public String name = "";
    /** Position number of the token; defaults to 0. */
    public int pos = 0;

    /**
     * Creates a token with the given category code.
     *
     * @param t the category code
     */
    public Token(int t) {
        tag = t;
    }

    /**
     * Renders the tag as a single character.
     *
     * @return a one-character string whose character has the code {@code tag}
     */
    @Override
    public String toString() {
        return String.valueOf((char) tag);
    }

}
package Yue.LexicalAnalyzer;

/**
 * Category codes assigned to each kind of token.
 * Codes 10, 17, 21 and 22 are not assigned.
 */
public class Tag {
    // Reserved words
    public static final int BEGIN = 1;
    public static final int END = 2;
    public static final int INTEGER = 3;
    public static final int FUNCTION = 4;
    public static final int READ = 5;
    public static final int WRITE = 6;
    public static final int IF = 7;
    public static final int THEN = 8;
    public static final int ELSE = 9;

    /** Identifier. */
    public static final int SYMBOL = 11;
    /** Numeric constant. */
    public static final int CONSTANT = 12;

    // Operators
    /** Operator "+". */
    public static final int ADD = 13;
    /** Operator "-". */
    public static final int SUB = 14;
    /** Operator "*". */
    public static final int MUL = 15;
    /** Operator "/". */
    public static final int DIV = 16;
    /** Operator "<=". */
    public static final int LE = 18;
    /** Operator ">=". */
    public static final int GE = 19;
    /** Operator "!=". */
    public static final int NE = 20;
    /** Operator ":=". */
    public static final int ASSIGN = 23;

    // Delimiters
    /** Delimiter "(". */
    public static final int LPAR = 24;
    /** Delimiter ")". */
    public static final int RPAR = 25;
    /** Delimiter ";". */
    public static final int SEM = 26;

    /** End-of-line marker. */
    public static final int LINE_END = 27;
    /** End-of-input marker "#". */
    public static final int ALL_END = 28;
}
package Yue.LexicalAnalyzer;

/**
 * 保留字
 */
public class KeyWord extends Token {
    public String lexme = "";

    public KeyWord(String s, int t) {
        super(t);
        this.lexme = s;
        this.name = "保留字";
    }

    public String toString() {
        return this.lexme;
    }

    public static final KeyWord
            begin = new KeyWord("begin", Tag.BEGIN),
            end = new KeyWord("end", Tag.END),
            integer = new KeyWord("integer", Tag.INTEGER),
            function = new KeyWord("function", Tag.FUNCTION),
            read = new KeyWord("read", Tag.READ),
            write = new KeyWord("write", Tag.WRITE),
            aIf = new KeyWord("if", Tag.IF),
            aThen = new KeyWord("then", Tag.THEN),
            aElse = new KeyWord("else", Tag.ELSE);
}
package Yue.LexicalAnalyzer;

/*
 * 標識符
 */
public class Symbol extends Token {
    public String lexme = "";

    public Symbol(String s) {
        super(Tag.SYMBOL);
        this.lexme = s;
        this.name = "標識符";
    }

    public String toString() {
        return this.lexme;
    }

}
package Yue.LexicalAnalyzer;

/**
 * 運算符
 */
public class CalcWord extends Token {
    public String lexme = "";

    public CalcWord(String s, int t) {
        super(t);
        this.lexme = s;
        this.name = "運算符";
    }

    public String toString() {
        return this.lexme;
    }

    public static final CalcWord
            add = new CalcWord("+", Tag.ADD),
            sub = new CalcWord("-", Tag.SUB),
            mul = new CalcWord("*", Tag.MUL),
            div = new CalcWord("/", Tag.DIV),
            le = new CalcWord("<=", Tag.LE),
            ge = new CalcWord(">=", Tag.GE),
            ne = new CalcWord("!=", Tag.NE),
            assign = new CalcWord(":=", Tag.ASSIGN);
}
package Yue.LexicalAnalyzer;

/**
 * 界符
 */
public class Delimiter extends Token {
    public String lexme = "";

    public Delimiter(String s, int t) {
        super(t);
        this.lexme = s;
        this.name = "界符";
    }

    public String toString() {
        return this.lexme;
    }

    public static final Delimiter
            lpar = new Delimiter("(", Tag.LPAR),
            rpar = new Delimiter(")", Tag.RPAR),
            sem = new Delimiter(";", Tag.SEM);
}
package Yue.LexicalAnalyzer;

/*
 * 常數
 */
public class Num extends Token {
    public final int value;

    public Num(int v) {
        super(Tag.CONSTANT);
        this.value = v;
        this.name = "常數";
    }

    public String toString() {
        return "" + value;
    }
}
package Yue.LexicalAnalyzer;

/**
 * 行尾符
 */
public class LineEnd extends Token {
    public String lexme = "";

    public LineEnd(String s) {
        super(Tag.LINE_END);
        this.lexme = s;
        this.name = "行尾符";
    }

    public String toString() {
        return this.lexme;
    }

    public static final LineEnd lineEnd = new LineEnd("\r\n");
}
package Yue.LexicalAnalyzer;

/**
 * 結尾符
 */
public class AllEnd extends Token {
    public String lexme = "";

    public AllEnd(String s) {
        super(Tag.ALL_END);
        this.lexme = s;
        this.name = "結尾符";
    }

    public String toString() {
        return this.lexme;
    }

    public static final AllEnd allEnd = new AllEnd("#");
}

 

來自:http://www.jianshu.com/p/209f1fb6a827

 

 本文由用戶 whatthe89 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯繫管理員。
 轉載本站原創文章,請註明出處,並保留原始鏈接、圖片水印。
 本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!