302 lines
5.6 KiB
Java
302 lines
5.6 KiB
Java
/*
 * @(#)Scanner.java 2.1 2003/10/07
 *
 * Copyright (C) 1999, 2003 D.A. Watt and D.F. Brown
 * Dept. of Computing Science, University of Glasgow, Glasgow G12 8QQ Scotland
 * and School of Computer and Math Sciences, The Robert Gordon University,
 * St. Andrew Street, Aberdeen AB25 1HG, Scotland.
 * All rights reserved.
 *
 * This software is provided free for educational use only. It may
 * not be used for commercial purposes without the prior written permission
 * of the authors.
 */
|
|
|
|
package triangle.syntacticAnalyzer;
|
|
|
|
public final class Scanner {
|
|
|
|
private SourceFile sourceFile;
|
|
private boolean debug;
|
|
|
|
private char currentChar;
|
|
private StringBuffer currentSpelling;
|
|
private boolean currentlyScanningToken;
|
|
|
|
private boolean isLetter(char c) {
|
|
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
|
|
}
|
|
|
|
private boolean isDigit(char c) {
|
|
return (c >= '0' && c <= '9');
|
|
}
|
|
|
|
// isOperator returns true iff the given character is an operator character.
|
|
|
|
private boolean isOperator(char c) {
|
|
return (c == '+' || c == '-' || c == '*' || c == '/' || c == '=' || c == '<' || c == '>' || c == '\\'
|
|
|| c == '&' || c == '@' || c == '%' || c == '^' || c == '?' || c == '|');
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
public Scanner(SourceFile source) {
|
|
sourceFile = source;
|
|
currentChar = sourceFile.getSource();
|
|
debug = false;
|
|
}
|
|
|
|
public void enableDebugging() {
|
|
debug = true;
|
|
}
|
|
|
|
// takeIt appends the current character to the current token, and gets
|
|
// the next character from the source program.
|
|
|
|
private void takeIt() {
|
|
if (currentlyScanningToken)
|
|
currentSpelling.append(currentChar);
|
|
currentChar = sourceFile.getSource();
|
|
}
|
|
|
|
// scanSeparator skips a single separator.
|
|
|
|
private void scanSeparator() {
|
|
switch (currentChar) {
|
|
|
|
// ! comment
|
|
case '!': {
|
|
takeIt();
|
|
while ((currentChar != SourceFile.EOL) && (currentChar != SourceFile.EOT))
|
|
takeIt();
|
|
if (currentChar == SourceFile.EOL)
|
|
takeIt();
|
|
}
|
|
break;
|
|
|
|
// new type of comment, the # comment, same as before when it comes to code
|
|
case '#':{
|
|
takeIt();
|
|
while((currentChar != SourceFile.EOL) && (currentChar != SourceFile.EOT))
|
|
takeIt();
|
|
if(currentChar == SourceFile.EOL)
|
|
takeIt();
|
|
}
|
|
break;
|
|
|
|
// new type of comment, the multi-line $ comment
|
|
case '$':{
|
|
takeIt();
|
|
//check if it is not the comment char or EOT, skipping new lines (no EOL)
|
|
while((currentChar != '$') && (currentChar != SourceFile.EOT))
|
|
takeIt();
|
|
//if we reach another $, takeIt and move on, we've reached the end of the multi-line comment
|
|
if(currentChar == '$')
|
|
takeIt();
|
|
}
|
|
break;
|
|
|
|
// whitespace
|
|
case ' ':
|
|
case '\n':
|
|
case '\r':
|
|
case '\t':
|
|
takeIt();
|
|
break;
|
|
}
|
|
}
|
|
|
|
private int scanToken() {
|
|
|
|
switch (currentChar) {
|
|
|
|
case 'a':
|
|
case 'b':
|
|
case 'c':
|
|
case 'd':
|
|
case 'e':
|
|
case 'f':
|
|
case 'g':
|
|
case 'h':
|
|
case 'i':
|
|
case 'j':
|
|
case 'k':
|
|
case 'l':
|
|
case 'm':
|
|
case 'n':
|
|
case 'o':
|
|
case 'p':
|
|
case 'q':
|
|
case 'r':
|
|
case 's':
|
|
case 't':
|
|
case 'u':
|
|
case 'v':
|
|
case 'w':
|
|
case 'x':
|
|
case 'y':
|
|
case 'z':
|
|
case 'A':
|
|
case 'B':
|
|
case 'C':
|
|
case 'D':
|
|
case 'E':
|
|
case 'F':
|
|
case 'G':
|
|
case 'H':
|
|
case 'I':
|
|
case 'J':
|
|
case 'K':
|
|
case 'L':
|
|
case 'M':
|
|
case 'N':
|
|
case 'O':
|
|
case 'P':
|
|
case 'Q':
|
|
case 'R':
|
|
case 'S':
|
|
case 'T':
|
|
case 'U':
|
|
case 'V':
|
|
case 'W':
|
|
case 'X':
|
|
case 'Y':
|
|
case 'Z':
|
|
takeIt();
|
|
while (isLetter(currentChar) || isDigit(currentChar))
|
|
takeIt();
|
|
return Token.IDENTIFIER;
|
|
|
|
case '0':
|
|
case '1':
|
|
case '2':
|
|
case '3':
|
|
case '4':
|
|
case '5':
|
|
case '6':
|
|
case '7':
|
|
case '8':
|
|
case '9':
|
|
takeIt();
|
|
while (isDigit(currentChar))
|
|
takeIt();
|
|
return Token.INTLITERAL;
|
|
|
|
case '+':
|
|
case '-':
|
|
case '*':
|
|
case '/':
|
|
case '=':
|
|
case '<':
|
|
case '>':
|
|
case '\\':
|
|
case '&':
|
|
case '@':
|
|
case '%':
|
|
case '^':
|
|
case '?':
|
|
takeIt();
|
|
while (isOperator(currentChar))
|
|
takeIt();
|
|
return Token.OPERATOR;
|
|
|
|
// TODO: Bar operator, week 3
|
|
case '|':
|
|
takeIt();
|
|
while(isOperator(currentChar))
|
|
takeIt();
|
|
return Token.OPERATOR;
|
|
|
|
case '\'':
|
|
takeIt();
|
|
takeIt(); // the quoted character
|
|
if (currentChar == '\'') {
|
|
takeIt();
|
|
return Token.CHARLITERAL;
|
|
} else
|
|
return Token.ERROR;
|
|
|
|
case '.':
|
|
takeIt();
|
|
return Token.DOT;
|
|
|
|
case ':':
|
|
takeIt();
|
|
if (currentChar == '=') {
|
|
takeIt();
|
|
return Token.BECOMES;
|
|
} else
|
|
return Token.COLON;
|
|
|
|
case ';':
|
|
takeIt();
|
|
return Token.SEMICOLON;
|
|
|
|
case ',':
|
|
takeIt();
|
|
return Token.COMMA;
|
|
|
|
case '~':
|
|
takeIt();
|
|
return Token.IS;
|
|
|
|
case '(':
|
|
takeIt();
|
|
return Token.LPAREN;
|
|
|
|
case ')':
|
|
takeIt();
|
|
return Token.RPAREN;
|
|
|
|
case '[':
|
|
takeIt();
|
|
return Token.LBRACKET;
|
|
|
|
case ']':
|
|
takeIt();
|
|
return Token.RBRACKET;
|
|
|
|
case '{':
|
|
takeIt();
|
|
return Token.LCURLY;
|
|
|
|
case '}':
|
|
takeIt();
|
|
return Token.RCURLY;
|
|
|
|
case SourceFile.EOT:
|
|
return Token.EOT;
|
|
|
|
default:
|
|
takeIt();
|
|
return Token.ERROR;
|
|
}
|
|
}
|
|
|
|
public Token scan() {
|
|
Token tok;
|
|
SourcePosition pos;
|
|
int kind;
|
|
|
|
currentlyScanningToken = false;
|
|
// skip any whitespace or comments
|
|
while (currentChar == '!' || currentChar == ' ' || currentChar == '\n' || currentChar == '\r'
|
|
|| currentChar == '\t' || currentChar == '#' || currentChar == '$')
|
|
scanSeparator();
|
|
|
|
currentlyScanningToken = true;
|
|
currentSpelling = new StringBuffer("");
|
|
pos = new SourcePosition();
|
|
pos.start = sourceFile.getCurrentLine();
|
|
|
|
kind = scanToken();
|
|
|
|
pos.finish = sourceFile.getCurrentLine();
|
|
tok = new Token(kind, currentSpelling.toString(), pos);
|
|
if (debug)
|
|
System.out.println(tok);
|
|
return tok;
|
|
}
|
|
|
|
}
|