// lex.cpp
// Glenn G. Chappell
// VERSION 3a
// 9 Feb 2009
//
// For CS 331 Spring 2009
// Source for class Lex
//  State-machine-based lexical analyzer class

#include "lex.h"    // for Lex class definition
#include <cassert>  // for assert
#include <string>
using std::string;
#include <cctype>
using std::isspace;
using std::isalpha;
using std::isdigit;
using std::isalnum;


// ************************************************************************
// class Lex - Definitions of member functions
// ************************************************************************


// isLetterChar, isDigitChar, isLetterOrDigitChar
// char-taking wrappers for isalpha, isdigit, isalnum (file-local helpers
//  for Lex::next).
// The <cctype> classifiers have undefined behavior when given a negative
//  argument other than EOF, which is what a plain char holding a byte
//  above 0x7F promotes to wherever char is signed. These wrappers convert
//  through unsigned char first. The value -1, which Lex::next treats as
//  "no current character" (see the assert in its START state), is never
//  classified as a letter or digit, so end-of-input handling does not
//  depend on the locale.
// Pre: None.
// Post:
//     Return value is true iff c is not -1 and is in the named
//      character class.
static bool isLetterChar(char c)
{
    return c != -1 && std::isalpha(static_cast<unsigned char>(c)) != 0;
}

static bool isDigitChar(char c)
{
    return c != -1 && std::isdigit(static_cast<unsigned char>(c)) != 0;
}

static bool isLetterOrDigitChar(char c)
{
    return c != -1 && std::isalnum(static_cast<unsigned char>(c)) != 0;
}


// next
// Returns next lexeme & advances where_
// Lexeme categories produced here:
//     IDENTIFIER - letter or '_', then letters, digits, '_'.
//     NUMBER     - optional single leading '+' or '-', then digits with
//                  at most one '.', containing at least one digit
//                  ("12", "+3.", "-.5", ".25").
//     OPERATOR   - "++", "--", or any other single character in the
//                  range ' ' .. '~' that does not begin one of the above.
//     ILLEGAL    - any other single character.
//     NONE       - returned (with empty text) when done() is true.
// Pre: None.
// Post:
//     Return value is lexeme beginning at index old value of where_,
//      or a NONE lexeme with text "" if done() was true on entry.
//     where_ is index of start of following lexeme,
//      or else input_.size(), if none (established by the call to skip).
Lex::Lexeme Lex::next()
{
    if (done())
        return Lexeme(NONE, "");

    // Every path that reaches DONE assigns type; the initializer only
    //  guarantees that it never holds an indeterminate value.
    Token type = ILLEGAL;  // Type of current lexeme
    string s;              // Text of current lexeme

    // The following rather lengthy loop does state-machine-based
    //  lexical analysis to find the lexeme in input_ that begins
    //  at index where_. States (type State) are named according
    //  to the shortest character sequence that allows one to reach
    //  that state. START is the start state, and DONE indicates a
    //  lexeme is ready to be output, and the loop can end.
    // NOTE(review): appendChar(s) is declared outside this file; the
    //  back-up code in PLUS_DOT/MINUS_DOT implies that it appends the
    //  current character to s and advances where_ by one - confirm
    //  against lex.h.
    enum State { START, LETTER, DIGIT, PLUS, MINUS, DOT,
                 DIG_DOT, PLUS_DOT, MINUS_DOT, DONE };
    State currState = START;
    while (currState != DONE)
    {
        // Character under consideration; it is consumed only by the
        //  branches that call appendChar.
        const char c = currChar();
        switch (currState)
        {
            case START:
                // done() was false, so there must be a character here
                assert(c != -1);
                // First character always belongs to the lexeme
                appendChar(s);
                if (isLetterChar(c) || c == '_')
                    currState = LETTER;
                else if (isDigitChar(c))
                    currState = DIGIT;
                else if (c == '+')
                    currState = PLUS;
                else if (c == '-')
                    currState = MINUS;
                else if (c == '.')
                    currState = DOT;
                else if (c >= ' ' && c <= '~')
                {
                    // Any other printable ASCII char: 1-char operator
                    type = OPERATOR;
                    currState = DONE;
                }
                else
                {
                    type = ILLEGAL;
                    currState = DONE;
                }
                break;

            case LETTER:
                if (isLetterOrDigitChar(c) || c == '_')
                    appendChar(s);
                else
                {
                    type = IDENTIFIER;
                    currState = DONE;
                }
                break;

            case DIGIT:
                // Integer part of a number (possibly signed)
                if (isDigitChar(c))
                    appendChar(s);
                else if (c == '.')
                {
                    appendChar(s);
                    currState = DIG_DOT;
                }
                else
                {
                    type = NUMBER;
                    currState = DONE;
                }
                break;

            case PLUS:
                if (c == '+')
                {
                    // "++" operator
                    appendChar(s);
                    type = OPERATOR;
                    currState = DONE;
                }
                else if (isDigitChar(c))
                {
                    // Sign of a number
                    appendChar(s);
                    currState = DIGIT;
                }
                else if (c == '.')
                {
                    // Number only if a digit follows; see PLUS_DOT
                    appendChar(s);
                    currState = PLUS_DOT;
                }
                else
                {
                    // Lone "+" operator
                    type = OPERATOR;
                    currState = DONE;
                }
                break;

            case MINUS:
                if (c == '-')
                {
                    // "--" operator
                    appendChar(s);
                    type = OPERATOR;
                    currState = DONE;
                }
                else if (isDigitChar(c))
                {
                    // Sign of a number
                    appendChar(s);
                    currState = DIGIT;
                }
                else if (c == '.')
                {
                    // Number only if a digit follows; see MINUS_DOT
                    appendChar(s);
                    currState = MINUS_DOT;
                }
                else
                {
                    // Lone "-" operator
                    type = OPERATOR;
                    currState = DONE;
                }
                break;

            case DOT:
                if (isDigitChar(c))
                {
                    // ".5" style number
                    appendChar(s);
                    currState = DIG_DOT;
                }
                else
                {
                    // Lone "." operator
                    type = OPERATOR;
                    currState = DONE;
                }
                break;

            case DIG_DOT:
                // Fractional part: a '.' has been seen, so a second
                //  '.' ends the number
                if (isDigitChar(c))
                    appendChar(s);
                else
                {
                    type = NUMBER;
                    currState = DONE;
                }
                break;

            case PLUS_DOT:
            case MINUS_DOT:
                // s holds a sign followed by '.'
                if (isDigitChar(c))
                {
                    appendChar(s);
                    currState = DIG_DOT;
                }
                else
                {
                    // Not a number after all. Back up a char & spit
                    //  out the sign alone as an operator; the '.' is
                    //  left to start the next lexeme.
                    --where_;
                    s.resize(s.size() - 1);  // Remove last char
                    type = OPERATOR;
                    currState = DONE;
                }
                break;

            case DONE:
                // Not reachable: the loop condition excludes DONE.
                //  Listed so that the switch covers every State.
                break;
        }
    }
    skip();
    return Lexeme(type, s);
}


// skip
// Skip whitespace & comments
// A comment begins with "//" and runs up to, but not including, the
//  next '\n' (or to the end of input_ if there is none).
// Advances where_ to beginning of next lexeme
// or input_.size() if none.
// Pre: None.
// Post:
//     where_ is index of start of next lexeme,
//      or else input_.size().
void Lex::skip()
{
    while (true)
    {
        // Skip whitespace.
        // The cast is required: isspace has undefined behavior for a
        //  negative argument other than EOF, which is what a plain
        //  char holding a byte above 0x7F promotes to where char is
        //  signed.
        while (where_ < input_.size()
               && isspace(static_cast<unsigned char>(input_[where_])))
            ++where_;
        // If not at slash, done
        if (where_ == input_.size() || input_[where_] != '/')
            return;
        // If not at double slash, done (a single '/' is left in place
        //  for the caller to handle as a lexeme)
        if (where_+1 == input_.size() || input_[where_+1] != '/')
            return;
        // Double slash! Found a comment; skip it
        where_ += 2;
        // Stop AT the newline rather than past it; the whitespace
        //  loop on the next pass consumes it.
        while (where_ != input_.size() && input_[where_] != '\n')
            ++where_;
    }
}
