// lex.h
// Glenn G. Chappell
// VERSION 3a
// 9 Feb 2009
//
// For CS 331 Spring 2009
// Header for class Lex
//  State-machine-based lexical analyzer class

#ifndef LEX_H_INCLUDED
#define LEX_H_INCLUDED

#include <string>   // for std::string
#include <cstdlib>  // for std::size_t
#include <utility>  // for std::pair


// ************************************************************************
// class Lex - Class definition
// ************************************************************************


// class Lex
// Lexical analyzer class
// Lexemes are as described by CS 311 handout (4 Feb 2009).
// Invariants:
//     0 <= where_ <= input_.size().
//     If where_ != input_.size(), then input_[where_]
//      is the start of a lexeme (as described in handout).
class Lex {

// ***** Lex: Types *****
public:

    // Token
    // Category of a lexeme. NONE is the first enumerator (value 0).
    enum Token { NONE, IDENTIFIER, OPERATOR, NUMBER, ILLEGAL };

    // Lexeme
    // A lexeme: its category (first) paired with its text (second).
    typedef std::pair<Token, std::string> Lexeme;

// ***** Lex: Ctors, dctor, op= *****
public:

    // Default ctor and ctor from string
    // Not declared explicit, so a std::string converts implicitly to a
    //  Lex; kept that way for compatibility with existing callers.
    // Pre: None.
    // Post:
    //     input_ == input.
    //     where_ is index of start of first lexeme,
    //      or input_.size() if no lexemes in input.
    Lex(const std::string & input = "")
        :input_(input),
         where_(0)
    { skip(); }  // Move where_ to the start of the first lexeme

    // Compiler-generated copy ctor, copy =, dctor used

// ***** Lex: General public functions *****
public:

    // setinput
    // Sets input "program" to given string, discarding any previous
    //  input and position.
    // Pre: None.
    // Post:
    //     input_ == input.
    //     where_ is index of start of first lexeme,
    //      or input_.size() if no lexemes in input.
    void setinput(const std::string & input)
    {
        input_ = input;
        where_ = 0;
        skip();  // Re-establish invariant: where_ at a lexeme start
    }

    // done
    // Returns true if no further lexemes
    // Pre: None.
    // Post:
    //     Return value is true if where_ == input_.size(),
    //      false otherwise.
    bool done() const
    { return where_ == input_.size(); }

    // next
    // Returns next lexeme & advances where_
    // (Defined outside this header.)
    // Pre: None.
    // Post:
    //     Return value is lexeme beginning at index old value of where_,
    //      or "" if where_ was input_.size().
    //     where_ is index of start of following lexeme,
    //      or else input_.size(), if none.
    Lexeme next();

// ***** Lex: Internal-use functions *****
private:

    // skip
    // Skip whitespace & comments
    // Advances where_ to beginning of next lexeme
    // or input_.size() if none.
    // (Defined outside this header.)
    // Pre: None.
    // Post:
    //     where_ is index of start of next lexeme,
    //      or else input_.size().
    void skip();

    // currChar
    // Return current character (input_[where_]) as a non-negative int,
    //  or -1 if there is no current character.
    // The character is converted through unsigned char. Where plain
    //  char is signed, a direct char-to-int conversion would make bytes
    //  with the high bit set negative -- and '\xFF' exactly -1, the
    //  same as the end-of-input sentinel. Going through unsigned char
    //  keeps every real character distinct from -1 and in the range
    //  that the <cctype> classification functions accept.
    // Pre: None.
    // Post:
    //     if where_ >= input_.size(), then return is -1.
    //     Otherwise, return is input_[where_] converted to
    //      unsigned char and then to int (so return >= 0).
    int currChar() const
    {
        // The class invariant gives where_ <= input_.size(); >= is used
        //  so that a broken invariant cannot cause an out-of-range read.
        if (where_ >= input_.size())
            return -1;
        return static_cast<int>(static_cast<unsigned char>(input_[where_]));
    }

    // appendChar
    // Append current character (input_[where_]) to given string,
    //  and advance where_ by one.
    // The precondition is NOT checked here; the caller must ensure it.
    // Pre:
    //     where_ != input_.size().
    // Post:
    //     input_[OLD where_] has been appended to s.
    //     where_ has been advanced.
    void appendChar(std::string & s)
    {
        s.push_back(input_[where_]);
        ++where_;
    }

// ***** Lex: data members *****
private:

    std::string input_;  // "Program" to analyze
    std::size_t where_;  // Index of start of next lexeme in input_,
                         //  or else input_.size().

};  // end of class Lex


#endif // LEX_H_INCLUDED
