// If you have comments or questions concerning this source file, discuss them in the forum.
/*
Copyright (c) 2002 Nicolai Haehnle

See the license.txt for details. If that file was not included in the
source distributions, please email <prefect@rtts.org>
*/
// lib_parser.cpp

#include "library.h"

/*
The parser understands C-style comments, C++-style comments (//)
and hashes as rest-of-line comments.

Normal strings are sequences of letters, digits and underscores which
begin with a letter or an underscore _

Quoted strings are a series of characters enclosed by "" C-style
escaping is performed inside. Supported escapes:
    \n      new-line character
    \t      tabulator
    \\      the backslash itself

The parser understands ints and floats, as parsed by the C-library.
Values with a 0 fractional part are returned as int, even if they
are written as e.g. 3.00.

The following characters are considered special control characters
and returned as single tokens:

{ } ( ) [ ]
+ - * / % =
& | ^ ~ ?
: ; , .

Note that some of these characters may have a special meaning when
they appear inside numericals.

Every other character that has no special meaning (for example ' or @)
is considered an error. Note that " does have a meaning: it starts a
quoted string.

Combinations of those characters (e.g. ++) are returned separately,
unless they have a special meaning (e.g. //).

Note that you can change which characters are considered special
characters.
*/

// Default set of characters that are returned as single-character tokens.
// The LParser constructor installs this string as m_pszControlChars.
static const char *control_chars = "{}()[]+-*/%=&|^~?:;,.";

// One chunk of text handed to the parser. Buffers are chained through
// 'parent'; LParser::AddBuffer allocates each one with extra room behind
// the struct so that 'data' can hold the complete text.
struct l_buffer_s {
    l_buffer_t  *parent;    // buffer that becomes current when this one is freed (0 if none)

    int     linenum;        // current line, starts at 1 and is bumped for every '\n' consumed
    char    name[MAX_OSPATH];   // name reported in error messages

    char    *ptr;           // current read position inside data
    char    data[1];        // start of the text storage; must stay the last member
};

/*
==============
LParser::LParser

Start out without any buffer, with the default control
character set and "<empty>" as the name used in messages
==============
*/
LParser::LParser()
{
    // no input attached yet
    m_pBuffer = 0;
    tok.type = 0;

    // defaults that stay in effect until something else is set
    m_pszControlChars = control_chars;
    strcpy(m_szBestName, "<empty>");
}

/*
==============
LParser::~LParser

Release every buffer that is still chained to the parser
==============
*/
LParser::~LParser()
{
    // EndBuffer() frees the current buffer and makes its parent current,
    // so keep going until the chain is empty
    while(m_pBuffer != 0) {
        EndBuffer();
    }
}

/*
==============
LParser::Error

Build an error object from a printf-style message. The message is
prefixed with the current buffer's name and line number, or with
the best known name alone when no buffer is active.
==============
*/
LError LParser::Error(const char *fmt, ...)
{
    char szMessage[256];
    va_list args;

    // expand the caller's message first; it is cut off at the local buffer size
    va_start(args, fmt);
    vsnprintf(szMessage, sizeof(szMessage), fmt, args);
    va_end(args);

    // without a buffer there is no line number to report
    if (!m_pBuffer)
        return LError("%s: %s", m_szBestName, szMessage);

    return LError("%s, %i: %s", m_pBuffer->name, m_pBuffer->linenum, szMessage);
}

/*
==============
LParser::EndBuffer

Free the current buffer and return to its parent
==============
*/
void LParser::EndBuffer()
{
    l_buffer_t *pBuf;

    lassert(m_pBuffer);

    pBuf = m_pBuffer;
    m_pBuffer = pBuf->parent;
    if (m_pBuffer)
        xstrcpy(m_szBestName, sizeof(m_szBestName), m_pBuffer->name);

    L_Free(pBuf);
}

/*
==============
LParser::AddBuffer

Add a new buffer to the chain. The buffer will be used as soon
as the current buffer returns.
==============
*/
void LParser::AddBuffer(const char *pszName, const char *pszData)
{
    l_buffer_t *pBuf, **pp;
    int len;

    len = strlen(pszData);

    pBuf = (l_buffer_t *)L_Malloc(sizeof(l_buffer_t)+len+1, TAG_PARSER);
    pBuf->parent = 0;
    xstrcpy(pBuf->name, sizeof(pBuf->name), pszName);
    pBuf->linenum = 1;

    // The +1 is correct; it's kind of a hack to allow inplace escaping
    // of strings etc.. without running into trouble if the very first
    // line of the buffer contains something like "foo{"
    pBuf->ptr = pBuf->data+1;
    memcpy(pBuf->data+1, pszData, len+1);

    for(pp = &m_pBuffer; *pp; pp = &(*pp)->parent) ;
    *pp = pBuf;

    if (m_pBuffer == pBuf)
        xstrcpy(m_szBestName, sizeof(m_szBestName), pszName);
}

/*
==============
LParser::AddString

Queue the given string for parsing under the name "<string>".
The text is copied by AddBuffer, so the caller keeps ownership.
==============
*/
void LParser::AddString(const char *pszString)
{
    static const char szPseudoName[] = "<string>";

    AddBuffer(szPseudoName, pszString);
}

/*
==============
LParser::AddFile

Load the given file and queue its contents for parsing, using the
file name as buffer name.
An exception is expected if the file couldn't be loaded
(presumably raised by LFileRead; not visible from here).
==============
*/
void LParser::AddFile(const char *pszFilename)
{
    LFileRead fr;
    fr.Open(pszFilename);

    // AddBuffer copies the text, so the reader may go out of scope afterwards
    const char *pszContents = (char *)fr.Data(0);
    AddBuffer(pszFilename, pszContents);
}

/*
==============
LParser::TryNextToken

Tries to get the next token and stores it in tok.
Returns true if a token was available, or false when the end of
the last buffer has been reached (tok.string is then "EOF" and
tok.type is 0). Buffers that run out are freed on the way and
scanning continues with their parent.
An exception is thrown if there is a syntax error.

tok.string points to the start of the current buffer's data area,
which is overwritten by the next call.
==============
*/
bool LParser::TryNextToken()
{
    l_buffer_t *buf;
    char *start;
    char *dst;

restart:
    if (!m_pBuffer) {
        tok.string = "EOF";
        tok.type = 0;
        return false;
    }

    // Skip leading whitespaces (and comments)
    // Characters are cast to unsigned char before they are handed to the
    // <ctype.h> classifiers: passing a negative char value is undefined.
    buf = m_pBuffer;
    for(;;) {
        if (isspace((unsigned char)*buf->ptr)) {
            if (*buf->ptr == '\n')
                buf->linenum++;
            buf->ptr++;
            continue;
        }

        // single-line comments
        if (*buf->ptr == '#' || (buf->ptr[0] == '/' && buf->ptr[1] == '/')) {
            do {
                buf->ptr++;
            } while(*buf->ptr && *buf->ptr != '\n');

            if (!*buf->ptr) {
                EndBuffer();
                goto restart;
            }

            buf->linenum++;
            buf->ptr++;     // skip the '\n'
            continue;
        }

        // C-style comments
        if (buf->ptr[0] == '/' && buf->ptr[1] == '*') {
            int line = buf->linenum; // reported if the comment never ends

            // Step over the opening delimiter first, so that its '*' cannot
            // be paired with a directly following '/' (a lone slash-star-slash
            // only opens a comment, as in C)
            buf->ptr += 2;

// ugly MSVC++ optimization bug.. I want to be 100% sure it doesn't happen anymore,
// thus the volatile
#if defined(_MSC_VER)
#define bufptr(idx) (((volatile char *)buf->ptr)[idx])
#else
#define bufptr(idx) (buf->ptr[idx])
#endif

            while(bufptr(0)) {
                if (bufptr(0) == '*' && bufptr(1) == '/')
                    break;
                if (bufptr(0) == '\n')
                    buf->linenum++;
                buf->ptr++;
            }
            if (!bufptr(0))
                throw LError("%s, %i: unterminated comment", buf->name, line);

#undef bufptr

            buf->ptr += 2; // skip the closing delimiter
            continue;
        }

        // end-of-buffer
        if (!*buf->ptr) {
            EndBuffer();
            goto restart;
        }

        break;
    }

    // got the beginning of a token, mark it
    start = buf->ptr;

    // Strings
    if (isalpha((unsigned char)*buf->ptr) || *buf->ptr == '_')
    {
        tok.type = TOK_STRING;

        // copy the token over; the destination always trails the read
        // position because the text starts at data[1]
        dst = buf->data;
        do {
            *dst++ = *buf->ptr++;
        } while(isalnum((unsigned char)*buf->ptr) || *buf->ptr == '_');
        *dst = 0;

        tok.string = buf->data;

        return true;
    }

    // "" Quoted Strings
    if (*buf->ptr == '"')
    {
        bool escape;

        tok.type = TOK_QUOTED;

        buf->ptr++; // skip the '"'

        // Copy the string over, and expand escapes
        // The expansion is done through an FSM which changes state
        // between escape and non-escape
        // Note that the loop condition is tested before the escape state,
        // so a '"' always ends the string, even directly after a backslash
        escape = false;
        for(dst = buf->data; *buf->ptr != '"'; buf->ptr++) {
            if (!*buf->ptr || *buf->ptr == '\r' || *buf->ptr == '\n')
                throw LError("%s, %i: unterminated quoted string",
                        buf->name, buf->linenum);

            if (!escape)
            {
                if (*buf->ptr != '\\')
                    *dst++ = *buf->ptr;
                else
                    escape = true;
            }
            else
            {
                switch(*buf->ptr) {
                case 'n': *dst++ = '\n'; break;
                case 't': *dst++ = '\t'; break;
                case '\\': *dst++ = '\\'; break;
                default: *dst++ = *buf->ptr; break; // unknown escape: keep the character
                }

                escape = false;
            }
        }
        *dst = 0;

        buf->ptr++; // skip the '"'

        tok.string = buf->data;

        return true;
    }

    // Numbers
    if (isdigit((unsigned char)*buf->ptr) ||
        (buf->ptr[0] == '-' && isdigit((unsigned char)buf->ptr[1])))
    {
        int length;

        if (buf->ptr[0] == '0' && buf->ptr[1] == 'x') // special case, hex
        {
            buf->ptr += 2;
            tok.v.f = strtoul(buf->ptr, &buf->ptr, 16);
            tok.type = TOK_INT;
        }
        else
        {
            tok.v.f = strtod(buf->ptr, &buf->ptr);

            // determine whether it's int or float
            if (tok.v.f - (int)tok.v.f)
                tok.type = TOK_FLOAT;
            else
                tok.type = TOK_INT;
        }

        // a numeric must be followed by end of buffer, whitespace or a control character
        if (*buf->ptr && !isspace((unsigned char)*buf->ptr) && !strchr(m_pszControlChars, *buf->ptr))
            throw LError("%s, %i: malformed numeric", buf->name, buf->linenum);

        // Source and destination overlap whenever the token is longer than
        // its distance from the start of data (e.g. a number at the very
        // beginning of the buffer), so memmove is required here
        length = buf->ptr - start;
        memmove(buf->data, start, length);
        buf->data[length] = 0;
        tok.string = buf->data;

        return true;
    }

    // Control Characters
    buf->ptr++;

    if (!strchr(m_pszControlChars, *start))
        throw LError("%s, %i: unexpected '%c'", buf->name, buf->linenum, *start);

    // the token type of a control character is the character itself
    tok.type = *start;

    buf->data[0] = *start;
    buf->data[1] = 0;
    tok.string = buf->data;

    return true;
}

/*
==============
LParser::NextToken

Like TryNextToken, except that running out of input is an error:
an exception is thrown when no token is available
==============
*/
void LParser::NextToken()
{
    if (TryNextToken())
        return;

    throw LError("%s: unexpected EOF", m_szBestName);
}

/*
==============
LParser::Expect

Read the next token and require its text to equal the given string.
An exception is thrown at the end of input or when the token
differs. A quoted string is always treated as a mismatch.
==============
*/
void LParser::Expect(const char *pszExpect)
{
    NextToken();

    // quoted tokens are rejected before their text is even compared
    const bool bMatches = tok.type != TOK_QUOTED && strcmp(tok.string, pszExpect) == 0;
    if (bMatches)
        return;

    throw LError("%s, %i: '%s' expected instead of '%s'",
        m_pBuffer->name, m_pBuffer->linenum, pszExpect, tok.string);
}

/*
==============
LParser::AnyString

Read the next token, which must be either a plain or a quoted
string, and return its text. Anything else (including the end of
input) raises an exception.
==============
*/
const char *LParser::AnyString()
{
    NextToken();

    if (tok.type == TOK_QUOTED || tok.type == TOK_STRING)
        return tok.string;

    throw LError("%s, %i: string or quoted string expected instead of '%s'",
            m_pBuffer->name, m_pBuffer->linenum, tok.string);
}

/*
==============
LParser::TryString

Return the text of the next token if it is a plain string, or 0
when the input is exhausted.
An exception is thrown for any other kind of token.
==============
*/
const char *LParser::TryString()
{
    const bool bGotToken = TryNextToken();

    if (!bGotToken)
        return 0;

    if (tok.type == TOK_STRING)
        return tok.string;

    throw LError("%s, %i: string expected instead of '%s'", m_pBuffer->name,
            m_pBuffer->linenum, tok.string);
}

/*
==============
LParser::String

Read the next token and return its text. The token has to be a
plain string; quoted strings, other tokens and the end of input
all raise an exception.
==============
*/
const char *LParser::String()
{
    NextToken();

    if (tok.type == TOK_STRING)
        return tok.string;

    throw LError("%s, %i: string expected instead of '%s'", m_pBuffer->name,
            m_pBuffer->linenum, tok.string);
}

/*
==============
LParser::Float

Read the next token, which must be numeric (integer or floating
point), and return its value. Throws otherwise.
==============
*/
float LParser::Float()
{
    NextToken();

    // integers are perfectly fine where a float is wanted
    const bool bNumeric = (tok.type == TOK_FLOAT || tok.type == TOK_INT);
    if (bNumeric)
        return tok.v.f;

    throw LError("%s, %i: floating point value expected instead of '%s'",
            m_pBuffer->name, m_pBuffer->linenum, tok.string);
}

/*
==============
LParser::Integer

Read the next token, which must be an integer, and return its
value. Throws otherwise.
==============
*/
int LParser::Integer()
{
    NextToken();

    if (tok.type == TOK_INT)
        return (int)tok.v.f;

    throw LError("%s, %i: integer expected instead of '%s'", m_pBuffer->name,
            m_pBuffer->linenum, tok.string);
}

/*
==============
LParser::Unsigned

Read the next token, which must be an integer, and return its
value converted to unsigned. Throws otherwise.
NOTE(review): the sign is not checked, a negative integer token
passes the type test and is handed to the conversion as is.
==============
*/
unsigned LParser::Unsigned()
{
    NextToken();

    if (tok.type == TOK_INT)
        return (unsigned)tok.v.f;

    throw LError("%s, %i: unsigned integer expected instead of '%s'", m_pBuffer->name,
            m_pBuffer->linenum, tok.string);
}

/*
==============
LParser::RelativePath

Resolve fname relative to the buffer that is currently parsed.
When there is no buffer, or its name starts with '<' (as the
"<string>" pseudo name does), fname is copied unchanged.
==============
*/
void LParser::RelativePath(char *buf, int buflen, const char *fname)
{
    const bool bHasBaseName = m_pBuffer && m_pBuffer->name[0] != '<';

    if (bHasBaseName)
        L_RelativePath(buf, buflen, m_pBuffer->name, fname);
    else
        xstrcpy(buf, buflen, fname);
}