// If you have comments or questions concerning this source file, discuss them in the forum.
/*
Copyright (c) 2002 Nicolai Haehnle
See the license.txt for details. If that file was not included in the
source distributions, please email <prefect@rtts.org>
*/
// lib_parser.cpp
#include "library.h"
/*
The parser understands C-style comments, C++-style comments (//)
and hashes as rest-of-line comments.
Normal strings are sequences of non-space characters which begin
with a letter or an underscore _
Quoted strings are a series of characters enclosed by "" C-style
escaping is performed inside. Supported escapes:
\n new-line character
\t tabulator
\\ the backslash itself
The parser understands ints and floats, as parsed by the C-library.
Values with a 0 fractional part are returned as int, even if they
are written as e.g. 3.00.
The following characters are considered special control characters
and returned as single tokens:
{ } ( ) [ ]
+ - * / % =
& | ^ ~ ?
: ; , .
Note that some of these characters may have a special meaning when
they appear inside numericals.
Every other character without a special meaning, such as " and ', is
considered an error.
Combinations of those characters (e.g. ++) are returned separately,
unless they have a special meaning (e.g. //).
Note that you can change which characters are considered special
characters.
*/
// Default set of single-character control tokens, written as one literal
// per group (brackets, arithmetic, bitwise/misc, punctuation). Adjacent
// string literals are concatenated by the compiler into a single string.
static const char *control_chars =
	"{}()[]"
	"+-*/%="
	"&|^~?"
	":;,.";
// One entry in the parser's chain of input buffers. LParser::AddBuffer
// allocates the structure with extra space behind it so that data[] can
// hold the complete text of the buffer.
struct l_buffer_s {
// buffer that becomes current once this one has been freed by
// LParser::EndBuffer (0 if there is none)
l_buffer_t *parent;
// current line number; starts at 1 and is bumped for every '\n' consumed
int linenum;
// buffer name used in error messages; names starting with '<' are
// treated as "not a file" by LParser::RelativePath
char name[MAX_OSPATH];
// read cursor into data[]
char *ptr;
// text storage. The text itself starts at data+1; the already-consumed
// front of the array is reused as scratch space for the current token.
// Must remain the last member, the real size is set at allocation time.
char data[1];
};
/*
==============
LParser::LParser
Creates a parser that has no input buffer yet and that recognises
the default set of control characters
==============
*/
LParser::LParser()
{
	// nothing to read from yet, and no token has been read
	m_pBuffer = 0;
	tok.type = 0;

	// defaults: the built-in control characters and a placeholder name
	// for error messages that are produced before any buffer is added
	m_pszControlChars = control_chars;
	strcpy(m_szBestName, "<empty>");
}
/*
==============
LParser::~LParser
Releases every buffer that is still queued
==============
*/
LParser::~LParser()
{
	// EndBuffer() frees the current buffer and moves m_pBuffer on to
	// the next one in the chain, so this loop terminates
	while (m_pBuffer != 0) {
		EndBuffer();
	}
}
/*
==============
LParser::Error
Formats the printf-style message and returns it as an error object,
prefixed with the current buffer name and line number. If there is no
current buffer, only the best known name is used as prefix.
The formatted message is limited to the size of the local buffer.
==============
*/
LError LParser::Error(const char *fmt, ...)
{
	char szMessage[256];
	va_list args;

	va_start(args, fmt);
	vsnprintf(szMessage, sizeof(szMessage), fmt, args);
	va_end(args);

	// without a buffer there is no line number to report
	if (!m_pBuffer)
		return LError("%s: %s", m_szBestName, szMessage);

	return LError("%s, %i: %s", m_pBuffer->name, m_pBuffer->linenum, szMessage);
}
/*
==============
LParser::EndBuffer
Free the current buffer and return to its parent
==============
*/
void LParser::EndBuffer()
{
l_buffer_t *pBuf;
lassert(m_pBuffer);
pBuf = m_pBuffer;
m_pBuffer = pBuf->parent;
if (m_pBuffer)
xstrcpy(m_szBestName, sizeof(m_szBestName), m_pBuffer->name);
L_Free(pBuf);
}
/*
==============
LParser::AddBuffer
Copies pszData into a newly allocated buffer called pszName and appends
it to the END of the buffer chain: it is read after every buffer that
was added before it has been consumed. If the chain was empty, the new
buffer becomes the current one immediately.

pszData must be a NUL-terminated string; the caller keeps ownership.
==============
*/
void LParser::AddBuffer(const char *pszName, const char *pszData)
{
	// strlen() returns size_t; keeping that type avoids a narrowing
	// conversion to int for very large inputs
	const size_t len = strlen(pszData);

	// l_buffer_t already contains one byte of data[], so this leaves room
	// for the leading scratch byte, the text and its terminating NUL
	l_buffer_t *pBuf = (l_buffer_t *)L_Malloc(sizeof(l_buffer_t)+len+1, TAG_PARSER);

	pBuf->parent = 0;
	pBuf->linenum = 1;
	xstrcpy(pBuf->name, sizeof(pBuf->name), pszName);

	// The +1 is correct; it's kind of a hack to allow inplace escaping
	// of strings etc.. without running into trouble if the very first
	// line of the buffer contains something like "foo{"
	// (token text is written to the front of data[], which therefore has
	// to stay at least one byte behind the read pointer)
	pBuf->data[0] = 0;	// scratch byte, give it a defined value
	pBuf->ptr = pBuf->data+1;
	memcpy(pBuf->data+1, pszData, len+1);

	// walk to the end of the chain and hook the new buffer in there
	l_buffer_t **pp = &m_pBuffer;
	while (*pp)
		pp = &(*pp)->parent;
	*pp = pBuf;

	// the chain was empty: the new buffer is the one being parsed now
	if (m_pBuffer == pBuf)
		xstrcpy(m_szBestName, sizeof(m_szBestName), pszName);
}
/*
==============
LParser::AddString
Queues the given string as parser input under the name "<string>".
The text is passed on to AddBuffer; the caller keeps ownership of
pszString.
==============
*/
void LParser::AddString(const char *pszString)
{
	// the '<' prefix marks the buffer as not being a real file
	// (RelativePath checks for it)
	static const char szBufferName[] = "<string>";

	AddBuffer(szBufferName, pszString);
}
/*
==============
LParser::AddFile
Opens the given file and queues its contents as parser input, using
the filename as buffer name.
Throws an exception if the file couldn't be loaded.
==============
*/
void LParser::AddFile(const char *pszFilename)
{
	LFileRead fr;

	fr.Open(pszFilename);

	// NOTE(review): AddBuffer treats this as a NUL-terminated string;
	// presumably LFileRead guarantees the terminator - confirm
	const char *pszContents = (const char *)fr.Data(0);

	AddBuffer(pszFilename, pszContents);
}
/*
==============
LParser::TryNextToken
Reads the next token from the buffer chain into tok.

Returns true if a token was read. Returns false once every buffer has
been consumed; tok.string is then "EOF" and tok.type is 0. Buffers that
are exhausted are freed on the way.

Throws an LError on a syntax error: unterminated comment, unterminated
quoted string, malformed numeric or unexpected character.

The token text is written to the front of the current buffer's data
area (which has already been consumed), so tok.string is overwritten
by the next token and dies together with the buffer.
==============
*/
bool LParser::TryNextToken()
{
	l_buffer_t *buf;
	char *start;
	char *dst;

restart:
	if (!m_pBuffer) {
		tok.string = "EOF";
		tok.type = 0;
		return false;
	}

	// Skip leading whitespaces (and comments)
	buf = m_pBuffer;
	for(;;) {
		// The <ctype.h> classifiers are only defined for values that are
		// representable as unsigned char, hence the casts in this function
		if (isspace((unsigned char)*buf->ptr)) {
			if (*buf->ptr == '\n')
				buf->linenum++;
			buf->ptr++;
			continue;
		}

		// single-line comments
		if (*buf->ptr == '#' || (buf->ptr[0] == '/' && buf->ptr[1] == '/')) {
			do {
				buf->ptr++;
			} while(*buf->ptr && *buf->ptr != '\n');

			// comment runs up to the end of the buffer
			if (!*buf->ptr) {
				EndBuffer();
				goto restart;
			}

			buf->linenum++;
			buf->ptr++; // skip the '\n'
			continue;
		}

		// C-style comments
		if (buf->ptr[0] == '/' && buf->ptr[1] == '*') {
			// remember where the comment started for the error message
			int line = buf->linenum;

			// Step over the opening "/*" first, so that its '*' cannot be
			// paired with a directly following '/' (i.e. "/*/" opens a
			// comment, it does not also close it)
			buf->ptr += 2;

			// ugly MSVC++ optimization bug.. I want to be 100% sure it doesn't happen anymore,
			// thus the volatile
#if defined(_MSC_VER)
#define bufptr(idx) (((volatile char *)buf->ptr)[idx])
#else
#define bufptr(idx) (buf->ptr[idx])
#endif
			while(bufptr(0)) {
				if (bufptr(0) == '*' && bufptr(1) == '/')
					break;
				if (bufptr(0) == '\n')
					buf->linenum++;
				buf->ptr++;
			}
			if (!bufptr(0))
				throw LError("%s, %i: unterminated comment", buf->name, line);
#undef bufptr

			buf->ptr += 2; // skip the */
			continue;
		}

		// end-of-buffer
		if (!*buf->ptr) {
			EndBuffer();
			goto restart;
		}

		break;
	}

	// got the beginning of a token, mark it
	start = buf->ptr;

	// Strings
	if (isalpha((unsigned char)*buf->ptr) || *buf->ptr == '_')
	{
		tok.type = TOK_STRING;

		// copy the token over; dst always stays behind the read pointer
		// because the text starts at data+1
		dst = buf->data;
		do {
			*dst++ = *buf->ptr++;
		} while(isalnum((unsigned char)*buf->ptr) || *buf->ptr == '_');
		*dst = 0;

		tok.string = buf->data;
		return true;
	}

	// "" Quoted Strings
	if (*buf->ptr == '"')
	{
		bool escape = false;

		tok.type = TOK_QUOTED;
		buf->ptr++; // skip the '"'

		// Copy the string over, and expand escapes
		// The expansion is done through an FSM which changes state
		// between escape and non-escape
		// NOTE(review): a '"' always ends the string, even directly after
		// a backslash; that trailing backslash is silently dropped
		for(dst = buf->data; *buf->ptr != '"'; buf->ptr++) {
			if (!*buf->ptr || *buf->ptr == '\r' || *buf->ptr == '\n')
				throw LError("%s, %i: unterminated quoted string",
						buf->name, buf->linenum);

			if (!escape)
			{
				if (*buf->ptr != '\\')
					*dst++ = *buf->ptr;
				else
					escape = true;
			}
			else
			{
				switch(*buf->ptr) {
				case 'n': *dst++ = '\n'; break;
				case 't': *dst++ = '\t'; break;
				case '\\': *dst++ = '\\'; break;
				// unknown escapes yield the escaped character itself
				default: *dst++ = *buf->ptr; break;
				}
				escape = false;
			}
		}
		*dst = 0;
		buf->ptr++; // skip the '"'

		tok.string = buf->data;
		return true;
	}

	// Numbers
	if (isdigit((unsigned char)*buf->ptr) ||
	    (buf->ptr[0] == '-' && isdigit((unsigned char)buf->ptr[1])))
	{
		int length;

		if (buf->ptr[0] == '0' && buf->ptr[1] == 'x') // special case, hex
		{
			buf->ptr += 2;
			tok.v.f = strtoul(buf->ptr, &buf->ptr, 16);
			tok.type = TOK_INT;
		}
		else
		{
			tok.v.f = strtod(buf->ptr, &buf->ptr);

			// determine whether it's int or float
			// NOTE(review): the (int) conversion is only defined while the
			// value fits into an int
			if (tok.v.f - (int)tok.v.f)
				tok.type = TOK_FLOAT;
			else
				tok.type = TOK_INT;
		}

		// a numeric must be followed by end-of-buffer, whitespace or a
		// control character
		if (*buf->ptr && !isspace((unsigned char)*buf->ptr) && !strchr(m_pszControlChars, *buf->ptr))
			throw LError("%s, %i: malformed numeric", buf->name, buf->linenum);

		// The source (start) and the destination (front of data[]) may be
		// as little as one byte apart, i.e. they overlap for any numeric
		// longer than that distance: memmove is required, memcpy is not
		// defined for overlapping regions
		length = (int)(buf->ptr - start);
		memmove(buf->data, start, length);
		buf->data[length] = 0;

		tok.string = buf->data;
		return true;
	}

	// Control Characters
	buf->ptr++;
	if (!strchr(m_pszControlChars, *start))
		throw LError("%s, %i: unexpected '%c'", buf->name, buf->linenum, *start);

	// the token type of a control character is the character itself.
	// *start has to be read before data[1] is written: start may be data+1
	tok.type = *start;
	buf->data[0] = *start;
	buf->data[1] = 0;

	tok.string = buf->data;
	return true;
}
/*
==============
LParser::NextToken
Reads the next token like TryNextToken, but treats running out of
input as an error and throws an exception in that case
==============
*/
void LParser::NextToken()
{
	const bool bGotToken = TryNextToken();
	if (bGotToken)
		return;

	// no buffer is left at this point, so only the name can be reported
	throw LError("%s: unexpected EOF", m_szBestName);
}
/*
==============
LParser::Expect
Reads the next token and checks that its text equals pszExpect.
Throws an exception if the input ends or if the token differs.
A quoted string is never accepted, whatever its contents.
==============
*/
void LParser::Expect(const char *pszExpect)
{
	NextToken();

	// the type is tested first, the text is only compared for unquoted tokens
	const bool bMatches = tok.type != TOK_QUOTED && strcmp(tok.string, pszExpect) == 0;
	if (bMatches)
		return;

	throw LError("%s, %i: '%s' expected instead of '%s'",
			m_pBuffer->name, m_pBuffer->linenum, pszExpect, tok.string);
}
/*
==============
LParser::AnyString
Reads the next token and returns its text if it is a plain string or
a quoted string. Throws an exception for any other token and at the
end of input.
==============
*/
const char *LParser::AnyString()
{
	NextToken();

	if (tok.type == TOK_QUOTED || tok.type == TOK_STRING)
		return tok.string;

	throw LError("%s, %i: string or quoted string expected instead of '%s'",
			m_pBuffer->name, m_pBuffer->linenum, tok.string);
}
/*
==============
LParser::TryString
Reads the next token and returns its text if it is a plain string.
Returns 0 when the end of input has been reached.
Throws an exception if the token is anything but a plain string.
==============
*/
const char *LParser::TryString()
{
	const bool bGotToken = TryNextToken();
	if (!bGotToken)
		return 0;

	if (tok.type == TOK_STRING)
		return tok.string;

	throw LError("%s, %i: string expected instead of '%s'", m_pBuffer->name,
			m_pBuffer->linenum, tok.string);
}
/*
==============
LParser::String
Reads the next token and returns its text if it is a plain string.
Throws an exception for every other token type (quoted strings
included) and at the end of input.
==============
*/
const char *LParser::String()
{
	NextToken();

	if (tok.type == TOK_STRING)
		return tok.string;

	throw LError("%s, %i: string expected instead of '%s'", m_pBuffer->name,
			m_pBuffer->linenum, tok.string);
}
/*
==============
LParser::Float
Reads the next token and returns its numeric value. Both integer and
floating point tokens are accepted; anything else (or the end of
input) throws an exception.
==============
*/
float LParser::Float()
{
	NextToken();

	const bool bNumeric = tok.type == TOK_FLOAT || tok.type == TOK_INT;
	if (bNumeric)
		return tok.v.f;

	throw LError("%s, %i: floating point value expected instead of '%s'",
			m_pBuffer->name, m_pBuffer->linenum, tok.string);
}
/*
==============
LParser::Integer
Reads the next token and returns its value converted to int.
Throws an exception if the token is not an integer or if the input
has ended.
==============
*/
int LParser::Integer()
{
	NextToken();

	if (tok.type == TOK_INT) {
		// the value is kept in tok.v.f, convert it on the way out
		const int value = (int)tok.v.f;
		return value;
	}

	throw LError("%s, %i: integer expected instead of '%s'", m_pBuffer->name,
			m_pBuffer->linenum, tok.string);
}
/*
==============
LParser::Unsigned
Reads the next token and returns its value converted to unsigned.
Throws an exception if the token is not an integer or if the input
has ended.
A negative value wraps around the way a signed-to-unsigned conversion
does (e.g. -1 yields the largest unsigned value).
==============
*/
unsigned LParser::Unsigned()
{
	NextToken();
	if (tok.type != TOK_INT)
		throw LError("%s, %i: unsigned integer expected instead of '%s'", m_pBuffer->name,
				m_pBuffer->linenum, tok.string);

	// Converting a negative floating point value directly to an unsigned
	// type is undefined. Go through int instead: int -> unsigned is a
	// well-defined modular conversion.
	if (tok.v.f < 0)
		return (unsigned)(int)tok.v.f;

	return (unsigned)tok.v.f;
}
/*
==============
LParser::RelativePath
Writes into buf the path of fname relative to the current script
buffer. If there is no current buffer, or its name starts with '<'
(not a real file), fname is copied unchanged.
==============
*/
void LParser::RelativePath(char *buf, int buflen, const char *fname)
{
	const bool bHaveScriptFile = m_pBuffer && m_pBuffer->name[0] != '<';

	if (bHaveScriptFile)
		L_RelativePath(buf, buflen, m_pBuffer->name, fname);
	else
		xstrcpy(buf, buflen, fname);
}