Scanning: 1) rationalize end-of-input everywhere, 2) prevent infinite loop at end-of-input, 3) use positive chars.
Fixes issue #25 (char 255 aliased to -1, and missing tests for end of input). 1) All layers of input scanning now share a single EndOfInput value, which avoids translating it across layers of encapsulation. 2) Some places that looked for end of line did not stop on EndOfInput, so they could loop forever when the input ended without a newline. 3) Using "char" for the input made character values above 127 negative (where plain char is signed), which allowed 255 to alias to -1, and so on. This is fixed by using unsigned char.
This commit is contained in:
parent
91b977e172
commit
c777fc2c4c
15 changed files with 134 additions and 112 deletions
|
|
@ -150,7 +150,7 @@ int TPpContext::CPPdefine(TPpToken* ppToken)
|
|||
}
|
||||
token = scanToken(ppToken);
|
||||
} while (token == ',');
|
||||
if (token != ')') {
|
||||
if (token != ')') {
|
||||
parseContext.ppError(ppToken->loc, "missing parenthesis", "#define", "");
|
||||
|
||||
return token;
|
||||
|
|
@ -164,7 +164,7 @@ int TPpContext::CPPdefine(TPpToken* ppToken)
|
|||
// record the definition of the macro
|
||||
TSourceLoc defineLoc = ppToken->loc; // because ppToken is going to go to the next line before we report errors
|
||||
mac.body = new TokenStream;
|
||||
while (token != '\n') {
|
||||
while (token != '\n' && token != EndOfInput) {
|
||||
RecordToken(mac.body, token, ppToken);
|
||||
token = scanToken(ppToken);
|
||||
if (token != '\n' && ppToken->space)
|
||||
|
|
@ -245,13 +245,13 @@ int TPpContext::CPPelse(int matchelse, TPpToken* ppToken)
|
|||
int depth = 0;
|
||||
int token = scanToken(ppToken);
|
||||
|
||||
while (token != EOF) {
|
||||
while (token != EndOfInput) {
|
||||
if (token != '#') {
|
||||
while (token != '\n' && token != EOF)
|
||||
while (token != '\n' && token != EndOfInput)
|
||||
token = scanToken(ppToken);
|
||||
|
||||
if (token == EOF)
|
||||
return EOF;
|
||||
if (token == EndOfInput)
|
||||
return token;
|
||||
|
||||
token = scanToken(ppToken);
|
||||
continue;
|
||||
|
|
@ -314,7 +314,7 @@ int TPpContext::CPPelse(int matchelse, TPpToken* ppToken)
|
|||
// Call when there should be no more tokens left on a line.
|
||||
int TPpContext::extraTokenCheck(int atom, TPpToken* ppToken, int token)
|
||||
{
|
||||
if (token != '\n') {
|
||||
if (token != '\n' && token != EndOfInput) {
|
||||
static const char* message = "unexpected tokens following directive";
|
||||
|
||||
const char* label;
|
||||
|
|
@ -336,7 +336,7 @@ int TPpContext::extraTokenCheck(int atom, TPpToken* ppToken, int token)
|
|||
else
|
||||
parseContext.ppError(ppToken->loc, message, label, "");
|
||||
|
||||
while (token != '\n')
|
||||
while (token != '\n' && token != EndOfInput)
|
||||
token = scanToken(ppToken);
|
||||
}
|
||||
|
||||
|
|
@ -586,7 +586,7 @@ int TPpContext::CPPifdef(int defined, TPpToken* ppToken)
|
|||
token = scanToken(ppToken);
|
||||
if (token != '\n') {
|
||||
parseContext.ppError(ppToken->loc, "unexpected tokens following #ifdef directive - expected a newline", "#ifdef", "");
|
||||
while (token != '\n')
|
||||
while (token != '\n' && token != EndOfInput)
|
||||
token = scanToken(ppToken);
|
||||
}
|
||||
if (((s && !s->mac.undef) ? 1 : 0) != defined)
|
||||
|
|
@ -648,7 +648,7 @@ int TPpContext::CPPerror(TPpToken* ppToken)
|
|||
std::string message;
|
||||
TSourceLoc loc = ppToken->loc;
|
||||
|
||||
while (token != '\n') {
|
||||
while (token != '\n' && token != EndOfInput) {
|
||||
if (token == PpAtomConstInt || token == PpAtomConstUint ||
|
||||
token == PpAtomConstFloat || token == PpAtomConstDouble) {
|
||||
message.append(ppToken->name);
|
||||
|
|
@ -675,7 +675,7 @@ int TPpContext::CPPpragma(TPpToken* ppToken)
|
|||
|
||||
TSourceLoc loc = ppToken->loc; // because we go to the next line before processing
|
||||
int token = scanToken(ppToken);
|
||||
while (token != '\n' && token != EOF) {
|
||||
while (token != '\n' && token != EndOfInput) {
|
||||
switch (token) {
|
||||
case PpAtomIdentifier:
|
||||
case PpAtomConstInt:
|
||||
|
|
@ -692,7 +692,7 @@ int TPpContext::CPPpragma(TPpToken* ppToken)
|
|||
token = scanToken(ppToken);
|
||||
}
|
||||
|
||||
if (token == EOF)
|
||||
if (token == EndOfInput)
|
||||
parseContext.ppError(loc, "directive must end with a newline", "#pragma", "");
|
||||
else
|
||||
parseContext.handlePragma(loc, tokens);
|
||||
|
|
@ -810,7 +810,7 @@ int TPpContext::readCPPline(TPpToken* ppToken)
|
|||
parseContext.ppError(ppToken->loc, "#elif after #else", "#elif", "");
|
||||
// this token is really a dont care, but we still need to eat the tokens
|
||||
token = scanToken(ppToken);
|
||||
while (token != '\n')
|
||||
while (token != '\n' && token != EndOfInput)
|
||||
token = scanToken(ppToken);
|
||||
token = CPPelse(0, ppToken);
|
||||
break;
|
||||
|
|
@ -854,10 +854,10 @@ int TPpContext::readCPPline(TPpToken* ppToken)
|
|||
parseContext.ppError(ppToken->loc, "invalid directive:", "#", ppToken->name);
|
||||
break;
|
||||
}
|
||||
} else if (token != '\n' && token != EOF)
|
||||
} else if (token != '\n' && token != EndOfInput)
|
||||
parseContext.ppError(ppToken->loc, "invalid directive", "#", "");
|
||||
|
||||
while (token != '\n' && token != 0 && token != EOF)
|
||||
while (token != '\n' && token != EndOfInput)
|
||||
token = scanToken(ppToken);
|
||||
|
||||
return token;
|
||||
|
|
@ -872,9 +872,9 @@ TPpContext::TokenStream* TPpContext::PrescanMacroArg(TokenStream* a, TPpToken* p
|
|||
token = ReadToken(a, ppToken);
|
||||
if (token == PpAtomIdentifier && LookUpSymbol(ppToken->atom))
|
||||
break;
|
||||
} while (token != tInput::endOfInput);
|
||||
} while (token != EndOfInput);
|
||||
|
||||
if (token == tInput::endOfInput)
|
||||
if (token == EndOfInput)
|
||||
return a;
|
||||
|
||||
n = new TokenStream;
|
||||
|
|
@ -914,17 +914,17 @@ int TPpContext::tMacroInput::scan(TPpToken* ppToken)
|
|||
}
|
||||
}
|
||||
|
||||
if (token == endOfInput)
|
||||
if (token == EndOfInput)
|
||||
mac->busy = 0;
|
||||
|
||||
return token;
|
||||
}
|
||||
|
||||
// return a zero, for scanning a macro that was never defined
|
||||
// return a textual zero, for scanning a macro that was never defined
|
||||
int TPpContext::tZeroInput::scan(TPpToken* ppToken)
|
||||
{
|
||||
if (done)
|
||||
return endOfInput;
|
||||
return EndOfInput;
|
||||
|
||||
strcpy(ppToken->name, "0");
|
||||
ppToken->ival = 0;
|
||||
|
|
@ -1013,14 +1013,14 @@ int TPpContext::MacroExpand(int atom, TPpToken* ppToken, bool expandUndef, bool
|
|||
depth = 0;
|
||||
while (1) {
|
||||
token = scanToken(ppToken);
|
||||
if (token == EOF) {
|
||||
parseContext.ppError(loc, "EOF in macro", "macro expansion", GetAtomString(atom));
|
||||
if (token == EndOfInput) {
|
||||
parseContext.ppError(loc, "End of input in macro", "macro expansion", GetAtomString(atom));
|
||||
delete in;
|
||||
return 0;
|
||||
}
|
||||
if (token == '\n') {
|
||||
if (! newLineOkay) {
|
||||
parseContext.ppError(loc, "end of line in macro substitution:", "macro expansion", GetAtomString(atom));
|
||||
parseContext.ppError(loc, "End of line in macro substitution:", "macro expansion", GetAtomString(atom));
|
||||
delete in;
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -1055,7 +1055,7 @@ int TPpContext::MacroExpand(int atom, TPpToken* ppToken, bool expandUndef, bool
|
|||
parseContext.ppError(loc, "Too few args in Macro", "macro expansion", GetAtomString(atom));
|
||||
else if (token != ')') {
|
||||
depth=0;
|
||||
while (token != EOF && (depth > 0 || token != ')')) {
|
||||
while (token != EndOfInput && (depth > 0 || token != ')')) {
|
||||
if (token == ')')
|
||||
depth--;
|
||||
token = scanToken(ppToken);
|
||||
|
|
@ -1063,8 +1063,8 @@ int TPpContext::MacroExpand(int atom, TPpToken* ppToken, bool expandUndef, bool
|
|||
depth++;
|
||||
}
|
||||
|
||||
if (token == EOF) {
|
||||
parseContext.ppError(loc, "EOF in macro", "macro expansion", GetAtomString(atom));
|
||||
if (token == EndOfInput) {
|
||||
parseContext.ppError(loc, "End of input in macro", "macro expansion", GetAtomString(atom));
|
||||
delete in;
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -144,18 +144,12 @@ int TPpContext::LookUpAddString(const char* s)
|
|||
//
|
||||
const char* TPpContext::GetAtomString(int atom)
|
||||
{
|
||||
if (atom == 0)
|
||||
return "<null atom>";
|
||||
if (atom < 0)
|
||||
return "<EOF>";
|
||||
if ((size_t)atom < stringMap.size()) {
|
||||
if (stringMap[atom] == 0)
|
||||
return "<invalid atom>";
|
||||
else
|
||||
return stringMap[atom]->c_str();
|
||||
}
|
||||
if ((size_t)atom >= stringMap.size())
|
||||
return "<bad token>";
|
||||
|
||||
return "<invalid atom>";
|
||||
const TString* atomString = stringMap[atom];
|
||||
|
||||
return atomString ? atomString->c_str() : "<bad token>";
|
||||
}
|
||||
|
||||
//
|
||||
|
|
|
|||
|
|
@ -134,8 +134,6 @@ public:
|
|||
virtual int getch() = 0;
|
||||
virtual void ungetch() = 0;
|
||||
|
||||
static const int endOfInput = -2;
|
||||
|
||||
protected:
|
||||
bool done;
|
||||
TPpContext* pp;
|
||||
|
|
@ -210,21 +208,18 @@ protected:
|
|||
|
||||
// Get the next token from *stack* of input sources, popping input sources
|
||||
// that are out of tokens, down until an input sources is found that has a token.
|
||||
// Return EOF when there are no more tokens to be found by doing this.
|
||||
// Return EndOfInput when there are no more tokens to be found by doing this.
|
||||
int scanToken(TPpToken* ppToken)
|
||||
{
|
||||
int token = EOF;
|
||||
int token = EndOfInput;
|
||||
|
||||
while (! inputStack.empty()) {
|
||||
token = inputStack.back()->scan(ppToken);
|
||||
if (token != tInput::endOfInput)
|
||||
if (token != EndOfInput)
|
||||
break;
|
||||
popInput();
|
||||
}
|
||||
|
||||
if (token == tInput::endOfInput)
|
||||
return EOF;
|
||||
|
||||
return token;
|
||||
}
|
||||
int getChar() { return inputStack.back()->getch(); }
|
||||
|
|
@ -248,7 +243,7 @@ protected:
|
|||
}
|
||||
|
||||
virtual int scan(TPpToken*);
|
||||
virtual int getch() { assert(0); return endOfInput; }
|
||||
virtual int getch() { assert(0); return EndOfInput; }
|
||||
virtual void ungetch() { assert(0); }
|
||||
MacroSymbol *mac;
|
||||
TVector<TokenStream*> args;
|
||||
|
|
@ -260,12 +255,12 @@ protected:
|
|||
virtual int scan(TPpToken*)
|
||||
{
|
||||
if (done)
|
||||
return endOfInput;
|
||||
return EndOfInput;
|
||||
done = true;
|
||||
|
||||
return marker;
|
||||
}
|
||||
virtual int getch() { assert(0); return endOfInput; }
|
||||
virtual int getch() { assert(0); return EndOfInput; }
|
||||
virtual void ungetch() { assert(0); }
|
||||
static const int marker = -3;
|
||||
};
|
||||
|
|
@ -274,7 +269,7 @@ protected:
|
|||
public:
|
||||
tZeroInput(TPpContext* pp) : tInput(pp) { }
|
||||
virtual int scan(TPpToken*);
|
||||
virtual int getch() { assert(0); return endOfInput; }
|
||||
virtual int getch() { assert(0); return EndOfInput; }
|
||||
virtual void ungetch() { assert(0); }
|
||||
};
|
||||
|
||||
|
|
@ -328,7 +323,7 @@ protected:
|
|||
public:
|
||||
tTokenInput(TPpContext* pp, TokenStream* t) : tInput(pp), tokens(t) { }
|
||||
virtual int scan(TPpToken *);
|
||||
virtual int getch() { assert(0); return endOfInput; }
|
||||
virtual int getch() { assert(0); return EndOfInput; }
|
||||
virtual void ungetch() { assert(0); }
|
||||
protected:
|
||||
TokenStream *tokens;
|
||||
|
|
@ -338,7 +333,7 @@ protected:
|
|||
public:
|
||||
tUngotTokenInput(TPpContext* pp, int t, TPpToken* p) : tInput(pp), token(t), lval(*p) { }
|
||||
virtual int scan(TPpToken *);
|
||||
virtual int getch() { assert(0); return endOfInput; }
|
||||
virtual int getch() { assert(0); return EndOfInput; }
|
||||
virtual void ungetch() { assert(0); }
|
||||
protected:
|
||||
int token;
|
||||
|
|
|
|||
|
|
@ -253,12 +253,9 @@ int TPpContext::tStringInput::scan(TPpToken* ppToken)
|
|||
len = 0;
|
||||
switch (ch) {
|
||||
default:
|
||||
// Single character token, including '#' and '\' (escaped newlines are handled at a lower level, so this is just a '\' token)
|
||||
// Single character token, including EndOfInput, '#' and '\' (escaped newlines are handled at a lower level, so this is just a '\' token)
|
||||
return ch;
|
||||
|
||||
case EOF:
|
||||
return endOfInput;
|
||||
|
||||
case 'A': case 'B': case 'C': case 'D': case 'E':
|
||||
case 'F': case 'G': case 'H': case 'I': case 'J':
|
||||
case 'K': case 'L': case 'M': case 'N': case 'O':
|
||||
|
|
@ -590,28 +587,25 @@ int TPpContext::tStringInput::scan(TPpToken* ppToken)
|
|||
pp->inComment = true;
|
||||
do {
|
||||
ch = pp->getChar();
|
||||
} while (ch != '\n' && ch != EOF);
|
||||
} while (ch != '\n' && ch != EndOfInput);
|
||||
ppToken->space = true;
|
||||
pp->inComment = false;
|
||||
|
||||
if (ch == EOF)
|
||||
return endOfInput;
|
||||
|
||||
return ch;
|
||||
} else if (ch == '*') {
|
||||
ch = pp->getChar();
|
||||
do {
|
||||
while (ch != '*') {
|
||||
if (ch == EOF) {
|
||||
pp->parseContext.ppError(ppToken->loc, "EOF in comment", "comment", "");
|
||||
return endOfInput;
|
||||
if (ch == EndOfInput) {
|
||||
pp->parseContext.ppError(ppToken->loc, "End of input in comment", "comment", "");
|
||||
return ch;
|
||||
}
|
||||
ch = pp->getChar();
|
||||
}
|
||||
ch = pp->getChar();
|
||||
if (ch == EOF) {
|
||||
pp->parseContext.ppError(ppToken->loc, "EOF in comment", "comment", "");
|
||||
return endOfInput;
|
||||
if (ch == EndOfInput) {
|
||||
pp->parseContext.ppError(ppToken->loc, "End of input in comment", "comment", "");
|
||||
return ch;
|
||||
}
|
||||
} while (ch != '/');
|
||||
ppToken->space = true;
|
||||
|
|
@ -626,7 +620,7 @@ int TPpContext::tStringInput::scan(TPpToken* ppToken)
|
|||
break;
|
||||
case '"':
|
||||
ch = pp->getChar();
|
||||
while (ch != '"' && ch != '\n' && ch != EOF) {
|
||||
while (ch != '"' && ch != '\n' && ch != EndOfInput) {
|
||||
if (len < MaxTokenLength) {
|
||||
tokenText[len] = (char)ch;
|
||||
len++;
|
||||
|
|
@ -637,7 +631,7 @@ int TPpContext::tStringInput::scan(TPpToken* ppToken)
|
|||
tokenText[len] = '\0';
|
||||
if (ch != '"') {
|
||||
pp->ungetChar();
|
||||
pp->parseContext.ppError(ppToken->loc, "end of line in string", "string", "");
|
||||
pp->parseContext.ppError(ppToken->loc, "End of line in string", "string", "");
|
||||
}
|
||||
return PpAtomConstString;
|
||||
}
|
||||
|
|
@ -660,14 +654,14 @@ const char* TPpContext::tokenize(TPpToken* ppToken)
|
|||
for(;;) {
|
||||
token = scanToken(ppToken);
|
||||
ppToken->token = token;
|
||||
if (token == EOF) {
|
||||
if (token == EndOfInput) {
|
||||
missingEndifCheck();
|
||||
return nullptr;
|
||||
}
|
||||
if (token == '#') {
|
||||
if (previous_token == '\n') {
|
||||
token = readCPPline(ppToken);
|
||||
if (token == EOF) {
|
||||
if (token == EndOfInput) {
|
||||
missingEndifCheck();
|
||||
return nullptr;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ int TPpContext::lReadByte(TokenStream *pTok)
|
|||
if (pTok->current < pTok->data.size())
|
||||
return pTok->data[pTok->current++];
|
||||
else
|
||||
return tInput::endOfInput;
|
||||
return EndOfInput;
|
||||
}
|
||||
|
||||
void TPpContext::lUnreadByte(TokenStream *pTok)
|
||||
|
|
@ -249,7 +249,7 @@ void TPpContext::pushTokenStreamInput(TokenStream* ts)
|
|||
int TPpContext::tUngotTokenInput::scan(TPpToken* ppToken)
|
||||
{
|
||||
if (done)
|
||||
return endOfInput;
|
||||
return EndOfInput;
|
||||
|
||||
int ret = token;
|
||||
*ppToken = lval;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue