Scanning: 1) rationalize end-of-input everywhere, 2) prevent infinite loop at end-of-input, 3) use positive chars.

Fixes issue #25 (char 255 aliased to -1, and missing tests for end of input).

1) All layers of input scanning now share a single EndOfInput value.
This avoids translation of it across layers of encapsulation.

2) Some places looking for end of line were not stopping on EndOfInput.

3) Use of "char" for the input made character values > 127 negative.
This allowed for aliasing of 255 to -1, etc.  This is fixed by using
unsigned char.
This commit is contained in:
John Kessenich 2015-07-27 09:37:55 -06:00
parent 91b977e172
commit c777fc2c4c
15 changed files with 134 additions and 112 deletions

View file

@ -40,6 +40,11 @@
namespace glslang {
// Use a global end-of-input character, so no translation is needed across
// layers of encapsulation. Characters are all 8 bit, and positive, so there is
// no aliasing of character 255 onto -1, for example.
const int EndOfInput = -1;
//
// A character scanner that seamlessly, on read-only strings, reads across an
// array of strings without assuming null termination.
@ -47,7 +52,9 @@ namespace glslang {
class TInputScanner {
public:
TInputScanner(int n, const char* const s[], size_t L[], int b = 0, int f = 0) :
numSources(n), sources(s), lengths(L), currentSource(0), currentChar(0), stringBias(b), finale(f)
numSources(n),
sources(reinterpret_cast<const unsigned char* const *>(s)), // up to this point, common usage is "char*", but now we need positive 8-bit characters
lengths(L), currentSource(0), currentChar(0), stringBias(b), finale(f)
{
loc = new TSourceLoc[numSources];
loc[currentSource].string = -stringBias;
@ -60,14 +67,11 @@ public:
delete [] loc;
}
// return of -1 means end of strings,
// anything else is the next character
// retrieve the next character and advance one character
int get()
{
if (currentSource >= numSources)
return -1;
return EndOfInput;
int ret = peek();
++loc[currentSource].column;
@ -84,7 +88,7 @@ public:
int peek()
{
if (currentSource >= numSources)
return -1;
return EndOfInput;
// Make sure we do not read off the end of a string.
// N.B. Sources can have a length of 0.
int sourceToRead = currentSource;
@ -93,9 +97,11 @@ public:
charToRead = 0;
sourceToRead += 1;
if (sourceToRead >= numSources) {
return -1;
return EndOfInput;
}
}
// Here, we care about making negative valued characters positive
return sources[sourceToRead][charToRead];
}
@ -106,17 +112,17 @@ public:
--currentChar;
--loc[currentSource].column;
if (loc[currentSource].column < 0) {
// We've moved back past a new line. Find the
// previous newline (or start of the file) to compute
// the column count on the now current line.
size_t ch = currentChar;
while(ch > 0) {
if (sources[currentSource][ch] == '\n') {
break;
// We've moved back past a new line. Find the
// previous newline (or start of the file) to compute
// the column count on the now current line.
size_t chIndex = currentChar;
while (chIndex > 0) {
if (sources[currentSource][chIndex] == '\n') {
break;
}
--chIndex;
}
--ch;
}
loc[currentSource].column = (int)(currentChar - ch);
loc[currentSource].column = (int)(currentChar - chIndex);
}
} else {
do {
@ -170,9 +176,9 @@ protected:
}
}
int numSources; // number of strings in source
const char* const *sources; // array of strings
const size_t *lengths; // length of each string
int numSources; // number of strings in source
const unsigned char* const *sources; // array of strings; must be converted to positive values on use, to avoid aliasing with -1 as EndOfInput
const size_t *lengths; // length of each string
int currentSource;
size_t currentChar;