Scanning: 1) rationalize end-of-input everywhere, 2) prevent infinite loop at end-of-input, 3) use positive chars.
Fixes issue #25. (char 255 aliased to -1 and missing tests for end of input). 1) All layers of input scanning now share a single EndOfInput value. This avoids translation of it across layers of encapsulation. 2) Some places looking for end of line were not stopping on EndOfInput. 3) Use of "char" for the input made char values > 127 be negative numbers. This allowed for aliasing of 255 to -1, etc. This is fixed by using unsigned char.
This commit is contained in:
parent
91b977e172
commit
c777fc2c4c
15 changed files with 134 additions and 112 deletions
|
|
@ -40,6 +40,11 @@
|
|||
|
||||
namespace glslang {
|
||||
|
||||
// Use a global end-of-input character, so no tranlation is needed across
|
||||
// layers of encapsulation. Characters are all 8 bit, and positive, so there is
|
||||
// no aliasing of character 255 onto -1, for example.
|
||||
const int EndOfInput = -1;
|
||||
|
||||
//
|
||||
// A character scanner that seamlessly, on read-only strings, reads across an
|
||||
// array of strings without assuming null termination.
|
||||
|
|
@ -47,7 +52,9 @@ namespace glslang {
|
|||
class TInputScanner {
|
||||
public:
|
||||
TInputScanner(int n, const char* const s[], size_t L[], int b = 0, int f = 0) :
|
||||
numSources(n), sources(s), lengths(L), currentSource(0), currentChar(0), stringBias(b), finale(f)
|
||||
numSources(n),
|
||||
sources(reinterpret_cast<const unsigned char* const *>(s)), // up to this point, common usage is "char*", but now we need positive 8-bit characters
|
||||
lengths(L), currentSource(0), currentChar(0), stringBias(b), finale(f)
|
||||
{
|
||||
loc = new TSourceLoc[numSources];
|
||||
loc[currentSource].string = -stringBias;
|
||||
|
|
@ -60,14 +67,11 @@ public:
|
|||
delete [] loc;
|
||||
}
|
||||
|
||||
// return of -1 means end of strings,
|
||||
// anything else is the next character
|
||||
|
||||
// retrieve the next character and advance one character
|
||||
int get()
|
||||
{
|
||||
if (currentSource >= numSources)
|
||||
return -1;
|
||||
return EndOfInput;
|
||||
|
||||
int ret = peek();
|
||||
++loc[currentSource].column;
|
||||
|
|
@ -84,7 +88,7 @@ public:
|
|||
int peek()
|
||||
{
|
||||
if (currentSource >= numSources)
|
||||
return -1;
|
||||
return EndOfInput;
|
||||
// Make sure we do not read off the end of a string.
|
||||
// N.B. Sources can have a length of 0.
|
||||
int sourceToRead = currentSource;
|
||||
|
|
@ -93,9 +97,11 @@ public:
|
|||
charToRead = 0;
|
||||
sourceToRead += 1;
|
||||
if (sourceToRead >= numSources) {
|
||||
return -1;
|
||||
return EndOfInput;
|
||||
}
|
||||
}
|
||||
|
||||
// Here, we care about making negative valued characters positive
|
||||
return sources[sourceToRead][charToRead];
|
||||
}
|
||||
|
||||
|
|
@ -106,17 +112,17 @@ public:
|
|||
--currentChar;
|
||||
--loc[currentSource].column;
|
||||
if (loc[currentSource].column < 0) {
|
||||
// We've moved back past a new line. Find the
|
||||
// previous newline (or start of the file) to compute
|
||||
// the column count on the now current line.
|
||||
size_t ch = currentChar;
|
||||
while(ch > 0) {
|
||||
if (sources[currentSource][ch] == '\n') {
|
||||
break;
|
||||
// We've moved back past a new line. Find the
|
||||
// previous newline (or start of the file) to compute
|
||||
// the column count on the now current line.
|
||||
size_t chIndex = currentChar;
|
||||
while (chIndex > 0) {
|
||||
if (sources[currentSource][chIndex] == '\n') {
|
||||
break;
|
||||
}
|
||||
--chIndex;
|
||||
}
|
||||
--ch;
|
||||
}
|
||||
loc[currentSource].column = (int)(currentChar - ch);
|
||||
loc[currentSource].column = (int)(currentChar - chIndex);
|
||||
}
|
||||
} else {
|
||||
do {
|
||||
|
|
@ -170,9 +176,9 @@ protected:
|
|||
}
|
||||
}
|
||||
|
||||
int numSources; // number of strings in source
|
||||
const char* const *sources; // array of strings
|
||||
const size_t *lengths; // length of each string
|
||||
int numSources; // number of strings in source
|
||||
const unsigned char* const *sources; // array of strings; must be converted to positive values on use, to avoid aliasing with -1 as EndOfInput
|
||||
const size_t *lengths; // length of each string
|
||||
int currentSource;
|
||||
size_t currentChar;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue