Improve multi-threading and move Standalone to a multi-threading model (currently off though).

git-svn-id: https://cvs.khronos.org/svn/repos/ogl/trunk/ecosystem/public/sdk/tools/glslang@22565 e7fa87d3-cd2b-0410-9028-fcbf551c1848
John Kessenich 2013-07-31 18:44:13 +00:00
parent b40a488e89
commit 2b07c7e70a
21 changed files with 402 additions and 203 deletions
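The change replaces the single process-wide GlobalPoolAllocator with a per-thread pool reached through GetThreadPoolAllocator()/SetThreadPoolAllocator() and stored in thread-local storage, plus one PerProcessGPA pool for data shared across threads. Below is a minimal sketch of that indirection, assuming C++11 thread_local and a toy pool class in place of glslang's OS_GetTLSValue/OS_SetTLSValue wrappers and TPoolAllocator; only the two accessor names come from the diff that follows.

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>

class PoolAllocator {                                   // toy pool, not glslang's TPoolAllocator
public:
    void* allocate(std::size_t bytes) { blocks.push_back(new char[bytes]); return blocks.back(); }
    void popAll() { for (char* b : blocks) delete [] b; blocks.clear(); }
    ~PoolAllocator() { popAll(); }
private:
    std::vector<char*> blocks;
};

// One "current pool" pointer per thread; each thread installs its own pool.
static thread_local PoolAllocator* currentPool = nullptr;

PoolAllocator& GetThreadPoolAllocator()          { assert(currentPool); return *currentPool; }
void SetThreadPoolAllocator(PoolAllocator& pool) { currentPool = &pool; }

int main()
{
    PoolAllocator myPool;
    SetThreadPoolAllocator(myPool);                   // per-thread setup (InitializeGlobalPools' role)
    void* p = GetThreadPoolAllocator().allocate(64);  // allocations go to this thread's pool
    (void)p;
    GetThreadPoolAllocator().popAll();                // bulk release (FreeGlobalPools' role)
    std::printf("per-thread pool exercised\n");
    return 0;
}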

View file

@@ -47,7 +47,7 @@ typedef TVector<TString> TBuiltInStrings;
 class TBuiltIns {
 public:
-    POOL_ALLOCATOR_NEW_DELETE(GlobalPoolAllocator)
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
     TBuiltIns();
     virtual ~TBuiltIns();
     void initialize(int version, EProfile);
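POOL_ALLOCATOR_NEW_DELETE takes an allocator expression that is re-evaluated on every class-level operator new, which is why swapping the object GlobalPoolAllocator for the call GetThreadPoolAllocator() is enough to reroute these allocations to the calling thread's pool. A hedged sketch of how such a macro could be shaped follows; the macro body, Pool type, and TBuiltInsLike class are illustrative assumptions, not glslang's actual PoolAlloc.h.

#include <cstddef>

struct Pool {                                     // stand-in; a real pool reclaims memory in bulk
    void* allocate(std::size_t n) { return ::operator new(n); }
};

Pool& GetThreadPoolAllocator()                    // imagine this returning the thread's pool
{
    static thread_local Pool pool;
    return pool;
}

// Hypothetical macro shape: class-scope operator new draws from the given
// allocator expression; operator delete is a no-op because pooled memory is
// released wholesale by pop()/popAll().
#define POOL_ALLOCATOR_NEW_DELETE(alloc) \
    void* operator new(std::size_t size) { return (alloc).allocate(size); } \
    void  operator delete(void*) { }

class TBuiltInsLike {                             // hypothetical class using the macro
public:
    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
    int value = 0;
};

int main()
{
    TBuiltInsLike* b = new TBuiltInsLike;         // allocated from the thread's pool
    delete b;                                     // no per-object free; the pool owns the memory
    return 0;
}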

View file

@@ -46,7 +46,7 @@ void InitializeGlobalPools()
     if (globalPools)
         return;
-    TPoolAllocator *globalPoolAllocator = new TPoolAllocator(true);
+    TPoolAllocator *globalPoolAllocator = new TPoolAllocator();
     TThreadGlobalPools* threadData = new TThreadGlobalPools();
@@ -58,12 +58,12 @@ void InitializeGlobalPools()
 void FreeGlobalPools()
 {
     // Release the allocated memory for this thread.
-    TThreadGlobalPools* globalPools= static_cast<TThreadGlobalPools*>(OS_GetTLSValue(PoolIndex));
-    if (!globalPools)
+    TThreadGlobalPools* globalPools = static_cast<TThreadGlobalPools*>(OS_GetTLSValue(PoolIndex));
+    if (! globalPools)
         return;
-    GlobalPoolAllocator.popAll();
-    delete &GlobalPoolAllocator;
+    GetThreadPoolAllocator().popAll();
+    delete &GetThreadPoolAllocator();
     delete globalPools;
 }
@@ -82,14 +82,14 @@ void FreePoolIndex()
     OS_FreeTLSIndex(PoolIndex);
 }
-TPoolAllocator& GetGlobalPoolAllocator()
+TPoolAllocator& GetThreadPoolAllocator()
 {
     TThreadGlobalPools* threadData = static_cast<TThreadGlobalPools*>(OS_GetTLSValue(PoolIndex));
     return *threadData->globalPoolAllocator;
 }
-void SetGlobalPoolAllocatorPtr(TPoolAllocator& poolAllocator)
+void SetThreadPoolAllocator(TPoolAllocator& poolAllocator)
 {
     TThreadGlobalPools* threadData = static_cast<TThreadGlobalPools*>(OS_GetTLSValue(PoolIndex));
@@ -100,8 +100,7 @@ void SetGlobalPoolAllocatorPtr(TPoolAllocator& poolAllocator)
 // Implement the functionality of the TPoolAllocator class, which
 // is documented in PoolAlloc.h.
 //
-TPoolAllocator::TPoolAllocator(bool g, int growthIncrement, int allocationAlignment) :
-    global(g),
+TPoolAllocator::TPoolAllocator(int growthIncrement, int allocationAlignment) :
     pageSize(growthIncrement),
     alignment(allocationAlignment),
     freeList(0),
@@ -148,19 +147,12 @@ TPoolAllocator::TPoolAllocator(bool g, int growthIncrement, int allocationAlignment) :
 TPoolAllocator::~TPoolAllocator()
 {
-    if (!global) {
-        //
-        // Then we know that this object is not being
-        // allocated after other, globally scoped objects
-        // that depend on it. So we can delete the "in use" memory.
-        //
-        while (inUseList) {
-            tHeader* next = inUseList->nextPage;
-            inUseList->~tHeader();
-            delete [] reinterpret_cast<char*>(inUseList);
-            inUseList = next;
-        }
-    }
+    while (inUseList) {
+        tHeader* next = inUseList->nextPage;
+        inUseList->~tHeader();
+        delete [] reinterpret_cast<char*>(inUseList);
+        inUseList = next;
+    }
     //
     // Always delete the free list memory - it can't be being
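These PoolAlloc.cpp changes drop the special "global" pool: every pool is now owned either by a thread or by the process (PerProcessGPA), so the destructor always frees the in-use pages. The pools are used with a mark/release discipline: push() records a mark, pop() discards everything allocated since that mark, and popAll() empties the pool. A simplified sketch of that discipline follows, assuming a toy MarkReleasePool rather than glslang's page-based TPoolAllocator.

#include <cstddef>
#include <cstdio>
#include <vector>

class MarkReleasePool {                                       // toy mark/release pool
public:
    void push() { marks.push_back(blocks.size()); }           // remember current depth
    void pop()                                                // free everything since the last push()
    {
        std::size_t mark = marks.empty() ? 0 : marks.back();
        if (! marks.empty())
            marks.pop_back();
        while (blocks.size() > mark) {
            delete [] blocks.back();
            blocks.pop_back();
        }
    }
    void popAll()                                             // free everything
    {
        marks.clear();
        while (! blocks.empty()) {
            delete [] blocks.back();
            blocks.pop_back();
        }
    }
    void* allocate(std::size_t n) { blocks.push_back(new char[n]); return blocks.back(); }
    ~MarkReleasePool() { popAll(); }
private:
    std::vector<char*> blocks;
    std::vector<std::size_t> marks;
};

int main()
{
    MarkReleasePool pool;
    pool.push();                  // like GetThreadPoolAllocator().push() at the top of ShCompile
    pool.allocate(128);           // temporary objects built during compilation
    pool.allocate(256);
    pool.pop();                   // like GetThreadPoolAllocator().pop(): discard them wholesale
    std::printf("mark/release cycle complete\n");
    return 0;
}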

View file

@@ -79,7 +79,7 @@ int MapVersionToIndex(int version)
 const int VersionCount = 12;
 //
-// A symbol table per version per profile per language. This will be sparsely
+// A process-global symbol table per version per profile per language. This will be sparsely
 // populated, so they will only only be generated as needed.
 //
 // Each has a different set of built-ins, and we want to preserve that from
@@ -163,41 +163,59 @@ bool AddContextSpecificSymbols(const TBuiltInResource* resources, TInfoSink& inf
     return true;
 }
+//
+// To do this on the fly, we want to leave the current state of our thread's
+// pool allocator intact, so:
+//  - Switch to a new pool for parsing the built-ins
+//  - Do the parsing, which builds the symbol table, using the new pool
+//  - Switch to the process-global pool to save a copy the resulting symbol table
+//  - Free up the new pool used to parse the built-ins
+//  - Switch back to the original thread's pool
+//
+// This only gets done the first time any thread needs a particular symbol table
+// (lazy evaluation).
+//
 void SetupBuiltinSymbolTable(int version, EProfile profile)
 {
     TInfoSink infoSink;
-    // This function is for lazy setup. See if already done.
+    // Make sure only one thread tries to do this at a time
+    glslang::GetGlobalLock();
+    // See if it's already been done.
     int versionIndex = MapVersionToIndex(version);
-    if (SharedSymbolTables[versionIndex][profile][EShLangVertex])
+    if (SharedSymbolTables[versionIndex][profile][EShLangVertex]) {
+        glslang::ReleaseGlobalLock();
         return;
+    }
-    TPoolAllocator& savedGPA = GetGlobalPoolAllocator();
-    TPoolAllocator *builtInPoolAllocator = new TPoolAllocator(true);
-    SetGlobalPoolAllocatorPtr(*builtInPoolAllocator);
+    // Switch to a new pool
+    TPoolAllocator& savedGPA = GetThreadPoolAllocator();
+    TPoolAllocator* builtInPoolAllocator = new TPoolAllocator();
+    SetThreadPoolAllocator(*builtInPoolAllocator);
+    // Generate the symbol table using the new pool
     TSymbolTable symTables[EShLangCount];
     if (profile == EEsProfile) {
         for (int stage = 0; stage < EShLangCount; ++stage)
             symTables[stage].setNoBuiltInRedeclarations();
     }
     GenerateBuiltInSymbolTable(infoSink, symTables, version, profile);
-    SetGlobalPoolAllocatorPtr(*PerProcessGPA);
+    // Switch to the process-global pool
+    SetThreadPoolAllocator(*PerProcessGPA);
+    // Copy the symbol table from the new pool to the process-global pool
     SharedSymbolTables[versionIndex][profile][EShLangVertex] = new TSymbolTable;
     SharedSymbolTables[versionIndex][profile][EShLangVertex]->copyTable(symTables[EShLangVertex]);
     SharedSymbolTables[versionIndex][profile][EShLangFragment] = new TSymbolTable;
     SharedSymbolTables[versionIndex][profile][EShLangFragment]->copyTable(symTables[EShLangFragment]);
     symTables[EShLangVertex].pop(0);
     symTables[EShLangFragment].pop(0);
+    builtInPoolAllocator->popAll();
+    delete builtInPoolAllocator;
+    SetThreadPoolAllocator(savedGPA);
-    SetGlobalPoolAllocatorPtr(savedGPA);
+    glslang::ReleaseGlobalLock();
 }
 bool DeduceProfile(TInfoSink& infoSink, int version, EProfile& profile)
@@ -261,7 +279,7 @@ int ShInitialize()
         return 0;
     if (! PerProcessGPA) {
-        PerProcessGPA = new TPoolAllocator(true);
+        PerProcessGPA = new TPoolAllocator();
     }
     glslang::TScanContext::fillInKeywordMap();
@@ -333,6 +351,7 @@ int __fastcall ShFinalize()
         PerProcessGPA->popAll();
         delete PerProcessGPA;
     }
     return 1;
 }
@@ -361,6 +380,7 @@ int ShCompile(
     if (handle == 0)
         return 0;
     TShHandleBase* base = reinterpret_cast<TShHandleBase*>(handle);
     TCompiler* compiler = base->getAsCompiler();
     if (compiler == 0)
@@ -372,7 +392,7 @@ int ShCompile(
     if (numStrings == 0)
         return 1;
-    GlobalPoolAllocator.push();
+    GetThreadPoolAllocator().push();
     // move to length-based strings, rather than null-terminated strings
     int* lengths = new int[numStrings];
@@ -395,7 +415,6 @@
     bool goodProfile = DeduceProfile(compiler->infoSink, version, profile);
     TIntermediate intermediate(compiler->infoSink, version, profile);
     SetupBuiltinSymbolTable(version, profile);
     TSymbolTable symbolTable(*SharedSymbolTables[MapVersionToIndex(version)]
                                                 [profile]
@@ -480,7 +499,7 @@ int ShCompile(
     //
     // Throw away all the temporary memory used by the compilation process.
     //
-    GlobalPoolAllocator.pop();
+    GetThreadPoolAllocator().pop();
     delete [] lengths;
     return success ? 1 : 0;
@@ -510,9 +529,9 @@ int ShLink(
         return 0;
     int returnValue;
-    GlobalPoolAllocator.push();
+    GetThreadPoolAllocator().push();
     returnValue = ShLinkExt(linkHandle, compHandles, numHandles);
-    GlobalPoolAllocator.pop();
+    GetThreadPoolAllocator().pop();
     if (returnValue)
         return 1;
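The core of the ShaderLang.cpp change is SetupBuiltinSymbolTable: it now serializes lazy construction behind a process-wide lock, parses the built-ins into a scratch pool, copies the resulting symbol tables into the process-global pool (PerProcessGPA), frees the scratch pool, and restores the caller's thread pool; ShCompile and ShLink then bracket their temporary allocations with push()/pop() on the thread's pool. Below is a minimal sketch of the lock-and-check shape only, with std::mutex standing in for glslang::GetGlobalLock/ReleaseGlobalLock and a string vector standing in for the shared symbol tables.

#include <cstdio>
#include <mutex>
#include <string>
#include <vector>

static std::mutex builtInLock;                             // stand-in for the process-wide lock
static std::vector<std::string>* sharedBuiltIns = nullptr; // stand-in for SharedSymbolTables

static void SetupBuiltInsOnce()
{
    std::lock_guard<std::mutex> guard(builtInLock);        // only one thread builds at a time
    if (sharedBuiltIns)                                    // already built: later callers return at once
        return;

    // Build into process-lifetime storage. The real code additionally parses the
    // built-ins in a scratch pool, copies the tables into the process-global pool,
    // then frees the scratch pool and restores the thread's own pool.
    sharedBuiltIns = new std::vector<std::string>{ "gl_Position", "gl_FragColor" };
}

int main()
{
    SetupBuiltInsOnce();
    SetupBuiltInsOnce();                                   // second call finds the table and returns
    std::printf("built-ins: %zu entries\n", sharedBuiltIns->size());
    return 0;
}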

View file

@@ -77,7 +77,7 @@ class TFunction;
 class TAnonMember;
 class TSymbol {
 public:
-    POOL_ALLOCATOR_NEW_DELETE(GlobalPoolAllocator)
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
     explicit TSymbol(const TString *n) : name(n) { }
     virtual TSymbol* clone(TStructureMap& remapper) = 0;
     virtual ~TSymbol() { }
@@ -245,7 +245,7 @@ protected:
 class TSymbolTableLevel {
 public:
-    POOL_ALLOCATOR_NEW_DELETE(GlobalPoolAllocator)
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
     TSymbolTableLevel() : defaultPrecision (0), anonId(0) { }
     ~TSymbolTableLevel();
@@ -365,24 +365,23 @@ protected:
 class TSymbolTable {
 public:
-    TSymbolTable() : uniqueId(0), noBuiltInRedeclarations(false)
+    TSymbolTable() : uniqueId(0), noBuiltInRedeclarations(false), adoptedLevels(1) // TODO: memory: can we make adoptedLevels be 0 for symbol tables we don't keep?
     {
         //
-        // The symbol table cannot be used until push() is called, but
-        // the lack of an initial call to push() can be used to detect
-        // that the symbol table has not been preloaded with built-ins.
+        // This symbol table cannot be used until push() is called.
         //
     }
     explicit TSymbolTable(TSymbolTable& symTable)
     {
         table.push_back(symTable.table[0]);
+        adoptedLevels = 1;
         uniqueId = symTable.uniqueId;
         noBuiltInRedeclarations = symTable.noBuiltInRedeclarations;
     }
     ~TSymbolTable()
     {
-        // level 0 is always built-in symbols, so we never pop that out
-        while (table.size() > 1)
+        // don't deallocate levels passed in from elsewhere
+        while (table.size() > adoptedLevels)
             pop(0);
     }
@@ -463,6 +462,7 @@ protected:
     std::vector<TSymbolTableLevel*> table;
     int uniqueId; // for unique identification in code generation
     bool noBuiltInRedeclarations;
+    unsigned int adoptedLevels;
 };
 #endif // _SYMBOL_TABLE_INCLUDED_
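The new adoptedLevels counter records how many levels the table merely borrowed (the shared built-in level pushed in by the copy constructor), so the destructor stops before deallocating storage it does not own. A hedged sketch of that ownership rule follows, using simplified stand-ins for TSymbolTableLevel and TSymbolTable.

#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

struct Level {                                    // stand-in for TSymbolTableLevel
    std::vector<std::string> symbols;
};

class Table {                                     // stand-in for TSymbolTable
public:
    explicit Table(Level* builtIns) : adoptedLevels(1) { levels.push_back(builtIns); } // share, don't copy
    ~Table()
    {
        while (levels.size() > adoptedLevels) {   // don't deallocate levels passed in from elsewhere
            delete levels.back();
            levels.pop_back();
        }
    }
    void push() { levels.push_back(new Level); }
    void insert(const std::string& name) { levels.back()->symbols.push_back(name); }
    std::size_t depth() const { return levels.size(); }
private:
    std::vector<Level*> levels;
    std::size_t adoptedLevels;
};

int main()
{
    Level builtIns;                               // process-lifetime built-in level
    builtIns.symbols.push_back("gl_Position");
    {
        Table perCompile(&builtIns);              // adopts the shared built-in level
        perCompile.push();                        // its own scope level for this compile
        perCompile.insert("myVar");
        std::printf("levels while compiling: %zu\n", perCompile.depth());
    }                                             // frees only the level it pushed, not builtIns
    std::printf("built-ins still alive: %zu symbol(s)\n", builtIns.symbols.size());
    return 0;
}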

View file

@@ -319,6 +319,7 @@ postfix_expression
                 TType newType($1->getType());
                 newType.dereference();
                 $$->setType(newType);
+                // TODO: functionality: does this drop const qualification for const[const] ?
             }
         }
     | function_call {

View file

@@ -51,7 +51,7 @@ struct TVectorFields {
 class TInfoSink;
 class TIntermediate {
 public:
-    POOL_ALLOCATOR_NEW_DELETE(GlobalPoolAllocator)
+    POOL_ALLOCATOR_NEW_DELETE(GetThreadPoolAllocator())
     TIntermediate(TInfoSink& i, int v, EProfile p) : infoSink(i), version(v), profile(p) { }
     TIntermSymbol* addSymbol(int Id, const TString&, const TType&, TSourceLoc);