Merge branch 'master' of github.com:KhronosGroup/glslang into clang-format

Dejan Mircevski 2016-03-09 00:38:08 -05:00
commit 37c2a2d31d
286 changed files with 25003 additions and 24226 deletions


@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 2.8)
set(SOURCES
GlslangToSpv.cpp
InReadableOrder.cpp
SpvBuilder.cpp
SPVRemapper.cpp
doc.cpp


@ -1,5 +1,5 @@
/*
** Copyright (c) 2014-2015 The Khronos Group Inc.
** Copyright (c) 2014-2016 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a copy
** of this software and/or associated documentation files (the "Materials"),
@ -27,8 +27,8 @@
#ifndef GLSLstd450_H
#define GLSLstd450_H
const int GLSLstd450Version = 99;
const int GLSLstd450Revision = 3;
static const int GLSLstd450Version = 100;
static const int GLSLstd450Revision = 1;
enum GLSLstd450 {
GLSLstd450Bad = 0, // Don't use
@ -83,7 +83,7 @@ enum GLSLstd450 {
GLSLstd450UClamp = 44,
GLSLstd450SClamp = 45,
GLSLstd450FMix = 46,
GLSLstd450IMix = 47,
GLSLstd450IMix = 47, // Reserved
GLSLstd450Step = 48,
GLSLstd450SmoothStep = 49,
@ -121,6 +121,10 @@ enum GLSLstd450 {
GLSLstd450InterpolateAtSample = 77,
GLSLstd450InterpolateAtOffset = 78,
GLSLstd450NMin = 79,
GLSLstd450NMax = 80,
GLSLstd450NClamp = 81,
GLSLstd450Count
};

File diff suppressed because it is too large.

SPIRV/InReadableOrder.cpp (new file, 117 lines added)

@ -0,0 +1,117 @@
//
//Copyright (C) 2016 Google, Inc.
//
//All rights reserved.
//
//Redistribution and use in source and binary forms, with or without
//modification, are permitted provided that the following conditions
//are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
//
// Neither the name of 3Dlabs Inc. Ltd. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
//"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
//LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
//FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
//COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
//INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
//BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
//LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
//ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
//POSSIBILITY OF SUCH DAMAGE.
//
// Author: Dejan Mircevski, Google
//
// The SPIR-V spec requires code blocks to appear in an order satisfying the
// dominator-tree direction (ie, dominator before the dominated). This is,
// actually, easy to achieve: any pre-order CFG traversal algorithm will do it.
// Because such algorithms visit a block only after traversing some path to it
// from the root, they necessarily visit the block's idom first.
//
// But not every graph-traversal algorithm outputs blocks in an order that
// appears logical to human readers. The problem is that unrelated branches may
// be interspersed with each other, and merge blocks may come before some of the
// branches being merged.
//
// A good, human-readable order of blocks may be achieved by performing
// depth-first search but delaying merge nodes until after all their branches
// have been visited. This is implemented below by the inReadableOrder()
// function.
#include "spvIR.h"
#include <cassert>
#include <unordered_map>
using spv::Block;
using spv::Id;
namespace {
// Traverses CFG in a readable order, invoking a pre-set callback on each block.
// Use by calling visit() on the root block.
class ReadableOrderTraverser {
public:
explicit ReadableOrderTraverser(std::function<void(Block*)> callback) : callback_(callback) {}
// Visits the block if it hasn't been visited already and isn't currently
// being delayed. Invokes callback(block), then descends into its
// successors. Delays merge-block and continue-block processing until all
// the branches have been completed.
void visit(Block* block)
{
assert(block);
if (visited_[block] || delayed_[block])
return;
callback_(block);
visited_[block] = true;
Block* mergeBlock = nullptr;
Block* continueBlock = nullptr;
auto mergeInst = block->getMergeInstruction();
if (mergeInst) {
Id mergeId = mergeInst->getIdOperand(0);
mergeBlock = block->getParent().getParent().getInstruction(mergeId)->getBlock();
delayed_[mergeBlock] = true;
if (mergeInst->getOpCode() == spv::OpLoopMerge) {
Id continueId = mergeInst->getIdOperand(1);
continueBlock =
block->getParent().getParent().getInstruction(continueId)->getBlock();
delayed_[continueBlock] = true;
}
}
const auto successors = block->getSuccessors();
for (auto it = successors.cbegin(); it != successors.cend(); ++it)
visit(*it);
if (continueBlock) {
delayed_[continueBlock] = false;
visit(continueBlock);
}
if (mergeBlock) {
delayed_[mergeBlock] = false;
visit(mergeBlock);
}
}
private:
std::function<void(Block*)> callback_;
// Whether a block has already been visited or is being delayed.
std::unordered_map<Block *, bool> visited_, delayed_;
};
}
void spv::inReadableOrder(Block* root, std::function<void(Block*)> callback)
{
ReadableOrderTraverser(callback).visit(root);
}
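
A minimal usage sketch, not part of this commit, of the new traversal entry point declared here: dump the IDs of a function's blocks in readable order. It assumes a fully built spv::Function; the helper name and the lambda body are illustrative only.

#include "spvIR.h"
#include <iostream>

void dumpBlockIdsInReadableOrder(spv::Function& function)
{
    // Visit every block reachable from the entry block, dominators first,
    // with merge and continue blocks delayed until their branches are done.
    spv::inReadableOrder(function.getEntryBlock(), [](spv::Block* block) {
        std::cout << "block %" << block->getId() << "\n";
    });
}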


@ -140,20 +140,17 @@ namespace spv {
}
}
bool spirvbin_t::isFlowCtrlOpen(spv::Op opCode) const
bool spirvbin_t::isFlowCtrl(spv::Op opCode) const
{
switch (opCode) {
case spv::OpBranchConditional:
case spv::OpSwitch: return true;
default: return false;
}
}
bool spirvbin_t::isFlowCtrlClose(spv::Op opCode) const
{
switch (opCode) {
case spv::OpBranch:
case spv::OpSwitch:
case spv::OpLoopMerge:
case spv::OpSelectionMerge: return true;
case spv::OpSelectionMerge:
case spv::OpLabel:
case spv::OpFunction:
case spv::OpFunctionEnd: return true;
default: return false;
}
}
@ -440,7 +437,7 @@ namespace spv {
}
// Store IDs from instruction in our map
for (int op = 0; op < spv::InstructionDesc[opCode].operands.getNum(); ++op, --numOperands) {
for (int op = 0; numOperands > 0; ++op, --numOperands) {
switch (spv::InstructionDesc[opCode].operands.getClass(op)) {
case spv::OperandId:
idFn(asId(word++));
@ -468,19 +465,36 @@ namespace spv {
}
return nextInst;
case spv::OperandLiteralString:
// word += literalStringWords(literalString(word)); // for clarity
case spv::OperandLiteralString: {
const int stringWordCount = literalStringWords(literalString(word));
word += stringWordCount;
numOperands -= (stringWordCount-1); // -1 because for() header post-decrements
break;
}
// Execution mode might have extra literal operands. Skip them.
case spv::OperandExecutionMode:
return nextInst;
// Single word operands we simply ignore, as they hold no IDs
// Single word operands we simply ignore, as they hold no IDs
case spv::OperandLiteralNumber:
case spv::OperandSource:
case spv::OperandExecutionModel:
case spv::OperandAddressing:
case spv::OperandMemory:
case spv::OperandExecutionMode:
case spv::OperandStorage:
case spv::OperandDimensionality:
case spv::OperandSamplerAddressingMode:
case spv::OperandSamplerFilterMode:
case spv::OperandSamplerImageFormat:
case spv::OperandImageChannelOrder:
case spv::OperandImageChannelDataType:
case spv::OperandImageOperands:
case spv::OperandFPFastMath:
case spv::OperandFPRoundingMode:
case spv::OperandLinkageType:
case spv::OperandAccessQualifier:
case spv::OperandFuncParamAttr:
case spv::OperandDecoration:
case spv::OperandBuiltIn:
case spv::OperandSelect:
@ -492,10 +506,12 @@ namespace spv {
case spv::OperandGroupOperation:
case spv::OperandKernelEnqueueFlags:
case spv::OperandKernelProfilingInfo:
case spv::OperandCapability:
++word;
break;
default:
assert(0 && "Unhandled Operand Class");
break;
}
}
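
The OperandLiteralString case above has to skip a whole packed string. A small sketch of the word count it needs (an assumption about what literalStringWords() computes, not the remapper's actual code): SPIR-V packs literal strings as nul-terminated UTF-8, four bytes per 32-bit word, zero-padded, so the terminating nul always consumes at least one byte.

#include <cstring>

unsigned literalStringWordCount(const char* s)
{
    // e.g. "main" is 4 characters plus the nul, padded to 8 bytes -> 2 words
    return static_cast<unsigned>(std::strlen(s)) / 4 + 1;
}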
@ -558,7 +574,7 @@ namespace spv {
// Window size for context-sensitive canonicalization values
// Emperical best size from a single data set. TODO: Would be a good tunable.
// We essentially performa a little convolution around each instruction,
// We essentially perform a little convolution around each instruction,
// to capture the flavor of nearby code, to hopefully match to similar
// code in other modules.
static const unsigned windowSize = 2;
@ -713,49 +729,71 @@ namespace spv {
strip(); // strip out data we decided to eliminate
}
// remove bodies of uncalled functions
// optimize loads and stores
void spirvbin_t::optLoadStore()
{
idset_t fnLocalVars;
// Map of load result IDs to what they load
idmap_t idMap;
idset_t fnLocalVars; // candidates for removal (only locals)
idmap_t idMap; // Map of load result IDs to what they load
blockmap_t blockMap; // Map of IDs to blocks they first appear in
int blockNum = 0; // block count, to avoid crossing flow control
// Find all the function local pointers stored at most once, and not via access chains
process(
[&](spv::Op opCode, unsigned start) {
const int wordCount = asWordCount(start);
// Count blocks, so we can avoid crossing flow control
if (isFlowCtrl(opCode))
++blockNum;
// Add local variables to the map
if ((opCode == spv::OpVariable && spv[start+3] == spv::StorageClassFunction && asWordCount(start) == 4))
if ((opCode == spv::OpVariable && spv[start+3] == spv::StorageClassFunction && asWordCount(start) == 4)) {
fnLocalVars.insert(asId(start+2));
return true;
}
// Ignore process vars referenced via access chain
if ((opCode == spv::OpAccessChain || opCode == spv::OpInBoundsAccessChain) && fnLocalVars.count(asId(start+3)) > 0) {
fnLocalVars.erase(asId(start+3));
idMap.erase(asId(start+3));
return true;
}
if (opCode == spv::OpLoad && fnLocalVars.count(asId(start+3)) > 0) {
// Avoid loads before stores (TODO: why? Crashes driver, but seems like it shouldn't).
if (idMap.find(asId(start+3)) == idMap.end()) {
fnLocalVars.erase(asId(start+3));
idMap.erase(asId(start+3));
const spv::Id varId = asId(start+3);
// Avoid loads before stores
if (idMap.find(varId) == idMap.end()) {
fnLocalVars.erase(varId);
idMap.erase(varId);
}
// don't do for volatile references
if (wordCount > 4 && (spv[start+4] & spv::MemoryAccessVolatileMask)) {
fnLocalVars.erase(asId(start+3));
idMap.erase(asId(start+3));
fnLocalVars.erase(varId);
idMap.erase(varId);
}
// Handle flow control
if (blockMap.find(varId) == blockMap.end()) {
blockMap[varId] = blockNum; // track block we found it in.
} else if (blockMap[varId] != blockNum) {
fnLocalVars.erase(varId); // Ignore if crosses flow control
idMap.erase(varId);
}
return true;
}
if (opCode == spv::OpStore && fnLocalVars.count(asId(start+1)) > 0) {
if (idMap.find(asId(start+1)) == idMap.end()) {
idMap[asId(start+1)] = asId(start+2);
const spv::Id varId = asId(start+1);
if (idMap.find(varId) == idMap.end()) {
idMap[varId] = asId(start+2);
} else {
// Remove if it has more than one store to the same pointer
fnLocalVars.erase(asId(start+1));
idMap.erase(asId(start+1));
fnLocalVars.erase(varId);
idMap.erase(varId);
}
// don't do for volatile references
@ -763,11 +801,29 @@ namespace spv {
fnLocalVars.erase(asId(start+3));
idMap.erase(asId(start+3));
}
// Handle flow control
if (blockMap.find(varId) == blockMap.end()) {
blockMap[varId] = blockNum; // track block we found it in.
} else if (blockMap[varId] != blockNum) {
fnLocalVars.erase(varId); // Ignore if crosses flow control
idMap.erase(varId);
}
return true;
}
return true;
return false;
},
op_fn_nop);
// If local var id used anywhere else, don't eliminate
[&](spv::Id& id) {
if (fnLocalVars.count(id) > 0) {
fnLocalVars.erase(id);
idMap.erase(id);
}
}
);
process(
[&](spv::Op opCode, unsigned start) {
@ -777,12 +833,27 @@ namespace spv {
},
op_fn_nop);
// Chase replacements to their origins, in case there is a chain such as:
// 2 = store 1
// 3 = load 2
// 4 = store 3
// 5 = load 4
// We want to replace uses of 5 with 1.
for (const auto& idPair : idMap) {
spv::Id id = idPair.first;
while (idMap.find(id) != idMap.end()) // Chase to end of chain
id = idMap[id];
idMap[idPair.first] = id; // replace with final result
}
// Remove the load/store/variables for the ones we've discovered
process(
[&](spv::Op opCode, unsigned start) {
if ((opCode == spv::OpLoad && fnLocalVars.count(asId(start+3)) > 0) ||
(opCode == spv::OpStore && fnLocalVars.count(asId(start+1)) > 0) ||
(opCode == spv::OpVariable && fnLocalVars.count(asId(start+2)) > 0)) {
stripInst(start);
return true;
}
@ -790,7 +861,9 @@ namespace spv {
return false;
},
[&](spv::Id& id) { if (idMap.find(id) != idMap.end()) id = idMap[id]; }
[&](spv::Id& id) {
if (idMap.find(id) != idMap.end()) id = idMap[id];
}
);
strip(); // strip out data we decided to eliminate
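
The chain-chasing step above is essentially path compression over idMap. A standalone sketch of the same idea, using a plain std::unordered_map rather than the remapper's idmap_t (illustrative only):

#include <unordered_map>

using Id = unsigned int;

// Flatten a replacement chain such as 5 -> 4 -> 3 -> 2 -> 1 so that every
// key maps directly to its final origin (here, 1). Assumes no cycles.
void flattenReplacements(std::unordered_map<Id, Id>& idMap)
{
    for (auto& pair : idMap) {
        Id id = pair.second;
        while (idMap.find(id) != idMap.end())  // chase to the end of the chain
            id = idMap[id];
        pair.second = id;                      // store the final origin
    }
}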


@ -131,6 +131,7 @@ private:
// Local to global, or global to local ID map
typedef std::unordered_map<spv::Id, spv::Id> idmap_t;
typedef std::unordered_set<spv::Id> idset_t;
typedef std::unordered_map<spv::Id, int> blockmap_t;
void remap(std::uint32_t opts = DO_EVERYTHING);
@ -164,8 +165,7 @@ private:
bool isConstOp(spv::Op opCode) const;
bool isTypeOp(spv::Op opCode) const;
bool isStripOp(spv::Op opCode) const;
bool isFlowCtrlOpen(spv::Op opCode) const;
bool isFlowCtrlClose(spv::Op opCode) const;
bool isFlowCtrl(spv::Op opCode) const;
range_t literalRange(spv::Op opCode) const;
range_t typeRange(spv::Op opCode) const;
range_t constRange(spv::Op opCode) const;

SPIRV/SpvBuilder.cpp (701 changed lines, executable file → normal file)

File diff suppressed because it is too large.


@ -1,5 +1,6 @@
//
// Copyright (C) 2014 LunarG, Inc.
//Copyright (C) 2014-2015 LunarG, Inc.
//Copyright (C) 2015-2016 Google, Inc.
//
// All rights reserved.
//
@ -53,6 +54,8 @@
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <stack>
namespace spv {
@ -77,7 +80,7 @@ public:
memoryModel = mem;
}
void addCapability(spv::Capability cap) { capabilities.push_back(cap); }
void addCapability(spv::Capability cap) { capabilities.insert(cap); }
// To get a new <id> for anything needing a new one.
Id getUniqueId() { return ++uniqueId; }
// To get a set of new <id>s, e.g., for a set of function parameters
@ -96,13 +99,13 @@ public:
Id makeIntType(int width) { return makeIntegerType(width, true); }
Id makeUintType(int width) { return makeIntegerType(width, false); }
Id makeFloatType(int width);
Id makeStructType(std::vector<Id>& members, const char*);
Id makeStructType(const std::vector<Id>& members, const char*);
Id makeStructResultType(Id type0, Id type1);
Id makeVectorType(Id component, int size);
Id makeMatrixType(Id component, int cols, int rows);
Id makeArrayType(Id element, unsigned size, int stride); // 0 means no stride decoration
Id makeArrayType(Id element, Id sizeId, int stride); // 0 stride means no stride decoration
Id makeRuntimeArray(Id element);
Id makeFunctionType(Id returnType, std::vector<Id>& paramTypes);
Id makeFunctionType(Id returnType, const std::vector<Id>& paramTypes);
Id makeImageType(Id sampledType, Dim, bool depth, bool arrayed, bool ms, unsigned sampled,
ImageFormat format);
Id makeSamplerType();
@ -121,6 +124,11 @@ public:
Id getContainedTypeId(Id typeId) const;
Id getContainedTypeId(Id typeId, int) const;
StorageClass getTypeStorageClass(Id typeId) const { return module.getStorageClass(typeId); }
ImageFormat getImageTypeFormat(Id typeId) const
{
return (ImageFormat)module.getInstruction(typeId)->getImmediateOperand(6);
}
bool isPointer(Id resultId) const { return isPointerType(getTypeId(resultId)); }
bool isScalar(Id resultId) const { return isScalarType(getTypeId(resultId)); }
bool isVector(Id resultId) const { return isVectorType(getTypeId(resultId)); }
@ -201,7 +209,7 @@ public:
Id makeDoubleConstant(double d, bool specConstant = false);
// Turn the array of constants into a proper spv constant of the requested type.
Id makeCompositeConstant(Id type, std::vector<Id>& comps);
Id makeCompositeConstant(Id type, std::vector<Id>& comps, bool specConst = false);
// Methods for adding information outside the CFG.
Instruction* addEntryPoint(ExecutionModel, Function*, const char* name);
@ -216,13 +224,16 @@ public:
// At the end of what block do the next create*() instructions go?
void setBuildPoint(Block* bp) { buildPoint = bp; }
Block* getBuildPoint() const { return buildPoint; }
// Make the main function.
// Make the main function. The returned pointer is only valid
// for the lifetime of this builder.
Function* makeMain();
// Make a shader-style function, and create its entry block if entry is non-zero.
// Return the function, pass back the entry.
Function* makeFunctionEntry(Id returnType, const char* name, std::vector<Id>& paramTypes,
Block** entry = 0);
// The returned pointer is only valid for the lifetime of this builder.
Function* makeFunctionEntry(Decoration precision, Id returnType, const char* name,
const std::vector<Id>& paramTypes,
const std::vector<Decoration>& precisions, Block** entry = 0);
// Create a return. An 'implicit' return is one not appearing in the source
// code. In the case of an implicit return, no post-return block is inserted.
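
A hypothetical sketch of driving the revised makeFunctionEntry() signature; the helper name and parameter choices are illustrative, not taken from glslang.

#include "SpvBuilder.h"
#include <vector>

// Illustrative only: create a float(float) helper function and position the
// builder at its entry block. The returned pointer is only valid for the
// lifetime of 'builder'.
spv::Function* makeHelper(spv::Builder& builder)
{
    spv::Block* entry = nullptr;
    const std::vector<spv::Id> paramTypes = { builder.makeFloatType(32) };
    const std::vector<spv::Decoration> precisions = { spv::NoPrecision };
    spv::Function* helper = builder.makeFunctionEntry(spv::NoPrecision, builder.makeFloatType(32),
                                                      "helper", paramTypes, precisions, &entry);
    builder.setBuildPoint(entry);
    // ... emit the body and a return before moving the build point elsewhere ...
    return helper;
}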
@ -237,7 +248,7 @@ public:
// Create a global or function local or IO variable.
Id createVariable(StorageClass, Id type, const char* name = 0);
// Create an imtermediate with an undefined value.
// Create an intermediate with an undefined value.
Id createUndefined(Id type);
// Store into an Id and return the l-value
@ -274,7 +285,8 @@ public:
// Take an rvalue (source) and a set of channels to extract from it to
// make a new rvalue, which is returned.
Id createRvalueSwizzle(Id typeId, Id source, std::vector<unsigned>& channels);
Id createRvalueSwizzle(Decoration precision, Id typeId, Id source,
std::vector<unsigned>& channels);
// Take a copy of an lvalue (target) and a source of components, and set the
// source components into the lvalue where the 'channels' say to put them.
@ -282,13 +294,15 @@ public:
// (No true lvalue or stores are used.)
Id createLvalueSwizzle(Id typeId, Id target, Id source, std::vector<unsigned>& channels);
// If the value passed in is an instruction and the precision is not NoPrecision,
// it gets tagged with the requested precision.
void setPrecision(Id /* value */, Decoration precision)
// If both the id and precision are valid, the id
// gets tagged with the requested precision.
// The passed in id is always the returned id, to simplify use patterns.
Id setPrecision(Id id, Decoration precision)
{
if (precision != NoPrecision) {
; // TODO
}
if (precision != NoPrecision && id != NoResult)
addDecoration(id, precision);
return id;
}
// Can smear a scalar to a vector for the following forms:
@ -312,8 +326,7 @@ public:
Id smearScalar(Decoration precision, Id scalarVal, Id vectorType);
// Create a call to a built-in function.
Id createBuiltinCall(Decoration precision, Id resultType, Id builtins, int entryPoint,
std::vector<Id>& args);
Id createBuiltinCall(Id resultType, Id builtins, int entryPoint, std::vector<Id>& args);
// List of parameters used to create a texture operation
struct TextureParameters {
@ -328,11 +341,13 @@ public:
Id gradY;
Id sample;
Id comp;
Id texelOut;
Id lodClamp;
};
// Select the correct texture operation based on all inputs, and emit the correct instruction
Id createTextureCall(Decoration precision, Id resultType, bool fetch, bool proj, bool gather,
const TextureParameters&);
Id createTextureCall(Decoration precision, Id resultType, bool sparse, bool fetch, bool proj,
bool gather, bool noImplicit, const TextureParameters&);
// Emit the OpTextureQuery* instruction that was passed in.
// Figure out the right return value and type, and return it.
@ -343,7 +358,7 @@ public:
Id createBitFieldExtractCall(Decoration precision, Id, Id, Id, bool isSigned);
Id createBitFieldInsertCall(Decoration precision, Id, Id, Id, Id);
// Reduction comparision for composites: For equal and not-equal resulting in a scalar.
// Reduction comparison for composites: For equal and not-equal resulting in a scalar.
Id createCompositeCompare(Decoration precision, Id, Id,
bool /* true if for equal, false if for not-equal */);
@ -403,28 +418,24 @@ public:
// Finish off the innermost switch.
void endSwitch(std::vector<Block*>& segmentBB);
// Start the beginning of a new loop, and prepare the builder to
// generate code for the loop test.
// The loopTestFirst parameter is true when the loop test executes before
// the body. (It is false for do-while loops.)
void makeNewLoop(bool loopTestFirst);
struct LoopBlocks {
Block &head, &body, &merge, &continue_target;
};
// Add the branch for the loop test, based on the given condition.
// The true branch goes to the first block in the loop body, and
// the false branch goes to the loop's merge block. The builder insertion
// point will be placed at the start of the body.
void createLoopTestBranch(Id condition);
// Start a new loop and prepare the builder to generate code for it. Until
// closeLoop() is called for this loop, createLoopContinue() and
// createLoopExit() will target its corresponding blocks.
LoopBlocks& makeNewLoop();
// Generate an unconditional branch to the loop body. The builder insertion
// point will be placed at the start of the body. Use this when there is
// no loop test.
void createBranchToBody();
// Create a new block in the function containing the build point. Memory is
// owned by the function object.
Block& makeNewBlock();
// Add a branch to the test of the current (innermost) loop.
// The way we generate code, that's also the loop header.
// Add a branch to the continue_target of the current (innermost) loop.
void createLoopContinue();
// Add an exit (e.g. "break") for the innermost loop that you're in
// Add an exit (e.g. "break") from the innermost loop that we're currently
// in.
void createLoopExit();
// Close the innermost loop that you're in
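
A rough sketch of how a front end might drive this loop API for a while-style loop. The emission order is an assumption pieced together from the comments above (makeNewLoop(), LoopBlocks, closeLoop()), not code from this commit.

#include "SpvBuilder.h"

// Rough sketch only; assumes 'condition' is already a boolean SPIR-V id and
// that spv::LoopControlMaskNone from spirv.hpp is an acceptable loop control.
void emitWhileLoop(spv::Builder& builder, spv::Id condition)
{
    spv::Builder::LoopBlocks& loop = builder.makeNewLoop();
    builder.createBranch(&loop.head);                 // enter the loop header
    builder.setBuildPoint(&loop.head);
    builder.createLoopMerge(&loop.merge, &loop.continue_target, spv::LoopControlMaskNone);
    spv::Block& test = builder.makeNewBlock();        // header must end in a plain branch
    builder.createBranch(&test);
    builder.setBuildPoint(&test);
    builder.createConditionalBranch(condition, &loop.body, &loop.merge);
    builder.setBuildPoint(&loop.body);
    // ... emit the loop body here ...
    builder.createBranch(&loop.continue_target);
    builder.setBuildPoint(&loop.continue_target);
    builder.createBranch(&loop.head);                 // back edge
    builder.closeLoop();                              // done; continue/exit now target the enclosing loop
}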
@ -515,13 +526,21 @@ public:
void accessChainStore(Id rvalue);
// use accessChain and swizzle to load an r-value
Id accessChainLoad(Id ResultType);
Id accessChainLoad(Decoration precision, Id ResultType);
// get the direct pointer for an l-value
Id accessChainGetLValue();
// Get the inferred SPIR-V type of the result of the current access chain,
// based on the type of the base and the chain of dereferences.
Id accessChainGetInferredType();
void dump(std::vector<unsigned int>&) const;
void createBranch(Block* block);
void createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock);
void createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control);
protected:
Id makeIntConstant(Id typeId, unsigned value, bool specConstant);
Id findScalarConstant(Op typeClass, Op opcode, Id typeId, unsigned value) const;
@ -531,21 +550,16 @@ protected:
void transferAccessChainSwizzle(bool dynamic);
void simplifyAccessChainSwizzle();
void createAndSetNoPredecessorBlock(const char*);
void createBranch(Block* block);
void createSelectionMerge(Block* mergeBlock, unsigned int control);
void createLoopMerge(Block* mergeBlock, Block* continueBlock, unsigned int control);
void createConditionalBranch(Id condition, Block* thenBlock, Block* elseBlock);
void dumpInstructions(std::vector<unsigned int>&, const std::vector<Instruction*>&) const;
struct Loop; // Defined below.
void createBranchToLoopHeaderFromInside(const Loop& loop);
void dumpInstructions(std::vector<unsigned int>&,
const std::vector<std::unique_ptr<Instruction> >&) const;
SourceLanguage source;
int sourceVersion;
std::vector<const char*> extensions;
AddressingModel addressModel;
MemoryModel memoryModel;
std::vector<spv::Capability> capabilities;
std::set<spv::Capability> capabilities;
int builderNumber;
Module module;
Block* buildPoint;
@ -554,14 +568,15 @@ protected:
AccessChain accessChain;
// special blocks of instructions for output
std::vector<Instruction*> imports;
std::vector<Instruction*> entryPoints;
std::vector<Instruction*> executionModes;
std::vector<Instruction*> names;
std::vector<Instruction*> lines;
std::vector<Instruction*> decorations;
std::vector<Instruction*> constantsTypesGlobals;
std::vector<Instruction*> externals;
std::vector<std::unique_ptr<Instruction> > imports;
std::vector<std::unique_ptr<Instruction> > entryPoints;
std::vector<std::unique_ptr<Instruction> > executionModes;
std::vector<std::unique_ptr<Instruction> > names;
std::vector<std::unique_ptr<Instruction> > lines;
std::vector<std::unique_ptr<Instruction> > decorations;
std::vector<std::unique_ptr<Instruction> > constantsTypesGlobals;
std::vector<std::unique_ptr<Instruction> > externals;
std::vector<std::unique_ptr<Function> > functions;
// not output, internally used for quick & dirty canonical (unique) creation
std::vector<Instruction*> groupedConstants[OpConstant]; // all types appear before OpConstant
@ -570,47 +585,8 @@ protected:
// stack of switches
std::stack<Block*> switchMerges;
// Data that needs to be kept in order to properly handle loops.
struct Loop {
// Constructs a default Loop structure containing new header, merge, and
// body blocks for the current function.
// The testFirst argument indicates whether the loop test executes at
// the top of the loop rather than at the bottom. In the latter case,
// also create a phi instruction whose value indicates whether we're on
// the first iteration of the loop. The phi instruction is initialized
// with no values or predecessor operands.
Loop(Builder& builder, bool testFirst);
// The function containing the loop.
Function* const function;
// The header is the first block generated for the loop.
// It dominates all the blocks in the loop, i.e. it is always
// executed before any others.
// If the loop test is executed before the body (as in "while" and
// "for" loops), then the header begins with the test code.
// Otherwise, the loop is a "do-while" loop and the header contains the
// start of the body of the loop (if the body exists).
Block* const header;
// The merge block marks the end of the loop. Control is transferred
// to the merge block when either the loop test fails, or when a
// nested "break" is encountered.
Block* const merge;
// The body block is the first basic block in the body of the loop, i.e.
// the code that is to be repeatedly executed, aside from loop control.
// This member is null until we generate code that references the loop
// body block.
Block* const body;
// True when the loop test executes before the body.
const bool testFirst;
// When the test executes after the body, this is defined as the phi
// instruction that tells us whether we are on the first iteration of
// the loop. Otherwise this is null. This is non-const because
// it has to be initialized outside of the initializer-list.
Instruction* isFirstIteration;
};
// Our loop stack.
std::stack<Loop> loops;
std::stack<LoopBlocks> loops;
}; // end Builder class
// Use for non-fatal notes about what's not complete

SPIRV/disassemble.cpp (7 changed lines, executable file → normal file)

@ -59,7 +59,7 @@ const char* GlslStd450DebugNames[spv::GLSLstd450Count];
namespace spv {
void Kill(std::ostream& out, const char* message)
static void Kill(std::ostream& out, const char* message)
{
out << std::endl << "Disassembly failed: " << message << std::endl;
exit(1);
@ -473,6 +473,7 @@ void SpirvStream::disassembleInstruction(Id resultId, Id /*typeId*/, Op opCode,
else
out << OperandClassParams[operandClass].getName(stream[word++]);
--numOperands;
break;
}
}
@ -480,7 +481,7 @@ void SpirvStream::disassembleInstruction(Id resultId, Id /*typeId*/, Op opCode,
return;
}
void GLSLstd450GetDebugNames(const char** names)
static void GLSLstd450GetDebugNames(const char** names)
{
for (int i = 0; i < GLSLstd450Count; ++i)
names[i] = "Unknown";
@ -531,7 +532,6 @@ void GLSLstd450GetDebugNames(const char** names)
names[GLSLstd450SClamp] = "SClamp";
names[GLSLstd450UClamp] = "UClamp";
names[GLSLstd450FMix] = "FMix";
names[GLSLstd450IMix] = "IMix";
names[GLSLstd450Step] = "Step";
names[GLSLstd450SmoothStep] = "SmoothStep";
names[GLSLstd450Fma] = "Fma";
@ -568,6 +568,7 @@ void GLSLstd450GetDebugNames(const char** names)
void Disassemble(std::ostream& out, const std::vector<unsigned int>& stream)
{
SpirvStream SpirvStream(out, stream);
spv::Parameterize();
GLSLstd450GetDebugNames(GlslStd450DebugNames);
SpirvStream.validate();
SpirvStream.processInstructions();


@ -712,7 +712,7 @@ const char* KernelProfilingInfoString(int info)
}
}
const int CapabilityCeiling = 57;
const int CapabilityCeiling = 58;
const char* CapabilityString(int info)
{
@ -775,6 +775,7 @@ const char* CapabilityString(int info)
case 54: return "GeometryStreams";
case 55: return "StorageImageReadWithoutFormat";
case 56: return "StorageImageWriteWithoutFormat";
case 57: return "MultiViewport";
case CapabilityCeiling:
default: return "Bad";
@ -1104,6 +1105,7 @@ const char* OpcodeString(int op)
case 317: return "OpNoLine";
case 318: return "OpAtomicFlagTestAndSet";
case 319: return "OpAtomicFlagClear";
case 320: return "OpImageSparseRead";
case OpcodeCeiling:
default:
@ -1311,7 +1313,6 @@ void Parameterize()
CapabilityParams[CapabilityTessellation].caps.push_back(CapabilityShader);
CapabilityParams[CapabilityVector16].caps.push_back(CapabilityKernel);
CapabilityParams[CapabilityFloat16Buffer].caps.push_back(CapabilityKernel);
CapabilityParams[CapabilityFloat16].caps.push_back(CapabilityFloat16Buffer);
CapabilityParams[CapabilityInt64Atomics].caps.push_back(CapabilityInt64);
CapabilityParams[CapabilityImageBasic].caps.push_back(CapabilityKernel);
CapabilityParams[CapabilityImageReadWrite].caps.push_back(CapabilityImageBasic);
@ -1353,6 +1354,7 @@ void Parameterize()
CapabilityParams[CapabilityGeometryStreams].caps.push_back(CapabilityGeometry);
CapabilityParams[CapabilityStorageImageReadWithoutFormat].caps.push_back(CapabilityShader);
CapabilityParams[CapabilityStorageImageWriteWithoutFormat].caps.push_back(CapabilityShader);
CapabilityParams[CapabilityMultiViewport].caps.push_back(CapabilityGeometry);
AddressingParams[AddressingModelPhysical32].caps.push_back(CapabilityAddresses);
AddressingParams[AddressingModelPhysical64].caps.push_back(CapabilityAddresses);
@ -1362,7 +1364,7 @@ void Parameterize()
MemoryParams[MemoryModelOpenCL].caps.push_back(CapabilityKernel);
MemorySemanticsParams[MemorySemanticsUniformMemoryShift].caps.push_back(CapabilityShader);
MemorySemanticsParams[MemorySemanticsAtomicCounterMemoryShift].caps.push_back(CapabilityShader);
MemorySemanticsParams[MemorySemanticsAtomicCounterMemoryShift].caps.push_back(CapabilityAtomicStorage);
ExecutionModelParams[ExecutionModelVertex].caps.push_back(CapabilityShader);
ExecutionModelParams[ExecutionModelTessellationControl].caps.push_back(CapabilityTessellation);
@ -1528,7 +1530,7 @@ void Parameterize()
DecorationParams[DecorationFlat].caps.push_back(CapabilityShader);
DecorationParams[DecorationPatch].caps.push_back(CapabilityTessellation);
DecorationParams[DecorationCentroid].caps.push_back(CapabilityShader);
DecorationParams[DecorationSample].caps.push_back(CapabilityShader);
DecorationParams[DecorationSample].caps.push_back(CapabilitySampleRateShading);
DecorationParams[DecorationInvariant].caps.push_back(CapabilityShader);
DecorationParams[DecorationConstant].caps.push_back(CapabilityKernel);
DecorationParams[DecorationUniform].caps.push_back(CapabilityShader);
@ -1537,14 +1539,14 @@ void Parameterize()
DecorationParams[DecorationStream].caps.push_back(CapabilityGeometryStreams);
DecorationParams[DecorationLocation].caps.push_back(CapabilityShader);
DecorationParams[DecorationComponent].caps.push_back(CapabilityShader);
DecorationParams[DecorationOffset].caps.push_back(CapabilityShader);
DecorationParams[DecorationIndex].caps.push_back(CapabilityShader);
DecorationParams[DecorationBinding].caps.push_back(CapabilityShader);
DecorationParams[DecorationDescriptorSet].caps.push_back(CapabilityShader);
DecorationParams[DecorationXfbBuffer].caps.push_back(CapabilityTransformFeedback);
DecorationParams[DecorationXfbStride].caps.push_back(CapabilityTransformFeedback);
DecorationParams[DecorationArrayStride].caps.push_back(CapabilityShader);
DecorationParams[DecorationMatrixStride].caps.push_back(CapabilityShader);
DecorationParams[DecorationBuiltIn].caps.push_back(CapabilityShader);
DecorationParams[DecorationMatrixStride].caps.push_back(CapabilityMatrix);
DecorationParams[DecorationFuncParamAttr].caps.push_back(CapabilityKernel);
DecorationParams[DecorationFPRoundingMode].caps.push_back(CapabilityKernel);
DecorationParams[DecorationFPFastMathMode].caps.push_back(CapabilityKernel);
@ -1556,8 +1558,8 @@ void Parameterize()
BuiltInParams[BuiltInPosition].caps.push_back(CapabilityShader);
BuiltInParams[BuiltInPointSize].caps.push_back(CapabilityShader);
BuiltInParams[BuiltInClipDistance].caps.push_back(CapabilityShader);
BuiltInParams[BuiltInCullDistance].caps.push_back(CapabilityShader);
BuiltInParams[BuiltInClipDistance].caps.push_back(CapabilityClipDistance);
BuiltInParams[BuiltInCullDistance].caps.push_back(CapabilityCullDistance);
BuiltInParams[BuiltInVertexId].caps.push_back(CapabilityShader);
BuiltInParams[BuiltInVertexId].desc = "Vertex ID, which takes on values 0, 1, 2, . . . .";
@ -1576,7 +1578,7 @@ void Parameterize()
BuiltInParams[BuiltInInvocationId].caps.push_back(CapabilityGeometry);
BuiltInParams[BuiltInInvocationId].caps.push_back(CapabilityTessellation);
BuiltInParams[BuiltInLayer].caps.push_back(CapabilityGeometry);
BuiltInParams[BuiltInViewportIndex].caps.push_back(CapabilityGeometry);
BuiltInParams[BuiltInViewportIndex].caps.push_back(CapabilityMultiViewport);
BuiltInParams[BuiltInTessLevelOuter].caps.push_back(CapabilityTessellation);
BuiltInParams[BuiltInTessLevelInner].caps.push_back(CapabilityTessellation);
BuiltInParams[BuiltInTessCoord].caps.push_back(CapabilityTessellation);
@ -1584,9 +1586,9 @@ void Parameterize()
BuiltInParams[BuiltInFragCoord].caps.push_back(CapabilityShader);
BuiltInParams[BuiltInPointCoord].caps.push_back(CapabilityShader);
BuiltInParams[BuiltInFrontFacing].caps.push_back(CapabilityShader);
BuiltInParams[BuiltInSampleId].caps.push_back(CapabilityShader);
BuiltInParams[BuiltInSamplePosition].caps.push_back(CapabilityShader);
BuiltInParams[BuiltInSampleMask].caps.push_back(CapabilityShader);
BuiltInParams[BuiltInSampleId].caps.push_back(CapabilitySampleRateShading);
BuiltInParams[BuiltInSamplePosition].caps.push_back(CapabilitySampleRateShading);
BuiltInParams[BuiltInSampleMask].caps.push_back(CapabilitySampleRateShading);
BuiltInParams[BuiltInFragDepth].caps.push_back(CapabilityShader);
BuiltInParams[BuiltInHelperInvocation].caps.push_back(CapabilityShader);
BuiltInParams[BuiltInWorkDim].caps.push_back(CapabilityKernel);
@ -1962,6 +1964,12 @@ void Parameterize()
InstructionDesc[OpImageSparseDrefGather].operands.push(OperandVariableIds, "", true);
InstructionDesc[OpImageSparseDrefGather].capabilities.push_back(CapabilitySparseResidency);
InstructionDesc[OpImageSparseRead].operands.push(OperandId, "'Image'");
InstructionDesc[OpImageSparseRead].operands.push(OperandId, "'Coordinate'");
InstructionDesc[OpImageSparseRead].operands.push(OperandImageOperands, "", true);
InstructionDesc[OpImageSparseRead].operands.push(OperandVariableIds, "", true);
InstructionDesc[OpImageSparseRead].capabilities.push_back(CapabilitySparseResidency);
InstructionDesc[OpImageSparseTexelsResident].operands.push(OperandId, "'Resident Code'");
InstructionDesc[OpImageSparseTexelsResident].capabilities.push_back(CapabilitySparseResidency);

SPIRV/doc.h (5 changed lines, executable file → normal file)

@ -67,6 +67,8 @@ const char* SamplerFilterModeString(int);
const char* ImageFormatString(int);
const char* ImageChannelOrderString(int);
const char* ImageChannelTypeString(int);
const char* ImageChannelDataTypeString(int type);
const char* ImageOperandsString(int format);
const char* ImageOperands(int);
const char* FPFastMathString(int);
const char* FPRoundingModeString(int);
@ -81,6 +83,7 @@ const char* KernelEnqueueFlagsString(int);
const char* KernelProfilingInfoString(int);
const char* CapabilityString(int);
const char* OpcodeString(int);
const char* ScopeString(int mem);
// For grouping opcodes into subsections
enum OpcodeClass {
@ -243,7 +246,7 @@ protected:
int resultPresent : 1;
};
const int OpcodeCeiling = 320;
const int OpcodeCeiling = 321;
// The set of objects that hold all the instruction/operand
// parameterization information.


@ -1,4 +1,4 @@
// Copyright (c) 2014-2015 The Khronos Group Inc.
// Copyright (c) 2014-2016 The Khronos Group Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and/or associated documentation files (the "Materials"),
@ -39,19 +39,19 @@
// "Mask" in their name, and a parallel enum that has the shift
// amount (1 << x) for each corresponding enumerant.
#ifndef spirv_H
#define spirv_H
#ifndef spirv_HPP
#define spirv_HPP
namespace spv {
typedef unsigned int Id;
#define SPV_VERSION 10000
#define SPV_REVISION 2
#define SPV_VERSION 0x10000
#define SPV_REVISION 3
static const unsigned int MagicNumber = 0x07230203;
static const unsigned int Version = 0x00010000;
static const unsigned int Revision = 2;
static const unsigned int Revision = 3;
static const unsigned int OpCodeMask = 0xffff;
static const unsigned int WordCountShift = 16;
@ -563,6 +563,7 @@ enum Capability {
CapabilityGeometryStreams = 54,
CapabilityStorageImageReadWithoutFormat = 55,
CapabilityStorageImageWriteWithoutFormat = 56,
CapabilityMultiViewport = 57,
};
enum Op {
@ -859,6 +860,7 @@ enum Op {
OpNoLine = 317,
OpAtomicFlagTestAndSet = 318,
OpAtomicFlagClear = 319,
OpImageSparseRead = 320,
};
// Overload operator| for mask bit combining
@ -874,5 +876,4 @@ inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfil
} // end namespace spv
#endif // #ifndef spirv_H
#endif // #ifndef spirv_HPP


@ -52,12 +52,16 @@
#include "spirv.hpp"
#include <vector>
#include <algorithm>
#include <cassert>
#include <functional>
#include <iostream>
#include <assert.h>
#include <memory>
#include <vector>
namespace spv {
class Block;
class Function;
class Module;
@ -66,7 +70,17 @@ const Id NoType = 0;
const unsigned int BadValue = 0xFFFFFFFF;
const Decoration NoPrecision = (Decoration)BadValue;
const MemorySemanticsMask MemorySemanticsAllMemory = (MemorySemanticsMask)0x3FF;
const MemorySemanticsMask MemorySemanticsAllMemory =
(MemorySemanticsMask)(MemorySemanticsAcquireMask |
MemorySemanticsReleaseMask |
MemorySemanticsAcquireReleaseMask |
MemorySemanticsSequentiallyConsistentMask |
MemorySemanticsUniformMemoryMask |
MemorySemanticsSubgroupMemoryMask |
MemorySemanticsWorkgroupMemoryMask |
MemorySemanticsCrossWorkgroupMemoryMask |
MemorySemanticsAtomicCounterMemoryMask |
MemorySemanticsImageMemoryMask);
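
The rewritten constant above is just an OR of the individual mask enumerants. spirv.hpp overloads operator| for its mask enums (see the "Overload operator| for mask bit combining" note in its diff), so callers can combine them without casts; a tiny illustrative sketch, not from this commit:

// Illustrative only: an acquire-release semantics mask that also covers
// workgroup memory, combined via the mask operator| overloads in spirv.hpp.
spv::MemorySemanticsMask acquireReleaseWorkgroup()
{
    return spv::MemorySemanticsAcquireReleaseMask | spv::MemorySemanticsWorkgroupMemoryMask;
}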
//
// SPIR-V IR instruction.
@ -74,8 +88,8 @@ const MemorySemanticsMask MemorySemanticsAllMemory = (MemorySemanticsMask)0x3FF;
class Instruction {
public:
Instruction(Id resultId, Id typeId, Op opCode) : resultId(resultId), typeId(typeId), opCode(opCode) { }
explicit Instruction(Op opCode) : resultId(NoResult), typeId(NoType), opCode(opCode) { }
Instruction(Id resultId, Id typeId, Op opCode) : resultId(resultId), typeId(typeId), opCode(opCode), block(nullptr) { }
explicit Instruction(Op opCode) : resultId(NoResult), typeId(NoType), opCode(opCode), block(nullptr) { }
virtual ~Instruction() {}
void addIdOperand(Id id) { operands.push_back(id); }
void addImmediateOperand(unsigned int immediate) { operands.push_back(immediate); }
@ -106,6 +120,8 @@ public:
addImmediateOperand(word);
}
}
void setBlock(Block* b) { block = b; }
Block* getBlock() const { return block; }
Op getOpCode() const { return opCode; }
int getNumOperands() const { return (int)operands.size(); }
Id getResultId() const { return resultId; }
@ -144,6 +160,7 @@ protected:
Op opCode;
std::vector<Id> operands;
std::string originalString; // could be optimized away; convenience for getting string operand
Block* block;
};
//
@ -155,18 +172,31 @@ public:
Block(Id id, Function& parent);
virtual ~Block()
{
// TODO: free instructions
}
Id getId() { return instructions.front()->getResultId(); }
Function& getParent() const { return parent; }
void addInstruction(Instruction* inst);
void addPredecessor(Block* pred) { predecessors.push_back(pred); }
void addLocalVariable(Instruction* inst) { localVariables.push_back(inst); }
int getNumPredecessors() const { return (int)predecessors.size(); }
void addInstruction(std::unique_ptr<Instruction> inst);
void addPredecessor(Block* pred) { predecessors.push_back(pred); pred->successors.push_back(this);}
void addLocalVariable(std::unique_ptr<Instruction> inst) { localVariables.push_back(std::move(inst)); }
const std::vector<Block*>& getPredecessors() const { return predecessors; }
const std::vector<Block*>& getSuccessors() const { return successors; }
void setUnreachable() { unreachable = true; }
bool isUnreachable() const { return unreachable; }
// Returns the block's merge instruction, if one exists (otherwise null).
const Instruction* getMergeInstruction() const {
if (instructions.size() < 2) return nullptr;
const Instruction* nextToLast = (instructions.cend() - 2)->get();
switch (nextToLast->getOpCode()) {
case OpSelectionMerge:
case OpLoopMerge:
return nextToLast;
default:
return nullptr;
}
return nullptr;
}
bool isTerminated() const
{
@ -185,12 +215,6 @@ public:
void dump(std::vector<unsigned int>& out) const
{
// skip the degenerate unreachable blocks
// TODO: code gen: skip all unreachable blocks (transitive closure)
// (but, until that's done safer to keep non-degenerate unreachable blocks, in case others depend on something)
if (unreachable && instructions.size() <= 2)
return;
instructions[0]->dump(out);
for (int i = 0; i < (int)localVariables.size(); ++i)
localVariables[i]->dump(out);
@ -205,9 +229,9 @@ protected:
// To enforce keeping parent and ownership in sync:
friend Function;
std::vector<Instruction*> instructions;
std::vector<Block*> predecessors;
std::vector<Instruction*> localVariables;
std::vector<std::unique_ptr<Instruction> > instructions;
std::vector<Block*> predecessors, successors;
std::vector<std::unique_ptr<Instruction> > localVariables;
Function& parent;
// track whether this block is known to be uncreachable (not necessarily
@ -216,6 +240,11 @@ protected:
bool unreachable;
};
// Traverses the control-flow graph rooted at root in an order suited for
// readable code generation. Invokes callback at every node in the traversal
// order.
void inReadableOrder(Block* root, std::function<void(Block*)> callback);
//
// SPIR-V IR Function.
//
@ -235,12 +264,18 @@ public:
Id getParamId(int p) { return parameterInstructions[p]->getResultId(); }
void addBlock(Block* block) { blocks.push_back(block); }
void popBlock(Block*) { blocks.pop_back(); }
void removeBlock(Block* block)
{
auto found = find(blocks.begin(), blocks.end(), block);
assert(found != blocks.end());
blocks.erase(found);
delete block;
}
Module& getParent() const { return parent; }
Block* getEntryBlock() const { return blocks.front(); }
Block* getLastBlock() const { return blocks.back(); }
void addLocalVariable(Instruction* inst);
void addLocalVariable(std::unique_ptr<Instruction> inst);
Id getReturnType() const { return functionInstruction.getTypeId(); }
void dump(std::vector<unsigned int>& out) const
{
@ -252,8 +287,7 @@ public:
parameterInstructions[p]->dump(out);
// Blocks
for (int b = 0; b < (int)blocks.size(); ++b)
blocks[b]->dump(out);
inReadableOrder(blocks[0], [&out](const Block* b) { b->dump(out); });
Instruction end(0, 0, OpFunctionEnd);
end.dump(out);
}
@ -341,22 +375,27 @@ __inline Function::Function(Id id, Id resultType, Id functionType, Id firstParam
}
}
__inline void Function::addLocalVariable(Instruction* inst)
__inline void Function::addLocalVariable(std::unique_ptr<Instruction> inst)
{
blocks[0]->addLocalVariable(inst);
parent.mapInstruction(inst);
Instruction* raw_instruction = inst.get();
blocks[0]->addLocalVariable(std::move(inst));
parent.mapInstruction(raw_instruction);
}
__inline Block::Block(Id id, Function& parent) : parent(parent), unreachable(false)
{
instructions.push_back(new Instruction(id, NoType, OpLabel));
instructions.push_back(std::unique_ptr<Instruction>(new Instruction(id, NoType, OpLabel)));
instructions.back()->setBlock(this);
parent.getParent().mapInstruction(instructions.back().get());
}
__inline void Block::addInstruction(Instruction* inst)
__inline void Block::addInstruction(std::unique_ptr<Instruction> inst)
{
instructions.push_back(inst);
if (inst->getResultId())
parent.getParent().mapInstruction(inst);
Instruction* raw_instruction = inst.get();
instructions.push_back(std::move(inst));
raw_instruction->setBlock(this);
if (raw_instruction->getResultId())
parent.getParent().mapInstruction(raw_instruction);
}
}; // end spv namespace
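
A small sketch (not from this commit) of the ownership convention introduced here: blocks own their instructions through std::unique_ptr, so callers transfer ownership with std::move and keep only a raw, non-owning pointer when they still need one.

#include "spvIR.h"
#include <memory>

// Illustrative only: append an OpUndef instruction to a block under the
// unique_ptr scheme, returning a non-owning handle for later use.
spv::Instruction* addUndef(spv::Block& block, spv::Id resultId, spv::Id typeId)
{
    std::unique_ptr<spv::Instruction> undef(new spv::Instruction(resultId, typeId, spv::OpUndef));
    spv::Instruction* raw = undef.get();      // grab the handle before moving
    block.addInstruction(std::move(undef));   // the block now owns the instruction
    return raw;
}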