Add-support-for-SPV_NV_compute_shader_derivatives

This commit is contained in:
Chao Chen 2018-09-19 11:40:45 -07:00
parent 9eada4b971
commit beae2251b7
14 changed files with 1257 additions and 154 deletions

View file

@ -122,6 +122,158 @@ TBuiltIns::~TBuiltIns()
//
void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvVersion)
{
//============================================================================
//
// Prototypes for built-in functions used repeatedly by different shaders
//
//============================================================================
//
// Derivatives Functions.
//
TString derivatives (
"float dFdx(float p);"
"vec2 dFdx(vec2 p);"
"vec3 dFdx(vec3 p);"
"vec4 dFdx(vec4 p);"
"float dFdy(float p);"
"vec2 dFdy(vec2 p);"
"vec3 dFdy(vec3 p);"
"vec4 dFdy(vec4 p);"
"float fwidth(float p);"
"vec2 fwidth(vec2 p);"
"vec3 fwidth(vec3 p);"
"vec4 fwidth(vec4 p);"
);
TString derivativeControls (
"float dFdxFine(float p);"
"vec2 dFdxFine(vec2 p);"
"vec3 dFdxFine(vec3 p);"
"vec4 dFdxFine(vec4 p);"
"float dFdyFine(float p);"
"vec2 dFdyFine(vec2 p);"
"vec3 dFdyFine(vec3 p);"
"vec4 dFdyFine(vec4 p);"
"float fwidthFine(float p);"
"vec2 fwidthFine(vec2 p);"
"vec3 fwidthFine(vec3 p);"
"vec4 fwidthFine(vec4 p);"
"float dFdxCoarse(float p);"
"vec2 dFdxCoarse(vec2 p);"
"vec3 dFdxCoarse(vec3 p);"
"vec4 dFdxCoarse(vec4 p);"
"float dFdyCoarse(float p);"
"vec2 dFdyCoarse(vec2 p);"
"vec3 dFdyCoarse(vec3 p);"
"vec4 dFdyCoarse(vec4 p);"
"float fwidthCoarse(float p);"
"vec2 fwidthCoarse(vec2 p);"
"vec3 fwidthCoarse(vec3 p);"
"vec4 fwidthCoarse(vec4 p);"
);
TString derivativesAndControl16bits (
"float16_t dFdx(float16_t);"
"f16vec2 dFdx(f16vec2);"
"f16vec3 dFdx(f16vec3);"
"f16vec4 dFdx(f16vec4);"
"float16_t dFdy(float16_t);"
"f16vec2 dFdy(f16vec2);"
"f16vec3 dFdy(f16vec3);"
"f16vec4 dFdy(f16vec4);"
"float16_t dFdxFine(float16_t);"
"f16vec2 dFdxFine(f16vec2);"
"f16vec3 dFdxFine(f16vec3);"
"f16vec4 dFdxFine(f16vec4);"
"float16_t dFdyFine(float16_t);"
"f16vec2 dFdyFine(f16vec2);"
"f16vec3 dFdyFine(f16vec3);"
"f16vec4 dFdyFine(f16vec4);"
"float16_t dFdxCoarse(float16_t);"
"f16vec2 dFdxCoarse(f16vec2);"
"f16vec3 dFdxCoarse(f16vec3);"
"f16vec4 dFdxCoarse(f16vec4);"
"float16_t dFdyCoarse(float16_t);"
"f16vec2 dFdyCoarse(f16vec2);"
"f16vec3 dFdyCoarse(f16vec3);"
"f16vec4 dFdyCoarse(f16vec4);"
"float16_t fwidth(float16_t);"
"f16vec2 fwidth(f16vec2);"
"f16vec3 fwidth(f16vec3);"
"f16vec4 fwidth(f16vec4);"
"float16_t fwidthFine(float16_t);"
"f16vec2 fwidthFine(f16vec2);"
"f16vec3 fwidthFine(f16vec3);"
"f16vec4 fwidthFine(f16vec4);"
"float16_t fwidthCoarse(float16_t);"
"f16vec2 fwidthCoarse(f16vec2);"
"f16vec3 fwidthCoarse(f16vec3);"
"f16vec4 fwidthCoarse(f16vec4);"
);
TString derivativesAndControl64bits (
"float64_t dFdx(float64_t);"
"f64vec2 dFdx(f64vec2);"
"f64vec3 dFdx(f64vec3);"
"f64vec4 dFdx(f64vec4);"
"float64_t dFdy(float64_t);"
"f64vec2 dFdy(f64vec2);"
"f64vec3 dFdy(f64vec3);"
"f64vec4 dFdy(f64vec4);"
"float64_t dFdxFine(float64_t);"
"f64vec2 dFdxFine(f64vec2);"
"f64vec3 dFdxFine(f64vec3);"
"f64vec4 dFdxFine(f64vec4);"
"float64_t dFdyFine(float64_t);"
"f64vec2 dFdyFine(f64vec2);"
"f64vec3 dFdyFine(f64vec3);"
"f64vec4 dFdyFine(f64vec4);"
"float64_t dFdxCoarse(float64_t);"
"f64vec2 dFdxCoarse(f64vec2);"
"f64vec3 dFdxCoarse(f64vec3);"
"f64vec4 dFdxCoarse(f64vec4);"
"float64_t dFdyCoarse(float64_t);"
"f64vec2 dFdyCoarse(f64vec2);"
"f64vec3 dFdyCoarse(f64vec3);"
"f64vec4 dFdyCoarse(f64vec4);"
"float64_t fwidth(float64_t);"
"f64vec2 fwidth(f64vec2);"
"f64vec3 fwidth(f64vec3);"
"f64vec4 fwidth(f64vec4);"
"float64_t fwidthFine(float64_t);"
"f64vec2 fwidthFine(f64vec2);"
"f64vec3 fwidthFine(f64vec3);"
"f64vec4 fwidthFine(f64vec4);"
"float64_t fwidthCoarse(float64_t);"
"f64vec2 fwidthCoarse(f64vec2);"
"f64vec3 fwidthCoarse(f64vec3);"
"f64vec4 fwidthCoarse(f64vec4);"
);
//============================================================================
//
// Prototypes for built-in functions seen by both vertex and fragment shaders.
@ -4550,52 +4702,8 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
"\n");
}
if (profile != EEsProfile && version >= 450) {
stageBuiltins[EShLangFragment].append(derivativesAndControl64bits);
stageBuiltins[EShLangFragment].append(
"float64_t dFdx(float64_t);"
"f64vec2 dFdx(f64vec2);"
"f64vec3 dFdx(f64vec3);"
"f64vec4 dFdx(f64vec4);"
"float64_t dFdy(float64_t);"
"f64vec2 dFdy(f64vec2);"
"f64vec3 dFdy(f64vec3);"
"f64vec4 dFdy(f64vec4);"
"float64_t dFdxFine(float64_t);"
"f64vec2 dFdxFine(f64vec2);"
"f64vec3 dFdxFine(f64vec3);"
"f64vec4 dFdxFine(f64vec4);"
"float64_t dFdyFine(float64_t);"
"f64vec2 dFdyFine(f64vec2);"
"f64vec3 dFdyFine(f64vec3);"
"f64vec4 dFdyFine(f64vec4);"
"float64_t dFdxCoarse(float64_t);"
"f64vec2 dFdxCoarse(f64vec2);"
"f64vec3 dFdxCoarse(f64vec3);"
"f64vec4 dFdxCoarse(f64vec4);"
"float64_t dFdyCoarse(float64_t);"
"f64vec2 dFdyCoarse(f64vec2);"
"f64vec3 dFdyCoarse(f64vec3);"
"f64vec4 dFdyCoarse(f64vec4);"
"float64_t fwidth(float64_t);"
"f64vec2 fwidth(f64vec2);"
"f64vec3 fwidth(f64vec3);"
"f64vec4 fwidth(f64vec4);"
"float64_t fwidthFine(float64_t);"
"f64vec2 fwidthFine(f64vec2);"
"f64vec3 fwidthFine(f64vec3);"
"f64vec4 fwidthFine(f64vec4);"
"float64_t fwidthCoarse(float64_t);"
"f64vec2 fwidthCoarse(f64vec2);"
"f64vec3 fwidthCoarse(f64vec3);"
"f64vec4 fwidthCoarse(f64vec4);"
"float64_t interpolateAtCentroid(float64_t);"
"f64vec2 interpolateAtCentroid(f64vec2);"
"f64vec3 interpolateAtCentroid(f64vec3);"
@ -4784,61 +4892,13 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
"\n");
}
stageBuiltins[EShLangFragment].append(
"float dFdx(float p);"
"vec2 dFdx(vec2 p);"
"vec3 dFdx(vec3 p);"
"vec4 dFdx(vec4 p);"
"float dFdy(float p);"
"vec2 dFdy(vec2 p);"
"vec3 dFdy(vec3 p);"
"vec4 dFdy(vec4 p);"
"float fwidth(float p);"
"vec2 fwidth(vec2 p);"
"vec3 fwidth(vec3 p);"
"vec4 fwidth(vec4 p);"
"\n");
stageBuiltins[EShLangFragment].append(derivatives);
stageBuiltins[EShLangFragment].append("\n");
// GL_ARB_derivative_control
if (profile != EEsProfile && version >= 400) {
stageBuiltins[EShLangFragment].append(
"float dFdxFine(float p);"
"vec2 dFdxFine(vec2 p);"
"vec3 dFdxFine(vec3 p);"
"vec4 dFdxFine(vec4 p);"
"float dFdyFine(float p);"
"vec2 dFdyFine(vec2 p);"
"vec3 dFdyFine(vec3 p);"
"vec4 dFdyFine(vec4 p);"
"float fwidthFine(float p);"
"vec2 fwidthFine(vec2 p);"
"vec3 fwidthFine(vec3 p);"
"vec4 fwidthFine(vec4 p);"
"\n");
stageBuiltins[EShLangFragment].append(
"float dFdxCoarse(float p);"
"vec2 dFdxCoarse(vec2 p);"
"vec3 dFdxCoarse(vec3 p);"
"vec4 dFdxCoarse(vec4 p);"
"float dFdyCoarse(float p);"
"vec2 dFdyCoarse(vec2 p);"
"vec3 dFdyCoarse(vec3 p);"
"vec4 dFdyCoarse(vec4 p);"
"float fwidthCoarse(float p);"
"vec2 fwidthCoarse(vec2 p);"
"vec3 fwidthCoarse(vec3 p);"
"vec4 fwidthCoarse(vec4 p);"
"\n");
stageBuiltins[EShLangFragment].append(derivativeControls);
stageBuiltins[EShLangFragment].append("\n");
}
// GL_OES_shader_multisample_interpolation
@ -4892,52 +4952,10 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
// GL_AMD_gpu_shader_half_float
if (profile != EEsProfile && version >= 450) {
stageBuiltins[EShLangFragment].append(derivativesAndControl16bits);
stageBuiltins[EShLangFragment].append("\n");
stageBuiltins[EShLangFragment].append(
"float16_t dFdx(float16_t);"
"f16vec2 dFdx(f16vec2);"
"f16vec3 dFdx(f16vec3);"
"f16vec4 dFdx(f16vec4);"
"float16_t dFdy(float16_t);"
"f16vec2 dFdy(f16vec2);"
"f16vec3 dFdy(f16vec3);"
"f16vec4 dFdy(f16vec4);"
"float16_t dFdxFine(float16_t);"
"f16vec2 dFdxFine(f16vec2);"
"f16vec3 dFdxFine(f16vec3);"
"f16vec4 dFdxFine(f16vec4);"
"float16_t dFdyFine(float16_t);"
"f16vec2 dFdyFine(f16vec2);"
"f16vec3 dFdyFine(f16vec3);"
"f16vec4 dFdyFine(f16vec4);"
"float16_t dFdxCoarse(float16_t);"
"f16vec2 dFdxCoarse(f16vec2);"
"f16vec3 dFdxCoarse(f16vec3);"
"f16vec4 dFdxCoarse(f16vec4);"
"float16_t dFdyCoarse(float16_t);"
"f16vec2 dFdyCoarse(f16vec2);"
"f16vec3 dFdyCoarse(f16vec3);"
"f16vec4 dFdyCoarse(f16vec4);"
"float16_t fwidth(float16_t);"
"f16vec2 fwidth(f16vec2);"
"f16vec3 fwidth(f16vec3);"
"f16vec4 fwidth(f16vec4);"
"float16_t fwidthFine(float16_t);"
"f16vec2 fwidthFine(f16vec2);"
"f16vec3 fwidthFine(f16vec3);"
"f16vec4 fwidthFine(f16vec4);"
"float16_t fwidthCoarse(float16_t);"
"f16vec2 fwidthCoarse(f16vec2);"
"f16vec3 fwidthCoarse(f16vec3);"
"f16vec4 fwidthCoarse(f16vec4);"
"float16_t interpolateAtCentroid(float16_t);"
"f16vec2 interpolateAtCentroid(f16vec2);"
"f16vec3 interpolateAtCentroid(f16vec3);"
@ -4971,6 +4989,22 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
}
#endif
#ifdef NV_EXTENSIONS
//E_SPV_NV_compute_shader_derivatives
stageBuiltins[EShLangCompute].append(derivatives);
stageBuiltins[EShLangCompute].append(derivativeControls);
stageBuiltins[EShLangCompute].append("\n");
if (profile != EEsProfile && version >= 450) {
stageBuiltins[EShLangCompute].append(derivativesAndControl16bits);
stageBuiltins[EShLangCompute].append(derivativesAndControl64bits);
stageBuiltins[EShLangCompute].append("\n");
}
#endif
//============================================================================
//
// Standard Uniforms
@ -6114,6 +6148,18 @@ void TBuiltIns::addQueryFunctions(TSampler sampler, const TString& typeName, int
#ifdef AMD_EXTENSIONS
}
#endif
#ifdef NV_EXTENSIONS
stageBuiltins[EShLangCompute].append("vec2 textureQueryLod(");
stageBuiltins[EShLangCompute].append(typeName);
if (dimMap[sampler.dim] == 1)
stageBuiltins[EShLangCompute].append(", float");
else {
stageBuiltins[EShLangCompute].append(", vec");
stageBuiltins[EShLangCompute].append(postfixes[dimMap[sampler.dim]]);
}
stageBuiltins[EShLangCompute].append(");\n");
#endif
}
//
@ -6594,9 +6640,12 @@ void TBuiltIns::addSamplingFunctions(TSampler sampler, const TString& typeName,
s.append(");\n");
// Add to the per-language set of built-ins
if (bias || lodClamp)
if (bias || lodClamp) {
stageBuiltins[EShLangFragment].append(s);
else
#ifdef NV_EXTENSIONS
stageBuiltins[EShLangCompute].append(s);
#endif
} else
commonBuiltins.append(s);
}
@ -7923,6 +7972,19 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
BuiltInVariable("gl_BaryCoordNV", EbvBaryCoordNV, symbolTable);
BuiltInVariable("gl_BaryCoordNoPerspNV", EbvBaryCoordNoPerspNV, symbolTable);
}
if (((profile != EEsProfile && version >= 450) ||
(profile == EEsProfile && version >= 320)) &&
language == EShLangCompute) {
symbolTable.setFunctionExtensions("dFdx", 1, &E_GL_NV_compute_shader_derivatives);
symbolTable.setFunctionExtensions("dFdy", 1, &E_GL_NV_compute_shader_derivatives);
symbolTable.setFunctionExtensions("fwidth", 1, &E_GL_NV_compute_shader_derivatives);
symbolTable.setFunctionExtensions("dFdxFine", 1, &E_GL_NV_compute_shader_derivatives);
symbolTable.setFunctionExtensions("dFdyFine", 1, &E_GL_NV_compute_shader_derivatives);
symbolTable.setFunctionExtensions("fwidthFine", 1, &E_GL_NV_compute_shader_derivatives);
symbolTable.setFunctionExtensions("dFdxCoarse", 1, &E_GL_NV_compute_shader_derivatives);
symbolTable.setFunctionExtensions("dFdyCoarse", 1, &E_GL_NV_compute_shader_derivatives);
symbolTable.setFunctionExtensions("fwidthCoarse", 1, &E_GL_NV_compute_shader_derivatives);
}
#endif
symbolTable.setVariableExtensions("gl_FragDepthEXT", 1, &E_GL_EXT_frag_depth);
@ -8711,6 +8773,20 @@ void TBuiltIns::identifyBuiltIns(int version, EProfile profile, const SpvVersion
symbolTable.relateToOperator("memoryBarrierShared", EOpMemoryBarrierShared);
symbolTable.relateToOperator("groupMemoryBarrier", EOpGroupMemoryBarrier);
symbolTable.relateToOperator("subgroupMemoryBarrierShared", EOpSubgroupMemoryBarrierShared);
#ifdef NV_EXTENSIONS
if ((profile != EEsProfile && version >= 450) ||
(profile == EEsProfile && version >= 320)) {
symbolTable.relateToOperator("dFdx", EOpDPdx);
symbolTable.relateToOperator("dFdy", EOpDPdy);
symbolTable.relateToOperator("fwidth", EOpFwidth);
symbolTable.relateToOperator("dFdxFine", EOpDPdxFine);
symbolTable.relateToOperator("dFdyFine", EOpDPdyFine);
symbolTable.relateToOperator("fwidthFine", EOpFwidthFine);
symbolTable.relateToOperator("dFdxCoarse", EOpDPdxCoarse);
symbolTable.relateToOperator("dFdyCoarse", EOpDPdyCoarse);
symbolTable.relateToOperator("fwidthCoarse",EOpFwidthCoarse);
}
#endif
break;
default:

View file

@ -4623,6 +4623,18 @@ void TParseContext::setLayoutQualifier(const TSourceLoc& loc, TPublicType& publi
return;
}
}
if (language == EShLangCompute) {
if (id.compare(0, 17, "derivative_group_") == 0) {
requireExtensions(loc, 1, &E_GL_NV_compute_shader_derivatives, "compute shader derivatives");
if (id == "derivative_group_quadsnv") {
publicType.shaderQualifiers.layoutDerivativeGroupQuads = true;
return;
} else if (id == "derivative_group_linearnv") {
publicType.shaderQualifiers.layoutDerivativeGroupLinear = true;
return;
}
}
}
#else
}
#endif
@ -7027,6 +7039,36 @@ void TParseContext::updateStandaloneQualifierDefaults(const TSourceLoc& loc, con
error(loc, "can only apply to 'out'", "blend equation", "");
}
#ifdef NV_EXTENSIONS
if (publicType.shaderQualifiers.layoutDerivativeGroupQuads &&
publicType.shaderQualifiers.layoutDerivativeGroupLinear) {
error(loc, "cannot be both specified", "derivative_group_quadsNV and derivative_group_linearNV", "");
}
if (publicType.shaderQualifiers.layoutDerivativeGroupQuads) {
if (publicType.qualifier.storage == EvqVaryingIn) {
if ((intermediate.getLocalSize(0) & 1) ||
(intermediate.getLocalSize(1) & 1))
error(loc, "requires local_size_x and local_size_y to be multiple of two", "derivative_group_quadsNV", "");
else
intermediate.setLayoutDerivativeMode(LayoutDerivativeGroupQuads);
}
else
error(loc, "can only apply to 'in'", "derivative_group_quadsNV", "");
}
if (publicType.shaderQualifiers.layoutDerivativeGroupLinear) {
if (publicType.qualifier.storage == EvqVaryingIn) {
if((intermediate.getLocalSize(0) *
intermediate.getLocalSize(1) *
intermediate.getLocalSize(2)) % 4 != 0)
error(loc, "requires total group size to be multiple of four", "derivative_group_linearNV", "");
else
intermediate.setLayoutDerivativeMode(LayoutDerivativeGroupLinear);
}
else
error(loc, "can only apply to 'in'", "derivative_group_linearNV", "");
}
#endif
const TQualifier& qualifier = publicType.qualifier;
if (qualifier.isAuxiliary() ||

View file

@ -236,6 +236,7 @@ void TParseVersions::initializeExtensionBehavior()
extensionBehavior[E_GL_NV_shader_noperspective_interpolation] = EBhDisable;
extensionBehavior[E_GL_NV_shader_subgroup_partitioned] = EBhDisable;
extensionBehavior[E_GL_NV_fragment_shader_barycentric] = EBhDisable;
extensionBehavior[E_GL_NV_compute_shader_derivatives] = EBhDisable;
#endif
// AEP
@ -407,6 +408,7 @@ void TParseVersions::getPreamble(std::string& preamble)
"#define GL_NV_conservative_raster_underestimation 1\n"
"#define GL_NV_shader_subgroup_partitioned 1\n"
"#define GL_NV_fragment_shader_barycentric 1\n"
"#define GL_NV_compute_shader_derivatives 1\n"
#endif
"#define GL_KHX_shader_explicit_arithmetic_types 1\n"
"#define GL_KHX_shader_explicit_arithmetic_types_int8 1\n"

View file

@ -208,6 +208,7 @@ const char* const E_GL_NV_conservative_raster_underestimation = "GL_NV_conserv
const char* const E_GL_NV_shader_noperspective_interpolation = "GL_NV_shader_noperspective_interpolation";
const char* const E_GL_NV_shader_subgroup_partitioned = "GL_NV_shader_subgroup_partitioned";
const char* const E_GL_NV_fragment_shader_barycentric = "GL_NV_fragment_shader_barycentric";
const char* const E_GL_NV_compute_shader_derivatives = "GL_NV_compute_shader_derivatives";
// Arrays of extensions for the above viewportEXTs duplications
const char* const viewportEXTs[] = { E_GL_ARB_shader_viewport_layer_array, E_GL_NV_viewport_array2 };

View file

@ -206,6 +206,17 @@ class TSymbolTable;
class TSymbol;
class TVariable;
#ifdef NV_EXTENSIONS
//
// Derivative-group layout mode for compute shaders.
// The mode is stored through setLayoutDerivativeMode() when a
// derivative_group_quadsNV / derivative_group_linearNV layout qualifier
// is accepted; LayoutDerivativeNone is the initial value.
//
enum ComputeDerivativeMode {
LayoutDerivativeNone, // default: no derivative group selected (SPV_NV_compute_shader_derivatives not in use)
LayoutDerivativeGroupQuads, // derivative_group_quadsNV
LayoutDerivativeGroupLinear, // derivative_group_linearNV
};
#endif
//
// Set of helper functions to help parse and build the tree.
//
@ -225,6 +236,7 @@ public:
#ifdef NV_EXTENSIONS
layoutOverrideCoverage(false),
geoPassthroughEXT(false),
computeDerivativeMode(LayoutDerivativeNone),
#endif
autoMapBindings(false),
autoMapLocations(false),
@ -622,6 +634,8 @@ public:
bool getLayoutOverrideCoverage() const { return layoutOverrideCoverage; }
void setGeoPassthroughEXT() { geoPassthroughEXT = true; }
bool getGeoPassthroughEXT() const { return geoPassthroughEXT; }
// Records the compute-shader derivative group layout mode (overwrites any previous value).
void setLayoutDerivativeMode(ComputeDerivativeMode mode) { computeDerivativeMode = mode; }
// Returns the stored derivative group layout mode, whatever its value.
// NOTE(review): despite the "None" suffix this is the plain getter paired with
// setLayoutDerivativeMode(); it does not test against LayoutDerivativeNone.
ComputeDerivativeMode getLayoutDerivativeModeNone() const { return computeDerivativeMode; }
#endif
const char* addSemanticName(const TString& name)
@ -725,6 +739,7 @@ protected:
#ifdef NV_EXTENSIONS
bool layoutOverrideCoverage;
bool geoPassthroughEXT;
ComputeDerivativeMode computeDerivativeMode;
#endif
// Base shift values