NV_shader_atomic_fp16_vector
This commit is contained in:
parent
9fd0fcd737
commit
48702616ec
12 changed files with 937 additions and 28 deletions
|
|
@ -1465,6 +1465,20 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
|
|||
"\n");
|
||||
}
|
||||
|
||||
// NV_shader_atomic_fp16_vector
|
||||
if (profile != EEsProfile && version >= 430) {
|
||||
commonBuiltins.append(
|
||||
"f16vec2 atomicAdd(coherent volatile inout f16vec2, f16vec2);"
|
||||
"f16vec4 atomicAdd(coherent volatile inout f16vec4, f16vec4);"
|
||||
"f16vec2 atomicMin(coherent volatile inout f16vec2, f16vec2);"
|
||||
"f16vec4 atomicMin(coherent volatile inout f16vec4, f16vec4);"
|
||||
"f16vec2 atomicMax(coherent volatile inout f16vec2, f16vec2);"
|
||||
"f16vec4 atomicMax(coherent volatile inout f16vec4, f16vec4);"
|
||||
"f16vec2 atomicExchange(coherent volatile inout f16vec2, f16vec2);"
|
||||
"f16vec4 atomicExchange(coherent volatile inout f16vec4, f16vec4);"
|
||||
"\n");
|
||||
}
|
||||
|
||||
if ((profile == EEsProfile && version >= 300) ||
|
||||
(profile != EEsProfile && version >= 150)) { // GL_ARB_shader_bit_encoding
|
||||
commonBuiltins.append(
|
||||
|
|
@ -6678,6 +6692,34 @@ void TBuiltIns::addImageFunctions(TSampler sampler, const TString& typeName, int
|
|||
commonBuiltins.append(imageParams);
|
||||
commonBuiltins.append(", float);\n");
|
||||
}
|
||||
|
||||
// GL_NV_shader_atomic_fp16_vector
|
||||
if (profile != EEsProfile && version >= 430) {
|
||||
const int numFp16Builtins = 4;
|
||||
const char* atomicFp16Func[numFp16Builtins] = {
|
||||
" imageAtomicAdd(volatile coherent ",
|
||||
" imageAtomicMin(volatile coherent ",
|
||||
" imageAtomicMax(volatile coherent ",
|
||||
" imageAtomicExchange(volatile coherent "
|
||||
};
|
||||
const int numFp16DataTypes = 2;
|
||||
const char* atomicFp16DataTypes[numFp16DataTypes] = {
|
||||
"f16vec2",
|
||||
"f16vec4"
|
||||
};
|
||||
// Loop twice to add prototypes with/without scope/semantics
|
||||
for (int j = 0; j < numFp16DataTypes; ++j) {
|
||||
for (int i = 0; i < numFp16Builtins; ++i) {
|
||||
commonBuiltins.append(atomicFp16DataTypes[j]);
|
||||
commonBuiltins.append(atomicFp16Func[i]);
|
||||
commonBuiltins.append(imageParams);
|
||||
commonBuiltins.append(", ");
|
||||
commonBuiltins.append(atomicFp16DataTypes[j]);
|
||||
commonBuiltins.append(");\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (profile != EEsProfile && version >= 450) {
|
||||
commonBuiltins.append("float imageAtomicAdd(volatile coherent ");
|
||||
commonBuiltins.append(imageParams);
|
||||
|
|
|
|||
|
|
@ -2524,6 +2524,17 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan
|
|||
error(loc, "only supported on image with format r64i", fnCandidate.getName().c_str(), "");
|
||||
else if (callNode.getType().getBasicType() == EbtUint64 && imageType.getQualifier().getFormat() != ElfR64ui)
|
||||
error(loc, "only supported on image with format r64ui", fnCandidate.getName().c_str(), "");
|
||||
} else if(callNode.getType().getBasicType() == EbtFloat16 &&
|
||||
((callNode.getType().getVectorSize() == 2 && arg0->getType().getQualifier().getFormat() == ElfRg16f) ||
|
||||
(callNode.getType().getVectorSize() == 4 && arg0->getType().getQualifier().getFormat() == ElfRgba16f))) {
|
||||
if ((fnCandidate.getName().compare(0, 14, "imageAtomicAdd") == 0) ||
|
||||
(fnCandidate.getName().compare(0, 19, "imageAtomicExchange") == 0) ||
|
||||
(fnCandidate.getName().compare(0, 19, "imageAtomicMin") == 0) ||
|
||||
(fnCandidate.getName().compare(0, 19, "imageAtomicMax") == 0)) {
|
||||
requireExtensions(loc, 1, &E_GL_NV_shader_atomic_fp16_vector, fnCandidate.getName().c_str());
|
||||
} else {
|
||||
error(loc, "f16vec2/4 operation not supported on: ", fnCandidate.getName().c_str(), "");
|
||||
}
|
||||
} else if (imageType.getSampler().type == EbtFloat) {
|
||||
if (fnCandidate.getName().compare(0, 19, "imageAtomicExchange") == 0) {
|
||||
// imageAtomicExchange doesn't require an extension
|
||||
|
|
@ -2582,6 +2593,11 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan
|
|||
const char* const extensions[2] = { E_GL_NV_shader_atomic_int64,
|
||||
E_GL_EXT_shader_atomic_int64 };
|
||||
requireExtensions(loc, 2, extensions, fnCandidate.getName().c_str());
|
||||
} else if ((callNode.getOp() == EOpAtomicAdd || callNode.getOp() == EOpAtomicExchange ||
|
||||
callNode.getOp() == EOpAtomicMin || callNode.getOp() == EOpAtomicMax) &&
|
||||
arg0->getType().getBasicType() == EbtFloat16 &&
|
||||
(arg0->getType().getVectorSize() == 2 || arg0->getType().getVectorSize() == 4 )) {
|
||||
requireExtensions(loc, 1, &E_GL_NV_shader_atomic_fp16_vector, fnCandidate.getName().c_str());
|
||||
} else if ((callNode.getOp() == EOpAtomicAdd || callNode.getOp() == EOpAtomicExchange) &&
|
||||
(arg0->getType().getBasicType() == EbtFloat ||
|
||||
arg0->getType().getBasicType() == EbtDouble)) {
|
||||
|
|
|
|||
|
|
@ -305,6 +305,7 @@ void TParseVersions::initializeExtensionBehavior()
|
|||
extensionBehavior[E_GL_NV_integer_cooperative_matrix] = EBhDisable;
|
||||
extensionBehavior[E_GL_NV_shader_invocation_reorder] = EBhDisable;
|
||||
extensionBehavior[E_GL_NV_displacement_micromap] = EBhDisable;
|
||||
extensionBehavior[E_GL_NV_shader_atomic_fp16_vector] = EBhDisable;
|
||||
|
||||
// ARM
|
||||
extensionBehavior[E_GL_ARM_shader_core_builtins] = EBhDisable;
|
||||
|
|
|
|||
|
|
@ -278,6 +278,7 @@ const char* const E_GL_NV_integer_cooperative_matrix = "GL_NV_integer
|
|||
const char* const E_GL_NV_shader_invocation_reorder = "GL_NV_shader_invocation_reorder";
|
||||
const char* const E_GL_EXT_ray_tracing_position_fetch = "GL_EXT_ray_tracing_position_fetch";
|
||||
const char* const E_GL_NV_displacement_micromap = "GL_NV_displacement_micromap";
|
||||
const char* const E_GL_NV_shader_atomic_fp16_vector = "GL_NV_shader_atomic_fp16_vector";
|
||||
|
||||
// ARM
|
||||
const char* const E_GL_ARM_shader_core_builtins = "GL_ARM_shader_core_builtins";
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue