NV_shader_atomic_fp16_vector

This commit is contained in:
Jeff Bolz 2024-02-02 12:36:16 -06:00 committed by arcady-lunarg
parent 9fd0fcd737
commit 48702616ec
12 changed files with 937 additions and 28 deletions

View file

@ -1465,6 +1465,20 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
"\n");
}
// GL_NV_shader_atomic_fp16_vector
if (profile != EEsProfile && version >= 430) {
commonBuiltins.append(
"f16vec2 atomicAdd(coherent volatile inout f16vec2, f16vec2);"
"f16vec4 atomicAdd(coherent volatile inout f16vec4, f16vec4);"
"f16vec2 atomicMin(coherent volatile inout f16vec2, f16vec2);"
"f16vec4 atomicMin(coherent volatile inout f16vec4, f16vec4);"
"f16vec2 atomicMax(coherent volatile inout f16vec2, f16vec2);"
"f16vec4 atomicMax(coherent volatile inout f16vec4, f16vec4);"
"f16vec2 atomicExchange(coherent volatile inout f16vec2, f16vec2);"
"f16vec4 atomicExchange(coherent volatile inout f16vec4, f16vec4);"
"\n");
}
if ((profile == EEsProfile && version >= 300) ||
(profile != EEsProfile && version >= 150)) { // GL_ARB_shader_bit_encoding
commonBuiltins.append(
@ -6678,6 +6692,34 @@ void TBuiltIns::addImageFunctions(TSampler sampler, const TString& typeName, int
commonBuiltins.append(imageParams);
commonBuiltins.append(", float);\n");
}
// GL_NV_shader_atomic_fp16_vector
if (profile != EEsProfile && version >= 430) {
const int numFp16Builtins = 4;
const char* atomicFp16Func[numFp16Builtins] = {
" imageAtomicAdd(volatile coherent ",
" imageAtomicMin(volatile coherent ",
" imageAtomicMax(volatile coherent ",
" imageAtomicExchange(volatile coherent "
};
const int numFp16DataTypes = 2;
const char* atomicFp16DataTypes[numFp16DataTypes] = {
"f16vec2",
"f16vec4"
};
// Emit one prototype per (data type, builtin) pair: "<type> <func>(volatile coherent <imageParams>, <type>);"
for (int j = 0; j < numFp16DataTypes; ++j) {
for (int i = 0; i < numFp16Builtins; ++i) {
commonBuiltins.append(atomicFp16DataTypes[j]);
commonBuiltins.append(atomicFp16Func[i]);
commonBuiltins.append(imageParams);
commonBuiltins.append(", ");
commonBuiltins.append(atomicFp16DataTypes[j]);
commonBuiltins.append(");\n");
}
}
}
if (profile != EEsProfile && version >= 450) {
commonBuiltins.append("float imageAtomicAdd(volatile coherent ");
commonBuiltins.append(imageParams);

View file

@ -2524,6 +2524,17 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan
error(loc, "only supported on image with format r64i", fnCandidate.getName().c_str(), "");
else if (callNode.getType().getBasicType() == EbtUint64 && imageType.getQualifier().getFormat() != ElfR64ui)
error(loc, "only supported on image with format r64ui", fnCandidate.getName().c_str(), "");
} else if(callNode.getType().getBasicType() == EbtFloat16 &&
((callNode.getType().getVectorSize() == 2 && arg0->getType().getQualifier().getFormat() == ElfRg16f) ||
(callNode.getType().getVectorSize() == 4 && arg0->getType().getQualifier().getFormat() == ElfRgba16f))) {
if ((fnCandidate.getName().compare(0, 14, "imageAtomicAdd") == 0) ||
(fnCandidate.getName().compare(0, 19, "imageAtomicExchange") == 0) ||
(fnCandidate.getName().compare(0, 19, "imageAtomicMin") == 0) ||
(fnCandidate.getName().compare(0, 19, "imageAtomicMax") == 0)) {
requireExtensions(loc, 1, &E_GL_NV_shader_atomic_fp16_vector, fnCandidate.getName().c_str());
} else {
error(loc, "f16vec2/4 operation not supported on: ", fnCandidate.getName().c_str(), "");
}
} else if (imageType.getSampler().type == EbtFloat) {
if (fnCandidate.getName().compare(0, 19, "imageAtomicExchange") == 0) {
// imageAtomicExchange doesn't require an extension
@ -2582,6 +2593,11 @@ void TParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCan
const char* const extensions[2] = { E_GL_NV_shader_atomic_int64,
E_GL_EXT_shader_atomic_int64 };
requireExtensions(loc, 2, extensions, fnCandidate.getName().c_str());
} else if ((callNode.getOp() == EOpAtomicAdd || callNode.getOp() == EOpAtomicExchange ||
callNode.getOp() == EOpAtomicMin || callNode.getOp() == EOpAtomicMax) &&
arg0->getType().getBasicType() == EbtFloat16 &&
(arg0->getType().getVectorSize() == 2 || arg0->getType().getVectorSize() == 4 )) {
requireExtensions(loc, 1, &E_GL_NV_shader_atomic_fp16_vector, fnCandidate.getName().c_str());
} else if ((callNode.getOp() == EOpAtomicAdd || callNode.getOp() == EOpAtomicExchange) &&
(arg0->getType().getBasicType() == EbtFloat ||
arg0->getType().getBasicType() == EbtDouble)) {

View file

@ -305,6 +305,7 @@ void TParseVersions::initializeExtensionBehavior()
extensionBehavior[E_GL_NV_integer_cooperative_matrix] = EBhDisable;
extensionBehavior[E_GL_NV_shader_invocation_reorder] = EBhDisable;
extensionBehavior[E_GL_NV_displacement_micromap] = EBhDisable;
extensionBehavior[E_GL_NV_shader_atomic_fp16_vector] = EBhDisable;
// ARM
extensionBehavior[E_GL_ARM_shader_core_builtins] = EBhDisable;

View file

@ -278,6 +278,7 @@ const char* const E_GL_NV_integer_cooperative_matrix = "GL_NV_integer
const char* const E_GL_NV_shader_invocation_reorder = "GL_NV_shader_invocation_reorder";
const char* const E_GL_EXT_ray_tracing_position_fetch = "GL_EXT_ray_tracing_position_fetch";
const char* const E_GL_NV_displacement_micromap = "GL_NV_displacement_micromap";
const char* const E_GL_NV_shader_atomic_fp16_vector = "GL_NV_shader_atomic_fp16_vector";
// ARM
const char* const E_GL_ARM_shader_core_builtins = "GL_ARM_shader_core_builtins";