diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp index 1f56c234..6d5309ff 100755 --- a/SPIRV/GlslangToSpv.cpp +++ b/SPIRV/GlslangToSpv.cpp @@ -5377,17 +5377,34 @@ void TGlslangToSpvTraverser::updateMemberOffset(const glslang::TType& structType int memberAlignment = glslangIntermediate->getMemberAlignment(memberType, memberSize, dummyStride, explicitLayout, matrixLayout == glslang::ElmRowMajor); + bool isVectorLike = memberType.isVector(); + if (memberType.isMatrix()) { + if (matrixLayout == glslang::ElmRowMajor) + isVectorLike = memberType.getMatrixRows() == 1; + else + isVectorLike = memberType.getMatrixCols() == 1; + } + // Adjust alignment for HLSL rules // TODO: make this consistent in early phases of code: // adjusting this late means inconsistencies with earlier code, which for reflection is an issue // Until reflection is brought in sync with these adjustments, don't apply to $Global, // which is the most likely to rely on reflection, and least likely to rely implicit layouts if (glslangIntermediate->usingHlslOffsets() && - ! memberType.isArray() && memberType.isVector() && structType.getTypeName().compare("$Global") != 0) { - int dummySize; - int componentAlignment = glslangIntermediate->getBaseAlignmentScalar(memberType, dummySize); - if (componentAlignment <= 4) + ! memberType.isStruct() && structType.getTypeName().compare("$Global") != 0) { + int componentSize; + int componentAlignment = glslangIntermediate->getBaseAlignmentScalar(memberType, componentSize); + if (! memberType.isArray() && isVectorLike && componentAlignment <= 4) memberAlignment = componentAlignment; + + // Don't add unnecessary padding after this member + if (memberType.isMatrix()) { + if (matrixLayout == glslang::ElmRowMajor) + memberSize -= componentSize * (4 - memberType.getMatrixCols()); + else + memberSize -= componentSize * (4 - memberType.getMatrixRows()); + } else if (memberType.isArray()) + memberSize -= componentSize * (4 - memberType.getVectorSize()); } // Bump up to member alignment @@ -5395,7 +5412,7 @@ void TGlslangToSpvTraverser::updateMemberOffset(const glslang::TType& structType // Bump up to vec4 if there is a bad straddle if (explicitLayout != glslang::ElpScalar && glslangIntermediate->improperStraddle(memberType, memberSize, - currentOffset)) + currentOffset, isVectorLike)) glslang::RoundToPow2(currentOffset, 16); nextOffset = currentOffset + memberSize; diff --git a/Test/baseResults/hlsl.cbuffer-offsets.comp.out b/Test/baseResults/hlsl.cbuffer-offsets.comp.out new file mode 100644 index 00000000..feb0bb69 --- /dev/null +++ b/Test/baseResults/hlsl.cbuffer-offsets.comp.out @@ -0,0 +1,159 @@ +hlsl.cbuffer-offsets.comp +Shader version: 500 +local_size = (1, 1, 1) +0:? Sequence +0:43 Function Definition: @main( ( temp void) +0:43 Function Parameters: +0:43 Function Definition: main( ( temp void) +0:43 Function Parameters: +0:? Sequence +0:43 Function Call: @main( ( temp void) +0:? Linker Objects +0:? 'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform float f1, layout( row_major std140) uniform 3-element array of 3-component vector of float f3a3, layout( row_major std140) uniform float f2, layout( row_major std140) uniform float f3, layout( row_major std140) uniform 1X1 matrix of float m11, layout( row_major std140) uniform 1X2 matrix of float m12, layout( row_major std140) uniform 2X1 matrix of float m21, layout( row_major std140) uniform 2X2 matrix of float m22, layout( row_major std140) uniform 3X3 matrix of float m33, layout( row_major std140) uniform float f4, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform float f5, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform float f6, layout( column_major std140) uniform 1X1 matrix of float rm11, layout( column_major std140) uniform 1X2 matrix of float rm12, layout( column_major std140) uniform 2X1 matrix of float rm21, layout( column_major std140) uniform 2X2 matrix of float rm22, layout( column_major std140) uniform 3X3 matrix of float rm33, layout( row_major std140) uniform float f7, layout( column_major std140) uniform 3X4 matrix of float rm34, layout( row_major std140) uniform float f8, layout( column_major std140) uniform 4X3 matrix of float rm43, layout( row_major std140) uniform float f9, layout( row_major std140) uniform 3-element array of float f1a3, layout( row_major std140) uniform float f10}) + + +Linked compute stage: + + +Shader version: 500 +local_size = (1, 1, 1) +0:? Sequence +0:43 Function Definition: @main( ( temp void) +0:43 Function Parameters: +0:43 Function Definition: main( ( temp void) +0:43 Function Parameters: +0:? Sequence +0:43 Function Call: @main( ( temp void) +0:? Linker Objects +0:? 'anon@0' (layout( row_major std140) uniform block{layout( row_major std140) uniform float f1, layout( row_major std140) uniform 3-element array of 3-component vector of float f3a3, layout( row_major std140) uniform float f2, layout( row_major std140) uniform float f3, layout( row_major std140) uniform 1X1 matrix of float m11, layout( row_major std140) uniform 1X2 matrix of float m12, layout( row_major std140) uniform 2X1 matrix of float m21, layout( row_major std140) uniform 2X2 matrix of float m22, layout( row_major std140) uniform 3X3 matrix of float m33, layout( row_major std140) uniform float f4, layout( row_major std140) uniform 3X4 matrix of float m34, layout( row_major std140) uniform float f5, layout( row_major std140) uniform 4X3 matrix of float m43, layout( row_major std140) uniform float f6, layout( column_major std140) uniform 1X1 matrix of float rm11, layout( column_major std140) uniform 1X2 matrix of float rm12, layout( column_major std140) uniform 2X1 matrix of float rm21, layout( column_major std140) uniform 2X2 matrix of float rm22, layout( column_major std140) uniform 3X3 matrix of float rm33, layout( row_major std140) uniform float f7, layout( column_major std140) uniform 3X4 matrix of float rm34, layout( row_major std140) uniform float f8, layout( column_major std140) uniform 4X3 matrix of float rm43, layout( row_major std140) uniform float f9, layout( row_major std140) uniform 3-element array of float f1a3, layout( row_major std140) uniform float f10}) + +Validation failed +// Module Version 10000 +// Generated by (magic number): 8000b +// Id's are bound by 28 + + Capability Shader + 1: ExtInstImport "GLSL.std.450" + MemoryModel Logical GLSL450 + EntryPoint GLCompute 4 "main" + ExecutionMode 4 LocalSize 1 1 1 + Source HLSL 500 + Name 4 "main" + Name 6 "@main(" + Name 25 "CB" + MemberName 25(CB) 0 "f1" + MemberName 25(CB) 1 "f3a3" + MemberName 25(CB) 2 "f2" + MemberName 25(CB) 3 "f3" + MemberName 25(CB) 4 "m11" + MemberName 25(CB) 5 "m12" + MemberName 25(CB) 6 "m21" + MemberName 25(CB) 7 "m22" + MemberName 25(CB) 8 "m33" + MemberName 25(CB) 9 "f4" + MemberName 25(CB) 10 "m34" + MemberName 25(CB) 11 "f5" + MemberName 25(CB) 12 "m43" + MemberName 25(CB) 13 "f6" + MemberName 25(CB) 14 "rm11" + MemberName 25(CB) 15 "rm12" + MemberName 25(CB) 16 "rm21" + MemberName 25(CB) 17 "rm22" + MemberName 25(CB) 18 "rm33" + MemberName 25(CB) 19 "f7" + MemberName 25(CB) 20 "rm34" + MemberName 25(CB) 21 "f8" + MemberName 25(CB) 22 "rm43" + MemberName 25(CB) 23 "f9" + MemberName 25(CB) 24 "f1a3" + MemberName 25(CB) 25 "f10" + Name 27 "" + Decorate 13 ArrayStride 16 + Decorate 24 ArrayStride 16 + MemberDecorate 25(CB) 0 Offset 0 + MemberDecorate 25(CB) 1 Offset 16 + MemberDecorate 25(CB) 2 Offset 60 + MemberDecorate 25(CB) 3 Offset 64 + MemberDecorate 25(CB) 4 RowMajor + MemberDecorate 25(CB) 4 Offset 68 + MemberDecorate 25(CB) 4 MatrixStride 16 + MemberDecorate 25(CB) 5 RowMajor + MemberDecorate 25(CB) 5 Offset 80 + MemberDecorate 25(CB) 5 MatrixStride 16 + MemberDecorate 25(CB) 6 RowMajor + MemberDecorate 25(CB) 6 Offset 100 + MemberDecorate 25(CB) 6 MatrixStride 16 + MemberDecorate 25(CB) 7 RowMajor + MemberDecorate 25(CB) 7 Offset 112 + MemberDecorate 25(CB) 7 MatrixStride 16 + MemberDecorate 25(CB) 8 RowMajor + MemberDecorate 25(CB) 8 Offset 144 + MemberDecorate 25(CB) 8 MatrixStride 16 + MemberDecorate 25(CB) 9 Offset 188 + MemberDecorate 25(CB) 10 RowMajor + MemberDecorate 25(CB) 10 Offset 192 + MemberDecorate 25(CB) 10 MatrixStride 16 + MemberDecorate 25(CB) 11 Offset 252 + MemberDecorate 25(CB) 12 RowMajor + MemberDecorate 25(CB) 12 Offset 256 + MemberDecorate 25(CB) 12 MatrixStride 16 + MemberDecorate 25(CB) 13 Offset 304 + MemberDecorate 25(CB) 14 ColMajor + MemberDecorate 25(CB) 14 Offset 308 + MemberDecorate 25(CB) 14 MatrixStride 16 + MemberDecorate 25(CB) 15 ColMajor + MemberDecorate 25(CB) 15 Offset 312 + MemberDecorate 25(CB) 15 MatrixStride 16 + MemberDecorate 25(CB) 16 ColMajor + MemberDecorate 25(CB) 16 Offset 320 + MemberDecorate 25(CB) 16 MatrixStride 16 + MemberDecorate 25(CB) 17 ColMajor + MemberDecorate 25(CB) 17 Offset 352 + MemberDecorate 25(CB) 17 MatrixStride 16 + MemberDecorate 25(CB) 18 ColMajor + MemberDecorate 25(CB) 18 Offset 384 + MemberDecorate 25(CB) 18 MatrixStride 16 + MemberDecorate 25(CB) 19 Offset 428 + MemberDecorate 25(CB) 20 ColMajor + MemberDecorate 25(CB) 20 Offset 432 + MemberDecorate 25(CB) 20 MatrixStride 16 + MemberDecorate 25(CB) 21 Offset 480 + MemberDecorate 25(CB) 22 ColMajor + MemberDecorate 25(CB) 22 Offset 496 + MemberDecorate 25(CB) 22 MatrixStride 16 + MemberDecorate 25(CB) 23 Offset 556 + MemberDecorate 25(CB) 24 Offset 560 + MemberDecorate 25(CB) 25 Offset 596 + Decorate 25(CB) Block + Decorate 27 DescriptorSet 0 + Decorate 27 Binding 0 + 2: TypeVoid + 3: TypeFunction 2 + 9: TypeFloat 32 + 10: TypeVector 9(float) 3 + 11: TypeInt 32 0 + 12: 11(int) Constant 3 + 13: TypeArray 10(fvec3) 12 + 14: TypeVector 9(float) 1 + 15: TypeMatrix 14(fvec) 1 + 16: TypeVector 9(float) 2 + 17: TypeMatrix 16(fvec2) 1 + 18: TypeMatrix 14(fvec) 2 + 19: TypeMatrix 16(fvec2) 2 + 20: TypeMatrix 10(fvec3) 3 + 21: TypeVector 9(float) 4 + 22: TypeMatrix 21(fvec4) 3 + 23: TypeMatrix 10(fvec3) 4 + 24: TypeArray 9(float) 12 + 25(CB): TypeStruct 9(float) 13 9(float) 9(float) 15 17 18 19 20 9(float) 22 9(float) 23 9(float) 15 17 18 19 20 9(float) 22 9(float) 23 9(float) 24 9(float) + 26: TypePointer Uniform 25(CB) + 27: 26(ptr) Variable Uniform + 4(main): 2 Function None 3 + 5: Label + 8: 2 FunctionCall 6(@main() + Return + FunctionEnd + 6(@main(): 2 Function None 3 + 7: Label + Return + FunctionEnd diff --git a/Test/hlsl.cbuffer-offsets.comp b/Test/hlsl.cbuffer-offsets.comp new file mode 100644 index 00000000..de88d81f --- /dev/null +++ b/Test/hlsl.cbuffer-offsets.comp @@ -0,0 +1,44 @@ +// Correct offsets obtained from "HLSL Constant Buffer Visualizer" +// https://maraneshi.github.io/HLSL-ConstantBufferLayoutVisualizer/ + +cbuffer CB { + float f1; + float3 f3a3[3]; + float f2; + float f3; + + float1x1 m11; + float1x2 m12; + float2x1 m21; + float2x2 m22; + + float3x3 m33; + float f4; + + float3x4 m34; + float f5; + + float4x3 m43; + float f6; + + row_major float1x1 rm11; + row_major float1x2 rm12; + row_major float2x1 rm21; + row_major float2x2 rm22; + + row_major float3x3 rm33; + float f7; + + row_major float3x4 rm34; + float f8; + + row_major float4x3 rm43; + float f9; + + float f1a3[3]; + float f10; +}; + +void main() +{ +} diff --git a/glslang/MachineIndependent/linkValidate.cpp b/glslang/MachineIndependent/linkValidate.cpp index 1bd6678b..3b5add9d 100644 --- a/glslang/MachineIndependent/linkValidate.cpp +++ b/glslang/MachineIndependent/linkValidate.cpp @@ -2217,9 +2217,9 @@ int TIntermediate::getBaseAlignment(const TType& type, int& size, int& stride, T } // To aid the basic HLSL rule about crossing vec4 boundaries. -bool TIntermediate::improperStraddle(const TType& type, int size, int offset) +bool TIntermediate::improperStraddle(const TType& type, int size, int offset, bool vectorLike) { - if (! type.isVector() || type.isArray()) + if (! vectorLike || type.isArray()) return false; return size <= 16 ? offset / 16 != (offset + size - 1) / 16 diff --git a/glslang/MachineIndependent/localintermediate.h b/glslang/MachineIndependent/localintermediate.h index db0367f5..b453b146 100644 --- a/glslang/MachineIndependent/localintermediate.h +++ b/glslang/MachineIndependent/localintermediate.h @@ -1057,7 +1057,7 @@ public: static int getBaseAlignment(const TType&, int& size, int& stride, TLayoutPacking layoutPacking, bool rowMajor); static int getScalarAlignment(const TType&, int& size, int& stride, bool rowMajor); static int getMemberAlignment(const TType&, int& size, int& stride, TLayoutPacking layoutPacking, bool rowMajor); - static bool improperStraddle(const TType& type, int size, int offset); + static bool improperStraddle(const TType& type, int size, int offset, bool vectorLike); static void updateOffset(const TType& parentType, const TType& memberType, int& offset, int& memberSize); static int getOffset(const TType& type, int index); static int getBlockSize(const TType& blockType); diff --git a/gtests/Hlsl.FromFile.cpp b/gtests/Hlsl.FromFile.cpp index cc0c9eb3..75af7c8c 100644 --- a/gtests/Hlsl.FromFile.cpp +++ b/gtests/Hlsl.FromFile.cpp @@ -174,6 +174,7 @@ INSTANTIATE_TEST_SUITE_P( {"hlsl.calculatelodunclamped.dx10.frag", "main"}, {"hlsl.cast.frag", "PixelShaderFunction"}, {"hlsl.cbuffer-identifier.vert", "main"}, + {"hlsl.cbuffer-offsets.comp", "main"}, {"hlsl.charLit.vert", "main"}, {"hlsl.clip.frag", "main"}, {"hlsl.clipdistance-1.frag", "main"},