Skip to content

Commit fc85a78

Browse files
committed
Vulkan bindless
1 parent 52449a1 commit fc85a78

29 files changed

+878
-57
lines changed

.clangd

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
CompileFlags:
2+
Add: [-Wno-ignored-attributes, -Wno-unused-function]
3+
Remove: [-f*, -m*]
4+
5+
Diagnostics:
6+
Suppress:
7+
- pragma_attribute_no_pop_eof
8+
- pragma_attribute_stack_mismatch
9+
10+
InlayHints:
11+
BlockEnd: No
12+
Designators: Yes
13+
Enabled: Yes
14+
ParameterNames: Yes
15+
DeducedTypes: Yes
16+
DefaultArguments: Yes
17+
TypeNameLimit: 24

scripts/print_hlsl_builtin.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
-- use command:
22
-- xmake lua printer_text_arr.lua
33
-- to execute this script and gen new files
4-
local files_list = {'accel_process', 'bindless_upload', 'bc6_encode_block', 'bc6_header', 'bc6_trymode_g10cs',
4+
local files_list = {'accel_process', 'bindless_upload', 'bindless_upload_vk', 'bc6_encode_block', 'bc6_header', 'bc6_trymode_g10cs',
55
'bc6_trymode_le10cs', 'bc7_encode_block', 'bc7_header', 'bc7_trymode_02cs', 'bc7_trymode_137cs',
66
'bc7_trymode_456cs', 'hlsl_header', 'raytracing_header', 'tex2d_bindless', 'tex3d_bindless',
77
'compute_quad', 'determinant', 'inverse', 'indirect', 'resource_size', 'accel_header', 'copy_sign',
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Scatter-upload kernel: each thread copies one staged element into the
// destination buffer at the index stored alongside it.

// Payload written into the destination buffer: three 32-bit words.
struct Ele{
2+
uint a;
3+
uint b;
4+
uint c;
5+
};
6+
// One staged update: destination index followed by the payload (four uints in total).
struct Src{
7+
uint idx;
8+
Ele e;
9+
};
10+
// Destination buffer, written at arbitrary indices taken from _Src.
RWStructuredBuffer<Ele> _Dst:register(u1);
11+
// Read-only list of staged updates, indexed by dispatch thread id.
StructuredBuffer<Src> _Src:register(t0);
12+
// Push-constant block; `v` is the upper bound checked against the thread id in main.
struct _CBType{
13+
uint v;
14+
};
15+
[[vk::push_constant]] ConstantBuffer<_CBType> dsp:register(b0);
16+
// 256 threads per group along X; one staged update per thread.
[numthreads(256,1,1)]
17+
void main(uint id:SV_DISPATCHTHREADID){
18+
// Threads at or beyond dsp.v do nothing.
if(id >= dsp.v) return;
19+
Src s=_Src[id];
20+
// Scatter: s.idx is not range-checked against the size of _Dst.
_Dst[s.idx]=s.e;
21+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Embedded 253-byte blob for the "bindless_upload_vk" builtin.
// NOTE(review): presumably emitted by scripts/print_hlsl_builtin.lua (which lists
// 'bindless_upload_vk' among its inputs) and therefore not meant to be edited by
// hand — confirm. The leading bytes 120,218 (0x78 0xDA) look like a zlib stream
// header; the accompanying lookup table registers this array with the sizes
// 253 and 386 — presumably compressed/uncompressed byte counts, verify.
namespace lc_hlsl{
2+
unsigned char bindless_upload_vk[253]={120,218,101,144,193,75,195,48,20,198,239,131,253,15,57,54,48,138,21,244,80,109,97,107,7,219,77,214,162,135,16,74,151,164,46,232,178,146,151,20,69,252,223,125,173,153,61,120,72,248,120,249,229,123,239,123,224,172,23,142,108,223,213,215,114,225,181,113,164,125,8,226,120,21,2,197,55,30,248,101,43,43,174,172,150,31,88,199,207,68,5,230,240,82,77,148,183,74,110,124,215,41,251,136,207,57,105,74,112,169,85,175,26,156,178,145,79,40,178,255,72,116,70,18,239,153,116,55,116,238,220,20,155,250,179,255,155,116,8,61,25,27,222,210,180,247,112,106,196,197,128,107,141,227,156,20,65,6,239,240,55,39,18,250,217,254,56,217,51,227,207,238,100,85,43,33,186,189,187,95,37,171,132,242,229,98,184,104,73,206,173,54,81,8,155,86,207,77,185,175,158,214,117,177,171,119,135,237,186,220,151,20,167,209,93,132,100,158,141,222,241,64,137,85,152,202,140,9,173,32,144,141,137,152,150,28,11,227,26,24,196,184,55,158,65,60,45,237,7,52,234,125,41};
3+
}

src/backends/common/hlsl/builtin/hlsl_builtin.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
namespace lc_hlsl{
44
extern unsigned char accel_process[];
55
extern unsigned char bindless_upload[];
6+
extern unsigned char bindless_upload_vk[];
67
extern unsigned char bc6_encode_block[];
78
extern unsigned char bc6_header[];
89
extern unsigned char bc6_trymode_g10cs[];
@@ -37,6 +38,7 @@ static HLSLCompressedHeader get_hlsl_builtin(luisa::string_view ss) {
3738
Dict(){
3839
dict.try_emplace("accel_process", HLSLCompressedHeader{accel_process, 525, 1491});
3940
dict.try_emplace("bindless_upload", HLSLCompressedHeader{bindless_upload, 225, 334});
41+
dict.try_emplace("bindless_upload_vk", HLSLCompressedHeader{bindless_upload_vk, 253, 386});
4042
dict.try_emplace("bc6_encode_block", HLSLCompressedHeader{bc6_encode_block, 1805, 11021});
4143
dict.try_emplace("bc6_header", HLSLCompressedHeader{bc6_header, 6209, 80770});
4244
dict.try_emplace("bc6_trymode_g10cs", HLSLCompressedHeader{bc6_trymode_g10cs, 1311, 6513});

src/backends/vk/bindless_array.cpp

Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
#include "bindless_array.h"
2+
#include "compute_shader.h"
3+
#include "upload_buffer.h"
4+
#include "device.h"
5+
#include "resource_barrier.h"
6+
#include "stream.h"
7+
#include <vulkan/vulkan_core.h>
8+
#include "log.h"
9+
namespace lc::vk {
10+
/// Creates a bindless array with `size` slots.
///
/// The index buffer is sized to hold one BindlessStruct per slot, and the
/// CPU-side table `binded` is resized to the same number of entries.
BindlessArray::BindlessArray(Device *device, size_t size)
    : Resource(device),
      _indices_buffer(device, sizeof(BindlessStruct) * size) {
    binded.resize(size);
}
15+
/// Hands every heap index still held by a slot back to the device pools.
///
/// A field equal to BindlessStruct::n_pos is treated as "nothing bound" and skipped.
/// NOTE(review): update() masks tex2D/tex3D with BindlessStruct::mask before using
/// them as descriptor indices, while the raw values are deallocated here — confirm
/// the pools expect the unmasked value.
BindlessArray::~BindlessArray() {
    for (auto &entry : binded) {
        auto &slot = entry.first;
        // Buffer heap index.
        if (slot.buffer != BindlessStruct::n_pos) {
            device()->buffer_heap_pool.dealloc(slot.buffer);
        }
        // 2D texture heap index.
        if (slot.tex2D != BindlessStruct::n_pos) {
            device()->tex2d_heap_pool.dealloc(slot.tex2D);
        }
        // 3D texture heap index.
        if (slot.tex3D != BindlessStruct::n_pos) {
            device()->tex3d_heap_pool.dealloc(slot.tex3D);
        }
    }
}
29+
/// Records the index buffer with `barrier` as a compute-UAV resource, ahead of
/// the upload dispatch that writes to it.
void BindlessArray::pre_update(ResourceBarrier *barrier) {
    const auto usage = ResourceBarrier::Usage::ComputeUAV;
    barrier->record(BufferView{&_indices_buffer}, usage);
}
34+
/// Releases whatever is currently bound in one field of a slot.
///
/// @param index       Map handle tracking the bound resource; always reset on return.
/// @param type        Pool selector stored with the queued index (the deferred
///                    release in update() maps 0/1/2 to buffer/tex2d/tex3d pools).
/// @param originValue Heap index held by the slot; reset to n_pos when released.
void BindlessArray::return_value(MapIndex &index, uint type, uint &originValue) {
    // Nothing bound: only the map handle needs resetting.
    if (originValue == BindlessStruct::n_pos) {
        index = {};
        return;
    }
    // Defer the actual pool release; update() drains freeQueue into a callback.
    freeQueue.push_back(FreeValue{
        .type = type,
        .index = originValue});
    originValue = BindlessStruct::n_pos;
    // Drop one reference on the resource and forget it once unreferenced.
    auto &&ref_count = index.value();
    ref_count--;
    if (ref_count == 0) {
        ptrMap.remove(index);
    }
    index = {};
}
48+
/// Applies a batch of slot modifications to the CPU-side binding table.
///
/// For every modification, each of the three fields (buffer, 2D texture, 3D
/// texture) is handled independently:
///  - REMOVE releases the current binding through return_value();
///  - EMPLACE releases the current binding, allocates a fresh heap index from the
///    matching device pool, and adds a reference on the handle in `ptrMap`;
///  - any other operation leaves the field untouched.
///
/// Only bookkeeping happens here; descriptor writes and the GPU upload are done
/// in update().
///
/// @param mods Modifications to apply; `mod.slot` indexes `binded` without a
///             bounds check.
void BindlessArray::bind(luisa::span<BindlessArrayUpdateCommand::Modification const> mods) {
    // Adds one reference on `ptr` in ptrMap (inserting it with count 0 first if absent).
    auto add_index = [&](size_t ptr) {
        auto ite = ptrMap.emplace(ptr, 0);
        ite.value()++;
        return ite;
    };
    // Rebinds the 2D (isTex2D) or 3D texture field of one slot.
    auto emplace_tex = [&]<bool isTex2D>(BindlessStruct &bind_grp, MapIndicies &indices, uint64_t handle, Sampler const &samp) {
        // Sampler index as packed by the original code: filter + address * 4.
        auto smp_idx = luisa::to_underlying(samp.filter()) + luisa::to_underlying(samp.address()) * 4;
        if constexpr (isTex2D) {
            return_value(indices.tex2D, 1, bind_grp.tex2D);
            uint tex_idx = device()->tex2d_heap_pool.alloc();
            indices.tex2D = add_index(handle);
            bind_grp.write_samp2d(tex_idx, smp_idx);
        } else {
            // Fix: release the slot's previous *3D* binding. The old code passed the
            // tex2D fields here, which evicted the 2D texture, queued its index for
            // the 3D pool, and leaked the old 3D index and its reference.
            return_value(indices.tex3D, 2, bind_grp.tex3D);
            uint tex_idx = device()->tex3d_heap_pool.alloc();
            indices.tex3D = add_index(handle);
            bind_grp.write_samp3d(tex_idx, smp_idx);
        }
    };
    for (auto &&mod : mods) {
        auto &bind_grp = binded[mod.slot].first;
        auto &indices = binded[mod.slot].second;
        using Ope = BindlessArrayUpdateCommand::Modification::Operation;
        switch (mod.buffer.op) {
            case Ope::REMOVE:
                return_value(indices.buffer, 0, bind_grp.buffer);
                break;
            case Ope::EMPLACE: {
                return_value(indices.buffer, 0, bind_grp.buffer);
                bind_grp.buffer = device()->buffer_heap_pool.alloc();
                indices.buffer = add_index(mod.buffer.handle);
                break;
            }
            default: break;
        }
        switch (mod.tex2d.op) {
            case Ope::REMOVE:
                return_value(indices.tex2D, 1, bind_grp.tex2D);
                break;
            case Ope::EMPLACE:
                emplace_tex.operator()<true>(bind_grp, indices, mod.tex2d.handle, mod.tex2d.sampler);
                break;
            default: break;
        }
        switch (mod.tex3d.op) {
            case Ope::REMOVE:
                return_value(indices.tex3D, 2, bind_grp.tex3D);
                break;
            case Ope::EMPLACE:
                emplace_tex.operator()<false>(bind_grp, indices, mod.tex3d.handle, mod.tex3d.sampler);
                break;
            default: break;
        }
    }
}
112+
/// Records the GPU work that publishes a batch of slot modifications.
///
/// Steps, all performed while holding `mtx`:
///  1. For every EMPLACE in `mods`, append a descriptor write that points the
///     device-wide bindless buffer / tex2d / tex3d set at the new resource
///     (texture writes also create an image view stored in the command-buffer state).
///  2. Pack one uint4 per modification into `cache`: x = slot, y/z/w = the slot's
///     12-byte BindlessStruct.
///  3. Copy `cache` into an upload allocation, bind it together with the index
///     buffer to a freshly allocated descriptor set, flush all descriptor writes,
///     and dispatch the upload kernel with the modification count as push constant.
///  4. Move `freeQueue` into a command-buffer callback that returns the queued
///     indices to the device pools.
///
/// @param cmdbuffer       Command buffer to record into.
/// @param write_desc_sets Scratch list of descriptor writes; flushed with
///                        vkUpdateDescriptorSets and cleared before returning.
/// @param cache           Scratch staging vector; cleared and refilled here.
/// @param mods            Modifications previously applied through bind().
///
/// NOTE(review): when `mods` is empty the staging descriptor gets a range of
/// `dsc_buffer.size_bytes` for a 0-byte request — confirm callers never pass an
/// empty span, or that the allocator never reports a zero size.
void BindlessArray::update(
    CommandBuffer *cmdbuffer,
    luisa::vector<VkWriteDescriptorSet> &write_desc_sets,
    luisa::vector<uint4> &cache,
    luisa::span<BindlessArrayUpdateCommand::Modification const> mods) {
    std::lock_guard lck{mtx};
    // 16 bytes (one uint4) of staging space per modification, 16-byte aligned.
    auto dsc_buffer = cmdbuffer->states()->upload_alloc.allocate(16 * mods.size(), 16);
    auto shader = cmdbuffer->device()->set_bindless_kernel.Get(cmdbuffer->device());
    cache.clear();
    cache.reserve(mods.size());
    // Queues a SAMPLED_IMAGE descriptor write for the slot's 2D (isTex2D) or 3D texture.
    auto emplace_tex = [&]<bool isTex2D>(BindlessStruct &bind_grp, Texture const *tex) {
        VkDescriptorSet tex_set;
        uint tex_idx;
        if constexpr (isTex2D) {
            tex_set = device()->bdls_tex2d_set();
            tex_idx = bind_grp.tex2D & BindlessStruct::mask;
        } else {
            tex_set = device()->bdls_tex3d_set();
            tex_idx = bind_grp.tex3D & BindlessStruct::mask;
        }
        auto image_info = cmdbuffer->temp_desc->allocate_memory<VkDescriptorImageInfo>();
        auto &img_view = cmdbuffer->states()->img_views.emplace_back();
        VkImageViewCreateInfo img_view_create_info = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .flags = 0,
            .image = tex->vk_image(),
            .viewType = [&]() {
                switch (tex->dimension()) {
                    case 1:
                        return VK_IMAGE_VIEW_TYPE_1D;
                    case 2:
                        return VK_IMAGE_VIEW_TYPE_2D;
                    case 3:
                        return VK_IMAGE_VIEW_TYPE_3D;
                    default:
                        // Previously control fell off the end of this value-returning
                        // lambda (undefined behaviour). Fall back to the view type of
                        // the heap being written.
                        return isTex2D ? VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_3D;
                }
            }(),
            .format = Texture::to_vk_format(tex->format()),
            .subresourceRange = VkImageSubresourceRange{.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, .levelCount = tex->mip(), .baseArrayLayer = 0, .layerCount = 1}};
        VK_CHECK_RESULT(vkCreateImageView(device()->logic_device(), &img_view_create_info, Device::alloc_callbacks(), &img_view));

        // VkSampler is a non-dispatchable handle: VK_NULL_HANDLE is the portable
        // "no sampler" value (nullptr only compiles where handles are pointers).
        *image_info = VkDescriptorImageInfo{
            VK_NULL_HANDLE,
            img_view,
            cmdbuffer->resource_barrier->get_layout(tex, 0)};
        write_desc_sets.emplace_back(VkWriteDescriptorSet{
            VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            nullptr,                         // pNext
            tex_set,                         // dstSet
            0,                               // dstBinding
            tex_idx,                         // dstArrayElement
            1,                               // descriptorCount
            VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,// descriptorType
            image_info,                      // pImageInfo
            nullptr,                         // pBufferInfo
            nullptr});                       // pTexelBufferView
    };
    for (auto &mod : mods) {
        using Ope = BindlessArrayUpdateCommand::Modification::Operation;
        auto &bind_grp = binded[mod.slot].first;
        if (mod.buffer.op == Ope::EMPLACE) {
            auto buffer = reinterpret_cast<Buffer *>(mod.buffer.handle);
            auto buffer_info = cmdbuffer->temp_desc->allocate_memory<VkDescriptorBufferInfo>();
            // Bind from the requested offset to the end of the buffer.
            BufferView v{buffer, mod.buffer.offset_bytes, buffer->byte_size() - mod.buffer.offset_bytes};
            *buffer_info = VkDescriptorBufferInfo{
                buffer->vk_buffer(),
                v.offset,
                v.size_bytes};
            write_desc_sets.emplace_back(VkWriteDescriptorSet{
                VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                nullptr,                          // pNext
                device()->bdls_buffer_set(),      // dstSet
                0,                                // dstBinding
                bind_grp.buffer,                  // dstArrayElement
                1,                                // descriptorCount
                VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,// descriptorType
                nullptr,                          // pImageInfo
                buffer_info,                      // pBufferInfo
                nullptr});                        // pTexelBufferView
        }
        if (mod.tex2d.op == Ope::EMPLACE) {
            emplace_tex.operator()<true>(bind_grp, reinterpret_cast<Texture *>(mod.tex2d.handle));
        }
        if (mod.tex3d.op == Ope::EMPLACE) {
            // Fix: 3D textures must go through the 3D path. The old code instantiated
            // <true>, writing them into the 2D descriptor set at the slot's 2D index.
            emplace_tex.operator()<false>(bind_grp, reinterpret_cast<Texture *>(mod.tex3d.handle));
        }
        // Stage {slot, BindlessStruct} as one uint4 for the upload kernel.
        auto &v = cache.emplace_back();
        v.x = mod.slot;
        std::memcpy(&v.y, &bind_grp, sizeof(BindlessStruct));
        static_assert(sizeof(BindlessStruct) == 12);
    }
    // Descriptor set for the upload kernel itself.
    VkDescriptorSet desc_set;
    VkDescriptorSetAllocateInfo alloc_info{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        .descriptorPool = cmdbuffer->states()->_desc_pool,
        .descriptorSetCount = 1,
        .pSetLayouts = shader->desc_set_layout().data()};
    VK_CHECK_RESULT(
        vkAllocateDescriptorSets(
            device()->logic_device(),
            &alloc_info,
            &desc_set));
    // Push constant: number of staged entries (the kernel's thread-id bound).
    uint value = static_cast<uint>(mods.size());
    vkCmdPushConstants(
        cmdbuffer->cmdbuffer(),
        shader->pipeline_layout(),
        VK_SHADER_STAGE_COMPUTE_BIT,
        0,
        sizeof(value),
        &value);

    // Both infos below are referenced by pointer from write_desc_sets, so they
    // must stay alive until vkUpdateDescriptorSets has been called.
    VkDescriptorBufferInfo buffer_info{
        _indices_buffer.vk_buffer(),
        0,
        _indices_buffer.byte_size()};
    static_cast<UploadBuffer const *>(dsc_buffer.buffer)->copy_from(cache.data(), dsc_buffer.offset, cache.size_bytes());
    VkDescriptorBufferInfo arg_buffer_info{
        dsc_buffer.buffer->vk_buffer(),
        dsc_buffer.offset,
        dsc_buffer.size_bytes};
    // Binding 0: staged entries.
    write_desc_sets.emplace_back(VkWriteDescriptorSet{
        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        nullptr,
        desc_set,
        0,
        0,
        1,
        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        nullptr,
        &arg_buffer_info,
        nullptr});
    // Binding 1: the array's index buffer.
    write_desc_sets.emplace_back(VkWriteDescriptorSet{
        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        nullptr,
        desc_set,
        1,
        0,
        1,
        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        nullptr,
        &buffer_info,
        nullptr});
    vkUpdateDescriptorSets(
        device()->logic_device(),
        static_cast<uint32_t>(write_desc_sets.size()),
        write_desc_sets.data(),
        0,
        nullptr);
    write_desc_sets.clear();

    vkCmdBindDescriptorSets(
        cmdbuffer->cmdbuffer(),
        VK_PIPELINE_BIND_POINT_COMPUTE,
        shader->pipeline_layout(),
        0,
        1,
        &desc_set,
        0,
        nullptr);
    vkCmdBindPipeline(cmdbuffer->cmdbuffer(), VK_PIPELINE_BIND_POINT_COMPUTE, shader->pipeline());
    // One thread per staged entry, 256 threads per group (rounded up).
    vkCmdDispatch(cmdbuffer->cmdbuffer(), static_cast<uint32_t>((mods.size() + 255) / 256), 1, 1);
    if (!freeQueue.empty()) {
        // Hand the released indices to a command-buffer callback so they return to
        // the pools later rather than immediately.
        cmdbuffer->states()->_callbacks.emplace_back([freeQueue = std::move(freeQueue), device = device()]() {
            for (auto &i : freeQueue) {
                switch (i.type) {
                    case 0:
                        device->buffer_heap_pool.dealloc(i.index);
                        break;
                    case 1:
                        device->tex2d_heap_pool.dealloc(i.index);
                        break;
                    case 2:
                        device->tex3d_heap_pool.dealloc(i.index);
                        break;
                }
            }
        });
        // Leave the moved-from member in a known-empty state for the next bind().
        freeQueue.clear();
    }
}
291+
}// namespace lc::vk

0 commit comments

Comments
 (0)