Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Attempt to add single-pass scan. #685

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 21 additions & 47 deletions vello/src/render.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,61 +194,35 @@ impl Render {
);
let ptcl_buf =
ResourceProxy::new_buf(buffer_sizes.ptcl.size_in_bytes().into(), "vello.ptcl_buf");
let reduced_buf = ResourceProxy::new_buf(
buffer_sizes.path_reduced.size_in_bytes().into(),
"vello.reduced_buf",
);
// TODO: really only need pathtag_wgs - 1
recording.dispatch(
shaders.pathtag_reduce,
wg_counts.path_reduce,
[config_buf, scene_buf, reduced_buf],
);
let mut pathtag_parent = reduced_buf;
let mut large_pathtag_bufs = None;
let use_large_path_scan = wg_counts.use_large_path_scan && !shaders.pathtag_is_cpu;
if use_large_path_scan {
let reduced2_buf = ResourceProxy::new_buf(
buffer_sizes.path_reduced2.size_in_bytes().into(),
"vello.reduced2_buf",
);
recording.dispatch(
shaders.pathtag_reduce2,
wg_counts.path_reduce2,
[reduced_buf, reduced2_buf],
);
let reduced_scan_buf = ResourceProxy::new_buf(
buffer_sizes.path_reduced_scan.size_in_bytes().into(),
"reduced_scan_buf",
);
recording.dispatch(
shaders.pathtag_scan1,
wg_counts.path_scan1,
[reduced_buf, reduced2_buf, reduced_scan_buf],
);
pathtag_parent = reduced_scan_buf;
large_pathtag_bufs = Some((reduced2_buf, reduced_scan_buf));
}

let tagmonoid_buf = ResourceProxy::new_buf(
buffer_sizes.path_monoids.size_in_bytes().into(),
"vello.tagmonoid_buf",
);
let pathtag_scan = if use_large_path_scan {
shaders.pathtag_scan_large
} else {
shaders.pathtag_scan
};
let reduced_buf = BufferProxy::new(
buffer_sizes.path_reduced.size_in_bytes().into(),
"vello.reduced_buf",
);
let path_scan_bump_buf = BufferProxy::new(
buffer_sizes.path_scan_bump.size_in_bytes().into(),
"vello.path_scan_bump_buf",
);
recording.clear_all(path_scan_bump_buf);
recording.clear_all(reduced_buf);
let path_scan_bump_buf = ResourceProxy::Buffer(path_scan_bump_buf);
let reduced_buf = ResourceProxy::Buffer(reduced_buf);
recording.dispatch(
pathtag_scan,
shaders.pathtag_scan_csdldf,
wg_counts.path_scan,
[config_buf, scene_buf, pathtag_parent, tagmonoid_buf],
[
config_buf,
scene_buf,
reduced_buf,
tagmonoid_buf,
path_scan_bump_buf,
],
);
recording.free_resource(reduced_buf);
if let Some((reduced2, reduced_scan)) = large_pathtag_bufs {
recording.free_resource(reduced2);
recording.free_resource(reduced_scan);
}
recording.free_resource(path_scan_bump_buf);
let path_bbox_buf = ResourceProxy::new_buf(
buffer_sizes.path_bboxes.size_in_bytes().into(),
"vello.path_bbox_buf",
Expand Down
39 changes: 6 additions & 33 deletions vello/src/shaders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,7 @@ use crate::{

// Shaders for the full pipeline
pub struct FullShaders {
pub pathtag_reduce: ShaderId,
pub pathtag_reduce2: ShaderId,
pub pathtag_scan1: ShaderId,
pub pathtag_scan: ShaderId,
pub pathtag_scan_large: ShaderId,
pub pathtag_scan_csdldf: ShaderId,
pub bbox_clear: ShaderId,
pub flatten: ShaderId,
pub draw_reduce: ShaderId,
Expand All @@ -39,9 +35,6 @@ pub struct FullShaders {
pub fine_area: Option<ShaderId>,
pub fine_msaa8: Option<ShaderId>,
pub fine_msaa16: Option<ShaderId>,
// 2-level dispatch works for CPU pathtag scan even for large
// inputs, 3-level is not yet implemented.
pub pathtag_is_cpu: bool,
}

#[cfg(feature = "wgpu")]
Expand Down Expand Up @@ -101,27 +94,12 @@ pub(crate) fn full_shaders(
};
}

let pathtag_reduce = add_shader!(pathtag_reduce, [Uniform, BufReadOnly, Buffer]);
let pathtag_reduce2 = add_shader!(
pathtag_reduce2,
[BufReadOnly, Buffer],
CpuShaderType::Skipped
);
let pathtag_scan1 = add_shader!(
pathtag_scan1,
[BufReadOnly, BufReadOnly, Buffer],
CpuShaderType::Skipped
);
let pathtag_scan = add_shader!(
pathtag_scan_small,
[Uniform, BufReadOnly, BufReadOnly, Buffer],
let pathtag_scan_csdldf = add_shader!(
pathtag_scan_csdldf,
[Uniform, BufReadOnly, Buffer, Buffer, Buffer],
CpuShaderType::Present(vello_shaders::cpu::pathtag_scan)
);
let pathtag_scan_large = add_shader!(
pathtag_scan_large,
[Uniform, BufReadOnly, BufReadOnly, Buffer],
CpuShaderType::Skipped
);

let bbox_clear = add_shader!(bbox_clear, [Uniform, Buffer]);
let flatten = add_shader!(
flatten,
Expand Down Expand Up @@ -249,11 +227,7 @@ pub(crate) fn full_shaders(
};

Ok(FullShaders {
pathtag_reduce,
pathtag_reduce2,
pathtag_scan,
pathtag_scan1,
pathtag_scan_large,
pathtag_scan_csdldf,
b0nes164 marked this conversation as resolved.
Show resolved Hide resolved
bbox_clear,
flatten,
draw_reduce,
Expand All @@ -271,6 +245,5 @@ pub(crate) fn full_shaders(
fine_area,
fine_msaa8,
fine_msaa16,
pathtag_is_cpu: options.use_cpu,
})
}
9 changes: 0 additions & 9 deletions vello/src/wgpu_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ struct WgpuShader {
/// Describes whether a shader has a CPU implementation available.
pub(crate) enum CpuShaderType {
/// A CPU implementation exists: a function pointer taking a `u32` and a
/// slice of `CpuBinding`s.
// NOTE(review): the meaning of the `u32` argument is not visible here
// (presumably a workgroup count) — confirm against the CPU shader callers.
Present(fn(u32, &[CpuBinding])),
/// No CPU implementation exists, so a GPU shader is created instead.
Missing,
/// The shader is unused in CPU mode; a dummy shader with neither a GPU nor
/// a CPU part is registered in its place.
Skipped,
}

struct CpuShader {
Expand Down Expand Up @@ -263,14 +262,6 @@ impl WgpuEngine {
label,
});
}
// This shader is unused in CPU mode, create a dummy shader
CpuShaderType::Skipped => {
return add(Shader {
wgpu: None,
cpu: None,
label,
});
}
// Create a GPU shader as we don't have a CPU shader
CpuShaderType::Missing => {}
}
Expand Down
32 changes: 5 additions & 27 deletions vello_encoding/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,6 @@ pub type WorkgroupSize = (u32, u32, u32);
/// Computed sizes for all dispatches.
#[derive(Copy, Clone, Debug, Default)]
pub struct WorkgroupCounts {
pub use_large_path_scan: bool,
pub path_reduce: WorkgroupSize,
pub path_reduce2: WorkgroupSize,
pub path_scan1: WorkgroupSize,
pub path_scan: WorkgroupSize,
pub bbox_clear: WorkgroupSize,
pub flatten: WorkgroupSize,
Expand Down Expand Up @@ -237,12 +233,6 @@ impl WorkgroupCounts {
let n_clips = layout.n_clips;
let path_tag_padded = align_up(n_path_tags, 4 * PATH_REDUCE_WG);
let path_tag_wgs = path_tag_padded / (4 * PATH_REDUCE_WG);
let use_large_path_scan = path_tag_wgs > PATH_REDUCE_WG;
let reduced_size = if use_large_path_scan {
align_up(path_tag_wgs, PATH_REDUCE_WG)
} else {
path_tag_wgs
};
let draw_object_wgs = (n_draw_objects + PATH_BBOX_WG - 1) / PATH_BBOX_WG;
let draw_monoid_wgs = draw_object_wgs.min(PATH_BBOX_WG);
let flatten_wgs = (n_path_tags + FLATTEN_WG - 1) / FLATTEN_WG;
Expand All @@ -252,10 +242,6 @@ impl WorkgroupCounts {
let width_in_bins = (width_in_tiles + 15) / 16;
let height_in_bins = (height_in_tiles + 15) / 16;
Self {
use_large_path_scan,
path_reduce: (path_tag_wgs, 1, 1),
path_reduce2: (PATH_REDUCE_WG, 1, 1),
path_scan1: (reduced_size / PATH_REDUCE_WG, 1, 1),
path_scan: (path_tag_wgs, 1, 1),
bbox_clear: (draw_object_wgs, 1, 1),
flatten: (flatten_wgs, 1, 1),
Expand Down Expand Up @@ -334,8 +320,7 @@ impl<T: Sized> PartialOrd for BufferSize<T> {
pub struct BufferSizes {
// Known size buffers
pub path_reduced: BufferSize<PathMonoid>,
pub path_reduced2: BufferSize<PathMonoid>,
pub path_reduced_scan: BufferSize<PathMonoid>,
pub path_scan_bump: BufferSize<u32>,
pub path_monoids: BufferSize<PathMonoid>,
pub path_bboxes: BufferSize<PathBbox>,
pub draw_reduced: BufferSize<DrawMonoid>,
Expand Down Expand Up @@ -365,15 +350,9 @@ impl BufferSizes {
let n_paths = layout.n_paths;
let n_draw_objects = layout.n_draw_objects;
let n_clips = layout.n_clips;
let path_tag_wgs = workgroups.path_reduce.0;
let reduced_size = if workgroups.use_large_path_scan {
align_up(path_tag_wgs, PATH_REDUCE_WG)
} else {
path_tag_wgs
};
let path_reduced = BufferSize::new(reduced_size);
let path_reduced2 = BufferSize::new(PATH_REDUCE_WG);
let path_reduced_scan = BufferSize::new(reduced_size);
let path_tag_wgs = workgroups.path_scan.0;
let path_reduced = BufferSize::new(path_tag_wgs);
let path_scan_bump = BufferSize::new(1);
let path_monoids = BufferSize::new(path_tag_wgs * PATH_REDUCE_WG);
let path_bboxes = BufferSize::new(n_paths);
let binning_wgs = workgroups.binning.0;
Expand Down Expand Up @@ -405,8 +384,7 @@ impl BufferSizes {
let ptcl = BufferSize::new(1 << 23);
Self {
path_reduced,
path_reduced2,
path_reduced_scan,
path_scan_bump,
path_monoids,
path_bboxes,
draw_reduced,
Expand Down
42 changes: 0 additions & 42 deletions vello_shaders/shader/pathtag_reduce.wgsl

This file was deleted.

41 changes: 0 additions & 41 deletions vello_shaders/shader/pathtag_reduce2.wgsl

This file was deleted.

Loading