Skip to content

Commit

Permalink
Allow deeper blend stacks by spilling to a buffer (#657)
Browse files Browse the repository at this point in the history
This brings in support for blend spilling (which was supported in the
old piet-gpu).

I don't have a good heuristic for how big to make the buffer. That is
something which will need to be addressed in #606 (or its successors). I
just guessed that 256 spills would be fine. I think this is probably too
small - I suspect we'll get feedback from @TrueDoctor about this.

I have confirmed that the robustness works as expected with the GPU
shaders.
  • Loading branch information
DJMcNab authored Aug 6, 2024
1 parent 0808fa0 commit c7b615e
Show file tree
Hide file tree
Showing 12 changed files with 123 additions and 12 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ This release has an [MSRV][] of 1.75.

### Added

- Support blends more than four layers deep ([#657][] by [@DJMcNab][])

### Changed

- Breaking: Updated `wgpu` to 22.1.0. ([#635] by [@waywardmonkeys])
Expand Down Expand Up @@ -119,6 +121,7 @@ This release has an [MSRV][] of 1.75.
[#630]: https://github.com/linebender/vello/pull/630
[#631]: https://github.com/linebender/vello/pull/631
[#635]: https://github.com/linebender/vello/pull/635
[#657]: https://github.com/linebender/vello/pull/657

<!-- Note that this is still comparing against 0.2.0, because 0.2.1 is a cherry-picked patch -->
[Unreleased]: https://github.com/linebender/vello/compare/v0.2.0...HEAD
Expand Down
37 changes: 37 additions & 0 deletions examples/scenes/src/test_scenes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ export_scenes!(
two_point_radial(two_point_radial),
brush_transform(brush_transform: animated),
blend_grid(blend_grid),
deep_blend(deep_blend),
conflation_artifacts(conflation_artifacts),
labyrinth(labyrinth),
robust_paths(robust_paths),
Expand Down Expand Up @@ -1057,6 +1058,42 @@ mod impls {
}
}

pub(super) fn deep_blend(scene: &mut Scene, params: &mut SceneParams) {
params.resolution = Some(Vec2::new(1000., 1000.));
let main_rect = Rect::from_origin_size((10., 10.), (900., 900.));
scene.fill(
Fill::EvenOdd,
Affine::IDENTITY,
Color::RED,
None,
&main_rect,
);
let options = [
(800., Color::AQUA),
(700., Color::RED),
(600., Color::ALICE_BLUE),
(500., Color::YELLOW),
(400., Color::GREEN),
(300., Color::BLUE),
(200., Color::ORANGE),
(100., Color::WHITE),
];
let mut depth = 0;
for (width, colour) in &options[..params.complexity.min(options.len() - 1)] {
scene.push_layer(
Mix::Normal,
0.9,
Affine::IDENTITY,
&Rect::from_origin_size((10., 10.), (*width, *width)),
);
scene.fill(Fill::EvenOdd, Affine::IDENTITY, colour, None, &main_rect);
depth += 1;
}
for _ in 0..depth {
scene.pop_layer();
}
}

// Support functions

pub(super) fn render_cardioid(scene: &mut Scene) {
Expand Down
8 changes: 8 additions & 0 deletions vello/src/render.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct FineResources {
gradient_image: ResourceProxy,
info_bin_data_buf: ResourceProxy,
image_atlas: ResourceProxy,
blend_spill_buf: ResourceProxy,

out_image: ImageProxy,
}
Expand Down Expand Up @@ -450,6 +451,10 @@ impl Render {
recording.free_resource(bin_header_buf);
recording.free_resource(path_buf);
let out_image = ImageProxy::new(params.width, params.height, ImageFormat::Rgba8);
let blend_spill_buf = BufferProxy::new(
buffer_sizes.blend_spill.size_in_bytes().into(),
"blend_spill",
);
self.fine_wg_count = Some(wg_counts.fine);
self.fine_resources = Some(FineResources {
aa_config: params.antialiasing_method,
Expand All @@ -460,6 +465,7 @@ impl Render {
ptcl_buf,
gradient_image,
info_bin_data_buf,
blend_spill_buf: ResourceProxy::Buffer(blend_spill_buf),
image_atlas: ResourceProxy::Image(image_atlas),
out_image,
});
Expand Down Expand Up @@ -510,6 +516,7 @@ impl Render {
fine.segments_buf,
fine.ptcl_buf,
fine.info_bin_data_buf,
fine.blend_spill_buf,
ResourceProxy::Image(fine.out_image),
fine.gradient_image,
fine.image_atlas,
Expand Down Expand Up @@ -543,6 +550,7 @@ impl Render {
fine.segments_buf,
fine.ptcl_buf,
fine.info_bin_data_buf,
fine.blend_spill_buf,
ResourceProxy::Image(fine.out_image),
fine.gradient_image,
fine.image_atlas,
Expand Down
1 change: 1 addition & 0 deletions vello/src/shaders.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ pub(crate) fn full_shaders(
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::Buffer,
BindType::Image(ImageFormat::Rgba8),
BindType::ImageRead(ImageFormat::Rgba8),
BindType::ImageRead(ImageFormat::Rgba8),
Expand Down
8 changes: 8 additions & 0 deletions vello_encoding/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ pub struct ConfigUniform {
pub seg_counts_size: u32,
/// Size of segment buffer allocation (in [`PathSegment`]s).
pub segments_size: u32,
/// Size of blend spill buffer (in `u32` pixels).
// TODO: Maybe store in TILE_WIDTH * TILE_HEIGHT blocks of pixels instead?
pub blend_size: u32,
/// Size of per-tile command list buffer allocation (in `u32`s).
pub ptcl_size: u32,
}
Expand Down Expand Up @@ -184,6 +187,7 @@ impl RenderConfig {
tiles_size: buffer_sizes.tiles.len(),
seg_counts_size: buffer_sizes.seg_counts.len(),
segments_size: buffer_sizes.segments.len(),
blend_size: buffer_sizes.blend_spill.len(),
ptcl_size: buffer_sizes.ptcl.len(),
layout: *layout,
},
Expand Down Expand Up @@ -352,6 +356,7 @@ pub struct BufferSizes {
pub tiles: BufferSize<Tile>,
pub seg_counts: BufferSize<SegmentCount>,
pub segments: BufferSize<PathSegment>,
pub blend_spill: BufferSize<u32>,
pub ptcl: BufferSize<u32>,
}

Expand Down Expand Up @@ -395,6 +400,8 @@ impl BufferSizes {
let lines = BufferSize::new(1 << 21);
let seg_counts = BufferSize::new(1 << 21);
let segments = BufferSize::new(1 << 21);
// 16 * 16 (1 << 8) is one blend spill, so this allows for 4096 spills.
let blend_spill = BufferSize::new(1 << 20);
let ptcl = BufferSize::new(1 << 23);
Self {
path_reduced,
Expand All @@ -419,6 +426,7 @@ impl BufferSizes {
tiles,
seg_counts,
segments,
blend_spill,
ptcl,
}
}
Expand Down
5 changes: 4 additions & 1 deletion vello_shaders/shader/coarse.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -444,8 +444,11 @@ fn main(
ptcl[cmd_offset] = CMD_END;
var blend_ix = 0u;
if max_blend_depth > BLEND_STACK_SPLIT {
let scratch_size = max_blend_depth * TILE_WIDTH * TILE_HEIGHT;
let scratch_size = (max_blend_depth - BLEND_STACK_SPLIT) * TILE_WIDTH * TILE_HEIGHT;
blend_ix = atomicAdd(&bump.blend, scratch_size);
if blend_ix + scratch_size > config.blend_size {
atomicOr(&bump.failed, STAGE_COARSE);
}
}
ptcl[blend_offset] = blend_ix;
}
Expand Down
24 changes: 18 additions & 6 deletions vello_shaders/shader/fine.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -39,27 +39,30 @@ var<storage> ptcl: array<u32>;
var<storage> info: array<u32>;

@group(0) @binding(4)
var<storage, read_write> blend_spill: array<u32>;

@group(0) @binding(5)
#ifdef r8
var output: texture_storage_2d<r8unorm, write>;
#else
var output: texture_storage_2d<rgba8unorm, write>;
#endif

#ifdef full
@group(0) @binding(5)
@group(0) @binding(6)
var gradients: texture_2d<f32>;

@group(0) @binding(6)
@group(0) @binding(7)
var image_atlas: texture_2d<f32>;
#endif

// MSAA-only bindings and utilities
#ifdef msaa

#ifdef full
const MASK_LUT_INDEX: u32 = 7;
const MASK_LUT_INDEX: u32 = 8;
#else
const MASK_LUT_INDEX: u32 = 5;
const MASK_LUT_INDEX: u32 = 6;
#endif

#ifdef msaa8
Expand Down Expand Up @@ -947,7 +950,13 @@ fn main(
rgba[i] = vec4(0.0);
}
} else {
// TODO: spill to memory
let blend_in_scratch = clip_depth - BLEND_STACK_SPLIT;
let local_tile_ix = local_id.x * PIXELS_PER_THREAD + local_id.y * TILE_WIDTH;
let local_blend_start = blend_offset + blend_in_scratch * TILE_WIDTH * TILE_HEIGHT + local_tile_ix;
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
blend_spill[local_blend_start + i] = pack4x8unorm(rgba[i]);
rgba[i] = vec4(0.0);
}
}
clip_depth += 1u;
cmd_ix += 1u;
Expand All @@ -960,7 +969,10 @@ fn main(
if clip_depth < BLEND_STACK_SPLIT {
bg_rgba = blend_stack[clip_depth][i];
} else {
// load from memory
let blend_in_scratch = clip_depth - BLEND_STACK_SPLIT;
let local_tile_ix = local_id.x * PIXELS_PER_THREAD + local_id.y * TILE_WIDTH;
let local_blend_start = blend_offset + blend_in_scratch * TILE_WIDTH * TILE_HEIGHT + local_tile_ix;
bg_rgba = blend_spill[local_blend_start + i];
}
let bg = unpack4x8unorm(bg_rgba);
let fg = rgba[i] * area[i] * end_clip.alpha;
Expand Down
6 changes: 5 additions & 1 deletion vello_shaders/shader/shared/config.wgsl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright 2022 the Vello Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

// This must be kept in sync with the struct in src/encoding/resolve.rs
// This must be kept in sync with `ConfigUniform` in `vello_encoding/src/config.rs`
struct Config {
width_in_tiles: u32,
height_in_tiles: u32,
Expand Down Expand Up @@ -38,6 +38,7 @@ struct Config {
tiles_size: u32,
seg_counts_size: u32,
segments_size: u32,
blend_size: u32,
ptcl_size: u32,
}

Expand All @@ -54,6 +55,9 @@ let N_TILE = 256u;
// Not currently supporting non-square tiles
let TILE_SCALE = 0.0625;

// The "split" point between using local memory in fine for the blend stack and spilling to the blend_spill buffer.
// A higher value will increase vgpr ("register") pressure in fine, but decrease required dynamic memory allocation.
// If changing, also change in vello_shaders/src/cpu/coarse.rs.
let BLEND_STACK_SPLIT = 4u;

// The following are computed in draw_leaf from the generic gradient parameters
Expand Down
21 changes: 19 additions & 2 deletions vello_shaders/src/cpu/coarse.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright 2023 the Vello Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

use std::cmp::max;

use vello_encoding::{
BinHeader, BumpAllocators, ConfigUniform, DrawMonoid, DrawTag, Path, Tile,
DRAW_INFO_FLAGS_FILL_RULE_BIT,
Expand All @@ -11,10 +13,18 @@ use super::{
CMD_LIN_GRAD, CMD_RAD_GRAD, CMD_SOLID, CMD_SWEEP_GRAD, PTCL_INITIAL_ALLOC,
};

// Tiles per bin, along each axis.
const N_TILE_X: usize = 16;
const N_TILE_Y: usize = 16;
// Total number of tiles in one bin.
const N_TILE: usize = N_TILE_X * N_TILE_Y;

// The "split" point between the fine stage keeping its blend stack in local
// memory and spilling to the blend spill buffer. Scratch space is only
// reserved for blend levels beyond this depth.
// If changing, also change in vello_shaders/shader/shared/config.wgsl.
const BLEND_STACK_SPLIT: u32 = 4;

// Pixels per tile, along each axis. One spilled blend level for a tile takes
// `TILE_WIDTH * TILE_HEIGHT` entries of scratch space.
const TILE_WIDTH: u32 = 16;
const TILE_HEIGHT: u32 = 16;

const PTCL_INCREMENT: u32 = 256;
const PTCL_HEADROOM: u32 = 2;

Expand Down Expand Up @@ -219,6 +229,8 @@ fn coarse_main(
let blend_offset = tile_state.cmd_offset;
tile_state.cmd_offset += 1;
let mut clip_depth = 0;
let mut render_blend_depth = 0;
let mut max_blend_depth = 0_u32;
let mut clip_zero_depth = 0;
for drawobj_ix in &compacted[tile_ix] {
let drawtag = scene[(drawtag_base + drawobj_ix) as usize];
Expand Down Expand Up @@ -306,7 +318,10 @@ fn coarse_main(
clip_zero_depth = clip_depth + 1;
} else {
tile_state.write_begin_clip(config, bump, ptcl);
// TODO: update blend depth
// TODO: Do we need to track this separately? It seems to always be
// the same as `clip_depth` in this code path.
render_blend_depth += 1;
max_blend_depth = max(render_blend_depth, max_blend_depth);
}
clip_depth += 1;
}
Expand All @@ -317,6 +332,7 @@ fn coarse_main(
let blend = scene[dd as usize];
let alpha = f32::from_bits(scene[dd as usize + 1]);
tile_state.write_end_clip(config, bump, ptcl, blend, alpha);
render_blend_depth -= 1;
}
_ => todo!(),
}
Expand All @@ -338,7 +354,8 @@ fn coarse_main(

if bin_tile_x + tile_x < width_in_tiles && bin_tile_y + tile_y < height_in_tiles {
ptcl[tile_state.cmd_offset as usize] = CMD_END;
let scratch_size = 0; // TODO: actually compute blend depth
let scratch_size =
(max_blend_depth.saturating_sub(BLEND_STACK_SPLIT)) * TILE_WIDTH * TILE_HEIGHT;
ptcl[blend_offset as usize] = bump.blend;
bump.blend += scratch_size;
}
Expand Down
3 changes: 3 additions & 0 deletions vello_tests/snapshots/deep_blend.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 9 additions & 1 deletion vello_tests/tests/compare_gpu_cpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,15 @@ fn compare_tricky_strokes() {
#[cfg_attr(skip_gpu_tests, ignore)]
fn compare_fill_types() {
let test_scene = test_scenes::fill_types();
assert_eq!(test_scene.config.name, "fill_types");
let params = TestParams::new("compare_fill_types", 1400, 700);
compare_test_scene(test_scene, params);
}

/// Runs the `deep_blend` test scene through `compare_test_scene`, under the
/// test name `compare_deep_blend` at 150x150.
#[test]
#[cfg_attr(skip_gpu_tests, ignore)]
fn compare_deep_blend() {
    let scene = test_scenes::deep_blend();
    // Make sure we really got the scene this test is named after.
    assert_eq!(scene.config.name, "deep_blend");
    compare_test_scene(scene, TestParams::new("compare_deep_blend", 150, 150));
}
9 changes: 8 additions & 1 deletion vello_tests/tests/snapshots.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,14 @@ fn snapshot_tricky_strokes() {
#[cfg_attr(skip_gpu_tests, ignore)]
fn snapshot_fill_types() {
let test_scene = test_scenes::fill_types();
assert_eq!(test_scene.config.name, "fill_types");
let params = TestParams::new("fill_types", 700, 350);
snapshot_test_scene(test_scene, params);
}

/// Snapshot test for the `deep_blend` test scene, run through
/// `snapshot_test_scene` under the test name `deep_blend` at 200x200.
#[test]
#[cfg_attr(skip_gpu_tests, ignore)]
fn snapshot_deep_blend() {
    let test_scene = test_scenes::deep_blend();
    // Same sanity check the other scene tests perform: ensure the scene we
    // fetched is the one this test (and its stored snapshot) is named after.
    assert_eq!(test_scene.config.name, "deep_blend");
    let params = TestParams::new("deep_blend", 200, 200);
    snapshot_test_scene(test_scene, params);
}

0 comments on commit c7b615e

Please sign in to comment.