diff --git a/Cargo.toml b/Cargo.toml
index 7c5cbc8f8..99abdc86b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -43,6 +43,11 @@ repository.workspace = true
 
 [features]
 default = ["wgpu"]
+# Enables GPU memory usage estimation. This performs additional computations
+# in order to estimate the minimum required allocations for buffers backing
+# bump-allocated GPU memory.
+# TODO: Turn this into a runtime option used at resolve time and remove the feature.
+bump_estimate = ["vello_encoding/bump_estimate"]
 hot_reload = []
 buffer_labels = []
 
diff --git a/crates/encoding/Cargo.toml b/crates/encoding/Cargo.toml
index f7868a8b3..c9032f7df 100644
--- a/crates/encoding/Cargo.toml
+++ b/crates/encoding/Cargo.toml
@@ -9,10 +9,16 @@ repository.workspace = true
 
 [features]
 default = ["full"]
+
 # Enables support for the full pipeline including late-bound
 # resources (gradients, images and glyph runs)
 full = ["skrifa", "guillotiere"]
 
+# Enables an optional GPU memory usage estimation utility. This can be used to
+# perform additional computations in order to estimate the minimum required allocations
+# for buffers backing bump-allocated GPU memory.
+bump_estimate = []
+
 [lints]
 workspace = true
 
diff --git a/crates/encoding/src/config.rs b/crates/encoding/src/config.rs
index 37b1906ac..b6ef8857a 100644
--- a/crates/encoding/src/config.rs
+++ b/crates/encoding/src/config.rs
@@ -37,6 +37,73 @@ pub struct BumpAllocators {
     pub lines: u32,
 }
 
+#[derive(Default)]
+pub struct BumpAllocatorMemory {
+    pub total: u32, // combined size in bytes of the buffers below
+    pub binning: BufferSize<u32>,
+    pub ptcl: BufferSize<u32>,
+    pub tile: BufferSize<Tile>,
+    pub seg_counts: BufferSize<SegmentCount>,
+    pub segments: BufferSize<PathSegment>,
+    pub lines: BufferSize<LineSoup>,
+}
+
+impl BumpAllocators {
+    /// Converts the bump allocation counts into typed buffer sizes.
+    ///
+    /// `total` is the sum of the byte sizes of all six buffers.
+    pub fn memory(&self) -> BumpAllocatorMemory {
+        let mut memory = BumpAllocatorMemory {
+            total: 0,
+            binning: BufferSize::new(self.binning),
+            ptcl: BufferSize::new(self.ptcl),
+            tile: BufferSize::new(self.tile),
+            seg_counts: BufferSize::new(self.seg_counts),
+            segments: BufferSize::new(self.segments),
+            lines: BufferSize::new(self.lines),
+        };
+        // Fill in the total once every buffer size is known.
+        memory.total = memory.binning.size_in_bytes()
+            + memory.ptcl.size_in_bytes()
+            + memory.tile.size_in_bytes()
+            + memory.seg_counts.size_in_bytes()
+            + memory.segments.size_in_bytes()
+            + memory.lines.size_in_bytes();
+        memory
+    }
+}
+
+impl std::fmt::Display for BumpAllocatorMemory {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "\n \
+                 \tTotal:\t\t\t{} bytes ({:.2} KB | {:.2} MB)\n\
+                 \tBinning:\t\t{} elements ({} bytes)\n\
+                 \tPTCL:\t\t\t{} elements ({} bytes)\n\
+                 \tTile:\t\t\t{} elements ({} bytes)\n\
+                 \tSegment Counts:\t\t{} elements ({} bytes)\n\
+                 \tSegments:\t\t{} elements ({} bytes)\n\
+                 \tLines:\t\t\t{} elements ({} bytes)",
+            self.total,
+            f64::from(self.total) / f64::from(1_u32 << 10), // KB (f64 holds any u32 exactly)
+            f64::from(self.total) / f64::from(1_u32 << 20), // MB
+            self.binning.len(),
+            self.binning.size_in_bytes(),
+            self.ptcl.len(),
+            self.ptcl.size_in_bytes(),
+            self.tile.len(),
+            self.tile.size_in_bytes(),
+            self.seg_counts.len(),
+            self.seg_counts.size_in_bytes(),
+            self.segments.len(),
+            self.segments.size_in_bytes(),
+            self.lines.len(),
+            self.lines.size_in_bytes()
+        )
+    }
+}
+
 /// Storage of indirect dispatch size values.
 ///
 /// The original plan was to reuse [`BumpAllocators`], but the WebGPU compatible
diff --git a/crates/encoding/src/estimate.rs b/crates/encoding/src/estimate.rs
new file mode 100644
index 000000000..8cb21dacb
--- /dev/null
+++ b/crates/encoding/src/estimate.rs
@@ -0,0 +1,287 @@
+// Copyright 2024 the Vello authors
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+//! This utility provides conservative size estimation for buffer allocations backing
+//! GPU bump memory. This estimate relies on heuristics and naturally overestimates.
+
+use super::{BufferSize, BumpAllocatorMemory, Transform};
+use peniko::kurbo::{Cap, Join, PathEl, Stroke, Vec2};
+
+const RSQRT_OF_TOL: f64 = 2.2360679775; // 1 / sqrt(tol), with tol = 0.2
+
+/// Accumulates scene content counts used to estimate bump-allocated GPU buffer sizes.
+///
+/// Only the line soup buffer is estimated at the moment.
+#[derive(Clone, Default)]
+pub struct BumpEstimator {
+    // TODO: support the remaining buffers: binning, ptcl, tile, segment counts
+    // and segments.
+    lines: LineSoup,
+}
+
+impl BumpEstimator {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn reset(&mut self) {
+        *self = Self::default();
+    }
+
+    /// Combine the counts of this estimator with `other` after applying an optional `transform`.
+    pub fn append(&mut self, other: &Self, transform: Option<&Transform>) {
+        self.lines.add(&other.lines, transform_scale(transform));
+    }
+
+    /// Accumulates line estimates for `path` under `t`, treated as a stroke if `stroke` is `Some`.
+    pub fn count_path(
+        &mut self,
+        path: impl Iterator<Item = PathEl>,
+        t: &Transform,
+        stroke: Option<&Stroke>,
+    ) {
+        let mut caps = 1; // the first subpath is counted up front
+        let mut joins: u32 = 0;
+        let mut lineto_lines = 0;
+        let mut fill_close_lines = 1; // only used for fills; one for the first subpath
+        let mut curve_lines = 0;
+        let mut curve_count = 0;
+        // Track the path state to correctly count empty paths and close joins.
+        let mut first_pt = None;
+        let mut last_pt = None;
+        for el in path {
+            match el {
+                PathEl::MoveTo(p0) => {
+                    first_pt = Some(p0);
+                    if last_pt.is_none() {
+                        continue; // nothing drawn since the last MoveTo: no new subpath yet
+                    }
+                    caps += 1;
+                    joins = joins.saturating_sub(1); // presumably: prior subpath ends in a cap
+                    last_pt = None;
+                    fill_close_lines += 1;
+                }
+                PathEl::ClosePath => {
+                    if last_pt.is_some() {
+                        joins += 1;
+                        lineto_lines += 1;
+                    }
+                    last_pt = first_pt;
+                }
+                PathEl::LineTo(p0) => {
+                    last_pt = Some(p0);
+                    joins += 1;
+                    lineto_lines += 1;
+                }
+                PathEl::QuadTo(p1, p2) => {
+                    let Some(p0) = last_pt.or(first_pt) else {
+                        continue; // no current point to start the curve from
+                    };
+                    curve_count += 1;
+                    curve_lines +=
+                        wang::quadratic(RSQRT_OF_TOL, p0.to_vec2(), p1.to_vec2(), p2.to_vec2(), t);
+                    last_pt = Some(p2);
+                    joins += 1;
+                }
+                PathEl::CurveTo(p1, p2, p3) => {
+                    let Some(p0) = last_pt.or(first_pt) else {
+                        continue; // no current point to start the curve from
+                    };
+                    curve_count += 1;
+                    curve_lines += wang::cubic(
+                        RSQRT_OF_TOL,
+                        p0.to_vec2(),
+                        p1.to_vec2(),
+                        p2.to_vec2(),
+                        p3.to_vec2(),
+                        t,
+                    );
+                    last_pt = Some(p3);
+                    joins += 1;
+                }
+            }
+        }
+        let Some(style) = stroke else {
+            self.lines.linetos += lineto_lines + fill_close_lines; // fills add closing lines
+            self.lines.curves += curve_lines;
+            self.lines.curve_count += curve_count;
+            return;
+        };
+
+        // For strokes, double-count the lines to estimate offset curves.
+        self.lines.linetos += 2 * lineto_lines;
+        self.lines.curves += 2 * curve_lines;
+        self.lines.curve_count += 2 * curve_count;
+
+        let round_scale = transform_scale(Some(t));
+        let width = style.width as f32;
+        self.count_stroke_caps(style.start_cap, width, caps, round_scale);
+        self.count_stroke_caps(style.end_cap, width, caps, round_scale);
+        self.count_stroke_joins(style.join, width, joins, round_scale);
+    }
+
+    /// Produce the final total, applying an optional transform to all content.
+    pub fn tally(&self, transform: Option<&Transform>) -> BumpAllocatorMemory {
+        let binning = BufferSize::new(0);
+        let ptcl = BufferSize::new(0);
+        let tile = BufferSize::new(0);
+        let seg_counts = BufferSize::new(0);
+        let segments = BufferSize::new(0);
+        let lines = BufferSize::new(self.lines.tally(transform_scale(transform)));
+        BumpAllocatorMemory {
+            total: binning.size_in_bytes()
+                + ptcl.size_in_bytes()
+                + tile.size_in_bytes()
+                + seg_counts.size_in_bytes()
+                + segments.size_in_bytes() // zero for now; keeps `total` covering every buffer
+                + lines.size_in_bytes(),
+            binning,
+            ptcl,
+            tile,
+            seg_counts,
+            segments,
+            lines,
+        }
+    }
+
+    fn count_stroke_caps(&mut self, style: Cap, width: f32, count: u32, scale: f32) {
+        match style {
+            Cap::Butt => self.lines.linetos += count,
+            Cap::Square => self.lines.linetos += 3 * count,
+            Cap::Round => {
+                self.lines.curves += count * estimate_arc_lines(width, scale);
+                self.lines.curve_count += count; // one arc (curve) per round cap
+            }
+        }
+    }
+
+    fn count_stroke_joins(&mut self, style: Join, width: f32, count: u32, scale: f32) {
+        match style {
+            Join::Bevel => self.lines.linetos += count,
+            Join::Miter => self.lines.linetos += 2 * count,
+            Join::Round => {
+                self.lines.curves += count * estimate_arc_lines(width, scale);
+                self.lines.curve_count += count; // one arc (curve) per round join
+            }
+        }
+    }
+}
+
+fn estimate_arc_lines(stroke_width: f32, scale: f32) -> u32 {
+    // Number of lines estimated for one arc of a round cap or join. `MIN_THETA` and `TOL`
+    // need to be kept consistent with the definitions in `flatten_arc` in flatten.wgsl.
+    const MIN_THETA: f32 = 1e-4;
+    const TOL: f32 = 0.1;
+    let radius = (scale * stroke_width * 0.5).max(TOL);
+    let theta = f32::max(2. * (1. - TOL / radius).acos(), MIN_THETA);
+    u32::max((std::f32::consts::FRAC_PI_2 / theta).ceil() as u32, 1)
+}
+
+#[derive(Clone, Default)]
+struct LineSoup {
+    // Explicit lines (such as linetos and non-round stroke caps/joins) and Bezier curves
+    // get tracked separately to ensure that explicit lines remain scale invariant.
+    linetos: u32,
+    curves: u32,
+
+    // Curve count is simply used to ensure a minimum number of lines get counted for each curve
+    // at very small scales to reduce the chance of under-allocating.
+    curve_count: u32,
+}
+
+impl LineSoup {
+    fn tally(&self, scale: f32) -> u32 {
+        // Each curve contributes at least 5 lines, however small the scale gets.
+        let min_curve_lines = 5 * self.curve_count;
+        let curve_lines = self.scaled_curve_line_count(scale).max(min_curve_lines);
+        self.linetos + curve_lines
+    }
+
+    fn scaled_curve_line_count(&self, scale: f32) -> u32 {
+        let scaled = self.curves as f32 * scale.sqrt();
+        scaled.ceil() as u32
+    }
+
+    fn add(&mut self, other: &LineSoup, scale: f32) {
+        self.linetos += other.linetos;
+        self.curves += other.scaled_curve_line_count(scale);
+        self.curve_count += other.curve_count;
+    }
+}
+
+// TODO: The 32-bit Vec2 definition from cpu_shaders/util.rs could come in handy here.
+fn transform(t: &Transform, v: Vec2) -> Vec2 {
+    // Only the 2x2 matrix of `t` is applied to `v`.
+    let x = t.matrix[0] as f64 * v.x + t.matrix[2] as f64 * v.y;
+    let y = t.matrix[1] as f64 * v.x + t.matrix[3] as f64 * v.y;
+    Vec2::new(x, y)
+}
+
+fn transform_scale(t: Option<&Transform>) -> f32 {
+    // Without a transform the content is measured at unit scale.
+    let Some(t) = t else {
+        return 1.;
+    };
+    // The scale is the sum of the lengths of the two vectors built below.
+    // NOTE(review): this appears to be twice the largest singular value of the 2x2 matrix
+    // (the usual closed form halves each term) - confirm the overestimate is intended.
+    let m = t.matrix;
+    let (sum_x, sum_y) = (m[0] + m[3], m[1] - m[2]);
+    let (diff_x, diff_y) = (m[0] - m[3], m[1] + m[2]);
+    (sum_x * sum_x + sum_y * sum_y).sqrt() + (diff_x * diff_x + diff_y * diff_y).sqrt()
+}
+
+/// Wang's Formula (as described in Pyramid Algorithms by Ron Goldman, 2003, Chapter 5, Section
+/// 5.6.3 on Bezier Approximation) is a fast method for computing a lower bound on the number of
+/// recursive subdivisions required to approximate a Bezier curve within a certain tolerance. The
+/// formula for a Bezier curve of degree `n`, control points `p[0]...p[n]`, and number of levels of
+/// subdivision `l`, and flattening tolerance `tol` is defined as follows:
+/// ```text
+///     m = max([length(p[k+2] - 2 * p[k+1] + p[k]) for (0 <= k <= n-2)])
+///     l >= log_4((n * (n - 1) * m) / (8 * tol))
+/// ```
+/// For recursive subdivisions that split a curve into 2 segments at each level, the minimum number
+/// of segments is given by 2^l. From the formula above it follows that:
+/// ```text
+///       segments >= 2^l >= 2^log_4(x)                      (1)
+///     segments^2 >= 2^(2*log_4(x)) >= 4^log_4(x)           (2)
+///     segments^2 >= x
+///       segments >= sqrt((n * (n - 1) * m) / (8 * tol))    (3)
+/// ```
+/// Wang's formula computes an error bound on recursive subdivision based on the second derivative
+/// which tends to result in a suboptimal estimate when the curvature within the curve has a lot of
+/// variation. This is expected to frequently overshoot the flattening formula used in vello, which
+/// is closer to optimal (vello uses a method based on a numerical approximation of the integral
+/// over the continuous change in the number of flattened segments, with an error expressed in terms
+/// of curvature and infinitesimal arclength).
+mod wang {
+    use super::*;
+
+    // The curve degree term sqrt(n * (n - 1) / 8) specialized for cubics:
+    //
+    //    sqrt(3 * (3 - 1) / 8)
+    //
+    const SQRT_OF_DEGREE_TERM_CUBIC: f64 = 0.86602540378;
+
+    // The curve degree term sqrt(n * (n - 1) / 8) specialized for quadratics:
+    //
+    //    sqrt(2 * (2 - 1) / 8)
+    //
+    const SQRT_OF_DEGREE_TERM_QUAD: f64 = 0.5;
+
+    pub fn quadratic(rsqrt_of_tol: f64, p0: Vec2, p1: Vec2, p2: Vec2, t: &Transform) -> u32 {
+        let v = -2. * p1 + p0 + p2; // the single second difference of a quadratic
+        let v = transform(t, v); // transform is distributive
+        let m = v.length();
+        (SQRT_OF_DEGREE_TERM_QUAD * m.sqrt() * rsqrt_of_tol).ceil() as u32
+    }
+
+    pub fn cubic(rsqrt_of_tol: f64, p0: Vec2, p1: Vec2, p2: Vec2, p3: Vec2, t: &Transform) -> u32 {
+        let v1 = -2. * p1 + p0 + p2;
+        let v2 = -2. * p2 + p1 + p3;
+        let v1 = transform(t, v1);
+        let v2 = transform(t, v2);
+        let m = v1.length().max(v2.length()); // `m`: the larger transformed second difference
+        (SQRT_OF_DEGREE_TERM_CUBIC * m.sqrt() * rsqrt_of_tol).ceil() as u32
+    }
+}
diff --git a/crates/encoding/src/lib.rs b/crates/encoding/src/lib.rs
index 2ace2819a..30db95000 100644
--- a/crates/encoding/src/lib.rs
+++ b/crates/encoding/src/lib.rs
@@ -8,6 +8,8 @@ mod clip;
 mod config;
 mod draw;
 mod encoding;
+#[cfg(feature = "bump_estimate")]
+mod estimate;
 #[cfg(feature = "full")]
 mod glyph;
 #[cfg(feature = "full")]
@@ -25,8 +27,8 @@ mod resolve;
 pub use binning::BinHeader;
 pub use clip::{Clip, ClipBbox, ClipBic, ClipElement};
 pub use config::{
-    BufferSize, BufferSizes, BumpAllocators, ConfigUniform, IndirectCount, RenderConfig,
-    WorkgroupCounts, WorkgroupSize,
+    BufferSize, BufferSizes, BumpAllocatorMemory, BumpAllocators, ConfigUniform, IndirectCount,
+    RenderConfig, WorkgroupCounts, WorkgroupSize,
 };
 pub use draw::{
     DrawBbox, DrawBeginClip, DrawColor, DrawImage, DrawLinearGradient, DrawMonoid,
@@ -49,3 +51,6 @@ pub use {
     ramp_cache::Ramps,
     resolve::{Patch, Resolver},
 };
+
+#[cfg(feature = "bump_estimate")]
+pub use estimate::BumpEstimator;
diff --git a/src/scene.rs b/src/scene.rs
index 6fffd358d..4e2c0d7fc 100644
--- a/src/scene.rs
+++ b/src/scene.rs
@@ -4,6 +4,8 @@
 use peniko::kurbo::{Affine, Rect, Shape, Stroke};
 use peniko::{BlendMode, BrushRef, Color, Fill, Font, Image, StyleRef};
 use skrifa::instance::NormalizedCoord;
+#[cfg(feature = "bump_estimate")]
+use vello_encoding::BumpAllocatorMemory;
 use vello_encoding::{Encoding, Glyph, GlyphRun, Patch, Transform};
 
 // TODO - Document invariants and edge cases (#470)
@@ -17,6 +19,8 @@ use vello_encoding::{Encoding, Glyph, GlyphRun, Patch, Transform};
 #[derive(Clone, Default)]
 pub struct Scene {
     encoding: Encoding,
+    #[cfg(feature = "bump_estimate")]
+    estimator: vello_encoding::BumpEstimator,
 }
 
 impl Scene {
@@ -28,6 +32,16 @@ impl Scene {
     /// Removes all content from the scene.
     pub fn reset(&mut self) {
         self.encoding.reset();
+        #[cfg(feature = "bump_estimate")]
+        self.estimator.reset();
+    }
+
+    /// Returns the bump allocator estimate for the content encoded so far, taking
+    /// into account an optional `transform` applied to the entire scene.
+    #[cfg(feature = "bump_estimate")]
+    pub fn bump_estimate(&self, transform: Option<Affine>) -> BumpAllocatorMemory {
+        let t = transform.as_ref().map(Transform::from_kurbo);
+        self.estimator.tally(t.as_ref())
     }
 
     /// Returns the underlying raw encoding.
@@ -50,14 +64,17 @@ impl Scene {
         clip: &impl Shape,
     ) {
         let blend = blend.into();
-        self.encoding
-            .encode_transform(Transform::from_kurbo(&transform));
+        let t = Transform::from_kurbo(&transform);
+        self.encoding.encode_transform(t);
         self.encoding.encode_fill_style(Fill::NonZero);
         if !self.encoding.encode_shape(clip, true) {
             // If the layer shape is invalid, encode a valid empty path. This suppresses
             // all drawing until the layer is popped.
             self.encoding
                 .encode_shape(&Rect::new(0.0, 0.0, 0.0, 0.0), true);
+        } else {
+            #[cfg(feature = "bump_estimate")]
+            self.estimator.count_path(clip.path_elements(0.1), &t, None);
         }
         self.encoding
             .encode_begin_clip(blend, alpha.clamp(0.0, 1.0));
@@ -77,8 +94,8 @@ impl Scene {
         brush_transform: Option<Affine>,
         shape: &impl Shape,
     ) {
-        self.encoding
-            .encode_transform(Transform::from_kurbo(&transform));
+        let t = Transform::from_kurbo(&transform);
+        self.encoding.encode_transform(t);
         self.encoding.encode_fill_style(style);
         if self.encoding.encode_shape(shape, true) {
             if let Some(brush_transform) = brush_transform {
@@ -90,6 +107,9 @@ impl Scene {
                 }
             }
             self.encoding.encode_brush(brush, 1.0);
+            #[cfg(feature = "bump_estimate")]
+            self.estimator
+                .count_path(shape.path_elements(0.1), &t, None);
         }
     }
 
@@ -118,22 +138,35 @@ impl Scene {
 
         const GPU_STROKES: bool = false; // Set this to `true` to enable GPU-side stroking
         if GPU_STROKES {
-            self.encoding
-                .encode_transform(Transform::from_kurbo(&transform));
+            let t = Transform::from_kurbo(&transform);
+            self.encoding.encode_transform(t);
             self.encoding.encode_stroke_style(style);
 
             // We currently don't support dashing on the GPU. If the style has a dash pattern, then
             // we convert it into stroked paths on the CPU and encode those as individual draw
             // objects.
             let encode_result = if style.dash_pattern.is_empty() {
+                #[cfg(feature = "bump_estimate")]
+                self.estimator
+                    .count_path(shape.path_elements(SHAPE_TOLERANCE), &t, Some(style));
                 self.encoding.encode_shape(shape, false)
             } else {
+                // TODO: We currently collect the output of the dash iterator because
+                // `encode_path_elements` wants to consume the iterator. We want to avoid calling
+                // `dash` twice when `bump_estimate` is enabled because it internally allocates.
+                // Bump estimation will move to resolve time rather than scene construction time,
+                // so we can revert this back to not collecting when that happens.
                 let dashed = peniko::kurbo::dash(
                     shape.path_elements(SHAPE_TOLERANCE),
                     style.dash_offset,
                     &style.dash_pattern,
-                );
-                self.encoding.encode_path_elements(dashed, false)
+                )
+                .collect::<Vec<_>>();
+                #[cfg(feature = "bump_estimate")]
+                self.estimator
+                    .count_path(dashed.iter().copied(), &t, Some(style));
+                self.encoding
+                    .encode_path_elements(dashed.into_iter(), false)
             };
             if encode_result {
                 if let Some(brush_transform) = brush_transform {
@@ -170,6 +203,7 @@ impl Scene {
 
     /// Returns a builder for encoding a glyph run.
     pub fn draw_glyphs(&mut self, font: &Font) -> DrawGlyphs {
+        // TODO: Integrate `BumpEstimator` with the glyph cache.
         DrawGlyphs::new(&mut self.encoding, font)
     }
 
@@ -178,10 +212,10 @@ impl Scene {
     /// The given transform is applied to every transform in the child.
     /// This is an O(N) operation.
     pub fn append(&mut self, other: &Scene, transform: Option<Affine>) {
-        self.encoding.append(
-            &other.encoding,
-            &transform.map(|xform| Transform::from_kurbo(&xform)),
-        );
+        let t = transform.as_ref().map(Transform::from_kurbo);
+        self.encoding.append(&other.encoding, &t);
+        #[cfg(feature = "bump_estimate")]
+        self.estimator.append(&other.estimator, t.as_ref());
     }
 }
 
@@ -283,8 +317,7 @@ impl<'a> DrawGlyphs<'a> {
         self
     }
 
-    /// Encodes a fill or stroke for for the given sequence of glyphs and consumes
-    /// the builder.
+    /// Encodes a fill or stroke for the given sequence of glyphs and consumes the builder.
     ///
     /// The `style` parameter accepts either `Fill` or `&Stroke` types.
     pub fn draw(mut self, style: impl Into<StyleRef<'a>>, glyphs: impl Iterator<Item = Glyph>) {