Skip to content

Commit

Permalink
Doing Stream-based leb128 encoding (#3211)
Browse files Browse the repository at this point in the history
Implement routines for encoding bignums directly into the stream. This path is slightly more expensive than encoding into a buffer, so we move the tight-loop bottleneck function into `stream.rs`, such that we get access to the (otherwise opaque) `Stream` members.

This fixes a real problem, where arbitrary buffer reservations (extending beyond the legal end of the cache) could interfere with heap traffic because of bignum arithmetic (i.e. 7-bit shifts) happening while encoding.

Resolves the residual problem from #3149.

---------
TODO:
- [x] Why is `stable-exp.mo` failing suddenly? — probably 495c91f
- [ ] add `QuickCheck` (de)serialisation tests for `Nat/Int`
- [x] explore having a `stream::write_(s)leb128`
  • Loading branch information
ggreif authored May 4, 2022
1 parent 1d55ca6 commit 2792670
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 19 deletions.
39 changes: 33 additions & 6 deletions rts/motoko-rts/src/bigint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use crate::buf::{read_byte, Buf};
use crate::mem_utils::memcpy_bytes;
use crate::memory::Memory;
use crate::tommath_bindings::*;
use crate::types::{size_of, BigInt, Bytes, Value, TAG_BIGINT};
use crate::types::{size_of, BigInt, Bytes, Stream, Value, TAG_BIGINT};

use motoko_rts_macros::ic_mem_fn;

Expand Down Expand Up @@ -122,16 +122,18 @@ compiler.
// Trap function generated by compiler. Originally added in e2ca6a1. I think this could be
// simplified now by calling rts_trap and removing generated code from the compiler.
extern "C" {
fn bigint_trap() -> !;
pub(crate) fn bigint_trap() -> !;
}

unsafe fn check(err: mp_err) {
#[inline]
pub(crate) unsafe fn check(err: mp_err) {
if err != 0 {
bigint_trap();
}
}

unsafe fn mp_get_u32(p: *const mp_int) -> u32 {
#[inline]
pub(crate) unsafe fn mp_get_u32(p: *const mp_int) -> u32 {
mp_get_i32(p) as u32
}

Expand All @@ -140,11 +142,11 @@ unsafe fn mp_get_u64(p: *const mp_int) -> u64 {
mp_get_i64(p) as u64
}

unsafe fn mp_isneg(p: *const mp_int) -> bool {
pub(crate) unsafe fn mp_isneg(p: *const mp_int) -> bool {
(*p).sign == 1
}

unsafe fn mp_iszero(p: *const mp_int) -> bool {
pub(crate) unsafe fn mp_iszero(p: *const mp_int) -> bool {
(*p).used == 0
}

Expand Down Expand Up @@ -437,6 +439,13 @@ pub unsafe extern "C" fn bigint_leb128_encode(n: Value, buf: *mut u8) {
bigint_leb128_encode_go(&mut tmp, buf, false)
}

#[no_mangle]
pub unsafe extern "C" fn bigint_leb128_stream_encode(stream: *mut Stream, n: Value) {
let mut tmp: mp_int = core::mem::zeroed(); // or core::mem::uninitialized?
check(mp_init_copy(&mut tmp, n.as_bigint().mp_int_ptr()));
stream.write_leb128(&mut tmp, false)
}

#[no_mangle]
unsafe extern "C" fn bigint_2complement_bits(n: Value) -> u32 {
let mp_int = n.as_bigint().mp_int_ptr();
Expand Down Expand Up @@ -473,6 +482,24 @@ pub unsafe extern "C" fn bigint_sleb128_encode(n: Value, buf: *mut u8) {
}
}

#[no_mangle]
pub unsafe extern "C" fn bigint_sleb128_stream_encode(stream: *mut Stream, n: Value) {
let mut tmp: mp_int = core::mem::zeroed(); // or core::mem::uninitialized?
check(mp_init_copy(&mut tmp, n.as_bigint().mp_int_ptr()));

if mp_isneg(&tmp) {
// Turn negative numbers into the two's complement of the right size
let mut big: mp_int = core::mem::zeroed();
check(mp_init(&mut big));
let bytes = bigint_sleb128_size(n);
check(mp_2expt(&mut big, 7 * bytes as i32));
check(mp_add(&mut tmp, &big, &mut tmp));
stream.write_leb128(&mut tmp, false)
} else {
stream.write_leb128(&mut tmp, true)
}
}

#[no_mangle]
pub unsafe extern "C" fn bigint_leb128_decode(buf: *mut Buf) -> Value {
let mut i = tmp_bigint();
Expand Down
18 changes: 18 additions & 0 deletions rts/motoko-rts/src/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@
// (from `compile.ml`) in sync with the layout!
// - Note: `len` and `filled` are relative to the encompassing blob.

use crate::bigint::{check, mp_get_u32, mp_isneg, mp_iszero};
use crate::mem_utils::memcpy_bytes;
use crate::memory::{alloc_blob, Memory};
use crate::rts_trap_with;
use crate::tommath_bindings::{mp_div_2d, mp_int};
use crate::types::{size_of, Blob, Bytes, Stream, Value, TAG_BLOB};

use motoko_rts_macros::ic_mem_fn;
Expand Down Expand Up @@ -136,6 +138,7 @@ impl Stream {
}

/// Ingest a single byte into the stream.
#[inline]
#[export_name = "stream_write_byte"]
pub fn cache_byte(self: *mut Self, byte: u8) {
unsafe {
Expand Down Expand Up @@ -164,6 +167,21 @@ impl Stream {
}
}

/// like `bigint_leb128_encode_go`, but to a stream
pub(crate) unsafe fn write_leb128(self: *mut Stream, tmp: *mut mp_int, add_bit: bool) {
debug_assert!(!mp_isneg(tmp));

loop {
let byte = mp_get_u32(tmp) as u8;
check(mp_div_2d(tmp, 7, tmp, core::ptr::null_mut()));
if !mp_iszero(tmp) || (add_bit && byte & (1 << 6) != 0) {
self.cache_byte(byte | (1 << 7));
} else {
return self.cache_byte(byte);
}
}
}

/// Split the stream object into two `Blob`s, a front-runner (small) one
/// and a latter one that comprises the current amount of the cached bytes.
/// Lengths are adjusted correspondingly.
Expand Down
75 changes: 62 additions & 13 deletions src/codegen/compile.ml
Original file line number Diff line number Diff line change
Expand Up @@ -858,10 +858,12 @@ module RTS = struct
E.add_func_import env "rts" "bigint_abs" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_leb128_size" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_leb128_encode" [I32Type; I32Type] [];
E.add_func_import env "rts" "bigint_leb128_stream_encode" [I32Type; I32Type] [];
E.add_func_import env "rts" "bigint_leb128_decode" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_leb128_decode_word64" [I64Type; I64Type; I32Type] [I32Type];
E.add_func_import env "rts" "bigint_sleb128_size" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_sleb128_encode" [I32Type; I32Type] [];
E.add_func_import env "rts" "bigint_sleb128_stream_encode" [I32Type; I32Type] [];
E.add_func_import env "rts" "bigint_sleb128_decode" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_sleb128_decode_word64" [I64Type; I64Type; I32Type] [I32Type];
E.add_func_import env "rts" "leb128_encode" [I32Type; I32Type] [];
Expand Down Expand Up @@ -2098,6 +2100,13 @@ sig
*)
val compile_store_to_data_buf_signed : E.t -> G.t
val compile_store_to_data_buf_unsigned : E.t -> G.t
(* given on stack
- numeric object (vanilla, TOS)
- (unskewed) stream
store the binary representation of the numeric object into the stream
*)
val compile_store_to_stream_signed : E.t -> G.t
val compile_store_to_stream_unsigned : E.t -> G.t
(* given a ReadBuf on stack, consume bytes from it,
deserializing to a numeric object
and leave it on the stack (vanilla).
Expand Down Expand Up @@ -2186,7 +2195,6 @@ module I32Leb = struct
let compile_store_to_data_buf_signed env get_x get_buf =
get_x ^^ get_buf ^^ E.call_import env "rts" "sleb128_encode" ^^
compile_sleb128_size get_x

end

module MakeCompact (Num : BigNumType) : BigNumType = struct
Expand Down Expand Up @@ -2530,6 +2538,48 @@ module MakeCompact (Num : BigNumType) : BigNumType = struct
get_buf ^^ get_x ^^ Num.compile_store_to_data_buf_signed env)
env

let compile_store_to_stream_unsigned env =
let set_x, get_x = new_local env "x" in
let set_stream, get_stream = new_local env "stream" in
set_x ^^ set_stream ^^
get_x ^^
try_unbox I32Type
(fun env ->
BitTagged.untag_i32 ^^ set_x ^^
(* get size & reserve & encode *)
let dest =
get_stream ^^
I32Leb.compile_leb128_size get_x ^^
E.call_import env "rts" "stream_reserve" in
I32Leb.compile_store_to_data_buf_unsigned env get_x dest)
(fun env ->
G.i Drop ^^
get_stream ^^ get_x ^^ Num.compile_store_to_stream_unsigned env ^^
compile_unboxed_zero)
env ^^
G.i Drop

let compile_store_to_stream_signed env =
let set_x, get_x = new_local env "x" in
let set_stream, get_stream = new_local env "stream" in
set_x ^^ set_stream ^^
get_x ^^
try_unbox I32Type
(fun env ->
BitTagged.untag_i32 ^^ set_x ^^
(* get size & reserve & encode *)
let dest =
get_stream ^^
I32Leb.compile_sleb128_size get_x ^^
E.call_import env "rts" "stream_reserve" in
I32Leb.compile_store_to_data_buf_signed env get_x dest)
(fun env ->
G.i Drop ^^
get_stream ^^ get_x ^^ Num.compile_store_to_stream_signed env ^^
compile_unboxed_zero)
env ^^
G.i Drop

let compile_data_size_unsigned env =
try_unbox I32Type
(fun _ ->
Expand Down Expand Up @@ -2643,13 +2693,20 @@ module BigNumLibtommath : BigNumType = struct
set_n ^^ set_buf ^^
get_n ^^ get_buf ^^ E.call_import env "rts" "bigint_leb128_encode" ^^
get_n ^^ E.call_import env "rts" "bigint_leb128_size"

let compile_store_to_stream_unsigned env =
E.call_import env "rts" "bigint_leb128_stream_encode"

let compile_store_to_data_buf_signed env =
let (set_buf, get_buf) = new_local env "buf" in
let (set_n, get_n) = new_local env "n" in
set_n ^^ set_buf ^^
get_n ^^ get_buf ^^ E.call_import env "rts" "bigint_sleb128_encode" ^^
get_n ^^ E.call_import env "rts" "bigint_sleb128_size"

let compile_store_to_stream_signed env =
E.call_import env "rts" "bigint_sleb128_stream_encode"

let compile_load_from_data_buf env get_data_buf = function
| false -> get_data_buf ^^ E.call_import env "rts" "bigint_leb128_decode"
| true -> get_data_buf ^^ E.call_import env "rts" "bigint_sleb128_decode"
Expand Down Expand Up @@ -5921,20 +5978,12 @@ module BlobStream : Stream = struct
E.call_import env "rts" "stream_write_text"

let write_bignum_leb env get_token get_x =
get_token ^^
get_x ^^ BigNum.compile_data_size_unsigned env ^^
E.call_import env "rts" "stream_reserve" ^^ (* FIXME: might overflow! *)
get_x ^^
BigNum.compile_store_to_data_buf_unsigned env ^^
G.i Drop
get_token ^^ get_x ^^
BigNum.compile_store_to_stream_unsigned env

let write_bignum_sleb env get_token get_x =
get_token ^^
get_x ^^ BigNum.compile_data_size_signed env ^^
E.call_import env "rts" "stream_reserve" ^^ (* FIXME: might overflow! *)
get_x ^^
BigNum.compile_store_to_data_buf_signed env ^^
G.i Drop
get_token ^^ get_x ^^
BigNum.compile_store_to_stream_signed env

end

Expand Down

0 comments on commit 2792670

Please sign in to comment.