Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make clvm_rs a replacement for clvm #253

Draft
wants to merge 46 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
a897e01
checkpoint
richardkiss Sep 21, 2022
01e11d0
Add serialize tests.
richardkiss Sep 21, 2022
f8993f1
Test improvements.
richardkiss Sep 22, 2022
91c2492
Some speed improvements.
richardkiss Sep 28, 2022
62142b9
More improvements.
richardkiss Sep 29, 2022
94080d6
Improve coverage.
richardkiss Oct 5, 2022
e6255a3
Make deserialization time tree-hashing optional.
richardkiss Oct 8, 2022
d797d66
Tree hash on deserialization is now optional.
richardkiss Oct 8, 2022
f7f5eef
Refactor, rename.
richardkiss Oct 17, 2022
a154d7e
First crack at rust tree hashes.
richardkiss Oct 18, 2022
9a2ca75
Use rust parsing if present.
richardkiss Jan 13, 2023
49fe40f
checkpoint
richardkiss Jan 13, 2023
8ecaee4
tests pass
richardkiss Jan 5, 2023
4a11b0f
checkpoint
richardkiss Jan 19, 2023
48b2b74
Rename `base`.
richardkiss Jan 20, 2023
23ab08e
Rename to `eval_error.py`.
richardkiss Jan 20, 2023
a15c2c1
Rename modules.
richardkiss Jan 20, 2023
31af146
Add comments, improve implementations.
richardkiss Jan 20, 2023
0107e8a
Remove `keywords.py`.
richardkiss Jan 20, 2023
f2162a0
Various improvements to python.
richardkiss Jan 24, 2023
d796ee4
More tests.
richardkiss Jan 24, 2023
fbff98c
Improvements to `uncurry`, tests, coverage.
richardkiss Jan 24, 2023
3172fda
Refactor.
richardkiss Jan 24, 2023
f80a0fc
More refactor.
richardkiss Jan 25, 2023
81e54ef
Handle end of stream properly.
richardkiss Jan 25, 2023
3cfbd56
More refactor.
richardkiss Jan 26, 2023
4b8fe5b
Fix name
richardkiss Jan 26, 2023
0acda17
refactor
richardkiss Jan 26, 2023
67dc587
fix benchmarks
richardkiss Jan 26, 2023
edde719
fix benchmark
richardkiss Jan 26, 2023
0c25e04
Support py37
richardkiss Jan 26, 2023
4f5c9dc
Use api
richardkiss Jan 26, 2023
3bc775d
fix comments
richardkiss Jan 27, 2023
0ffa864
lint
richardkiss Feb 1, 2023
c1724d7
Tests pass, coverage seems good, benchmarks seem good.
richardkiss Feb 3, 2023
8f7abca
Speed up `parse` and `__eq__`.
richardkiss Feb 7, 2023
2b7f9cf
Improve benchmarking code.
richardkiss Feb 7, 2023
b6905ea
Factor out `copy_exact` and `skip_bytes`.
richardkiss Feb 7, 2023
c6af427
Add `skip_clvm_object` api.
richardkiss Feb 8, 2023
61146ea
Add more `parse` tests.
richardkiss Feb 8, 2023
3a0183a
Use old error messages.
richardkiss Feb 8, 2023
ac05bc9
Remove a bunch of `.as_*` methods.
richardkiss Feb 10, 2023
818ee69
Don't cast `None`. Add `.at_many`.
richardkiss Feb 10, 2023
c9ece37
Don't cast `None`. Add `.at_many`.
richardkiss Feb 10, 2023
17196e7
Add back `.as_iter()`.
richardkiss Feb 16, 2023
2ba2689
Merge remote-tracking branch 'chia/main' into remove-clvm-dep
richardkiss Apr 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
327 changes: 327 additions & 0 deletions src/serde/de_tree.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,327 @@
use std::convert::TryInto;
use std::io::{Error, Read, Result, Write};

use sha2::Digest;

use crate::sha2::Sha256;

use super::parse_atom::decode_size_with_offset;
use super::utils::{copy_exactly, skip_bytes};

const MAX_SINGLE_BYTE: u8 = 0x7f;
const CONS_BOX_MARKER: u8 = 0xff;

struct ShaWrapper(Sha256);

impl Write for ShaWrapper {
fn write(&mut self, blob: &[u8]) -> std::result::Result<usize, Error> {
self.0.update(blob);
Ok(blob.len())
}
fn flush(&mut self) -> std::result::Result<(), Error> {
Ok(())
}
}

/// This data structure is used with `parse_triples`, which returns a triple of
/// integer values for each clvm object in a tree.
///
/// All `start`/`end` values are byte offsets into the serialized stream that
/// was parsed; `end` is exclusive (one past the object's last byte).
#[derive(Debug, PartialEq, Eq)]
pub enum ParsedTriple {
    /// A serialized atom.
    Atom {
        /// Offset of the first byte of the serialized atom (its prefix byte).
        start: u64,
        /// Offset just past the last byte of the serialized atom.
        end: u64,
        /// Offset, relative to `start`, at which the atom's data begins; the
        /// data continues to `end`. Zero for atoms serialized as one byte.
        atom_offset: u32,
    },
    /// A serialized pair. Its left element is the entry immediately
    /// following this one in the output array.
    Pair {
        /// Offset of the pair's marker byte.
        start: u64,
        /// Offset just past the last byte of the pair's right element.
        end: u64,
        /// Index in the output array of the pair's right element.
        right_index: u32,
    },
}

/// Pending work items for the explicit stack that `parse_triples` uses in
/// place of recursion.
enum ParseOpRef {
    /// Read the next clvm object (atom or pair marker) from the stream.
    ParseObj,
    /// Both children of the pair at the given output index have been parsed:
    /// record the current stream offset as the pair's `end` (and combine the
    /// children's tree hashes when hashing is enabled).
    SaveCursor(usize),
    /// The left child of the pair at the given output index has been parsed:
    /// record the current output length as the pair's `right_index`.
    SaveIndex(usize),
}

/// Compute the SHA-256 digest of the concatenation of `blobs`, in order.
fn sha_blobs(blobs: &[&[u8]]) -> [u8; 32] {
    let mut hasher = Sha256::new();
    blobs.iter().for_each(|blob| {
        hasher.update(blob);
    });
    let digest = hasher.finalize();
    digest
        .as_slice()
        .try_into()
        .expect("wrong slice length")
}

/// Tree hash of an atom serialized as the single byte `b`: the SHA-256 of
/// the bytes `[1, b]`.
///
/// Returns `None` without hashing when `calculate_tree_hashes` is false.
fn tree_hash_for_byte(b: u8, calculate_tree_hashes: bool) -> Option<[u8; 32]> {
    calculate_tree_hashes.then(|| sha_blobs(&[&[1, b]]))
}

fn skip_or_sha_bytes<R: Read>(
f: &mut R,
skip_size: u64,
calculate_tree_hashes: bool,
) -> Result<Option<[u8; 32]>> {
if calculate_tree_hashes {
let mut h = Sha256::new();
h.update([1]);
let mut w = ShaWrapper(h);
copy_exactly(f, &mut w, skip_size)?;
let r: [u8; 32] =
w.0.finalize()
.as_slice()
.try_into()
.expect("wrong slice length");
Ok(Some(r))
} else {
skip_bytes(f, skip_size)?;
Ok(None)
}
}

/// parse a serialized clvm object tree to an array of `ParsedTriple` objects

/// This alternative mechanism of deserialization generates an array of
/// references to each clvm object. A reference contains three values:
/// a start offset within the blob, an end offset, and a third value that
/// is either: an atom offset (relative to the start offset) where the atom
/// data starts (and continues to the end offset); or an index in the array
/// corresponding to the "right" element of the pair (in which case, the
/// "left" element corresponds to the current index + 1).
///
/// Since these values are offsets into the original buffer, that buffer needs
/// to be kept around to get the original atoms.

type ParsedTriplesOutput = (Vec<ParsedTriple>, Option<Vec<[u8; 32]>>);

pub fn parse_triples<R: Read>(
f: &mut R,
calculate_tree_hashes: bool,
) -> Result<ParsedTriplesOutput> {
let mut r = Vec::new();
let mut tree_hashes = Vec::new();
let mut op_stack = vec![ParseOpRef::ParseObj];
let mut cursor: u64 = 0;
loop {
match op_stack.pop() {
None => {
break;
}
Some(op) => match op {
ParseOpRef::ParseObj => {
let mut b: [u8; 1] = [0];
f.read_exact(&mut b)?;
let start = cursor;
cursor += 1;
let b = b[0];
if b == CONS_BOX_MARKER {
let index = r.len();
let new_obj = ParsedTriple::Pair {
start,
end: 0,
right_index: 0,
};
r.push(new_obj);
if calculate_tree_hashes {
tree_hashes.push([0; 32])
}
op_stack.push(ParseOpRef::SaveCursor(index));
op_stack.push(ParseOpRef::ParseObj);
op_stack.push(ParseOpRef::SaveIndex(index));
op_stack.push(ParseOpRef::ParseObj);
} else {
let (start, end, atom_offset, tree_hash) = {
if b <= MAX_SINGLE_BYTE {
(
start,
start + 1,
0,
tree_hash_for_byte(b, calculate_tree_hashes),
)
} else {
let (atom_offset, atom_size) = decode_size_with_offset(f, b)?;
let end = start + (atom_offset as u64) + atom_size;
let h = skip_or_sha_bytes(f, atom_size, calculate_tree_hashes)?;
(start, end, atom_offset as u32, h)
}
};
if calculate_tree_hashes {
tree_hashes.push(tree_hash.expect("failed unwrap"))
}
let new_obj = ParsedTriple::Atom {
start,
end,
atom_offset,
};
cursor = end;
r.push(new_obj);
}
}
ParseOpRef::SaveCursor(index) => {
if let ParsedTriple::Pair {
start,
end: _,
right_index,
} = r[index]
{
if calculate_tree_hashes {
let h = sha_blobs(&[
&[2],
&tree_hashes[index + 1],
&tree_hashes[right_index as usize],
]);
tree_hashes[index] = h;
}
r[index] = ParsedTriple::Pair {
start,
end: cursor,
right_index,
};
}
}
ParseOpRef::SaveIndex(index) => {
if let ParsedTriple::Pair {
start,
end,
right_index: _,
} = r[index]
{
r[index] = ParsedTriple::Pair {
start,
end,
right_index: r.len() as u32,
};
}
}
},
}
}
Ok((
r,
if calculate_tree_hashes {
Some(tree_hashes)
} else {
None
},
))
}

#[cfg(test)]
use std::io::Cursor;

#[cfg(test)]
use hex::FromHex;

/// Test helper: parse the hex-encoded serialization `h` both without and with
/// tree hashing, and check that the triples equal `expected` in both modes,
/// that no hashes are returned when hashing is disabled, and that the root
/// tree hash (index 0) equals `expected_sha_tree_hex` when it is enabled.
#[cfg(test)]
fn check_parse_tree(h: &str, expected: Vec<ParsedTriple>, expected_sha_tree_hex: &str) {
    let blob: Vec<u8> = Vec::from_hex(h).unwrap();

    // Pass 1: hashing disabled.
    let mut f = Cursor::new(&blob);
    let (p, tree_hash) = parse_triples(&mut f, false).unwrap();
    assert_eq!(p, expected);
    assert_eq!(tree_hash, None);

    // Pass 2: hashing enabled; the triples must not depend on the flag.
    let mut f = Cursor::new(&blob);
    let (p, tree_hash) = parse_triples(&mut f, true).unwrap();
    assert_eq!(p, expected);

    let est: Vec<u8> = Vec::from_hex(expected_sha_tree_hex).unwrap();
    assert_eq!(tree_hash.unwrap()[0].to_vec(), est);
}

/// Exercise `parse_triples` on a zero-length atom, a simple pair, a proper
/// list, and an atom with a two-byte size prefix.
#[test]
fn test_parse_tree() {
    // Shorthand constructors to keep the expected tables readable.
    fn atom(start: u64, end: u64, atom_offset: u32) -> ParsedTriple {
        ParsedTriple::Atom {
            start,
            end,
            atom_offset,
        }
    }
    fn pair(start: u64, end: u64, right_index: u32) -> ParsedTriple {
        ParsedTriple::Pair {
            start,
            end,
            right_index,
        }
    }

    // A lone `0x80`: one prefix byte and no atom data.
    check_parse_tree(
        "80",
        vec![atom(0, 1, 1)],
        "4bf5122f344554c53bde2ebb8cd2b7e3d1600ad631c385a5d7cce23c7785459a",
    );

    // A pair of a single-byte atom and a two-byte atom.
    check_parse_tree(
        "ff648200c8",
        vec![pair(0, 5, 2), atom(1, 2, 0), atom(2, 5, 1)],
        "247f7d3f63b346ea93ca47f571cd0f4455392348b888a4286072bef0ac6069b5",
    );

    // `(foo bar baz)`
    check_parse_tree(
        "ff83666f6fff83626172ff8362617a80",
        vec![
            pair(0, 16, 2),
            atom(1, 5, 1),
            pair(5, 16, 4),
            atom(6, 10, 1),
            pair(10, 16, 6),
            atom(11, 15, 1),
            atom(15, 16, 1),
        ],
        "47f30bf9935e25e4262023124fb5e986d755b9ed65a28ac78925c933bfd57dbd",
    );

    // A 160-byte atom, which needs the two-byte size prefix `c0a0`.
    let long_atom = format!("c0a0{}", hex::encode([0x31u8; 160]));
    check_parse_tree(
        &long_atom,
        vec![atom(0, 162, 2)],
        "d1c109981a9c5a3bbe2d98795a186a0f057dc9a3a7f5e1eb4dfb63a1636efa2d",
    );
}
5 changes: 4 additions & 1 deletion src/serde/mod.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
mod bytes32;
mod de;
mod de_br;
mod de_tree;
mod errors;
mod object_cache;
mod parse_atom;
mod read_cache_lookup;
mod ser;
mod ser_br;
mod tools;
mod utils;
mod write_atom;

#[cfg(test)]
mod test;

// Public API of the `serde` module.
pub use de::node_from_bytes;
pub use de_br::node_from_bytes_backrefs;
pub use de_tree::{parse_triples, ParsedTriple};
pub use ser::node_to_bytes;
pub use ser_br::node_to_bytes_backrefs;
// A single re-export from `tools`: listing the same names in two `pub use`
// items would define them twice in this module.
pub use tools::{parse_through_clvm_object, serialized_length_from_bytes, tree_hash_from_stream};
Loading