Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jonas/refactor tables #40

Merged
merged 11 commits into from
Nov 7, 2023
2 changes: 1 addition & 1 deletion hacspec-scrambledb/scrambledb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ rand = { version = "0.8.5", optional = true }
getrandom = { version = "0.2.10", features = ["js"], optional = true }
hex = { version = "0.4.3", optional = true }

libcrux = { git = "https://github.com/cryspen/libcrux.git" }
libcrux = { git = "https://github.com/cryspen/libcrux.git", rev = "8889c70b1faf26d131f14442f54a5938ab1deff6" }
jschneider-bensch marked this conversation as resolved.
Show resolved Hide resolved

gloo-utils = { version = "0.1", features = ["serde"] }
serde_json = "1.0.106"
Expand Down
4 changes: 2 additions & 2 deletions hacspec-scrambledb/scrambledb/src/data_transformations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ pub fn blind_pseudonymized_datum(
// Blind recovered raw pseudonym towards receiver.
let blinded_handle = BlindedPseudonymizedHandle(prepare_blind_convert(
*bpk,
store_context.recover_raw_pseudonym(datum.handle.0)?,
store_context.recover_raw_pseudonym(datum.handle)?,
randomness,
)?);

Expand Down Expand Up @@ -204,7 +204,7 @@ pub fn finalize_blinded_datum(
datum: &BlindedPseudonymizedData,
) -> Result<PseudonymizedData, Error> {
// Finalize pseudonym for storage.
let handle = FinalizedPseudonym(store_context.finalize_pseudonym(datum.blinded_handle.0)?);
let handle = store_context.finalize_pseudonym(datum.blinded_handle)?;

// Decrypt data value for storage.
let data_value = hpke_open_level_2(&datum.encrypted_data_value, &store_context.hpke_sk)?;
Expand Down
16 changes: 16 additions & 0 deletions hacspec-scrambledb/scrambledb/src/data_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,26 @@ use oprf::coprf::coprf_online::{BlindInput, BlindOutput};

/// A type for finalized pseudonyms, i.e. those which have been hardened for
/// storage by applying a PRP.
///
/// The pseudonym is held as a fixed-size 64-byte array that is only
/// accessible from within this crate. `Hash` is derived for test builds only.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(test, derive(Hash))]
jschneider-bensch marked this conversation as resolved.
Show resolved Hide resolved
pub struct FinalizedPseudonym(pub(crate) [u8; 64]);
/// A type for blinded identifiable handles.
///
/// Newtype around a `BlindInput`; the wrapped value is only accessible from
/// within this crate. Unlike the other handle types in this module, this one
/// does not derive `Copy`, so it has to be cloned explicitly.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct BlindedIdentifiableHandle(pub(crate) BlindInput);
/// A type for blinded pseudonymous handles.
///
/// Newtype around a `BlindOutput`; the wrapped value is only accessible from
/// within this crate. The type derives `Copy`, so handles can be passed by
/// value without an explicit clone.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct BlindedPseudonymizedHandle(pub(crate) BlindOutput);

/// A plain text data value.
///
/// The derived ordering is lexicographic over the fields in declaration
/// order: values are compared by `value` first and by `attribute_name` only
/// when the byte strings are equal. Reordering the fields changes the
/// ordering.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct DataValue {
/// A byte string encoding the data value.
pub(crate) value: Vec<u8>,
/// The name of the attribute the value belongs to.
pub(crate) attribute_name: String,
}
/// An encrypted data value.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct EncryptedDataValue {
/// A byte string encoding the encrypted data value.
pub(crate) value: Vec<u8>,
Expand All @@ -34,14 +40,22 @@ pub struct EncryptedDataValue {
}

/// An identifiable piece of data.
///
/// `PartialOrd` / `Ord` derive:
/// When derived on structs, these produce a lexicographic ordering based on
/// the top-to-bottom declaration order of the struct's members. Reordering
/// the fields below therefore changes how values of this type are sorted.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct IdentifiableData {
/// A plain text handle.
/// Because the ordering is derived and this is the first field, values of
/// this struct are ordered by this handle first; `data_value` is only
/// consulted to break ties between equal handles.
pub(crate) handle: String,
/// A plain text data value.
pub(crate) data_value: DataValue,
}

/// The blinded version of an identifiable piece of data.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct BlindedIdentifiableData {
/// A blinded plain text handle.
pub(crate) blinded_handle: BlindedIdentifiableHandle,
Expand All @@ -50,6 +64,7 @@ pub struct BlindedIdentifiableData {
}

/// The blinded version of a pseudonymized piece of data.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct BlindedPseudonymizedData {
/// A blinded pseudonymous handle.
pub(crate) blinded_handle: BlindedPseudonymizedHandle,
Expand All @@ -58,6 +73,7 @@ pub struct BlindedPseudonymizedData {
}

/// A pseudonymized piece of data.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct PseudonymizedData {
/// A pseudonymous handle.
pub(crate) handle: FinalizedPseudonym,
Expand Down
73 changes: 25 additions & 48 deletions hacspec-scrambledb/scrambledb/src/finalize.rs
Original file line number Diff line number Diff line change
@@ -1,60 +1,37 @@
//! # Conversion Finalization

use crate::{
data_transformations::finalize_blinded_datum,
data_types::{BlindedPseudonymizedData, BlindedPseudonymizedHandle, EncryptedDataValue},
data_types::{BlindedPseudonymizedData, PseudonymizedData},
error::Error,
setup::StoreContext,
table::{Column, ConvertedTable, PseudonymizedTable},
table::Table,
};

/// The result of a split or join conversion is a set of blinded
/// pseudonymized tables which have been encrypted towards a data store.
/// ## Finalization of Pseudonymous and Converted Tables
///
/// Finalization of pseudonyms is the same regardless of pseudonym type,
/// i.e. whether they are long term storage pseudonyms at the Data Lake or
/// join pseudonyms at a Data Processor.
///
/// For permanent storage of the pseudonymized data, the raw pseudonyms have
/// to be unblinded and subsequently hardened into permanent pseudonyms.
/// Finalize a table of blinded pseudonymized data values by applying the
/// finalization operation on each entry and sorting the result, so that the
/// order of the output does not depend on the order of the input entries.
///
/// In addition the encrypted values need to be decrypted to be available
/// for future conversions towards other data stores.
pub fn finalize_conversion(
/// Inputs:
/// - `store_context`: The data store's pseudonymization context
/// - `table`: A table of blinded pseudonymized data values
///
/// Output:
/// A table of pseudonymized data values.
pub fn finalize_blinded_table(
store_context: &StoreContext,
converted_tables: Vec<ConvertedTable>,
) -> Result<Vec<PseudonymizedTable>, Error> {
let mut pseudonymized_tables = Vec::new();

for blinded_table in converted_tables {
let mut pseudonymized_column_data = Vec::new();

for (blinded_pseudonym, encrypted_value) in blinded_table.column().data() {
let blinded_pseudonymized_datum = BlindedPseudonymizedData {
blinded_handle: BlindedPseudonymizedHandle(blinded_pseudonym),
encrypted_data_value: EncryptedDataValue {
attribute_name: blinded_table.column().attribute(),
value: encrypted_value,
encryption_level: 2u8,
},
};

let pseudonymized_datum =
finalize_blinded_datum(&store_context, &blinded_pseudonymized_datum)?;

pseudonymized_column_data.push((
pseudonymized_datum.handle.0,
pseudonymized_datum.data_value.value,
));
}

let mut pseudonymized_column = Column::new(
blinded_table.column().attribute(),
pseudonymized_column_data,
);
pseudonymized_column.sort();
table: Table<BlindedPseudonymizedData>,
) -> Result<Table<PseudonymizedData>, Error> {
let mut pseudonymized_data = table
.data()
.iter()
.map(|entry| finalize_blinded_datum(store_context, entry))
.collect::<Result<Vec<PseudonymizedData>, Error>>()?;

pseudonymized_tables.push(PseudonymizedTable::new(
blinded_table.identifier(),
pseudonymized_column,
))
}
pseudonymized_data.sort();

Ok(pseudonymized_tables)
Ok(Table::new(table.identifier().into(), pseudonymized_data))
}
Loading