diff --git a/packages/serialise/serialise.pony b/packages/serialise/serialise.pony index 66e71fc535..d169ba7ce8 100644 --- a/packages/serialise/serialise.pony +++ b/packages/serialise/serialise.pony @@ -15,16 +15,18 @@ invariants. However, if only "trusted" data (i.e. data produced by Pony serialisation from the same binary) is deserialised, it will always maintain a well-formed heap and all object invariants. -Note that serialised data is not usable between different Pony binaries. This is -due to the use of type identifiers rather than a heavy-weight self-describing -serialisation schema. This also means it isn't safe to deserialise something -serialised by the same program compiled for a different platform. +Note that serialised data can be used between binaries compiled with the same +version of the Pony compiler. Cross-binary serialisation will only work for +binaries of the same bit width (32-bit vs 64-bit), data model (ilp32, lp64, or +llp64), and endianness (big-endian or little-endian) but is not limited to a +single platform (for example: one can mix and match x86_64 Linux and aarch64 +Linux because they have the same bit width, data model, and endianness). The [Serialise.signature](serialise-Serialise.md#signature) method is provided for the purposes of comparing communicating Pony binaries to determine if they -are the same. Confirming this before deserialising data can help mitigate the -risk of accidental serialisation across different Pony binaries, but does not on -its own address the security issues of accepting data from untrusted sources. +are compatible. Confirming this before deserialising data can help mitigate the +risk of accidental serialisation across incompatible Pony binaries, but does not +on its own address the security issues of accepting data from untrusted sources. 
""" use @"internal.signature"[Array[U8] val]() diff --git a/src/libponyc/ast/ast.c b/src/libponyc/ast/ast.c index d0774aea49..31d13660d5 100644 --- a/src/libponyc/ast/ast.c +++ b/src/libponyc/ast/ast.c @@ -2013,6 +2013,7 @@ static pony_type_t ast_signature_pony = sizeof(ast_signature_t), 0, 0, + 0, NULL, NULL, ast_signature_serialise_trace, @@ -2092,6 +2093,7 @@ static pony_type_t ast_nominal_pkg_id_signature_pony = sizeof(ast_signature_t), 0, 0, + 0, NULL, NULL, ast_nominal_pkg_id_signature_serialise_trace, @@ -2355,6 +2357,7 @@ static pony_type_t ast_pony = sizeof(ast_t), 0, 0, + 0, NULL, NULL, ast_serialise_trace, diff --git a/src/libponyc/ast/source.c b/src/libponyc/ast/source.c index 1e01cff87e..be0cbd926f 100644 --- a/src/libponyc/ast/source.c +++ b/src/libponyc/ast/source.c @@ -114,6 +114,7 @@ static pony_type_t source_pony = sizeof(source_t), 0, 0, + 0, NULL, NULL, source_serialise_trace, diff --git a/src/libponyc/ast/stringtab.c b/src/libponyc/ast/stringtab.c index 9a83b8efde..7b1ded094c 100644 --- a/src/libponyc/ast/stringtab.c +++ b/src/libponyc/ast/stringtab.c @@ -134,6 +134,7 @@ static __pony_thread_local struct _pony_type_t string_pony = 0, 0, 0, + 0, NULL, NULL, NULL, @@ -220,6 +221,7 @@ static pony_type_t strlist_pony = sizeof(strlist_t), 0, 0, + 0, NULL, NULL, strlist_serialise_trace, diff --git a/src/libponyc/ast/symtab.c b/src/libponyc/ast/symtab.c index 09b9e67b99..b972254e4f 100644 --- a/src/libponyc/ast/symtab.c +++ b/src/libponyc/ast/symtab.c @@ -389,6 +389,7 @@ static pony_type_t symbol_pony = sizeof(symbol_t), 0, 0, + 0, NULL, NULL, symbol_serialise_trace, diff --git a/src/libponyc/ast/token.c b/src/libponyc/ast/token.c index 14ebb96be0..fb876e9492 100644 --- a/src/libponyc/ast/token.c +++ b/src/libponyc/ast/token.c @@ -404,6 +404,7 @@ static pony_type_t token_signature_pony = sizeof(token_signature_t), 0, 0, + 0, NULL, NULL, token_signature_serialise_trace, @@ -458,6 +459,7 @@ static pony_type_t token_docstring_signature_pony = 
sizeof(token_signature_t), 0, 0, + 0, NULL, NULL, token_docstring_signature_serialise_trace, @@ -546,6 +548,7 @@ static pony_type_t token_pony = sizeof(token_t), 0, 0, + 0, NULL, NULL, token_serialise_trace, diff --git a/src/libponyc/codegen/codegen.c b/src/libponyc/codegen/codegen.c index a4536e70f7..a0727e13a3 100644 --- a/src/libponyc/codegen/codegen.c +++ b/src/libponyc/codegen/codegen.c @@ -15,6 +15,7 @@ #include "../../libponyrt/mem/pool.h" #include "ponyassert.h" +#include #include #include #include @@ -202,6 +203,13 @@ static void init_runtime(compile_t* c) c->msg_type = LLVMStructCreateNamed(c->context, "__message"); LLVMStructSetBody(c->msg_type, params, 2, false); + // descriptor_offset_lookup + // uint32_t (*)(size_t) + params[0] = target_is_ilp32(c->opt->triple) ? c->i32 : c->i64; + c->descriptor_offset_lookup_type = LLVMFunctionType(c->i32, params, 1, false); + c->descriptor_offset_lookup_fn = + LLVMPointerType(c->descriptor_offset_lookup_type, 0); + // trace // void (*)(i8*, __object*) params[0] = c->ptr; @@ -856,6 +864,21 @@ bool codegen_pass_init(pass_opt_t* opt) else opt->cpu = LLVMGetHostCPUName(); + opt->serialise_id_hash_key = (unsigned char*)ponyint_pool_alloc_size(16); + + const char* version = "pony-" PONY_VERSION; + const char* data_model = target_is_ilp32(opt->triple) ? "ilp32" : (target_is_lp64(opt->triple) ? "lp64" : (target_is_llp64(opt->triple) ? "llp64" : "unknown")); + const char* endian = target_is_bigendian(opt->triple) ? 
"be" : "le"; + + printbuf_t* target_version_buf = printbuf_new(); + printbuf(target_version_buf, "%s-%s-%s", version, data_model, endian); + + int status = blake2b(opt->serialise_id_hash_key, 16, target_version_buf->m, target_version_buf->offset, NULL, 0); + (void)status; + pony_assert(status == 0); + + printbuf_free(target_version_buf); + return true; } @@ -872,6 +895,9 @@ void codegen_pass_cleanup(pass_opt_t* opt) opt->triple = NULL; opt->cpu = NULL; opt->features = NULL; + + ponyint_pool_free_size(16, opt->serialise_id_hash_key); + opt->serialise_id_hash_key = NULL; } bool codegen(ast_t* program, pass_opt_t* opt) diff --git a/src/libponyc/codegen/codegen.h b/src/libponyc/codegen/codegen.h index 822ea3982b..c95d943f05 100644 --- a/src/libponyc/codegen/codegen.h +++ b/src/libponyc/codegen/codegen.h @@ -172,6 +172,7 @@ typedef struct compile_t LLVMValueRef primitives_init; LLVMValueRef primitives_final; LLVMValueRef desc_table; + LLVMValueRef desc_table_offset_lookup_fn; LLVMValueRef numeric_sizes; LLVMTypeRef void_type; @@ -187,6 +188,8 @@ typedef struct compile_t LLVMTypeRef ptr; LLVMTypeRef descriptor_type; + LLVMTypeRef descriptor_offset_lookup_type; + LLVMTypeRef descriptor_offset_lookup_fn; LLVMTypeRef field_descriptor; LLVMTypeRef object_type; LLVMTypeRef msg_type; diff --git a/src/libponyc/codegen/gendesc.c b/src/libponyc/codegen/gendesc.c index b1444d2a4b..f9455f4c52 100644 --- a/src/libponyc/codegen/gendesc.c +++ b/src/libponyc/codegen/gendesc.c @@ -12,24 +12,25 @@ #define DESC_ID 0 #define DESC_SIZE 1 -#define DESC_FIELD_COUNT 2 -#define DESC_FIELD_OFFSET 3 -#define DESC_INSTANCE 4 -#define DESC_TRACE 5 -#define DESC_SERIALISE_TRACE 6 -#define DESC_SERIALISE 7 -#define DESC_DESERIALISE 8 -#define DESC_CUSTOM_SERIALISE_SPACE 9 -#define DESC_CUSTOM_DESERIALISE 10 -#define DESC_DISPATCH 11 -#define DESC_FINALISE 12 -#define DESC_EVENT_NOTIFY 13 -#define DESC_MIGHT_REFERENCE_ACTOR 14 -#define DESC_TRAITS 15 -#define DESC_FIELDS 16 -#define DESC_VTABLE 17 - 
-#define DESC_LENGTH 18 +#define DESC_SERIALISEID 2 +#define DESC_FIELD_COUNT 3 +#define DESC_FIELD_OFFSET 4 +#define DESC_INSTANCE 5 +#define DESC_TRACE 6 +#define DESC_SERIALISE_TRACE 7 +#define DESC_SERIALISE 8 +#define DESC_DESERIALISE 9 +#define DESC_CUSTOM_SERIALISE_SPACE 10 +#define DESC_CUSTOM_DESERIALISE 11 +#define DESC_DISPATCH 12 +#define DESC_FINALISE 13 +#define DESC_EVENT_NOTIFY 14 +#define DESC_MIGHT_REFERENCE_ACTOR 15 +#define DESC_TRAITS 16 +#define DESC_FIELDS 17 +#define DESC_VTABLE 18 + +#define DESC_LENGTH 19 static LLVMValueRef make_unbox_function(compile_t* c, reach_type_t* t, reach_method_t* m) @@ -330,6 +331,7 @@ void gendesc_basetype(compile_t* c, LLVMTypeRef desc_type) params[DESC_ID] = c->i32; params[DESC_SIZE] = c->i32; + params[DESC_SERIALISEID] = target_is_ilp32(c->opt->triple) ? c->i32 : c->i64; params[DESC_FIELD_COUNT] = c->i32; params[DESC_FIELD_OFFSET] = c->i32; params[DESC_INSTANCE] = c->ptr; @@ -377,6 +379,7 @@ void gendesc_type(compile_t* c, reach_type_t* t) params[DESC_ID] = c->i32; params[DESC_SIZE] = c->i32; + params[DESC_SERIALISEID] = target_is_ilp32(c->opt->triple) ? c->i32 : c->i64; params[DESC_FIELD_COUNT] = c->i32; params[DESC_FIELD_OFFSET] = c->i32; params[DESC_INSTANCE] = c->ptr; @@ -414,6 +417,7 @@ void gendesc_init(compile_t* c, reach_type_t* t) LLVMValueRef args[DESC_LENGTH]; args[DESC_ID] = LLVMConstInt(c->i32, t->type_id, false); args[DESC_SIZE] = LLVMConstInt(c->i32, c_t->abi_size, false); + args[DESC_SERIALISEID] = LLVMConstInt(target_is_ilp32(c->opt->triple) ? 
c->i32 : c->i64, t->serialise_id, false); args[DESC_FIELD_COUNT] = make_field_count(c, t); args[DESC_FIELD_OFFSET] = make_field_offset(c, t); args[DESC_INSTANCE] = make_desc_ptr(c, c_t->instance); @@ -478,6 +482,58 @@ void gendesc_table(compile_t* c) ponyint_pool_free_size(size, args); } +void gendesc_table_lookup(compile_t* c) +{ + reach_type_t* t; + size_t i = HASHMAP_BEGIN; + + LLVMValueRef desc_lkp_fn = codegen_addfun(c, "__DescOffsetLookupFn", + c->descriptor_offset_lookup_type, false); + codegen_startfun(c, desc_lkp_fn, NULL, NULL, NULL, false); + LLVMSetFunctionCallConv(desc_lkp_fn, LLVMCCallConv); + LLVMSetLinkage(desc_lkp_fn, LLVMExternalLinkage); + + LLVMBasicBlockRef unreachable = codegen_block(c, "unreachable"); + + // Read the serialise ID. + LLVMValueRef serialise_id = LLVMGetParam(desc_lkp_fn, 0); + + // switch based on serialise_id + LLVMValueRef serialise_switch = LLVMBuildSwitch(c->builder, serialise_id, unreachable, 0); + + // the default case is unreachable unless something major has gone wrong + LLVMPositionBuilderAtEnd(c->builder, unreachable); + + LLVMValueRef ret = LLVMConstInt(c->i32, (uint32_t)-1, false); + LLVMBuildRet(c->builder, ret); + + while((t = reach_types_next(&c->reach->types, &i)) != NULL) + { + if(t->is_trait || (t->underlying == TK_STRUCT)) + continue; + + pony_assert(t->serialise_id != (uint64_t)-1); + + LLVMBasicBlockRef type_block = codegen_block(c, + genname_type_with_id(t->name, t->serialise_id)); + + LLVMAddCase(serialise_switch, LLVMConstInt(target_is_ilp32(c->opt->triple) ? c->i32 : c->i64, t->serialise_id, false), + type_block); + + LLVMPositionBuilderAtEnd(c->builder, type_block); + + ret = LLVMConstInt(c->i32, t->type_id, false); + LLVMBuildRet(c->builder, ret); + } + + // Mark the default case as unreachable. 
+ LLVMPositionBuilderAtEnd(c->builder, unreachable); + + codegen_finishfun(c); + + c->desc_table_offset_lookup_fn = make_desc_ptr(c, desc_lkp_fn); +} + static LLVMValueRef desc_field(compile_t* c, LLVMValueRef desc, int index) { LLVMTypeRef field_type = LLVMStructGetTypeAtIndex(c->descriptor_type, index); diff --git a/src/libponyc/codegen/gendesc.h b/src/libponyc/codegen/gendesc.h index 97c76aea27..cd8f7a90a7 100644 --- a/src/libponyc/codegen/gendesc.h +++ b/src/libponyc/codegen/gendesc.h @@ -14,6 +14,8 @@ void gendesc_init(compile_t* c, reach_type_t* t); void gendesc_table(compile_t* c); +void gendesc_table_lookup(compile_t* c); + LLVMValueRef gendesc_fetch(compile_t* c, LLVMValueRef object); LLVMValueRef gendesc_typeid(compile_t* c, LLVMValueRef desc); diff --git a/src/libponyc/codegen/genexe.c b/src/libponyc/codegen/genexe.c index 9ae3ff1679..832c83969a 100644 --- a/src/libponyc/codegen/genexe.c +++ b/src/libponyc/codegen/genexe.c @@ -52,14 +52,16 @@ static LLVMValueRef make_lang_features_init(compile_t* c) boolean = c->i8; uint32_t desc_table_size = reach_max_type_id(c->reach); + LLVMValueRef desc_table_lookup_fn = c->desc_table_offset_lookup_fn; - LLVMTypeRef f_params[4]; + LLVMTypeRef f_params[5]; f_params[0] = boolean; f_params[1] = boolean; f_params[2] = c->ptr; f_params[3] = c->intptr; + f_params[4] = c->descriptor_offset_lookup_fn; - LLVMTypeRef lfi_type = LLVMStructTypeInContext(c->context, f_params, 4, + LLVMTypeRef lfi_type = LLVMStructTypeInContext(c->context, f_params, 5, false); LLVMBasicBlockRef this_block = LLVMGetInsertBlock(c->builder); @@ -89,6 +91,10 @@ static LLVMValueRef make_lang_features_init(compile_t* c) LLVMBuildStore(c->builder, LLVMConstInt(c->intptr, desc_table_size, false), field); + field = LLVMBuildStructGEP2(c->builder, lfi_type, lfi_object, 4, ""); + LLVMBuildStore(c->builder, LLVMBuildBitCast(c->builder, desc_table_lookup_fn, + c->descriptor_offset_lookup_fn, ""), field); + return lfi_object; } diff --git 
a/src/libponyc/codegen/genname.c b/src/libponyc/codegen/genname.c index a481f31dca..cab07cf70a 100644 --- a/src/libponyc/codegen/genname.c +++ b/src/libponyc/codegen/genname.c @@ -199,3 +199,10 @@ const char* genname_program_fn(const char* program, const char* name) { return stringtab_two(program, name); } + +const char* genname_type_with_id(const char* type, uint64_t type_id) +{ + printbuf_t* buf = printbuf_new(); + printbuf(buf, "%s_%" PRIu64, type, type_id); + return stringtab_buf(buf); +} diff --git a/src/libponyc/codegen/genname.h b/src/libponyc/codegen/genname.h index 13d39ee334..ce160524b0 100644 --- a/src/libponyc/codegen/genname.h +++ b/src/libponyc/codegen/genname.h @@ -42,6 +42,8 @@ const char* genname_unsafe(const char* name); const char* genname_program_fn(const char* program, const char* name); +const char* genname_type_with_id(const char* type, uint64_t type_id); + PONY_EXTERN_C_END #endif diff --git a/src/libponyc/codegen/genprim.c b/src/libponyc/codegen/genprim.c index d5fb517ec2..4da0b2b73b 100644 --- a/src/libponyc/codegen/genprim.c +++ b/src/libponyc/codegen/genprim.c @@ -776,7 +776,7 @@ void genprim_array_serialise(compile_t* c, reach_type_t* t) LLVMValueRef offset_addr = LLVMBuildInBoundsGEP2(c->builder, c->i8, addr, &offset, 1, ""); - genserialise_typeid(c, t, offset_addr); + genserialise_serialiseid(c, t, offset_addr); // Don't serialise our contents if we are opaque. LLVMBasicBlockRef body_block = codegen_block(c, "body"); @@ -890,7 +890,7 @@ void genprim_array_deserialise(compile_t* c, reach_type_t* t) LLVMValueRef ctx = LLVMGetParam(c_t->deserialise_fn, 0); LLVMValueRef object = LLVMGetParam(c_t->deserialise_fn, 1); - gendeserialise_typeid(c, c_t, object); + gendeserialise_serialiseid(c, c_t, object); // Deserialise the array contents. 
LLVMValueRef alloc = field_value(c, c_t->structure, object, 2); @@ -1010,7 +1010,7 @@ void genprim_string_serialise(compile_t* c, reach_type_t* t) LLVMValueRef offset_addr = LLVMBuildInBoundsGEP2(c->builder, c->i8, addr, &offset, 1, ""); - genserialise_typeid(c, t, offset_addr); + genserialise_serialiseid(c, t, offset_addr); // Don't serialise our contents if we are opaque. LLVMBasicBlockRef body_block = codegen_block(c, "body"); @@ -1069,7 +1069,7 @@ void genprim_string_deserialise(compile_t* c, reach_type_t* t) LLVMValueRef ctx = LLVMGetParam(c_t->deserialise_fn, 0); LLVMValueRef object = LLVMGetParam(c_t->deserialise_fn, 1); - gendeserialise_typeid(c, c_t, object); + gendeserialise_serialiseid(c, c_t, object); // Deserialise the string contents. LLVMValueRef alloc = field_value(c, c_t->structure, object, 2); diff --git a/src/libponyc/codegen/genserialise.c b/src/libponyc/codegen/genserialise.c index e641dcd67a..f2abaf5f66 100644 --- a/src/libponyc/codegen/genserialise.c +++ b/src/libponyc/codegen/genserialise.c @@ -3,6 +3,7 @@ #include "gendesc.h" #include "genfun.h" #include "genname.h" +#include "genopt.h" #include "genprim.h" #include "ponyassert.h" #include "../../libponyrt/mem/pool.h" @@ -19,7 +20,7 @@ static void serialise(compile_t* c, reach_type_t* t, LLVMValueRef ctx, { case TK_PRIMITIVE: { - genserialise_typeid(c, t, offset); + genserialise_serialiseid(c, t, offset); if(c_t->primitive != NULL) { @@ -37,7 +38,7 @@ static void serialise(compile_t* c, reach_type_t* t, LLVMValueRef ctx, case TK_CLASS: { - genserialise_typeid(c, t, offset); + genserialise_serialiseid(c, t, offset); extra++; break; } @@ -45,7 +46,7 @@ static void serialise(compile_t* c, reach_type_t* t, LLVMValueRef ctx, case TK_ACTOR: { // Skip the actor pad. 
- genserialise_typeid(c, t, offset); + genserialise_serialiseid(c, t, offset); extra += 2; break; } @@ -54,7 +55,7 @@ static void serialise(compile_t* c, reach_type_t* t, LLVMValueRef ctx, { if(!is_bare_tuple) { - genserialise_typeid(c, t, offset); + genserialise_serialiseid(c, t, offset); object = LLVMBuildStructGEP2(c->builder, c_t->structure, object, 1, ""); LLVMValueRef size = LLVMConstInt(c->intptr, LLVMOffsetOfElement(c->target_data, structure, 1), false); @@ -103,10 +104,10 @@ static void serialise(compile_t* c, reach_type_t* t, LLVMValueRef ctx, } } -void genserialise_typeid(compile_t* c, reach_type_t* t, LLVMValueRef offset) +void genserialise_serialiseid(compile_t* c, reach_type_t* t, LLVMValueRef offset) { - // Write the type id instead of the descriptor. - LLVMValueRef value = LLVMConstInt(c->intptr, t->type_id, false); + // Write the serialise id instead of the descriptor. + LLVMValueRef value = LLVMConstInt(target_is_ilp32(c->opt->triple) ? c->i32 : c->i64, t->serialise_id, false); LLVMBuildStore(c->builder, value, offset); } @@ -136,7 +137,7 @@ static void serialise_bare_interface(compile_t* c, reach_type_t* t, LLVMValueRef test = LLVMBuildICmp(c->builder, LLVMIntEQ, obj, ((compile_type_t*)sub->c_type)->instance, ""); LLVMBuildCondBr(c->builder, test, post_block, next_block); - LLVMValueRef value = LLVMConstInt(c->intptr, sub->type_id, false); + LLVMValueRef value = LLVMConstInt(target_is_ilp32(c->opt->triple) ? c->i32 : c->i64, sub->serialise_id, false); LLVMAddIncoming(phi, &value, &current_block, 1); LLVMPositionBuilderAtEnd(c->builder, next_block); sub = next; @@ -145,7 +146,7 @@ static void serialise_bare_interface(compile_t* c, reach_type_t* t, } LLVMBuildBr(c->builder, post_block); - LLVMValueRef value = LLVMConstInt(c->intptr, sub->type_id, false); + LLVMValueRef value = LLVMConstInt(target_is_ilp32(c->opt->triple) ? 
c->i32 : c->i64, sub->serialise_id, false); LLVMAddIncoming(phi, &value, &current_block, 1); LLVMMoveBasicBlockAfter(post_block, current_block); @@ -168,12 +169,12 @@ void genserialise_element(compile_t* c, reach_type_t* t, bool embed, LLVMValueRef value = LLVMBuildLoad2(c->builder, c_t->mem_type, ptr, ""); LLVMBuildStore(c->builder, value, offset); } else if(t->bare_method != NULL) { - // Bare object, either write the type id directly if it is a concrete object - // or compute the type id based on the object value and write it if it isn't. + // Bare object, either write the serialise id directly if it is a concrete object + // or compute the serialise id based on the object value and write it if it isn't. switch(t->underlying) { case TK_PRIMITIVE: - genserialise_typeid(c, t, offset); + genserialise_serialiseid(c, t, offset); break; case TK_INTERFACE: @@ -237,7 +238,7 @@ static void deserialise(compile_t* c, reach_type_t* t, LLVMValueRef ctx, case TK_PRIMITIVE: case TK_CLASS: { - gendeserialise_typeid(c, c_t, object); + gendeserialise_serialiseid(c, c_t, object); extra++; break; } @@ -245,7 +246,7 @@ static void deserialise(compile_t* c, reach_type_t* t, LLVMValueRef ctx, case TK_ACTOR: { // Skip the actor pad. - gendeserialise_typeid(c, c_t, object); + gendeserialise_serialiseid(c, c_t, object); extra += 2; break; } @@ -254,7 +255,7 @@ static void deserialise(compile_t* c, reach_type_t* t, LLVMValueRef ctx, { if(!is_bare_tuple) { - gendeserialise_typeid(c, c_t, object); + gendeserialise_serialiseid(c, c_t, object); object = LLVMBuildStructGEP2(c->builder, c_t->structure, object, 1, ""); } @@ -274,9 +275,9 @@ static void deserialise(compile_t* c, reach_type_t* t, LLVMValueRef ctx, } } -void gendeserialise_typeid(compile_t* c, compile_type_t* t, LLVMValueRef object) +void gendeserialise_serialiseid(compile_t* c, compile_type_t* t, LLVMValueRef object) { - // Write the descriptor instead of the type id. + // Write the descriptor instead of the serialise id. 
LLVMValueRef desc_ptr = LLVMBuildStructGEP2(c->builder, t->structure, object, 0, ""); LLVMBuildStore(c->builder, t->desc, desc_ptr); diff --git a/src/libponyc/codegen/genserialise.h b/src/libponyc/codegen/genserialise.h index 45743be46e..92d4d9803f 100644 --- a/src/libponyc/codegen/genserialise.h +++ b/src/libponyc/codegen/genserialise.h @@ -11,9 +11,9 @@ typedef struct compile_type_t compile_type_t; void genserialise_element(compile_t* c, reach_type_t* t, bool embed, LLVMValueRef ctx, LLVMValueRef ptr, LLVMValueRef offset); -void genserialise_typeid(compile_t* c, reach_type_t* t, LLVMValueRef offset); +void genserialise_serialiseid(compile_t* c, reach_type_t* t, LLVMValueRef offset); -void gendeserialise_typeid(compile_t* c, compile_type_t* t, LLVMValueRef offset); +void gendeserialise_serialiseid(compile_t* c, compile_type_t* t, LLVMValueRef offset); void gendeserialise_element(compile_t* c, reach_type_t* t, bool embed, LLVMValueRef ctx, LLVMValueRef ptr); diff --git a/src/libponyc/codegen/gentype.c b/src/libponyc/codegen/gentype.c index 049947c0aa..6c93bb7947 100644 --- a/src/libponyc/codegen/gentype.c +++ b/src/libponyc/codegen/gentype.c @@ -744,6 +744,7 @@ bool gentypes(compile_t* c) } gendesc_table(c); + gendesc_table_lookup(c); c->numeric_sizes = gen_numeric_size_table(c); diff --git a/src/libponyc/pass/pass.h b/src/libponyc/pass/pass.h index c3356dcab6..5a52ced437 100644 --- a/src/libponyc/pass/pass.h +++ b/src/libponyc/pass/pass.h @@ -313,6 +313,7 @@ typedef struct pass_opt_t char* abi; char* cpu; char* features; + unsigned char* serialise_id_hash_key; typecheck_t check; diff --git a/src/libponyc/pkg/package.c b/src/libponyc/pkg/package.c index 531f707987..2ee105ac05 100644 --- a/src/libponyc/pkg/package.c +++ b/src/libponyc/pkg/package.c @@ -1430,6 +1430,7 @@ static pony_type_t package_dep_signature_pony = sizeof(package_signature_t), 0, 0, + 0, NULL, NULL, package_dep_signature_serialise_trace, @@ -1459,6 +1460,7 @@ static pony_type_t 
package_signature_pony = sizeof(package_signature_t), 0, 0, + 0, NULL, NULL, package_signature_serialise_trace, @@ -1537,6 +1539,7 @@ static pony_type_t package_group_dep_signature_pony = sizeof(const char*), 0, 0, + 0, NULL, NULL, package_group_dep_signature_serialise_trace, @@ -1566,6 +1569,7 @@ static pony_type_t package_group_signature_pony = sizeof(const char*), 0, 0, + 0, NULL, NULL, package_group_signature_serialise_trace, @@ -1715,6 +1719,7 @@ static pony_type_t package_pony = sizeof(package_t), 0, 0, + 0, NULL, NULL, package_serialise_trace, @@ -1788,6 +1793,7 @@ static pony_type_t package_group_pony = sizeof(package_group_t), 0, 0, + 0, NULL, NULL, package_group_serialise_trace, diff --git a/src/libponyc/pkg/program.c b/src/libponyc/pkg/program.c index a27095a91b..fb60c2979b 100644 --- a/src/libponyc/pkg/program.c +++ b/src/libponyc/pkg/program.c @@ -425,6 +425,7 @@ static pony_type_t program_pony = sizeof(program_t), 0, 0, + 0, NULL, NULL, program_serialise_trace, diff --git a/src/libponyc/reach/reach.c b/src/libponyc/reach/reach.c index ea2e77ae7d..8e3760595f 100644 --- a/src/libponyc/reach/reach.c +++ b/src/libponyc/reach/reach.c @@ -2,6 +2,7 @@ #include "subtype.h" #include "../ast/astbuild.h" #include "../codegen/genname.h" +#include "../codegen/genopt.h" #include "../pass/expr.h" #include "../type/assemble.h" #include "../type/cap.h" @@ -740,6 +741,7 @@ static reach_type_t* add_reach_type(reach_t* r, ast_t* type) t->ast = set_cap_and_ephemeral(type, TK_REF, TK_NONE); t->ast_cap = ast_dup(type); t->type_id = (uint32_t)-1; + t->serialise_id = (uint64_t)-1; ast_set_scope(t->ast, NULL); ast_set_scope(t->ast_cap, NULL); @@ -794,6 +796,15 @@ static reach_type_t* add_tuple(reach_t* r, ast_t* type, pass_opt_t* opt) t->type_id = get_new_tuple_id(r); t->can_be_boxed = true; + // TODO: ideally should be hashing AST tree for type name, fields and their types/names + // Only use 63/31 bits of the hash because we need the high bit for primitive types + 
if(target_is_ilp32(opt->triple)) + t->serialise_id = (ponyint_hash_str_custom_key32(opt->serialise_id_hash_key, t->name) >> 1); + else + t->serialise_id = (ponyint_hash_str_custom_key64(opt->serialise_id_hash_key, t->name) >> 1); + + pony_assert(t->serialise_id != ((uint64_t)-1)); // -1 is for `Pointer`s + t->field_count = (uint32_t)ast_childcount(t->ast); t->fields = (reach_field_t*)ponyint_pool_alloc_size( t->field_count * sizeof(reach_field_t)); @@ -926,6 +937,15 @@ static reach_type_t* add_nominal(reach_t* r, ast_t* type, pass_opt_t* opt) t->type_id = get_new_object_id(r); } + // TODO: ideally should be hashing AST tree for type name, fields and their types/names + // Only use 63/31 bits of the hash because we need the high bit for primitive types + if(target_is_ilp32(opt->triple)) + t->serialise_id = (ponyint_hash_str_custom_key32(opt->serialise_id_hash_key, t->name) >> 1); + else + t->serialise_id = (ponyint_hash_str_custom_key64(opt->serialise_id_hash_key, t->name) >> 1); + + pony_assert(t->serialise_id != ((uint64_t)-1)); // -1 is for `Pointer`s + if(ast_id(def) != TK_PRIMITIVE) return t; @@ -1510,6 +1530,10 @@ void reach_dump(reach_t* r) size_t j = HASHMAP_BEGIN; reach_method_name_t* n; + printf(" serialise_id: %" PRIu64 "\n", t->serialise_id); + printf(" is_trait: %s\n", (t->is_trait)?"true":"false"); + printf(" can_be_boxed: %s\n", (t->can_be_boxed)?"true":"false"); + printf(" vtable: %d\n", t->vtable_size); while((n = reach_method_names_next(&t->methods, &j)) != NULL) @@ -1529,6 +1553,8 @@ void reach_dump(reach_t* r) printf(" %s\n", t2->name); } } + + printf(" Total Type Count: %d\n", r->total_type_count); } static void reach_param_serialise_trace(pony_ctx_t* ctx, void* object) @@ -1571,6 +1597,7 @@ static pony_type_t reach_param_pony = sizeof(reach_param_t), 0, 0, + 0, NULL, NULL, reach_param_serialise_trace, @@ -1706,6 +1733,7 @@ static pony_type_t reach_method_pony = sizeof(reach_method_t), 0, 0, + 0, NULL, NULL, reach_method_serialise_trace, @@ 
-1769,6 +1797,7 @@ static pony_type_t reach_method_name_pony = sizeof(reach_method_name_t), 0, 0, + 0, NULL, NULL, reach_method_name_serialise_trace, @@ -1827,6 +1856,7 @@ static pony_type_t reach_field_pony = sizeof(reach_field_t), 0, 0, + 0, NULL, NULL, reach_field_serialise_trace, @@ -1898,6 +1928,7 @@ static void reach_type_serialise(pony_ctx_t* ctx, void* object, void* buf, dst->vtable_size = t->vtable_size; dst->can_be_boxed = t->can_be_boxed; dst->is_trait = t->is_trait; + dst->serialise_id = t->serialise_id; dst->field_count = t->field_count; dst->fields = (reach_field_t*)pony_serialise_offset(ctx, t->fields); @@ -1951,6 +1982,7 @@ static pony_type_t reach_type_pony = sizeof(reach_type_t), 0, 0, + 0, NULL, NULL, reach_type_serialise_trace, @@ -2010,6 +2042,7 @@ static pony_type_t reach_pony = sizeof(reach_t), 0, 0, + 0, NULL, NULL, reach_serialise_trace, diff --git a/src/libponyc/reach/reach.h b/src/libponyc/reach/reach.h index afd0f79025..eee7f81e25 100644 --- a/src/libponyc/reach/reach.h +++ b/src/libponyc/reach/reach.h @@ -99,6 +99,7 @@ struct reach_type_t reach_type_cache_t subtypes; uint32_t type_id; uint32_t vtable_size; + uint64_t serialise_id; bool can_be_boxed; bool is_trait; diff --git a/src/libponyc/type/reify.c b/src/libponyc/type/reify.c index 5ad7e185d1..7a2586d3bd 100644 --- a/src/libponyc/type/reify.c +++ b/src/libponyc/type/reify.c @@ -561,6 +561,7 @@ static pony_type_t deferred_reification_pony = sizeof(deferred_reification_t), 0, 0, + 0, NULL, NULL, deferred_reification_serialise_trace, diff --git a/src/libponyrt/ds/fun.c b/src/libponyrt/ds/fun.c index a9c14d14ec..d0c36d55b7 100644 --- a/src/libponyrt/ds/fun.c +++ b/src/libponyrt/ds/fun.c @@ -9,8 +9,6 @@ static const unsigned char the_key[16] = { }; -#ifdef PLATFORM_IS_ILP32 - #define ROTL32(x, b) (uint32_t)(((x) << (b)) | ((x) >> (32 - (b)))) #define SIPROUND32 \ @@ -76,7 +74,6 @@ static uint32_t halfsiphash24(const unsigned char* key, const unsigned char* in, return v0 ^ v1 ^ v2 ^ v3; } 
-#endif #define ROTL64(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) @@ -149,6 +146,16 @@ PONY_API uint64_t ponyint_hash_block64(const void* p, size_t len) return siphash24(the_key, (const unsigned char*)p, len); } +uint32_t ponyint_hash_str_custom_key32(const unsigned char* key, const char* str) +{ + return halfsiphash24((const unsigned char *)key, (const unsigned char *)str, strlen(str)); +} + +uint64_t ponyint_hash_str_custom_key64(const unsigned char* key, const char* str) +{ + return siphash24((const unsigned char *)key, (const unsigned char *)str, strlen(str)); +} + size_t ponyint_hash_str(const char* str) { #ifdef PLATFORM_IS_ILP32 diff --git a/src/libponyrt/ds/fun.h b/src/libponyrt/ds/fun.h index b19976bc0d..d8310e837c 100644 --- a/src/libponyrt/ds/fun.h +++ b/src/libponyrt/ds/fun.h @@ -25,6 +25,10 @@ PONY_API size_t ponyint_hash_block(const void* p, size_t len); PONY_API uint64_t ponyint_hash_block64(const void* p, size_t len); +uint32_t ponyint_hash_str_custom_key32(const unsigned char* key, const char* str); + +uint64_t ponyint_hash_str_custom_key64(const unsigned char* key, const char* str); + size_t ponyint_hash_str(const char* str); size_t ponyint_hash_ptr(const void* p); diff --git a/src/libponyrt/ds/hash.h b/src/libponyrt/ds/hash.h index 39f07138f5..7f08c17336 100644 --- a/src/libponyrt/ds/hash.h +++ b/src/libponyrt/ds/hash.h @@ -251,6 +251,7 @@ void ponyint_hashmap_deserialise(pony_ctx_t* ctx, void* object, sizeof(name_t), \ 0, \ 0, \ + 0, \ NULL, \ NULL, \ name##_serialise_trace, \ diff --git a/src/libponyrt/ds/list.h b/src/libponyrt/ds/list.h index 089927e4b2..4a829c691d 100644 --- a/src/libponyrt/ds/list.h +++ b/src/libponyrt/ds/list.h @@ -167,6 +167,7 @@ void ponyint_list_deserialise(pony_ctx_t* ctx, void* object, sizeof(name_t), \ 0, \ 0, \ + 0, \ NULL, \ NULL, \ name##_serialise_trace, \ diff --git a/src/libponyrt/gc/cycle.c b/src/libponyrt/gc/cycle.c index 349f08b1d0..6cac328459 100644 --- a/src/libponyrt/gc/cycle.c +++ 
b/src/libponyrt/gc/cycle.c @@ -1225,6 +1225,7 @@ static pony_type_t cycle_type = sizeof(detector_t), 0, 0, + 0, NULL, NULL, NULL, diff --git a/src/libponyrt/gc/serialise.c b/src/libponyrt/gc/serialise.c index 624e1309e3..75fbdb4aef 100644 --- a/src/libponyrt/gc/serialise.c +++ b/src/libponyrt/gc/serialise.c @@ -20,6 +20,7 @@ PONY_EXTERN_C_BEGIN static size_t desc_table_size = 0; static pony_type_t** desc_table = NULL; +static desc_offset_lookup_fn desc_table_offset_lookup_fn = NULL; PONY_EXTERN_C_END @@ -81,22 +82,38 @@ static void custom_deserialise(pony_ctx_t* ctx) } } -bool ponyint_serialise_setup(pony_type_t** table, size_t table_size) +bool ponyint_serialise_setup(pony_type_t** table, size_t table_size, + desc_offset_lookup_fn desc_table_offset_lookup) { #ifndef PONY_NDEBUG for(uint32_t i = 0; i < table_size; i++) { if(table[i] != NULL) + { pony_assert(table[i]->id == i); + pony_assert(desc_table_offset_lookup(table[i]->serialise_id) == i); + } } #endif desc_table = table; desc_table_size = table_size; + desc_table_offset_lookup_fn = desc_table_offset_lookup; return true; } +static pony_type_t* get_descriptor(size_t serialise_id) +{ + uint32_t offset = desc_table_offset_lookup_fn(serialise_id); + + // fail even in release builds because this is unrecoverable + if(offset >= desc_table_size) + ponyint_assert_fail("deserialise offset invalid", __FILE__, __LINE__, __func__); + + return desc_table[offset]; +} + void ponyint_serialise_object(pony_ctx_t* ctx, void* p, pony_type_t* t, int mutability) { @@ -194,7 +211,7 @@ PONY_API size_t pony_serialise_offset(pony_ctx_t* ctx, void* p) // If we are not in the map, we are an untraced primitive. Return the type id // with the high bit set. 
pony_type_t* t = *(pony_type_t**)p; - return (size_t)t->id | HIGH_BIT; + return (size_t)t->serialise_id | HIGH_BIT; } PONY_API void pony_serialise(pony_ctx_t* ctx, void* p, pony_type_t* t, @@ -253,7 +270,7 @@ PONY_API void* pony_deserialise_offset(pony_ctx_t* ctx, pony_type_t* t, // Return the global instance, if there is one. It's ok to return null if // there is no global instance, as this will then be an unserialised // field in an opaque object. - t = desc_table[offset]; + t = get_descriptor(offset); return t->instance; } @@ -277,9 +294,9 @@ PONY_API void* pony_deserialise_offset(pony_ctx_t* ctx, pony_type_t* t, abort(); } - // Turn the type id into a descriptor pointer. - uintptr_t id = *(uintptr_t*)((uintptr_t)ctx->serialise_buffer + offset); - t = desc_table[id]; + // Turn the serialise id into a descriptor pointer. + size_t id = *(uintptr_t*)((uintptr_t)ctx->serialise_buffer + offset); + t = get_descriptor(id); } // If it's a primitive, return the global instance. diff --git a/src/libponyrt/gc/serialise.h b/src/libponyrt/gc/serialise.h index 4a41da9af9..d31b30ad88 100644 --- a/src/libponyrt/gc/serialise.h +++ b/src/libponyrt/gc/serialise.h @@ -23,7 +23,8 @@ typedef struct serialise_t serialise_t; DECLARE_HASHMAP(ponyint_serialise, ponyint_serialise_t, serialise_t); -bool ponyint_serialise_setup(pony_type_t** table, size_t table_size); +bool ponyint_serialise_setup(pony_type_t** table, size_t table_size, + desc_offset_lookup_fn desc_table_offset_lookup); void ponyint_serialise_object(pony_ctx_t* ctx, void* p, pony_type_t* t, int mutability); diff --git a/src/libponyrt/pony.h b/src/libponyrt/pony.h index eed07d3a2e..9aaf8a0a76 100644 --- a/src/libponyrt/pony.h +++ b/src/libponyrt/pony.h @@ -134,6 +134,7 @@ typedef const struct _pony_type_t { uint32_t id; uint32_t size; + size_t serialise_id; uint32_t field_count; uint32_t field_offset; void* instance; @@ -152,6 +153,12 @@ typedef const struct _pony_type_t void* vtable; } pony_type_t; +/** Desc table lookup 
function. + * + * A function to convert `serialise_id`s to offsets in the desc table + */ +typedef uint32_t (*desc_offset_lookup_fn)(size_t serialise_id); + /** Language feature initialiser. * * Contains initialisers for the various language features initialised by @@ -178,6 +185,9 @@ typedef struct pony_language_features_init_t /// The total size of the descriptor_table array. size_t descriptor_table_size; + + /// The function to translate `serialise_id`s to offsets in the desc_table + desc_offset_lookup_fn desc_table_offset_lookup; } pony_language_features_init_t; /// The currently executing context. diff --git a/src/libponyrt/sched/start.c b/src/libponyrt/sched/start.c index a7acca53fa..71d3621316 100644 --- a/src/libponyrt/sched/start.c +++ b/src/libponyrt/sched/start.c @@ -334,7 +334,8 @@ PONY_API bool pony_start(bool library, int* exit_code, if(language_init.init_serialisation && !ponyint_serialise_setup(language_init.descriptor_table, - language_init.descriptor_table_size)) + language_init.descriptor_table_size, + language_init.desc_table_offset_lookup)) { atomic_store_explicit(&running, NOT_RUNNING, memory_order_relaxed); return false; diff --git a/test/libponyrt/ds/hash.cc b/test/libponyrt/ds/hash.cc index 4a4ea1f1c2..663dfe42c0 100644 --- a/test/libponyrt/ds/hash.cc +++ b/test/libponyrt/ds/hash.cc @@ -72,6 +72,7 @@ static pony_type_t hash_elem_pony = sizeof(hash_elem_t), 0, 0, + 0, NULL, NULL, hash_elem_serialise_trace,