Skip to content

Commit

Permalink
i#7224 skipped header: Get filetype from stream in opcode_mix (#7225)
Browse files Browse the repository at this point in the history
Fixes the opcode_mix tool to get the filetype from the memtrace_stream_t
interface instead of the TRACE_MARKER_TYPE_FILETYPE marker memref which
may not be seen if -skip_instrs is applied by the user.

Modifies opcode_mix_t to use initialize_stream instead of initialize, to
get the serial stream.

Adds a new unit test that runs the opcode_mix tool with -skip_instrs,
which would fail because of an unseen filetype prior to this fix.

Augments decode_cache_t::init() documentation to suggest a reliable way
to obtain the filetype.

Issue: #7224, #7113
  • Loading branch information
abhinav92003 authored Jan 28, 2025
1 parent f007194 commit fe061a7
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 48 deletions.
10 changes: 8 additions & 2 deletions clients/drcachesim/scheduler/scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,10 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
* with no timeout but do not include a corresponding
* #TRACE_MARKER_TYPE_SYSCALL_SCHEDULE for wakeup, an input could remain
* unscheduled.
*
* Also beware that this can skip over trace header entries (like
* #TRACE_MARKER_TYPE_FILETYPE), which should ideally be obtained from the
* #dynamorio::drmemtrace::memtrace_stream_t API instead.
*/
std::vector<range_t> regions_of_interest;
};
Expand Down Expand Up @@ -1041,7 +1045,8 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
* #TRACE_MARKER_TYPE_VERSION record in the trace header.
* This can be queried prior to explicitly retrieving any records from
* output streams, unless #dynamorio::drmemtrace::scheduler_tmpl_t::
* scheduler_options_t.read_inputs_in_init is false.
* scheduler_options_t.read_inputs_in_init is false (which is the
* case for online drmemtrace analysis).
*/
uint64_t
get_version() const override
Expand All @@ -1055,7 +1060,8 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
* #TRACE_MARKER_TYPE_FILETYPE record in the trace header.
* This can be queried prior to explicitly retrieving any records from
* output streams, unless #dynamorio::drmemtrace::scheduler_tmpl_t::
* scheduler_options_t.read_inputs_in_init is false.
* scheduler_options_t.read_inputs_in_init is false (which is the
* case for online drmemtrace analysis).
*/
uint64_t
get_filetype() const override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Hello, world!
Opcode mix tool results:
*[0-9]* : total executed instructions
*[0-9]* : [a-z ]*
*[0-9]* : [a-z ]*
*[0-9]* : [a-z ]*
*[0-9]* : [a-z ]*
.*
27 changes: 22 additions & 5 deletions clients/drcachesim/tests/opcode_mix_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,22 @@ class test_opcode_mix_t : public opcode_mix_t {
instrlist_t *instrs_;
};

class test_stream_t : public default_memtrace_stream_t {
public:
test_stream_t(uint64_t filetype)
: filetype_(filetype)
{
}
uint64_t
get_filetype() const override
{
return filetype_;
}

private:
uint64_t filetype_;
};

std::string
check_opcode_mix(void *drcontext, bool use_module_mapper)
{
Expand All @@ -92,10 +108,10 @@ check_opcode_mix(void *drcontext, bool use_module_mapper)
instrlist_t *ilist = instrlist_create(drcontext);
instrlist_append(ilist, nop);
instrlist_append(ilist, ret);
uint64_t filetype = OFFLINE_FILE_TYPE_SYSCALL_NUMBERS |
(use_module_mapper ? 0 : OFFLINE_FILE_TYPE_ENCODINGS);
std::vector<memref_with_IR_t> memref_setup = {
{ gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE,
OFFLINE_FILE_TYPE_SYSCALL_NUMBERS |
(use_module_mapper ? 0 : OFFLINE_FILE_TYPE_ENCODINGS)),
{ gen_marker(TID_A, TRACE_MARKER_TYPE_FILETYPE, static_cast<uintptr_t>(filetype)),
nullptr },
{ gen_instr(TID_A), nop },
{ gen_instr(TID_A), ret },
Expand All @@ -116,9 +132,10 @@ check_opcode_mix(void *drcontext, bool use_module_mapper)
// Set up the second nop memref to reuse the same encoding as the first nop.
memrefs[3].instr.encoding_is_new = false;
}
test_stream_t stream(filetype);
test_opcode_mix_t opcode_mix(ilist_for_test);
opcode_mix.initialize();
void *shard_data = opcode_mix.parallel_shard_init_stream(0, nullptr, nullptr);
opcode_mix.initialize_stream(/*serial_stream=*/nullptr);
void *shard_data = opcode_mix.parallel_shard_init_stream(0, nullptr, &stream);
for (const memref_t &memref : memrefs) {
if (!opcode_mix.parallel_shard_memref(shard_data, memref)) {
return opcode_mix.parallel_shard_error(shard_data);
Expand Down
25 changes: 16 additions & 9 deletions clients/drcachesim/tools/common/decode_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -423,15 +423,22 @@ template <class DecodeInfo> class decode_cache_t : public decode_cache_base_t {
* indeed has embedded encodings or not, and initializing the
* #dynamorio::drmemtrace::module_mapper_t if the module path is provided.
*
* It is important to note that the trace filetype may be obtained using the
* get_filetype() API on a #dynamorio::drmemtrace::memtrace_stream_t. However,
* instances of #dynamorio::drmemtrace::memtrace_stream_t have the filetype
* available at init time (before the #TRACE_MARKER_TYPE_FILETYPE is actually
* returned by the stream) only for offline analysis. In the online analysis
* case, the user would need to call this API after init time when the
* #TRACE_MARKER_TYPE_FILETYPE marker is seen (which is fine as it occurs
* before any instr record). For tools intended for both offline and online
* analysis, following just the latter strategy would work fine.
* It is important to note some nuances in how the filetype can be obtained:
* - the trace filetype may be obtained using the get_filetype() API on the
* #dynamorio::drmemtrace::memtrace_stream_t. However, instances of
* #dynamorio::drmemtrace::memtrace_stream_t have the filetype available at
* init time (in the #dynamorio::drmemtrace::analysis_tool_t::initialize()
* or #dynamorio::drmemtrace::analysis_tool_t::initialize_stream()
* functions) only for offline analysis, not for online analysis.
* - when using the -skip_instrs or -skip_timestamp analyzer options, all
* initial header entries are skipped over. Therefore, the analysis tool
* may not see a #TRACE_MARKER_TYPE_FILETYPE at all.
*
* The most reliable way to obtain the filetype (and call this init() API),
* would be to use #dynamorio::drmemtrace::memtrace_stream_t::get_filetype()
* just before processing the first instruction memref in the
* #dynamorio::drmemtrace::analysis_tool_t::process_memref() or
* #dynamorio::drmemtrace::analysis_tool_t::parallel_shard_memref() APIs.
*
* If the \p module_file_path parameter is not empty, it instructs the
* #dynamorio::drmemtrace::decode_cache_t object that it should look for the
Expand Down
47 changes: 24 additions & 23 deletions clients/drcachesim/tools/opcode_mix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,17 @@ bool
opcode_mix_t::init_decode_cache(shard_data_t *shard, void *dcontext,
offline_file_type_t filetype)
{
// XXX: Perhaps this should be moved into decode_cache_t::init().
// We remove OFFLINE_FILE_TYPE_ARCH_REGDEPS from this check since DR_ISA_REGDEPS
// is not a real ISA and can coexist with any real architecture.
if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ALL & ~OFFLINE_FILE_TYPE_ARCH_REGDEPS, filetype) &&
!TESTANY(build_target_arch_type(), filetype)) {
shard->error = std::string("Architecture mismatch: trace recorded on ") +
trace_arch_string(static_cast<offline_file_type_t>(filetype)) +
" but tool built for " + trace_arch_string(build_target_arch_type());
return false;
}

shard->decode_cache =
std::unique_ptr<decode_cache_t<opcode_data_t>>(new decode_cache_t<opcode_data_t>(
dcontext,
Expand All @@ -100,9 +111,12 @@ opcode_mix_t::init_decode_cache(shard_data_t *shard, void *dcontext,
}

std::string
opcode_mix_t::initialize()
opcode_mix_t::initialize_stream(dynamorio::drmemtrace::memtrace_stream_t *serial_stream)
{
dcontext_.dcontext = dr_standalone_init();
if (serial_stream != nullptr) {
serial_shard_.stream = serial_stream;
}
return "";
}

Expand All @@ -125,6 +139,7 @@ opcode_mix_t::parallel_shard_init_stream(
dynamorio::drmemtrace::memtrace_stream_t *shard_stream)
{
auto shard = new shard_data_t();
shard->stream = shard_stream;
std::lock_guard<std::mutex> guard(shard_map_mutex_);
shard_map_[shard_index] = shard;
return reinterpret_cast<void *>(shard);
Expand All @@ -145,22 +160,7 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
{
shard_data_t *shard = reinterpret_cast<shard_data_t *>(shard_data);
if (memref.marker.type == TRACE_TYPE_MARKER &&
memref.marker.marker_type == TRACE_MARKER_TYPE_FILETYPE) {
shard->filetype = static_cast<offline_file_type_t>(memref.marker.marker_value);
/* We remove OFFLINE_FILE_TYPE_ARCH_REGDEPS from this check since DR_ISA_REGDEPS
* is not a real ISA and can coexist with any real architecture.
*/
if (TESTANY(OFFLINE_FILE_TYPE_ARCH_ALL & ~OFFLINE_FILE_TYPE_ARCH_REGDEPS,
memref.marker.marker_value) &&
!TESTANY(build_target_arch_type(), memref.marker.marker_value)) {
shard->error = std::string("Architecture mismatch: trace recorded on ") +
trace_arch_string(static_cast<offline_file_type_t>(
memref.marker.marker_value)) +
" but tool built for " + trace_arch_string(build_target_arch_type());
return false;
}
} else if (memref.marker.type == TRACE_TYPE_MARKER &&
memref.marker.marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) {
memref.marker.marker_type == TRACE_MARKER_TYPE_VECTOR_LENGTH) {
#ifdef AARCH64
const int new_vl_bits = memref.marker.marker_value * 8;
if (dr_get_vector_length() != new_vl_bits) {
Expand All @@ -177,18 +177,19 @@ opcode_mix_t::parallel_shard_memref(void *shard_data, const memref_t &memref)
}

/* At this point we start processing memref instructions, so we're past the header of
* the trace, which contains the trace filetype. If we didn't encounter a
* TRACE_MARKER_TYPE_FILETYPE we return an error and stop processing the trace.
* We expected the value of TRACE_MARKER_TYPE_FILETYPE to contain at least one of the
* the trace, which contains the trace filetype. If -skip_instrs was used, we may not
* have seen TRACE_MARKER_TYPE_FILETYPE itself but the memtrace_stream_t should be
* able to provide it to us.
*/
shard->filetype = static_cast<offline_file_type_t>(shard->stream->get_filetype());

/* We expected the value of TRACE_MARKER_TYPE_FILETYPE to contain at least one of the
* enum values of offline_file_type_t (e.g., OFFLINE_FILE_TYPE_ARCH_), so
* OFFLINE_FILE_TYPE_DEFAULT (== 0) should never be present alone and can be used as
* uninitialized value of filetype for an error check.
* XXX i#6812: we could allow traces that have some shards with no filetype, as long
* as there is at least one shard with it, by caching the filetype from shards that
* have it and using that one. We can do this using memtrace_stream_t::get_filetype(),
* which requires updating opcode_mix to use parallel_shard_init_stream() rather than
* the current (and deprecated) parallel_shard_init(). However, we should first decide
* whether we want to allow traces that have some shards without a filetype.
*/
if (shard->filetype == OFFLINE_FILE_TYPE_DEFAULT) {
shard->error = "No file type found in this shard";
Expand Down
12 changes: 3 additions & 9 deletions clients/drcachesim/tools/opcode_mix.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class opcode_mix_t : public analysis_tool_t {
const std::string &alt_module_dir = "");
virtual ~opcode_mix_t();
std::string
initialize() override;
initialize_stream(dynamorio::drmemtrace::memtrace_stream_t *serial_stream) override;
bool
process_memref(const memref_t &memref) override;
bool
Expand Down Expand Up @@ -142,20 +142,14 @@ class opcode_mix_t : public analysis_tool_t {

struct shard_data_t {
shard_data_t()
: instr_count(0)
, last_trace_module_start(nullptr)
, last_trace_module_size(0)
, last_mapped_module_start(nullptr)
{
}

int64_t instr_count;
int64_t instr_count = 0;
std::unordered_map<int, int64_t> opcode_counts;
std::unordered_map<uint, int64_t> category_counts;
std::string error;
app_pc last_trace_module_start;
size_t last_trace_module_size;
app_pc last_mapped_module_start;
dynamorio::drmemtrace::memtrace_stream_t *stream = nullptr;
std::unique_ptr<decode_cache_t<opcode_data_t>> decode_cache;
offline_file_type_t filetype = OFFLINE_FILE_TYPE_DEFAULT;
};
Expand Down
4 changes: 4 additions & 0 deletions suite/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4452,6 +4452,7 @@ if (BUILD_CLIENTS)
# TODO i#3544: Port tests to RISC-V 64
torunonly_drcacheoff(opcode_mix ${ci_shared_app} ""
"@-tool@opcode_mix" "")

# Ensure the tool works without the raw/ subdir.
set(tool.drcacheoff.opcode_mix_postcmd
"firstglob@${drraw2trace_path}@-indir@${dir_prefix}.*.dir")
Expand All @@ -4460,6 +4461,9 @@ if (BUILD_CLIENTS)
set(tool.drcacheoff.opcode_mix_postcmd3
"firstglob@${drcachesim_path}@-indir@${dir_prefix}.*.dir@-tool@opcode_mix")

torunonly_drcacheoff(skip_instrs_opcode_mix ${ci_shared_app} ""
"@-tool@opcode_mix@-skip_instrs@1" "")

torunonly_drcacheoff(view ${ci_shared_app} ""
"@-tool@view@-sim_refs@16384" "")
unset(tool.drcacheoff.view_rawtemp) # Use preprocessor
Expand Down

0 comments on commit fe061a7

Please sign in to comment.