Skip to content

Commit

Permalink
Get redex to parse baseline profile interactions
Browse files Browse the repository at this point in the history
Summary:
See https://fb.workplace.com/groups/PoGOStick/permalink/2117221028730017/ for context. This allows Redex to ingest the profiles specified in the baseline profile configs. It also updates the redex.py script in all Redex branches to avoid any breakage.

**Changes to redex.py**
- Reads baseline profile config json file
  - Compiles list of all interaction ids
  - Compiles list of all default interaction ids
- Adds default interaction ids to dd interaction ids (and to method profile paths)
- Adds other interaction ids (variants) to separate list
- Passes both lists to redex

**Changes to redex**
- Adds an additional arg to take in variant interaction ids
- In method profiles, adds all variant interaction ids to a separate interaction list
- Changes the handling of unresolved methods so that it processes both default interactions and variant interactions
- Changes baseline profile creation to look first in the manual profile interaction list, then in the default interaction list, and finally in the variant interaction list

Reviewed By: jimmycFB

Differential Revision: D69270481

fbshipit-source-id: 9985f1c8a53d830e50057fa9596959520bf6a4a0
  • Loading branch information
Koby Chan authored and facebook-github-bot committed Feb 25, 2025
1 parent d9ed2fb commit e4a1446
Show file tree
Hide file tree
Showing 5 changed files with 174 additions and 34 deletions.
8 changes: 6 additions & 2 deletions libredex/ConfigFiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,11 +459,15 @@ void ConfigFiles::ensure_agg_method_stats_loaded() {
}
std::vector<std::string> csv_filenames;
get_json_config().get("agg_method_stats_files", {}, csv_filenames);
if (csv_filenames.empty()) {
std::vector<std::string> baseline_profile_csv_filenames;
get_json_config().get("baseline_profile_agg_method_stats_files", {},
baseline_profile_csv_filenames);
if (csv_filenames.empty() && baseline_profile_csv_filenames.empty()) {
return;
}
m_method_profiles->initialize(
csv_filenames, get_baseline_profile_configs(),
csv_filenames, baseline_profile_csv_filenames,
get_baseline_profile_configs(),
get_json_config().get("ingest_baseline_profile_data", false));
}

Expand Down
1 change: 1 addition & 0 deletions libredex/GlobalConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ void GlobalConfig::bind_config() {
Json::Value json_param;
// Sorted alphabetically
bind("agg_method_stats_files", {}, string_vector_param);
bind("baseline_profile_agg_method_stats_files", {}, string_vector_param);
bind("android_sdk_api_15_file", "", string_param);
bind("android_sdk_api_16_file", "", string_param);
bind("android_sdk_api_17_file", "", string_param);
Expand Down
93 changes: 70 additions & 23 deletions libredex/MethodProfiles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ std::tuple<const StatsMap&, bool> method_stats_for_interaction_id(
const StatsMap& MethodProfiles::method_stats_for_baseline_config(
const std::string& interaction_id,
const std::string& baseline_config_name) const {
// Try to find the interaction id first in the manual profile map
if (baseline_config_name !=
baseline_profiles::DEFAULT_BASELINE_PROFILE_CONFIG_NAME) {
if (m_baseline_manual_interactions.count(baseline_config_name)) {
Expand All @@ -87,9 +88,17 @@ const StatsMap& MethodProfiles::method_stats_for_baseline_config(
}
}
}
const auto& [stats, _] =
// If we can't find it there, try to find it in the default method stats
const auto& [stats, found] =
method_stats_for_interaction_id(interaction_id, m_method_stats);
return stats;
if (found) {
return stats;
}
// If we can't find it there, try to find it in the baseline profile variant
// stats
const auto& [variant_stats, _] = method_stats_for_interaction_id(
interaction_id, m_baseline_profile_method_stats);
return variant_stats;
}

const StatsMap& MethodProfiles::method_stats(
Expand Down Expand Up @@ -319,7 +328,8 @@ void MethodProfiles::parse_manual_files(
}
}

bool MethodProfiles::parse_stats_file(const std::string& csv_filename) {
bool MethodProfiles::parse_stats_file(const std::string& csv_filename,
bool baseline_profile_variant) {
TRACE(METH_PROF, 3, "input csv filename: %s", csv_filename.c_str());
if (csv_filename.empty()) {
TRACE(METH_PROF, 2, "No csv file given");
Expand Down Expand Up @@ -349,7 +359,7 @@ bool MethodProfiles::parse_stats_file(const std::string& csv_filename) {
if (m_mode == NONE) {
success = parse_header(line);
} else {
success = parse_line(line);
success = parse_line(line, baseline_profile_variant);
}
if (!success) {
return false;
Expand Down Expand Up @@ -498,7 +508,8 @@ std::optional<MethodProfiles::ParsedMain> MethodProfiles::parse_main_internal(
}

bool MethodProfiles::apply_main_internal_result(ParsedMain v,
std::string* interaction_id) {
std::string* interaction_id,
bool baseline_profile_variant) {
if (v.ref != nullptr) {
if (v.line_interaction_id) {
// Interaction IDs from the current row have priority over the interaction
Expand All @@ -510,7 +521,11 @@ bool MethodProfiles::apply_main_internal_result(ParsedMain v,
TRACE(METH_PROF, 6, "(%s, %s) -> {%f, %f, %f, %d}", SHOW(v.ref),
interaction_id->c_str(), v.stats.appear_percent, v.stats.call_count,
v.stats.order_percent, v.stats.min_api_level);
m_method_stats[*interaction_id].emplace(v.ref, v.stats);
if (baseline_profile_variant) {
m_baseline_profile_method_stats[*interaction_id].emplace(v.ref, v.stats);
} else {
m_method_stats[*interaction_id].emplace(v.ref, v.stats);
}
return true;
} else if (v.ref_str == nullptr) {
std::cerr << "FAILED to parse line. Missing name column\n";
Expand All @@ -520,7 +535,11 @@ bool MethodProfiles::apply_main_internal_result(ParsedMain v,
if (!v.line_interaction_id) {
v.line_interaction_id = std::make_unique<std::string>(*interaction_id);
}
m_unresolved_lines.emplace_back(std::move(v));
if (baseline_profile_variant) {
m_baseline_profile_unresolved_lines.emplace_back(std::move(v));
} else {
m_unresolved_lines.emplace_back(std::move(v));
}
return false;
}
}
Expand Down Expand Up @@ -613,18 +632,21 @@ size_t MethodProfiles::substitute_stats(
}

bool MethodProfiles::parse_main(const std::string& line,
std::string* interaction_id) {
std::string* interaction_id,
bool baseline_profile_variant) {
auto result = parse_main_internal(line);
if (!result) {
return false;
}
(void)apply_main_internal_result(std::move(result.value()), interaction_id);
(void)apply_main_internal_result(std::move(result.value()), interaction_id,
baseline_profile_variant);
return true;
}

bool MethodProfiles::parse_line(const std::string& line) {
bool MethodProfiles::parse_line(const std::string& line,
bool baseline_profile_variant) {
if (m_mode == MAIN) {
return parse_main(line, &m_interaction_id);
return parse_main(line, &m_interaction_id, baseline_profile_variant);
} else if (m_mode == METADATA) {
return parse_metadata(line);
} else {
Expand All @@ -643,16 +665,24 @@ boost::optional<uint32_t> MethodProfiles::get_interaction_count(
}

void MethodProfiles::process_unresolved_lines() {
if (m_unresolved_lines.empty()) {
process_unresolved_lines(false);
process_unresolved_lines(true);
}

void MethodProfiles::process_unresolved_lines(bool baseline_profile_variant) {
auto& unresolved_lines_ref = baseline_profile_variant
? m_baseline_profile_unresolved_lines
: m_unresolved_lines;
if (unresolved_lines_ref.empty()) {
return;
}

auto timer_scope = s_process_unresolved_lines_timer.scope();

std::set<ParsedMain*> resolved;
std::mutex resolved_mutex;
workqueue_run_for<size_t>(0, m_unresolved_lines.size(), [&](size_t index) {
auto& parsed_main = m_unresolved_lines.at(index);
workqueue_run_for<size_t>(0, unresolved_lines_ref.size(), [&](size_t index) {
auto& parsed_main = unresolved_lines_ref.at(index);
always_assert(parsed_main.ref_str != nullptr);
always_assert(parsed_main.mdt);
parsed_main.ref = DexMethod::get_method(*parsed_main.mdt);
Expand All @@ -663,22 +693,23 @@ void MethodProfiles::process_unresolved_lines() {
resolved.emplace(&parsed_main);
}
});
auto unresolved_lines = m_unresolved_lines.size();
auto unresolved_lines = unresolved_lines_ref.size();
// Note that resolved is ordered by the (addresses of the) unresolved lines,
// to ensure determinism
for (auto& parsed_main_ptr : resolved) {
auto interaction_id_ptr = &*parsed_main_ptr->line_interaction_id;
always_assert(parsed_main_ptr->ref != nullptr);
bool success = apply_main_internal_result(std::move(*parsed_main_ptr),
interaction_id_ptr);
interaction_id_ptr,
baseline_profile_variant);
always_assert(success);
}
always_assert(unresolved_lines == m_unresolved_lines.size());
std20::erase_if(m_unresolved_lines, [&](auto& unresolved_line) {
always_assert(unresolved_lines == unresolved_lines_ref.size());
std20::erase_if(unresolved_lines_ref, [&](auto& unresolved_line) {
return resolved.count(&unresolved_line);
});
always_assert(unresolved_lines - resolved.size() ==
m_unresolved_lines.size());
unresolved_lines_ref.size());

size_t total_rows = 0;
for (const auto& pair : m_method_stats) {
Expand All @@ -697,18 +728,33 @@ MethodProfiles::get_unresolved_method_descriptor_tokens() const {
always_assert(parsed_main.mdt);
result.insert(*parsed_main.mdt);
}
for (auto& parsed_main : m_baseline_profile_unresolved_lines) {
always_assert(parsed_main.mdt);
result.insert(*parsed_main.mdt);
}
return result;
}

// Resolves previously unresolved profile lines against `map` for both
// unresolved-line lists: the baseline profile variant lines are handled first,
// then the default lines. Each pass is delegated to the two-argument overload.
void MethodProfiles::resolve_method_descriptor_tokens(
    const std::unordered_map<dex_member_refs::MethodDescriptorTokens,
                             std::vector<DexMethodRef*>>& map) {
  constexpr bool kVariantLines = true;
  constexpr bool kDefaultLines = false;
  resolve_method_descriptor_tokens(map, kVariantLines);
  resolve_method_descriptor_tokens(map, kDefaultLines);
}

void MethodProfiles::resolve_method_descriptor_tokens(
const std::unordered_map<dex_member_refs::MethodDescriptorTokens,
std::vector<DexMethodRef*>>& map,
bool baseline_profile_variant) {
size_t removed{0};
size_t added{0};
// Note that we don't remove m_unresolved_lines as we go, as the given map
// Note that we don't remove unresolved_lines_ref as we go, as the given map
// might reference its mdts.
std::unordered_set<std::string*> to_remove;
for (auto& parsed_main : m_unresolved_lines) {
auto& unresolved_lines_ref = baseline_profile_variant
? m_baseline_profile_unresolved_lines
: m_unresolved_lines;
for (auto& parsed_main : unresolved_lines_ref) {
always_assert(parsed_main.mdt);
auto it = map.find(*parsed_main.mdt);
if (it == map.end()) {
Expand All @@ -724,12 +770,13 @@ void MethodProfiles::resolve_method_descriptor_tokens(
auto interaction_id_ptr = &*resolved_parsed_main.line_interaction_id;
always_assert(resolved_parsed_main.ref != nullptr);
bool success = apply_main_internal_result(std::move(resolved_parsed_main),
interaction_id_ptr);
interaction_id_ptr,
baseline_profile_variant);
always_assert(success);
added++;
}
}
std20::erase_if(m_unresolved_lines, [&to_remove](auto& parsed_main) {
std20::erase_if(unresolved_lines_ref, [&to_remove](auto& parsed_main) {
return to_remove.count(parsed_main.ref_str.get());
});
TRACE(METH_PROF, 1,
Expand Down
38 changes: 33 additions & 5 deletions libredex/MethodProfiles.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class MethodProfiles {

void initialize(
const std::vector<std::string>& csv_filenames,
const std::vector<std::string>& baseline_profile_csv_filenames,
const std::unordered_map<std::string,
baseline_profiles::BaselineProfileConfig>&
baseline_profile_configs,
Expand All @@ -81,7 +82,7 @@ class MethodProfiles {
for (const std::string& csv_filename : csv_filenames) {
m_interaction_id = "";
m_mode = NONE;
bool success = parse_stats_file(csv_filename);
bool success = parse_stats_file(csv_filename, false);
always_assert_log(success,
"Failed to parse %s. See stderr for more details",
csv_filename.c_str());
Expand All @@ -91,6 +92,20 @@ class MethodProfiles {
csv_filename.c_str());
}
if (ingest_baseline_profile_data) {
// Parse csv files that are only used in baseline profile variants
for (const std::string& csv_filename : baseline_profile_csv_filenames) {
m_interaction_id = "";
m_mode = NONE;
bool success = parse_stats_file(csv_filename, true);
always_assert_log(success,
"Failed to parse %s. See stderr for more details",
csv_filename.c_str());
always_assert_log(!m_method_stats.empty(),
"No valid data found in the profile %s. See stderr "
"for more details.",
csv_filename.c_str());
}
// Parse manual interactions
std::unordered_map<std::string, std::vector<std::string>>
manual_file_to_config_names;
// Create a mapping of manual_file to config names
Expand Down Expand Up @@ -179,6 +194,7 @@ class MethodProfiles {
private:
static AccumulatingTimer s_process_unresolved_lines_timer;
AllInteractions m_method_stats;
AllInteractions m_baseline_profile_method_stats;
std::map<std::string, AllInteractions*> m_baseline_manual_interactions;
std::map<std::string, AllInteractions> m_manual_profile_interactions;
// Resolution may fail because of renaming or generated methods. Store the
Expand All @@ -192,6 +208,7 @@ class MethodProfiles {
Stats stats;
};
std::vector<ParsedMain> m_unresolved_lines;
std::vector<ParsedMain> m_baseline_profile_unresolved_lines;
ParsingMode m_mode{NONE};
// A map from interaction ID to the number of times that interaction was
// triggered. This can be used to compare relative prevalence of different
Expand All @@ -205,7 +222,8 @@ class MethodProfiles {

// Read a "simple" csv file (no quoted commas or extra spaces) and populate
// m_method_stats, or m_baseline_profile_method_stats when
// baseline_profile_variant is true
bool parse_stats_file(const std::string& csv_filename);
bool parse_stats_file(const std::string& csv_filename,
bool baseline_profile_variant);

// Read a list of manual profiles and populate m_baseline_manual_interactions
void parse_manual_files(
Expand All @@ -219,12 +237,16 @@ class MethodProfiles {
const std::vector<std::string>& config_names);

// Read a line of data (not a header)
bool parse_line(const std::string& line);
bool parse_line(const std::string& line, bool baseline_profile_variant);
// Read a line from the main section of the aggregated stats file and put an
// entry into m_method_stats, or into m_baseline_profile_method_stats when
// baseline_profile_variant is true
bool parse_main(const std::string& line, std::string* interaction_id);
bool parse_main(const std::string& line,
std::string* interaction_id,
bool baseline_profile_variant);
std::optional<ParsedMain> parse_main_internal(std::string_view line);
bool apply_main_internal_result(ParsedMain v, std::string* interaction_id);
bool apply_main_internal_result(ParsedMain v,
std::string* interaction_id,
bool baseline_profile_variant);
void apply_manual_profile(DexMethodRef* ref,
const std::string& flags,
const std::string& manual_filename,
Expand All @@ -234,6 +256,12 @@ class MethodProfiles {

// Parse the first line and make sure it matches our expectations
bool parse_header(std::string_view line);

void process_unresolved_lines(bool baseline_profile_variant);
void resolve_method_descriptor_tokens(
const std::unordered_map<dex_member_refs::MethodDescriptorTokens,
std::vector<DexMethodRef*>>& map,
bool baseline_profile_variant);
};

// NOTE: Do not use this comparator directly in `std::sort` calls, as it is
Expand Down
Loading

0 comments on commit e4a1446

Please sign in to comment.