Skip to content

Commit

Permalink
experiment changes
Browse files Browse the repository at this point in the history
  • Loading branch information
Diandre Sabale committed Jan 2, 2024
1 parent fd8d6d6 commit f3a2575
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 88 deletions.
162 changes: 81 additions & 81 deletions Experiments/Scripts/just-edge-updates-experiments.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ using Graphs
using Random
include("../Experiments.jl")

datasets::Vector{DATASET} = [aids, human]
datasets::Vector{DATASET} = [aids, human, wordnet]
# datasets::Vector{DATASET} = [aids, human, yeast, wordnet, youtube, dblp, patents]
# datasets::Vector{DATASET} = [aids, human, lubm80, yeast, hprd, wordnet, dblp, youtube, eu2005, patents]
max_cycles = 6
Expand All @@ -13,88 +13,88 @@ proportions_updated = [0, 0.2, 0.4, 0.6, 0.8, .9, 1.0]

experiment_params_list::Vector{ExperimentParams} = [ExperimentParams(dataset=current_dataset, proportion_updated=current_proportion)
for current_dataset in datasets for current_proportion in proportions_updated]
# println("started building")
# shuffled_edges = Dict()
# for experiment_params in experiment_params_list
# build_times = [("Dataset", "Partitioner", "NumColors", "BuildPhase", "BuildTime", "MemoryFootprint")]
# dataset = experiment_params.dataset
# summary_params = experiment_params.summary_params
# data = load_dataset(dataset)
# cloned_data = DataGraph(nv(data.graph))
# cloned_data.vertex_labels = data.vertex_labels
# edges_for_later = []
# shuffled_edges[dataset] = shuffle!(collect(edges(data.graph)))
# edges_to_add = ne(data.graph) - round(convert(Float64, ne(data.graph)) * convert(Float64, experiment_params.summary_params.proportion_updated))
# for edge in shuffled_edges[dataset]
# if edges_to_add > 0
# edges_to_add -= 1
# add_labeled_edge!(cloned_data, (src(edge), dst(edge)), only(data.edge_labels[(src(edge), dst(edge))]))
# else
# push!(edges_for_later, edge)
# end
# end
# summary_name = params_to_summary_filename(experiment_params)
# summary_file_location = "Experiments/SerializedSummaries/" * summary_name
# println("Building Color Summary: ", summary_name)
# timing_vec = Float64[]
# results = @timed generate_color_summary((experiment_params.summary_params.proportion_updated > 0) ? cloned_data : data, summary_params; verbose=1, timing_vec=timing_vec)
# current_summary = results.value
# if (experiment_params.summary_params.proportion_updated > 0)
# for edge in edges_for_later
# add_summary_edge!(current_summary, src(edge), dst(edge), get(data.edge_labels, (src(edge), dst(edge)), []))
# end
# end
# summary_size = Base.summarysize(current_summary)
# serialize(summary_file_location, current_summary)
# push!(build_times, (string(dataset),
# string(summary_params.partitioning_scheme),
# string(summary_params.num_colors),
# "FullTime",
# string(results.time),
# string(summary_size)))
# push!(build_times, (string(dataset),
# string(summary_params.partitioning_scheme),
# string(summary_params.num_colors),
# "Coloring",
# string(timing_vec[1]),
# string(summary_size)))
# push!(build_times, (string(dataset),
# string(summary_params.partitioning_scheme),
# string(summary_params.num_colors),
# "CycleCounting",
# string(timing_vec[2]),
# string(summary_size)))
# push!(build_times, (string(dataset),
# string(summary_params.partitioning_scheme),
# string(summary_params.num_colors),
# "BloomFilter",
# string(timing_vec[3]),
# string(summary_size)))
# push!(build_times, (string(dataset),
# string(summary_params.partitioning_scheme),
# string(summary_params.num_colors),
# "CardinalityCounting",
# string(timing_vec[4]),
# string(summary_size)))
# push!(build_times, (string(dataset),
# string(summary_params.partitioning_scheme),
# string(summary_params.num_colors),
# "EdgeStats",
# string(timing_vec[5]),
# string(summary_size)))
# results_filename = params_to_results_filename(experiment_params)
# result_file_location = "Experiments/Results/Build_" * results_filename
# writedlm(result_file_location, build_times, ",")
# end
# Build a color summary for each experiment configuration and record how long
# the build took. When proportion_updated > 0, the summary is generated from a
# graph that holds only the leading part of a random edge order, and the
# remaining edges are then applied to the summary via add_summary_edge!.
println("started building")
shuffled_edges = Dict()
for experiment_params in experiment_params_list
    dataset = experiment_params.dataset
    summary_params = experiment_params.summary_params
    # Header row of the timing table that is written out at the end of the iteration.
    build_times = [("Dataset", "Partitioner", "NumColors", "BuildPhase", "BuildTime", "MemoryFootprint")]

    data = load_dataset(dataset)
    cloned_data = DataGraph(nv(data.graph))
    cloned_data.vertex_labels = data.vertex_labels

    # Random edge order; the entry for a dataset is overwritten on every configuration.
    shuffled_edges[dataset] = shuffle!(collect(edges(data.graph)))

    # Number of edges that go into the initial graph (kept as a Float64 counter).
    edges_to_add = ne(data.graph) - round(convert(Float64, ne(data.graph)) * convert(Float64, summary_params.proportion_updated))
    edges_for_later = Any[]
    for edge in shuffled_edges[dataset]
        if !(edges_to_add > 0)
            # Quota for the initial graph is used up: hold this edge back.
            push!(edges_for_later, edge)
            continue
        end
        edges_to_add -= 1
        endpoints = (src(edge), dst(edge))
        add_labeled_edge!(cloned_data, endpoints, only(data.edge_labels[endpoints]))
    end

    summary_name = params_to_summary_filename(experiment_params)
    summary_file_location = "Experiments/SerializedSummaries/" * summary_name
    println("Building Color Summary: ", summary_name)

    # With no updates requested, the summary is built from the full dataset directly.
    replay_updates = summary_params.proportion_updated > 0
    graph_to_summarize = replay_updates ? cloned_data : data
    timing_vec = Float64[]
    results = @timed generate_color_summary(graph_to_summarize, summary_params; verbose=1, timing_vec=timing_vec)
    current_summary = results.value
    if replay_updates
        for edge in edges_for_later
            add_summary_edge!(current_summary, src(edge), dst(edge), get(data.edge_labels, (src(edge), dst(edge)), []))
        end
    end

    summary_size = Base.summarysize(current_summary)
    serialize(summary_file_location, current_summary)

    # Every row shares the same identifying columns and the same memory footprint.
    row_prefix = (string(dataset),
                  string(summary_params.partitioning_scheme),
                  string(summary_params.num_colors))
    size_column = string(summary_size)
    push!(build_times, (row_prefix..., "FullTime", string(results.time), size_column))
    # timing_vec[1] through timing_vec[5] are reported under these phase names, in order.
    phase_names = ("Coloring", "CycleCounting", "BloomFilter", "CardinalityCounting", "EdgeStats")
    for (phase_index, phase_name) in enumerate(phase_names)
        push!(build_times, (row_prefix..., phase_name, string(timing_vec[phase_index]), size_column))
    end

    results_filename = params_to_results_filename(experiment_params)
    result_file_location = "Experiments/Results/Build_" * results_filename
    writedlm(result_file_location, build_times, ",")
end

# println("started estimating")
# run_estimation_experiments(experiment_params_list)
# println("started graphing")
println("started estimating")
run_estimation_experiments(experiment_params_list)
println("started graphing")
# compare how overall accuracy is affected by summary updates
# graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=estimate_error, grouping=proportion_not_updated, filename="overall-accuracy-and-updates")
# compare how cycle stat accuracies are affected by summary updates
# graph_grouped_box_plot(experiment_params_list, x_type=proportion_deleted, y_type=estimate_error, x_label="proportion added then deleted", y_label="accuracy", grouping=cycle_size, filename="deletion-experiment")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=build_time, y_lims=[0, 10], y_ticks = [0, 2, 4 ,6 ,8, 10], legend_pos=:topright, x_label="Proportion Updated", y_label="Build Time (S)", grouping=proportion_updated, filename="just-edge-updates-build")
graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=estimate_error,y_lims=[-20, 15], x_label="Proportion Updated", y_label="Estimate Error", grouping=proportion_updated, filename="just-edge-updates-error")
graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=runtime, y_lims=[10^-5, 10], y_ticks = [10^-5, 10^-4, 10^-3, 10^-2, 10^-1, 1, 10], x_label="Proportion Updated", y_label="Runtime (S)", grouping=proportion_updated, filename="just-edge-updates-runtime")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=memory_footprint, y_lims=[0, 20], y_ticks = [0, 5, 10, 15, 20], x_label="Proportion Updated", y_label="Memory (MB)", grouping=proportion_updated, filename="just-edge-updates-memory")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=build_time, ylims=[0, 10], y_ticks = [0, 2, 4 ,6 ,8, 10], legend_pos=:topright, x_label="Proportion Updated", y_label="Build Time (S)", grouping=proportion_updated, filename="just-edge-updates-build")
graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=estimate_error,ylims=[10^-20, 10^15], x_label="Proportion Updated", y_label="Estimate Error", grouping=proportion_updated, filename="just-edge-updates-error")
graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=runtime, ylims=[10^-5, 10], y_ticks = [10^-5, 10^-4, 10^-3, 10^-2, 10^-1, 1, 10], x_label="Proportion Updated", y_label="Runtime (S)", grouping=proportion_updated, filename="just-edge-updates-runtime")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=memory_footprint, ylims=[0, 20], y_ticks = [0, 5, 10, 15, 20], x_label="Proportion Updated", y_label="Memory (MB)", grouping=proportion_updated, filename="just-edge-updates-memory")
14 changes: 8 additions & 6 deletions Experiments/Scripts/just-node-and-edge-updates-experiment.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@ using Graphs
using Random
include("../Experiments.jl")

datasets::Vector{DATASET} = [aids, human, ]
datasets::Vector{DATASET} = [aids, human, wordnet]
# datasets::Vector{DATASET} = [aids, human, wordnet, dblp]
# datasets::Vector{DATASET} = [aids, human, yeast, wordnet, youtube, dblp, patents]
# datasets::Vector{DATASET} = [aids, human, lubm80, yeast, hprd, wordnet, dblp, youtube, eu2005, patents]
max_cycles = 6
proportions_updated = [0, 0.2, 0.4, 0.6, 0.8]
# proportions_updated = [0, 0.2, 0.4, 0.6, 0.8]
proportions_updated = [0, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9]

# To test deletion, we will add a random node / edge and then delete them...
# proportion_not_updated = 0.5

Expand Down Expand Up @@ -153,8 +155,8 @@ println("started graphing")
# graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=estimate_error, grouping=proportion_not_updated, filename="overall-accuracy-and-updates")
# compare how cycle stat accuracies are affected by summary updates
# graph_grouped_box_plot(experiment_params_list, x_type=proportion_deleted, y_type=estimate_error, x_label="proportion added then deleted", y_label="accuracy", grouping=cycle_size, filename="deletion-experiment")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=build_time, ylims=[0, 30], x_label="Proportion Updated", y_label="Build Time (S)", grouping=proportion_updated, legend_pos=:outerright, filename="ve-update-build")
graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=estimate_error, ylims=[10e-15, 10e15], x_label="Proportion Updated", y_label="Estimate Error", grouping=proportion_updated, legend_pos=:outerright, filename="ve-update-error")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=runtime, ylims=[0, 0.6], x_label="Proportion Updated", y_label="Runtime (S)", grouping=proportion_updated, legend_pos=:outerright, filename="ve-update-runtime")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=memory_footprint, ylims=[0, 20], x_label="Proportion Updated", y_label="Memory Footprint (B)", grouping=proportion_updated, legend_pos=:outerright, filename="ve-update-memory")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=build_time, ylims=[0, 30], x_label="Proportion Updated", y_label="Build Time (S)", grouping=proportion_updated, legend_pos=:outerright, filename="ve-update-build-color")
graph_grouped_box_plot(experiment_params_list, x_type=dataset, y_type=estimate_error, ylims=[10e-15, 10e15], x_label="Proportion Updated", y_label="Estimate Error", grouping=proportion_updated, legend_pos=:outerright, filename="ve-update-error-color")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=runtime, ylims=[0, 0.6], x_label="Proportion Updated", y_label="Runtime (S)", grouping=proportion_updated, legend_pos=:outerright, filename="ve-update-runtime-color")
graph_grouped_bar_plot(experiment_params_list, x_type=dataset, y_type=memory_footprint, ylims=[0, 20], x_label="Proportion Updated", y_label="Memory Footprint (B)", grouping=proportion_updated, legend_pos=:outerright, filename="ve-update-memory-color")
# graph_multi_group_bar_graph(experiment_params_list, x_type=proportion_updated, y_type=[build_time, update_time, total_time], x_label="Proportion Updated", y_label="Time (s)", y_lims=[0, 5], filename="ve-update-totaltime-aids")
1 change: 1 addition & 0 deletions Source/QuasiStableCardinalityEstimator.jl
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ function handle_extra_edges!(query::QueryGraph, summary::ColorSummary{DS}, parti
end
end
probability_no_edge *= 1.0 - summary.total_added_edges/summary.total_nodes^2
# probability_no_edge *= ((summary.total_nodes^2-1)/(summary.total_nodes^2))^summary.total_added_edges
partial_weights[i] = scale_coloring(partial_weights[i], (1.0 - probability_no_edge))
end
end
Expand Down
4 changes: 3 additions & 1 deletion Source/UpdateSummary.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,9 @@ function add_summary_node!(summary::ColorSummary{AvgDegStats}, node_labels, node
for node_label in node_labels
summary.color_label_cardinality[color][node_label] = get(summary.color_label_cardinality[color], node_label, 0) + 1
end
summary.color_label_cardinality[color][-1] = get(summary.color_label_cardinality[color], -1, 0) + 1
if !(-1 in node_labels)
summary.color_label_cardinality[color][-1] = get(summary.color_label_cardinality[color], -1, 0) + 1
end
summary.total_nodes += 1


Expand Down

0 comments on commit f3a2575

Please sign in to comment.