Skip to content

Commit

Permalink
ggml perf bugfix (per-node perf stats in ggml_graph_compute_thread; load-time tracking in falcon_init_from_file)
Browse files (browse the repository at this point in the history)
  • Loading branch information
John committed Jul 10, 2023
1 parent 1d6e234 commit 9383922
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
13 changes: 8 additions & 5 deletions ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -17234,8 +17234,8 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
if (GGML_OP_HAS_FINALIZE[node->op]) {
params.type = GGML_TASK_FINALIZE;
ggml_compute_forward(&params, node);
ggml_graph_compute_perf_stats_node(node, state->shared);
}
}
ggml_graph_compute_perf_stats_node(node, state->shared);
} else {
break;
}
Expand Down Expand Up @@ -17269,10 +17269,11 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
if (state->ith < node->n_tasks) {
ggml_compute_forward(&params, node);
}
}
ggml_graph_compute_perf_stats_node(node, state->shared);
}

return 0;
}
return 0;
}

void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) {
const int n_threads = cgraph->n_threads;
Expand Down Expand Up @@ -18246,6 +18247,8 @@ void ggml_graph_print_impl(const struct ggml_cgraph * cgraph, bool print_nodes,
GGML_PRINT("n_nodes = %d\n", cgraph->n_nodes);
for (int i = 0; i < cgraph->n_nodes; i++) {
struct ggml_tensor * node = cgraph->nodes[i];
node->perf_time_us = MAX(node->perf_time_us, 1); // should not happen anymore
node->perf_runs = MAX(node->perf_runs, 1);

perf_total_per_op_us[node->op] += MAX(1, node->perf_time_us);
perf_array[i] = node->perf_time_us / node->perf_runs;
Expand Down
5 changes: 3 additions & 2 deletions libfalcon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3793,7 +3793,7 @@ struct falcon_context * falcon_init_from_file(
}
};
}

int64_t t_start_us = ggml_time_us();
ggml_type memory_type = params.f16_kv ? GGML_TYPE_F16 : GGML_TYPE_F32;
falcon_model *model = falcon_model_load(path_model, params.n_ctx, params.n_batch, params.n_gpu_layers,
params.main_gpu, memory_type, params.use_mmap, params.use_mlock,
Expand All @@ -3808,7 +3808,8 @@ struct falcon_context * falcon_init_from_file(
params.i_gpu_start = model->i_gpu_start; // first layer that's GPU accelerated
params.i_gpu_last = model->i_gpu_last; // last layer that's GPU accelerated
falcon_context * f_ctx = falcon_context_prepare(params, model, "falcon_main",true);

f_ctx->t_load_us = ggml_time_us() - t_start_us;
f_ctx->t_start_us = t_start_us;
//falcon_context_set_buffers(f_ctx,params.n_batch,params.n_ctx);
//const size_t memory_size = ggml_nbytes(model->kv_self.k) + ggml_nbytes(model->kv_self.v);
//fprintf(stderr, "%s: RAM buffers - key_val = %7.2f MB, Compute = %7.2f MB, Scratch 0 = %7.2f MB, Scratch 1 = %7.2f MB \n", __func__, memory_size / 1024.0 / 1024.0, f_ctx->buf_compute.size /1024.0/1024.0, (f_ctx->buf_scratch[0].size)/1024.0/1024.0, (f_ctx->buf_scratch[1].size)/1024.0/1024.0);
Expand Down

0 comments on commit 9383922

Please sign in to comment.