From 1fbb17ebd2af945a05dc44511dc6ac201cdb5c6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Basile=20Cl=C3=A9ment?= Date: Wed, 7 Jun 2023 18:35:15 +0200 Subject: [PATCH] Store prover stdout and stderr in temporary files This changes the way Benchpress collects the prover's outputs from using a pipe to using temporary files that are read once the prover is done. On Unix systems, the temporary files default to being stored in `/dev/shm`; on other platforms, the default temporary directory is used. This ensures that the prover never gets stuck waiting for benchpress to read from pipes, but more importantly minimises context switches between the prover and benchpress. --- src/bin/benchpress_bin.ml | 14 +++++- src/core/Misc.ml | 33 +++++++++++++ src/core/Run_proc.ml | 100 ++++++++++++++++++++++++-------------- 3 files changed, 108 insertions(+), 39 deletions(-) diff --git a/src/bin/benchpress_bin.ml b/src/bin/benchpress_bin.ml index 5c36aef..436c997 100644 --- a/src/bin/benchpress_bin.ml +++ b/src/bin/benchpress_bin.ml @@ -21,9 +21,12 @@ module Run = struct let open Cmdliner in let aux j cpus pp_results dyn paths dir_files proof_dir defs task timeout memory meta provers csv summary no_color output save wal_mode - desktop_notification no_failure update = + desktop_notification no_failure update ramdisk = catch_err @@ fun () -> if no_color then CCFormat.set_color_default false; + (match ramdisk with + | Some ramdisk -> Misc.set_ramdisk ramdisk + | None -> ()); let dyn = if dyn then Some true @@ -160,13 +163,20 @@ module Run = struct ~doc: "if the output file already exists, overwrite it with the new \ one.") + and ramdisk = + let doc = + "Path to the directory to use as temporary storage for prover input \ + and output." + in + Arg.(value & opt (some string) None & info [ "ramdisk" ] ~doc) in + Cmd.v (Cmd.info ~doc "run") Term.( const aux $ j $ cpus $ pp_results $ dyn $ paths $ dir_files $ proof_dir $ defs $ task $ timeout $ memory $ meta $ provers $ csv $ summary $ no_color $ output $ save $ wal_mode $ desktop_notification - $ no_failure $ update) + $ no_failure $ update $ ramdisk) end module Slurm = struct diff --git a/src/core/Misc.ml b/src/core/Misc.ml index eb174f6..5b817be 100644 --- a/src/core/Misc.ml +++ b/src/core/Misc.ml @@ -478,6 +478,39 @@ let establish_server n server_fun sockaddr = let sock, _ = mk_socket sockaddr in start_server n server_fun sock +(* Not actually a ramdisk on windows *) +let ramdisk_ref = + ref + (if Sys.unix then + Some "/dev/shm" + else + None) + +let has_ramdisk () = Option.is_some !ramdisk_ref + +let get_ramdisk () = + match !ramdisk_ref with + | Some ramdisk -> ramdisk + | None -> Filename.get_temp_dir_name () + +let set_ramdisk ramdisk = ramdisk_ref := Some ramdisk + +(* Turns out that [Filename.open_temp_file] is not thread-safe *) +let open_temp_file = CCLock.create Filename.open_temp_file + +let open_ram_file prefix suffix = + let temp_dir = get_ramdisk () in + CCLock.with_lock open_temp_file @@ fun open_temp_file -> + open_temp_file ~temp_dir prefix suffix + +let with_ram_file prefix suffix f = + let fname, oc = open_ram_file prefix suffix in + Fun.protect + ~finally:(fun () -> + close_out oc; + try Sys.remove fname with Sys_error _ -> ()) + (fun () -> f fname) + let with_affinity cpu f = let aff = Processor.Affinity.get_ids () in Processor.Affinity.set_ids [ cpu ]; diff --git a/src/core/Run_proc.ml b/src/core/Run_proc.ml index f0145fc..5b390f6 100644 --- a/src/core/Run_proc.ml +++ b/src/core/Run_proc.ml @@ -3,41 +3,67 @@ module Log = (val Logs.src_log (Logs.Src.create "run-proc")) let int_of_process_status = function | Unix.WEXITED i | Unix.WSIGNALED i | Unix.WSTOPPED i -> i +(* There is no version of [create_process] that opens a shell, so we roll our + own version of [open_process] . *) +let sh cmd = + if Sys.unix || Sys.cygwin then + Unix.create_process "/bin/sh" [| "/bin/sh"; "-c"; cmd |] + else if Sys.win32 then ( + let shell = + try Sys.getenv "COMSPEC" + with Not_found -> raise Unix.(Unix_error (ENOEXEC, "sh", cmd)) + in + Unix.create_process shell [| shell; "/c"; cmd |] + ) else + Format.kasprintf failwith "Unsupported OS type: %s" Sys.os_type + +(* Available as [Filename.null] on OCaml >= 4.10 *) +let null () = + if Sys.unix || Sys.cygwin then + "/dev/null" + else if Sys.win32 then + "NUL" + else + (* Nobody is running benchpress with js_of_ocaml… right? *) + Format.kasprintf failwith "Unsupported OS type: %s" Sys.os_type + let run cmd : Run_proc_result.t = - let start = Ptime_clock.now () in - (* call process and block *) - let p = - try - let oc, ic, errc = Unix.open_process_full cmd (Unix.environment ()) in - close_out ic; - (* read out and err *) - let err = ref "" in - let t_err = Thread.create (fun e -> err := CCIO.read_all e) errc in - let out = CCIO.read_all oc in - Thread.join t_err; - let status = Unix.close_process_full (oc, ic, errc) in - object - method stdout = out - method stderr = !err - method errcode = int_of_process_status status - method status = status - end - with e -> - object - method stdout = "" - method stderr = "process died: " ^ Printexc.to_string e - method errcode = 1 - method status = Unix.WEXITED 1 - end - in - let errcode = p#errcode in - Log.debug (fun k -> - k "(@[run.done@ :errcode %d@ :cmd %a@]" errcode Misc.Pp.pp_str cmd); - (* Compute time used by the command *) - let rtime = Ptime.diff (Ptime_clock.now ()) start |> Ptime.Span.to_float_s in - let utime = 0. in - let stime = 0. in - let stdout = p#stdout in - let stderr = p#stderr in - Log.debug (fun k -> k "stdout:\n%s\nstderr:\n%s" stdout stderr); - { Run_proc_result.stdout; stderr; errcode; rtime; utime; stime } + (* create temporary files for stdout and stderr *) + try + Misc.with_ram_file "stdout" "" @@ fun stdout_f -> + Misc.with_ram_file "stderr" "" @@ fun stderr_f -> + let stdout_fd = Unix.openfile stdout_f [ O_WRONLY; O_KEEPEXEC ] 0o640 in + let stderr_fd = Unix.openfile stderr_f [ O_WRONLY; O_KEEPEXEC ] 0o640 in + let stdin_fd = Unix.openfile (null ()) [ O_RDONLY; O_KEEPEXEC ] 0o000 in + (* call process and block *) + let errcode, rtime = + let start = Ptime_clock.now () in + let pid = sh cmd stdin_fd stdout_fd stderr_fd in + let _, status = Unix.waitpid [] pid in + (* Compute time used by the command *) + let rtime = + Ptime.diff (Ptime_clock.now ()) start |> Ptime.Span.to_float_s + in + int_of_process_status status, rtime + in + let stdout = CCIO.with_in stdout_f @@ CCIO.read_all in + let stderr = CCIO.with_in stderr_f @@ CCIO.read_all in + Unix.close stdout_fd; + Unix.close stderr_fd; + Unix.close stdin_fd; + Log.debug (fun k -> + k "(@[run.done@ :errcode %d@ :cmd %a@ :stdout %s@ :stdout_f %s@]" + errcode Misc.Pp.pp_str cmd stdout stdout_f); + let utime = 0. in + let stime = 0. in + Log.debug (fun k -> k "stdout:\n%s\nstderr:\n%s" stdout stderr); + { Run_proc_result.stdout; stderr; errcode; rtime; utime; stime } + with e -> + { + stdout = ""; + stderr = "benchpress error: " ^ Printexc.to_string e; + errcode = 1; + rtime = 0.; + utime = 0.; + stime = 0.; + }