Improve wake-hash errors (#1399)

This change goes back to the old behavior of files that can't be hashed having their hash set as "BadHash" and then later having that bubble up as an error in getJobOutputs. In general wake hashing error messages have been improved here.
sifive · Aug 24, 2023 · f992d12 · f992d12
1 parent 4d7fbda
commit f992d12
Show file tree

Hide file tree

Showing 5 changed files with 56 additions and 30 deletions.
diff --git a/share/wake/lib/core/syntax.wake b/share/wake/lib/core/syntax.wake
@@ -99,4 +99,10 @@ export def wait (f: a => b) (x: a): b =
 export def unreachable (reason: String): a =
     def f x = prim "unreachable"
 
-    f reason
+    f "REACHED UNREACHABLE CODE: {reason}"
+
+# Like unreachable but with a different error message
+export def panic (reason: String): a =
+    def f x = prim "panic"
+
+    f "PANIC: {reason}"
diff --git a/share/wake/lib/system/job.wake b/share/wake/lib/system/job.wake
@@ -635,10 +635,9 @@ def runAlways cmd env dir stdin res uusage finputs foutputs vis keep run echo st
         def notOk (Pair name hash) =
             if hashcode name ==* hash then
                 Unit
-            else if abort # The panic will not be deadcode dropped, because it's an alternative return of the effect-ful notOk
-            # This use of unreachable is not ok!
-            then
-                unreachable "The hashcode of output file '{name}' has changed from {hash} (when wake last ran) to {hashcode name} (when inspected this time). Presumably it was hand edited. Please move this file out of the way. Aborting the build to prevent loss of your data."
+            # The panic will not be deadcode dropped, because it's an alternative return of the effect-ful notOk
+            else if abort then
+                panic "The hashcode of output file '{name}' has changed from {hash} (when wake last ran) to {hashcode name} (when inspected this time). Presumably it was hand edited. Please move this file out of the way. Aborting the build to prevent loss of your data."
             else
                 printlnLevel
                 logError

diff --git a/share/wake/lib/system/path.wake b/share/wake/lib/system/path.wake
@@ -196,10 +196,12 @@ def computeHashes (prefix: String) (files: List String): List String =
             | Pass
     else mergeSelect which_files_to_hash (map (add _ "BadHash") to_hash) not_to_hash
 
-    require Pass stdout =
-        plan
-        | runJobWith localRunner
-        | getJobStdout
+    def job = runJobWith localRunner plan
+
+    require Exited 0 = getJobStatus job
+    else mergeSelect which_files_to_hash (map (add _ "BadHash") to_hash) not_to_hash
+
+    require Pass stdout = getJobStdout job
     else mergeSelect which_files_to_hash (map (add _ "BadHash") to_hash) not_to_hash
 
     # We want a better error message if the number of lines do not match
@@ -210,7 +212,7 @@ def computeHashes (prefix: String) (files: List String): List String =
         | filter (_ !=* "")
 
     require True = len hash_lines == to_hash_len
-    else unreachable "wake-hash returned {format hash_lines} lines but we expected {str to_hash_len} lines"
+    else panic "wake-hash returned {format hash_lines} lines but we expected {str to_hash_len} lines"
 
     # Finally actually add all the hashes
     def hashed =

diff --git a/src/runtime/exception.cpp b/src/runtime/exception.cpp
@@ -53,16 +53,16 @@ static PRIMFN(prim_stack) {
   RETURN(claim_list(runtime.heap, objs.size(), objs.data()));
 }
 
-static PRIMTYPE(type_unreachable) {
+static PRIMTYPE(type_panic) {
   return args.size() == 1 && args[0]->unify(Data::typeString);
   (void)out;  // leave prim free
 }
 
-static PRIMFN(prim_unreachable) {
+static PRIMFN(prim_panic) {
   EXPECT(1);
   STRING(arg0, 0);
   std::stringstream str;
-  str << "REACHED UNREACHABLE CODE: " << arg0->c_str() << std::endl;
+  str << arg0->c_str() << std::endl;
   status_get_generic_stream(STREAM_ERROR) << str.str() << std::endl;
   require_fail("", 1, runtime, scope);
 }
@@ -85,7 +85,9 @@ static PRIMFN(prim_true) {
 void prim_register_exception(PrimMap &pmap) {
   // These should not be evaluated in const prop, but can be removed
   prim_register(pmap, "stack", prim_stack, type_stack, PRIM_ORDERED);
-  prim_register(pmap, "unreachable", prim_unreachable, type_unreachable, PRIM_ORDERED);
+  prim_register(pmap, "panic", prim_panic, type_panic, PRIM_IMPURE);
+  // We have both panic and unreachable, because "unreachable" is considered to be optimizable away
+  prim_register(pmap, "unreachable", prim_panic, type_panic, PRIM_ORDERED);
   prim_register(pmap, "use", prim_id, type_id, PRIM_IMPURE);
   prim_register(pmap, "true", prim_true, type_true, PRIM_PURE);
 }
diff --git a/tools/wake-hash/main.cpp b/tools/wake-hash/main.cpp
@@ -42,6 +42,7 @@
 
 #include "blake2/blake2.h"
 #include "compat/nofollow.h"
+#include "wcl/optional.h"
 #include "wcl/unique_fd.h"
 #include "wcl/xoshiro_256.h"
 
@@ -117,9 +118,18 @@ struct Hash256 {
   bool operator!=(Hash256 other) { return !(*this == other); }
 };
 
-static Hash256 hash_dir() { return Hash256(); }
+// If a file handle is not a symlink, directory, or regular file
+// then we consider it "exotic". This includes block devices,
+// character devices, FIFOs, and sockets.
+static wcl::optional<Hash256> hash_exotic() {
+  Hash256 out;
+  out.data[0] = 1;
+  return wcl::make_some<Hash256>(out);
+}
+
+static wcl::optional<Hash256> hash_dir() { return wcl::some(Hash256()); }
 
-static Hash256 hash_link(const char* link) {
+static wcl::optional<Hash256> hash_link(const char* link) {
   blake2b_state S;
   uint8_t hash[HASH_BYTES];
   std::vector<char> buffer(8192, 0);
@@ -128,7 +138,7 @@ static Hash256 hash_link(const char* link) {
     int bytes_read = readlink(link, buffer.data(), buffer.size());
     if (bytes_read < 0) {
       std::cerr << "wake-hash: readlink(" << link << "): " << strerror(errno) << std::endl;
-      exit(1);
+      return {};
     }
     if (static_cast<size_t>(bytes_read) != buffer.size()) {
       buffer.resize(bytes_read);
@@ -141,10 +151,10 @@ static Hash256 hash_link(const char* link) {
   blake2b_update(&S, reinterpret_cast<uint8_t*>(buffer.data()), buffer.size());
   blake2b_final(&S, &hash[0], sizeof(hash));
 
-  return Hash256::from_hash(&hash);
+  return wcl::some(Hash256::from_hash(&hash));
 }
 
-static Hash256 hash_file(const char* file, int fd) {
+static wcl::optional<Hash256> hash_file(const char* file, int fd) {
   blake2b_state S;
   uint8_t hash[HASH_BYTES], buffer[8192];
   ssize_t got;
@@ -155,40 +165,42 @@ static Hash256 hash_file(const char* file, int fd) {
 
   if (got < 0) {
     std::cerr << "wake-hash read(" << file << "): " << strerror(errno) << std::endl;
-    exit(1);
+    return {};
   }
 
-  return Hash256::from_hash(&hash);
+  return wcl::some(Hash256::from_hash(&hash));
 }
 
-static Hash256 do_hash(const char* file) {
+static wcl::optional<Hash256> do_hash(const char* file) {
   struct stat stat;
   auto fd = wcl::unique_fd::open(file, O_RDONLY | O_NOFOLLOW);
 
   if (!fd) {
     if (fd.error() == EISDIR) return hash_dir();
     if (fd.error() == ELOOP || errno == EMLINK) return hash_link(file);
+    if (fd.error() == ENXIO) return hash_exotic();
     std::cerr << "wake-hash open(" << file << "): " << strerror(errno);
-    exit(1);
+    return {};
   }
 
   if (fstat(fd->get(), &stat) != 0) {
     if (errno == EISDIR) return hash_dir();
     std::cerr << "wake-hash fstat(" << file << "): " << strerror(errno);
-    exit(1);
+    return {};
   }
 
   if (S_ISDIR(stat.st_mode)) return hash_dir();
   if (S_ISLNK(stat.st_mode)) return hash_link(file);
+  if (S_ISREG(stat.st_mode)) return hash_file(file, fd->get());
 
-  return hash_file(file, fd->get());
+  return hash_exotic();
 }
 
-std::vector<Hash256> hash_all_files(const std::vector<std::string>& files_to_hash) {
+std::vector<wcl::optional<Hash256>> hash_all_files(const std::vector<std::string>& files_to_hash) {
   std::atomic<size_t> counter{0};
   // We have to pre-alocate all the hashes so that we can overwrite them each
   // at anytime and maintain order
-  std::vector<Hash256> hashes(files_to_hash.size());
+  std::vector<wcl::optional<Hash256>> hashes(files_to_hash.size());
   // The cost of thread creation is fairly low with Linux on x86 so we allow opening up-to one
   // thread per-file.
   size_t num_threads = std::min(size_t(std::thread::hardware_concurrency()), files_to_hash.size());
@@ -245,11 +257,16 @@ int main(int argc, char** argv) {
     }
   }
 
-  std::vector<Hash256> hashes = hash_all_files(files_to_hash);
+  std::vector<wcl::optional<Hash256>> hashes = hash_all_files(files_to_hash);
 
-  // Now output them in the same order that we received them
+  // Now output them in the same order that we received them. If we could
+  // not hash something, return "BadHash" in that case.
   for (auto& hash : hashes) {
-    std::cout << hash.to_hex() << std::endl;
+    if (hash) {
+      std::cout << hash->to_hex() << std::endl;
+    } else {
+      std::cout << "BadHash" << std::endl;
+    }
   }
 
   return 0;