Skip to content

Commit

Permalink
Improve wake-hash errors (#1399)
Browse files Browse the repository at this point in the history
This change goes back to the old behavior of files that can't be hashed having their hash set as "BadHash" and then later having that bubble up as an error in getJobOutputs. In general wake hashing error messages have been improved here.
  • Loading branch information
JakeSiFive authored Aug 24, 2023
1 parent 4d7fbda commit f992d12
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 30 deletions.
8 changes: 7 additions & 1 deletion share/wake/lib/core/syntax.wake
Original file line number Diff line number Diff line change
Expand Up @@ -99,4 +99,10 @@ export def wait (f: a => b) (x: a): b =
export def unreachable (reason: String): a =
def f x = prim "unreachable"

f reason
f "REACHED UNREACHABLE CODE: {reason}"

# Like unreachable but with a different error message
export def panic (reason: String): a =
def f x = prim "panic"

f "PANIC: {reason}"
7 changes: 3 additions & 4 deletions share/wake/lib/system/job.wake
Original file line number Diff line number Diff line change
Expand Up @@ -635,10 +635,9 @@ def runAlways cmd env dir stdin res uusage finputs foutputs vis keep run echo st
def notOk (Pair name hash) =
if hashcode name ==* hash then
Unit
else if abort # The panic will not be deadcode dropped, because it's an alternative return of the effect-ful notOk
# This use of unreachable is not ok!
then
unreachable "The hashcode of output file '{name}' has changed from {hash} (when wake last ran) to {hashcode name} (when inspected this time). Presumably it was hand edited. Please move this file out of the way. Aborting the build to prevent loss of your data."
# The panic will not be deadcode dropped, because it's an alternative return of the effect-ful notOk
else if abort then
panic "The hashcode of output file '{name}' has changed from {hash} (when wake last ran) to {hashcode name} (when inspected this time). Presumably it was hand edited. Please move this file out of the way. Aborting the build to prevent loss of your data."
else
printlnLevel
logError
Expand Down
12 changes: 7 additions & 5 deletions share/wake/lib/system/path.wake
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,12 @@ def computeHashes (prefix: String) (files: List String): List String =
| Pass
else mergeSelect which_files_to_hash (map (add _ "BadHash") to_hash) not_to_hash

require Pass stdout =
plan
| runJobWith localRunner
| getJobStdout
def job = runJobWith localRunner plan

require Exited 0 = getJobStatus job
else mergeSelect which_files_to_hash (map (add _ "BadHash") to_hash) not_to_hash

require Pass stdout = getJobStdout job
else mergeSelect which_files_to_hash (map (add _ "BadHash") to_hash) not_to_hash

# We want a better error message if the number of lines do not match
Expand All @@ -210,7 +212,7 @@ def computeHashes (prefix: String) (files: List String): List String =
| filter (_ !=* "")

require True = len hash_lines == to_hash_len
else unreachable "wake-hash returned {format hash_lines} lines but we expected {str to_hash_len} lines"
else panic "wake-hash returned {format hash_lines} lines but we expected {str to_hash_len} lines"

# Finally actually add all the hashes
def hashed =
Expand Down
10 changes: 6 additions & 4 deletions src/runtime/exception.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,16 @@ static PRIMFN(prim_stack) {
RETURN(claim_list(runtime.heap, objs.size(), objs.data()));
}

static PRIMTYPE(type_unreachable) {
static PRIMTYPE(type_panic) {
return args.size() == 1 && args[0]->unify(Data::typeString);
(void)out; // leave prim free
}

static PRIMFN(prim_unreachable) {
static PRIMFN(prim_panic) {
EXPECT(1);
STRING(arg0, 0);
std::stringstream str;
str << "REACHED UNREACHABLE CODE: " << arg0->c_str() << std::endl;
str << arg0->c_str() << std::endl;
status_get_generic_stream(STREAM_ERROR) << str.str() << std::endl;
require_fail("", 1, runtime, scope);
}
Expand All @@ -85,7 +85,9 @@ static PRIMFN(prim_true) {
void prim_register_exception(PrimMap &pmap) {
// These should not be evaluated in const prop, but can be removed
prim_register(pmap, "stack", prim_stack, type_stack, PRIM_ORDERED);
prim_register(pmap, "unreachable", prim_unreachable, type_unreachable, PRIM_ORDERED);
prim_register(pmap, "panic", prim_panic, type_panic, PRIM_IMPURE);
// We have both panic and unreachable, because "unreachable" is considered to be optimizable away
prim_register(pmap, "unreachable", prim_panic, type_panic, PRIM_ORDERED);
prim_register(pmap, "use", prim_id, type_id, PRIM_IMPURE);
prim_register(pmap, "true", prim_true, type_true, PRIM_PURE);
}
49 changes: 33 additions & 16 deletions tools/wake-hash/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@

#include "blake2/blake2.h"
#include "compat/nofollow.h"
#include "wcl/optional.h"
#include "wcl/unique_fd.h"
#include "wcl/xoshiro_256.h"

Expand Down Expand Up @@ -117,9 +118,18 @@ struct Hash256 {
bool operator!=(Hash256 other) { return !(*this == other); }
};

static Hash256 hash_dir() { return Hash256(); }
// If a file handle is not a symlink, directory, or regular file
// then we consider it "exotic". This includes block devices,
// character devices, FIFOs, and sockets.
static wcl::optional<Hash256> hash_exotic() {
Hash256 out;
out.data[0] = 1;
return wcl::make_some<Hash256>(out);
}

static wcl::optional<Hash256> hash_dir() { return wcl::some(Hash256()); }

static Hash256 hash_link(const char* link) {
static wcl::optional<Hash256> hash_link(const char* link) {
blake2b_state S;
uint8_t hash[HASH_BYTES];
std::vector<char> buffer(8192, 0);
Expand All @@ -128,7 +138,7 @@ static Hash256 hash_link(const char* link) {
int bytes_read = readlink(link, buffer.data(), buffer.size());
if (bytes_read < 0) {
std::cerr << "wake-hash: readlink(" << link << "): " << strerror(errno) << std::endl;
exit(1);
return {};
}
if (static_cast<size_t>(bytes_read) != buffer.size()) {
buffer.resize(bytes_read);
Expand All @@ -141,10 +151,10 @@ static Hash256 hash_link(const char* link) {
blake2b_update(&S, reinterpret_cast<uint8_t*>(buffer.data()), buffer.size());
blake2b_final(&S, &hash[0], sizeof(hash));

return Hash256::from_hash(&hash);
return wcl::some(Hash256::from_hash(&hash));
}

static Hash256 hash_file(const char* file, int fd) {
static wcl::optional<Hash256> hash_file(const char* file, int fd) {
blake2b_state S;
uint8_t hash[HASH_BYTES], buffer[8192];
ssize_t got;
Expand All @@ -155,40 +165,42 @@ static Hash256 hash_file(const char* file, int fd) {

if (got < 0) {
std::cerr << "wake-hash read(" << file << "): " << strerror(errno) << std::endl;
exit(1);
return {};
}

return Hash256::from_hash(&hash);
return wcl::some(Hash256::from_hash(&hash));
}

static Hash256 do_hash(const char* file) {
static wcl::optional<Hash256> do_hash(const char* file) {
struct stat stat;
auto fd = wcl::unique_fd::open(file, O_RDONLY | O_NOFOLLOW);

if (!fd) {
if (fd.error() == EISDIR) return hash_dir();
if (fd.error() == ELOOP || errno == EMLINK) return hash_link(file);
if (fd.error() == ENXIO) return hash_exotic();
std::cerr << "wake-hash open(" << file << "): " << strerror(errno);
exit(1);
return {};
}

if (fstat(fd->get(), &stat) != 0) {
if (errno == EISDIR) return hash_dir();
std::cerr << "wake-hash fstat(" << file << "): " << strerror(errno);
exit(1);
return {};
}

if (S_ISDIR(stat.st_mode)) return hash_dir();
if (S_ISLNK(stat.st_mode)) return hash_link(file);
if (S_ISREG(stat.st_mode)) return hash_file(file, fd->get());

return hash_file(file, fd->get());
return hash_exotic();
}

std::vector<Hash256> hash_all_files(const std::vector<std::string>& files_to_hash) {
std::vector<wcl::optional<Hash256>> hash_all_files(const std::vector<std::string>& files_to_hash) {
std::atomic<size_t> counter{0};
// We have to pre-alocate all the hashes so that we can overwrite them each
// at anytime and maintain order
std::vector<Hash256> hashes(files_to_hash.size());
std::vector<wcl::optional<Hash256>> hashes(files_to_hash.size());
// The cost of thread creation is fairly low with Linux on x86 so we allow opening up-to one
// thread per-file.
size_t num_threads = std::min(size_t(std::thread::hardware_concurrency()), files_to_hash.size());
Expand Down Expand Up @@ -245,11 +257,16 @@ int main(int argc, char** argv) {
}
}

std::vector<Hash256> hashes = hash_all_files(files_to_hash);
std::vector<wcl::optional<Hash256>> hashes = hash_all_files(files_to_hash);

// Now output them in the same order that we received them
// Now output them in the same order that we received them. If we could
// not hash something, return "BadHash" in that case.
for (auto& hash : hashes) {
std::cout << hash.to_hex() << std::endl;
if (hash) {
std::cout << hash->to_hex() << std::endl;
} else {
std::cout << "BadHash" << std::endl;
}
}

return 0;
Expand Down

0 comments on commit f992d12

Please sign in to comment.