Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automatic crash dump retrieval #921

Merged
merged 23 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/Depthai/DepthaiDeviceSideConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
set(DEPTHAI_DEVICE_SIDE_MATURITY "snapshot")

# "full commit hash of device side binary"
set(DEPTHAI_DEVICE_SIDE_COMMIT "1cf15832ab1f408d8e4dab72e901ce050bf8850b")
set(DEPTHAI_DEVICE_SIDE_COMMIT "7e10e2abee375b9e9144b6619d3db014aee27dbb")

# "version if applicable"
set(DEPTHAI_DEVICE_SIDE_VERSION "")
4 changes: 4 additions & 0 deletions include/depthai/device/DeviceBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,8 @@ class DeviceBase {
void init(Config config, const DeviceInfo& devInfo, UsbSpeed maxUsbSpeed);
void init(Config config, const DeviceInfo& devInfo, const dai::Path& pathToCmd);

void createRpc();

private:
// private functions
void init2(Config cfg, const dai::Path& pathToMvcmd, tl::optional<const Pipeline&> pipeline);
Expand Down Expand Up @@ -932,5 +934,7 @@ class DeviceBase {

// Device config
Config config;

dai::Path firmwarePath;
};
} // namespace dai
127 changes: 100 additions & 27 deletions src/device/DeviceBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "pipeline/Pipeline.hpp"
#include "utility/EepromDataParser.hpp"
#include "utility/Environment.hpp"
#include "utility/Files.hpp"
#include "utility/Initialization.hpp"
#include "utility/PimplImpl.hpp"
#include "utility/Resources.hpp"
Expand Down Expand Up @@ -517,6 +518,15 @@ void DeviceBase::closeImpl() {
auto t1 = steady_clock::now();
pimpl->logger.debug("Device about to be closed...");

bool shouldGetCrashDump = false;
try {
int status = pimpl->rpcClient->call("checkShutdown").as<int>();
pimpl->logger.debug("Device exit status {}", status);
} catch(const std::exception& ex) {
pimpl->logger.debug("checkShutdown call error: {}", ex.what());
shouldGetCrashDump = true;
}

// Close connection first; causes Xlink internal calls to unblock semaphore waits and
// return error codes, which then allows queues to unblock
// always manage ownership because other threads (e.g. watchdog) are running and need to
Expand Down Expand Up @@ -545,6 +555,64 @@ void DeviceBase::closeImpl() {
pimpl->rpcStream = nullptr;
pimpl->rpcClient = nullptr;

// Get crash dump if needed
if(shouldGetCrashDump) {
pimpl->logger.debug("Getting crash dump...");
auto t1 = steady_clock::now();
bool gotDump = false;
bool found = false;
do {
DeviceInfo rebootingDeviceInfo;
std::tie(found, rebootingDeviceInfo) = XLinkConnection::getDeviceByMxId(deviceInfo.getMxId(), X_LINK_ANY_STATE, false);
if(found && (rebootingDeviceInfo.state == X_LINK_UNBOOTED || rebootingDeviceInfo.state == X_LINK_BOOTLOADER)) {
std::vector<std::uint8_t> fwWithConfig = Resources::getInstance().getDeviceFirmware(config, firmwarePath);
deviceInfo = rebootingDeviceInfo;
if(deviceInfo.state == X_LINK_UNBOOTED) {
connection = std::make_shared<XLinkConnection>(deviceInfo, fwWithConfig);
} else {
XLinkDeviceState_t expectedBootState = X_LINK_BOOTED;
if(config.nonExclusiveMode) {
expectedBootState = X_LINK_BOOTED_NON_EXCLUSIVE;
}
{
DeviceBootloader bl(deviceInfo);
auto version = bl.getVersion();
bootloaderVersion = version;

if(version >= DeviceBootloader::Version(0, 0, 12)) {
bl.bootMemory(fwWithConfig);
deviceInfo.state = expectedBootState;
} else {
bl.bootUsbRomBootloader();
deviceInfo.state = X_LINK_UNBOOTED;
}
}
connection = std::make_shared<XLinkConnection>(deviceInfo, fwWithConfig, expectedBootState);
}
createRpc();
auto dump = getCrashDump();
std::vector<uint8_t> data;
utility::serialize<SerializationType::JSON>(dump, data);
auto crashDumpPathStr = utility::getEnv("DEPTHAI_CRASHDUMP");
auto path = saveFileToTemporaryDirectory(data, deviceInfo.getMxId() + "-depthai_crash_dump.json", crashDumpPathStr);
if(path.has_value()) {
pimpl->logger.warn("Device crashed. Crash dump saved to {}", path.value());
} else {
pimpl->logger.warn("Device crashed. Crash dump could not be saved");
}
gotDump = true;
break;
}
} while(!found && steady_clock::now() - t1 < std::chrono::seconds(7));
if(!gotDump) {
pimpl->logger.error("Device likely crashed but did not reboot in time to get the crash dump");
}
// Close rpcStream
connection->close();
pimpl->rpcStream = nullptr;
pimpl->rpcClient = nullptr;
}

asahtik marked this conversation as resolved.
Show resolved Hide resolved
pimpl->logger.debug("Device closed, {}", duration_cast<milliseconds>(steady_clock::now() - t1).count());
}

Expand Down Expand Up @@ -594,12 +662,43 @@ void DeviceBase::init(Config config, UsbSpeed maxUsbSpeed, const dai::Path& path
init2(cfg, pathToMvcmd, {});
}

void DeviceBase::createRpc() {
// prepare rpc for both attached and host controlled mode
pimpl->rpcStream = std::make_shared<XLinkStream>(connection, device::XLINK_CHANNEL_MAIN_RPC, device::XLINK_USB_BUFFER_MAX_SIZE);
auto rpcStream = pimpl->rpcStream;

pimpl->rpcClient = std::make_unique<nanorpc::core::client<nanorpc::packer::nlohmann_msgpack>>([this, rpcStream](nanorpc::core::type::buffer request) {
// Lock for time of the RPC call, to not mix the responses between calling threads.
// Note: might cause issues on Windows on incorrect shutdown. To be investigated
std::unique_lock<std::mutex> lock(pimpl->rpcMutex);

// Log the request data
if(getLogOutputLevel() == LogLevel::TRACE) {
pimpl->logger.trace("RPC: {}", nlohmann::json::from_msgpack(request).dump());
}

try {
// Send request to device
rpcStream->write(std::move(request));

// Receive response back
// Send to nanorpc to parse
return rpcStream->read();
} catch(const std::exception& e) {
// If any exception is thrown, log it and rethrow
pimpl->logger.debug("RPC error: {}", e.what());
throw std::system_error(std::make_error_code(std::errc::io_error), "Device already closed or disconnected");
}
});
}

void DeviceBase::init2(Config cfg, const dai::Path& pathToMvcmd, tl::optional<const Pipeline&> pipeline) {
// Initialize depthai library if not already
initialize();

// Specify cfg
config = cfg;
firmwarePath = pathToMvcmd;

// Apply nonExclusiveMode
config.board.nonExclusiveMode = config.nonExclusiveMode;
Expand Down Expand Up @@ -736,33 +835,7 @@ void DeviceBase::init2(Config cfg, const dai::Path& pathToMvcmd, tl::optional<co

deviceInfo.state = expectedBootState;

// prepare rpc for both attached and host controlled mode
pimpl->rpcStream = std::make_shared<XLinkStream>(connection, device::XLINK_CHANNEL_MAIN_RPC, device::XLINK_USB_BUFFER_MAX_SIZE);
auto rpcStream = pimpl->rpcStream;

pimpl->rpcClient = std::make_unique<nanorpc::core::client<nanorpc::packer::nlohmann_msgpack>>([this, rpcStream](nanorpc::core::type::buffer request) {
// Lock for time of the RPC call, to not mix the responses between calling threads.
// Note: might cause issues on Windows on incorrect shutdown. To be investigated
std::unique_lock<std::mutex> lock(pimpl->rpcMutex);

// Log the request data
if(getLogOutputLevel() == LogLevel::TRACE) {
pimpl->logger.trace("RPC: {}", nlohmann::json::from_msgpack(request).dump());
}

try {
// Send request to device
rpcStream->write(std::move(request));

// Receive response back
// Send to nanorpc to parse
return rpcStream->read();
} catch(const std::exception& e) {
// If any exception is thrown, log it and rethrow
pimpl->logger.debug("RPC error: {}", e.what());
throw std::system_error(std::make_error_code(std::errc::io_error), "Device already closed or disconnected");
}
});
createRpc();

// prepare watchdog thread, which will keep device alive
// separate stream so it doesn't miss between potentially long RPC calls
Expand Down
38 changes: 38 additions & 0 deletions src/utility/Files.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#pragma once

#include <spdlog/spdlog.h>

#include <fstream>
#include <tl/optional.hpp>

#include "Platform.hpp"

namespace dai {

/**
 * Writes a byte buffer to a file located in the given directory, or in the platform
 * temporary directory when no directory is given.
 *
 * Declared `inline` because it is defined in a header: without it, including this header
 * from more than one translation unit produces multiple-definition link errors.
 *
 * @param data Bytes to write to the file
 * @param filename Name of the file to create inside the target directory
 * @param fpath Target directory; when empty, platform::getTempPath() is used instead
 * @return Full path of the written file, or tl::nullopt if the file could not be
 *         opened or the write failed (an error is logged in both cases)
 */
inline tl::optional<std::string> saveFileToTemporaryDirectory(std::vector<uint8_t> data, std::string filename, std::string fpath = "") {
    if(fpath.empty()) {
        fpath = platform::getTempPath();
    }

    std::string path = fpath;
    // Append a separator only when one is missing. The emptiness check guards against
    // calling back() on an empty string (undefined behavior) should no directory be available;
    // in that case the file name is used as-is, i.e. relative to the working directory.
    if(!path.empty() && path.back() != '/' && path.back() != '\\') {
        path += '/';
    }
    path += filename;

    std::ofstream file(path, std::ios::binary);
    if(!file.is_open()) {
        spdlog::error("Couldn't open file {} for writing", path);
        return tl::nullopt;
    }

    file.write(reinterpret_cast<const char*>(data.data()), static_cast<std::streamsize>(data.size()));
    // Close before checking the state so that errors surfacing on flush are detected as well
    file.close();
    if(!file.good()) {
        spdlog::error("Couldn't write to file {}", path);
        return tl::nullopt;
    }
    spdlog::debug("Saved file {} to {}", filename, path);
    return path;
}

} // namespace dai
19 changes: 19 additions & 0 deletions src/utility/Platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,24 @@ std::string getIPv4AddressAsString(std::uint32_t binary) {
return {address};
}

/**
 * Returns a directory path usable for temporary files. The returned path always ends
 * with a path separator, so a file name can be appended directly.
 *
 * - Windows: the path reported by GetTempPathA; falls back to the current directory
 *   (".\\") if the call fails.
 * - Other platforms: a directory freshly created by mkdtemp from the template
 *   "/tmp/depthai_XXXXXX" (a new directory on every call); falls back to "/tmp/"
 *   if the directory cannot be created.
 */
std::string getTempPath() {
    std::string tmpPath;
#if defined(_WIN32) || defined(__USE_W32_SOCKETS)
    // GetTempPathA can report up to MAX_PATH + 1 characters, plus the terminating null
    char tmpPathBuffer[MAX_PATH + 2] = {};
    const DWORD length = GetTempPathA(static_cast<DWORD>(sizeof(tmpPathBuffer)), tmpPathBuffer);
    if(length == 0 || length >= sizeof(tmpPathBuffer)) {
        // 0 signals failure, a value beyond the buffer size means nothing usable was written;
        // never read the buffer in those cases
        tmpPath = ".\\";
    } else {
        tmpPath.assign(tmpPathBuffer, length);
    }
#else
    char tmpTemplate[] = "/tmp/depthai_XXXXXX";
    char* tmpName = mkdtemp(tmpTemplate);
    if(tmpName == nullptr) {
        // Keep the trailing separator so both outcomes have the same shape
        tmpPath = "/tmp/";
    } else {
        tmpPath = tmpName;
        tmpPath += '/';
    }
#endif
    return tmpPath;
}

} // namespace platform
} // namespace dai
7 changes: 4 additions & 3 deletions src/utility/Platform.hpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
#pragma once

#include <string>
#include <cstdint>
#include <string>

namespace dai {
namespace platform {

uint32_t getIPv4AddressAsBinary(std::string address);
std::string getIPv4AddressAsString(std::uint32_t binary);
std::string getTempPath();

}
}
} // namespace platform
} // namespace dai
Loading