diff --git a/Makefile b/Makefile index 9007482d..54f498ff 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,9 @@ clean: $(MAKE) -C cpu clean @echo -e "\033[31m----> Cleaning up test kernels\033[0m" $(MAKE) -C tests clean + @echo -e "\033[31m----> Removing bin...\033[0m" + rm -rf bin + @echo -e "\033[31m All done!\033[0m" cuda-gdb: @echo -e "\033[36m----> Building submodules\033[0m" @@ -43,8 +46,8 @@ bin: mkdir bin bin/tests: bin tests - ln -s ../tests/bin bin/tests + ln -sfn ../tests/bin bin/tests bin/cricket-client.so: bin $(MAKE) -C cpu cricket-client.so cp cpu/cricket-client.so bin diff --git a/cpu/Makefile b/cpu/Makefile index a2d38223..d6f74438 100644 --- a/cpu/Makefile +++ b/cpu/Makefile @@ -32,16 +32,17 @@ SRC_SERVER = $(RPC_XDR) \ cpu-server-driver-hidden.c \ log.c \ cpu-libwrap.c \ - cpu-server-cusolver.c \ - cpu-server-cublas.c \ + cpu-server-cusolver.c \ + cpu-server-cublas.c \ list.c \ - api-recorder.c \ - resource-mg.c \ - cr.c \ - gsched_none.c \ - oob.c \ - mt-memcpy.c \ - cpu-elf2.c + api-recorder.c \ + resource-mg.c \ + cr.c \ + gsched_none.c \ + oob.c \ + mt-memcpy.c \ + cpu-elf2.c \ + gpu/ckp-kernel.c SRC_SERVER_LIB = server-library.c SRC_SERVER_EXE = server-exe.c @@ -80,7 +81,7 @@ LIB_FLAGS += -L$(CUDA_SRC)/lib64 CC_FLAGS += -std=gnu99 $(INC_FLAGS) -O2 # TODO: use extern in header files instead of direct definition e.g. 
in cpu-common.h to remove -fcommon flag CC_FLAGS += -fcommon -LD_FLAGS = $(LIB_FLAGS) -ltirpc -ldl -lcrypto -lelf +LD_FLAGS = $(LIB_FLAGS) -ltirpc -ldl -lcrypto -lelf ifdef WITH_DEBUG # use ASAN_OPTIONS=protect_shadow_gap=0 LSAN_OPTIONS=fast_unwind_on_malloc=0 when running diff --git a/cpu/cpu-server.c b/cpu/cpu-server.c index e5182324..ea10c4a8 100644 --- a/cpu/cpu-server.c +++ b/cpu/cpu-server.c @@ -18,6 +18,7 @@ #include "cpu-server-driver.h" #include "rpc/xdr.h" #include "cr.h" +#include "gpu/ckp-kernel.h" #include "cpu-elf2.h" #ifdef WITH_IB #include "cpu-ib.h" @@ -90,6 +91,11 @@ int cricket_server_checkpoint(int dump_memory) goto error; } + if ((ret = gpu_checkpoint(/*TODO*/)) != 0) { + LOGE(LOG_ERROR, "gpu_checkpoint returned %d", ret); + goto error; + } + LOG(LOG_INFO, "checkpoint successfully created."); return 0; error: diff --git a/cpu/gpu/ckp-kernel.c b/cpu/gpu/ckp-kernel.c new file mode 100644 index 00000000..45a3c045 --- /dev/null +++ b/cpu/gpu/ckp-kernel.c @@ -0,0 +1,7 @@ +#include "ckp-kernel.h" +#include <stdio.h> /* printf */ + +int gpu_checkpoint(void) { + printf("TESTING...\n"); + return 0; +} diff --git a/cpu/gpu/ckp-kernel.h b/cpu/gpu/ckp-kernel.h new file mode 100644 index 00000000..c535216c --- /dev/null +++ b/cpu/gpu/ckp-kernel.h @@ -0,0 +1,6 @@ +#ifndef __CKP_KERNEL_H__ +#define __CKP_KERNEL_H__ + +int gpu_checkpoint(void); + +#endif //!__CKP_KERNEL_H__ diff --git a/gpu/Makefile b/gpu/Makefile index a144539a..09a82d1e 100644 --- a/gpu/Makefile +++ b/gpu/Makefile @@ -20,7 +20,7 @@ INC_DIRS := -I$(CUDA_GDB_PATH)/bfd \ LIB_DIR := ../submodules/lib BUILD_DIR := build -DLIBS = -lncurses -lpthread -lm -lz -ldl -lexpat -llzma -Wl,--dynamic-list=utils/proc-service.list +DLIBS = -lncurses -lpthread -lm -lz -ldl -lexpat -llzma -lmpfr -Wl,--dynamic-list=utils/proc-service.list # Order of .a files is important! 
SLIBS = libgdb.a libbfd.a libiberty.a libreadline.a libdecnumber.a libcudacore.a libopcodes.a libgnu.a SLIBS:= $(addprefix $(LIB_DIR)/, $(SLIBS)) diff --git a/gpu/src/cricket-cr.c b/gpu/src/cricket-cr.c index fcbfd0d2..10e634b7 100644 --- a/gpu/src/cricket-cr.c +++ b/gpu/src/cricket-cr.c @@ -1113,6 +1113,7 @@ bool cricket_cr_rst_params(CUDBGAPI cudbgAPI, const char *ckp_dir, cudbgGetErrorString(res)); goto cleanup; } + /* for (int i = 0; i != elf_info->param_num; ++i) { if (elf_info->params[i].size != 8) continue; @@ -1150,6 +1151,7 @@ bool cricket_cr_rst_params(CUDBGAPI cudbgAPI, const char *ckp_dir, free(param_data); param_data = NULL; } + */ ret = true; cleanup: free(param_mem); @@ -1170,6 +1172,8 @@ bool cricket_cr_ckp_params(CUDBGAPI cudbgAPI, const char *ckp_dir, /* Parameters are the same for all warps so just use warp 0 * TODO: use first valid warp, because warp 0 may not be in use (is that * possible?) + * + * This seems to cause issues right now. Needs a solution. */ if ((param_mem = (uint8_t*)malloc(elf_info->param_size)) == NULL) return false; @@ -1482,6 +1486,7 @@ bool cricket_cr_ckp_globals(CUDBGAPI cudbgAPI, const char *ckp_dir) if (res != CUDBG_SUCCESS) { LOGE(LOG_ERROR, "cuda error: %s", cudbgGetErrorString(res)); + LOGE(LOG_DEBUG, "encountered in iteration %d of %d\n", i, globals_num); goto cleanup; } offset += globals[i].size; diff --git a/gpu/src/log.c b/gpu/src/log.c index e104890e..81ab6dc5 100644 --- a/gpu/src/log.c +++ b/gpu/src/log.c @@ -59,7 +59,12 @@ void now_time(char* buf) const char* to_string(log_level level) { +#ifdef NOCOLORS static const char* const buffer[] = {"ERROR", "WARNING", "INFO", "DEBUG"}; +#else + static const char* const buffer[] = {"\033[1m\033[31mERROR\033[0m", "\033[33mWARNING\033[0m", "\033[34mINFO\033[0m", "\033[32mDEBUG\033[0m"}; +#endif //NOCOLORS + if(level > LOG_DEBUG){ return buffer[LOG_DEBUG]; } @@ -90,5 +95,10 @@ void loggfe(log_level level, int line, const char* file, const char* formatstr, char 
stripped[64]; strcpy(stripped, file); str_strip(stripped, get_log_data()->project_offset); - printf("\tin %s(%d)\n", stripped, line); +#ifdef NOCOLORS + printf("\tin %s:%d\n", stripped, line); +#else + printf("\tin \033[4m%s:%d\033[0m\n", stripped, line); +#endif //NOCOLORS + } diff --git a/gpu/src/main.c b/gpu/src/main.c index 7facb092..1fbe97e8 100644 --- a/gpu/src/main.c +++ b/gpu/src/main.c @@ -31,7 +31,7 @@ #include "gdb.h" #ifndef LOG_LEVEL -#define LOG_LEVEL LOG_INFO +#define LOG_LEVEL LOG_DEBUG #endif #define CRICKET_PROFILE 1 @@ -124,7 +124,7 @@ bool cricket_all_warps_broken(CUDBGAPI cudbgAPI, CricketDeviceProp *dev_prop) int cricket_analyze(int argc, char *argv[]) { if (argc != 3) { - LOG(LOG_ERROR, "wrong number of arguments, use: %s ", argv[0]); + LOG(LOG_ERROR, "wrong number of arguments, use: %s analyze ", argv[0]); return -1; } LOG(LOG_INFO, "Analyzing \"%s\"", argv[2]); @@ -155,7 +155,7 @@ int cricket_restore(int argc, char *argv[]) double bt, ct, dt, et, ft, gt, comt; #endif if (argc != 3) { - LOG(LOG_ERROR, "wrong number of arguments, use: %s ", argv[0]); + LOG(LOG_ERROR, "wrong number of arguments, use: %s restore ", argv[0]); return -1; } @@ -827,14 +827,15 @@ int cricket_checkpoint(int argc, char *argv[]) #endif if (argc != 3) { - printf("wrong number of arguments, use: %s \n", argv[0]); + printf("wrong number of arguments, use: %s checkpoint \n", argv[0]); return -1; } + printf("Initializing GDB!\n\n"); gdb_init(argc, argv, NULL, argv[2]); /* attach to process (both CPU and GPU) */ - // printf("attaching...\n"); + printf("attaching...\n"); // attach_command(argv[2], !batch_flag); if (cuda_api_get_state() != CUDA_API_STATE_INITIALIZED) { @@ -849,6 +850,9 @@ int cricket_checkpoint(int argc, char *argv[]) #ifdef CRICKET_PROFILE gettimeofday(&b, NULL); #endif + + printf("attached!\n\n"); + printf("trying to get CUDA debugger API\n"); /* get CUDA debugger API */ res = cudbgGetAPI(CUDBG_API_VERSION_MAJOR, CUDBG_API_VERSION_MINOR, @@ -858,7 +862,7 @@ 
int cricket_checkpoint(int argc, char *argv[]) goto cuda_error; } printf("got API\n"); - + printf("enumerating devices...\n"); if (!cricket_device_get_num(cudbgAPI, &numDev)) { printf("error getting device num\n"); @@ -1042,11 +1046,14 @@ int cricket_checkpoint(int argc, char *argv[]) //cricket_focus_kernel(!batch_flag); + /// TODO: There is a loop to determine the first warp, however + /// cricket_cr_ckp_params still causes errors over invalid warps... if (!cricket_cr_ckp_params(cudbgAPI, ckp_dir, &elf_info, 0, 0, first_warp)) { printf("cricket_cr_ckp_params unsuccessful\n"); } + /// TODO: work out globals if (!cricket_cr_ckp_globals(cudbgAPI, ckp_dir)) { printf("cricket_cr_ckp_globals unsuccessful\n"); } @@ -1092,6 +1099,8 @@ int cricket_checkpoint(int argc, char *argv[]) int cricket_start(int argc, char *argv[]) { + char* cricket_path; + char cmd_str[1024]; struct cmd_list_element *alias = NULL; struct cmd_list_element *prefix_cmd = NULL; struct cmd_list_element *cmd = NULL; @@ -1101,12 +1110,19 @@ int cricket_start(int argc, char *argv[]) return -1; } + cricket_path = getenv("CRICKET_PATH"); + if (cricket_path == NULL) { + LOG(LOG_DEBUG, "no cricket path specified. 
assuming /usr/local/cricket\n"); + cricket_path = "/usr/local/cricket"; + } + gdb_init(argc, argv, argv[2], NULL); /* load files */ //exec_file_attach(argv[2], !batch_flag); // - execute_command("set exec-wrapper env 'LD_PRELOAD=/home/eiling/projects/cricket/bin/libtirpc.so.3:/home/eiling/projects/cricket/cpu/cricket-server.so'", !batch_flag); + snprintf(cmd_str, 1024, "set exec-wrapper env 'LD_PRELOAD=%s/bin/libtirpc.so.3:%s/cpu/cricket-server.so'", cricket_path, cricket_path); + execute_command(cmd_str, !batch_flag); //execute_command("break main", !batch_flag); execute_command("starti", !batch_flag); //execute_command("unset exec-wrapper", !batch_flag); diff --git a/tests/Makefile b/tests/Makefile index 8adc5da7..be383ad0 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -26,7 +26,7 @@ bin: cpu samples test_apps cp test_apps/*.testapp bin cp samples/matrixMul/matrixMul bin cp samples/bandwidthTest/bandwidthTest bin - cp samples/nbody/nbody bin + if [ -f samples/nbody/nbody ]; then cp samples/nbody/nbody bin; fi clean: @echo -e "\033[31m----> Cleaning up tests/test_apps\033[0m" diff --git a/tests/cpu/Makefile b/tests/cpu/Makefile new file mode 100644 index 00000000..ab492786 --- /dev/null +++ b/tests/cpu/Makefile @@ -0,0 +1,7 @@ +.PHONY: all clean + +all: + +clean: + $(MAKE) -C cubin clean + $(MAKE) -C unit clean diff --git a/tests/gpu/checkpoint.sh b/tests/gpu/checkpoint.sh index 2624b66b..5b75f81b 100755 --- a/tests/gpu/checkpoint.sh +++ b/tests/gpu/checkpoint.sh @@ -18,7 +18,7 @@ echo "using $CUDA_APP" CUDA_APP_NAME="$(basename -- $CUDA_APP)" CRICKET_CLIENT=${CRICKET_PATH}/cpu/cricket-client.so CRICKET_SERVER=${CRICKET_PATH}/cpu/cricket-server.so -CRIU=/home/eiling/tmp/criu/criu/criu +CRIU=${HOME}/tmp/criu/criu/criu export REMOTE_GPU_ADDRESS=localhost export CUDA_VISIBLE_DEVICES=0 diff --git a/tests/gpu/restore.sh b/tests/gpu/restore.sh index c34027fc..52764a12 100755 --- a/tests/gpu/restore.sh +++ b/tests/gpu/restore.sh @@ -15,7 +15,7 @@ echo "using $CUDA_APP" 
CUDA_APP_NAME="$(basename -- $CUDA_APP)" CRICKET_CLIENT=${CRICKET_PATH}/cpu/cricket-client.so CRICKET_SERVER=${CRICKET_PATH}/cpu/cricket-server.so -CRIU=/home/eiling/tmp/criu/criu/criu +CRIU=${HOME}/tmp/criu/criu/criu export REMOTE_GPU_ADDRESS=localhost export CUDA_VISIBLE_DEVICES=0