From bcddcf8ceca00d14ef8316376980c07ccc5b2ec9 Mon Sep 17 00:00:00 2001 From: Pei-Hsuan Hung Date: Sat, 1 Feb 2020 21:34:06 +0800 Subject: [PATCH] Integrate dudect to analyze O(1) time complexity Dudect is a tool to determine whether a piece of code runs in constant time by giving it different inputs and measuring the execution time. From the lab requirements, `q_insert_tail` and `q_size` are required to meet O(1) time complexity. To test whether a function runs in constant time, enable the option with `option simulation 1`, then run either `it` or `size`. --- Makefile | 10 +- console.c | 4 +- console.h | 4 + dudect/constant.c | 84 ++++++++++++++ dudect/constant.h | 42 +++++++ dudect/cpucycles.h | 12 ++ dudect/fixture.c | 195 +++++++++++++++++++++++++++++++++ dudect/fixture.h | 11 ++ dudect/ttest.c | 50 +++++++++ dudect/ttest.h | 15 +++ qtest.c | 29 +++++ random.c | 46 ++++++++ random.h | 9 ++ scripts/aspell-pws | 1 + scripts/driver.py | 99 +++++++++-------- traces/trace-17-complexity.cmd | 5 + 16 files changed, 568 insertions(+), 48 deletions(-) create mode 100644 dudect/constant.c create mode 100644 dudect/constant.h create mode 100644 dudect/cpucycles.h create mode 100644 dudect/fixture.c create mode 100644 dudect/fixture.h create mode 100644 dudect/ttest.c create mode 100644 dudect/ttest.h create mode 100644 random.c create mode 100644 random.h create mode 100644 traces/trace-17-complexity.cmd diff --git a/Makefile b/Makefile index d746ef298..0356a37a5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,8 @@ CC = gcc -CFLAGS = -O1 -g -Wall -Werror +CFLAGS = -O1 -g -Wall -Werror -Idudect -I.
GIT_HOOKS := .git/hooks/applied +DUT_DIR := dudect all: $(GIT_HOOKS) qtest # Control the build verbosity @@ -24,14 +25,16 @@ $(GIT_HOOKS): @scripts/install-git-hooks @echo -OBJS := qtest.o report.o console.o harness.o queue.o +OBJS := qtest.o report.o console.o harness.o queue.o \ + random.o dudect/constant.o dudect/fixture.o dudect/ttest.o deps := $(OBJS:%.o=.%.o.d) qtest: $(OBJS) $(VECHO) " LD\t$@\n" - $(Q)$(CC) $(LDFLAGS) -o $@ $^ + $(Q)$(CC) $(LDFLAGS) -o $@ $^ -lm %.o: %.c + @mkdir -p .$(DUT_DIR) $(VECHO) " CC\t$@\n" $(Q)$(CC) -o $@ $(CFLAGS) -c -MMD -MF .$@.d $< @@ -58,6 +61,7 @@ valgrind: valgrind_existence clean: rm -f $(OBJS) $(deps) *~ qtest /tmp/qtest.* + rm -rf .$(DUT_DIR) rm -rf *.dSYM (cd traces; rm -f *~) diff --git a/console.c b/console.c index 51c8ead12..c41ee4b56 100644 --- a/console.c +++ b/console.c @@ -17,6 +17,7 @@ #include "report.h" /* Some global values */ +int simulation = 0; static cmd_ptr cmd_list = NULL; static param_ptr param_list = NULL; static bool block_flag = false; @@ -54,7 +55,7 @@ static int fd_max = 0; /* Parameters */ static int err_limit = 5; static int err_cnt = 0; -static int echo = 0; +static bool echo = 0; static bool quit_flag = false; static char *prompt = "cmd> "; @@ -98,6 +99,7 @@ void init_cmd() add_cmd("log", do_log_cmd, " file | Copy output to file"); add_cmd("time", do_time_cmd, " cmd arg ... | Time command execution"); add_cmd("#", do_comment_cmd, " ... 
| Display comment"); + add_param("simulation", &simulation, "Start/Stop simulation mode", NULL); add_param("verbose", &verblevel, "Verbosity level", NULL); add_param("error", &err_limit, "Number of errors until exit", NULL); add_param("echo", &echo, "Do/don't echo commands", NULL); diff --git a/console.h b/console.h index b88b187f5..c42ee3435 100644 --- a/console.h +++ b/console.h @@ -1,8 +1,12 @@ #ifndef LAB0_CONSOLE_H #define LAB0_CONSOLE_H +#include /* Implementation of simple command-line interface */ +/* Simulation flag of console option */ +extern int simulation; + /* Each command defined in terms of a function */ typedef bool (*cmd_function)(int argc, char *argv[]); diff --git a/dudect/constant.c b/dudect/constant.c new file mode 100644 index 000000000..202c45b99 --- /dev/null +++ b/dudect/constant.c @@ -0,0 +1,84 @@ +#include "constant.h" +#include +#include +#include +#include +#include +#include +#include +#include "cpucycles.h" +#include "queue.h" +#include "random.h" + +/* Allow random number range from 0 to 65535 */ +const size_t chunk_size = 16; +/* Number of measurements per test */ +const size_t number_measurements = 150; +const int drop_size = 20; +/* Maintain a queue independent from the qtest since + * we do not want the test to affect the original functionality + */ +static queue_t *q = NULL; +static char random_string[100][8]; +static int random_string_iter = 0; +enum { test_insert_tail, test_size }; + +/* Implement the necessary queue interface to simulation */ +void init_dut(void) +{ + q = NULL; +} + +char *get_random_string(void) +{ + random_string_iter = (random_string_iter + 1) % (sizeof(random_string) / sizeof(random_string[0])); + return random_string[random_string_iter]; +} + +void prepare_inputs(uint8_t *input_data, uint8_t *classes) +{ + randombytes(input_data, number_measurements * chunk_size); + for (size_t i = 0; i < number_measurements; i++) { + classes[i] = randombit(); + if (classes[i] == 0) + *(uint16_t *) (input_data + i * chunk_size) = 0x00; + } + + for
(size_t i = 0; i < 100; ++i) { + /* Generate random string */ + randombytes((uint8_t *) random_string[i], 7); + random_string[i][7] = 0; + } +} + +void measure(int64_t *before_ticks, + int64_t *after_ticks, + uint8_t *input_data, + int mode) +{ + assert(mode == test_insert_tail || mode == test_size); + if (mode == test_insert_tail) { + for (size_t i = drop_size; i < number_measurements - drop_size; i++) { + char *s = get_random_string(); + dut_new(); + dut_insert_head( + get_random_string(), + *(uint16_t *) (input_data + i * chunk_size) % 10000); + before_ticks[i] = cpucycles(); + dut_insert_tail(s, 1); + after_ticks[i] = cpucycles(); + dut_free(); + } + } else { + for (size_t i = drop_size; i < number_measurements - drop_size; i++) { + dut_new(); + dut_insert_head( + get_random_string(), + *(uint16_t *) (input_data + i * chunk_size) % 10000); + before_ticks[i] = cpucycles(); + dut_size(1); + after_ticks[i] = cpucycles(); + dut_free(); + } + } +} diff --git a/dudect/constant.h b/dudect/constant.h new file mode 100644 index 000000000..6f0d40c23 --- /dev/null +++ b/dudect/constant.h @@ -0,0 +1,42 @@ +#ifndef DUDECT_CONSTANT_H +#define DUDECT_CONSTANT_H + +#include +#define dut_new() \ + do { \ + q = q_new(); \ + } while (0) + +#define dut_size(n) \ + do { \ + for (int i = 0; i < (n); ++i) \ + q_size(q); \ + } while (0) + +#define dut_insert_head(s, n) \ + do { \ + int j = (n); \ + while (j--) \ + q_insert_head(q, s); \ + } while (0) + +#define dut_insert_tail(s, n) \ + do { \ + int j = (n); \ + while (j--) \ + q_insert_tail(q, s); \ + } while (0) + +#define dut_free() \ + do { \ + q_free(q); \ + } while (0) + +void init_dut(void); +void prepare_inputs(uint8_t *input_data, uint8_t *classes); +void measure(int64_t *before_ticks, + int64_t *after_ticks, + uint8_t *input_data, + int mode); + +#endif diff --git a/dudect/cpucycles.h b/dudect/cpucycles.h new file mode 100644 index 000000000..558859e6f --- /dev/null +++ b/dudect/cpucycles.h @@ -0,0 +1,12 @@ +#include +//
http://www.intel.com/content/www/us/en/embedded/training/ia-32-ia-64-benchmark-code-execution-paper.html +static inline int64_t cpucycles(void) +{ +#if defined(__i386__) || defined(__x86_64__) + unsigned int hi, lo; + __asm__ volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi)); + return (int64_t) (((uint64_t) hi << 32) | lo); +#else +#error Unsupported Architecture +#endif +} diff --git a/dudect/fixture.c b/dudect/fixture.c new file mode 100644 index 000000000..1a9bd18f5 --- /dev/null +++ b/dudect/fixture.c @@ -0,0 +1,195 @@ +/** dude, is my code constant time? + * + * This file measures the execution time of a given function many times with + * different inputs and performs a Welch's t-test to determine if the function + * runs in constant time or not. This is essentially leakage detection, and + * not a timing attack. + * + * Notes: + * + * - the execution time distribution tends to be skewed towards large + * timings, leading to a fat right tail. Most executions take little time, + * some of them take a lot. We try to speed up the test process by + * throwing away those measurements with large cycle count. (For example, + * those measurements could correspond to the execution being interrupted + * by the OS.) Setting a threshold value for this is not obvious; we just + * keep the x% percent fastest timings, and repeat for several values of x. + * + * - the previous observation is highly heuristic. We also keep the uncropped + * measurement time and do a t-test on that. + * + * - we also test for unequal variances (second order test), but this is + * probably redundant since we're doing as well a t-test on cropped + * measurements (non-linear transform) + * + * - as long as any of the different test fails, the code will be deemed + * variable time.
+ * + */ + +#include "fixture.h" +#include +#include +#include +#include +#include +#include +#include "../console.h" +#include "../random.h" +#include "constant.h" +#include "ttest.h" + +#define enough_measurements 10000 +#define test_tries 10 + +extern const int drop_size; +extern const size_t chunk_size; +extern const size_t number_measurements; +static t_ctx *t; + +/* threshold values for Welch's t-test */ +#define t_threshold_bananas \ + 500 /* Test failed with overwhelming probability \ + */ +#define t_threshold_moderate 10 /* Test failed */ + +static void __attribute__((noreturn)) die(void) +{ + exit(111); +} + +static void differentiate(int64_t *exec_times, + int64_t *before_ticks, + int64_t *after_ticks) +{ + for (size_t i = 0; i < number_measurements; i++) { + exec_times[i] = after_ticks[i] - before_ticks[i]; + } +} + +static void update_statistics(int64_t *exec_times, uint8_t *classes) +{ + for (size_t i = 0; i < number_measurements; i++) { + int64_t difference = exec_times[i]; + /* Cpu cycle counter overflowed or dropped measurement */ + if (difference <= 0) { + continue; + } + /* do a t-test on the execution time */ + t_push(t, difference, classes[i]); + } +} + +static bool report(void) +{ + double max_t = fabs(t_compute(t)); + double number_traces_max_t = t->n[0] + t->n[1]; + double max_tau = max_t / sqrt(number_traces_max_t); + + printf("\033[A\033[2K"); + printf("meas: %7.2lf M, ", (number_traces_max_t / 1e6)); + if (number_traces_max_t < enough_measurements) { + printf("not enough measurements (%.0f still to go).\n", + enough_measurements - number_traces_max_t); + return false; + } + + /* + * max_t: the t statistic value + * max_tau: a t value normalized by sqrt(number of measurements). + * this way we can compare max_tau taken with different + * number of measurements. This is sort of "distance + * between distributions", independent of number of + * measurements. 
+ * (5/tau)^2: how many measurements we would need to barely + * detect the leak, if present. "barely detect the + * leak" = have a t value greater than 5. + */ + printf("max t: %+7.2f, max tau: %.2e, (5/tau)^2: %.2e.\n", max_t, max_tau, + (double) (5 * 5) / (double) (max_tau * max_tau)); + + if (max_t > t_threshold_bananas) { + return false; + } else if (max_t > t_threshold_moderate) { + return false; + } else { /* max_t < t_threshold_moderate */ + return true; + } +} + +static bool doit(int mode) +{ + int64_t *before_ticks = calloc(number_measurements + 1, sizeof(int64_t)); + int64_t *after_ticks = calloc(number_measurements + 1, sizeof(int64_t)); + int64_t *exec_times = calloc(number_measurements, sizeof(int64_t)); + uint8_t *classes = calloc(number_measurements, sizeof(uint8_t)); + uint8_t *input_data = + calloc(number_measurements * chunk_size, sizeof(uint8_t)); + + if (!before_ticks || !after_ticks || !exec_times || !classes || + !input_data) { + die(); + } + + prepare_inputs(input_data, classes); + + measure(before_ticks, after_ticks, input_data, mode); + differentiate(exec_times, before_ticks, after_ticks); + update_statistics(exec_times, classes); + bool ret = report(); + + free(before_ticks); + free(after_ticks); + free(exec_times); + free(classes); + free(input_data); + + return ret; +} + +static void init_once(void) +{ + init_dut(); + t_init(t); +} + +bool is_insert_tail_const(void) +{ + bool result = false; + t = malloc(sizeof(t_ctx)); + + for (int cnt = 0; cnt < test_tries; ++cnt) { + printf("Testing insert_tail...(%d/%d)\n\n", cnt, test_tries); + init_once(); + for (int i = 0; + i < + enough_measurements / (number_measurements - drop_size * 2) + 1; + ++i) + result = doit(0); + printf("\033[A\033[2K\033[A\033[2K"); + if (result == true) + break; + } + free(t); + return result; +} + +bool is_size_const(void) +{ + bool result = false; + t = malloc(sizeof(t_ctx)); + for (int cnt = 0; cnt < test_tries; ++cnt) { + printf("Testing size...(%d/%d)\n\n", cnt, 
test_tries); + init_once(); + for (int i = 0; + i < + enough_measurements / (number_measurements - drop_size * 2) + 1; + ++i) + result = doit(1); + printf("\033[A\033[2K\033[A\033[2K"); + if (result == true) + break; + } + free(t); + return result; +} diff --git a/dudect/fixture.h b/dudect/fixture.h new file mode 100644 index 000000000..296a6897c --- /dev/null +++ b/dudect/fixture.h @@ -0,0 +1,11 @@ +#ifndef DUDECT_FIXTURE_H +#define DUDECT_FIXTURE_H + +#include +#include "constant.h" + +/* Interface to test if function is constant */ +bool is_insert_tail_const(void); +bool is_size_const(void); + +#endif diff --git a/dudect/ttest.c b/dudect/ttest.c new file mode 100644 index 000000000..fdb877f1b --- /dev/null +++ b/dudect/ttest.c @@ -0,0 +1,50 @@ +/** + * Online Welch's t-test. + * + * Tests whether two populations have same mean. + * This is basically Student's t-test for unequal + * variances and unequal sample sizes. + * + * see https://en.wikipedia.org/wiki/Welch%27s_t-test + * + */ + +#include "ttest.h" +#include +#include +#include +#include +#include + +void t_push(t_ctx *ctx, double x, uint8_t class) +{ + assert(class == 0 || class == 1); + ctx->n[class]++; + /* Welford method for computing online variance + * in a numerically stable way. 
+ */ + double delta = x - ctx->mean[class]; + ctx->mean[class] = ctx->mean[class] + delta / ctx->n[class]; + ctx->m2[class] = ctx->m2[class] + delta * (x - ctx->mean[class]); +} + +double t_compute(t_ctx *ctx) +{ + double var[2] = {0.0, 0.0}; + var[0] = ctx->m2[0] / (ctx->n[0] - 1); + var[1] = ctx->m2[1] / (ctx->n[1] - 1); + double num = (ctx->mean[0] - ctx->mean[1]); + double den = sqrt(var[0] / ctx->n[0] + var[1] / ctx->n[1]); + double t_value = num / den; + return t_value; +} + +void t_init(t_ctx *ctx) +{ + for (int class = 0; class < 2; class ++) { + ctx->mean[class] = 0.0; + ctx->m2[class] = 0.0; + ctx->n[class] = 0.0; + } + return; +} diff --git a/dudect/ttest.h b/dudect/ttest.h new file mode 100644 index 000000000..4553650c1 --- /dev/null +++ b/dudect/ttest.h @@ -0,0 +1,15 @@ +#ifndef DUDECT_TTEST_H +#define DUDECT_TTEST_H + +#include +typedef struct { + double mean[2]; + double m2[2]; + double n[2]; +} t_ctx; + +void t_push(t_ctx *ctx, double x, uint8_t class); +double t_compute(t_ctx *ctx); +void t_init(t_ctx *ctx); + +#endif diff --git a/qtest.c b/qtest.c index 0f4cc15b0..e2a225d30 100644 --- a/qtest.c +++ b/qtest.c @@ -11,6 +11,7 @@ #include #include #include +#include "dudect/fixture.h" /* Our program needs to use regular malloc/free */ #define INTERNAL 1 @@ -250,6 +251,20 @@ static bool do_insert_head(int argc, char *argv[]) static bool do_insert_tail(int argc, char *argv[]) { + if (simulation) { + if (argc != 1) { + report(1, "%s does not need arguments in simulation mode", argv[0]); + return false; + } + bool ok = is_insert_tail_const(); + if (!ok) { + report(1, "ERROR: Probably not constant time"); + return false; + } + report(1, "Probably constant time"); + return ok; + } + char randstr_buf[MAX_RANDSTR_LEN]; int reps = 1; bool ok = true, need_rand = false; @@ -455,6 +470,20 @@ static bool do_reverse(int argc, char *argv[]) static bool do_size(int argc, char *argv[]) { + if (simulation) { + if (argc != 1) { + report(1, "%s does not need arguments in 
simulation mode", argv[0]); + return false; + } + bool ok = is_size_const(); + if (!ok) { + report(1, "ERROR: Probably not constant time"); + return false; + } + report(1, "Probably constant time"); + return ok; + } + if (argc != 1 && argc != 2) { report(1, "%s takes 0-1 arguments", argv[0]); return false; diff --git a/random.c b/random.c new file mode 100644 index 000000000..cd57066d5 --- /dev/null +++ b/random.c @@ -0,0 +1,46 @@ +#include "random.h" +#include +#include +#include +#include + +/* shameless stolen from ebacs */ +void randombytes(uint8_t *x, size_t how_much) +{ + ssize_t i; + static int fd = -1; + + ssize_t xlen = (ssize_t) how_much; + assert(xlen >= 0); + if (fd == -1) { + for (;;) { + fd = open("/dev/urandom", O_RDONLY); + if (fd != -1) + break; + sleep(1); + } + } + + while (xlen > 0) { + if (xlen < 1048576) + i = xlen; + else + i = 1048576; + + i = read(fd, x, (size_t) i); + if (i < 1) { + sleep(1); + continue; + } + + x += i; + xlen -= i; + } +} + +uint8_t randombit(void) +{ + uint8_t ret = 0; + randombytes(&ret, 1); + return (ret & 1); +} diff --git a/random.h b/random.h new file mode 100644 index 000000000..d1aae5189 --- /dev/null +++ b/random.h @@ -0,0 +1,9 @@ +#ifndef LAB0_RANDOM_H +#define LAB0_RANDOM_H + +#include +#include +void randombytes(uint8_t *x, size_t xlen); +uint8_t randombit(void); + +#endif diff --git a/scripts/aspell-pws b/scripts/aspell-pws index 698b1ae89..db3e4800c 100644 --- a/scripts/aspell-pws +++ b/scripts/aspell-pws @@ -1,4 +1,5 @@ personal_ws-1.1 en 500 +dudect runtime todo fixme diff --git a/scripts/driver.py b/scripts/driver.py index 61cc718a3..652ed1a67 100755 --- a/scripts/driver.py +++ b/scripts/driver.py @@ -4,9 +4,10 @@ import sys import getopt + # Driver program for C programming exercise class Tracer: - + traceDirectory = "./traces" qtest = "./qtest" command = qtest @@ -15,47 +16,52 @@ class Tracer: useValgrind = False traceDict = { - 1 : "trace-01-ops", - 2 : "trace-02-ops", - 3 : "trace-03-ops", - 4 : 
"trace-04-ops", - 5 : "trace-05-ops", - 6 : "trace-06-string", - 7 : "trace-07-robust", - 8 : "trace-08-robust", - 9 : "trace-09-robust", - 10 : "trace-10-malloc", - 11 : "trace-11-malloc", - 12 : "trace-12-malloc", - 13 : "trace-13-perf", - 14 : "trace-14-perf", - 15 : "trace-15-perf", - 16 : "trace-16-perf" - } + 1: "trace-01-ops", + 2: "trace-02-ops", + 3: "trace-03-ops", + 4: "trace-04-ops", + 5: "trace-05-ops", + 6: "trace-06-string", + 7: "trace-07-robust", + 8: "trace-08-robust", + 9: "trace-09-robust", + 10: "trace-10-malloc", + 11: "trace-11-malloc", + 12: "trace-12-malloc", + 13: "trace-13-perf", + 14: "trace-14-perf", + 15: "trace-15-perf", + 16: "trace-16-perf", + 17: "trace-17-complexity" + } traceProbs = { - 1 : "Trace-01", - 2 : "Trace-02", - 3 : "Trace-03", - 4 : "Trace-04", - 5 : "Trace-05", - 6 : "Trace-06", - 7 : "Trace-07", - 8 : "Trace-08", - 9 : "Trace-09", - 10 : "Trace-10", - 11 : "Trace-11", - 12 : "Trace-12", - 13 : "Trace-13", - 14 : "Trace-14", - 15 : "Trace-15", - 16 : "Trace-16" - } - - - maxScores = [0, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7] - - def __init__(self, qtest = "", verbLevel = 0, autograde = False, useValgrind = False): + 1: "Trace-01", + 2: "Trace-02", + 3: "Trace-03", + 4: "Trace-04", + 5: "Trace-05", + 6: "Trace-06", + 7: "Trace-07", + 8: "Trace-08", + 9: "Trace-09", + 10: "Trace-10", + 11: "Trace-11", + 12: "Trace-12", + 13: "Trace-13", + 14: "Trace-14", + 15: "Trace-15", + 16: "Trace-16", + 17: "Trace-17" + } + + maxScores = [0, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0] + + def __init__(self, + qtest="", + verbLevel=0, + autograde=False, + useValgrind=False): if qtest != "": self.qtest = qtest self.verbLevel = verbLevel @@ -76,8 +82,8 @@ def runTrace(self, tid): return False return retcode == 0 - def run(self, tid = 0): - scoreDict = { k : 0 for k in self.traceDict.keys() } + def run(self, tid=0): + scoreDict = {k: 0 for k in self.traceDict.keys()} print("---\tTrace\t\tPoints") if tid == 0: tidList = 
self.traceDict.keys() @@ -117,6 +123,7 @@ def run(self, tid = 0): jstring += '}}' print(jstring) + def usage(name): print("Usage: %s [-h] [-p PROG] [-t TID] [-v VLEVEL] [--valgrind]" % name) print(" -h Print this message") @@ -125,6 +132,7 @@ def usage(name): print(" -v VLEVEL Set verbosity level (0-3)") sys.exit(0) + def run(name, args): prog = "" tid = 0 @@ -132,7 +140,6 @@ def run(name, args): levelFixed = False autograde = False useValgrind = False - optlist, args = getopt.getopt(args, 'hp:t:v:A', ['valgrind']) for (opt, val) in optlist: @@ -154,8 +161,12 @@ def run(name, args): usage(name) if not levelFixed and autograde: vlevel = 0 - t = Tracer(qtest = prog, verbLevel = vlevel, autograde = autograde, useValgrind = useValgrind) + t = Tracer(qtest=prog, + verbLevel=vlevel, + autograde=autograde, + useValgrind=useValgrind) t.run(tid) + if __name__ == "__main__": run(sys.argv[0], sys.argv[1:]) diff --git a/traces/trace-17-complexity.cmd b/traces/trace-17-complexity.cmd new file mode 100644 index 000000000..ee02b5cca --- /dev/null +++ b/traces/trace-17-complexity.cmd @@ -0,0 +1,5 @@ +# Test if q_insert_tail and q_size is constant time complexity +option simulation 1 +it +size +option simulation 0