diff --git a/GPU-MPC/backend/sigma.h b/GPU-MPC/backend/sigma.h index 8f62f5f6..f83e8eb4 100644 --- a/GPU-MPC/backend/sigma.h +++ b/GPU-MPC/backend/sigma.h @@ -1,8 +1,8 @@ // Author: Neha Jawalkar // Copyright: -// +// // Copyright (c) 2024 Microsoft Research -// +// // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights @@ -87,15 +87,17 @@ class SIGMA : public Backend double val = double(m + 128) * std::pow(2.0, k - 7); (*invSqrtTab)[i] = GroupElement(double(1LL << (2 * scale)) / sqrt(val / n_embed)); } - - auto filename = keyFile + "_" + std::to_string(party) + ".dat"; - keySize = std::filesystem::file_size(filename); - int fd = openForReading(filename); - printf("%s, %d\n", filename.data(), fd); - getAlignedBuf(&keyBuf, keySize); - readKey(fd, keySize, keyBuf, NULL); - - startPtr = keyBuf; + if (keyFile.compare("") != 0) + { + auto filename = keyFile + "_" + std::to_string(party) + ".dat"; + keySize = std::filesystem::file_size(filename); + int fd = openForReading(filename); + printf("%s, %d\n", filename.data(), fd); + getAlignedBuf(&keyBuf, keySize); + readKey(fd, keySize, keyBuf, NULL); + startPtr = keyBuf; + closeFile(fd); + } LlamaConfig::bitlength = bw; LlamaConfig::party = party + 2; @@ -309,10 +311,13 @@ class SIGMAKeygen : public Backend keyBuf += padding; keySize += padding; assert(keySize < keyBufSize); - std::ofstream f(keyFile + "_" + std::to_string(party) + ".dat"); - f.write((char *)startPtr, keySize); - f.close(); - cpuFree(startPtr); + if (keyFile.compare("") != 0) + { + std::ofstream f(keyFile + "_" + std::to_string(party) + ".dat"); + f.write((char *)startPtr, keySize); + f.close(); + cpuFree(startPtr); + } } void matmul(const Tensor2D &a, const Tensor2D &b, Tensor2D &c) @@ -334,7 +339,6 @@ class SIGMAKeygen : public Backend void silu(const Tensor &in, Tensor &out, u64 scale, u64 mode = 0) { out.d_data = gpuKeyGenGelu(&keyBuf, party, bw, bw - scale, (int)scale, in.size(), in.d_data, &g); - } void SIGMALayernormKeygen(const Tensor1D &A, const Tensor1D &B, const Tensor &x, Tensor &y, u64 scale, bool computeMu) diff --git a/GPU-MPC/experiments/sigma/config.json b/GPU-MPC/experiments/sigma/config.json index 190d5a61..4543b8e8 100644 --- a/GPU-MPC/experiments/sigma/config.json +++ b/GPU-MPC/experiments/sigma/config.json @@ -1,24 +1,12 @@ { "P0": { - "dealer": { - "gpu": 0, - "key_dir": "/tmp/" - }, - "evaluator": { - "gpu": 1, - "peer": "0.0.0.0", - "cpu_threads": 64 - } + "gpu": 0, + "peer": "0.0.0.0", + "cpu_threads": 64 }, "P1": { - "dealer": { - "gpu": 2, - "key_dir": "/tmp/" - }, - "evaluator": { - "gpu": 3, - "peer": "0.0.0.0", - "cpu_threads": 64 - } + "gpu": 2, + "peer": "0.0.0.0", + "cpu_threads": 64 } -} +} \ No newline at end of file diff --git a/GPU-MPC/experiments/sigma/run_experiment.py b/GPU-MPC/experiments/sigma/run_experiment.py index bcc93e90..40824b8f 100644 --- a/GPU-MPC/experiments/sigma/run_experiment.py +++ b/GPU-MPC/experiments/sigma/run_experiment.py @@ -30,7 +30,7 @@ import sys sys.path.insert(0, '../..') -from experiments.utils import run_seq, remove_key +from experiments.utils import run_one def get_time(line): return round(float(line.split('=')[-1].split(' ')[0]) / 10**6, 3) @@ -38,14 +38,11 @@ def get_time(line): def get_comm(line): return round(float(line.split('(')[-1].split(' ')[0]), 3) -def run_perf(party, dealer_gpu, eval_gpu, dealer_key_dir, peer_ip, cpu_threads): +def run_perf(party, gpu, peer_ip, cpu_threads): for model in ['bert-tiny', 'bert-base', 'bert-large', 'gpt2', 'gpt-neo', 'gpt-neo-large', 'llama7b', 'llama13b']: - dealer_cmd = "CUDA_VISIBLE_DEVICES={} ./sigma {} 128 0 {} {}".format(dealer_gpu, model, party, dealer_key_dir) - eval_cmd = "CUDA_VISIBLE_DEVICES={} ./sigma {} 128 1 {} {} {} {}".format(eval_gpu, model, party, dealer_key_dir, peer_ip, cpu_threads) - log_dir = "output/P{}/models/{}-128/logs/".format(party, model) - run_seq(dealer_cmd, eval_cmd, log_dir) - key_file = '{}_inference_key_{}.dat'.format(model, party) - remove_key(dealer_key_dir, key_file) + cmd = "CUDA_VISIBLE_DEVICES={} ./sigma {} 128 {} {} {}".format(gpu, model, party, peer_ip, cpu_threads) + log_dir = "output/P{}/models/{}-128/".format(party, model) + run_one(cmd, log_dir, "logs.txt") stats = dict({'dealer': dict(), 'evaluator': dict()}) for model in ['bert-tiny', 'bert-base', 'bert-large', 'gpt2', 'gpt-neo', 'gpt-neo-large', 'llama7b', 'llama13b']: @@ -130,15 +127,11 @@ def run_perf(party, dealer_gpu, eval_gpu, dealer_key_dir, peer_ip, cpu_threads): writer.writerow((X[i], online_time[i])) -def run_table8(party, dealer_gpu, eval_gpu, dealer_key_dir, peer_ip, cpu_threads): - +def run_table8(party, gpu, peer_ip, cpu_threads): for n_seq in [64, 128, 256, 512, 1024]: - dealer_cmd = "CUDA_VISIBLE_DEVICES={} ./sigma gpt2 {} 0 {} {}".format(dealer_gpu, n_seq, party, dealer_key_dir) - eval_cmd = "CUDA_VISIBLE_DEVICES={} ./sigma gpt2 {} 1 {} {} {} {}".format(eval_gpu, n_seq, party, dealer_key_dir, peer_ip, cpu_threads) - log_dir = 'output/P{}/models/gpt2-{}/logs/'.format(party, n_seq) - run_seq(dealer_cmd, eval_cmd, log_dir) - key_file = 'gpt2_inference_key_{}.dat'.format(party) - remove_key(dealer_key_dir, key_file) + cmd = "CUDA_VISIBLE_DEVICES={} ./sigma gpt2 {} {} {} {}".format(gpu, n_seq, party, peer_ip, cpu_threads) + log_dir = 'output/P{}/models/gpt2-{}/'.format(party, n_seq) + run_one(cmd, log_dir, "logs.txt") with open('output/P{}/Table8.json'.format(party), 'w') as outfile: table8 = dict() @@ -169,15 +162,13 @@ def main(): config = global_config['P0'] else: config = global_config['P1'] - dealer_config = config['dealer'] - eval_config = config['evaluator'] if args.all: - run_perf(args.party, dealer_config['gpu'], eval_config['gpu'], dealer_config['key_dir'], eval_config['peer'], eval_config['cpu_threads']) - run_table8(args.party, dealer_config['gpu'], eval_config['gpu'], dealer_config['key_dir'], eval_config['peer'], eval_config['cpu_threads']) + run_perf(args.party, config['gpu'], config['peer'], config['cpu_threads']) + run_table8(args.party, config['gpu'], config['peer'], config['cpu_threads']) elif args.perf: - run_perf(args.party, dealer_config['gpu'], eval_config['gpu'], dealer_config['key_dir'], eval_config['peer'], eval_config['cpu_threads']) + run_perf(args.party, config['gpu'], config['peer'], config['cpu_threads']) elif args.n_seq: - run_table8(args.party, dealer_config['gpu'], eval_config['gpu'], dealer_config['key_dir'], eval_config['peer'], eval_config['cpu_threads']) + run_table8(args.party, config['gpu'], config['peer'], config['cpu_threads']) if __name__ == '__main__': main(); diff --git a/GPU-MPC/experiments/sigma/sigma.cu b/GPU-MPC/experiments/sigma/sigma.cu index 9bf9e154..25c3d040 100644 --- a/GPU-MPC/experiments/sigma/sigma.cu +++ b/GPU-MPC/experiments/sigma/sigma.cu @@ -47,8 +47,6 @@ int main(int __argc, char **__argv) std::string model(__argv[1]); printf("Model=%s\n", model.data()); - std::string keyDir(__argv[4]); - auto keyFile = keyDir + model + "_inference_key"; u64 keyBufSz = 0; SytorchModule *net; Tensor input({n_seq, n_embd}); @@ -175,7 +173,7 @@ int main(int __argc, char **__argv) auto inferenceDir = outDir + model + "-" + std::to_string(n_seq) + "/"; makeDir(inferenceDir); - auto sigmaKeygen = new SIGMAKeygen(party, bw, scale, keyFile, keyBufSz); + auto sigmaKeygen = new SIGMAKeygen(party, bw, scale, "", keyBufSz); net->setBackend(sigmaKeygen); net->optimize(); auto start = std::chrono::high_resolution_clock::now(); @@ -194,8 +192,11 @@ int main(int __argc, char **__argv) statsFile << ss.rdbuf(); statsFile.close(); - std::string ip(__argv[5]); - auto sigma = new SIGMA(party, ip, keyFile, bw, scale, n_seq, n_embd, atoi(__argv[6])); + std::string ip(__argv[4]); + auto sigma = new SIGMA(party, ip, "", bw, scale, n_seq, n_embd, atoi(__argv[5])); + sigma->keyBuf = sigmaKeygen->startPtr; + sigma->startPtr = sigma->keyBuf; + sigma->keySize = sigmaKeygen->keySize; net->setBackend(sigma); sigma->peer->sync(); start = std::chrono::high_resolution_clock::now(); diff --git a/GPU-MPC/experiments/utils.py b/GPU-MPC/experiments/utils.py index f5e275d9..447df063 100644 --- a/GPU-MPC/experiments/utils.py +++ b/GPU-MPC/experiments/utils.py @@ -96,11 +96,11 @@ def run_seq(dealer_cmd, eval_cmd, log_dir): if evaluator.returncode: raise Exception("Evaluator did not run properly. Check logs for errors.") -def run_one(dealer_cmd, log_dir): +def run_one(dealer_cmd, log_dir, log_file="dealer.log"): dealer = None Path(log_dir).mkdir(parents=True, exist_ok=True) - dealer_log = log_dir + "dealer.log" + dealer_log = log_dir + log_file print('Running command={}'.format(dealer_cmd)) with open(dealer_log, 'a') as dealer_file: dealer = subprocess.run(dealer_cmd, shell=True, stdout=dealer_file, stderr=dealer_file, check=True)