Updated run_experiment.py and sigma.cu
Neha J committed May 29, 2024
1 parent a3fa0e1 commit 78cd153
Showing 5 changed files with 48 additions and 64 deletions.
36 changes: 20 additions & 16 deletions GPU-MPC/backend/sigma.h
@@ -1,8 +1,8 @@
 // Author: Neha Jawalkar
 // Copyright:
-// 
+//
 // Copyright (c) 2024 Microsoft Research
-// 
+//
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
@@ -87,15 +87,17 @@ class SIGMA : public Backend<T>
             double val = double(m + 128) * std::pow(2.0, k - 7);
             (*invSqrtTab)[i] = GroupElement(double(1LL << (2 * scale)) / sqrt(val / n_embed));
         }
 
-        auto filename = keyFile + "_" + std::to_string(party) + ".dat";
-        keySize = std::filesystem::file_size(filename);
-        int fd = openForReading(filename);
-        printf("%s, %d\n", filename.data(), fd);
-        getAlignedBuf(&keyBuf, keySize);
-        readKey(fd, keySize, keyBuf, NULL);
-
-        startPtr = keyBuf;
+        if (keyFile.compare("") != 0)
+        {
+            auto filename = keyFile + "_" + std::to_string(party) + ".dat";
+            keySize = std::filesystem::file_size(filename);
+            int fd = openForReading(filename);
+            printf("%s, %d\n", filename.data(), fd);
+            getAlignedBuf(&keyBuf, keySize);
+            readKey(fd, keySize, keyBuf, NULL);
+            startPtr = keyBuf;
+            closeFile(fd);
+        }
 
         LlamaConfig::bitlength = bw;
         LlamaConfig::party = party + 2;
@@ -309,10 +311,13 @@ class SIGMAKeygen : public Backend<T>
         keyBuf += padding;
         keySize += padding;
         assert(keySize < keyBufSize);
-        std::ofstream f(keyFile + "_" + std::to_string(party) + ".dat");
-        f.write((char *)startPtr, keySize);
-        f.close();
-        cpuFree(startPtr);
+        if (keyFile.compare("") != 0)
+        {
+            std::ofstream f(keyFile + "_" + std::to_string(party) + ".dat");
+            f.write((char *)startPtr, keySize);
+            f.close();
+            cpuFree(startPtr);
+        }
     }
 
     void matmul(const Tensor2D<T> &a, const Tensor2D<T> &b, Tensor2D<T> &c)
@@ -334,7 +339,6 @@ class SIGMAKeygen : public Backend<T>
     void silu(const Tensor<T> &in, Tensor<T> &out, u64 scale, u64 mode = 0)
     {
         out.d_data = gpuKeyGenGelu<T, u16, 10>(&keyBuf, party, bw, bw - scale, (int)scale, in.size(), in.d_data, &g);
-
     }
 
     void SIGMALayernormKeygen(const Tensor1D<T> &A, const Tensor1D<T> &B, const Tensor<T> &x, Tensor<T> &y, u64 scale, bool computeMu)
26 changes: 7 additions & 19 deletions GPU-MPC/experiments/sigma/config.json
@@ -1,24 +1,12 @@
 {
     "P0": {
-        "dealer": {
-            "gpu": 0,
-            "key_dir": "/tmp/"
-        },
-        "evaluator": {
-            "gpu": 1,
-            "peer": "0.0.0.0",
-            "cpu_threads": 64
-        }
+        "gpu": 0,
+        "peer": "0.0.0.0",
+        "cpu_threads": 64
     },
     "P1": {
-        "dealer": {
-            "gpu": 2,
-            "key_dir": "/tmp/"
-        },
-        "evaluator": {
-            "gpu": 3,
-            "peer": "0.0.0.0",
-            "cpu_threads": 64
-        }
+        "gpu": 2,
+        "peer": "0.0.0.0",
+        "cpu_threads": 64
     }
-}
\ No newline at end of file
+}
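With the dealer/evaluator split removed, each party's entry is now flat: one GPU, one peer address, one CPU-thread count, and no key_dir. A minimal sketch of how a driver script might read the new layout (the party index and variable names here are illustrative; main() in run_experiment.py below does the equivalent):

    import json

    # Load the flattened per-party settings; the old "dealer"/"evaluator"
    # sub-blocks and "key_dir" entries no longer exist.
    with open('config.json') as f:
        global_config = json.load(f)

    party = 0  # 0 or 1, as passed on the command line
    config = global_config['P{}'.format(party)]
    gpu, peer, cpu_threads = config['gpu'], config['peer'], config['cpu_threads']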
35 changes: 13 additions & 22 deletions GPU-MPC/experiments/sigma/run_experiment.py
@@ -30,22 +30,19 @@
 
 import sys
 sys.path.insert(0, '../..')
-from experiments.utils import run_seq, remove_key
+from experiments.utils import run_one
 
 def get_time(line):
     return round(float(line.split('=')[-1].split(' ')[0]) / 10**6, 3)
 
 def get_comm(line):
     return round(float(line.split('(')[-1].split(' ')[0]), 3)
 
-def run_perf(party, dealer_gpu, eval_gpu, dealer_key_dir, peer_ip, cpu_threads):
+def run_perf(party, gpu, peer_ip, cpu_threads):
     for model in ['bert-tiny', 'bert-base', 'bert-large', 'gpt2', 'gpt-neo', 'gpt-neo-large', 'llama7b', 'llama13b']:
-        dealer_cmd = "CUDA_VISIBLE_DEVICES={} ./sigma {} 128 0 {} {}".format(dealer_gpu, model, party, dealer_key_dir)
-        eval_cmd = "CUDA_VISIBLE_DEVICES={} ./sigma {} 128 1 {} {} {} {}".format(eval_gpu, model, party, dealer_key_dir, peer_ip, cpu_threads)
-        log_dir = "output/P{}/models/{}-128/logs/".format(party, model)
-        run_seq(dealer_cmd, eval_cmd, log_dir)
-        key_file = '{}_inference_key_{}.dat'.format(model, party)
-        remove_key(dealer_key_dir, key_file)
+        cmd = "CUDA_VISIBLE_DEVICES={} ./sigma {} 128 {} {} {}".format(gpu, model, party, peer_ip, cpu_threads)
+        log_dir = "output/P{}/models/{}-128/".format(party, model)
+        run_one(cmd, log_dir, "logs.txt")
 
     stats = dict({'dealer': dict(), 'evaluator': dict()})
     for model in ['bert-tiny', 'bert-base', 'bert-large', 'gpt2', 'gpt-neo', 'gpt-neo-large', 'llama7b', 'llama13b']:
@@ -130,15 +127,11 @@ def run_perf(party, dealer_gpu, eval_gpu, dealer_key_dir, peer_ip, cpu_threads):
             writer.writerow((X[i], online_time[i]))
 
 
-def run_table8(party, dealer_gpu, eval_gpu, dealer_key_dir, peer_ip, cpu_threads):
-
+def run_table8(party, gpu, peer_ip, cpu_threads):
     for n_seq in [64, 128, 256, 512, 1024]:
-        dealer_cmd = "CUDA_VISIBLE_DEVICES={} ./sigma gpt2 {} 0 {} {}".format(dealer_gpu, n_seq, party, dealer_key_dir)
-        eval_cmd = "CUDA_VISIBLE_DEVICES={} ./sigma gpt2 {} 1 {} {} {} {}".format(eval_gpu, n_seq, party, dealer_key_dir, peer_ip, cpu_threads)
-        log_dir = 'output/P{}/models/gpt2-{}/logs/'.format(party, n_seq)
-        run_seq(dealer_cmd, eval_cmd, log_dir)
-        key_file = 'gpt2_inference_key_{}.dat'.format(party)
-        remove_key(dealer_key_dir, key_file)
+        cmd = "CUDA_VISIBLE_DEVICES={} ./sigma gpt2 {} {} {} {}".format(gpu, n_seq, party, peer_ip, cpu_threads)
+        log_dir = 'output/P{}/models/gpt2-{}/'.format(party, n_seq)
+        run_one(cmd, log_dir, "logs.txt")
 
     with open('output/P{}/Table8.json'.format(party), 'w') as outfile:
         table8 = dict()
@@ -169,15 +162,13 @@ def main():
         config = global_config['P0']
     else:
         config = global_config['P1']
-    dealer_config = config['dealer']
-    eval_config = config['evaluator']
     if args.all:
-        run_perf(args.party, dealer_config['gpu'], eval_config['gpu'], dealer_config['key_dir'], eval_config['peer'], eval_config['cpu_threads'])
-        run_table8(args.party, dealer_config['gpu'], eval_config['gpu'], dealer_config['key_dir'], eval_config['peer'], eval_config['cpu_threads'])
+        run_perf(args.party, config['gpu'], config['peer'], config['cpu_threads'])
+        run_table8(args.party, config['gpu'], config['peer'], config['cpu_threads'])
    elif args.perf:
-        run_perf(args.party, dealer_config['gpu'], eval_config['gpu'], dealer_config['key_dir'], eval_config['peer'], eval_config['cpu_threads'])
+        run_perf(args.party, config['gpu'], config['peer'], config['cpu_threads'])
    elif args.n_seq:
-        run_table8(args.party, dealer_config['gpu'], eval_config['gpu'], dealer_config['key_dir'], eval_config['peer'], eval_config['cpu_threads'])
+        run_table8(args.party, config['gpu'], config['peer'], config['cpu_threads'])
 
 if __name__ == '__main__':
     main();
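Since keygen and evaluation now run back to back in a single process, each party launches one ./sigma command; the role flag and key directory are gone from the argument list, which is now <model> <n_seq> <party> <peer_ip> <cpu_threads>. A minimal sketch of the new invocation, with example values for the GPU index, peer IP, and thread count:

    import subprocess

    # One command per party; compare the old dealer_cmd/eval_cmd pair above.
    gpu, model, n_seq, party, peer_ip, cpu_threads = 0, 'gpt2', 128, 0, '0.0.0.0', 64
    cmd = "CUDA_VISIBLE_DEVICES={} ./sigma {} {} {} {} {}".format(
        gpu, model, n_seq, party, peer_ip, cpu_threads)
    subprocess.run(cmd, shell=True, check=True)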
11 changes: 6 additions & 5 deletions GPU-MPC/experiments/sigma/sigma.cu
@@ -47,8 +47,6 @@ int main(int __argc, char **__argv)
 
     std::string model(__argv[1]);
    printf("Model=%s\n", model.data());
-    std::string keyDir(__argv[4]);
-    auto keyFile = keyDir + model + "_inference_key";
     u64 keyBufSz = 0;
     SytorchModule<u64> *net;
     Tensor<u64> input({n_seq, n_embd});
@@ -175,7 +173,7 @@ int main(int __argc, char **__argv)
     auto inferenceDir = outDir + model + "-" + std::to_string(n_seq) + "/";
     makeDir(inferenceDir);
 
-    auto sigmaKeygen = new SIGMAKeygen<u64>(party, bw, scale, keyFile, keyBufSz);
+    auto sigmaKeygen = new SIGMAKeygen<u64>(party, bw, scale, "", keyBufSz);
     net->setBackend(sigmaKeygen);
     net->optimize();
     auto start = std::chrono::high_resolution_clock::now();
@@ -194,8 +192,11 @@ int main(int __argc, char **__argv)
     statsFile << ss.rdbuf();
     statsFile.close();
 
-    std::string ip(__argv[5]);
-    auto sigma = new SIGMA<u64>(party, ip, keyFile, bw, scale, n_seq, n_embd, atoi(__argv[6]));
+    std::string ip(__argv[4]);
+    auto sigma = new SIGMA<u64>(party, ip, "", bw, scale, n_seq, n_embd, atoi(__argv[5]));
+    sigma->keyBuf = sigmaKeygen->startPtr;
+    sigma->startPtr = sigma->keyBuf;
+    sigma->keySize = sigmaKeygen->keySize;
     net->setBackend(sigma);
     sigma->peer->sync();
     start = std::chrono::high_resolution_clock::now();
4 changes: 2 additions & 2 deletions GPU-MPC/experiments/utils.py
@@ -96,11 +96,11 @@ def run_seq(dealer_cmd, eval_cmd, log_dir):
     if evaluator.returncode:
         raise Exception("Evaluator did not run properly. Check logs for errors.")
 
-def run_one(dealer_cmd, log_dir):
+def run_one(dealer_cmd, log_dir, log_file="dealer.log"):
     dealer = None
     Path(log_dir).mkdir(parents=True, exist_ok=True)
 
-    dealer_log = log_dir + "dealer.log"
+    dealer_log = log_dir + log_file
     print('Running command={}'.format(dealer_cmd))
     with open(dealer_log, 'a') as dealer_file:
         dealer = subprocess.run(dealer_cmd, shell=True, stdout=dealer_file, stderr=dealer_file, check=True)
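Because log_file defaults to "dealer.log", existing run_one callers keep their old log path; the combined keygen-plus-evaluation runs above pass "logs.txt" instead. A short usage sketch (command strings and paths are illustrative):

    # Default: output is appended to output/P0/models/gpt2-128/dealer.log
    run_one("./sigma gpt2 128 0 0.0.0.0 64", "output/P0/models/gpt2-128/")

    # Explicit log file: output goes to output/P0/models/gpt2-128/logs.txt
    run_one("CUDA_VISIBLE_DEVICES=0 ./sigma gpt2 128 0 0.0.0.0 64",
            "output/P0/models/gpt2-128/", "logs.txt")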
