diff --git a/.ackrc b/.ackrc
new file mode 100644
index 0000000..30903c0
--- /dev/null
+++ b/.ackrc
@@ -0,0 +1,3 @@
+--ignore-dir=build
+--ignore-dir=3rdparty
+--ignore-dir=bin
diff --git a/scripts/exprlib/env.py b/scripts/exprlib/env.py
index 56a8b21..07515be 100644
--- a/scripts/exprlib/env.py
+++ b/scripts/exprlib/env.py
@@ -218,26 +218,31 @@ def get_nodes_with_nics():
         return sorted(nodes_with_nics)
 
     @staticmethod
-    def get_cpu_mask_with_nics(only_phys_cores=True):
+    def get_cpu_mask_with_nics(num_max_cores_per_node, only_phys_cores=True):
         '''
         Return a CPU core index bitmask of all cores in the NUMA nodes that
         have NICs.
         '''
         core_bits = 0
         node_cpus = ExperimentEnv.get_core_topology()
         nodes_with_nics = ExperimentEnv.get_nodes_with_nics()
+        ht_div = ExperimentEnv.get_hyperthreading_degree() if only_phys_cores else 1
         for node_id in nodes_with_nics:
-            ht_div = ExperimentEnv.get_hyperthreading_degree() if only_phys_cores else 1
             phys_cnt = len(node_cpus[node_id]) // ht_div
-            for core_id in node_cpus[node_id][:phys_cnt]:
+            max_cnt = min(num_max_cores_per_node, phys_cnt)
+            for core_id in node_cpus[node_id][:max_cnt]:
                 core_bits |= (1 << core_id)
+            # Force add device-handling cores
+            core_bits |= (1 << node_cpus[node_id][phys_cnt - 1])
         return core_bits
 
     @staticmethod
     def mangle_main_args(config_name, click_name,
+                         num_max_cores_per_node,
                          emulate_opts=None,
                          extra_args=None, extra_dpdk_args=None):
+        core_mask = ExperimentEnv.get_cpu_mask_with_nics(num_max_cores_per_node)
         args = [
-            '-c', hex(ExperimentEnv.get_cpu_mask_with_nics()),
+            '-c', '{:x}'.format(core_mask),
             '-n', os.environ.get('NBA_MEM_CHANNELS', '4'),
         ]
         # TODO: translate emulate_opts to void_pmd options
@@ -269,6 +274,7 @@ def chdir_to_root():
 
     @asyncio.coroutine
     def execute_main(self, config_name, click_name, running_time=30.0,
+                     num_max_cores_per_node=64,
                      emulate_opts=None,
                      extra_args=None, extra_dpdk_args=None,
                      custom_stdout_coro=None):
@@ -278,7 +284,7 @@ def execute_main(self, config_name, click_name,
         self.chdir_to_root()
         config_path = os.path.normpath(os.path.join('configs', config_name))
         click_path = os.path.normpath(os.path.join('configs', click_name))
-        args = self.mangle_main_args(config_path, click_path,
+        args = self.mangle_main_args(config_path, click_path, num_max_cores_per_node,
                                      emulate_opts, extra_args, extra_dpdk_args)
         # Reset/initialize events.
diff --git a/scripts/run_all_apps.sh b/scripts/run_all_apps.sh
index 2e5d534..6d2624c 100755
--- a/scripts/run_all_apps.sh
+++ b/scripts/run_all_apps.sh
@@ -6,9 +6,9 @@ dropbox.py stop
 ./run_app_perf.py --prefix latency -b bin-backup/main -p 72 -l default.py ipv6-router-cpuonly.click
 ./run_app_perf.py --prefix latency -b bin-backup/main -p 72 -l default.py ipv6-router-gpuonly.click
 ./run_app_perf.py --prefix latency -b bin-backup/main.noreuse -p 72 -l default.py ipv6-router-gpuonly.click
-./run_app_perf.py --prefix latency -b bin-backup/main -p 64 -l default.py ipsec-encryption-cpuonly.click
-./run_app_perf.py --prefix latency -b bin-backup/main -p 64 -l default.py ipsec-encryption-gpuonly.click
-./run_app_perf.py --prefix latency -b bin-backup/main.noreuse -p 64 -l default.py ipsec-encryption-gpuonly.click
+./run_app_perf.py --prefix latency -b bin-backup/lmain -p 64 -l default.py ipsec-encryption-cpuonly.click
+./run_app_perf.py --prefix latency -b bin-backup/lmain -p 64 -l default.py ipsec-encryption-gpuonly.click
+./run_app_perf.py --prefix latency -b bin-backup/lmain.noreuse -p 64 -l default.py ipsec-encryption-gpuonly.click
 ./run_app_perf.py --prefix thruput -b bin-backup/main -p 64,128,256,512,1024,1500 default.py ipv4-router.click --combine-cpu-gpu
 ./run_app_perf.py --prefix thruput -b bin-backup/main -p 64,128,256,512,1024,1500 default.py ipv6-router.click --combine-cpu-gpu
 ./run_app_perf.py --prefix thruput -b bin-backup/main -p 64,128,256,512,1024,1500 default.py ipsec-encryption.click --combine-cpu-gpu
diff --git a/scripts/run_app_perf.py b/scripts/run_app_perf.py
index 5ac6e04..893907c 100755
--- a/scripts/run_app_perf.py
+++ b/scripts/run_app_perf.py
@@ -34,7 +34,7 @@ async def do_experiment(loop, env, args, conds, thruput_reader):
     result = ExperimentResult()
-    conf_name, io_batchsz, comp_batchsz, coproc_ppdepth, pktsz = conds
+    conf_name, io_batchsz, comp_batchsz, coproc_ppdepth, num_cores, pktsz = conds
 
     env.envvars['NBA_IO_BATCH_SIZE'] = str(io_batchsz)
     env.envvars['NBA_COMP_BATCH_SIZE'] = str(comp_batchsz)
@@ -50,8 +50,8 @@ async def do_experiment(loop, env, args, conds, thruput_reader):
     elif 'ipsec' in args.element_config_to_use:
         # ipv4 pkts with fixed 1K flows
         pktgen.args = ['-i', 'all', '-f', '1024', '-r', '0', '-v', '4', '-p', str(pktsz)]
-        extra_nba_args.append('--preserve-latency')
         if args.latency:
+            extra_nba_args.append('--preserve-latency')
             pktgen.args += ['-g', '3', '-l', '--latency-histogram']
     else:
         # All random ipv4 pkts
@@ -84,6 +84,7 @@ async def do_experiment(loop, env, args, conds, thruput_reader):
     else:
         retcode = await env.execute_main(args.sys_config_to_use,
                                          conf_name + '.click',
+                                         num_max_cores_per_node=num_cores,
                                          extra_args=extra_nba_args,
                                          running_time=32.0)
@@ -108,7 +109,7 @@ async def do_experiment(loop, env, args, conds, thruput_reader):
     for n in range(env.get_num_nodes()):
         if per_node_cnt[n] > 0:
             avg_thruput_records.append((
-                (conf_name, io_batchsz, comp_batchsz, coproc_ppdepth, n, pktsz),
+                (conf_name, io_batchsz, comp_batchsz, coproc_ppdepth, num_cores, n, pktsz),
                 (per_node_mpps_sum[n] / per_node_cnt[n],
                  per_node_gbps_sum[n] / per_node_cnt[n])))
     result.thruput_records = avg_thruput_records
@@ -146,6 +147,7 @@ async def do_experiment(loop, env, args, conds, thruput_reader):
     parser.add_argument('--io-batch-sizes', type=comma_sep_numbers(1, 256), metavar='NUM[,NUM...]', default=[32])
     parser.add_argument('--comp-batch-sizes', type=comma_sep_numbers(1, 256), metavar='NUM[,NUM...]', default=[64])
     parser.add_argument('--coproc-ppdepths', type=comma_sep_numbers(1, 256), metavar='NUM[,NUM...]', default=[32])
+    parser.add_argument('--num-cores', type=comma_sep_numbers(1, 64), metavar='NUM[,NUM...]', default=[64])
     parser.add_argument('-t', '--transparent', action='store_true', default=False, help='Pass-through the standard output instead of parsing it. No default timeout is applied.')
     parser.add_argument('--timeout', type=int, default=None, help='Set a forced timeout for transparent mode.')
     parser.add_argument('--no-record', action='store_true', default=False, help='Do NOT record the results.')
@@ -178,6 +180,7 @@ async def do_experiment(loop, env, args, conds, thruput_reader):
         args.io_batch_sizes,
         args.comp_batch_sizes,
         args.coproc_ppdepths,
+        args.num_cores,
         tuple(range(env.get_num_nodes())),
         args.pkt_sizes
     ))
@@ -186,6 +189,7 @@ async def do_experiment(loop, env, args, conds, thruput_reader):
         args.io_batch_sizes,
         args.comp_batch_sizes,
         args.coproc_ppdepths,
+        args.num_cores,
         args.pkt_sizes
     ))
     mi = pd.MultiIndex.from_tuples(combinations, names=[
@@ -193,6 +197,7 @@ async def do_experiment(loop, env, args, conds, thruput_reader):
         'io_batchsz',
         'comp_batchsz',
         'coproc_ppdepth',
+        'num_cores',
         'node_id',
         'pktsz',
     ])
@@ -230,7 +235,7 @@ async def do_experiment(loop, env, args, conds, thruput_reader):
     pd.set_option('display.expand_frame_repr', False)
     pd.set_option('display.float_format', lambda f: '{:.2f}'.format(f))
     system_tput = all_tput_recs.sum(level=['conf', 'io_batchsz', 'comp_batchsz',
-                                           'coproc_ppdepth', 'pktsz'])
+                                           'coproc_ppdepth', 'num_cores', 'pktsz'])
     print('Throughput per NUMA node')
     print('========================')
     print(all_tput_recs)
diff --git a/scripts/run_compbatching.sh b/scripts/run_compbatching.sh
new file mode 100755
index 0000000..74a1565
--- /dev/null
+++ b/scripts/run_compbatching.sh
@@ -0,0 +1,6 @@
+#! /bin/sh
+dropbox.py stop
+./run_app_perf.py --prefix compbatching -b bin-backup/main --comp-batch-sizes 1,4,8,16,32,64 -p 64,256,1500 default.py ipv4-router.click --combine-cpu-gpu
+./run_app_perf.py --prefix compbatching -b bin-backup/main --comp-batch-sizes 1,4,8,16,32,64 -p 64,256,1500 default.py ipv6-router.click --combine-cpu-gpu
+./run_app_perf.py --prefix compbatching -b bin-backup/main --comp-batch-sizes 1,4,8,16,32,64 -p 64,256,1500 default.py ipsec-encryption.click --combine-cpu-gpu
+dropbox.py start
diff --git a/scripts/run_scalability.sh b/scripts/run_scalability.sh
new file mode 100755
index 0000000..8fde44b
--- /dev/null
+++ b/scripts/run_scalability.sh
@@ -0,0 +1,6 @@
+#! /bin/sh
+dropbox.py stop
+./run_app_perf.py --prefix scalability -b bin-backup/main --num-cores 1,2,4,7 -p 64 default.py ipv4-router.click --combine-cpu-gpu
+./run_app_perf.py --prefix scalability -b bin-backup/main --num-cores 1,2,4,7 -p 64 default.py ipv6-router.click --combine-cpu-gpu
+./run_app_perf.py --prefix scalability -b bin-backup/main --num-cores 1,2,4,7 -p 64 default.py ipsec-encryption.click --combine-cpu-gpu
+dropbox.py start
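
Illustrative sketch (not part of the patch above): the snippet below mimics the core-mask logic that the modified get_cpu_mask_with_nics() applies, using a made-up two-node topology. Each NIC-attached NUMA node contributes at most num_max_cores_per_node of its physical cores, and its last physical core is always force-added as the device-handling core; mangle_main_args() then formats the resulting bitmask as the hex value of the '-c' core-mask argument. The helper name sketch_cpu_mask and the topology values are hypothetical, standing in for ExperimentEnv's runtime detection (get_core_topology, get_hyperthreading_degree).

    # Minimal sketch, assuming a fixed topology instead of runtime detection.
    def sketch_cpu_mask(node_cpus, nodes_with_nics, num_max_cores_per_node, ht_degree=2):
        core_bits = 0
        for node_id in nodes_with_nics:
            phys_cnt = len(node_cpus[node_id]) // ht_degree   # physical cores only
            max_cnt = min(num_max_cores_per_node, phys_cnt)   # per-node cap
            for core_id in node_cpus[node_id][:max_cnt]:
                core_bits |= (1 << core_id)
            # Force-add the node's last physical core (device-handling core).
            core_bits |= (1 << node_cpus[node_id][phys_cnt - 1])
        return core_bits

    # Hypothetical topology: two NUMA nodes, 8 hardware threads each (4 physical cores).
    node_cpus = {0: list(range(0, 8)), 1: list(range(8, 16))}
    mask = sketch_cpu_mask(node_cpus, nodes_with_nics=[0, 1], num_max_cores_per_node=2)
    print('{:x}'.format(mask))   # prints 'b0b': cores 0-1,3 on node 0 and 8-9,11 on node 1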