Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

issue-2831: dump iolog for fio_index mount_kikimr tests #2840

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build/ext_mapping.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"5074323334": "https://storage.eu-north1.nebius.cloud/nbs-oss-resources/rootfs-compressed.img",
"4399070549": "https://storage.eu-north1.nebius.cloud/nbs-oss-resources/blockstore-plugin-stable-23-1.tar.gz",
"3240550068": "https://storage.eu-north1.nebius.cloud/nbs-oss-resources/blockstore-plugin-stable-22-2.tar.gz",
"2326530962": "https://storage.eu-north1.nebius.cloud/nbs-oss-resources/fio-static.tgz",
"2326530962": "https://storage.eu-north1.nebius.cloud/nbs-oss-resources/fio-3.38-static.tgz",
"4449551218": "https://storage.eu-north1.nebius.cloud/nbs-oss-resources/qemu-static.tgz",
"3307630510": "https://storage.eu-north1.nebius.cloud/nbs-oss-resources/rootfs.img",
"4556399018": "https://storage.eu-north1.nebius.cloud/nbs-oss-resources/virtiofs-server",
Expand Down
3 changes: 2 additions & 1 deletion cloud/filestore/bin/nfs/nfs-storage.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
SchemeShardDir: "/Root/NFS"
MultiTabletForwardingEnabled: true
MultiTabletForwardingEnabled: true
DirectoryCreationInShardsEnabled: true
4 changes: 3 additions & 1 deletion cloud/filestore/tests/fio_index/mount-kikimr-test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,6 @@ def test_fio(name):
mount_dir = get_filestore_mount_path()
dir_name = fio.get_dir_name(mount_dir, name)

fio.run_index_test(dir_name, TESTS[name], fail_on_errors=True)
# TODO(#2831): remove this debug information
fio.run_index_test(dir_name, TESTS[name], fail_on_errors=True, verbose=True)
# assert False
139 changes: 128 additions & 11 deletions cloud/storage/core/tools/testing/fio/lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import logging
import os
import subprocess
import threading
import time
import uuid

import yatest.common as common
Expand Down Expand Up @@ -175,12 +177,25 @@ def get_fio_cmd(self, fio_bin, file_name):
cmd += ["--filename", file_name]
return cmd

def get_index_fio_cmd(self, fio_bin, directory):
def get_index_fio_cmd(self, fio_bin, directory, verbose=False):
cmd = self.get_common_fio_cmd(fio_bin)
cmd += [
"--directory", directory,
"--numjobs", str(self.numjobs)
]
if verbose:
cmd += [
"--write_iolog",
os.path.join(common.output_path(), f"{self.name}.iolog"),
"--log_issue_time",
"1",
"--write_lat_log",
os.path.join(common.output_path(), f"{self.name}.lat.log"),
"--write_bw_log",
os.path.join(common.output_path(), f"{self.name}.bw.log"),
"--log_offset",
"1",
]
if self.fsync > 0:
cmd += ["--fsync", str(self.fsync)]
if self.fdatasync > 0:
Expand Down Expand Up @@ -280,8 +295,11 @@ def _lay_out_files(directory, name, jobs, size):
_lay_out_file('{}/{}.{}.0'.format(directory, name, i), size)


def _execute_command(cmd, fail_on_errors):
logger.info("execute " + " ".join(cmd))
def _execute_command(cmd, fail_on_errors, debug=False):
if debug:
strace_file = os.path.join(common.output_path(), "strace_" + str(uuid.uuid4()) + ".log")
cmd = ["strace", "-f", "-o", strace_file] + cmd
print("execute " + " ".join(cmd))
ex = common.execute(
cmd,
stdout=subprocess.PIPE,
Expand All @@ -301,24 +319,123 @@ def _execute_command(cmd, fail_on_errors):
def run_test(file_name, test, fail_on_errors=False):
# fio lays out the test file using the job blocksize, which may exhaust the
# run time limit, so do it ourselves
logger.info("laying out file " + file_name)
print("laying out file " + file_name)
_lay_out_file(file_name, test.size)
logger.info("laid out")
print("laid out")

fio_bin = _get_fio_bin()
cmd = test.get_fio_cmd(fio_bin, file_name)

return _execute_command(cmd, fail_on_errors)


def run_index_test(directory, test, fail_on_errors=False):
def run_index_test(directory, test, fail_on_errors=False, verbose=False):
# fio lays out the test file using the job blocksize, which may exhaust the
# run time limit, so do it ourselves
logger.info("laying out files in directory " + directory)
print("laying out files in directory " + directory)
_lay_out_files(directory, test.name, test.numjobs, test.size)
logger.info("laid out")
print("laid out")

fio_bin = _get_fio_bin()
cmd = test.get_index_fio_cmd(fio_bin, directory)

return _execute_command(cmd, fail_on_errors)
cmd = test.get_index_fio_cmd(fio_bin, directory, verbose)

parent_pids = {str(os.getpid())}

def _dump_hung_fio_diagnostics(fio_pids):
    """Best-effort dump of kernel log and per-process strace for hung fio PIDs.

    Writes ``dmesg.txt`` and one ``strace_<pid>.txt`` per PID into
    ``common.output_path()``. Failures are logged and never raised, so the
    monitoring thread is not killed by missing tools or missing privileges.
    """
    out_dir = common.output_path()

    try:
        with open(os.path.join(out_dir, "dmesg.txt"), "w") as dmesg_output:
            subprocess.run(
                ["sudo", "-n", "dmesg", "-T"],
                stdout=dmesg_output,
                stderr=dmesg_output,
                timeout=10,
            )
        logging.info("Saved dmesg output to dmesg.txt")
    except (OSError, subprocess.SubprocessError) as dmesg_error:
        logging.info(f"Failed to save dmesg output: {dmesg_error}")

    # Record which user the test runs as and whether passwordless sudo works;
    # "-n" keeps sudo from blocking on a password prompt.
    os.system("id")
    os.system("sudo -n id")

    for pid in fio_pids:
        try:
            with open(
                os.path.join(out_dir, f"strace_{pid}.txt"), "w"
            ) as strace_output:
                subprocess.run(
                    ["sudo", "-n", "strace", "-p", str(pid)],
                    stdout=strace_output,
                    stderr=strace_output,
                    timeout=10,
                )
        except subprocess.TimeoutExpired:
            # Expected path: "strace -p" keeps tracing until it is stopped,
            # so hitting the timeout means the trace window was captured.
            logging.info(
                f"Saved strace output for PID {pid} to strace_{pid}.txt"
            )
        except (OSError, subprocess.SubprocessError) as strace_error:
            logging.info(f"Failed to save strace output: {strace_error}")
        else:
            logging.info(
                f"Saved strace output for PID {pid} to strace_{pid}.txt"
            )


def monitor_fio_progress():
    """Poll for fio descendant processes and log their status until they exit.

    Reads and extends the enclosing ``parent_pids`` set: every PID reported
    by ``pgrep -P`` is added, so later polls also see grandchildren (for
    example fio jobs started under an strace wrapper).

    The loop ends when:
      * processes were seen earlier and none are left (fio finished);
      * the timeout elapsed while processes are still running, in which
        case diagnostics are dumped first;
      * the timeout elapsed and no process was ever seen.
    """
    period_sec = 0.001
    timeout_sec = 60
    pid_seen = False
    start_time = time.time()

    while True:
        # subprocess.run waits for pgrep and closes its pipe, so repeated
        # polling does not accumulate zombie processes or open descriptors.
        pgrep_result = subprocess.run(
            ["pgrep", "-P", ",".join(sorted(parent_pids))],
            stdout=subprocess.PIPE,
            check=False,
        )
        fio_pids = [
            int(pid) for pid in pgrep_result.stdout.decode("utf-8").split()
        ]

        if fio_pids:
            logging.info(
                "Fio process is still running with PIDs: {}".format(fio_pids)
            )
            status_result = subprocess.run(
                ["ps", "-F", "-p", ",".join(map(str, fio_pids))],
                stdout=subprocess.PIPE,
                check=False,
            )
            logging.info(
                "Fio process status: "
                + status_result.stdout.decode("utf-8", errors="replace")
            )
            pid_seen = True
            # Mutate in place: the set is owned by the enclosing scope and
            # only this thread touches it, so no rebinding is needed.
            parent_pids.update(map(str, fio_pids))

            if time.time() - start_time > timeout_sec:
                logging.error("Fio process has timed out")
                _dump_hung_fio_diagnostics(fio_pids)
                break
        elif pid_seen:
            logging.info("Fio process has finished")
            break
        elif time.time() - start_time > timeout_sec:
            # Without this branch the thread would spin forever if fio
            # never starts (e.g. the command fails before exec).
            logging.error("Fio process has not started within the timeout")
            break
        else:
            logging.info("Fio process has not started yet")

        time.sleep(period_sec)

# Monitoring thread that reports the progress of the test
monitoring_thread = threading.Thread(target=monitor_fio_progress, args=())
monitoring_thread.start()

# This will call popen and wait for the process to finish
return _execute_command(cmd, fail_on_errors, debug=verbose)
Loading