Skip to content

Commit

Permalink
cluster/dht: use readdir for fix-layout in rebalance
Browse files Browse the repository at this point in the history
fixes: #2241
Change-Id: I5fe2ecea25a399ad58e31a2e322caf69fc7f49eb
Signed-off-by: Pranith Kumar K <[email protected]>
  • Loading branch information
pranithk committed Mar 10, 2021
1 parent 46949c4 commit db5a37e
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 46 deletions.
2 changes: 1 addition & 1 deletion cli/src/cli-rpc-ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -1576,7 +1576,7 @@ gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type)
sec = ((uint64_t)elapsed % 3600) % 60;

if (fix_layout) {
cli_out("%35s %50s %8d:%d:%d", node_name, status_str, hrs, min,
cli_out("%35s %50s %8d:%02d:%02d", node_name, status_str, hrs, min,
sec);
} else {
if (size_str) {
Expand Down
33 changes: 26 additions & 7 deletions xlators/cluster/dht/src/dht-common.c
Original file line number Diff line number Diff line change
Expand Up @@ -6649,8 +6649,10 @@ dht_queue_readdir(call_frame_t *frame, xlator_t *xl, off_t offset,
{
dht_local_t *local;
int32_t queue;
xlator_t *this;

local = frame->local;
this = frame->this;

local->queue_xl = xl;
local->queue_offset = offset;
Expand All @@ -6677,7 +6679,7 @@ dht_queue_readdir(call_frame_t *frame, xlator_t *xl, off_t offset,
/* A negative value means that an unwind has been called before
* returning from the previous wind. This means that 'local' is
* not needed anymore and must be destroyed. */
dht_local_wipe(frame->this, local);
dht_local_wipe(this, local);
}
}
}
Expand All @@ -6690,8 +6692,10 @@ dht_queue_readdirp(call_frame_t *frame, xlator_t *xl, off_t offset,
{
dht_local_t *local;
int32_t queue;
xlator_t *this;

local = frame->local;
this = frame->this;

local->queue_xl = xl;
local->queue_offset = offset;
Expand All @@ -6705,7 +6709,7 @@ dht_queue_readdirp(call_frame_t *frame, xlator_t *xl, off_t offset,
} while ((queue = uatomic_sub_return(&local->queue, 1)) > 0);

if (queue < 0) {
dht_local_wipe(frame->this, local);
dht_local_wipe(this, local);
}
}
}
Expand Down Expand Up @@ -7030,6 +7034,8 @@ dht_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
dht_conf_t *conf = NULL;
dht_methods_t *methods = NULL;
gf_boolean_t skip_hashed_check = _gf_false;
gf_boolean_t readdir_optimize = _gf_false;
gf_boolean_t add = _gf_false;

INIT_LIST_HEAD(&entries.list);

Expand All @@ -7039,6 +7045,7 @@ dht_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
conf = this->private;
GF_VALIDATE_OR_GOTO(this->name, conf, done);

readdir_optimize = conf->readdir_optimize;
methods = &(conf->methods);

if (op_ret <= 0)
Expand All @@ -7062,12 +7069,24 @@ dht_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
{
next_offset = orig_entry->d_off;

gf_msg_debug(this->name, 0, "%s: entry = %s, type = %d", prev->name,
orig_entry->d_name, orig_entry->d_type);

subvol = methods->layout_search(this, layout, orig_entry->d_name);
gf_log(this->name, GF_LOG_INFO, "%s: entry = %s, type = %d %p, %p", prev->name,
orig_entry->d_name, orig_entry->d_type, subvol, prev);

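/* With readdir-optimize enabled (as it is for the rebalance process),
* take directory entries only from first_up_subvol and skip the ones
* returned by any other subvolume. */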
if (DT_ISDIR(orig_entry->d_type) && readdir_optimize) {
if (prev == local->first_up_subvol) {
add = _gf_true;
} else {
continue;
}
} else if (!subvol || (subvol == prev)) {
add = _gf_true;
}

if (!subvol || (subvol == prev)) {
if (add) {
add = _gf_false;
entry = gf_dirent_for_name(orig_entry->d_name);
if (!entry) {
gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
Expand All @@ -7080,7 +7099,7 @@ dht_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
entry->d_type = orig_entry->d_type;
entry->d_len = orig_entry->d_len;

gf_msg_debug(this->name, 0, "%s: Adding = entry %s", prev->name,
gf_log(this->name, GF_LOG_INFO, "%s: Adding = entry %s", prev->name,
entry->d_name);

list_add_tail(&entry->list, &entries.list);
Expand Down
49 changes: 12 additions & 37 deletions xlators/cluster/dht/src/dht-rebalance.c
Original file line number Diff line number Diff line change
Expand Up @@ -3639,6 +3639,12 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
goto out;
}

linked_inode = inode_link(loc->inode, loc->parent, loc->name, &iatt);

inode = loc->inode;
loc->inode = linked_inode;
inode_unref(inode);

fd = fd_create(loc->inode, defrag->pid);
if (!fd) {
gf_log(this->name, GF_LOG_ERROR, "Failed to create fd");
Expand Down Expand Up @@ -3671,8 +3677,8 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
fd_bind(fd);
INIT_LIST_HEAD(&entries.list);

while ((ret = syncop_readdirp(this, fd, 131072, offset, &entries, NULL,
NULL)) != 0) {
while ((ret = syncop_readdir(this, fd, 131072, offset, &entries, NULL,
NULL)) != 0) {
if (ret < 0) {
if (-ret == ENOENT || -ret == ESTALE) {
if (conf->decommission_subvols_cnt) {
Expand Down Expand Up @@ -3703,11 +3709,14 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
goto out;
}

if (__is_root_gfid (fd->inode->gfid)) {
gf_log(this->name, GF_LOG_INFO, "Entry: %s", entry->d_name);
}
offset = entry->d_off;

if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
continue;
if (!IA_ISDIR(entry->d_stat.ia_type)) {
if (!DT_ISDIR(entry->d_type)) {
continue;
}
loc_wipe(&entry_loc);
Expand All @@ -3730,40 +3739,6 @@ gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
}
}

if (gf_uuid_is_null(entry->d_stat.ia_gfid)) {
gf_log(this->name, GF_LOG_ERROR,
"%s/%s"
" gfid not present",
loc->path, entry->d_name);
continue;
}

gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);

/* In case the gfid stored in the inode by inode_link
* and the gfid obtained in the lookup differ,
* client3_3_lookup_cbk will return ESTALE and the proper
* error will be captured.
*/

linked_inode = inode_link(entry_loc.inode, loc->inode,
entry->d_name, &entry->d_stat);

inode = entry_loc.inode;
entry_loc.inode = linked_inode;
inode_unref(inode);

if (gf_uuid_is_null(loc->gfid)) {
gf_log(this->name, GF_LOG_ERROR,
"%s/%s"
" gfid not present",
loc->path, entry->d_name);
defrag->total_failures++;
continue;
}

gf_uuid_copy(entry_loc.pargfid, loc->gfid);

/* A return value of 2 means that either process_dir or the
* lookup of a dir failed. Hence, don't commit the hash
* for the current directory. */
Expand Down
2 changes: 1 addition & 1 deletion xlators/mgmt/glusterd/src/glusterd-rebalance.c
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,

runner_add_args(
&runner, SBIN_DIR "/glusterfs", "-s", volfileserver, "--volfile-id",
volname, "--xlator-option", "*dht.use-readdirp=yes", "--xlator-option",
volname, "--xlator-option", "*dht.use-readdirp=no", "--xlator-option",
"*dht.lookup-unhashed=yes", "--xlator-option",
"*dht.assert-no-child-down=yes", "--xlator-option",
"*dht.readdir-optimize=on", "--process-name", "rebalance", NULL);
Expand Down

0 comments on commit db5a37e

Please sign in to comment.