Skip to content

Commit

Permalink
Support resetting IndexShard engine on the fly
Browse files Browse the repository at this point in the history
  • Loading branch information
arteam committed Jan 17, 2025
1 parent a867127 commit 588aa8d
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 0 deletions.
25 changes: 25 additions & 0 deletions server/src/main/java/org/elasticsearch/index/shard/IndexShard.java
Original file line number Diff line number Diff line change
Expand Up @@ -4361,6 +4361,31 @@ public void afterRefresh(boolean didRefresh) {
}
}

/**
* Reset the current engine to a new one without doing translog recovery.
* <p>
* The engine currently held by {@code currentEngineReference} is closed, a new engine is created from a freshly built
* engine config and stored in its place, and the {@code active} flag is set to {@code true}.
* <p>
* Preconditions, checked with assertions only: the calling thread must not hold {@code mutex}, and the active
* operations count must equal {@code OPERATIONS_BLOCKED} (i.e. the caller has blocked operations on this shard).
* <p>
* Any exception raised inside the try block (engine swap and the follow-up callbacks) is passed to
* {@code failShard}; the catch block itself does not rethrow it.
*
* @throws IOException declared by the signature. NOTE(review): within this method only the engine config creation
* runs outside the catch-all block — confirm which call can actually surface this exception.
*/
public void resetEngine() throws IOException {
assert Thread.holdsLock(mutex) == false : "resetting engine under mutex";
assert getActiveOperationsCount() == OPERATIONS_BLOCKED
: "resetting engine without blocking operations; active operations are [" + getActiveOperationsCount() + ']';
// Built before entering the try block, so a failure here propagates to the caller instead of failing the shard.
var engineConfig = newEngineConfig(replicationTracker);
try {
// Closing the old engine and installing the new one happen as one unit with respect to other engineMutex holders.
synchronized (engineMutex) {
verifyNotClosed();
// The old engine is closed before its replacement exists; if creation below throws, the reference still
// points at the closed engine and the catch block fails the shard.
IOUtils.close(currentEngineReference.get());

var newEngine = createEngine(engineConfig);
currentEngineReference.set(newEngine);
onNewEngine(newEngine);
active.set(true);
}
// Follow-up callbacks run after engineMutex has been released.
onSettingsChanged();
checkAndCallWaitForEngineOrClosedShardListeners();
} catch (Exception e) {
failShard("Unable to reset engine", e);
}
}

/**
* Rollback the current engine to the safe commit, then replay local translog up to the global checkpoint.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
import org.elasticsearch.index.engine.EngineTestCase;
import org.elasticsearch.index.engine.InternalEngine;
import org.elasticsearch.index.engine.InternalEngineFactory;
import org.elasticsearch.index.engine.NoOpEngine;
import org.elasticsearch.index.engine.ReadOnlyEngine;
import org.elasticsearch.index.engine.Segment;
import org.elasticsearch.index.engine.VersionConflictEngineException;
Expand Down Expand Up @@ -4574,6 +4575,32 @@ public void testResetEngine() throws Exception {
closeShard(shard, false);
}

/**
 * Checks that resetting the engine of a started primary shard, whose engine factory always produces a
 * {@link NoOpEngine}, leaves the shard active with a {@link NoOpEngine} installed and notifies a listener
 * registered through {@code waitForEngineOrClosedShard}.
 */
public void testResetEngineToNoOpEngine() throws Exception {
    // The factory must be invoked twice before the test proceeds to its assertions
    // (presumably once when the shard starts and once for the reset).
    var engineCreations = new CountDownLatch(2);
    var shard = newStartedShard(true, Settings.EMPTY, engineConfig -> {
        try {
            return new NoOpEngine(engineConfig);
        } finally {
            // Count the invocation whether or not construction succeeded.
            engineCreations.countDown();
        }
    });

    var listenerNotified = new CountDownLatch(1);
    shard.waitForEngineOrClosedShard(ActionListener.running(listenerNotified::countDown));

    // resetEngine() asserts that operations are blocked, so hold all primary permits for the duration of the call.
    var permitFuture = new PlainActionFuture<Releasable>();
    shard.acquireAllPrimaryOperationsPermits(permitFuture, TimeValue.timeValueMinutes(1L));
    try (var ignored = safeGet(permitFuture)) {
        shard.resetEngine();
    }

    safeAwait(engineCreations);
    safeAwait(listenerNotified);

    assertThat(shard.getEngine(), instanceOf(NoOpEngine.class));
    assertTrue(shard.isActive());

    closeShard(shard, false);
}

/**
* This test simulates a scenario seen rarely in ConcurrentSeqNoVersioningIT. Closing a shard while engine is inside
* resetEngineToGlobalCheckpoint can lead to check index failure in integration tests.
Expand Down

0 comments on commit 588aa8d

Please sign in to comment.