-
Notifications
You must be signed in to change notification settings - Fork 192
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
debug build: add logic to simulate split-brain
* add hk/sigdeb and ais/usr1deb (tags=debug) * refactor Signed-off-by: Alex Aizman <[email protected]>
- Loading branch information
1 parent
45b9169
commit fce0ada
Showing
16 changed files
with
200 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,7 @@ import ( | |
|
||
func init() { | ||
xreg.Init() | ||
hk.TestInit() | ||
hk.Init(false) | ||
} | ||
|
||
func TestAIS(t *testing.T) { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
//go:build debug | ||
|
||
// Package ais provides core functionality for the AIStore object storage. | ||
/* | ||
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. | ||
*/ | ||
package ais | ||
|
||
import ( | ||
"strconv" | ||
|
||
"github.com/NVIDIA/aistore/cmn/debug" | ||
"github.com/NVIDIA/aistore/cmn/nlog" | ||
"github.com/NVIDIA/aistore/core/meta" | ||
"github.com/NVIDIA/aistore/hk" | ||
) | ||
|
||
func (t *target) setusr1() { hk.SetUSR1(t.usr1) } | ||
func (p *proxy) setusr1() { hk.SetUSR1(p.usr1) } | ||
|
||
// When "odd" and "even" nodes cannot coexist together (this code is triggered by | ||
// `SIGUSR1` simultaneously on all nodes in the cluster). | ||
func (h *htrun) usr1() bool /*split done*/ { | ||
smap := h.owner.smap.get() | ||
if smap.CountActivePs() < 2 { | ||
nlog.Errorln("not enough proxies to perform the split:", smap.StringEx()) | ||
return false | ||
} | ||
// 1. current primary port | ||
oport, err := strconv.ParseInt(smap.Primary.PubNet.Port, 10, 64) | ||
debug.AssertNoErr(err) | ||
|
||
// 2. deterministically select the second primary that satisfies the following condition: | ||
// its port must have a different parity (which is why we cannot use `meta.HrwProxy`) | ||
var ( | ||
npsi *meta.Snode | ||
maxH uint64 | ||
) | ||
for _, psi := range smap.Pmap { | ||
if flags := psi.Flags.Clear(meta.SnodeIC); flags != 0 { | ||
continue | ||
} | ||
port, err := strconv.ParseInt(psi.PubNet.Port, 10, 64) | ||
debug.AssertNoErr(err) | ||
if port%2 != oport%2 && psi.IDDigest > maxH { | ||
npsi = psi | ||
maxH = psi.IDDigest | ||
} | ||
} | ||
if npsi == nil { | ||
nlog.Errorln(h.String(), "failed to select the second primary candidate that'd have public port with different parity:", | ||
smap.StringEx()) | ||
return false | ||
} | ||
|
||
// 3. split cluster in two | ||
var ( | ||
myport int64 | ||
clone = smap.clone() | ||
nodes = []meta.NodeMap{clone.Pmap, clone.Tmap} | ||
) | ||
myport, err = strconv.ParseInt(h.si.PubNet.Port, 10, 64) | ||
debug.AssertNoErr(err) | ||
|
||
for _, nmap := range nodes { | ||
for id, si := range nmap { | ||
port, err := strconv.ParseInt(si.PubNet.Port, 10, 64) | ||
debug.AssertNoErr(err) | ||
|
||
if myport%2 != port%2 { | ||
delete(nmap, id) | ||
} | ||
} | ||
} | ||
|
||
clone.Version += 100 // TODO: randomize version bump to create scenarios | ||
if myport%2 != oport%2 { | ||
debug.Assert(clone.GetNode(npsi.ID()) != nil, clone.StringEx()) | ||
clone.Primary = npsi | ||
} | ||
h.owner.smap.put(clone) | ||
|
||
return true | ||
} | ||
|
||
func (p *proxy) usr1() { | ||
smap := p.owner.smap.get() | ||
opid := smap.Primary.ID() | ||
if !p.htrun.usr1() { | ||
return | ||
} | ||
|
||
nsmap := p.owner.smap.get() | ||
nlog.Infoln(p.String(), "split-brain", nsmap.StringEx()) | ||
|
||
if opid != nsmap.Primary.ID() && p.SID() == nsmap.Primary.ID() { | ||
nlog.Infoln(p.String(), "becoming new primary") | ||
p.becomeNewPrimary("") | ||
} | ||
} | ||
|
||
func (t *target) usr1() { | ||
if t.htrun.usr1() { | ||
nsmap := t.owner.smap.get() | ||
nlog.Infoln(t.String(), "split-brain", nsmap.StringEx()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
//go:build !debug | ||
|
||
// Package ais provides core functionality for the AIStore object storage. | ||
/* | ||
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. | ||
*/ | ||
package ais | ||
|
||
func (*htrun) setusr1() {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,7 @@ import ( | |
) | ||
|
||
func init() { | ||
hk.TestInit() | ||
hk.Init(false) | ||
} | ||
|
||
func TestCore(t *testing.T) { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,7 @@ import ( | |
) | ||
|
||
func init() { | ||
hk.TestInit() | ||
hk.Init(false) | ||
} | ||
|
||
func TestDsort(t *testing.T) { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
//go:build debug | ||
|
||
// Package hk provides a mechanism for registering cleanup | ||
// functions which are invoked at specified intervals. | ||
/* | ||
* Copyright (c) 2018-2025, NVIDIA CORPORATION. All rights reserved. | ||
*/ | ||
package hk | ||
|
||
import ( | ||
"os/signal" | ||
"runtime" | ||
"strings" | ||
"syscall" | ||
|
||
"github.com/NVIDIA/aistore/cmn/cos" | ||
"github.com/NVIDIA/aistore/cmn/nlog" | ||
"github.com/NVIDIA/aistore/sys" | ||
) | ||
|
||
// cbUSR1 is the callback that handleSignal invokes upon receiving SIGUSR1.
var cbUSR1 func()

// SetUSR1 stores cb as the SIGUSR1 callback, replacing the previously stored
// one, if any (this file is compiled with the `debug` build tag only).
func SetUSR1(cb func()) {
	cbUSR1 = cb
}
|
||
func (hk *hk) setSignal() { | ||
signal.Notify(hk.sigCh, | ||
// ignore, log | ||
syscall.SIGHUP, // kill -SIGHUP | ||
// terminate | ||
syscall.SIGINT, // kill -SIGINT (Ctrl-C) | ||
syscall.SIGTERM, // kill -SIGTERM | ||
syscall.SIGQUIT, // kill -SIGQUIT | ||
// test | ||
syscall.SIGUSR1, | ||
) | ||
} | ||
|
||
func (hk *hk) handleSignal(s syscall.Signal) (err error) { | ||
switch s { | ||
case syscall.SIGHUP: | ||
var ( | ||
sb strings.Builder | ||
mem sys.MemStat | ||
ngr = runtime.NumGoroutine() | ||
) | ||
erm := mem.Get() | ||
mem.Str(&sb) | ||
nfd, erf := numOpenFiles() | ||
nlog.Infoln("ngr [", ngr, sys.NumCPU(), "] mem [", sb.String(), erm, "]", "num-fd [", nfd, erf, "]") | ||
case syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT: | ||
signal.Stop(hk.sigCh) | ||
err = cos.NewSignalError(s) | ||
hk.Stop(err) | ||
case syscall.SIGUSR1: | ||
cbUSR1() | ||
default: | ||
cos.ExitLog("unexpected signal:", s) | ||
} | ||
|
||
return err | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,7 +15,7 @@ import ( | |
) | ||
|
||
func init() { | ||
hk.TestInit() | ||
hk.Init(false) | ||
} | ||
|
||
func TestMemsys(t *testing.T) { | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,4 +9,4 @@ import ( | |
"github.com/NVIDIA/aistore/hk" | ||
) | ||
|
||
func init() { hk.TestInit() } | ||
func init() { hk.Init(false) } |