Skip to content

Commit 8627bc2

Browse files
authored
feat(explorer): relax token deletion with error threshold (#3211)
feat(explorer): relax token deletion with error threshold Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent 0c0bc18 commit 8627bc2

File tree

2 files changed

+39
-13
lines changed

2 files changed

+39
-13
lines changed

core/cli/explorer.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@ import (
1010
)
1111

1212
// ExplorerCMD groups the options of the explorer command. Every field carries
// env, default, help and group struct tags describing how it is populated.
type ExplorerCMD struct {
13-
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
14-
PoolDatabase string `env:"LOCALAI_POOL_DATABASE,POOL_DATABASE" default:"explorer.json" help:"Path to the pool database" group:"api"`
15-
ConnectionTimeout string `env:"LOCALAI_CONNECTION_TIMEOUT,CONNECTION_TIMEOUT" default:"2m" help:"Connection timeout for the explorer" group:"api"`
13+
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
14+
PoolDatabase string `env:"LOCALAI_POOL_DATABASE,POOL_DATABASE" default:"explorer.json" help:"Path to the pool database" group:"api"`
15+
ConnectionTimeout string `env:"LOCALAI_CONNECTION_TIMEOUT,CONNECTION_TIMEOUT" default:"2m" help:"Connection timeout for the explorer" group:"api"`
16+
// ConnectionErrorThreshold is handed to explorer.NewDiscoveryServer by Run.
ConnectionErrorThreshold int `env:"LOCALAI_CONNECTION_ERROR_THRESHOLD,CONNECTION_ERROR_THRESHOLD" default:"3" help:"Connection failure threshold for the explorer" group:"api"`
1617
}
1718

1819
func (e *ExplorerCMD) Run(ctx *cliContext.Context) error {
@@ -26,7 +27,7 @@ func (e *ExplorerCMD) Run(ctx *cliContext.Context) error {
2627
if err != nil {
2728
return err
2829
}
29-
ds := explorer.NewDiscoveryServer(db, dur)
30+
ds := explorer.NewDiscoveryServer(db, dur, e.ConnectionErrorThreshold)
3031

3132
go ds.Start(context.Background())
3233
appHTTP := http.Explorer(db, ds)

core/explorer/discovery.go

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@ import (
1515

1616
// DiscoveryServer bundles the token database, the network state and the
// per-token failure bookkeeping. The embedded mutex is taken by failedToken
// and deleteFailedConnections before they touch failures.
type DiscoveryServer struct {
1717
sync.Mutex
18-
database *Database
19-
networkState *NetworkState
20-
connectionTime time.Duration
18+
database *Database
19+
networkState *NetworkState
20+
connectionTime time.Duration
21+
// failures maps a token to the number of failures recorded by failedToken.
failures map[string]int
22+
// errorThreshold is the failure count a token must exceed before
// deleteFailedConnections removes it from the database.
errorThreshold int
2123
}
2224

2325
type NetworkState struct {
@@ -32,16 +34,20 @@ func (s *DiscoveryServer) NetworkState() *NetworkState {
3234

3335
// NewDiscoveryServer creates a new DiscoveryServer with the given Database.
3436
// it keeps the db state in sync with the network state
35-
func NewDiscoveryServer(db *Database, dur time.Duration) *DiscoveryServer {
37+
func NewDiscoveryServer(db *Database, dur time.Duration, failureThreshold int) *DiscoveryServer {
3638
if dur == 0 {
3739
dur = 50 * time.Second
3840
}
41+
if failureThreshold == 0 {
42+
failureThreshold = 3
43+
}
3944
return &DiscoveryServer{
4045
database: db,
4146
connectionTime: dur,
4247
networkState: &NetworkState{
4348
Networks: map[string]Network{},
4449
},
50+
errorThreshold: failureThreshold,
4551
}
4652
}
4753

@@ -66,21 +72,21 @@ func (s *DiscoveryServer) runBackground() {
6672
n, err := p2p.NewNode(token)
6773
if err != nil {
6874
log.Err(err).Msg("Failed to create node")
69-
s.database.Delete(token)
75+
s.failedToken(token)
7076
continue
7177
}
7278

7379
err = n.Start(c)
7480
if err != nil {
7581
log.Err(err).Msg("Failed to start node")
76-
s.database.Delete(token)
82+
s.failedToken(token)
7783
continue
7884
}
7985

8086
ledger, err := n.Ledger()
8187
if err != nil {
8288
log.Err(err).Msg("Failed to start ledger")
83-
s.database.Delete(token)
89+
s.failedToken(token)
8490
continue
8591
}
8692

@@ -114,8 +120,27 @@ func (s *DiscoveryServer) runBackground() {
114120
}
115121
s.Unlock()
116122
} else {
117-
log.Info().Any("network", token).Msg("No workers found in the network. Removing it from the database")
118-
s.database.Delete(token)
123+
s.failedToken(token)
124+
}
125+
}
126+
127+
s.deleteFailedConnections()
128+
}
129+
130+
func (s *DiscoveryServer) failedToken(token string) {
131+
s.Lock()
132+
defer s.Unlock()
133+
s.failures[token]++
134+
}
135+
136+
func (s *DiscoveryServer) deleteFailedConnections() {
137+
s.Lock()
138+
defer s.Unlock()
139+
for k, v := range s.failures {
140+
if v > s.errorThreshold {
141+
log.Info().Any("network", k).Msg("Network has been removed from the database")
142+
s.database.Delete(k)
143+
delete(s.failures, k)
119144
}
120145
}
121146
}

0 commit comments

Comments
 (0)