chyezh 77e123762f
enhance: add graceful stop timeout to avoid node stop hang under extreme cases (#30320)
1. add coordinator and proxy graceful stop timeout to 5s.
3. add other work node graceful stop timeout to 900s, and we should
potentially change this to 600s when graceful stop is smooth
4. change the order of datacoord component while stop.
5. `LivenessCheck` do not perform graceful shutdown now. 

issue: https://github.com/milvus-io/milvus/issues/30310
pr: #30317
also see: https://github.com/milvus-io/milvus/pull/30306

---------

Signed-off-by: chyezh <chyezh@outlook.com>
2024-01-27 08:45:02 +08:00

39 lines
884 B
Go

package components
import (
"context"
"os"
"time"
"github.com/cockroachdb/errors"
"github.com/milvus-io/milvus/pkg/util/conc"
)
var errStopTimeout = errors.New("stop timeout")
// exitWhenStopTimeout stops a component with timeout and exit progress when timeout.
func exitWhenStopTimeout(stop func() error, timeout time.Duration) error {
err := stopWithTimeout(stop, timeout)
if errors.Is(err, errStopTimeout) {
os.Exit(1)
}
return err
}
// stopWithTimeout stops a component with timeout.
func stopWithTimeout(stop func() error, timeout time.Duration) error {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
future := conc.Go(func() (struct{}, error) {
return struct{}{}, stop()
})
select {
case <-future.Inner():
return errors.Wrap(future.Err(), "failed to stop component")
case <-ctx.Done():
return errStopTimeout
}
}