feat: deregister worker on close
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2025-01-18 12:17:02 +01:00
parent 2cdab4a1ab
commit b6e9882855
8 changed files with 132 additions and 70 deletions

View File

@@ -22,3 +22,7 @@ SET
WHERE
worker_id = $1;
-- name: DeregisterWorker :exec
-- Deletes the worker_register row whose worker_id equals the first parameter ($1).
DELETE FROM worker_register
WHERE
worker_id = $1;

View File

@@ -11,6 +11,7 @@ import (
)
type Querier interface {
DeregisterWorker(ctx context.Context, workerID uuid.UUID) error
GetWorkers(ctx context.Context) ([]*GetWorkersRow, error)
Ping(ctx context.Context) (int32, error)
RegisterWorker(ctx context.Context, arg *RegisterWorkerParams) error

View File

@@ -11,6 +11,17 @@ import (
"github.com/google/uuid"
)
// deregisterWorker is the SQL statement executed by DeregisterWorker.
const deregisterWorker = `-- name: DeregisterWorker :exec
DELETE FROM worker_register
WHERE
worker_id = $1
`

// DeregisterWorker deletes the worker_register row whose worker_id equals
// workerID. It returns whatever error the underlying Exec call reports, or
// nil when the statement ran successfully.
func (q *Queries) DeregisterWorker(ctx context.Context, workerID uuid.UUID) error {
	if _, execErr := q.db.Exec(ctx, deregisterWorker, workerID); execErr != nil {
		return execErr
	}
	return nil
}
const getWorkers = `-- name: GetWorkers :many
SELECT
worker_id

View File

@@ -24,6 +24,9 @@ type Worker struct {
workProcessor workProcessor
logger *slog.Logger
heartBeatCancel context.CancelFunc
heartBeatCtx context.Context
capacity uint
}
@@ -56,6 +59,67 @@ func (w *Worker) Setup(ctx context.Context) error {
return nil
}
w.heartBeatCtx, w.heartBeatCancel = context.WithCancel(context.Background())
go func() {
ticker := time.NewTicker(time.Second * 5)
errorCount := 0
for {
select {
case <-w.heartBeatCtx.Done():
return
case <-ticker.C:
if err := w.updateHeartBeat(w.heartBeatCtx); err != nil {
if errorCount >= 5 {
panic(fmt.Errorf("worker failed to register heartbeat for a long time, panicing..., err: %w", err))
}
errorCount += 1
} else {
errorCount = 0
}
}
}
}()
return nil
}
// Start processes the work queue in a loop until the worker's heartbeat
// context is cancelled (Close cancels it), and then returns nil.
//
// Failures reported by processWorkQueue are logged as warnings and the loop
// continues. Start returns an error without processing anything when the
// heartbeat context has not been initialised.
func (w *Worker) Start(ctx context.Context) error {
	// Close already guards against a nil heartbeat context; do the same here so
	// that a worker without one reports an error instead of panicking on a nil
	// interface.
	if w.heartBeatCtx == nil {
		return fmt.Errorf("worker has not been set up: heartbeat context is nil")
	}

	for {
		select {
		case <-w.heartBeatCtx.Done():
			return nil
		// NOTE(review): cancelling ctx does not stop this loop because the case
		// below is disabled — confirm that this is intended.
		// case <-ctx.Done():
		// 	return nil
		default:
			if err := w.processWorkQueue(ctx); err != nil {
				// FIXME: dead letter item, right now we just log and continue
				w.logger.WarnContext(ctx, "failed to handle work item", "error", err)
			}
		}
	}
}
// Close cancels the heartbeat context and then removes this worker's
// registration through the repository.
//
// When no heartbeat context exists, Close only invokes the cancel function
// (if any) and returns nil. The deregistration runs under its own ten-second
// timeout derived from context.Background(); the ctx argument is not used for
// it. A failed deregistration is returned wrapped together with the worker ID.
func (w *Worker) Close(ctx context.Context) error {
	if stopHeartBeat := w.heartBeatCancel; stopHeartBeat != nil {
		stopHeartBeat()
	}

	if w.heartBeatCtx == nil {
		return nil
	}

	// Block until the heartbeat context reports cancellation.
	<-w.heartBeatCtx.Done()

	queries := repositories.New(w.db)

	deregisterCtx, release := context.WithTimeout(context.Background(), 10*time.Second)
	defer release()

	w.logger.InfoContext(deregisterCtx, "deregistering worker", "worker_id", w.workerID)

	deregisterErr := queries.DeregisterWorker(deregisterCtx, w.workerID)
	if deregisterErr != nil {
		return fmt.Errorf("failed to deregister worker: %s, err: %w", w.workerID, deregisterErr)
	}

	return nil
}
@@ -99,47 +163,6 @@ func (w *Worker) GetWorkers(ctx context.Context) (*Workers, error) {
}, nil
}
// Start launches a background heartbeat goroutine and then processes the work
// queue until ctx is cancelled, at which point it returns nil.
//
// The heartbeat goroutine calls updateHeartBeat every five seconds. A
// successful update resets the failure counter; the sixth consecutive failed
// update panics. The goroutine is stopped when Start returns.
//
// Failures reported by processWorkQueue are logged as warnings and the loop
// continues.
func (w *Worker) Start(ctx context.Context) error {
	heartBeatCtx, heartBeatCancel := context.WithCancel(context.Background())
	// Stop the heartbeat goroutine as soon as Start returns.
	defer heartBeatCancel()

	go func() {
		ticker := time.NewTicker(time.Second * 5)
		// Release the ticker when the goroutine exits so it does not keep
		// firing after the heartbeat has been cancelled.
		defer ticker.Stop()

		errorCount := 0
		for {
			select {
			case <-heartBeatCtx.Done():
				return
			case <-ticker.C:
				if err := w.updateHeartBeat(heartBeatCtx); err != nil {
					if errorCount >= 5 {
						panic(fmt.Errorf("worker failed to register heartbeat for a long time, panicing..., err: %w", err))
					}
					errorCount += 1
				} else {
					errorCount = 0
				}
			}
		}
	}()

	for {
		select {
		case <-ctx.Done():
			return nil
		default:
			if err := w.processWorkQueue(ctx); err != nil {
				// FIXME: dead letter item, right now we just log and continue
				w.logger.WarnContext(ctx, "failed to handle work item", "error", err)
			}
		}
	}
}
func (w *Worker) updateHeartBeat(ctx context.Context) error {
repo := repositories.New(w.db)