feat: add worker distributor and model registry
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2025-01-18 01:46:37 +01:00
parent 54aa310583
commit 2cdab4a1ab
28 changed files with 1169 additions and 29 deletions

View File

@@ -0,0 +1,56 @@
-- Connectivity check; returns the constant 1.
-- name: Ping :one
SELECT 1;
-- Number of non-archived queue items currently assigned to a worker.
-- Used by the Go layer to compute remaining capacity.
-- name: GetCurrentQueueSize :one
SELECT
COUNT(*) current_queue_size
FROM
work_schedule
WHERE
worker_id = $1
AND state <> 'archived';
-- Enqueue one run window for a worker; items always start as 'pending'.
-- name: InsertQueueItem :exec
INSERT INTO work_schedule
(
schedule_id
, worker_id
, start_run
, end_run
, state
)
VALUES
(
$1
, $2
, $3
, $4
, 'pending'
);
-- Fetch a single pending item for a worker.
-- NOTE(review): ORDER BY updated_at DESC picks the most recently touched
-- item (LIFO-ish); confirm FIFO (ASC) is not intended. There is also no
-- FOR UPDATE SKIP LOCKED, so concurrent pollers of the same worker_id
-- may receive the same row.
-- name: GetNext :one
SELECT
*
FROM
work_schedule
WHERE
worker_id = $1
AND state = 'pending'
ORDER BY updated_at DESC
LIMIT 1;
-- Transition an item to 'processing'.
-- NOTE(review): no state guard in the WHERE clause; any schedule_id is
-- moved to 'processing' regardless of its current state.
-- name: StartProcessing :exec
UPDATE work_schedule
SET
state = 'processing'
WHERE
schedule_id = $1;
-- Terminal transition: 'archived' rows no longer count toward queue size.
-- name: Archive :exec
UPDATE work_schedule
SET
state = 'archived'
WHERE
schedule_id = $1;

View File

@@ -0,0 +1,32 @@
// Code generated by sqlc. DO NOT EDIT.
// versions:
// sqlc v1.23.0
package repositories
import (
"context"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgconn"
)
// DBTX is the minimal execution surface satisfied by both *pgxpool.Pool
// and pgx.Tx, letting Queries run against a pool or an open transaction.
type DBTX interface {
Exec(context.Context, string, ...interface{}) (pgconn.CommandTag, error)
Query(context.Context, string, ...interface{}) (pgx.Rows, error)
QueryRow(context.Context, string, ...interface{}) pgx.Row
}
// New returns a Queries bound to the given pool, connection, or transaction.
func New(db DBTX) *Queries {
return &Queries{db: db}
}
// Queries holds the executor used by the generated query methods.
type Queries struct {
db DBTX
}
// WithTx returns a copy of Queries that executes against the given transaction.
func (q *Queries) WithTx(tx pgx.Tx) *Queries {
return &Queries{
db: tx,
}
}

View File

@@ -0,0 +1,30 @@
// Code generated by sqlc. DO NOT EDIT.
// versions:
// sqlc v1.23.0
package repositories
import (
"github.com/google/uuid"
"github.com/jackc/pgx/v5/pgtype"
)
// ModelSchedule records the last time a named model was run.
type ModelSchedule struct {
ModelName string `json:"model_name"`
LastRun pgtype.Timestamptz `json:"last_run"`
}
// WorkSchedule is one queued run window assigned to a worker. Per
// queries.sql, state moves through 'pending' -> 'processing' -> 'archived'.
type WorkSchedule struct {
ScheduleID uuid.UUID `json:"schedule_id"`
WorkerID uuid.UUID `json:"worker_id"`
StartRun pgtype.Timestamptz `json:"start_run"`
EndRun pgtype.Timestamptz `json:"end_run"`
UpdatedAt pgtype.Timestamptz `json:"updated_at"`
State string `json:"state"`
}
// WorkerRegister is a registered worker with its queue capacity and the
// timestamp of its last heartbeat.
type WorkerRegister struct {
WorkerID uuid.UUID `json:"worker_id"`
Capacity int32 `json:"capacity"`
HeartBeat pgtype.Timestamptz `json:"heart_beat"`
}

View File

@@ -0,0 +1,22 @@
// Code generated by sqlc. DO NOT EDIT.
// versions:
// sqlc v1.23.0
package repositories
import (
"context"
"github.com/google/uuid"
)
// Querier is the generated interface covering every query in queries.sql;
// *Queries is its canonical implementation.
type Querier interface {
Archive(ctx context.Context, scheduleID uuid.UUID) error
GetCurrentQueueSize(ctx context.Context, workerID uuid.UUID) (int64, error)
GetNext(ctx context.Context, workerID uuid.UUID) (*WorkSchedule, error)
InsertQueueItem(ctx context.Context, arg *InsertQueueItemParams) error
Ping(ctx context.Context) (int32, error)
StartProcessing(ctx context.Context, scheduleID uuid.UUID) error
}
// Compile-time check that *Queries satisfies Querier.
var _ Querier = (*Queries)(nil)

View File

@@ -0,0 +1,129 @@
// Code generated by sqlc. DO NOT EDIT.
// versions:
// sqlc v1.23.0
// source: queries.sql
package repositories
import (
"context"
"github.com/google/uuid"
"github.com/jackc/pgx/v5/pgtype"
)
const archive = `-- name: Archive :exec
UPDATE work_schedule
SET
state = 'archived'
WHERE
schedule_id = $1
`
// Archive marks the schedule row 'archived', removing it from the
// worker's active queue.
func (q *Queries) Archive(ctx context.Context, scheduleID uuid.UUID) error {
_, err := q.db.Exec(ctx, archive, scheduleID)
return err
}
const getCurrentQueueSize = `-- name: GetCurrentQueueSize :one
SELECT
COUNT(*) current_queue_size
FROM
work_schedule
WHERE
worker_id = $1
AND state <> 'archived'
`
// GetCurrentQueueSize returns the number of non-archived work_schedule
// rows currently assigned to the given worker.
func (q *Queries) GetCurrentQueueSize(ctx context.Context, workerID uuid.UUID) (int64, error) {
row := q.db.QueryRow(ctx, getCurrentQueueSize, workerID)
var current_queue_size int64
err := row.Scan(&current_queue_size)
return current_queue_size, err
}
const getNext = `-- name: GetNext :one
SELECT
schedule_id, worker_id, start_run, end_run, updated_at, state
FROM
work_schedule
WHERE
worker_id = $1
AND state = 'pending'
ORDER BY updated_at DESC
LIMIT 1
`
// GetNext fetches a single pending work_schedule row for the worker,
// most recently updated first. Scan returns pgx.ErrNoRows when the
// worker's queue is empty (callers check for it with errors.Is).
func (q *Queries) GetNext(ctx context.Context, workerID uuid.UUID) (*WorkSchedule, error) {
row := q.db.QueryRow(ctx, getNext, workerID)
var i WorkSchedule
err := row.Scan(
&i.ScheduleID,
&i.WorkerID,
&i.StartRun,
&i.EndRun,
&i.UpdatedAt,
&i.State,
)
return &i, err
}
const insertQueueItem = `-- name: InsertQueueItem :exec
INSERT INTO work_schedule
(
schedule_id
, worker_id
, start_run
, end_run
, state
)
VALUES
(
$1
, $2
, $3
, $4
, 'pending'
)
`
// InsertQueueItemParams carries the column values for InsertQueueItem;
// state is fixed to 'pending' by the query itself.
type InsertQueueItemParams struct {
ScheduleID uuid.UUID `json:"schedule_id"`
WorkerID uuid.UUID `json:"worker_id"`
StartRun pgtype.Timestamptz `json:"start_run"`
EndRun pgtype.Timestamptz `json:"end_run"`
}
// InsertQueueItem enqueues a new pending run window for a worker.
func (q *Queries) InsertQueueItem(ctx context.Context, arg *InsertQueueItemParams) error {
_, err := q.db.Exec(ctx, insertQueueItem,
arg.ScheduleID,
arg.WorkerID,
arg.StartRun,
arg.EndRun,
)
return err
}
const ping = `-- name: Ping :one
SELECT 1
`
// Ping performs a round-trip connectivity check; it returns the
// constant 1 selected by the query.
func (q *Queries) Ping(ctx context.Context) (int32, error) {
row := q.db.QueryRow(ctx, ping)
var column_1 int32
err := row.Scan(&column_1)
return column_1, err
}
const startProcessing = `-- name: StartProcessing :exec
UPDATE work_schedule
SET
state = 'processing'
WHERE
schedule_id = $1
`
// StartProcessing transitions the schedule row to 'processing'. Note the
// query has no state guard: any existing schedule_id is updated.
func (q *Queries) StartProcessing(ctx context.Context, scheduleID uuid.UUID) error {
_, err := q.db.Exec(ctx, startProcessing, scheduleID)
return err
}

View File

@@ -0,0 +1,21 @@
# sqlc code-generation configuration (config format v2).
version: "2"
sql:
- queries: queries.sql
# Schema is read from the persistence migrations directory.
schema: ../persistence/migrations/
engine: "postgresql"
gen:
go:
out: "repositories"
package: "repositories"
sql_package: "pgx/v5"
emit_json_tags: true
# NOTE(review): emit_prepared_queries is documented for database/sql;
# verify it has any effect with sql_package pgx/v5 — the generated code
# in repositories/ shows no prepared-statement support.
emit_prepared_queries: true
emit_interface: true
emit_empty_slices: true
emit_result_struct_pointers: true
emit_params_struct_pointers: true
# Map Postgres uuid columns to github.com/google/uuid.UUID.
overrides:
- db_type: "uuid"
go_type:
import: "github.com/google/uuid"
type: "UUID"

View File

@@ -0,0 +1,179 @@
package workscheduler
import (
"context"
"errors"
"fmt"
"log/slog"
"git.front.kjuulh.io/kjuulh/orbis/internal/modelschedule"
"git.front.kjuulh.io/kjuulh/orbis/internal/worker"
"git.front.kjuulh.io/kjuulh/orbis/internal/workscheduler/repositories"
"github.com/google/uuid"
"github.com/jackc/pgx/v5"
"github.com/jackc/pgx/v5/pgtype"
"github.com/jackc/pgx/v5/pgxpool"
)
//go:generate sqlc generate
// WorkScheduler distributes queued model runs across registered workers,
// backed by the work_schedule table via the sqlc-generated repositories.
type WorkScheduler struct {
db *pgxpool.Pool
logger *slog.Logger
}
// NewWorkScheduler constructs a WorkScheduler using the given connection
// pool and structured logger.
func NewWorkScheduler(
	db *pgxpool.Pool,
	logger *slog.Logger,
) *WorkScheduler {
	return &WorkScheduler{db: db, logger: logger}
}
// Worker pairs a registered worker instance with the capacity it has
// left in the current scheduling pass.
type Worker struct {
Instance worker.WorkerInstance
RemainingCapacity uint
}
// Workers is the set of schedulable workers for one scheduling pass.
type Workers struct {
Workers []*Worker
}
// IterateSlice returns a push-style iterator that deals out batches of up
// to size entries, round-robin across the workers, consuming one unit of
// RemainingCapacity per emitted entry (the shared *Worker is mutated).
// Iteration ends once every worker is drained; a final short batch is
// yielded if non-empty.
//
// NOTE(review): size == 0 degenerates to one empty batch followed by a
// single batch holding all remaining capacity — confirm callers never
// pass 0.
func (w *Workers) IterateSlice(size uint) func(yield func([]Worker) bool) {
	return func(yield func([]Worker) bool) {
		if len(w.Workers) == 0 {
			return
		}
		batch := make([]Worker, 0)
		filled := uint(0)
		for {
			drained := true
			// `wrk` (not `worker`) avoids shadowing the imported worker package.
			for _, wrk := range w.Workers {
				if filled == size {
					if !yield(batch) {
						return
					}
					batch = make([]Worker, 0)
					filled = 0
				}
				// RemainingCapacity is unsigned, so == 0 is the only empty state
				// (the previous `<= 0` was misleading on a uint).
				if wrk.RemainingCapacity == 0 {
					continue
				}
				wrk.RemainingCapacity--
				batch = append(batch, *wrk)
				filled++
				drained = false
			}
			if drained {
				if len(batch) > 0 {
					if !yield(batch) {
						return
					}
				}
				return
			}
		}
	}
}
// GetWorkers builds the schedulable worker set for this pass: it queries
// the remaining queue capacity of every registered worker and drops those
// that are already full.
func (w *WorkScheduler) GetWorkers(ctx context.Context, registeredWorkers *worker.Workers) (*Workers, error) {
	w.logger.DebugContext(ctx, "found workers", "workers", len(registeredWorkers.Instances))

	available := make([]*Worker, 0, len(registeredWorkers.Instances))
	for _, instance := range registeredWorkers.Instances {
		capacity, err := w.GetWorker(ctx, &instance)
		if err != nil {
			return nil, fmt.Errorf("failed to find capacity for worker: %w", err)
		}
		if capacity == 0 {
			w.logger.DebugContext(ctx, "skipping worker as no remaining capacity")
			continue
		}
		available = append(available, &Worker{
			Instance:          instance,
			RemainingCapacity: capacity,
		})
	}

	return &Workers{Workers: available}, nil
}
// GetWorker returns the remaining queue capacity for a single worker:
// its configured capacity minus the number of non-archived items already
// queued for it. It returns 0 when the worker is at or over capacity.
func (w *WorkScheduler) GetWorker(
	ctx context.Context,
	// Renamed from `worker`, which shadowed the imported worker package.
	instance *worker.WorkerInstance,
) (uint, error) {
	repo := repositories.New(w.db)

	// currentSize replaces the non-idiomatic snake_case `current_size`.
	currentSize, err := repo.GetCurrentQueueSize(ctx, instance.WorkerID)
	if err != nil {
		return 0, fmt.Errorf("failed to get current queue size: %s: %w", instance.WorkerID, err)
	}

	// Compare in signed arithmetic first: the unsigned subtraction below
	// would underflow if the queue ever exceeded the configured capacity.
	if int64(instance.Capacity)-currentSize <= 0 {
		return 0, nil
	}

	return instance.Capacity - uint(currentSize), nil
}
// InsertModelRun enqueues one model run window as a fresh pending queue
// item assigned to the given worker.
func (w *WorkScheduler) InsertModelRun(
	ctx context.Context,
	worker Worker,
	modelRun *modelschedule.ModelRunSchedule,
) error {
	params := repositories.InsertQueueItemParams{
		ScheduleID: uuid.New(),
		WorkerID:   worker.Instance.WorkerID,
		StartRun:   pgtype.Timestamptz{Time: modelRun.Start, Valid: true},
		EndRun:     pgtype.Timestamptz{Time: modelRun.End, Valid: true},
	}

	return repositories.New(w.db).InsertQueueItem(ctx, &params)
}
// GetNext returns the schedule id of the next pending queue item for the
// worker, or nil with a nil error when the worker's queue is empty.
func (w *WorkScheduler) GetNext(ctx context.Context, workerID uuid.UUID) (*uuid.UUID, error) {
	schedule, err := repositories.New(w.db).GetNext(ctx, workerID)
	// An empty queue is not an error; it is reported as a nil id.
	if errors.Is(err, pgx.ErrNoRows) {
		return nil, nil
	}
	if err != nil {
		return nil, fmt.Errorf("failed to get next worker item: %w", err)
	}
	return &schedule.ScheduleID, nil
}
// StartProcessing transitions the given queue item into the 'processing'
// state.
func (w *WorkScheduler) StartProcessing(ctx context.Context, scheduleID uuid.UUID) error {
	return repositories.New(w.db).StartProcessing(ctx, scheduleID)
}
// Archive marks the given queue item as 'archived' so it no longer counts
// toward the worker's queue size.
func (w *WorkScheduler) Archive(ctx context.Context, scheduleID uuid.UUID) error {
	return repositories.New(w.db).Archive(ctx, scheduleID)
}