Semaphore/services/runners/job_pool.go

513 lines
11 KiB
Go
Raw Normal View History

package runners
import (
"bytes"
"encoding/json"
"fmt"
2024-03-10 20:07:19 +01:00
"io"
"net/http"
"os"
"strconv"
2023-09-20 02:17:41 +02:00
"sync/atomic"
"time"
2024-10-13 12:49:28 +02:00
"github.com/ansible-semaphore/semaphore/db"
"github.com/ansible-semaphore/semaphore/db_lib"
"github.com/ansible-semaphore/semaphore/pkg/task_logger"
"github.com/ansible-semaphore/semaphore/services/tasks"
"github.com/ansible-semaphore/semaphore/util"
log "github.com/sirupsen/logrus"
)
2024-10-13 16:31:38 +02:00
// JobLogger writes structured log entries that all carry the same context
// label, so entries from one stage of the runner can be told apart.
type JobLogger struct {
	// Context is attached to every entry as the "context" field.
	Context string
}
2024-10-13 16:31:38 +02:00
func (e *JobLogger) ActionError(err error, action string, message string) {
2024-10-13 14:17:25 +02:00
util.LogErrorWithFields(err, log.Fields{
2024-10-13 16:31:38 +02:00
"type": "action",
2024-10-13 14:17:25 +02:00
"context": e.Context,
"action": action,
"error": message,
})
}
2024-10-13 16:31:38 +02:00
func (e *JobLogger) Info(message string) {
2024-10-13 14:17:25 +02:00
log.WithFields(log.Fields{
"context": e.Context,
}).Info(message)
}
2024-10-13 16:31:38 +02:00
// TaskInfo logs message at info level for a specific task. The entry is
// tagged with type "task", the logger context, the task ID and its status.
func (e *JobLogger) TaskInfo(message string, task int, status string) {
	fields := log.Fields{
		"type":    "task",
		"context": e.Context,
		"task":    task,
		"status":  status,
	}

	log.WithFields(fields).Info(message)
}
func (e *JobLogger) Panic(err error, action string, message string) {
2024-10-13 14:17:25 +02:00
log.WithFields(log.Fields{
"context": e.Context,
}).Panic(message)
}
2024-10-13 16:31:38 +02:00
// Debug logs message at debug level, tagged with the logger context.
func (e *JobLogger) Debug(message string) {
	entry := log.WithFields(log.Fields{"context": e.Context})
	entry.Debug(message)
}
type JobPool struct {
// logger channel used to putting log records to database.
logger chan jobLogRecord
// register channel used to put tasks to queue.
register chan *job
runningJobs map[int]*runningJob
queue []*job
2024-09-29 21:58:21 +02:00
//token *string
2023-09-20 02:17:41 +02:00
processing int32
}
// existsInQueue reports whether a job for the given task ID is already
// waiting in the queue.
func (p *JobPool) existsInQueue(taskID int) bool {
	for i := range p.queue {
		if p.queue[i].job.Task.ID == taskID {
			return true
		}
	}

	return false
}
2023-09-12 19:40:22 +02:00
// hasRunningJobs reports whether at least one tracked job has a status that
// is not finished yet.
func (p *JobPool) hasRunningJobs() bool {
	for _, running := range p.runningJobs {
		if running.status.IsFinished() {
			continue
		}
		return true
	}

	return false
}
// Register registers this runner with the server.
//
// It returns an error when no token file is configured (the token received
// on registration is stored there) or when the registration attempt fails;
// details of a failed attempt are logged by tryRegisterRunner.
func (p *JobPool) Register() (err error) {
	switch {
	case util.Config.Runner.TokenFile == "":
		err = fmt.Errorf("runner token file required")
	case !p.tryRegisterRunner():
		err = fmt.Errorf("runner registration failed")
	}

	return err
}
// Unregister removes this runner from the server and deletes the local
// token file.
//
// It returns an error when the runner has no token, when the request cannot
// be created or sent, when the server answers with a status >= 400 other
// than 404 (404 is treated as "already gone"), or when the token file cannot
// be removed.
func (p *JobPool) Unregister() (err error) {
	if util.Config.Runner.Token == "" {
		return fmt.Errorf("runner is not registered")
	}

	client := &http.Client{}
	url := util.Config.WebHost + "/api/internal/runners"

	req, err := http.NewRequest(http.MethodDelete, url, nil)
	if err != nil {
		return err
	}

	// Identify the runner the same way the progress (PUT) and polling (GET)
	// requests to this endpoint do.
	req.Header.Set("X-Runner-Token", util.Config.Runner.Token)

	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	// The body is not needed, but it must be closed to release the
	// underlying connection.
	defer resp.Body.Close()

	if resp.StatusCode >= 400 && resp.StatusCode != 404 {
		return fmt.Errorf("encountered error while unregistering runner; server returned code %d", resp.StatusCode)
	}

	if util.Config.Runner.TokenFile != "" {
		err = os.Remove(util.Config.Runner.TokenFile)
	}

	return err
}
func (p *JobPool) Run() {
2024-10-13 16:31:38 +02:00
logger := JobLogger{Context: "running"}
2024-09-29 21:58:21 +02:00
if util.Config.Runner.Token == "" {
2024-10-13 14:17:25 +02:00
logger.Panic(fmt.Errorf("no token provided"), "read input", "can not retrieve runner token")
}
queueTicker := time.NewTicker(5 * time.Second)
requestTimer := time.NewTicker(1 * time.Second)
p.runningJobs = make(map[int]*runningJob)
defer func() {
queueTicker.Stop()
requestTimer.Stop()
}()
for {
select {
case <-queueTicker.C: // timer 5 seconds: get task from queue and run it
logger.Debug("Checking queue")
if len(p.queue) == 0 {
break
}
t := p.queue[0]
if t.status == task_logger.TaskFailStatus {
//delete failed TaskRunner from queue
p.queue = p.queue[1:]
2024-10-13 16:31:38 +02:00
logger.TaskInfo("Task dequeued", t.job.Task.ID, "failed")
break
}
p.runningJobs[t.job.Task.ID] = &runningJob{
job: t.job,
}
2024-03-12 01:44:04 +01:00
t.job.Logger = t.job.App.SetLogger(p.runningJobs[t.job.Task.ID])
go func(runningJob *runningJob) {
runningJob.SetStatus(task_logger.TaskRunningStatus)
err := runningJob.job.Run(t.username, t.incomingVersion)
if runningJob.status.IsFinished() {
return
}
if err != nil {
if runningJob.status == task_logger.TaskStoppingStatus {
runningJob.SetStatus(task_logger.TaskStoppedStatus)
} else {
runningJob.SetStatus(task_logger.TaskFailStatus)
}
} else {
runningJob.SetStatus(task_logger.TaskSuccessStatus)
}
2024-10-13 16:31:38 +02:00
logger.TaskInfo("Task finished", runningJob.job.Task.ID, string(runningJob.status))
}(p.runningJobs[t.job.Task.ID])
p.queue = p.queue[1:]
2024-10-13 16:31:38 +02:00
logger.TaskInfo("Task dequeued", t.job.Task.ID, string(t.job.Task.Status))
logger.TaskInfo("Task started", t.job.Task.ID, string(t.job.Task.Status))
case <-requestTimer.C:
go func() {
if !atomic.CompareAndSwapInt32(&p.processing, 0, 1) {
return
}
2023-09-12 19:40:22 +02:00
defer atomic.StoreInt32(&p.processing, 0)
p.sendProgress()
if util.Config.Runner.OneOff && len(p.runningJobs) > 0 && !p.hasRunningJobs() {
os.Exit(0)
}
2023-09-12 19:40:22 +02:00
p.checkNewJobs()
}()
2023-09-12 19:40:22 +02:00
}
}
}
func (p *JobPool) sendProgress() {
2024-10-13 16:31:38 +02:00
logger := JobLogger{Context: "sending_progress"}
2024-10-13 14:17:25 +02:00
client := &http.Client{}
2024-09-29 21:58:21 +02:00
url := util.Config.WebHost + "/api/internal/runners"
body := RunnerProgress{
Jobs: nil,
}
for id, j := range p.runningJobs {
body.Jobs = append(body.Jobs, JobProgress{
ID: id,
LogRecords: j.logRecords,
Status: j.status,
})
j.logRecords = make([]LogRecord, 0)
if j.status.IsFinished() {
2024-10-13 16:31:38 +02:00
logger.TaskInfo("Task removed from running list", id, string(j.status))
delete(p.runningJobs, id)
}
}
jsonBytes, err := json.Marshal(body)
2024-10-13 12:49:28 +02:00
if err != nil {
2024-10-13 16:31:38 +02:00
logger.ActionError(err, "form request body", "can not marshal json")
2024-10-13 14:17:25 +02:00
return
2024-10-13 12:49:28 +02:00
}
req, err := http.NewRequest("PUT", url, bytes.NewBuffer(jsonBytes))
if err != nil {
2024-10-13 16:31:38 +02:00
logger.ActionError(err, "create request", "can not create request to the server")
return
}
2024-09-29 21:58:21 +02:00
req.Header.Set("X-Runner-Token", util.Config.Runner.Token)
2024-01-07 20:50:37 +01:00
resp, err := client.Do(req)
if err != nil {
2024-10-14 23:07:24 +02:00
logger.ActionError(err, "send request", "the server returned error")
return
}
2024-10-14 23:07:24 +02:00
if resp.StatusCode >= 400 {
logger.ActionError(fmt.Errorf("invalid status code"), "send request", "the server returned error "+strconv.Itoa(resp.StatusCode))
}
defer resp.Body.Close()
}
func (p *JobPool) tryRegisterRunner() bool {
2024-10-13 16:31:38 +02:00
logger := JobLogger{Context: "registration"}
2024-10-13 14:17:25 +02:00
2024-10-13 12:49:28 +02:00
log.Info("Registering a new runner")
2024-10-13 12:49:28 +02:00
if util.Config.Runner.RegistrationToken == "" {
2024-10-13 16:31:38 +02:00
logger.ActionError(fmt.Errorf("registration token cannot be empty"), "read input", "can not retrieve registration token")
2024-10-13 14:36:31 +02:00
return false
}
client := &http.Client{}
url := util.Config.WebHost + "/api/internal/runners"
jsonBytes, err := json.Marshal(RunnerRegistration{
2024-10-13 12:49:28 +02:00
RegistrationToken: util.Config.Runner.RegistrationToken,
Webhook: util.Config.Runner.Webhook,
MaxParallelTasks: util.Config.Runner.MaxParallelTasks,
})
2024-10-13 12:49:28 +02:00
if err != nil {
2024-10-13 16:31:38 +02:00
logger.ActionError(err, "form request", "can not marshal json")
2024-10-13 12:49:28 +02:00
return false
}
req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonBytes))
if err != nil {
2024-10-13 16:31:38 +02:00
logger.ActionError(err, "create request", "can not create request to the server")
return false
}
resp, err := client.Do(req)
2024-10-14 23:07:24 +02:00
if err != nil {
logger.ActionError(err, "send request", "unexpected error")
return false
}
if resp.StatusCode != 200 {
logger.ActionError(fmt.Errorf("invalid status code"), "send request", "the server returned error "+strconv.Itoa(resp.StatusCode))
return false
}
2024-03-10 20:07:19 +01:00
body, err := io.ReadAll(resp.Body)
if err != nil {
2024-10-13 12:49:28 +02:00
2024-10-13 16:31:38 +02:00
logger.ActionError(err, "read response body", "can not read server's response body")
return false
}
var res struct {
Token string `json:"token"`
}
err = json.Unmarshal(body, &res)
if err != nil {
2024-10-13 16:31:38 +02:00
logger.ActionError(err, "parsing result json", "server's response has invalid format")
return false
}
err = os.WriteFile(util.Config.Runner.TokenFile, []byte(res.Token), 0644)
2024-10-13 12:49:28 +02:00
if err != nil {
2024-10-13 16:31:38 +02:00
logger.ActionError(err, "store token", "can not store token to the file")
2024-10-13 12:49:28 +02:00
return false
}
defer resp.Body.Close()
return true
}
// checkNewJobs tries to find runner to queued jobs
func (p *JobPool) checkNewJobs() {
2024-10-13 16:31:38 +02:00
logger := JobLogger{Context: "checking new jobs"}
2024-10-13 14:17:25 +02:00
2024-09-29 21:58:21 +02:00
if util.Config.Runner.Token == "" {
2024-10-13 16:31:38 +02:00
logger.ActionError(fmt.Errorf("no token provided"), "read input", "can not retrieve runner token")
return
}
client := &http.Client{}
url := util.Config.WebHost + "/api/internal/runners"
req, err := http.NewRequest("GET", url, nil)
if err != nil {
2024-10-13 16:31:38 +02:00
logger.ActionError(err, "create request", "can not create request to the server")
return
}
2024-10-13 14:17:25 +02:00
req.Header.Set("X-Runner-Token", util.Config.Runner.Token)
resp, err := client.Do(req)
2024-01-07 14:36:48 +01:00
if err != nil {
2024-10-14 23:07:24 +02:00
logger.ActionError(err, "send request", "upexpected error")
return
}
2024-01-07 14:36:48 +01:00
2024-01-07 14:37:22 +01:00
if resp.StatusCode >= 400 {
2024-10-13 14:17:25 +02:00
2024-10-13 16:31:38 +02:00
logger.ActionError(fmt.Errorf("error status code"), "send request", "the server returned an error"+strconv.Itoa(resp.StatusCode))
2024-01-07 14:36:48 +01:00
return
}
defer resp.Body.Close()
2024-03-10 20:07:19 +01:00
body, err := io.ReadAll(resp.Body)
if err != nil {
2024-10-13 16:31:38 +02:00
logger.ActionError(err, "read response body", "can not read server's response body")
return
}
var response RunnerState
err = json.Unmarshal(body, &response)
if err != nil {
2024-10-13 16:31:38 +02:00
logger.ActionError(err, "parsing result json", "server's response has invalid format")
return
}
for _, currJob := range response.CurrentJobs {
runJob, exists := p.runningJobs[currJob.ID]
if !exists {
continue
}
if runJob.status == task_logger.TaskStoppingStatus || runJob.status == task_logger.TaskStoppedStatus {
p.runningJobs[currJob.ID].job.Kill()
}
if runJob.status.IsFinished() {
continue
}
switch runJob.status {
case task_logger.TaskRunningStatus:
if currJob.Status == task_logger.TaskStartingStatus || currJob.Status == task_logger.TaskWaitingStatus {
continue
}
case task_logger.TaskStoppingStatus:
if !currJob.Status.IsFinished() {
continue
}
case task_logger.TaskConfirmed:
if currJob.Status == task_logger.TaskWaitingConfirmation {
continue
}
}
runJob.SetStatus(currJob.Status)
}
2023-09-12 19:40:22 +02:00
if util.Config.Runner.OneOff {
if len(p.queue) > 0 || len(p.runningJobs) > 0 {
return
}
}
for _, newJob := range response.NewJobs {
if _, exists := p.runningJobs[newJob.Task.ID]; exists {
continue
}
if p.existsInQueue(newJob.Task.ID) {
continue
}
newJob.Inventory.Repository = newJob.InventoryRepository
taskRunner := job{
username: newJob.Username,
incomingVersion: newJob.IncomingVersion,
job: &tasks.LocalJob{
Task: newJob.Task,
Template: newJob.Template,
Inventory: newJob.Inventory,
Repository: newJob.Repository,
Environment: newJob.Environment,
2024-07-17 17:29:12 +02:00
App: db_lib.CreateApp(
newJob.Template,
newJob.Repository,
newJob.Inventory,
nil),
},
}
taskRunner.job.Repository.SSHKey = response.AccessKeys[taskRunner.job.Repository.SSHKeyID]
if taskRunner.job.Inventory.SSHKeyID != nil {
taskRunner.job.Inventory.SSHKey = response.AccessKeys[*taskRunner.job.Inventory.SSHKeyID]
}
if taskRunner.job.Inventory.BecomeKeyID != nil {
taskRunner.job.Inventory.BecomeKey = response.AccessKeys[*taskRunner.job.Inventory.BecomeKeyID]
}
var vaults []db.TemplateVault
2024-10-03 21:41:36 +02:00
if taskRunner.job.Template.Vaults != nil {
for _, vault := range taskRunner.job.Template.Vaults {
vault := vault
key := response.AccessKeys[vault.VaultKeyID]
vault.Vault = &key
vaults = append(vaults, vault)
2024-10-03 21:41:36 +02:00
}
2023-09-11 02:00:10 +02:00
}
taskRunner.job.Template.Vaults = vaults
2023-09-11 02:00:10 +02:00
if taskRunner.job.Inventory.RepositoryID != nil {
taskRunner.job.Inventory.Repository.SSHKey = response.AccessKeys[taskRunner.job.Inventory.Repository.SSHKeyID]
}
p.queue = append(p.queue, &taskRunner)
2024-10-13 16:31:38 +02:00
logger.TaskInfo("Task enqueued", taskRunner.job.Task.ID, string(taskRunner.job.Task.Status))
}
}