Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion .github/renovate.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,19 @@
"matchStrings": ["GOTESTSUM_VERSION \\?= (?<currentValue>v[\\d.]+)"],
"depNameTemplate": "gotest.tools/gotestsum",
"datasourceTemplate": "go"
},
{
"fileMatch": ["^postgres/Dockerfile$"],
"matchStrings": ["FROM (?<depName>[^:\\n]+):(?<currentValue>[^@\\n]+)@sha256:(?<currentDigest>[a-f0-9]+)"],
"datasourceTemplate": "docker"
},
{
"fileMatch": ["^postgres/Dockerfile$"],
"matchStrings": ["ENV PG_VERSION (?<currentValue>[\\d]+\\.[\\d]+)-[^\\n]+"],
"depNameTemplate": "postgres",
"datasourceTemplate": "docker",
"versioningTemplate": "semver-coerced",
"autoReplaceStringTemplate": "ENV PG_VERSION {{{newValue}}}-1.pgdg13+1"
}
],
"packageRules": [
Expand All @@ -49,6 +62,17 @@
],
"allowedVersions": "1.26.x"
},
{
"matchPackageNames": [
"postgres"
],
"matchFileNames": [
"postgres/Dockerfile"
],
"allowedVersions": "17.x",
"automerge": true,
"groupName": "postgres Dockerfile"
},
Comment thread
coderabbitai[bot] marked this conversation as resolved.
{
"matchPackageNames": [
"/^github\\.com\\/sapcc\\/.*/"
Expand Down Expand Up @@ -80,4 +104,4 @@
"before 8am on Friday"
],
"semanticCommits": "disabled"
}
}
26 changes: 25 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ import (
"context"
"crypto/tls"
"flag"
"log/slog"
"net/http"
"os"
"path/filepath"
"slices"
"strings"
"time"

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
Expand Down Expand Up @@ -143,13 +145,35 @@ func main() {
flag.BoolVar(&enableHTTP2, "enable-http2", false,
"If set, HTTP/2 will be enabled for the metrics and webhook servers")
opts := zap.Options{
Development: true,
Development: false,
}
opts.BindFlags(flag.CommandLine)
flag.Parse()

ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))

// Configure slog (used across internal packages) with JSON output and
// level control via the LOG_LEVEL environment variable.
// Supported values (case-insensitive): debug, info (default), warn or
// warning, error. Unset or unrecognized values leave the level at info.
slogLevel := new(slog.LevelVar)
slogLevel.Set(slog.LevelInfo)
if lvl := os.Getenv("LOG_LEVEL"); lvl != "" {
switch strings.ToLower(lvl) {
case "debug":
slogLevel.Set(slog.LevelDebug)
case "info":
slogLevel.Set(slog.LevelInfo)
case "warn", "warning":
slogLevel.Set(slog.LevelWarn)
case "error":
slogLevel.Set(slog.LevelError)
}
}
slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
Level: slogLevel,
})))
slog.Info("slog configured", "level", slogLevel.Level().String())

// Log the main configuration
setupLog.Info("loaded main configuration",
"enabledControllers", mainConfig.EnabledControllers,
Expand Down
22 changes: 20 additions & 2 deletions cortex.secrets.example.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Copyright SAP SE
# SPDX-License-Identifier: Apache-2.0

# Override config values that contain sensitive information or
# are specific to your environment. These values can be used in the Tiltfile.
# Override config values for local development. This includes secrets,
# environment-specific settings, and logging configuration.
# These values can be used in the Tiltfile.

# SSO certificate to use.
sharedSSOCert: &sharedSSOCert
Expand All @@ -20,6 +21,23 @@ sharedSSOCert: &sharedSSOCert
# If true, the certificate is not verified.
selfSigned: "false"

# Logging configuration for local development.
# Set logLevel to "debug" for verbose output from both zap and slog loggers.
# Set zapDevel to true for human-readable console logs instead of JSON.
# These apply per sub-chart, e.g. for cortex-nova:
#
# cortex-scheduling-controllers:
# controllerManager:
# container:
# logLevel: "debug"
# zapDevel: true
#
# cortex-knowledge-controllers:
# controllerManager:
# container:
# logLevel: "debug"
# zapDevel: true
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

# Enable kvm pipelines and scheduling support.
kvm:
enabled: true
Expand Down
2 changes: 1 addition & 1 deletion helm/bundles/cortex-cinder/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ dependencies:
# from: file://../../library/cortex-postgres
- name: cortex-postgres
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.5.13
version: 0.5.14

# from: file://../../library/cortex
- name: cortex
Expand Down
2 changes: 1 addition & 1 deletion helm/bundles/cortex-manila/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ dependencies:
# from: file://../../library/cortex-postgres
- name: cortex-postgres
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.5.13
version: 0.5.14

# from: file://../../library/cortex
- name: cortex
Expand Down
2 changes: 1 addition & 1 deletion helm/bundles/cortex-nova/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ dependencies:
# from: file://../../library/cortex-postgres
- name: cortex-postgres
repository: oci://ghcr.io/cobaltcore-dev/cortex/charts
version: 0.5.13
version: 0.5.14

# from: file://../../library/cortex
- name: cortex
Expand Down
2 changes: 1 addition & 1 deletion helm/bundles/cortex-nova/alerts/nova.alerts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -731,4 +731,4 @@ groups:
The webhook {{ $labels.webhook }} has experienced errors in the last 5 minutes.
This may indicate issues with the webhook logic, connectivity problems, or
external factors causing failures. Check the webhook server logs for error
details and investigate the affected resources.
details and investigate the affected resources.
1 change: 1 addition & 0 deletions helm/bundles/cortex-nova/templates/knowledges_kvm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ metadata:
name: kvm-libvirt-domain-cpu-steal-pct
spec:
schedulingDomain: nova
recency: "60s"
extractor:
name: kvm_libvirt_domain_cpu_steal_pct_extractor
description: |
Expand Down
2 changes: 1 addition & 1 deletion helm/library/cortex-postgres/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ apiVersion: v2
name: cortex-postgres
description: Postgres setup for Cortex.
type: application
version: 0.5.13
version: 0.5.14
appVersion: "sha-6db36b81"
16 changes: 16 additions & 0 deletions helm/library/cortex/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,22 @@ app.kubernetes.io/instance: {{ .Release.Name }}
{{ $hasMutating }}}}{{- end }}


{{/*
chart.argsContainPrefix reports whether any entry in args starts with prefix.
Usage: include "chart.argsContainPrefix" (dict "prefix" "--zap-log-level" "args" .Values.controllerManager.container.args)

Parameters (passed as a dict):
  prefix - string every entry of args is tested against.
  args   - list of container arguments; may be empty or unset.

Returns the string "true" or "false" (include always yields text), so callers
must compare the result against "true" rather than use it as a boolean.
The check is a plain prefix match: with prefix "--zap-devel" an entry such as
"--zap-devel=false" also counts as present.
*/}}
{{- define "chart.argsContainPrefix" -}}
{{- $prefix := .prefix -}}
{{- /* A dict is used as a mutable holder so the flag set inside range is still visible after the loop. */ -}}
{{- $result := dict "found" "false" -}}
{{- range .args -}}
{{- /* toString keeps rendering from failing when an entry was parsed as a non-string value (e.g. a bare number). */ -}}
{{- if hasPrefix $prefix (toString .) -}}
{{- $_ := set $result "found" "true" -}}
{{- end -}}
{{- end -}}
{{- get $result "found" -}}
{{- end -}}

{{- define "chart.hasValidatingWebhooks" -}}
{{- $hasValidating := false }}
{{- range . }}
Expand Down
16 changes: 13 additions & 3 deletions helm/library/cortex/templates/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ spec:
{{- range .Values.controllerManager.container.args }}
- {{ . }}
{{- end }}
{{- if and .Values.controllerManager.container.logLevel (ne (include "chart.argsContainPrefix" (dict "prefix" "--zap-log-level" "args" .Values.controllerManager.container.args)) "true") }}
- "--zap-log-level={{ .Values.controllerManager.container.logLevel }}"
{{- end }}
{{- if and .Values.controllerManager.container.zapDevel (ne (include "chart.argsContainPrefix" (dict "prefix" "--zap-devel" "args" .Values.controllerManager.container.args)) "true") }}
- "--zap-devel"
{{- end }}
{{- if and .Values.webhook.enable .Values.certmanager.enable }}
- "--webhook-cert-path=/tmp/k8s-webhook-server/serving-certs"
{{- end }}
Expand All @@ -56,13 +62,17 @@ spec:
{{- if .Values.controllerManager.container.image.pullPolicy }}
imagePullPolicy: {{ .Values.controllerManager.container.image.pullPolicy }}
{{- end }}
{{- if .Values.controllerManager.container.env }}
env:
{{- if and .Values.controllerManager.container.logLevel (not (and .Values.controllerManager.container.env (hasKey .Values.controllerManager.container.env "LOG_LEVEL"))) }}
- name: LOG_LEVEL
value: {{ .Values.controllerManager.container.logLevel | quote }}
{{- end }}
{{- if .Values.controllerManager.container.env }}
{{- range $key, $value := .Values.controllerManager.container.env }}
- name: {{ $key }}
value: {{ $value }}
{{- end }}
{{- end }}
{{- end }}
livenessProbe:
{{- toYaml .Values.controllerManager.container.livenessProbe | nindent 12 }}
readinessProbe:
Expand Down Expand Up @@ -140,4 +150,4 @@ data:
{{- $mergedSecrets = mergeOverwrite .Values.secrets $mergedSecrets }}
{{- end }}
{{ toJson $mergedSecrets | b64enc }}
{{- end }}
{{- end }}
8 changes: 8 additions & 0 deletions helm/library/cortex/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@ controllerManager:
- "--metrics-bind-address=:2112"
- "--health-probe-bind-address=:8081"
- "--metrics-secure=false"
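# Log level for both zap (controller-runtime, via --zap-log-level) and slog
# (internal packages, via LOG_LEVEL).
# Supported by both: debug, info (default), error.
# NOTE: "warn" is accepted by slog only; controller-runtime's --zap-log-level
# accepts debug, info, error, panic or a positive integer, so "warn" would be
# rejected during flag parsing unless --zap-log-level is set explicitly in args.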
logLevel: "info"
# Enable zap development mode (human-readable console logs, development stack traces).
# This only changes output format and stack trace behavior, not the log level.
# The effective log level is controlled by logLevel above (default: "info").
# Set to true for local development (e.g. Tilt), keep false for production.
zapDevel: false
resources:
limits:
cpu: 500m
Expand Down
7 changes: 4 additions & 3 deletions internal/knowledge/extractor/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,10 @@ func (r *KnowledgeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
// Sanity checks.
lastExtracted := knowledge.Status.LastExtracted.Time
recency := knowledge.Spec.Recency.Duration
if lastExtracted.Add(recency).After(time.Now()) && knowledge.Status.RawLength != 0 {
log.Info("skipping knowledge extraction, not yet time", "name", knowledge.Name)
return ctrl.Result{RequeueAfter: time.Until(lastExtracted.Add(recency))}, nil
if lastExtracted.Add(recency).After(time.Now()) {
waitFor := time.Until(lastExtracted.Add(recency))
log.Info("skipping knowledge extraction, not yet time", "name", knowledge.Name, "waitFor", waitFor)
return ctrl.Result{RequeueAfter: waitFor}, nil
}

extractor, ok := supportedExtractors[knowledge.Spec.Extractor.Name]
Expand Down
8 changes: 4 additions & 4 deletions internal/scheduling/lib/filter_weigher_pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func InitNewFilterWeigherPipeline[RequestType FilterWeigherPipelineRequest](
unknownFilters := []string{}
for _, filterConfig := range confedFilters {
slog.Info("scheduler: configuring filter", "name", filterConfig.Name)
slog.Info("supported:", "filters", maps.Keys(supportedFilters))
slog.Info("supported:", "filters", slices.Sorted(maps.Keys(supportedFilters)))
makeFilter, ok := supportedFilters[filterConfig.Name]
if !ok {
slog.Error("scheduler: unsupported filter", "name", filterConfig.Name)
Expand All @@ -73,7 +73,7 @@ func InitNewFilterWeigherPipeline[RequestType FilterWeigherPipelineRequest](
filter = validateFilter(filter)
filter = monitorFilter(filter, filterConfig.Name, pipelineMonitor)
if err := filter.Init(ctx, client, filterConfig); err != nil {
slog.Error("scheduler: failed to initialize filter", "name", filterConfig.Name, "error", err)
slog.Warn("scheduler: failed to initialize filter", "name", filterConfig.Name, "error", err)
filterErrors[filterConfig.Name] = errors.New("failed to initialize filter: " + err.Error())
continue
}
Expand All @@ -90,7 +90,7 @@ func InitNewFilterWeigherPipeline[RequestType FilterWeigherPipelineRequest](
unknownWeighers := []string{}
for _, weigherConfig := range confedWeighers {
slog.Info("scheduler: configuring weigher", "name", weigherConfig.Name)
slog.Info("supported:", "weighers", maps.Keys(supportedWeighers))
slog.Info("supported:", "weighers", slices.Sorted(maps.Keys(supportedWeighers)))
makeWeigher, ok := supportedWeighers[weigherConfig.Name]
if !ok {
slog.Error("scheduler: unsupported weigher", "name", weigherConfig.Name)
Expand All @@ -102,7 +102,7 @@ func InitNewFilterWeigherPipeline[RequestType FilterWeigherPipelineRequest](
weigher = validateWeigher(weigher)
weigher = monitorWeigher(weigher, weigherConfig.Name, pipelineMonitor)
if err := weigher.Init(ctx, client, weigherConfig); err != nil {
slog.Error("scheduler: failed to initialize weigher", "name", weigherConfig.Name, "error", err)
slog.Warn("scheduler: failed to initialize weigher", "name", weigherConfig.Name, "error", err)
weigherErrors[weigherConfig.Name] = errors.New("failed to initialize weigher: " + err.Error())
continue
}
Expand Down
4 changes: 2 additions & 2 deletions internal/scheduling/nova/external_scheduler_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,12 @@ func (httpAPI *httpAPI) canRunScheduler(requestData api.ExternalSchedulerRequest
func (httpAPI *httpAPI) inferPipelineName(requestData api.ExternalSchedulerRequest) (string, error) {
hvType, err := requestData.GetHypervisorType()
if err != nil {
slog.Info("failed to determine hypervisor type, cannot infer pipeline name", "error", err)
slog.Warn("failed to determine hypervisor type, cannot infer pipeline name", "error", err)
return "", errors.New("failed to determine hypervisor type from request data")
}
flavorType, err := requestData.GetFlavorType()
if err != nil {
slog.Info("failed to determine flavor type, cannot infer pipeline name", "error", err)
slog.Warn("failed to determine flavor type, cannot infer pipeline name", "error", err)
return "", errors.New("failed to determine flavor type from request data")
}
switch hvType {
Expand Down
12 changes: 6 additions & 6 deletions internal/scheduling/nova/hypervisor_overcommit_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ type HypervisorOvercommitController struct {
// - https://ahmet.im/blog/controller-pitfalls/#reconcile-method-shape
func (c *HypervisorOvercommitController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
log := ctrl.LoggerFrom(ctx)
log.Info("Reconciling resource")
log.V(1).Info("Reconciling resource")

obj := new(hv1.Hypervisor)
if err := c.Get(ctx, req.NamespacedName, obj); err != nil {
Expand All @@ -130,7 +130,7 @@ func (c *HypervisorOvercommitController) Reconcile(ctx context.Context, req ctrl
// non-overlapping resources from previous mappings.
desiredOvercommit := make(map[hv1.ResourceName]float64)
for _, mapping := range c.config.OvercommitMappings {
log.Info("Processing overcommit mapping",
log.V(1).Info("Processing overcommit mapping",
"mapping", mapping,
"hypervisorTraits", obj.Status.Traits)
var applyMapping bool
Expand All @@ -142,21 +142,21 @@ func (c *HypervisorOvercommitController) Reconcile(ctx context.Context, req ctrl
applyMapping = !slices.Contains(obj.Status.Traits, *mapping.HasntTrait)
default:
// This should never happen due to validation, but we check it just in case.
log.Info("Skipping overcommit mapping with no trait specified",
log.V(1).Info("Skipping overcommit mapping with no trait specified",
"overcommit", mapping.Overcommit)
continue
}
if !applyMapping {
continue
}
log.Info("Applying overcommit mapping on hypervisor",
log.V(1).Info("Applying overcommit mapping on hypervisor",
"overcommit", mapping.Overcommit)
maps.Copy(desiredOvercommit, mapping.Overcommit)
}
log.Info("Desired overcommit ratios based on traits",
log.V(1).Info("Desired overcommit ratios based on traits",
"desiredOvercommit", desiredOvercommit)
if maps.Equal(desiredOvercommit, obj.Spec.Overcommit) {
log.Info("Overcommit ratios are up to date, no update needed")
log.V(1).Info("Overcommit ratios are up to date, no update needed")
return ctrl.Result{}, nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,12 @@ func (s *FilterCapabilitiesStep) Run(traceLog *slog.Logger, request api.External

hvCaps := make(map[string]map[string]string)
for _, hv := range hvs.Items {
var err error
if hvCaps[hv.Name], err = hvToNovaCapabilities(hv); err != nil {
traceLog.Error("failed to get nova capabilities from hypervisor", "host", hv.Name, "error", err)
return nil, err
caps, err := hvToNovaCapabilities(hv)
if err != nil {
traceLog.Warn("skipping hypervisor with unknown capabilities", "host", hv.Name, "error", err)
continue
}
hvCaps[hv.Name] = caps
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
traceLog.Info("looking for capabilities", "capabilities", hvCaps)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func (s *FilterExternalCustomerStep) Run(traceLog *slog.Logger, request api.Exte
result := s.IncludeAllHostsFromRequest(request)
domainName, err := request.Spec.Data.GetSchedulerHintStr("domain_name")
if err != nil {
traceLog.Error("failed to get domain_name scheduler hint, skipping filter", "error", err)
traceLog.Warn("failed to get domain_name scheduler hint, skipping filter", "error", err)
return result, nil
}
if slices.Contains(s.Options.CustomerIgnoredDomainNames, domainName) {
Expand Down
Loading
Loading