Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 10 additions & 15 deletions kagenti-operator/internal/controller/mlflow_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,31 +184,26 @@ func (r *MLflowReconciler) resolveTrackingURI(ctx context.Context) string {
for i := range list.Items {
cr := &list.Items[i]
if meta.IsStatusConditionTrue(cr.Status.Conditions, "Available") {
if cr.Status.Address == nil || cr.Status.Address.URL == "" {
logger.Info("MLflow CR is Available but status.address.url is not set, skipping", "cr", cr.GetName())
continue
if cr.Status.URL != "" {
logger.V(1).Info("Auto-discovered MLflow gateway URL", "uri", cr.Status.URL, "cr", cr.GetName())
return cr.Status.URL
}
uri := cr.Status.Address.URL
logger.V(1).Info("Auto-discovered MLflow tracking URI", "uri", uri, "cr", cr.GetName())
return uri
logger.Info("MLflow CR is Available but status.url is not set, skipping", "cr", cr.GetName())
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 Nice logic inversion — checking for presence first (URL != "") is cleaner than the old guard-and-continue pattern.

}
}

return ""
}

// mlflowEnvVars returns the environment variables to inject into agent containers.
//
// trackingURI is exported as MLFLOW_TRACKING_URI, and experimentID and
// experimentName as MLFLOW_EXPERIMENT_ID and MLFLOW_EXPERIMENT_NAME.
// MLFLOW_TRACKING_AUTH is always set to "kubernetes-namespaced".
//
// The tracking URI is typically the external gateway URL which uses a publicly-trusted
// TLS certificate, so no custom CA cert path is needed and
// MLFLOW_TRACKING_SERVER_CERT_PATH is deliberately not set.
// NOTE(review): if the gateway is ever served with a private CA (e.g. air-gapped
// clusters), a configurable CA cert path will have to be injected here again.
func mlflowEnvVars(trackingURI, experimentID, experimentName string) map[string]string {
	// Each key appears exactly once: duplicate constant keys in a map
	// literal are rejected by the Go compiler.
	return map[string]string{
		"MLFLOW_TRACKING_URI":    trackingURI,
		"MLFLOW_TRACKING_AUTH":   "kubernetes-namespaced",
		"MLFLOW_EXPERIMENT_ID":   experimentID,
		"MLFLOW_EXPERIMENT_NAME": experimentName,
	}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,6 @@ var _ = Describe("MLflow Controller", func() {
Expect(envMap["MLFLOW_TRACKING_AUTH"]).To(Equal("kubernetes-namespaced"))
Expect(envMap["MLFLOW_EXPERIMENT_ID"]).To(Equal("exp-123"))
Expect(envMap["MLFLOW_EXPERIMENT_NAME"]).To(Equal("mlflow-full"))
Expect(envMap["MLFLOW_TRACKING_SERVER_CERT_PATH"]).To(Equal("/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt"))

Expect(updated.Spec.Template.Annotations[AnnotationMLflowExperimentID]).To(Equal("exp-123"))
Expect(updated.Spec.Template.Annotations[AnnotationMLflowExperimentName]).To(Equal("mlflow-full"))
Expand Down
30 changes: 1 addition & 29 deletions kagenti-operator/internal/mlflow/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ package mlflow
import (
"bytes"
"context"
"crypto/tls"
"crypto/x509"
"encoding/json"
"errors"
"fmt"
Expand All @@ -40,10 +38,6 @@ const (
// DefaultTokenPath is the projected SA token path in a pod.
DefaultTokenPath = "/var/run/secrets/kubernetes.io/serviceaccount/token"

// DefaultCACertPath is the service-serving CA certificate path.
// On OpenShift, the service-ca.crt is projected into the SA token volume.
DefaultCACertPath = "/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt"

// WorkspaceHeader is the MLflow workspace header (namespace-based isolation).
WorkspaceHeader = "X-MLFLOW-WORKSPACE"
)
Expand All @@ -56,10 +50,6 @@ type Client struct {
// TokenPath is the path to the SA token file. Defaults to DefaultTokenPath.
TokenPath string

// CACertPath is the path to the CA certificate for TLS verification.
// Defaults to the in-cluster SA CA cert.
CACertPath string

// HTTPClient is the HTTP client to use. If nil, a default client with 30s timeout is used.
HTTPClient *http.Client

Expand Down Expand Up @@ -111,32 +101,14 @@ func IsResourceAlreadyExists(err error) bool {
func (c *Client) httpClient() *http.Client {
c.httpOnce.Do(func() {
if c.HTTPClient == nil {
tlsCfg := &tls.Config{MinVersion: tls.VersionTLS12}
if caCert, err := os.ReadFile(c.caCertPath()); err == nil {
pool, err := x509.SystemCertPool()
if err != nil {
// Fall back to an empty pool; the service-CA cert will still be appended.
pool = x509.NewCertPool()
}
pool.AppendCertsFromPEM(caCert)
tlsCfg.RootCAs = pool
}
c.HTTPClient = &http.Client{
Timeout: 30 * time.Second,
Transport: &http.Transport{TLSClientConfig: tlsCfg},
Timeout: 30 * time.Second,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: The default http.Client uses the system cert pool, which is correct for publicly-trusted gateway certs. If the gateway ever uses a private CA (e.g., air-gapped environments), this will need a configurable CA path re-added. The old TODO captured this well — consider keeping a one-line note for future readers.

}
}
})
return c.HTTPClient
}

// caCertPath reports which CA certificate file the client should read:
// the CACertPath field when it is non-empty, DefaultCACertPath otherwise.
func (c *Client) caCertPath() string {
	if c.CACertPath == "" {
		return DefaultCACertPath
	}
	return c.CACertPath
}

func (c *Client) tokenPath() string {
if c.TokenPath != "" {
return c.TokenPath
Expand Down
12 changes: 2 additions & 10 deletions kagenti-operator/internal/mlflow/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,8 @@ type MLflow struct {
}

type MLflowStatus struct {
Conditions []metav1.Condition `json:"conditions,omitempty"`
Address *MLflowAddressStatus `json:"address,omitempty"`
}

// MLflowAddressStatus holds the internal in-cluster endpoint for the MLflow Service.
type MLflowAddressStatus struct {
// URL is the in-cluster HTTPS URL for the managed MLflow Service.
Conditions []metav1.Condition `json:"conditions,omitempty"`
// URL is the external gateway URL for the MLflow server (e.g. via the RHOAI data-science gateway).
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The json:"url" tag changes the serialized field from nested address.url to flat status.url. If any external consumer (e.g., the MLflow operator setting this status) still writes to the old address structure, deserialization would silently drop the value.

Since E2E tests pass, the upstream CRD likely already uses the flat status.url field — just flagging for awareness.

URL string `json:"url,omitempty"`
}

Expand Down Expand Up @@ -95,9 +90,6 @@ func (in *MLflowStatus) DeepCopyInto(out *MLflowStatus) {
in.Conditions[i].DeepCopyInto(&out.Conditions[i])
}
}
if in.Address != nil {
out.Address = &MLflowAddressStatus{URL: in.Address.URL}
}
}

func (in *MLflowList) DeepCopyObject() runtime.Object {
Expand Down
Loading