11package cvo
22
33import (
4- "bytes"
54 "context"
6- "crypto/sha256"
75 "crypto/tls"
86 "errors"
97 "fmt"
10- "io"
118 "net"
129 "net/http"
13- "os"
14- "path/filepath"
1510 "strings"
1611 "time"
1712
@@ -23,6 +18,7 @@ import (
2318 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2419 "k8s.io/apimachinery/pkg/labels"
2520 "k8s.io/apimachinery/pkg/util/sets"
21+ "k8s.io/apiserver/pkg/server/dynamiccertificates"
2622 authenticationclientsetv1 "k8s.io/client-go/kubernetes/typed/authentication/v1"
2723 "k8s.io/client-go/rest"
2824 "k8s.io/client-go/tools/cache"
@@ -32,8 +28,6 @@ import (
3228 "github.com/openshift/cluster-version-operator/lib/resourcemerge"
3329 "github.com/openshift/cluster-version-operator/pkg/internal"
3430 "github.com/openshift/library-go/pkg/crypto"
35-
36- "gopkg.in/fsnotify.v1"
3731)
3832
3933// RegisterMetrics initializes metrics and registers them with the
@@ -210,15 +204,6 @@ func (a *authHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
210204 a .downstream .ServeHTTP (w , r )
211205}
212206
213- func shutdownHttpServer (parentCtx context.Context , svr * http.Server ) {
214- ctx , cancel := context .WithTimeout (parentCtx , 5 * time .Second )
215- defer cancel ()
216- klog .Info ("Shutting down metrics server so it can be recreated with updated TLS configuration." )
217- if err := svr .Shutdown (ctx ); err != nil {
218- klog .Errorf ("Failed to gracefully shut down metrics server during restart: %v" , err )
219- }
220- }
221-
222207func startListening (svr * http.Server , tlsConfig * tls.Config , lAddr string , resultChannel chan asyncResult ) {
223208 tcpListener , err := net .Listen ("tcp" , lAddr )
224209 if err != nil {
@@ -248,73 +233,65 @@ func handleServerResult(result asyncResult, lastLoopError error) error {
248233}
249234
250235// RunMetrics launches a server bound to listenAddress serving
251- // Prometheus metrics at /metrics over HTTPS. Continues serving
236+ // Prometheus metrics at /metrics over HTTPS. Continues serving
252237// until runContext.Done() and then attempts a clean shutdown
253- // limited by shutdownContext.Done(). Assumes runContext.Done()
238+ // limited by shutdownContext.Done(). Assumes runContext.Done()
254239// occurs before or simultaneously with shutdownContext.Done().
255- // Also detects changes to metrics certificate files upon which
256- // the metrics HTTP server is shutdown and recreated with a new
257- // TLS configuration.
240+ // The TLS configuration automatically reloads certificates when
241+ // they change on disk using dynamiccertificates.
258242func RunMetrics (runContext context.Context , shutdownContext context.Context , listenAddress , certFile , keyFile string , restConfig * rest.Config , disableMetricsAuth bool ) error {
259- var tlsConfig * tls.Config
260- if listenAddress != "" {
261- var err error
262- tlsConfig , err = makeTLSConfig (certFile , keyFile )
263- if err != nil {
264- return fmt .Errorf ("failed to create TLS config: %w" , err )
265- }
266- } else {
243+ if listenAddress == "" {
267244 return errors .New ("TLS configuration is required to serve metrics" )
268245 }
269246
270- client , err := authenticationclientsetv1 .NewForConfig (restConfig )
271- if err != nil {
272- return fmt .Errorf ("failed to create config: %w" , err )
273- }
274-
275- server := createHttpServer (runContext , client , disableMetricsAuth )
247+ // Prepare synchronization for to-be created go routines
248+ metricsContext , metricsContextCancel := context .WithCancel (runContext )
249+ defer metricsContextCancel ()
276250
277251 resultChannel := make (chan asyncResult , 1 )
278- resultChannelCount := 1
252+ resultChannelCount := 0
279253
280- go startListening (server , tlsConfig , listenAddress , resultChannel )
254+ // Create a dynamic serving cert/key controller to watch for serving certificate changes from files.
255+ servingCertController , err := dynamiccertificates .NewDynamicServingContentFromFiles ("metrics-serving-cert" , certFile , keyFile )
256+ if err != nil {
257+ return fmt .Errorf ("failed to create serving certificate controller: %w" , err )
258+ }
259+ if err := servingCertController .RunOnce (metricsContext ); err != nil {
260+ return fmt .Errorf ("failed to initialize serving content controller: %w" , err )
261+ }
281262
282- certDir := filepath .Dir (certFile )
283- keyDir := filepath .Dir (keyFile )
263+ // Start the serving cert controller to begin watching the cert and key files
264+ resultChannelCount ++
265+ go func () {
266+ servingCertController .Run (metricsContext , 1 )
267+ resultChannel <- asyncResult {name : "serving content controller" }
268+ }()
284269
285- origCertChecksum , err := checksumFile (certFile )
270+ // Create TLS config using the controllers. The config uses callbacks to dynamically
271+ // fetch the latest certificates and CA bundles on each connection, so no server
272+ // restart is needed when certificates change.
273+ tlsConfig , err := makeTLSConfig (servingCertController )
286274 if err != nil {
287- return fmt .Errorf ("failed to initialize certificate file checksum : %w" , err )
275+ return fmt .Errorf ("failed to create TLS config : %w" , err )
288276 }
289- origKeyChecksum , err := checksumFile (keyFile )
277+
278+ client , err := authenticationclientsetv1 .NewForConfig (restConfig )
290279 if err != nil {
291- return fmt .Errorf ("failed to initialize key file checksum : %w" , err )
280+ return fmt .Errorf ("failed to create config : %w" , err )
292281 }
293282
294- // Set up and start the file watcher.
295- watcher , err := fsnotify .NewWatcher ()
296- if watcher == nil || err != nil {
297- return fmt .Errorf ("failed to create file watcher for certificate and key rotation: %w" , err )
298- } else {
299- defer func () {
300- if err := watcher .Close (); err != nil {
301- klog .Errorf ("Failed to close file watcher: %v" , err )
302- }
303- }()
304- if err := watcher .Add (certDir ); err != nil {
305- return fmt .Errorf ("failed to add %v to watcher: %w" , certDir , err )
306- }
307- if certDir != keyDir {
308- if err := watcher .Add (keyDir ); err != nil {
309- return fmt .Errorf ("failed to add %v to watcher: %w" , keyDir , err )
310- }
311- }
312- }
283+ server := createHttpServer (metricsContext , client , disableMetricsAuth )
284+
285+ resultChannelCount ++
286+ go func () {
287+ startListening (server , tlsConfig , listenAddress , resultChannel )
288+ }()
313289
290+ // Wait for server to exit or shutdown signal
314291 shutdown := false
315- restartServer := false
316292 var loopError error
317293 for resultChannelCount > 0 {
294+ klog .Infof ("Waiting on %d outstanding goroutines." , resultChannelCount )
318295 if shutdown {
319296 select {
320297 case result := <- resultChannel :
@@ -326,63 +303,24 @@ func RunMetrics(runContext context.Context, shutdownContext context.Context, lis
326303 }
327304 } else {
328305 select {
329- case <- runContext .Done (): // clean shutdown
330- case result := <- resultChannel : // crashed before a shutdown was requested or metrics server recreated
331- if restartServer {
332- klog .Info ("Creating metrics server with updated TLS configuration." )
333- server = createHttpServer (runContext , client , disableMetricsAuth )
334- go startListening (server , tlsConfig , listenAddress , resultChannel )
335- restartServer = false
336- continue
337- }
306+ case <- metricsContext .Done ():
307+ klog .Infof ("Clean metrics shutdown requested: %v" , metricsContext .Err ())
308+ case result := <- resultChannel :
338309 resultChannelCount --
339310 loopError = handleServerResult (result , loopError )
340- case event := <- watcher .Events :
341- if event .Op != fsnotify .Chmod && event .Op != fsnotify .Remove {
342- if changed , err := certsChanged (origCertChecksum , origKeyChecksum , certFile , keyFile ); changed {
343-
344- // Update file checksums with latest files.
345- //
346- if origCertChecksum , err = checksumFile (certFile ); err != nil {
347- klog .Errorf ("Failed to update certificate file checksum: %v" , err )
348- loopError = err
349- break
350- }
351- if origKeyChecksum , err = checksumFile (keyFile ); err != nil {
352- klog .Errorf ("Failed to update key file checksum: %v" , err )
353- loopError = err
354- break
355- }
356-
357- tlsConfig , err = makeTLSConfig (certFile , keyFile )
358- if err == nil {
359- restartServer = true
360- shutdownHttpServer (shutdownContext , server )
361- continue
362- } else {
363- klog .Errorf ("Failed to create TLS configuration with updated configuration: %v" , err )
364- loopError = err
365- }
366- } else if err != nil {
367- klog .Errorf ("%v" , err )
368- loopError = err
369- } else {
370- continue
371- }
372- } else {
373- continue
374- }
375- case err = <- watcher .Errors :
376- klog .Errorf ("Error from metrics server certificate file watcher: %v" , err )
377- loopError = err
378311 }
379312 shutdown = true
380313 shutdownError := server .Shutdown (shutdownContext )
314+ if shutdownError != nil { // log the error we are discarding
315+ klog .Errorf ("Failed to gracefully shut down metrics server: %v" , shutdownError )
316+ }
317+
381318 if loopError == nil {
382319 loopError = shutdownError
383- } else if shutdownError != nil { // log the error we are discarding
384- klog .Errorf ("Failed to gracefully shut down metrics server: %v" , shutdownError )
385320 }
321+
322+ // Request remaining go routines to shut down
323+ metricsContextCancel ()
386324 }
387325 }
388326
@@ -722,95 +660,20 @@ func mostRecentTimestamp(cv *configv1.ClusterVersion) int64 {
722660 return latest .Unix ()
723661}
724662
725- // Determine if the certificates have changed and need to be updated.
726- // If no errors occur, returns true if both files have changed and
727- // neither is an empty file. Otherwise returns false and any error.
728- func certsChanged (origCertChecksum []byte , origKeyChecksum []byte , certFile , keyFile string ) (bool , error ) {
729- // Check if both files exist.
730- certNotEmpty , err := fileExistsAndNotEmpty (certFile )
663+ func makeTLSConfig (servingCertController dynamiccertificates.CertKeyContentProvider ) (* tls.Config , error ) {
664+ _ , err := tls .X509KeyPair (servingCertController .CurrentCertKeyContent ())
731665 if err != nil {
732- return false , fmt .Errorf ("Error checking if changed TLS cert file empty/exists : %w" , err )
666+ return nil , fmt .Errorf ("failed to create X509 key pair : %w" , err )
733667 }
734- keyNotEmpty , err := fileExistsAndNotEmpty (keyFile )
735- if err != nil {
736- return false , fmt .Errorf ("error checking if changed TLS key file empty/exists: %w" , err )
737- }
738- if ! certNotEmpty || ! keyNotEmpty {
739- // One of the files is missing despite some file event.
740- return false , fmt .Errorf ("certificate or key is missing or empty, certificates will not be rotated" )
741- }
742-
743- currentCertChecksum , err := checksumFile (certFile )
744- if err != nil {
745- return false , fmt .Errorf ("error checking certificate file checksum: %w" , err )
746- }
747-
748- currentKeyChecksum , err := checksumFile (keyFile )
749- if err != nil {
750- return false , fmt .Errorf ("error checking key file checksum: %w" , err )
751- }
752-
753- // Check if the non-empty certificate/key files have actually changed.
754- if ! bytes .Equal (origCertChecksum , currentCertChecksum ) && ! bytes .Equal (origKeyChecksum , currentKeyChecksum ) {
755- klog .V (2 ).Info ("Certificate and key changed. Will recreate metrics server with updated TLS configuration." )
756- return true , nil
757- }
758-
759- return false , nil
760- }
761-
762- func makeTLSConfig (servingCertFile , servingKeyFile string ) (* tls.Config , error ) {
763- // Load the initial certificate contents.
764- certBytes , err := os .ReadFile (servingCertFile )
765- if err != nil {
766- return nil , err
767- }
768- keyBytes , err := os .ReadFile (servingKeyFile )
769- if err != nil {
770- return nil , err
771- }
772- certificate , err := tls .X509KeyPair (certBytes , keyBytes )
773- if err != nil {
774- return nil , err
775- }
776-
777- return crypto .SecureTLSConfig (& tls.Config {
668+ tlsConfig := crypto .SecureTLSConfig (& tls.Config {
778669 GetCertificate : func (_ * tls.ClientHelloInfo ) (* tls.Certificate , error ) {
779- return & certificate , nil
670+ cert , err := tls .X509KeyPair (servingCertController .CurrentCertKeyContent ())
671+ if err != nil {
672+ klog .Errorf ("Failed to load current serving certificate, rejecting connection: %v" , err )
673+ return nil , fmt .Errorf ("invalid serving certificate: %w" , err )
674+ }
675+ return & cert , nil
780676 },
781- }), nil
782- }
783-
784- // Compute the sha256 checksum for file 'fName' returning any error.
785- func checksumFile (fName string ) ([]byte , error ) {
786- file , err := os .Open (fName )
787- if err != nil {
788- return nil , fmt .Errorf ("failed to open file %v for checksum: %w" , fName , err )
789- }
790- defer func () {
791- if err := file .Close (); err != nil {
792- klog .Errorf ("failed to close file %s: %v" , fName , err )
793- }
794- }()
795-
796- hash := sha256 .New ()
797-
798- if _ , err = io .Copy (hash , file ); err != nil {
799- return nil , fmt .Errorf ("failed to compute checksum for file %v: %w" , fName , err )
800- }
801-
802- return hash .Sum (nil ), nil
803- }
804-
805- // Check if a file exists and has file.Size() not equal to 0.
806- // Returns any error returned by os.Stat other than os.ErrNotExist.
807- func fileExistsAndNotEmpty (fName string ) (bool , error ) {
808- if fi , err := os .Stat (fName ); err == nil {
809- return (fi .Size () != 0 ), nil
810- } else if errors .Is (err , os .ErrNotExist ) {
811- return false , nil
812- } else {
813- // Some other error, file may not exist.
814- return false , err
815- }
677+ })
678+ return tlsConfig , nil
816679}
0 commit comments