@@ -298,9 +298,9 @@ func (cfg *Config) getCertDuringHandshake(ctx context.Context, hello *tls.Client
298298 // domain, avoid pounding manager or storage thousands of times simultaneously. We use a similar sync
299299 // strategy for obtaining certificate during handshake.
300300 certLoadWaitChansMu .Lock ()
301- wait , ok := certLoadWaitChans [name ]
301+ waiter , ok := certLoadWaitChans [name ]
302302 if ok {
303- // another goroutine is already loading the cert; just wait and we'll get it from the in-memory cache
303+ // another goroutine is already loading the cert; just wait
304304 certLoadWaitChansMu .Unlock ()
305305
306306 timeout := time .NewTimer (2 * time .Minute )
@@ -310,33 +310,44 @@ func (cfg *Config) getCertDuringHandshake(ctx context.Context, hello *tls.Client
310310 case <- ctx .Done ():
311311 timeout .Stop ()
312312 return Certificate {}, ctx .Err ()
313- case <- wait :
313+ case <- waiter . done :
314314 timeout .Stop ()
315315 }
316316
317- return cfg .getCertDuringHandshake (ctx , hello , false )
318- } else {
319- // no other goroutine is currently trying to load this cert
320- wait = make (chan struct {})
321- certLoadWaitChans [name ] = wait
322- certLoadWaitChansMu .Unlock ()
317+ // If the leader got a result from an external cert manager, use it
318+ // directly — these certs are not added to the cache, so a recursive
319+ // cache lookup would miss. For cached certs (on-demand, managed),
320+ // the waiter result will be empty and we fall through to the
321+ // original recursive lookup.
322+ if ! waiter .cert .Empty () || waiter .err != nil {
323+ return waiter .cert , waiter .err
324+ }
323325
324- // unblock others and clean up when we're done
325- defer func () {
326- certLoadWaitChansMu .Lock ()
327- close (wait )
328- delete (certLoadWaitChans , name )
329- certLoadWaitChansMu .Unlock ()
330- }()
326+ return cfg .getCertDuringHandshake (ctx , hello , false )
331327 }
332328
329+ // no other goroutine is currently trying to load this cert
330+ waiter = & certLoadWaiter {done : make (chan struct {})}
331+ certLoadWaitChans [name ] = waiter
332+ certLoadWaitChansMu .Unlock ()
333+
334+ // unblock others and clean up when we're done
335+ defer func () {
336+ certLoadWaitChansMu .Lock ()
337+ close (waiter .done )
338+ delete (certLoadWaitChans , name )
339+ certLoadWaitChansMu .Unlock ()
340+ }()
341+
333342 // If an external Manager is configured, try to get it from them.
334343 // Only continue to use our own logic if it returns empty+nil.
335344 externalCert , err := cfg .getCertFromAnyCertManager (ctx , hello , logger )
336345 if err != nil {
346+ waiter .err = err
337347 return Certificate {}, err
338348 }
339349 if ! externalCert .Empty () {
350+ waiter .cert = externalCert
340351 return externalCert , nil
341352 }
342353
@@ -946,9 +957,19 @@ var (
946957 obtainCertWaitChansMu sync.Mutex
947958)
948959
960+ // certLoadWaiter coordinates concurrent certificate loading for the same name.
961+ // The leader populates the result and closes the channel; waiters read the result
962+ // after the channel is closed. This allows externally-managed certificates (which
963+ // are not cached) to be shared directly with waiting goroutines.
964+ type certLoadWaiter struct {
965+ done chan struct {}
966+ cert Certificate
967+ err error
968+ }
969+
949970// TODO: this lockset should probably be per-cache
950971var (
951- certLoadWaitChans = make (map [string ]chan struct {} )
972+ certLoadWaitChans = make (map [string ]* certLoadWaiter )
952973 certLoadWaitChansMu sync.Mutex
953974)
954975
0 commit comments