feat(auth): implement auto-refresh loop for managing auth token schedule

- Introduced `authAutoRefreshLoop` to handle token refresh scheduling.
- Replaced semaphore-based refresh logic in `Manager` with the new loop.
- Added unit tests to verify refresh schedule logic and edge cases.
This commit is contained in:
Luis Pater
2026-04-12 02:06:40 +08:00
parent 0ab1f5412f
commit a583463d60
3 changed files with 636 additions and 64 deletions
+55 -64
View File
@@ -162,8 +162,8 @@ type Manager struct {
rtProvider RoundTripperProvider
// Auto refresh state
refreshCancel context.CancelFunc
refreshSemaphore chan struct{}
refreshCancel context.CancelFunc
refreshLoop *authAutoRefreshLoop
}
// NewManager constructs a manager with optional custom selector and hook.
@@ -182,7 +182,6 @@ func NewManager(store Store, selector Selector, hook Hook) *Manager {
auths: make(map[string]*Auth),
providerOffsets: make(map[string]int),
modelPoolOffsets: make(map[string]int),
refreshSemaphore: make(chan struct{}, refreshMaxConcurrency),
}
// atomic.Value requires non-nil initial value.
manager.runtimeConfig.Store(&internalconfig.Config{})
@@ -214,6 +213,16 @@ func (m *Manager) syncScheduler() {
m.syncSchedulerFromSnapshot(m.snapshotAuths())
}
// snapshotAuths returns a clone of every auth currently stored in m.auths.
// The map is walked under the read lock; the returned slice holds clones,
// not the stored pointers.
func (m *Manager) snapshotAuths() []*Auth {
	m.mu.RLock()
	defer m.mu.RUnlock()

	clones := make([]*Auth, 0, len(m.auths))
	for id := range m.auths {
		clones = append(clones, m.auths[id].Clone())
	}
	return clones
}
// RefreshSchedulerEntry re-upserts a single auth into the scheduler so that its
// supportedModelSet is rebuilt from the current global model registry state.
// This must be called after models have been registered for a newly added auth,
@@ -1088,6 +1097,7 @@ func (m *Manager) Register(ctx context.Context, auth *Auth) (*Auth, error) {
if m.scheduler != nil {
m.scheduler.upsertAuth(authClone)
}
m.queueRefreshReschedule(auth.ID)
_ = m.persist(ctx, auth)
m.hook.OnAuthRegistered(ctx, auth.Clone())
return auth.Clone(), nil
@@ -1118,6 +1128,7 @@ func (m *Manager) Update(ctx context.Context, auth *Auth) (*Auth, error) {
if m.scheduler != nil {
m.scheduler.upsertAuth(authClone)
}
m.queueRefreshReschedule(auth.ID)
_ = m.persist(ctx, auth)
m.hook.OnAuthUpdated(ctx, auth.Clone())
return auth.Clone(), nil
@@ -2890,80 +2901,51 @@ func (m *Manager) StartAutoRefresh(parent context.Context, interval time.Duratio
if interval <= 0 {
interval = refreshCheckInterval
}
if m.refreshCancel != nil {
m.refreshCancel()
m.refreshCancel = nil
m.mu.Lock()
cancel := m.refreshCancel
m.refreshCancel = nil
m.refreshLoop = nil
m.mu.Unlock()
if cancel != nil {
cancel()
}
ctx, cancel := context.WithCancel(parent)
loop := newAuthAutoRefreshLoop(m, interval)
m.mu.Lock()
m.refreshCancel = cancel
go func() {
ticker := time.NewTicker(interval)
defer ticker.Stop()
m.checkRefreshes(ctx)
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
m.checkRefreshes(ctx)
}
}
}()
m.refreshLoop = loop
m.mu.Unlock()
loop.rebuild(time.Now())
go loop.run(ctx)
}
// StopAutoRefresh cancels the background refresh loop, if running.
func (m *Manager) StopAutoRefresh() {
if m.refreshCancel != nil {
m.refreshCancel()
m.refreshCancel = nil
m.mu.Lock()
cancel := m.refreshCancel
m.refreshCancel = nil
m.refreshLoop = nil
m.mu.Unlock()
if cancel != nil {
cancel()
}
}
// checkRefreshes walks a snapshot of the registered auths and starts a
// background refresh for each one that is due.
//
// Auths whose account type is "api_key" are skipped. For the remaining auths
// a goroutine running refreshAuthWithLimit is started only when shouldRefresh
// reports true, executorFor returns a non-nil executor for the provider, and
// markRefreshPending accepts the auth.
func (m *Manager) checkRefreshes(ctx context.Context) {
	now := time.Now()
	for _, candidate := range m.snapshotAuths() {
		accountType, _ := candidate.AccountInfo()
		if accountType == "api_key" {
			continue
		}
		if !m.shouldRefresh(candidate, now) {
			continue
		}
		log.Debugf("checking refresh for %s, %s, %s", candidate.Provider, candidate.ID, accountType)
		if m.executorFor(candidate.Provider) == nil {
			continue
		}
		if !m.markRefreshPending(candidate.ID, now) {
			continue
		}
		go m.refreshAuthWithLimit(ctx, candidate.ID)
	}
}
func (m *Manager) refreshAuthWithLimit(ctx context.Context, id string) {
if m.refreshSemaphore == nil {
m.refreshAuth(ctx, id)
func (m *Manager) queueRefreshReschedule(authID string) {
if m == nil || authID == "" {
return
}
select {
case m.refreshSemaphore <- struct{}{}:
defer func() { <-m.refreshSemaphore }()
case <-ctx.Done():
return
}
m.refreshAuth(ctx, id)
}
func (m *Manager) snapshotAuths() []*Auth {
m.mu.RLock()
defer m.mu.RUnlock()
out := make([]*Auth, 0, len(m.auths))
for _, a := range m.auths {
out = append(out, a.Clone())
loop := m.refreshLoop
m.mu.RUnlock()
if loop == nil {
return
}
return out
loop.queueReschedule(authID)
}
func (m *Manager) shouldRefresh(a *Auth, now time.Time) bool {
@@ -3173,16 +3155,20 @@ func lookupMetadataTime(meta map[string]any, keys ...string) (time.Time, bool) {
// markRefreshPending tries to claim a refresh for the auth with the given id.
//
// It returns false when the auth is missing, nil, or disabled, or when now is
// still before the auth's NextRefreshAfter. Otherwise it moves
// NextRefreshAfter forward to now+refreshPendingBackoff, queues a reschedule
// for the auth, and returns true.
func (m *Manager) markRefreshPending(id string, now time.Time) bool {
	// The lock is released explicitly on every path instead of with defer:
	// queueRefreshReschedule reads m.refreshLoop under m.mu, so it has to run
	// after this write lock is dropped, and a deferred Unlock on top of the
	// explicit ones would unlock the mutex twice.
	m.mu.Lock()
	auth, ok := m.auths[id]
	if !ok || auth == nil || auth.Disabled {
		m.mu.Unlock()
		return false
	}
	if !auth.NextRefreshAfter.IsZero() && now.Before(auth.NextRefreshAfter) {
		m.mu.Unlock()
		return false
	}
	auth.NextRefreshAfter = now.Add(refreshPendingBackoff)
	m.auths[id] = auth
	m.mu.Unlock()

	m.queueRefreshReschedule(id)
	return true
}
@@ -3209,16 +3195,21 @@ func (m *Manager) refreshAuth(ctx context.Context, id string) {
log.Debugf("refreshed %s, %s, %v", auth.Provider, auth.ID, err)
now := time.Now()
if err != nil {
shouldReschedule := false
m.mu.Lock()
if current := m.auths[id]; current != nil {
current.NextRefreshAfter = now.Add(refreshFailureBackoff)
current.LastError = &Error{Message: err.Error()}
m.auths[id] = current
shouldReschedule = true
if m.scheduler != nil {
m.scheduler.upsertAuth(current.Clone())
}
}
m.mu.Unlock()
if shouldReschedule {
m.queueRefreshReschedule(id)
}
return
}
if updated == nil {