File size: 8,705 Bytes
bd925df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
/**
 * Rate Limit State Management
 *
 * Shared utilities for rate limit tracking, backoff calculation,
 * and error classification. Used by both streaming and non-streaming handlers.
 */

import {
    RATE_LIMIT_DEDUP_WINDOW_MS,
    RATE_LIMIT_STATE_RESET_MS,
    FIRST_RETRY_DELAY_MS,
    BACKOFF_BY_ERROR_TYPE,
    QUOTA_EXHAUSTED_BACKOFF_TIERS_MS,
    MIN_BACKOFF_MS,
    CAPACITY_JITTER_MAX_MS
} from '../constants.js';
import { generateJitter } from '../utils/helpers.js';
import { logger } from '../utils/logger.js';
import { parseRateLimitReason } from './rate-limit-parser.js';

/**
 * Rate limit deduplication state - prevents thundering herd on concurrent rate limits.
 *
 * Keys are the strings produced by getDedupKey(email, model). Values are
 * `{ consecutive429, lastAt }`: `consecutive429` is the attempt counter for
 * that account+model and `lastAt` is the Date.now() timestamp (ms) at which
 * the counter was last written.
 *
 * Entries are written by getRateLimitBackoff(), removed by
 * clearRateLimitState(), and pruned by the periodic cleanup timer at the
 * bottom of this module.
 *
 * This is a module-level singleton Map shared across all handlers (streaming and non-streaming).
 */
const rateLimitStateByAccountModel = new Map(); // `${email}:${model}` -> { consecutive429, lastAt }

/**
 * Build the key under which rate limit state is tracked for an account/model pair.
 * @param {string} email - Account email
 * @param {string} model - Model ID
 * @returns {string} Key of the form `email:model`
 */
export function getDedupKey(email, model) {
    const separator = ':';
    return `${email}${separator}${model}`;
}

/**
 * Compute the rate limit backoff for an account+model, with deduplication of
 * concurrent hits and exponential growth (matches opencode-antigravity-auth).
 *
 * A hit that arrives within RATE_LIMIT_DEDUP_WINDOW_MS of the last recorded one
 * is reported as a duplicate: the stored attempt number is reused and the
 * stored state is NOT modified. Otherwise the attempt counter is incremented
 * (or restarted at 1 when there is no state, or the last hit is at least
 * RATE_LIMIT_STATE_RESET_MS old) and the state is rewritten with the current time.
 *
 * The delay is `base * 2^(attempt - 1)` capped at 60000 ms, but never lower
 * than `base`, where `base` is the server-provided retry time when it is not
 * null/undefined and FIRST_RETRY_DELAY_MS otherwise.
 *
 * @param {string} email - Account email
 * @param {string} model - Model ID
 * @param {number|null} serverRetryAfterMs - Server-provided retry time
 * @returns {{attempt: number, delayMs: number, isDuplicate: boolean}} Backoff info
 */
export function getRateLimitBackoff(email, model, serverRetryAfterMs) {
    const timestamp = Date.now();
    const key = getDedupKey(email, model);
    const state = rateLimitStateByAccountModel.get(key);
    const elapsedMs = state ? timestamp - state.lastAt : null;

    // Shared delay formula: exponential growth capped at 60s, floored at the base delay
    // (so a server-provided base above 60s is honoured as-is).
    const floorMs = serverRetryAfterMs ?? FIRST_RETRY_DELAY_MS;
    const delayFor = (attemptNumber) => {
        const exponentialMs = Math.min(floorMs * 2 ** (attemptNumber - 1), 60000);
        return Math.max(floorMs, exponentialMs);
    };

    // Concurrent hit inside the dedup window: report it without touching the state
    if (state && elapsedMs < RATE_LIMIT_DEDUP_WINDOW_MS) {
        const duplicateDelayMs = delayFor(state.consecutive429);
        logger.debug(`[CloudCode] Rate limit on ${email}:${model} within dedup window, attempt=${state.consecutive429}, isDuplicate=true`);
        return { attempt: state.consecutive429, delayMs: duplicateDelayMs, isDuplicate: true };
    }

    // Continue the streak only if the previous hit is recent enough; otherwise start over
    let attempt = 1;
    if (state && elapsedMs < RATE_LIMIT_STATE_RESET_MS) {
        attempt = state.consecutive429 + 1;
    }

    rateLimitStateByAccountModel.set(key, { consecutive429: attempt, lastAt: timestamp });

    const delayMs = delayFor(attempt);
    logger.debug(`[CloudCode] Rate limit backoff for ${email}:${model}: attempt=${attempt}, delayMs=${delayMs}`);
    return { attempt, delayMs, isDuplicate: false };
}

/**
 * Drop the tracked rate limit state for an account+model pair
 * (meant to be called after a successful request).
 * Removing a pair that has no tracked state is a no-op.
 * @param {string} email - Account email
 * @param {string} model - Model ID
 */
export function clearRateLimitState(email, model) {
    rateLimitStateByAccountModel.delete(getDedupKey(email, model));
}

/**
 * Detect permanent authentication failures that require re-authentication.
 * These should mark the account as invalid rather than just clearing cache.
 *
 * Matching is a case-insensitive substring search; a null/undefined/empty
 * error text never matches.
 * @param {string} errorText - Error message from API
 * @returns {boolean} True if permanent auth failure
 */
export function isPermanentAuthFailure(errorText) {
    const markers = [
        'invalid_grant',
        'token revoked',
        'token has been expired or revoked',
        'token_revoked',
        'invalid_client',
        'credentials are invalid'
    ];
    const haystack = (errorText || '').toLowerCase();
    return markers.some((marker) => haystack.includes(marker));
}

/**
 * Detect if a 403 error is an account-level block: the text contains
 * `validation_required`, `account_disabled` or `user_disabled`
 * (case-insensitive substring match). A bare PERMISSION_DENIED does not match.
 *
 * These errors should trigger account rotation, not just endpoint rotation:
 * the account needs validation (e.g., captcha, terms acceptance), which
 * cannot be resolved by trying different endpoints.
 * @param {string} errorText - Error message from API
 * @returns {boolean} True if the error requires account rotation
 */
export function isValidationRequired(errorText) {
    const text = (errorText || '').toLowerCase();
    for (const code of ['validation_required', 'account_disabled', 'user_disabled']) {
        if (text.includes(code)) {
            return true;
        }
    }
    return false;
}

/**
 * Extract the Google verification URL from an error message.
 * The 403 VALIDATION_REQUIRED error contains a URL the user must visit.
 *
 * Lookup order:
 *   1. If the text parses as JSON, the first truthy
 *      `error.details[].metadata.validation_url` is returned as-is.
 *   2. Otherwise (or if no such field exists) the first
 *      `https://accounts.google.com/signin/continue?...` link found in the raw
 *      text is returned, with trailing punctuation stripped.
 * @param {string} errorText - Error message from the API
 * @returns {string|null} The verification URL, or null if not found
 */
export function extractVerificationUrl(errorText) {
    if (!errorText) {
        return null;
    }

    // Structured path — the 403 response often has details[].metadata.validation_url
    try {
        const entries = JSON.parse(errorText)?.error?.details || [];
        for (const entry of entries) {
            const structuredUrl = entry?.metadata?.validation_url;
            if (structuredUrl) {
                return structuredUrl;
            }
        }
    } catch {
        // Not valid JSON or no usable structured field — fall through to regex
    }

    // Unstructured path: scan the raw text; the link ends at whitespace, a quote or a backslash
    const found = errorText.match(/https:\/\/accounts\.google\.com\/signin\/continue\?[^\s"\\]+/);
    if (found === null) {
        return null;
    }
    // Drop sentence/bracket punctuation that got glued to the end of the link
    const [link] = found;
    return link.replace(/[,.)}>\]]+$/, '');
}

/**
 * Detect if a 403 error is due to a permanent account ban (ToS violation).
 * These accounts are permanently disabled by Google and cannot be recovered
 * by retrying or re-authenticating. User must contact Google support to appeal.
 *
 * Both phrases "has been disabled" and "violation of terms of service" must
 * appear in the text (case-insensitive) for the error to count as a ban.
 * @param {string} errorText - Error message from API
 * @returns {boolean} True if account is permanently banned
 */
export function isAccountBanned(errorText) {
    const text = (errorText || '').toLowerCase();
    if (!text.includes('has been disabled')) {
        return false;
    }
    return text.includes('violation of terms of service');
}

/**
 * Detect if a 429 error is due to model capacity (not user quota).
 * Capacity issues should retry on same account with shorter delay.
 *
 * Matching is a case-insensitive substring search over a fixed list of phrases.
 * @param {string} errorText - Error message from API
 * @returns {boolean} True if capacity exhausted (not quota)
 */
export function isModelCapacityExhausted(errorText) {
    const capacityPhrases = [
        'model_capacity_exhausted',
        'capacity_exhausted',
        'model is currently overloaded',
        'service temporarily unavailable'
    ];
    const text = (errorText || '').toLowerCase();
    return capacityPhrases.some((phrase) => text.includes(phrase));
}

/**
 * Calculate smart backoff based on error type (matches opencode-antigravity-auth)
 *
 * A positive server-provided reset time always wins (floored at MIN_BACKOFF_MS).
 * Otherwise the error text is classified with parseRateLimitReason() and the
 * backoff is looked up per reason; unrecognised reasons use
 * BACKOFF_BY_ERROR_TYPE.UNKNOWN.
 *
 * @param {string} errorText - Error message
 * @param {number|null} serverResetMs - Reset time from server
 * @param {number} consecutiveFailures - Number of consecutive failures; selects
 *   the QUOTA_EXHAUSTED tier. Negative or non-numeric values are treated as 0,
 *   fractional values are truncated, and values past the last tier use the last tier.
 * @returns {number} Backoff time in milliseconds
 */
export function calculateSmartBackoff(errorText, serverResetMs, consecutiveFailures = 0) {
    // If server provides a reset time, use it (with minimum floor to prevent loops)
    if (serverResetMs && serverResetMs > 0) {
        return Math.max(serverResetMs, MIN_BACKOFF_MS);
    }

    const reason = parseRateLimitReason(errorText);

    switch (reason) {
        case 'QUOTA_EXHAUSTED': {
            // Progressive backoff: one tier per consecutive failure, taken from
            // QUOTA_EXHAUSTED_BACKOFF_TIERS_MS. The index is clamped to a valid
            // integer so that an unexpected counter value (negative, fractional,
            // NaN) cannot index outside the array and yield `undefined`.
            const lastTier = QUOTA_EXHAUSTED_BACKOFF_TIERS_MS.length - 1;
            const failures = Math.trunc(Number(consecutiveFailures)) || 0;
            const tierIndex = Math.min(Math.max(failures, 0), lastTier);
            return QUOTA_EXHAUSTED_BACKOFF_TIERS_MS[tierIndex];
        }
        case 'RATE_LIMIT_EXCEEDED':
            return BACKOFF_BY_ERROR_TYPE.RATE_LIMIT_EXCEEDED;
        case 'MODEL_CAPACITY_EXHAUSTED':
            // Apply jitter to prevent thundering herd - clients retry at staggered times
            return BACKOFF_BY_ERROR_TYPE.MODEL_CAPACITY_EXHAUSTED + generateJitter(CAPACITY_JITTER_MAX_MS);
        case 'SERVER_ERROR':
            return BACKOFF_BY_ERROR_TYPE.SERVER_ERROR;
        default:
            return BACKOFF_BY_ERROR_TYPE.UNKNOWN;
    }
}

// Periodically clean up stale rate limit state (every 60 seconds).
// An entry is stale once its last update is older than RATE_LIMIT_STATE_RESET_MS;
// getRateLimitBackoff() would restart its attempt counter at 1 anyway.
// The timer is unref'd so that merely importing this module does not keep the
// Node.js process alive (optional call: a no-op where timers have no unref()).
setInterval(() => {
    const cutoff = Date.now() - RATE_LIMIT_STATE_RESET_MS;
    for (const [key, state] of rateLimitStateByAccountModel.entries()) {
        if (state.lastAt < cutoff) {
            rateLimitStateByAccountModel.delete(key);
        }
    }
}, 60000).unref?.();