Spaces:

Yash030
/

antigravity-proxy

Running

App Files Files Community

antigravity-proxy / src /server.js

Yash030's picture

Initial Commit

bd925df 6 days ago

history blame contribute delete

35.9 kB

	/**
	* Express Server - Anthropic-compatible API
	* Proxies to Google Cloud Code via Antigravity
	* Supports multi-account load balancing
	*/

	import express from 'express';
	import cors from 'cors';
	import { timingSafeEqual } from 'crypto';
	import path from 'path';
	import { fileURLToPath } from 'url';
	import { sendMessage, sendMessageStream, listModels, getModelQuotas, getSubscriptionTier, isValidModel } from './cloudcode/index.js';
	import { mountWebUI } from './webui/index.js';
	import { config } from './config.js';
	import { forceRefresh } from './auth/token-extractor.js';
	import { REQUEST_BODY_LIMIT } from './constants.js';
	import { AccountManager } from './account-manager/index.js';
	import { clearThinkingSignatureCache } from './format/signature-cache.js';
	import { formatDuration } from './utils/helpers.js';
	import { logger } from './utils/logger.js';
	import usageStats from './modules/usage-stats.js';

	const __filename = fileURLToPath(import.meta.url);
	const __dirname = path.dirname(__filename);

	// Parse fallback flag directly from command line args to avoid circular dependency
	const args = process.argv.slice(2);
	const FALLBACK_ENABLED = args.includes('--fallback') || process.env.FALLBACK === 'true';

	/**
	 * Extract the value of the --strategy flag from a list of CLI arguments.
	 * Supports both `--strategy=<name>` and `--strategy <name>`. When the flag
	 * appears more than once, the last occurrence wins.
	 *
	 * @param {string[]} argv - Command line arguments (without node/script path)
	 * @returns {string|null} The strategy value, or null when the flag is absent
	 */
	function parseStrategyOverride(argv) {
	    const inlinePrefix = '--strategy=';
	    let override = null;

	    argv.forEach((arg, index) => {
	        if (arg.startsWith(inlinePrefix)) {
	            // slice (not split) so a value that itself contains '=' is kept whole
	            override = arg.slice(inlinePrefix.length);
	            return;
	        }

	        const next = argv[index + 1];
	        // Do not swallow a following flag (e.g. "--strategy --fallback") as the value
	        if (arg === '--strategy' && next && !next.startsWith('--')) {
	            override = next;
	        }
	    });

	    return override;
	}

	// Parse --strategy flag (format: --strategy=sticky or --strategy sticky)
	const STRATEGY_OVERRIDE = parseStrategyOverride(args);

	// The Express application; routes and middleware below are registered on it
	// and it is the module's default export.
	const app = express();

	// Disable x-powered-by header for security
	app.disable('x-powered-by');

	// Initialize account manager (will be fully initialized on first request or startup)
	// Exported so other modules can share this same instance.
	export const accountManager = new AccountManager();

	// Track initialization status
	// isInitialized: set to true once accountManager.initialize() has resolved.
	let isInitialized = false;
	// initError: last initialization failure; written by ensureInitialized() and
	// not read anywhere else in this file.
	let initError = null;
	// initPromise: the in-flight initialization, shared by concurrent callers;
	// reset to null on failure so a later call can retry.
	let initPromise = null;

	/**
	 * Make sure the account manager has been initialized before it is used.
	 * Concurrent callers share a single in-flight initialization promise; a
	 * failed attempt clears that promise so the next caller can try again.
	 *
	 * @returns {Promise<void>} Resolves once initialization has completed
	 * @throws Re-throws whatever accountManager.initialize() rejected with
	 */
	async function ensureInitialized() {
	    if (isInitialized) {
	        return;
	    }

	    if (!initPromise) {
	        const runInitialization = async () => {
	            try {
	                await accountManager.initialize(STRATEGY_OVERRIDE);
	                isInitialized = true;
	                const { summary } = accountManager.getStatus();
	                logger.success(`[Server] Account pool initialized: ${summary}`);
	            } catch (err) {
	                initError = err;
	                // Drop the cached promise so a later request can retry
	                initPromise = null;
	                logger.error('[Server] Failed to initialize account manager:', err.message);
	                throw err;
	            }
	        };

	        initPromise = runInitialization();
	    }

	    // Either the promise we just created or the one already in progress
	    return initPromise;
	}

	// Middleware
	app.use(cors());
	app.use(express.json({ limit: REQUEST_BODY_LIMIT }));

	/**
	 * API Key authentication middleware for /v1/* endpoints.
	 * When config.apiKey is unset, every request is let through. Otherwise the
	 * key is read from "Authorization: Bearer <key>" or, failing that, from the
	 * "x-api-key" header, and a mismatch is answered with a 401 error body.
	 */
	app.use('/v1', (req, res, next) => {
	    // Skip validation if apiKey is not configured
	    if (!config.apiKey) {
	        return next();
	    }

	    const authHeader = req.headers['authorization'];
	    const xApiKey = req.headers['x-api-key'];

	    let providedKey = '';
	    if (typeof authHeader === 'string' && authHeader.startsWith('Bearer ')) {
	        providedKey = authHeader.substring(7);
	    } else if (xApiKey) {
	        providedKey = xApiKey;
	    }

	    // Compare in constant time so response timing does not reveal how much
	    // of the key matched. timingSafeEqual requires equal-length buffers, so
	    // the length is checked first.
	    const providedBytes = Buffer.from(String(providedKey));
	    const expectedBytes = Buffer.from(String(config.apiKey));
	    const isAuthorized = providedBytes.length === expectedBytes.length
	        && timingSafeEqual(providedBytes, expectedBytes);

	    if (!isAuthorized) {
	        logger.warn(`[API] Unauthorized request from ${req.ip}, invalid API key`);
	        return res.status(401).json({
	            type: 'error',
	            error: {
	                type: 'authentication_error',
	                message: 'Invalid or missing API key'
	            }
	        });
	    }

	    next();
	});

	// Register the usage statistics middleware on the app
	usageStats.setupMiddleware(app);

	/**
	 * Silent handler for Claude Code CLI background POST requests.
	 * POST /api/event_logging/batch and POST / are answered with
	 * 200 { status: 'ok' } and go no further; everything else passes through.
	 * Using app.use instead of app.post for earlier middleware interception.
	 */
	app.use((req, res, next) => {
	    if (req.method !== 'POST') {
	        return next();
	    }

	    switch (req.path) {
	        case '/api/event_logging/batch': // Claude Code event logging
	        case '/': // Claude Code root POST
	            return res.status(200).json({ status: 'ok' });
	        default:
	            return next();
	    }
	});

	// Mount WebUI (optional web interface for account management)
	mountWebUI(app, __dirname, accountManager);

	/**
	 * Parse an error to extract error type, status code, and user-friendly message.
	 * Classification is based on substrings of the error message.
	 *
	 * @param {Error|string|*} error - Thrown value; an Error, a string, or anything else
	 * @returns {{errorType: string, statusCode: number, errorMessage: string}}
	 *   Defaults to api_error / 500 with the raw message when nothing matches
	 */
	function parseError(error) {
	    // Tolerate thrown strings and values without a usable message
	    let rawMessage = 'Unknown error';
	    if (typeof error === 'string') {
	        rawMessage = error;
	    } else if (typeof error?.message === 'string') {
	        rawMessage = error.message;
	    }

	    let errorType = 'api_error';
	    let statusCode = 500;
	    let errorMessage = rawMessage;

	    if (rawMessage.includes('401') || rawMessage.includes('UNAUTHENTICATED')) {
	        errorType = 'authentication_error';
	        statusCode = 401;
	        errorMessage = 'Authentication failed. Make sure Antigravity is running with a valid token.';
	    } else if (rawMessage.includes('429') || rawMessage.includes('RESOURCE_EXHAUSTED') || rawMessage.includes('QUOTA_EXHAUSTED')) {
	        errorType = 'invalid_request_error'; // Use invalid_request_error to force client to purge/stop
	        statusCode = 400; // Use 400 to ensure client does not retry (429 and 529 trigger retries)

	        // Try to extract the quota reset time from the error
	        const resetMatch = rawMessage.match(/quota will reset after ([\dhms]+)/i);
	        // Try to extract model from our error format "Rate limited on <model>" or JSON format
	        const modelMatch = rawMessage.match(/Rate limited on ([^.]+)\./) || rawMessage.match(/"model":\s*"([^"]+)"/);
	        const model = modelMatch ? modelMatch[1] : 'the model';

	        errorMessage = resetMatch
	            ? `RESOURCE_EXHAUSTED: You have exhausted your capacity on ${model}. Quota will reset after ${resetMatch[1]}.`
	            : `RESOURCE_EXHAUSTED: You have exhausted your capacity on ${model}. Please wait for your quota to reset.`;
	    } else if (rawMessage.includes('invalid_request_error') || rawMessage.includes('INVALID_ARGUMENT')) {
	        errorType = 'invalid_request_error';
	        statusCode = 400;
	        // Allow whitespace after the colon so pretty-printed JSON also matches
	        const msgMatch = rawMessage.match(/"message":\s*"([^"]+)"/);
	        if (msgMatch) {
	            errorMessage = msgMatch[1];
	        }
	    } else if (rawMessage.includes('All endpoints failed')) {
	        errorType = 'api_error';
	        statusCode = 503;
	        errorMessage = 'Unable to connect to Claude API. Check that Antigravity is running.';
	    } else if (rawMessage.includes('PERMISSION_DENIED')) {
	        // The upstream message is passed through unchanged
	        errorType = 'permission_error';
	        statusCode = 403;
	    }

	    return { errorType, statusCode, errorMessage };
	}

	/**
	 * Request logging middleware.
	 * Logs method, URL, status and duration once the response has finished.
	 * A few high-frequency URLs are only logged when debug logging is enabled;
	 * all others are logged at a level chosen from the status code.
	 */
	app.use((req, res, next) => {
	    const start = Date.now();

	    // Log response on finish
	    res.on('finish', () => {
	        const duration = Date.now() - start;
	        const status = res.statusCode;
	        const url = req.originalUrl;
	        const logMsg = `[${req.method}] ${url} ${status} (${duration}ms)`;

	        // Skip standard logging for noisy endpoints unless in debug mode
	        const isNoisyEndpoint = url === '/api/event_logging/batch'
	            || url.startsWith('/v1/messages/count_tokens')
	            || url.startsWith('/.well-known/');

	        if (isNoisyEndpoint) {
	            if (logger.isDebugEnabled) {
	                logger.debug(logMsg);
	            }
	            return;
	        }

	        // Pick the log level from the status code
	        if (status >= 500) {
	            logger.error(logMsg);
	        } else if (status >= 400) {
	            logger.warn(logMsg);
	        } else {
	            logger.info(logMsg);
	        }
	    });

	    next();
	});

	/**
	 * Silent handler for Claude Code CLI root POST requests.
	 * Replies 200 { status: 'ok' } to POST /.
	 * Note: an app.use middleware registered earlier in this file already
	 * answers POST / with the same body, so this route acts only as a fallback.
	 */
	app.post('/', (_req, res) => {
	    const okBody = { status: 'ok' };
	    res.status(200).json(okBody);
	});

	/**
	 * Test endpoint: POST /test/clear-signature-cache
	 * Empties the thinking signature cache so cross-model tests can exercise
	 * cold-cache scenarios, then confirms with a JSON body.
	 */
	app.post('/test/clear-signature-cache', (_req, res) => {
	    clearThinkingSignatureCache();
	    logger.debug('[Test] Cleared thinking signature cache');

	    const payload = { success: true, message: 'Thinking signature cache cleared' };
	    res.json(payload);
	});

	/**
	 * Health check endpoint - Detailed status
	 * GET /health
	 * Returns the pool summary and counts plus, for each account, its active
	 * model rate limits and the model quotas fetched from upstream.
	 * Responds 503 with an error body when initialization or the check itself fails.
	 */
	app.get('/health', async (req, res) => {
	    try {
	        await ensureInitialized();
	        const start = Date.now();

	        // Get high-level status first
	        const status = accountManager.getStatus();
	        const allAccounts = accountManager.getAllAccounts();

	        // Fetch quotas for each account in parallel to get detailed model info
	        const accountDetails = await Promise.allSettled(
	            allAccounts.map(async (account) => {
	                // Check model-specific rate limits (tolerating null entries)
	                const activeModelLimits = Object.values(account.modelRateLimits || {})
	                    .filter((limit) => limit?.isRateLimited && limit.resetTime > Date.now());
	                const isRateLimited = activeModelLimits.length > 0;
	                const soonestReset = isRateLimited
	                    ? Math.min(...activeModelLimits.map((limit) => limit.resetTime))
	                    : null;

	                const baseInfo = {
	                    email: account.email,
	                    lastUsed: account.lastUsed ? new Date(account.lastUsed).toISOString() : null,
	                    modelRateLimits: account.modelRateLimits || {},
	                    rateLimitCooldownRemaining: soonestReset ? Math.max(0, soonestReset - Date.now()) : 0
	                };

	                // Skip invalid accounts for quota check
	                if (account.isInvalid) {
	                    const reason = account.invalidReason?.toLowerCase();
	                    const isBanned = reason?.includes('banned') || reason?.includes('terms of service');
	                    return {
	                        ...baseInfo,
	                        status: isBanned ? 'banned' : 'invalid',
	                        error: account.invalidReason,
	                        models: {}
	                    };
	                }

	                try {
	                    const token = await accountManager.getTokenForAccount(account);
	                    const projectId = account.subscription?.projectId || null;
	                    const quotas = await getModelQuotas(token, projectId);

	                    // Format quotas for readability
	                    const formattedQuotas = {};
	                    for (const [modelId, info] of Object.entries(quotas)) {
	                        // `!= null` also covers undefined, which would otherwise render as "NaN%"
	                        const hasFraction = info.remainingFraction != null;
	                        formattedQuotas[modelId] = {
	                            remaining: hasFraction ? `${Math.round(info.remainingFraction * 100)}%` : 'N/A',
	                            remainingFraction: info.remainingFraction,
	                            resetTime: info.resetTime || null
	                        };
	                    }

	                    return {
	                        ...baseInfo,
	                        status: isRateLimited ? 'rate-limited' : 'ok',
	                        models: formattedQuotas
	                    };
	                } catch (error) {
	                    // A failing account is reported in place; it does not fail the whole check
	                    return {
	                        ...baseInfo,
	                        status: 'error',
	                        error: error.message,
	                        models: {}
	                    };
	                }
	            })
	        );

	        // Process results
	        const detailedAccounts = accountDetails.map((result, index) => {
	            if (result.status === 'fulfilled') {
	                return result.value;
	            }

	            const acc = allAccounts[index];
	            return {
	                email: acc.email,
	                status: 'error',
	                error: result.reason?.message || 'Unknown error',
	                modelRateLimits: acc.modelRateLimits || {}
	            };
	        });

	        res.json({
	            status: 'ok',
	            timestamp: new Date().toISOString(),
	            latencyMs: Date.now() - start,
	            summary: status.summary,
	            counts: {
	                total: status.total,
	                available: status.available,
	                rateLimited: status.rateLimited,
	                invalid: status.invalid
	            },
	            accounts: detailedAccounts
	        });

	    } catch (error) {
	        logger.error('[API] Health check failed:', error);
	        res.status(503).json({
	            status: 'error',
	            error: error.message,
	            timestamp: new Date().toISOString()
	        });
	    }
	});

	/**
	 * Fetch the subscription tier and model quotas for a single account.
	 * On success the account object is updated with the fresh subscription and
	 * quota data and a save to disk is started without waiting for it.
	 * Failures are returned as a result object rather than thrown.
	 *
	 * @param {object} account - Account entry from accountManager.getAllAccounts()
	 * @returns {Promise<object>} { email, status, models, error?, subscription? }
	 */
	async function fetchAccountLimits(account) {
	    // Skip invalid accounts
	    if (account.isInvalid) {
	        return {
	            email: account.email,
	            status: 'invalid',
	            error: account.invalidReason,
	            models: {}
	        };
	    }

	    try {
	        const token = await accountManager.getTokenForAccount(account);

	        // Fetch subscription tier first to get project ID
	        const subscription = await getSubscriptionTier(token);

	        // Then fetch quotas with project ID for accurate quota info
	        const quotas = await getModelQuotas(token, subscription.projectId);

	        // Update account object with fresh data
	        account.subscription = {
	            tier: subscription.tier,
	            projectId: subscription.projectId,
	            detectedAt: Date.now()
	        };
	        account.quota = {
	            models: quotas,
	            lastChecked: Date.now()
	        };

	        // Save updated account data to disk (async, don't wait)
	        accountManager.saveToDisk().catch((err) => {
	            logger.error('[Server] Failed to save account data:', err);
	        });

	        return {
	            email: account.email,
	            status: 'ok',
	            subscription: account.subscription,
	            models: quotas
	        };
	    } catch (error) {
	        // Detect ToS ban from quota/subscription fetch and mark account invalid
	        if (error.message?.startsWith('ACCOUNT_BANNED:')) {
	            const banReason = 'Account banned — Gemini disabled for Terms of Service violation';
	            accountManager.markInvalid(account.email, banReason);
	            return {
	                email: account.email,
	                status: 'banned',
	                error: banReason,
	                subscription: account.subscription || { tier: 'unknown', projectId: null },
	                models: {}
	            };
	        }
	        return {
	            email: account.email,
	            status: 'error',
	            error: error.message,
	            subscription: account.subscription || { tier: 'unknown', projectId: null },
	            models: {}
	        };
	    }
	}

	/**
	 * Render the plain-text report for GET /account-limits?format=table.
	 * Produces a title, a per-account status table, and a model × account
	 * quota table.
	 *
	 * @param {object[]} accountLimits - Per-account results from fetchAccountLimits
	 * @param {string[]} sortedModels - Sorted list of all model IDs seen
	 * @param {object} status - Result of accountManager.getStatus()
	 * @returns {string} Newline-joined table text
	 */
	function renderAccountLimitsTable(accountLimits, sortedModels, status) {
	    const lines = [];
	    const timestamp = new Date().toLocaleString();
	    lines.push(`Account Limits (${timestamp})`);
	    lines.push(`Accounts: ${status.total} total, ${status.available} available, ${status.rateLimited} rate-limited, ${status.invalid} invalid`);
	    lines.push('');

	    // Table 1: Account status
	    const accColWidth = 25;
	    const statusColWidth = 15;
	    const lastUsedColWidth = 25;
	    const resetColWidth = 25;

	    const accHeader = 'Account'.padEnd(accColWidth) + 'Status'.padEnd(statusColWidth) + 'Last Used'.padEnd(lastUsedColWidth) + 'Quota Reset';
	    lines.push(accHeader);
	    lines.push('─'.repeat(accColWidth + statusColWidth + lastUsedColWidth + resetColWidth));

	    // Index results by email once instead of searching the array per row
	    // (first entry wins, matching Array.prototype.find).
	    const limitsByEmail = new Map();
	    for (const entry of accountLimits) {
	        if (!limitsByEmail.has(entry.email)) {
	            limitsByEmail.set(entry.email, entry);
	        }
	    }

	    // The "Quota Reset" column uses the first model whose ID contains "claude"
	    const claudeModel = sortedModels.find((m) => m.includes('claude'));

	    for (const acc of status.accounts) {
	        const shortEmail = acc.email.split('@')[0].slice(0, 22);
	        const lastUsed = acc.lastUsed ? new Date(acc.lastUsed).toLocaleString() : 'never';

	        // Get status and error from accountLimits
	        const accLimit = limitsByEmail.get(acc.email);
	        let accStatus;
	        if (acc.isInvalid) {
	            accStatus = 'invalid';
	        } else if (accLimit?.status === 'error') {
	            accStatus = 'error';
	        } else {
	            // Count exhausted models (0% or no remaining fraction reported)
	            const models = accLimit?.models || {};
	            const modelCount = Object.keys(models).length;
	            const exhaustedCount = Object.values(models).filter(
	                (q) => q.remainingFraction === 0 || q.remainingFraction == null
	            ).length;

	            accStatus = exhaustedCount === 0
	                ? 'ok'
	                : `(${exhaustedCount}/${modelCount}) limited`;
	        }

	        // Get reset time from quota API
	        const quota = claudeModel && accLimit?.models?.[claudeModel];
	        const resetTime = quota?.resetTime
	            ? new Date(quota.resetTime).toLocaleString()
	            : '-';

	        lines.push(shortEmail.padEnd(accColWidth) + accStatus.padEnd(statusColWidth) + lastUsed.padEnd(lastUsedColWidth) + resetTime);

	        // Add error on next line if present
	        if (accLimit?.error) {
	            lines.push(' └─ ' + accLimit.error);
	        }
	    }
	    lines.push('');

	    // Table 2: quota per model and account.
	    // Calculate column widths - need more space for reset time info
	    const modelColWidth = Math.max(28, ...sortedModels.map((m) => m.length)) + 2;
	    const accountColWidth = 30;

	    // Header row
	    let header = 'Model'.padEnd(modelColWidth);
	    for (const acc of accountLimits) {
	        const shortEmail = acc.email.split('@')[0].slice(0, 26);
	        header += shortEmail.padEnd(accountColWidth);
	    }
	    lines.push(header);
	    lines.push('─'.repeat(modelColWidth + accountLimits.length * accountColWidth));

	    // Data rows
	    for (const modelId of sortedModels) {
	        let row = modelId.padEnd(modelColWidth);
	        for (const acc of accountLimits) {
	            const quota = acc.models?.[modelId];
	            let cell;
	            if (acc.status !== 'ok' && acc.status !== 'rate-limited') {
	                cell = `[${acc.status}]`;
	            } else if (!quota) {
	                cell = '-';
	            } else if (quota.remainingFraction === 0 || quota.remainingFraction == null) {
	                // Show reset time for exhausted models
	                if (quota.resetTime) {
	                    const resetMs = new Date(quota.resetTime).getTime() - Date.now();
	                    cell = resetMs > 0
	                        ? `0% (wait ${formatDuration(resetMs)})`
	                        : '0% (resetting...)';
	                } else {
	                    cell = '0% (exhausted)';
	                }
	            } else {
	                const pct = Math.round(quota.remainingFraction * 100);
	                cell = `${pct}%`;
	            }
	            row += cell.padEnd(accountColWidth);
	        }
	        lines.push(row);
	    }

	    return lines.join('\n');
	}

	/**
	 * Account limits endpoint - fetch quota/limits for all accounts × all models
	 * GET /account-limits
	 * Returns remaining quota and reset time for each combination.
	 * Use ?format=table for ASCII table output, default is JSON.
	 * Use ?includeHistory=true to add usage history to the JSON response.
	 */
	app.get('/account-limits', async (req, res) => {
	    try {
	        await ensureInitialized();
	        const allAccounts = accountManager.getAllAccounts();
	        const format = req.query.format || 'json';
	        const includeHistory = req.query.includeHistory === 'true';

	        // Fetch quotas for each account in parallel
	        const results = await Promise.allSettled(
	            allAccounts.map((account) => fetchAccountLimits(account))
	        );

	        // Process results
	        const accountLimits = results.map((result, index) => {
	            if (result.status === 'fulfilled') {
	                return result.value;
	            }
	            return {
	                email: allAccounts[index].email,
	                status: 'error',
	                error: result.reason?.message || 'Unknown error',
	                models: {}
	            };
	        });

	        // Collect all unique model IDs
	        const allModelIds = new Set();
	        for (const account of accountLimits) {
	            for (const modelId of Object.keys(account.models || {})) {
	                allModelIds.add(modelId);
	            }
	        }

	        const sortedModels = Array.from(allModelIds).sort();

	        // Return ASCII table format
	        if (format === 'table') {
	            res.setHeader('Content-Type', 'text/plain; charset=utf-8');
	            const table = renderAccountLimitsTable(accountLimits, sortedModels, accountManager.getStatus());
	            return res.send(table);
	        }

	        // Get account metadata from AccountManager
	        const accountStatus = accountManager.getStatus();
	        const accountMetadataMap = new Map(
	            accountStatus.accounts.map((a) => [a.email, a])
	        );

	        // Build response data
	        const responseData = {
	            timestamp: new Date().toLocaleString(),
	            totalAccounts: allAccounts.length,
	            models: sortedModels,
	            modelConfig: config.modelMapping || {},
	            globalQuotaThreshold: config.globalQuotaThreshold || 0,
	            accounts: accountLimits.map((acc) => {
	                // Merge quota data with account metadata
	                const metadata = accountMetadataMap.get(acc.email) || {};
	                return {
	                    email: acc.email,
	                    status: acc.status,
	                    error: acc.error || null,
	                    // Include metadata from AccountManager (WebUI needs these)
	                    source: metadata.source || 'unknown',
	                    enabled: metadata.enabled !== false,
	                    projectId: metadata.projectId || null,
	                    isInvalid: metadata.isInvalid || false,
	                    invalidReason: metadata.invalidReason || null,
	                    verifyUrl: metadata.verifyUrl || null,
	                    lastUsed: metadata.lastUsed || null,
	                    modelRateLimits: metadata.modelRateLimits || {},
	                    // Quota threshold settings
	                    quotaThreshold: metadata.quotaThreshold,
	                    modelQuotaThresholds: metadata.modelQuotaThresholds || {},
	                    // Subscription data (new)
	                    subscription: acc.subscription || metadata.subscription || { tier: 'unknown', projectId: null },
	                    // Quota limits
	                    limits: Object.fromEntries(
	                        sortedModels.map((modelId) => {
	                            const quota = acc.models?.[modelId];
	                            if (!quota) {
	                                return [modelId, null];
	                            }
	                            return [modelId, {
	                                // `!= null` also covers undefined, which would otherwise render as "NaN%"
	                                remaining: quota.remainingFraction != null
	                                    ? `${Math.round(quota.remainingFraction * 100)}%`
	                                    : 'N/A',
	                                remainingFraction: quota.remainingFraction,
	                                resetTime: quota.resetTime || null
	                            }];
	                        })
	                    )
	                };
	            })
	        };

	        // Optionally include usage history (for dashboard performance optimization)
	        if (includeHistory) {
	            responseData.history = usageStats.getHistory();
	        }

	        res.json(responseData);
	    } catch (error) {
	        logger.error('[API] Account limits failed:', error);
	        res.status(500).json({
	            status: 'error',
	            error: error.message
	        });
	    }
	});

	/**
	 * Force token refresh endpoint
	 * POST /refresh-token
	 * Clears the account manager's token and project caches, forces a refresh
	 * of the default token, and reports the first 10 characters of the new one.
	 * Any failure is returned as a 500 with the error message.
	 */
	app.post('/refresh-token', async (_req, res) => {
	    try {
	        await ensureInitialized();

	        // Drop every cached token and project before refreshing
	        accountManager.clearTokenCache();
	        accountManager.clearProjectCache();

	        const refreshedToken = await forceRefresh();
	        const tokenPrefix = `${refreshedToken.substring(0, 10)}...`;

	        res.json({
	            status: 'ok',
	            message: 'Token caches cleared and refreshed',
	            tokenPrefix
	        });
	    } catch (err) {
	        res.status(500).json({ status: 'error', error: err.message });
	    }
	});

	/**
	 * List models endpoint (OpenAI-compatible format)
	 * GET /v1/models
	 * Picks an account, obtains its token and returns listModels() verbatim.
	 * Responds 503 when no account can be selected and 500 on any other failure.
	 */
	app.get('/v1/models', async (_req, res) => {
	    const apiError = (message) => ({
	        type: 'error',
	        error: { type: 'api_error', message }
	    });

	    try {
	        await ensureInitialized();

	        const selection = accountManager.selectAccount();
	        const selectedAccount = selection.account;
	        if (!selectedAccount) {
	            return res.status(503).json(apiError('No accounts available'));
	        }

	        const accessToken = await accountManager.getTokenForAccount(selectedAccount);
	        const modelList = await listModels(accessToken);
	        res.json(modelList);
	    } catch (err) {
	        logger.error('[API] Error listing models:', err);
	        res.status(500).json(apiError(err.message));
	    }
	});

	/**
	 * Count tokens endpoint - placeholder for the Anthropic Messages API route
	 * POST /v1/messages/count_tokens
	 * No tokenization is performed: every request is answered with
	 * 501 and a not_implemented error body.
	 */
	app.post('/v1/messages/count_tokens', (_req, res) => {
	    const notImplemented = {
	        type: 'error',
	        error: {
	            type: 'not_implemented',
	            message: 'Token counting is not implemented. Use /v1/messages with max_tokens or configure your client to skip token counting.'
	        }
	    };

	    res.status(501).json(notImplemented);
	});

	/**
	 * Write one Server-Sent Event frame to the response.
	 *
	 * @param {object} res - Express response with SSE headers already sent
	 * @param {string} eventName - Value for the "event:" line
	 * @param {object} payload - JSON-serialized into the "data:" line
	 */
	function writeSseEvent(res, eventName, payload) {
	    res.write(`event: ${eventName}\ndata: ${JSON.stringify(payload)}\n\n`);
	}

	/**
	 * Main messages endpoint - Anthropic-compatible Messages API
	 * POST /v1/messages
	 *
	 * Validates the body locally, applies the configured model mapping, checks
	 * the model against upstream, then either streams the reply as Server-Sent
	 * Events or returns it as a single JSON document. Errors are translated
	 * with parseError(); once streaming has started they are delivered as an
	 * SSE "error" event instead of an HTTP status.
	 */
	app.post('/v1/messages', async (req, res) => {
	    try {
	        // Ensure account manager is initialized
	        await ensureInitialized();

	        // Tolerate a missing body so it is rejected with a 400 below
	        const {
	            model,
	            messages,
	            stream,
	            system,
	            max_tokens,
	            tools,
	            tool_choice,
	            thinking,
	            top_p,
	            top_k,
	            temperature
	        } = req.body ?? {};

	        // Validate required fields before making any upstream call
	        if (!messages || !Array.isArray(messages)) {
	            return res.status(400).json({
	                type: 'error',
	                error: {
	                    type: 'invalid_request_error',
	                    message: 'messages is required and must be an array'
	                }
	            });
	        }

	        // Filter out "count" requests (often automated background checks)
	        if (messages.length === 1 && messages[0]?.content === 'count') {
	            return res.json({});
	        }

	        // Resolve model mapping if configured
	        let requestedModel = model || 'claude-3-5-sonnet-20241022';
	        const modelMapping = config.modelMapping || {};
	        if (modelMapping[requestedModel] && modelMapping[requestedModel].mapping) {
	            const targetModel = modelMapping[requestedModel].mapping;
	            logger.info(`[Server] Mapping model ${requestedModel} -> ${targetModel}`);
	            requestedModel = targetModel;
	        }

	        const modelId = requestedModel;

	        // Validate model ID before processing (skipped when no account is selectable)
	        const { account: validationAccount } = accountManager.selectAccount();
	        if (validationAccount) {
	            const token = await accountManager.getTokenForAccount(validationAccount);
	            const projectId = validationAccount.subscription?.projectId || null;
	            const valid = await isValidModel(modelId, token, projectId);

	            if (!valid) {
	                // The "invalid_request_error" prefix makes parseError() map this to a 400
	                throw new Error(`invalid_request_error: Invalid model: ${modelId}. Use /v1/models to see available models.`);
	            }
	        }

	        // Optimistic Retry: If ALL accounts are rate-limited for this model, reset them to force a fresh check.
	        // If we have some available accounts, we try them first.
	        if (accountManager.isAllRateLimited(modelId)) {
	            logger.warn(`[Server] All accounts rate-limited for ${modelId}. Resetting state for optimistic retry.`);
	            accountManager.resetAllRateLimits();
	        }

	        // Build the request object
	        const request = {
	            model: modelId,
	            messages,
	            max_tokens: max_tokens || 4096,
	            stream,
	            system,
	            tools,
	            tool_choice,
	            thinking,
	            top_p,
	            top_k,
	            temperature
	        };

	        logger.info(`[API] Request for model: ${request.model}, stream: ${!!stream}`);

	        // Debug: Log message structure to diagnose tool_use/tool_result ordering
	        if (logger.isDebugEnabled) {
	            logger.debug('[API] Message structure:');
	            messages.forEach((msg, i) => {
	                const contentTypes = Array.isArray(msg.content)
	                    ? msg.content.map((c) => c.type || 'text').join(', ')
	                    : (typeof msg.content === 'string' ? 'text' : 'unknown');
	                logger.debug(` [${i}] ${msg.role}: ${contentTypes}`);
	            });
	        }

	        if (!stream) {
	            // Handle non-streaming response
	            const response = await sendMessage(request, accountManager, FALLBACK_ENABLED);
	            return res.json(response);
	        }

	        // Handle streaming response
	        // Do NOT flush headers immediately. We need to wait for the first chunk
	        // to ensure we don't send a 200 OK if the upstream fails immediately (e.g. 429/503).
	        try {
	            // Initialize the generator
	            const generator = sendMessageStream(request, accountManager, FALLBACK_ENABLED);

	            // BUFFERING STRATEGY:
	            // Pull the first event before sending headers.
	            // If this throws, we can safely send a 4xx/5xx error JSON.
	            const firstResult = await generator.next();

	            // If we get here, the stream started successfully.
	            res.status(200);
	            res.setHeader('Content-Type', 'text/event-stream');
	            res.setHeader('Cache-Control', 'no-cache');
	            res.setHeader('Connection', 'keep-alive');
	            res.setHeader('X-Accel-Buffering', 'no');
	            res.flushHeaders();

	            // If the generator isn't done, send the first chunk
	            if (!firstResult.done) {
	                writeSseEvent(res, firstResult.value.type, firstResult.value);
	                if (res.flush) res.flush();
	            }

	            // Continue with the rest of the stream
	            for await (const event of generator) {
	                writeSseEvent(res, event.type, event);
	                if (res.flush) res.flush();
	            }

	            res.end();

	        } catch (error) {
	            // If we haven't sent headers yet, we can send a proper error status
	            if (!res.headersSent) {
	                logger.error('[API] Initial stream error:', error);
	                const { errorType, statusCode, errorMessage } = parseError(error);

	                return res.status(statusCode).json({
	                    type: 'error',
	                    error: {
	                        type: errorType,
	                        message: errorMessage
	                    }
	                });
	            }

	            // If headers were already sent (should only happen if error occurs mid-stream),
	            // we have to fallback to SSE error event
	            logger.error('[API] Mid-stream error:', error);
	            const { errorType, errorMessage } = parseError(error);

	            writeSseEvent(res, 'error', {
	                type: 'error',
	                error: { type: errorType, message: errorMessage }
	            });
	            res.end();
	        }

	    } catch (error) {
	        logger.error('[API] Error:', error);

	        let { errorType, statusCode, errorMessage } = parseError(error);

	        // For auth errors, try to refresh token
	        if (errorType === 'authentication_error') {
	            logger.warn('[API] Token might be expired, attempting refresh...');
	            try {
	                accountManager.clearProjectCache();
	                accountManager.clearTokenCache();
	                await forceRefresh();
	                errorMessage = 'Token was expired and has been refreshed. Please retry your request.';
	            } catch (refreshError) {
	                errorMessage = 'Could not refresh token. Make sure Antigravity is running.';
	            }
	        }

	        logger.warn(`[API] Returning error response: ${statusCode} ${errorType} - ${errorMessage}`);

	        // Check if headers have already been sent (for streaming that failed mid-way)
	        if (res.headersSent) {
	            logger.warn('[API] Headers already sent, writing error as SSE event');
	            writeSseEvent(res, 'error', {
	                type: 'error',
	                error: { type: errorType, message: errorMessage }
	            });
	            res.end();
	        } else {
	            res.status(statusCode).json({
	                type: 'error',
	                error: {
	                    type: errorType,
	                    message: errorMessage
	                }
	            });
	        }
	    }
	});

	// Register the usage statistics routes before the catch-all below
	usageStats.setupRoutes(app);

	/**
	 * Catch-all for unsupported endpoints
	 * Any request that reaches this point gets a 404 not_found_error body
	 * naming the method and URL that were requested.
	 */
	app.use('*', (req, res) => {
	    // originalUrl is used because the wildcard mount strips req.path
	    const requested = `${req.method} ${req.originalUrl}`;

	    if (logger.isDebugEnabled) {
	        logger.debug(`[API] 404 Not Found: ${requested}`);
	    }

	    res.status(404).json({
	        type: 'error',
	        error: {
	            type: 'not_found_error',
	            message: `Endpoint ${requested} not found`
	        }
	    });
	});

	export default app;