Unverified commit fbb2a175, authored by Tashfeen, committed by GitHub

feat(catalog): migrateModelsV6 — probe-verified additions and Google RPD fix (#6)

Live-probed against real free-tier keys on 2026-04-25. Adds 8 models
that returned 200 with content, drops the one OR :free route that
404s, and corrects two Google rate-limits whose catalog values were
~10x-50x too high.

Adds:
- Cloudflare: @cf/moonshotai/kimi-k2.5, @cf/qwen/qwen3-30b-a3b-fp8,
  @cf/deepseek-ai/deepseek-r1-distill-qwen-32b
- Google preview: gemini-3-flash-preview, gemini-3.1-flash-lite-preview,
  gemini-3.1-pro-preview (Pro confirmed free-tier-eligible by the
  free_tier_requests quota metric in 429 errors)
- OpenRouter: google/gemma-4-31b-it:free, liquid/lfm-2.5-1.2b-instruct:free

Removes:
- openrouter/arcee-ai/trinity-large-preview:free (404 No endpoints found)

Corrects:
- gemini-2.5-flash and gemini-2.5-flash-lite RPD 250/1000 -> 20.
  Free tier now uniformly enforces 20 RPD per model per project.

Updates router test rationale: gemini-3.1-pro-preview at rank 1 now
outranks Groq's gpt-oss-120b (rank 6) when keys exist for both.
parent 73698e65
...@@ -52,10 +52,11 @@ describe('Router', () => { ...@@ -52,10 +52,11 @@ describe('Router', () => {
VALUES (?, ?, ?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?, ?, ?)
`).run('groq', 'test', groqKey.encrypted, groqKey.iv, groqKey.authTag, 'healthy', 1); `).run('groq', 'test', groqKey.encrypted, groqKey.iv, groqKey.authTag, 'healthy', 1);
// Post-V4: Groq's openai/gpt-oss-120b (rank 6) outranks Google's best free // Post-V6: Google's gemini-3.1-pro-preview (rank 1, free-tier-eligible per
// model gemini-2.5-pro (rank 14, demoted as at-risk after April 2026 free-tier cut). // probe on 2026-04-25) outranks Groq's best free-tier model openai/gpt-oss-120b
// (rank 6). With keys for both platforms, Google wins.
const result = routeRequest(); const result = routeRequest();
expect(result.platform).toBe('groq'); expect(result.platform).toBe('google');
}); });
it('should skip disabled keys', () => { it('should skip disabled keys', () => {
......
...@@ -40,6 +40,7 @@ export function initDb(dbPath?: string): Database.Database { ...@@ -40,6 +40,7 @@ export function initDb(dbPath?: string): Database.Database {
migrateModelsV3Ranks(db); migrateModelsV3Ranks(db);
migrateModelsV4(db); migrateModelsV4(db);
migrateModelsV5(db); migrateModelsV5(db);
migrateModelsV6(db);
ensureUnifiedKey(db); ensureUnifiedKey(db);
console.log(`Database initialized at ${resolvedPath}`); console.log(`Database initialized at ${resolvedPath}`);
...@@ -573,6 +574,93 @@ function migrateModelsV5(db: Database.Database) { ...@@ -573,6 +574,93 @@ function migrateModelsV5(db: Database.Database) {
apply(); apply();
} }
/**
 * V6 catalog migration: applies the results of live probes run against real
 * free-tier keys on 2026-04-25.
 *
 * All steps run inside ONE transaction, in this order, so a failure in any
 * statement rolls the whole migration back instead of leaving the catalog
 * half-migrated:
 *
 * 1. Removals: arcee-ai/trinity-large-preview:free returns 404 "No endpoints
 *    found" — pulled from OpenRouter's free pool. Its `fallback_config` rows
 *    are deleted first (they are looked up through the `models` row), then the
 *    model row itself. (Other previously-suspected dead OR :free IDs are still
 *    live in /api/v1/models, so they stay.)
 * 2. Corrections (Google free-tier RPD): the documented "250" / "1000" RPD
 *    numbers for gemini-2.5-flash and gemini-2.5-flash-lite are stale — both
 *    share a 20 RPD per-model-per-project free pool now. Confirmed by the
 *    `generate_content_free_tier_requests` quota error, limit 20.
 * 3. Additions (all probe-verified to return 200 with content on the user's
 *    keys):
 *      - 3 Cloudflare Workers AI reasoning models
 *      - 3 Google preview models, including Pro (which returned a free-tier
 *        429 against the same 20 RPD pool, confirming free-tier eligibility)
 *      - 2 OpenRouter :free models with no expiration_date
 * 4. Fallback back-fill: every `models` row that has no `fallback_config` row
 *    gets one, enabled, appended after the current highest priority in
 *    ascending `intelligence_rank` order.
 *
 * Additions use `INSERT OR IGNORE`; this presumably relies on a uniqueness
 * constraint on `models` covering (platform, model_id) to make re-runs a
 * no-op — TODO confirm against the schema.
 *
 * @param db Open database handle whose `models` and `fallback_config` tables
 *           already exist.
 */
function migrateModelsV6(db: Database.Database) {
  /** One catalog row, in the column order of the INSERT statement below. */
  type ModelRow = [
    platform: string,
    modelId: string,
    displayName: string,
    intelligenceRank: number,
    speedRank: number,
    sizeLabel: string,
    rpmLimit: number | null,
    rpdLimit: number | null,
    tpmLimit: number | null,
    tpdLimit: number | null,
    monthlyTokenBudget: string,
    contextWindow: number | null,
  ];

  // 1) Confirmed-dead OR route(s) to remove.
  const removals: Array<[platform: string, modelId: string]> = [
    ['openrouter', 'arcee-ai/trinity-large-preview:free'],
  ];

  // 2) Stale Google free-tier limits to correct.
  const limitCorrections: Array<[rpdLimit: number, monthlyTokenBudget: string, platform: string, modelId: string]> = [
    [20, '~3M', 'google', 'gemini-2.5-flash'],
    [20, '~3M', 'google', 'gemini-2.5-flash-lite'],
  ];

  // 3) Live-probed models to add.
  const additions: ModelRow[] = [
    // Cloudflare Workers AI — 10K Neurons/day shared free pool. Reasoning traces
    // burn output tokens fast, so per-call effective budget is small. Estimates
    // assume 1K-in/500-out typical: kimi-k2.5 ≈ 50/day, qwen3-30b ≈ 200/day,
    // r1-distill ≈ 5/day on the reasoning-heavy path.
    ['cloudflare', '@cf/moonshotai/kimi-k2.5', 'Kimi K2.5 (CF)', 3, 11, 'Frontier', null, null, null, null, '~10-20M', 262144],
    ['cloudflare', '@cf/qwen/qwen3-30b-a3b-fp8', 'Qwen3 30B-A3B fp8 (CF)', 7, 11, 'Large', null, null, null, null, '~18-45M', 131072],
    ['cloudflare', '@cf/deepseek-ai/deepseek-r1-distill-qwen-32b', 'DeepSeek R1 Distill Qwen 32B (CF)', 9, 11, 'Large', null, null, null, null, '~3-5M', 131072],
    // Google preview tier — shares the 20 RPD per-model free pool. Pro confirmed
    // free-tier-eligible by the `free_tier_requests` quota metric in 429 errors.
    ['google', 'gemini-3.1-flash-lite-preview', 'Gemini 3.1 Flash-Lite Preview', 18, 3, 'Medium', 15, 20, 250000, null, '~3M', 1048576],
    ['google', 'gemini-3-flash-preview', 'Gemini 3 Flash Preview', 11, 5, 'Large', 10, 20, 250000, null, '~3M', 1048576],
    ['google', 'gemini-3.1-pro-preview', 'Gemini 3.1 Pro Preview', 1, 8, 'Frontier', 5, 20, 250000, null, '~3M', 1048576],
    // OpenRouter :free pool — 20 RPM / 50 RPD (1000 once $10 credits bought).
    // NOTE(review): both rows below store rpd_limit = 200, which matches neither
    // the 50 nor the 1000 RPD figure quoted above — confirm which value the
    // router should enforce before relying on it.
    ['openrouter', 'google/gemma-4-31b-it:free', 'Gemma 4 31B (free)', 19, 9, 'Medium', 20, 200, null, null, '~6M', 262144],
    ['openrouter', 'liquid/lfm-2.5-1.2b-instruct:free', 'Liquid LFM 2.5 1.2B (free)', 30, 10, 'Small', 20, 200, null, null, '~6M', 32768],
  ];

  const deleteFallback = db.prepare(`
    DELETE FROM fallback_config WHERE model_db_id IN (
      SELECT id FROM models WHERE platform = ? AND model_id = ?
    )
  `);
  const deleteModel = db.prepare(`DELETE FROM models WHERE platform = ? AND model_id = ?`);
  const correctLimits = db.prepare(`
    UPDATE models SET rpd_limit = ?, monthly_token_budget = ?
    WHERE platform = ? AND model_id = ?
  `);
  const insert = db.prepare(`
    INSERT OR IGNORE INTO models (platform, model_id, display_name, intelligence_rank, speed_rank, size_label, rpm_limit, rpd_limit, tpm_limit, tpd_limit, monthly_token_budget, context_window)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
  const selectMissingFallbacks = db.prepare(`
    SELECT m.id FROM models m
    LEFT JOIN fallback_config f ON m.id = f.model_db_id
    WHERE f.id IS NULL ORDER BY m.intelligence_rank ASC
  `);
  const selectMaxPriority = db.prepare('SELECT COALESCE(MAX(priority), 0) AS mx FROM fallback_config');
  const addFallback = db.prepare('INSERT INTO fallback_config (model_db_id, priority, enabled) VALUES (?, ?, 1)');

  const apply = db.transaction(() => {
    // Fallback rows are resolved via a sub-select on `models`, so they must be
    // deleted before the model row they point at disappears.
    for (const [platform, modelId] of removals) {
      deleteFallback.run(platform, modelId);
      deleteModel.run(platform, modelId);
    }

    for (const correction of limitCorrections) correctLimits.run(...correction);

    for (const row of additions) insert.run(...row);

    // Give every model that still lacks a fallback entry one, after the
    // existing entries, best intelligence_rank first.
    const missing = selectMissingFallbacks.all() as Array<{ id: number }>;
    if (missing.length > 0) {
      const maxPriority = (selectMaxPriority.get() as { mx: number }).mx;
      missing.forEach((model, offset) => {
        addFallback.run(model.id, maxPriority + offset + 1);
      });
    }
  });
  apply();
}
function ensureUnifiedKey(db: Database.Database) { function ensureUnifiedKey(db: Database.Database) {
const existing = db.prepare("SELECT value FROM settings WHERE key = 'unified_api_key'").get() as { value: string } | undefined; const existing = db.prepare("SELECT value FROM settings WHERE key = 'unified_api_key'").get() as { value: string } | undefined;
if (!existing) { if (!existing) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment