72 commits
9dfcec0
Add QoS tier system for LLM model selection (#6831)
beastoin Apr 19, 2026
7539b58
Wire conversation processing to QoS tiers (#6831)
beastoin Apr 19, 2026
cdd4baf
Wire knowledge graph extraction to QoS tiers (#6831)
beastoin Apr 19, 2026
70a64b6
Wire memory extraction to QoS tiers (#6831)
beastoin Apr 19, 2026
a02c44d
Add tests for QoS tier system (#6831)
beastoin Apr 19, 2026
570c12c
Fix conv_events tier key and restore prompt_cache_key (#6831)
beastoin Apr 19, 2026
0baba53
Trim _FEATURE_TIER_DEFAULTS to wired features only (#6831)
beastoin Apr 19, 2026
718898c
Update stale test to verify QoS tier callsites (#6831)
beastoin Apr 19, 2026
7802b6d
Rename to Omi QoS and add model-safe cache_key param (#6831)
beastoin Apr 19, 2026
424601f
Use get_llm cache_key param for model-safe cache routing (#6831)
beastoin Apr 19, 2026
3c0d917
Rename test to test_omi_qos_tiers and add cache key safety tests (#6831)
beastoin Apr 19, 2026
8b4efa8
Update test.sh for renamed test_omi_qos_tiers.py (#6831)
beastoin Apr 19, 2026
4f44ff5
Remove unused model globals: llm_large, llm_large_stream, llm_high_st…
beastoin Apr 19, 2026
3c93baf
Update prompt caching test for Omi QoS tier system (#6831)
beastoin Apr 19, 2026
e97f25a
Update prompt cache tests for model globals cleanup (#6831)
beastoin Apr 19, 2026
5703566
Fix action item date validation tests for Omi QoS (#6831)
beastoin Apr 19, 2026
e6a1376
Fix NameError in test_prompt_cache_integration.py (#6831)
beastoin Apr 19, 2026
8b8647b
Add coverage for all 11 QoS callsites + _get_or_create_llm behavioral…
beastoin Apr 19, 2026
84773a1
Add _get_or_create_llm behavioral tests for cache miss path (#6831)
beastoin Apr 19, 2026
0273a81
Strengthen _get_or_create_llm tests: verify extra_body kwargs via con…
beastoin Apr 19, 2026
5247743
Rework Omi QoS to provider-agnostic Model QoS profiles (#6831)
beastoin Apr 19, 2026
ea34d65
Update tests for provider-agnostic Model QoS profiles (#6831)
beastoin Apr 19, 2026
e7f1a8f
Fix medium profile regression, cache poisoning, and provider guard (#…
beastoin Apr 19, 2026
3f77650
Wire persona.py through Model QoS get_llm() (#6831)
beastoin Apr 19, 2026
c08b23a
Wire generate_2025.py through Model QoS get_llm('wrapped_analysis') (…
beastoin Apr 19, 2026
5c12faa
Wire perplexity_tools.py through Model QoS get_model('web_search') (#…
beastoin Apr 19, 2026
0bf9b25
Update QoS tests for medium profile fix and provider safety guard (#6…
beastoin Apr 19, 2026
5a54d08
Fix test_prompt_caching regex for renamed _get_or_create_openai_llm (…
beastoin Apr 19, 2026
bbc4f16
Fix generate_persona_intro_message to use get_llm instead of llm_medi…
beastoin Apr 19, 2026
b978bb1
Wire chat.py through Model QoS get_llm() (#6831)
beastoin Apr 19, 2026
0be4cce
Wire goals.py through Model QoS get_llm() (#6831)
beastoin Apr 19, 2026
554c184
Wire notifications.py through Model QoS get_llm('notifications') (#6831)
beastoin Apr 19, 2026
e5a5c93
Wire followup.py through Model QoS get_llm('followup') (#6831)
beastoin Apr 19, 2026
aac88ec
Wire app_generator.py through Model QoS get_llm() (#6831)
beastoin Apr 19, 2026
99f89f4
Wire trends.py through Model QoS get_llm('trends') (#6831)
beastoin Apr 19, 2026
afc0c1d
Wire onboarding.py through Model QoS get_llm('onboarding') (#6831)
beastoin Apr 19, 2026
385e281
Simplify Model QoS to 2 profiles (premium/max) + add 7 new features
beastoin Apr 19, 2026
d09290f
Wire remaining conversation_processing.py callsites through QoS
beastoin Apr 19, 2026
77efb30
Wire memories.py learnings extractor through QoS
beastoin Apr 19, 2026
3cf464c
Remove unused llm_mini import from knowledge_graph.py
beastoin Apr 19, 2026
8fdfb2c
Wire proactive_notification.py through QoS system
beastoin Apr 19, 2026
0c6e08d
Wire external_integrations.py through QoS system
beastoin Apr 19, 2026
b47bdd2
Wire graph.py chat streaming through QoS system
beastoin Apr 19, 2026
6cdd523
Wire chat session title generation through QoS get_llm('session_titles')
beastoin Apr 19, 2026
70fd356
Wire app integration prompt through QoS get_llm('app_integration')
beastoin Apr 19, 2026
b910c6e
Wire app integration utility through QoS get_llm('app_integration')
beastoin Apr 19, 2026
2f3ef0e
Update QoS tests for 2-profile system (premium/max) with 33 features
beastoin Apr 19, 2026
8ede7d8
Update callsite coverage tests for expanded QoS wiring (16 callsites)
beastoin Apr 19, 2026
f043de1
Split conv_apps into conv_app_result/conv_app_select for backward com…
beastoin Apr 19, 2026
e2283e1
Use conv_app_result and conv_app_select features for backward-compati…
beastoin Apr 19, 2026
d6a900a
Add get_llm and parser stubs to prompt cache integration test
beastoin Apr 19, 2026
c2ded9b
Update QoS tests for conv_app_result/conv_app_select split
beastoin Apr 19, 2026
6d9fe20
Update callsite tests for conv_app_result/conv_app_select split
beastoin Apr 19, 2026
5cda85b
Add hard provider validation in get_llm() — reject cross-provider env…
beastoin Apr 19, 2026
632fc90
Add tests for cross-provider env override rejection
beastoin Apr 19, 2026
b409c52
Add profile selection tests + expanded callsite coverage for all 17 w…
beastoin Apr 19, 2026
0e5c072
Strengthen callsite coverage: exhaustive feature key + count assertio…
beastoin Apr 19, 2026
d73fb7d
Add goals_advice callsite assertion to complete coverage
beastoin Apr 19, 2026
3380d6a
Optimize max profile: 10→5 model variants, eliminate OpenRouter, ~55-…
beastoin Apr 19, 2026
991604c
Update QoS tests for optimized max profile and dynamic provider detec…
beastoin Apr 19, 2026
7f98597
Add override warning, runtime routing, and OpenRouter temperature tests
beastoin Apr 19, 2026
5d7e91f
Fix temperature test to verify get_llm applies OpenRouter config
beastoin Apr 19, 2026
5637d34
Set premium profile to current production models
beastoin Apr 19, 2026
a2409ae
Update tests for production model profiles
beastoin Apr 19, 2026
a9fdb9a
Premium=cost-saving optimized, Max=quality upgrade to latest models
beastoin Apr 19, 2026
dcfa6ed
Update tests for premium=optimized, max=quality-upgrade profiles
beastoin Apr 19, 2026
dfa18d1
Correct tier assignment: max=production+5.4, premium=cost-optimized
beastoin Apr 19, 2026
1bb1020
Update tests for corrected tier assignment
beastoin Apr 19, 2026
6f6635a
Add L1 integration test with real LLM API calls for all 34 QoS features
beastoin Apr 19, 2026
8df3f1a
Set premium as default profile, replace deprecated OpenRouter models
beastoin Apr 19, 2026
3458014
Update unit tests for premium default and direct API persona models
beastoin Apr 19, 2026
8cb43d5
Update L1 integration test for premium profile (41/41 PASS)
beastoin Apr 19, 2026
4 changes: 2 additions & 2 deletions backend/routers/apps.py
@@ -1152,7 +1152,7 @@ async def generate_sample_prompts_endpoint(
Generate sample app prompts for the AI app generator.
Uses a fast model to generate creative suggestions.
"""
-from utils.llm.clients import llm_mini
+from utils.llm.clients import get_llm
import json

system_prompt = """Generate 5 creative and diverse ideas for apps that are either:
@@ -1173,7 +1173,7 @@ async def generate_sample_prompts_endpoint(

try:
with track_usage(uid, Features.APP_GENERATOR):
-response = await llm_mini.ainvoke(
+response = await get_llm('app_integration').ainvoke(
[
{"role": "system", "content": system_prompt},
{"role": "user", "content": "Generate 5 creative app ideas now"},
4 changes: 2 additions & 2 deletions backend/routers/chat_sessions.py
@@ -209,15 +209,15 @@ def generate_session_title(
uid: str = Depends(auth.with_rate_limit(auth.get_current_user_uid, "chat:initial")),
):
"""Generate a title for a chat session based on its messages."""
-from utils.llm.clients import llm_mini
+from utils.llm.clients import get_llm

conversation = '\n'.join(f"{m.sender}: {m.text}" for m in request.messages[:10])
prompt = (
"Generate a short, descriptive title (max 6 words) for this chat conversation. "
"Return ONLY the title text, no quotes or punctuation.\n\n"
f"{conversation}"
)
-title = llm_mini.invoke(prompt).content.strip().strip('"\'')
+title = get_llm('session_titles').invoke(prompt).content.strip().strip('"\'')
if not title:
title = 'New Chat'

1 change: 1 addition & 0 deletions backend/test.sh
@@ -100,6 +100,7 @@
pytest tests/unit/test_async_auth.py -v
pytest tests/unit/test_thread_join_elimination.py -v
pytest tests/unit/test_async_http_infrastructure.py -v
pytest tests/unit/test_clean_sweep_migrations.py -v
+pytest tests/unit/test_omi_qos_tiers.py -v

# Fair-use integration tests (require Redis; skip gracefully if unavailable)
if redis-cli ping >/dev/null 2>&1; then
234 changes: 234 additions & 0 deletions backend/tests/integration/test_qos_real_llm.py
@@ -0,0 +1,234 @@
"""
L1 Integration Test — Real LLM API calls for Omi QoS profiles.

Tests that get_model() and get_llm() resolve correctly AND that the resolved
models respond to real prompts. Each test sends a minimal prompt and verifies
a non-empty response.

Default profile is premium. Set MODEL_QOS=max to test max profile.

Requires: OPENAI_API_KEY, OPENROUTER_API_KEY, ANTHROPIC_API_KEY, PERPLEXITY_API_KEY in .env.
Run: cd backend && python3 -m pytest tests/integration/test_qos_real_llm.py -v -s
"""

import os
import sys
import httpx
import pytest

# Add backend to path for imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))

# Load .env before importing clients
from dotenv import load_dotenv

load_dotenv(os.path.join(os.path.dirname(__file__), '..', '..', '.env'))

from utils.llm.clients import (
MODEL_QOS_PROFILES,
get_model,
get_llm,
get_qos_info,
_classify_provider,
_active_profile_name,
anthropic_client,
ANTHROPIC_AGENT_MODEL,
)

SIMPLE_PROMPT = "Reply with exactly one word: hello"


# ---------------------------------------------------------------------------
# Helper: call Perplexity via HTTP (same pattern as perplexity_tools.py)
# ---------------------------------------------------------------------------
def call_perplexity(model: str, prompt: str) -> str:
url = "https://api.perplexity.ai/chat/completions"
headers = {
"Authorization": f"Bearer {os.environ['PERPLEXITY_API_KEY']}",
"Content-Type": "application/json",
}
body = {
"model": model,
"messages": [{"role": "user", "content": prompt}],
"max_tokens": 50,
}
resp = httpx.post(url, json=body, headers=headers, timeout=30)
resp.raise_for_status()
return resp.json()["choices"][0]["message"]["content"]


# ---------------------------------------------------------------------------
# Premium profile — gpt-5.4-mini features (flagship tier)
# ---------------------------------------------------------------------------
class TestPremiumFlagship:
"""Test gpt-5.4-mini features in premium profile respond to real prompts."""

FLAGSHIP_FEATURES = [
'conv_action_items',
'conv_structure',
'conv_app_result',
'daily_summary',
'learnings',
'chat_responses',
'goals_advice',
'notifications',
'app_generator',
'persona_clone',
'persona_chat_premium',
]

@pytest.mark.parametrize("feature", FLAGSHIP_FEATURES)
def test_flagship_feature_responds(self, feature):
model = get_model(feature)
assert model == 'gpt-5.4-mini', f"{feature} should be gpt-5.4-mini in premium, got {model}"
llm = get_llm(feature)
response = llm.invoke(SIMPLE_PROMPT)
assert response.content.strip(), f"{feature} ({model}) returned empty response"
print(f" {feature} ({model}): {response.content.strip()[:60]}")


# ---------------------------------------------------------------------------
# Premium profile — gpt-4.1-nano features (cheap tier)
# ---------------------------------------------------------------------------
class TestPremiumNano:
"""Test gpt-4.1-nano features in premium profile respond to real prompts."""

NANO_FEATURES = [
'conv_app_select',
'conv_folder',
'conv_discard',
'daily_summary_simple',
'external_structure',
'memories',
'memory_conflict',
'memory_category',
'knowledge_graph',
'chat_extraction',
'chat_graph',
'session_titles',
'goals',
'proactive_notification',
'followup',
'smart_glasses',
'onboarding',
'app_integration',
'trends',
'persona_chat',
]

@pytest.mark.parametrize("feature", NANO_FEATURES)
def test_nano_feature_responds(self, feature):
model = get_model(feature)
assert model == 'gpt-4.1-nano', f"{feature} should be gpt-4.1-nano in premium, got {model}"
llm = get_llm(feature)
response = llm.invoke(SIMPLE_PROMPT)
assert response.content.strip(), f"{feature} ({model}) returned empty response"
print(f" {feature} ({model}): {response.content.strip()[:60]}")


# ---------------------------------------------------------------------------
# Premium profile — OpenRouter (only wrapped_analysis)
# ---------------------------------------------------------------------------
class TestPremiumOpenRouter:
"""Test OpenRouter feature responds."""

def test_wrapped_analysis(self):
model = get_model('wrapped_analysis')
assert model == 'google/gemini-3-flash-preview'
llm = get_llm('wrapped_analysis')
response = llm.invoke(SIMPLE_PROMPT)
assert response.content.strip(), f"wrapped_analysis ({model}) returned empty response"
print(f" wrapped_analysis ({model}): {response.content.strip()[:60]}")


# ---------------------------------------------------------------------------
# Premium profile — Anthropic (via get_model + anthropic_client)
# ---------------------------------------------------------------------------
class TestPremiumAnthropic:
"""Test chat_agent via Anthropic client (get_model, not get_llm)."""

@pytest.mark.asyncio
async def test_chat_agent_anthropic(self):
model = get_model('chat_agent')
assert model == 'claude-sonnet-4-6', f"chat_agent should be claude-sonnet-4-6, got {model}"
assert model == ANTHROPIC_AGENT_MODEL

response = await anthropic_client.messages.create(
model=model,
max_tokens=50,
messages=[{"role": "user", "content": SIMPLE_PROMPT}],
)
text = response.content[0].text.strip()
assert text, f"chat_agent ({model}) returned empty response"
print(f" chat_agent ({model}): {text[:60]}")


# ---------------------------------------------------------------------------
# Premium profile — Perplexity (via get_model + HTTP client)
# ---------------------------------------------------------------------------
class TestPremiumPerplexity:
"""Test web_search via Perplexity HTTP client (get_model, not get_llm)."""

def test_web_search_perplexity(self):
model = get_model('web_search')
assert model == 'sonar-pro', f"web_search should be sonar-pro, got {model}"
text = call_perplexity(model, "What is 2+2? Reply in one word.")
assert text.strip(), f"web_search ({model}) returned empty response"
print(f" web_search ({model}): {text.strip()[:60]}")


# ---------------------------------------------------------------------------
# Profile routing verification
# ---------------------------------------------------------------------------
class TestProfileRouting:
"""Verify get_qos_info returns correct provider classification for all features."""

def test_all_features_have_valid_provider(self):
info = get_qos_info()
valid_providers = {'openai', 'anthropic', 'openrouter', 'perplexity'}
for feature, details in info.items():
assert details['provider'] in valid_providers, f"{feature}: invalid provider {details['provider']}"
print(f" {feature}: {details['model']} ({details['provider']})")

def test_active_profile_is_premium(self):
assert _active_profile_name == 'premium'

def test_premium_profile_has_expected_variant_count(self):
distinct = set(MODEL_QOS_PROFILES['premium'].values())
assert len(distinct) == 5, f"Expected 5 variants in premium, got {len(distinct)}: {distinct}"

def test_max_profile_has_expected_variant_count(self):
distinct = set(MODEL_QOS_PROFILES['max'].values())
assert len(distinct) == 9, f"Expected 9 variants in max, got {len(distinct)}: {distinct}"


# ---------------------------------------------------------------------------
# Streaming support — verify streaming clients work
# ---------------------------------------------------------------------------
class TestStreamingClients:
"""Test that streaming clients respond to real prompts."""

def test_streaming_openai(self):
llm = get_llm('chat_responses', streaming=True)
response = llm.invoke(SIMPLE_PROMPT)
assert response.content.strip(), "Streaming chat_responses returned empty"
print(f" streaming chat_responses: {response.content.strip()[:60]}")

def test_streaming_openrouter(self):
llm = get_llm('wrapped_analysis', streaming=True)
response = llm.invoke(SIMPLE_PROMPT)
assert response.content.strip(), "Streaming wrapped_analysis returned empty"
print(f" streaming wrapped_analysis: {response.content.strip()[:60]}")


# ---------------------------------------------------------------------------
# Cache key — verify prompt cache binding works with real API
# ---------------------------------------------------------------------------
class TestCacheKeyReal:
"""Test that cache_key binding still produces valid responses."""

def test_cache_key_with_gpt54_mini(self):
llm = get_llm('conv_action_items', cache_key='omi-test-integration')
response = llm.invoke(SIMPLE_PROMPT)
assert response.content.strip(), "cache_key conv_action_items returned empty"
print(f" cache_key conv_action_items: {response.content.strip()[:60]}")
33 changes: 17 additions & 16 deletions backend/tests/unit/test_action_item_date_validation.py
@@ -153,6 +153,7 @@ def fake_tool(func=None, **kwargs):
llm_clients_stub.parser = MagicMock()
llm_clients_stub.llm_high = MagicMock()
llm_clients_stub.llm_medium_experiment = MagicMock()
+llm_clients_stub.get_llm = MagicMock(return_value=MagicMock())

# Load models first
_stub_package("models")
@@ -398,13 +399,13 @@ def test_clears_past_due_dates_from_extraction(self):
BACKEND_DIR / "utils" / "llm" / "conversation_processing.py",
)

-with patch.object(conv_proc, 'llm_medium_experiment') as mock_llm, patch.object(
+mock_llm = MagicMock()
+mock_llm.bind.return_value = mock_llm
+mock_llm.__or__ = MagicMock(return_value=mock_chain)
+with patch.object(conv_proc, 'get_llm', return_value=mock_llm) as mock_get_llm, patch.object(
conv_proc, 'PydanticOutputParser'
) as mock_parser_cls, patch.object(conv_proc, 'ChatPromptTemplate') as mock_prompt_cls:

-mock_llm.bind.return_value = mock_llm
-mock_llm.__or__ = MagicMock(return_value=mock_chain)
-

mock_parser = MagicMock()
mock_parser.get_format_instructions.return_value = "format"
mock_parser_cls.return_value = mock_parser
@@ -440,13 +441,13 @@ def test_passes_current_time_to_invoke(self):
BACKEND_DIR / "utils" / "llm" / "conversation_processing.py",
)

-with patch.object(conv_proc, 'llm_medium_experiment') as mock_llm, patch.object(
+mock_llm = MagicMock()
+mock_llm.bind.return_value = mock_llm
+mock_llm.__or__ = MagicMock(return_value=mock_chain)
+with patch.object(conv_proc, 'get_llm', return_value=mock_llm) as mock_get_llm, patch.object(
conv_proc, 'PydanticOutputParser'
) as mock_parser_cls, patch.object(conv_proc, 'ChatPromptTemplate') as mock_prompt_cls:

-mock_llm.bind.return_value = mock_llm
-mock_llm.__or__ = MagicMock(return_value=mock_chain)
-

mock_parser = MagicMock()
mock_parser.get_format_instructions.return_value = "format"
mock_parser_cls.return_value = mock_parser
@@ -482,13 +483,13 @@ def test_preserves_none_due_dates(self):
BACKEND_DIR / "utils" / "llm" / "conversation_processing.py",
)

-with patch.object(conv_proc, 'llm_medium_experiment') as mock_llm, patch.object(
+mock_llm = MagicMock()
+mock_llm.bind.return_value = mock_llm
+mock_llm.__or__ = MagicMock(return_value=mock_chain)
+with patch.object(conv_proc, 'get_llm', return_value=mock_llm) as mock_get_llm, patch.object(
conv_proc, 'PydanticOutputParser'
) as mock_parser_cls, patch.object(conv_proc, 'ChatPromptTemplate') as mock_prompt_cls:

-mock_llm.bind.return_value = mock_llm
-mock_llm.__or__ = MagicMock(return_value=mock_chain)
-

mock_parser = MagicMock()
mock_parser.get_format_instructions.return_value = "format"
mock_parser_cls.return_value = mock_parser
@@ -526,13 +527,13 @@ def test_preserves_due_date_within_grace_boundary(self):
BACKEND_DIR / "utils" / "llm" / "conversation_processing.py",
)

-with patch.object(conv_proc, 'llm_medium_experiment') as mock_llm, patch.object(
+mock_llm = MagicMock()
+mock_llm.bind.return_value = mock_llm
+mock_llm.__or__ = MagicMock(return_value=mock_chain)
+with patch.object(conv_proc, 'get_llm', return_value=mock_llm) as mock_get_llm, patch.object(
conv_proc, 'PydanticOutputParser'
) as mock_parser_cls, patch.object(conv_proc, 'ChatPromptTemplate') as mock_prompt_cls:

-mock_llm.bind.return_value = mock_llm
-mock_llm.__or__ = MagicMock(return_value=mock_chain)
-

mock_parser = MagicMock()
mock_parser.get_format_instructions.return_value = "format"
mock_parser_cls.return_value = mock_parser