test: fix async tests wrapped by patch decorators

Ali-Khodabakhsh · Ali-Khodabakhsh · commit c927b7e60e5c · 2026-03-05T12:53:33.000-06:00
diff --git a/tests/guardrails/test_model_engine.py b/tests/guardrails/test_model_engine.py
@@ -195,201 +195,203 @@ def test_client_initially_none(self):
 class TestModelEngineLifecycle:
     """Test the ModelEngine start() and stop() client lifecycle."""
 
-    @patch.dict("os.environ", {"NVIDIA_API_KEY": "key"})
     @pytest.mark.asyncio
     async def test_start_stop_lifecycle(self):
         """start() creates the client, stop() tears it down to None."""
-        engine = ModelEngine(_make_model())
-        assert engine._client is None
-        assert engine._running is False
-        await engine.start()
-        assert engine._client is not None
-        assert engine._running is True
-        await engine.stop()
-        assert engine._client is None
-        assert engine._running is False
+        # NOTE: use patch.dict as a context manager, not a decorator, so the test stays a
+        # coroutine function that pytest can detect and await on every supported Python version.
+        with patch.dict("os.environ", {"NVIDIA_API_KEY": "key"}):
+            engine = ModelEngine(_make_model())
+            assert engine._client is None
+            assert engine._running is False
+            await engine.start()
+            assert engine._client is not None
+            assert engine._running is True
+            await engine.stop()
+            assert engine._client is None
+            assert engine._running is False
 
-    @patch.dict("os.environ", {"NVIDIA_API_KEY": "key"})
     @pytest.mark.asyncio
     async def test_start_is_idempotent(self):
         """Calling start() twice reuses the same client instance."""
-        engine = ModelEngine(_make_model())
-        await engine.start()
-        first_client = engine._client
-        await engine.start()  # should not create a new client
-        assert engine._client is first_client
-        await engine.stop()
+        with patch.dict("os.environ", {"NVIDIA_API_KEY": "key"}):
+            engine = ModelEngine(_make_model())
+            await engine.start()
+            first_client = engine._client
+            await engine.start()  # should not create a new client
+            assert engine._client is first_client
+            await engine.stop()
 
-    @patch.dict("os.environ", {"NVIDIA_API_KEY": "key"})
     @pytest.mark.asyncio
     async def test_stop_when_no_client_is_noop(self):
         """stop() without a prior start() does not raise."""
-        engine = ModelEngine(_make_model())
-        await engine.stop()  # should not raise
-        assert engine._running is False
+        with patch.dict("os.environ", {"NVIDIA_API_KEY": "key"}):
+            engine = ModelEngine(_make_model())
+            await engine.stop()  # should not raise
+            assert engine._running is False
 
-    @patch.dict("os.environ", {"NVIDIA_API_KEY": "key"})
     @pytest.mark.asyncio
     async def test_stop_is_idempotent(self):
         """Calling stop() twice does not raise."""
-        engine = ModelEngine(_make_model())
-        await engine.start()
-        await engine.stop()
-        await engine.stop()  # second stop is a no-op
-        assert engine._running is False
+        with patch.dict("os.environ", {"NVIDIA_API_KEY": "key"}):
+            engine = ModelEngine(_make_model())
+            await engine.start()
+            await engine.stop()
+            await engine.stop()  # second stop is a no-op
+            assert engine._running is False
 
 
 class TestModelEngineContextManager:
     """Test async context manager calls start/stop correctly."""
 
-    @patch.dict("os.environ", {"NVIDIA_API_KEY": "key"})
     @pytest.mark.asyncio
     async def test_context_manager_calls_start_and_stop(self):
         """async with calls start() on enter and stop() on exit."""
-        engine = ModelEngine(_make_model())
-        assert engine._running is False
-        async with engine as eng:
-            assert eng is engine
-            assert engine._running is True
-            assert engine._client is not None
-        assert engine._running is False
-        assert engine._client is None
+        with patch.dict("os.environ", {"NVIDIA_API_KEY": "key"}):
+            engine = ModelEngine(_make_model())
+            assert engine._running is False
+            async with engine as eng:
+                assert eng is engine
+                assert engine._running is True
+                assert engine._client is not None
+            assert engine._running is False
+            assert engine._client is None
 
 
 class TestModelEngineCall:
     """Test ModelEngine.call() HTTP request construction and error handling."""
 
-    @patch.dict("os.environ", {"NVIDIA_API_KEY": "test-key"})
     @pytest.mark.asyncio
     async def test_successful_call(self):
         """Successful call returns parsed JSON and posts to correct URL with headers."""
-        model = _make_model()
-        engine = ModelEngine(model)
+        with patch.dict("os.environ", {"NVIDIA_API_KEY": "test-key"}):
+            model = _make_model()
+            engine = ModelEngine(model)
 
-        expected_response = {"choices": [{"message": {"role": "assistant", "content": "Hello!"}}]}
+            expected_response = {"choices": [{"message": {"role": "assistant", "content": "Hello!"}}]}
 
-        mock_response = AsyncMock()
-        mock_response.__aenter__ = AsyncMock(return_value=mock_response)
-        mock_response.status = 200
-        mock_response.json = AsyncMock(return_value=expected_response)
+            mock_response = AsyncMock()
+            mock_response.__aenter__ = AsyncMock(return_value=mock_response)
+            mock_response.status = 200
+            mock_response.json = AsyncMock(return_value=expected_response)
 
-        mock_client = AsyncMock()
-        mock_client.post = MagicMock(return_value=mock_response)
-        mock_client.closed = False
+            mock_client = AsyncMock()
+            mock_client.post = MagicMock(return_value=mock_response)
+            mock_client.closed = False
 
-        engine._client = mock_client
-        engine._running = True
+            engine._client = mock_client
+            engine._running = True
 
-        messages = [{"role": "user", "content": "Hi"}]
-        result = await engine.call(messages)
-        assert result == expected_response
+            messages = [{"role": "user", "content": "Hi"}]
+            result = await engine.call(messages)
+            assert result == expected_response
 
-        # Verify correct URL
-        call_args = mock_client.post.call_args
-        assert _CHAT_COMPLETIONS_ENDPOINT in call_args[0][0]
+            # Verify correct URL
+            call_args = mock_client.post.call_args
+            assert _CHAT_COMPLETIONS_ENDPOINT in call_args[0][0]
 
-        expected_url = _ENGINE_BASE_URLS[model.engine] + "/v1/chat/completions"
-        expected_json = {"messages": messages, "model": model.model}
-        expected_headers = {"Content-Type": "application/json", "Authorization": "Bearer test-key"}
-        mock_client.post.assert_called_once_with(expected_url, json=expected_json, headers=expected_headers)
+            expected_url = _ENGINE_BASE_URLS[model.engine] + "/v1/chat/completions"
+            expected_json = {"messages": messages, "model": model.model}
+            expected_headers = {"Content-Type": "application/json", "Authorization": "Bearer test-key"}
+            mock_client.post.assert_called_once_with(expected_url, json=expected_json, headers=expected_headers)
 
-    @patch.dict("os.environ", {"NVIDIA_API_KEY": "test-key"})
     @pytest.mark.asyncio
     async def test_call_includes_model_name_and_messages_in_body(self):
         """Request body contains model name, messages, and extra kwargs."""
-        engine = ModelEngine(_make_model(model="my-llm"))
+        with patch.dict("os.environ", {"NVIDIA_API_KEY": "test-key"}):
+            engine = ModelEngine(_make_model(model="my-llm"))
 
-        mock_response = AsyncMock()
-        mock_response.__aenter__ = AsyncMock(return_value=mock_response)
-        mock_response.status = 200
-        mock_response.json = AsyncMock(return_value={"choices": [{"message": {"content": "ok"}}]})
+            mock_response = AsyncMock()
+            mock_response.__aenter__ = AsyncMock(return_value=mock_response)
+            mock_response.status = 200
+            mock_response.json = AsyncMock(return_value={"choices": [{"message": {"content": "ok"}}]})
 
-        mock_client = AsyncMock()
-        mock_client.post = MagicMock(return_value=mock_response)
-        mock_client.closed = False
-        engine._client = mock_client
-        engine._running = True
+            mock_client = AsyncMock()
+            mock_client.post = MagicMock(return_value=mock_response)
+            mock_client.closed = False
+            engine._client = mock_client
+            engine._running = True
 
-        messages = [{"role": "user", "content": "Hello"}]
-        await engine.call(messages, temperature=0.7)
+            messages = [{"role": "user", "content": "Hello"}]
+            await engine.call(messages, temperature=0.7)
 
-        call_kwargs = mock_client.post.call_args
-        body = call_kwargs[1]["json"]
-        assert body["model"] == "my-llm"
-        assert body["messages"] == messages
-        assert body["temperature"] == 0.7
+            call_kwargs = mock_client.post.call_args
+            body = call_kwargs[1]["json"]
+            assert body["model"] == "my-llm"
+            assert body["messages"] == messages
+            assert body["temperature"] == 0.7
 
-    @patch.dict("os.environ", {"NVIDIA_API_KEY": "test-key"})
     @pytest.mark.asyncio
     async def test_call_without_api_key_omits_auth_header(self):
         """No Authorization header when api_key is None."""
-        engine = ModelEngine(_make_model())
-        engine.api_key = None  # simulate no API key
+        with patch.dict("os.environ", {"NVIDIA_API_KEY": "test-key"}):
+            engine = ModelEngine(_make_model())
+            engine.api_key = None  # simulate no API key
 
-        mock_response = AsyncMock()
-        mock_response.__aenter__ = AsyncMock(return_value=mock_response)
-        mock_response.status = 200
-        mock_response.json = AsyncMock(return_value={"choices": [{"message": {"content": "ok"}}]})
+            mock_response = AsyncMock()
+            mock_response.__aenter__ = AsyncMock(return_value=mock_response)
+            mock_response.status = 200
+            mock_response.json = AsyncMock(return_value={"choices": [{"message": {"content": "ok"}}]})
 
-        mock_client = AsyncMock()
-        mock_client.post = MagicMock(return_value=mock_response)
-        mock_client.closed = False
-        engine._client = mock_client
-        engine._running = True
+            mock_client = AsyncMock()
+            mock_client.post = MagicMock(return_value=mock_response)
+            mock_client.closed = False
+            engine._client = mock_client
+            engine._running = True
 
-        await engine.call([{"role": "user", "content": "Hi"}])
+            await engine.call([{"role": "user", "content": "Hi"}])
 
-        call_kwargs = mock_client.post.call_args
-        headers = call_kwargs[1]["headers"]
-        assert "Authorization" not in headers
+            call_kwargs = mock_client.post.call_args
+            headers = call_kwargs[1]["headers"]
+            assert "Authorization" not in headers
 
-    @patch.dict("os.environ", {"NVIDIA_API_KEY": "test-key"})
     @pytest.mark.asyncio
     async def test_call_http_error_raises_model_engine_error(self):
         """HTTP 4xx/5xx raises ModelEngineError with status and model name."""
-        engine = ModelEngine(_make_model())
+        with patch.dict("os.environ", {"NVIDIA_API_KEY": "test-key"}):
+            engine = ModelEngine(_make_model())
 
-        mock_response = AsyncMock()
-        mock_response.__aenter__ = AsyncMock(return_value=mock_response)
-        mock_response.status = 400
-        mock_response.text = AsyncMock(return_value='{"error": "bad request"}')
+            mock_response = AsyncMock()
+            mock_response.__aenter__ = AsyncMock(return_value=mock_response)
+            mock_response.status = 400
+            mock_response.text = AsyncMock(return_value='{"error": "bad request"}')
 
-        mock_client = AsyncMock()
-        mock_client.post = MagicMock(return_value=mock_response)
-        mock_client.closed = False
-        engine._client = mock_client
-        engine._running = True
+            mock_client = AsyncMock()
+            mock_client.post = MagicMock(return_value=mock_response)
+            mock_client.closed = False
+            engine._client = mock_client
+            engine._running = True
 
-        with pytest.raises(ModelEngineError) as exc_info:
-            await engine.call([{"role": "user", "content": "Hi"}])
+            with pytest.raises(ModelEngineError) as exc_info:
+                await engine.call([{"role": "user", "content": "Hi"}])
 
-        assert exc_info.value.status == 400
-        assert exc_info.value.model_name == "meta/llama-3.3-70b-instruct"
+            assert exc_info.value.status == 400
+            assert exc_info.value.model_name == "meta/llama-3.3-70b-instruct"
 
-    @patch.dict("os.environ", {"NVIDIA_API_KEY": "test-key"})
     @pytest.mark.asyncio
     async def test_call_unexpected_exception_wraps_in_model_engine_error(self):
         """Non-HTTP exceptions are wrapped in ModelEngineError."""
-        engine = ModelEngine(_make_model())
+        with patch.dict("os.environ", {"NVIDIA_API_KEY": "test-key"}):
+            engine = ModelEngine(_make_model())
 
-        mock_client = AsyncMock()
-        mock_client.post = MagicMock(side_effect=RuntimeError("connection dropped"))
-        mock_client.closed = False
-        engine._client = mock_client
-        engine._running = True
+            mock_client = AsyncMock()
+            mock_client.post = MagicMock(side_effect=RuntimeError("connection dropped"))
+            mock_client.closed = False
+            engine._client = mock_client
+            engine._running = True
 
-        with pytest.raises(ModelEngineError, match="connection dropped"):
-            await engine.call([{"role": "user", "content": "Hi"}])
+            with pytest.raises(ModelEngineError, match="connection dropped"):
+                await engine.call([{"role": "user", "content": "Hi"}])
 
-    @patch.dict("os.environ", {"NVIDIA_API_KEY": "test-key"})
     @pytest.mark.asyncio
     async def test_call_raises_if_not_started(self):
         """call() raises ModelEngineError if start() hasn't been called."""
-        engine = ModelEngine(_make_model())
-        assert engine._client is None
+        with patch.dict("os.environ", {"NVIDIA_API_KEY": "test-key"}):
+            engine = ModelEngine(_make_model())
+            assert engine._client is None
 
-        with pytest.raises(ModelEngineError, match="has not been started"):
-            await engine.call([{"role": "user", "content": "Hi"}])
+            with pytest.raises(ModelEngineError, match="has not been started"):
+                await engine.call([{"role": "user", "content": "Hi"}])
 
 
 class TestModelEngineConstants: