Skip to content

Commit 5211f50

Browse files
authored
Merge pull request #288 from algorithmicsuperintelligence/fix-default-addres-binding
Fix default address binding
2 parents 93e1b52 + e455a8b commit 5211f50

14 files changed

+339
-106
lines changed

.github/workflows/test.yml

Lines changed: 142 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,23 @@ jobs:
107107
OPTILLM_API_KEY=optillm python tests/test_n_parameter.py
108108
OPTILLM_API_KEY=optillm python -m pytest tests/test_api_compatibility.py -v --tb=short || echo "API compatibility tests require pytest"
109109
OPTILLM_API_KEY=optillm python tests/test.py --approaches none --single-test "Simple Math Problem" || echo "Main test completed"
110+
111+
# Run SSL config tests (no server needed but requires proper env setup)
112+
echo "Running SSL config tests..."
113+
python -m pytest tests/test_ssl_config.py -v --tb=short
114+
115+
# Run MARS tests
116+
echo "Running MARS parallel tests..."
117+
OPTILLM_API_KEY=optillm python -m pytest tests/test_mars_parallel.py -v --tb=short
118+
119+
# Run deepconf tests
120+
echo "Running deepconf tests..."
121+
OPTILLM_API_KEY=optillm python -m pytest tests/test_deepconf.py -v --tb=short
122+
123+
# Run conversation logger unit tests (no server needed)
124+
echo "Running conversation logger tests..."
125+
python -m pytest tests/test_conversation_logger.py -v --tb=short
126+
110127
echo "All integration tests completed successfully!"
111128
exit 0
112129
env:
@@ -125,4 +142,128 @@ jobs:
125142
pkill -f "python.*optillm" 2>/dev/null || true
126143
sleep 2
127144
echo "Server shutdown completed"
128-
exit 0
145+
exit 0
146+
147+
conversation-logging-tests:
148+
runs-on: ubuntu-latest
149+
needs: unit-tests
150+
strategy:
151+
matrix:
152+
python-version: ['3.12']
153+
154+
steps:
155+
- uses: actions/checkout@v4
156+
157+
- name: Set up Python ${{ matrix.python-version }}
158+
uses: actions/setup-python@v4
159+
with:
160+
python-version: ${{ matrix.python-version }}
161+
162+
- name: Cache pip packages
163+
uses: actions/cache@v3
164+
with:
165+
path: ~/.cache/pip
166+
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
167+
restore-keys: |
168+
${{ runner.os }}-pip-
169+
170+
- name: Install dependencies
171+
run: |
172+
python -m pip install --upgrade pip
173+
pip install -r requirements.txt
174+
pip install -r tests/requirements.txt
175+
pip install -e .
176+
177+
- name: Start optillm server with conversation logging
178+
run: |
179+
echo "Starting optillm server with conversation logging..."
180+
mkdir -p /tmp/optillm_conversations
181+
OPTILLM_API_KEY=optillm python optillm.py \
182+
--model google/gemma-3-270m-it \
183+
--port 8000 \
184+
--log-conversations \
185+
--conversation-log-dir /tmp/optillm_conversations &
186+
echo $! > server.pid
187+
188+
# Wait for server to be ready
189+
echo "Waiting for server to start..."
190+
sleep 20
191+
192+
# Test server health
193+
curl -s http://localhost:8000/health || echo "Server health check failed"
194+
env:
195+
OPTILLM_API_KEY: optillm
196+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
197+
198+
- name: Run conversation logging tests
199+
run: |
200+
echo "Running conversation logging approach tests..."
201+
OPTILLM_API_KEY=optillm python -m pytest tests/test_conversation_logging_approaches.py -v --tb=short
202+
203+
echo "Running conversation logging server tests..."
204+
OPTILLM_API_KEY=optillm OPTILLM_CONVERSATION_LOG_DIR=/tmp/optillm_conversations python -m pytest tests/test_conversation_logging_server.py -v --tb=short
205+
206+
echo "All conversation logging tests completed successfully!"
207+
env:
208+
OPTILLM_API_KEY: optillm
209+
OPTILLM_CONVERSATION_LOG_DIR: /tmp/optillm_conversations
210+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
211+
212+
- name: Stop optillm server
213+
if: always()
214+
run: |
215+
echo "Stopping optillm server..."
216+
if [ -f server.pid ]; then
217+
kill $(cat server.pid) 2>/dev/null || true
218+
rm -f server.pid
219+
fi
220+
pkill -f "python.*optillm" 2>/dev/null || true
221+
sleep 2
222+
echo "Server shutdown completed"
223+
exit 0
224+
225+
mcp-tests:
226+
runs-on: ubuntu-latest
227+
needs: unit-tests
228+
if: github.event_name == 'push' && github.ref == 'refs/heads/main' # Only run on main branch pushes (secrets available)
229+
strategy:
230+
matrix:
231+
python-version: ['3.12']
232+
233+
steps:
234+
- uses: actions/checkout@v4
235+
236+
- name: Set up Python ${{ matrix.python-version }}
237+
uses: actions/setup-python@v4
238+
with:
239+
python-version: ${{ matrix.python-version }}
240+
241+
- name: Set up Node.js
242+
uses: actions/setup-node@v4
243+
with:
244+
node-version: '20'
245+
246+
- name: Cache pip packages
247+
uses: actions/cache@v3
248+
with:
249+
path: ~/.cache/pip
250+
key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
251+
restore-keys: |
252+
${{ runner.os }}-pip-
253+
254+
- name: Install dependencies
255+
run: |
256+
python -m pip install --upgrade pip
257+
pip install -r requirements.txt
258+
pip install -r tests/requirements.txt
259+
pip install -e .
260+
261+
- name: Run MCP plugin tests
262+
run: |
263+
echo "Running MCP plugin tests..."
264+
python -m pytest tests/test_mcp_plugin.py -v --tb=short
265+
echo "MCP tests completed successfully!"
266+
env:
267+
OPTILLM_API_KEY: optillm
268+
GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
269+
HF_TOKEN: ${{ secrets.HF_TOKEN }}

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,15 +216,15 @@ You can then run the optillm proxy as follows.
216216
```bash
217217
python optillm.py
218218
2024-09-06 07:57:14,191 - INFO - Starting server with approach: auto
219-
2024-09-06 07:57:14,191 - INFO - Server configuration: {'approach': 'auto', 'mcts_simulations': 2, 'mcts_exploration': 0.2, 'mcts_depth': 1, 'best_of_n': 3, 'model': 'gpt-4o-mini', 'rstar_max_depth': 3, 'rstar_num_rollouts': 5, 'rstar_c': 1.4, 'base_url': ''}
219+
2024-09-06 07:57:14,191 - INFO - Server configuration: {'approach': 'auto', 'mcts_simulations': 2, 'mcts_exploration': 0.2, 'mcts_depth': 1, 'best_of_n': 3, 'model': 'gpt-4o-mini', 'rstar_max_depth': 3, 'rstar_num_rollouts': 5, 'rstar_c': 1.4, 'base_url': '', 'host': '127.0.0.1'}
220220
* Serving Flask app 'optillm'
221221
* Debug mode: off
222222
2024-09-06 07:57:14,212 - INFO - WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
223-
* Running on all addresses (0.0.0.0)
224223
* Running on http://127.0.0.1:8000
225-
* Running on http://192.168.10.48:8000
226224
2024-09-06 07:57:14,212 - INFO - Press CTRL+C to quit
227225
```
226+
227+
> **Security Note**: By default, optillm binds to `127.0.0.1` (localhost only) for security. To allow external connections (e.g., for Docker or remote access), use `--host 0.0.0.0`. Only do this on trusted networks or with proper authentication configured via `--optillm-api-key`.
228228
## Usage
229229

230230
Once the proxy is running, you can use it as a drop in replacement for an OpenAI client by setting the `base_url` as `http://localhost:8000/v1`.

optillm/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Version information
2-
__version__ = "0.3.11"
2+
__version__ = "0.3.12"
33

44
# Import from server module
55
from .server import (

optillm/server.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -94,10 +94,10 @@ def get_config():
9494
API_KEY = os.environ.get("OPENAI_API_KEY")
9595
base_url = server_config['base_url']
9696
if base_url != "":
97-
default_client = OpenAI(api_key=API_KEY, base_url=base_url)
97+
default_client = OpenAI(api_key=API_KEY, base_url=base_url, http_client=http_client)
9898
logger.info(f"Created OpenAI client with base_url: {base_url}")
9999
else:
100-
default_client = OpenAI(api_key=API_KEY)
100+
default_client = OpenAI(api_key=API_KEY, http_client=http_client)
101101
logger.info("Created OpenAI client without base_url")
102102
elif os.environ.get("AZURE_OPENAI_API_KEY"):
103103
API_KEY = os.environ.get("AZURE_OPENAI_API_KEY")
@@ -189,6 +189,7 @@ def count_reasoning_tokens(text: str, tokenizer=None) -> int:
189189
'base_url': '',
190190
'optillm_api_key': '',
191191
'return_full_response': False,
192+
'host': '127.0.0.1', # Default to localhost for security; use 0.0.0.0 to allow external connections
192193
'port': 8000,
193194
'log': 'info',
194195
'ssl_verify': True,
@@ -396,9 +397,9 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
396397
if approach == 'none':
397398
# Use the request_config that was already prepared and passed to this function
398399
kwargs = request_config.copy() if request_config else {}
399-
400+
400401
# Remove items that are handled separately by the framework
401-
kwargs.pop('n', None) # n is handled by execute_n_times
402+
# Note: 'n' is NOT removed - the none_approach passes it to the client which handles multiple completions
402403
kwargs.pop('stream', None) # stream is handled by proxy()
403404

404405
# Reconstruct original messages from system_prompt and initial_query
@@ -408,6 +409,7 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
408409
if initial_query:
409410
messages.append({"role": "user", "content": initial_query})
410411

412+
logger.debug(f"none_approach kwargs: {kwargs}")
411413
response = none_approach(original_messages=messages, client=client, model=model, request_id=request_id, **kwargs)
412414
# For none approach, we return the response and a token count of 0
413415
# since the full token count is already in the response
@@ -546,17 +548,29 @@ def execute_n_times(n: int, approaches, operation: str, system_prompt: str, init
546548
return responses, total_tokens
547549

548550
def generate_streaming_response(final_response, model):
549-
# Yield the final response
551+
# Generate a unique response ID
552+
response_id = f"chatcmpl-{int(time.time()*1000)}"
553+
created = int(time.time())
554+
555+
# Yield the final response with OpenAI-compatible format
550556
if isinstance(final_response, list):
551557
for index, response in enumerate(final_response):
558+
# First chunk includes role
552559
yield "data: " + json.dumps({
553-
"choices": [{"delta": {"content": response}, "index": index, "finish_reason": "stop"}],
560+
"id": response_id,
561+
"object": "chat.completion.chunk",
562+
"created": created,
554563
"model": model,
564+
"choices": [{"delta": {"role": "assistant", "content": response}, "index": index, "finish_reason": "stop"}],
555565
}) + "\n\n"
556566
else:
567+
# First chunk includes role
557568
yield "data: " + json.dumps({
558-
"choices": [{"delta": {"content": final_response}, "index": 0, "finish_reason": "stop"}],
569+
"id": response_id,
570+
"object": "chat.completion.chunk",
571+
"created": created,
559572
"model": model,
573+
"choices": [{"delta": {"role": "assistant", "content": final_response}, "index": 0, "finish_reason": "stop"}],
560574
}) + "\n\n"
561575

562576
# Yield the final message to indicate the stream has ended
@@ -987,6 +1001,7 @@ def parse_args():
9871001
("--rstar-c", "OPTILLM_RSTAR_C", float, 1.4, "Exploration constant for rStar algorithm"),
9881002
("--n", "OPTILLM_N", int, 1, "Number of final responses to be returned"),
9891003
("--return-full-response", "OPTILLM_RETURN_FULL_RESPONSE", bool, False, "Return the full response including the CoT with <thinking> tags"),
1004+
("--host", "OPTILLM_HOST", str, "127.0.0.1", "Host address to bind the server to (use 0.0.0.0 to allow external connections)"),
9901005
("--port", "OPTILLM_PORT", int, 8000, "Specify the port to run the proxy"),
9911006
("--log", "OPTILLM_LOG", str, "info", "Specify the logging level", list(logging_levels.keys())),
9921007
("--launch-gui", "OPTILLM_LAUNCH_GUI", bool, False, "Launch a Gradio chat interface"),
@@ -1263,7 +1278,8 @@ def process_batch_requests(batch_requests):
12631278
import gradio as gr
12641279
# Start server in a separate thread
12651280
import threading
1266-
server_thread = threading.Thread(target=app.run, kwargs={'host': '0.0.0.0', 'port': port})
1281+
host = server_config['host']
1282+
server_thread = threading.Thread(target=app.run, kwargs={'host': host, 'port': port})
12671283
server_thread.daemon = True
12681284
server_thread.start()
12691285

@@ -1310,12 +1326,12 @@ def chat_with_optillm(message, history):
13101326
description=f"Connected to OptILLM proxy at {base_url}"
13111327
)
13121328
demo.queue() # Enable queue to handle long operations properly
1313-
demo.launch(server_name="0.0.0.0", share=False)
1329+
demo.launch(server_name=host, share=False)
13141330
except ImportError:
13151331
logger.error("Gradio is required for GUI. Install it with: pip install gradio")
13161332
return
13171333

1318-
app.run(host='0.0.0.0', port=port)
1334+
app.run(host=server_config['host'], port=port)
13191335

13201336
if __name__ == "__main__":
13211337
main()

optillm/z3_solver.py

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,16 @@
1515
class TimeoutException(Exception):
1616
pass
1717

18-
def prepare_safe_globals():
19-
safe_globals = {
18+
def prepare_execution_globals():
19+
"""
20+
Prepare globals dictionary for Z3/SymPy code execution.
21+
22+
WARNING: This is NOT a security sandbox. The name "execution_globals" reflects
23+
that this simply provides the execution environment for solver code, not a
24+
security boundary. The code is executed via exec() with access to z3, sympy,
25+
and math libraries. Only execute trusted code.
26+
"""
27+
execution_globals = {
2028
'print': print,
2129
'__builtins__': {
2230
'True': True,
@@ -35,7 +43,7 @@ def prepare_safe_globals():
3543
}
3644

3745
# Add common math functions
38-
safe_globals.update({
46+
execution_globals.update({
3947
'log': math.log,
4048
'log2': math.log2,
4149
'sqrt': math.sqrt,
@@ -48,10 +56,10 @@ def prepare_safe_globals():
4856
})
4957

5058
# Add complex number support
51-
safe_globals['I'] = complex(0, 1)
52-
safe_globals['Complex'] = complex
59+
execution_globals['I'] = complex(0, 1)
60+
execution_globals['Complex'] = complex
5361

54-
return safe_globals
62+
return execution_globals
5563

5664
def execute_code_in_process(code: str):
5765
import z3
@@ -60,18 +68,18 @@ def execute_code_in_process(code: str):
6068
import itertools
6169
from fractions import Fraction
6270

63-
safe_globals = prepare_safe_globals()
64-
71+
execution_globals = prepare_execution_globals()
72+
6573
# Add Z3 specific functions
6674
z3_whitelist = set(dir(z3))
67-
safe_globals.update({name: getattr(z3, name) for name in z3_whitelist})
75+
execution_globals.update({name: getattr(z3, name) for name in z3_whitelist})
6876

6977
# Add SymPy specific functions
7078
sympy_whitelist = set(dir(sympy))
71-
safe_globals.update({name: getattr(sympy, name) for name in sympy_whitelist})
79+
execution_globals.update({name: getattr(sympy, name) for name in sympy_whitelist})
7280

7381
# Ensure key Z3 and SymPy components are available
74-
safe_globals.update({
82+
execution_globals.update({
7583
'z3': z3,
7684
'sympy': sympy,
7785
'Solver': z3.Solver,
@@ -112,22 +120,22 @@ def as_numerical(x):
112120
return x.approx(20)
113121
return float(x)
114122

115-
safe_globals['as_numerical'] = as_numerical
123+
execution_globals['as_numerical'] = as_numerical
116124

117125
def Mod(x, y):
118126
return x % y
119127

120-
safe_globals['Mod'] = Mod
128+
execution_globals['Mod'] = Mod
121129

122130
def Rational(numerator, denominator=1):
123131
return z3.Real(str(Fraction(numerator, denominator)))
124132

125-
safe_globals['Rational'] = Rational
133+
execution_globals['Rational'] = Rational
126134

127135
output_buffer = io.StringIO()
128136
with contextlib.redirect_stdout(output_buffer):
129137
try:
130-
exec(code, safe_globals, {})
138+
exec(code, execution_globals, {})
131139
except Exception:
132140
return ("error", traceback.format_exc())
133141
return ("success", output_buffer.getvalue())

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "optillm"
7-
version = "0.3.11"
7+
version = "0.3.12"
88
description = "An optimizing inference proxy for LLMs."
99
readme = "README.md"
1010
license = "Apache-2.0"

0 commit comments

Comments
 (0)