Skip to content

Commit 8e8efa2

Browse files
committed
Release v4.5.10
1 parent 771c1ba commit 8e8efa2

File tree

12 files changed

+239
-52
lines changed

12 files changed

+239
-52
lines changed

docker/Dockerfile.chat

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ RUN mkdir -p /root/.praison
1616
# Install Python packages (using latest versions)
1717
RUN pip install --no-cache-dir \
1818
praisonai_tools \
19-
"praisonai>=4.5.9" \
19+
"praisonai>=4.5.10" \
2020
"praisonai[chat]" \
2121
"embedchain[github,youtube]"
2222

docker/Dockerfile.dev

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ RUN mkdir -p /root/.praison
2020
# Install Python packages (using latest versions)
2121
RUN pip install --no-cache-dir \
2222
praisonai_tools \
23-
"praisonai>=4.5.9" \
23+
"praisonai>=4.5.10" \
2424
"praisonai[ui]" \
2525
"praisonai[chat]" \
2626
"praisonai[realtime]" \

docker/Dockerfile.ui

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ RUN mkdir -p /root/.praison
1616
# Install Python packages (using latest versions)
1717
RUN pip install --no-cache-dir \
1818
praisonai_tools \
19-
"praisonai>=4.5.9" \
19+
"praisonai>=4.5.10" \
2020
"praisonai[ui]" \
2121
"praisonai[crewai]"
2222

src/praisonai-agents/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "praisonaiagents"
7-
version = "1.5.9"
7+
version = "1.5.10"
88
description = "Praison AI agents for completing complex tasks with Self Reflection Agents"
99
readme = "README.md"
1010
requires-python = ">=3.10"

src/praisonai-agents/uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/praisonai/praisonai.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ class Praisonai < Formula
33

44
desc "AI tools for various AI applications"
55
homepage "https://github.com/MervinPraison/PraisonAI"
6-
url "https://github.com/MervinPraison/PraisonAI/archive/refs/tags/v4.5.9.tar.gz"
7-
sha256 `curl -sL https://github.com/MervinPraison/PraisonAI/archive/refs/tags/v4.5.9.tar.gz | shasum -a 256`.split.first
6+
url "https://github.com/MervinPraison/PraisonAI/archive/refs/tags/v4.5.10.tar.gz"
7+
sha256 `curl -sL https://github.com/MervinPraison/PraisonAI/archive/refs/tags/v4.5.10.tar.gz | shasum -a 256`.split.first
88
license "MIT"
99

1010
depends_on "python@3.11"

src/praisonai/praisonai/cli/commands/tracker.py

Lines changed: 212 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,10 @@ def _run_tracked_task(
209209
if model:
210210
agent_kwargs["llm"] = model
211211

212-
if not verbose:
213-
agent_kwargs["output"] = "silent"
212+
if verbose:
213+
pass # Use default full output
214+
else:
215+
agent_kwargs["output"] = "status" # Real-time status like Agent(output="status")
214216

215217
agent = Agent(**agent_kwargs)
216218

@@ -419,10 +421,28 @@ def _print_summary(result: TrackerResult) -> None:
419421
console.print(f" • {gap}")
420422

421423

422-
@app.callback(invoke_without_command=True)
423-
def tracker_main(
424-
ctx: typer.Context,
425-
task: Optional[str] = typer.Argument(None, help="Task for the agent to complete"),
424+
# Group-level callback for the `tracker` command group. It does no work of
# its own (the body is just `pass`); the actual behavior lives in the
# `run`, `judge`, `tools` and `batch` subcommands named in the docstring.
# NOTE(review): the docstring is presumably surfaced by Typer as the group's
# --help text, so treat its wording as user-facing output — confirm.
@app.callback()
def tracker_main(ctx: typer.Context) -> None:
    """Autonomous agent tracking with step-by-step analysis.

    Run tasks with full execution tracing and optional quality judging.

    Commands:

    praisonai tracker run "Search for Python best practices"

    praisonai tracker judge "What is 2+2?" --expected "4"

    praisonai tracker tools

    praisonai tracker batch tasks.json
    """
    pass
441+
442+
443+
@app.command(name="run")
444+
def tracker_run(
445+
task: str = typer.Argument(..., help="Task for the agent to complete"),
426446
max_iterations: int = typer.Option(20, "--max-iterations", "-n", help="Maximum iterations (default: 20)"),
427447
model: Optional[str] = typer.Option(None, "--model", "-m", help="LLM model to use"),
428448
tools: Optional[str] = typer.Option(None, "--tools", "-t", help="Comma-separated tool names to use"),
@@ -432,31 +452,32 @@ def tracker_main(
432452
):
433453
"""Run an agent with step-by-step tracking.
434454
435-
The tracker runs an agent in autonomous mode and records every step,
436-
tool call, and decision. At the end, it displays a summary table
437-
showing all steps taken and any gaps identified.
438-
439455
Examples:
440456
441-
praisonai tracker "Search for Python best practices and summarize"
457+
praisonai tracker run "Search for Python best practices and summarize"
442458
443-
praisonai tracker "Read config.yaml and explain its structure" -v
459+
praisonai tracker run "Read config.yaml and explain its structure" -v
444460
445-
praisonai tracker "Find trending AI news" --tools search_web,web_crawl
461+
praisonai tracker run "Find trending AI news" --tools search_web,web_crawl
446462
"""
447-
if ctx.invoked_subcommand is not None:
448-
return
449-
450-
if not task:
451-
typer.echo(ctx.get_help())
452-
return
453-
463+
_run_and_display(task, max_iterations, model, tools, extended, verbose, live)
464+
465+
466+
def _run_and_display(
467+
task: str,
468+
max_iterations: int = 20,
469+
model: Optional[str] = None,
470+
tools: Optional[str] = None,
471+
extended: bool = False,
472+
verbose: bool = False,
473+
live: bool = True,
474+
) -> TrackerResult:
475+
"""Shared logic: run a tracked task and display results."""
454476
# Resolve tools
455477
tool_names = AUTONOMY_DEFAULT_TOOLS.copy()
456478
if extended:
457479
tool_names.extend(EXTENDED_TOOLS)
458480
if tools:
459-
# Override with user-specified tools
460481
tool_names = [t.strip() for t in tools.split(",")]
461482

462483
resolved_tools = _get_tools(tool_names)
@@ -488,6 +509,8 @@ def step_callback(step: TrackedStep):
488509
_print_step_table(result.steps)
489510
console.print("\n")
490511
_print_summary(result)
512+
513+
return result
491514

492515

493516
@app.command(name="batch")
@@ -617,3 +640,171 @@ def tracker_tools():
617640

618641
console.print("\n[dim]Use --tools to specify custom tools, e.g.:[/dim]")
619642
console.print("[dim] praisonai tracker 'task' --tools search_web,read_file[/dim]")
643+
644+
645+
# ============================================================================
646+
# JUDGE FEATURE
647+
# ============================================================================
648+
649+
DEFAULT_JUDGE_CRITERIA = """Evaluate this autonomous agent execution trace:
650+
1. Task Completion: Did the agent fully complete the assigned task?
651+
2. Tool Selection: Were appropriate tools chosen for each step?
652+
3. Efficiency: Was the task completed with minimal unnecessary steps?
653+
4. Error Handling: Were errors handled gracefully without crashing?
654+
5. Output Quality: Is the final output accurate and useful?"""
655+
656+
657+
def _format_trace_for_judge(result: TrackerResult) -> str:
658+
"""Convert TrackerResult into a structured string for LLM evaluation."""
659+
lines = []
660+
lines.append(f"Task: \"{result.task}\"")
661+
status = "success" if result.success else "failed"
662+
lines.append(f"Completion: {status} (reason: {result.completion_reason})")
663+
lines.append(f"Duration: {result.total_duration:.1f}s | Steps: {result.total_steps} | Tools Used: {', '.join(result.tools_used) if result.tools_used else 'None'}")
664+
lines.append("")
665+
666+
for step in result.steps:
667+
icon = "✅" if step.success else "❌"
668+
lines.append(f"Step {step.step_number}: [{step.action_type}] {step.action_name} ({step.duration_seconds:.1f}s) {icon}")
669+
lines.append(f" Input: {step.input_summary}")
670+
lines.append(f" Output: {step.output_summary}")
671+
if step.error:
672+
lines.append(f" Error: {step.error}")
673+
lines.append("")
674+
675+
if result.gaps_identified:
676+
lines.append(f"Gaps: {'; '.join(result.gaps_identified)}")
677+
else:
678+
lines.append("Gaps: None")
679+
680+
return "\n".join(lines)
681+
682+
683+
def _print_judge_verdict(judge_result, threshold: float) -> None:
    """Print the judge verdict with rich formatting.

    Shows a panel with the score (as a number and a 20-cell bar), the
    threshold, a PASS/FAIL verdict and the judge's reasoning, followed by a
    bulleted list of suggestions when there are any.

    Args:
        judge_result: Object returned by the judge. ``score``, ``reasoning``
            and ``suggestions`` are read with ``getattr``; a missing or
            falsy value falls back to ``0``, ``""`` and ``[]`` respectively.
        threshold: Minimum score that counts as a pass
            (``score >= threshold``).
    """
    # Local import so this block does not depend on a module-level import.
    # NOTE(review): assumes `console` is a rich Console (the markup tags and
    # Panel usage below imply it) — confirm against the file's imports.
    from rich.markup import escape

    score = getattr(judge_result, 'score', 0) or 0
    passed = score >= threshold
    reasoning = getattr(judge_result, 'reasoning', '') or ''
    suggestions = getattr(judge_result, 'suggestions', []) or []

    # Score bar. Clamp the filled portion to [0, bar_len]: a judge that
    # returns a score outside 0-10 would otherwise yield a bar longer than
    # bar_len cells.
    bar_len = 20
    filled = max(0, min(bar_len, int(score / 10 * bar_len)))
    bar = "█" * filled + "░" * (bar_len - filled)

    color = "green" if passed else ("yellow" if score >= 5 else "red")
    icon = "✅" if passed else "❌"
    verdict_color = "green" if passed else "red"
    verdict = "PASS" if passed else "FAIL"

    # The reasoning is LLM-produced free text; escape it so square brackets
    # in it are printed literally instead of being parsed as markup tags.
    console.print(Panel(
        f"""
[bold]{icon} Score: [{color}]{score:.1f}/10[/{color}][/bold] [{color}]{bar}[/{color}]
[bold]Threshold:[/bold] {threshold} | [bold]Verdict:[/bold] [{verdict_color}]{verdict}[/{verdict_color}]

[bold]Reasoning:[/bold]
{escape(str(reasoning))}
""",
        title="⚖️ Judge Verdict",
        border_style=color,
    ))

    if suggestions:
        console.print("[bold yellow]💡 Suggestions:[/bold yellow]")
        for s in suggestions:
            # Suggestions are LLM text as well; escape for the same reason.
            console.print(f" • {escape(str(s))}")
714+
715+
716+
@app.command(name="judge")
def tracker_judge(
    task: str = typer.Argument(..., help="Task to execute and judge"),
    criteria: Optional[str] = typer.Option(None, "--criteria", "-c", help="Custom evaluation criteria"),
    expected: Optional[str] = typer.Option(None, "--expected", "-e", help="Expected output for accuracy evaluation"),
    threshold: float = typer.Option(7.0, "--threshold", help="Pass/fail score threshold (1-10)"),
    max_iterations: int = typer.Option(20, "--max-iterations", "-n", help="Maximum iterations (default: 20)"),
    model: Optional[str] = typer.Option(None, "--model", "-m", help="LLM model to use"),
    judge_model: Optional[str] = typer.Option(None, "--judge-model", help="LLM model for judge (default: same as agent)"),
    tools: Optional[str] = typer.Option(None, "--tools", "-t", help="Comma-separated tool names to use"),
    extended: bool = typer.Option(False, "--extended", help="Include extended tools (may require API keys)"),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Show verbose output"),
):
    """Run a task and judge the execution quality.

    Executes the task with step tracking, then evaluates the execution
    trace using an LLM judge. Reports a score (1-10), pass/fail verdict,
    reasoning, and improvement suggestions.

    Examples:

    praisonai tracker judge "Calculate fibonacci(10) using execute_code"

    praisonai tracker judge "Search for AI news" --criteria "Must use search_web"

    praisonai tracker judge "What is 2+2?" --expected "4" --threshold 8.0
    """
    # Local import so this block does not depend on a module-level import.
    # NOTE(review): assumes `console` is a rich Console — confirm.
    from rich.markup import escape

    # Resolve tools: defaults, optionally extended, or fully replaced by the
    # user's comma-separated list.
    tool_names = AUTONOMY_DEFAULT_TOOLS.copy()
    if extended:
        tool_names.extend(EXTENDED_TOOLS)
    if tools:
        # Drop blank entries so input such as "a,b," or "a,,b" does not ask
        # the resolver for a tool with an empty name.
        tool_names = [name for name in (t.strip() for t in tools.split(",")) if name]

    resolved_tools = _get_tools(tool_names)

    console.print("\n[bold cyan]⚖️ Agent Tracker + Judge[/bold cyan]")
    # The task is user text; escape it so brackets are printed literally.
    console.print(f"[dim]Task: {escape(str(_summarize_text(task, 70)))}[/dim]")
    console.print(f"[dim]Tools: {len(resolved_tools)} loaded | Threshold: {threshold}[/dim]\n")

    # Step 1: Run the task
    console.print("[bold]Phase 1: Executing task...[/bold]\n")

    def step_callback(step: TrackedStep):
        # Print one progress line per recorded step while the task runs.
        status = "✅" if step.success else "❌"
        console.print(f" [{step.step_number}] {status} {step.action_type}: {step.action_name} ({step.duration_seconds:.2f}s)")

    result = _run_tracked_task(
        task=task,
        tools=resolved_tools,
        max_iterations=max_iterations,
        model=model,
        verbose=verbose,
        step_callback=step_callback,
    )

    # Print step table + summary
    console.print("\n")
    _print_step_table(result.steps)
    console.print("\n")
    _print_summary(result)

    # Step 2: Judge the execution
    console.print("\n[bold]Phase 2: Judging execution...[/bold]\n")

    try:
        # Imported lazily so a missing eval module is reported below rather
        # than raised when this command runs.
        from praisonaiagents.eval import Judge

        trace_text = _format_trace_for_judge(result)

        # The judge uses --judge-model when given, otherwise the agent's
        # model; with neither set, Judge falls back to its own default.
        judge_kwargs = {}
        if judge_model:
            judge_kwargs["model"] = judge_model
        elif model:
            judge_kwargs["model"] = model

        judge = Judge(threshold=threshold, **judge_kwargs)

        eval_criteria = criteria or DEFAULT_JUDGE_CRITERIA

        judge_result = judge.run(
            output=trace_text,
            criteria=eval_criteria,
            expected=expected,
            input=task,
        )

        _print_judge_verdict(judge_result, threshold)

    except ImportError:
        console.print("[red]Error: praisonaiagents.eval not available[/red]")
        console.print("[dim]Install with: pip install praisonaiagents[/dim]")
    except Exception as e:
        # Top-level CLI boundary: report the failure instead of a traceback.
        # Exception text often contains brackets; escape it so printing the
        # error cannot itself fail on malformed markup.
        console.print(f"[red]Judge error: {escape(str(e))}[/red]")
810+

src/praisonai/praisonai/cli/main.py

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4561,6 +4561,7 @@ def cli_approval_with_live_pause(function_name, arguments, risk_level):
45614561
return ApprovalDecision(approved=True, reason="User approved")
45624562
else:
45634563
console.print("[red]❌ Denied[/red]")
4564+
console.print("[dim]Tip: Use --trust to auto-approve all tools[/dim]")
45644565
return ApprovalDecision(approved=False, reason="User denied")
45654566
except (KeyboardInterrupt, EOFError):
45664567
status_info['approval_pending'] = False
@@ -4576,37 +4577,32 @@ def cli_approval_with_live_pause(function_name, arguments, risk_level):
45764577
thread.start()
45774578

45784579
# Show live status while processing
4580+
# Loop handles unlimited approval interruptions (each approval
4581+
# pauses the Live display, then we restart it afterwards)
45794582
try:
4580-
with Live(build_status_display(), console=console, refresh_per_second=4, transient=True) as live:
4581-
status_info['live_instance'] = live
4582-
while not status_info['done']:
4583-
# Check if approval is pending - stop Live to show prompt
4584-
if status_info['approval_pending']:
4585-
break
4586-
live.update(build_status_display())
4587-
time.sleep(0.1)
4588-
4589-
# If approval was pending, wait for it to complete then restart Live
4590-
while status_info['approval_pending']:
4591-
time.sleep(0.1)
4592-
4593-
# Continue with Live display if not done
4594-
if not status_info['done']:
4583+
while not status_info['done']:
45954584
with Live(build_status_display(), console=console, refresh_per_second=4, transient=True) as live:
4585+
status_info['live_instance'] = live
45964586
while not status_info['done']:
4587+
# Check if approval is pending - stop Live to show prompt
45974588
if status_info['approval_pending']:
45984589
break
45994590
live.update(build_status_display())
46004591
time.sleep(0.1)
4592+
4593+
# If approval was pending, wait for it to complete then loop
4594+
# back to restart the Live display
4595+
while status_info['approval_pending']:
4596+
time.sleep(0.1)
46014597
except KeyboardInterrupt:
46024598
console.print("\n[dim]Interrupted[/dim]")
46034599
# Unregister callback (use local variable with None check)
46044600
if _sync_display_callbacks is not None and 'tool_call' in _sync_display_callbacks:
46054601
del _sync_display_callbacks['tool_call']
46064602
return None
46074603

4608-
# Wait for thread to complete
4609-
thread.join(timeout=1.0)
4604+
# Wait for thread to complete (generous timeout for long-running tasks)
4605+
thread.join(timeout=5.0)
46104606

46114607
# Unregister callback to avoid memory leaks (use local variable with None check)
46124608
if _sync_display_callbacks is not None and 'tool_call' in _sync_display_callbacks:

src/praisonai/praisonai/deploy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def create_dockerfile(self):
5757
file.write("FROM python:3.11-slim\n")
5858
file.write("WORKDIR /app\n")
5959
file.write("COPY . .\n")
60-
file.write("RUN pip install flask praisonai==4.5.9 gunicorn markdown\n")
60+
file.write("RUN pip install flask praisonai==4.5.10 gunicorn markdown\n")
6161
file.write("EXPOSE 8080\n")
6262
file.write('CMD ["gunicorn", "-b", "0.0.0.0:8080", "api:app"]\n')
6363

0 commit comments

Comments
 (0)