Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .github/workflows/black.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Lint workflow: runs the psf/black action against the repository.
name: Lint

# Triggered by every push and every pull request.
on: [push, pull_request]

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository so the action can see the sources.
      - uses: actions/checkout@v4
      # Black action, tracking the "stable" ref.
      - uses: psf/black@stable
8 changes: 4 additions & 4 deletions .github/workflows/integration-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,12 @@ jobs:
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install llm-api dependencies
- name: Install llm_api dependencies
run: |
python -m pip install --upgrade pip
pip install pytest
pip install pytest-asyncio
pip install -r ./protollm_tools/llm-api/requirements.txt
- name: Test llm-api with pytest
pip install -r ./protollm_tools/llm_api/requirements.txt
- name: Test llm_api with pytest
run: |
pytest -s ./protollm_tools/llm-api/tests/integration
pytest -s ./protollm_tools/llm_api/tests/integration
8 changes: 4 additions & 4 deletions .github/workflows/unit-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ jobs:
- name: Test with pytest
run: |
pytest -s tests
- name: Install llm-api dependencies
- name: Install llm_api dependencies
run: |
pip install pytest-asyncio
pip install -r ./protollm_tools/llm-api/requirements.txt
- name: Test llm-api with pytest
pip install -r ./protollm_tools/llm_api/requirements.txt
- name: Test llm_api with pytest
run: |
pytest -s ./protollm_tools/llm-api/tests/unit
pytest -s ./protollm_tools/llm_api/tests/unit

28 changes: 19 additions & 9 deletions examples/connector_creator_usage_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

from protollm.connectors import create_llm_connector

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)


def basic_call_example(url_with_name: str):
Expand All @@ -25,7 +27,7 @@ def basic_call_example(url_with_name: str):
logging.info(res.content)
except Exception as e:
logging.error(f"An error occurred: {e}")


# Some models do not support explicit function calls, so the system prompt will be used for this. If it is not
# specified, it will be generated from the tool description and response format. If specified, it will be
Expand All @@ -41,11 +43,15 @@ def function_call_example_with_functions(url_with_name: str):
model = create_llm_connector(url_with_name)
mssgs = [
SystemMessage(content=""),
HumanMessage(content="Build a plan for placing new schools with a budget of 5 billion rubles."),
HumanMessage(
content="Build a plan for placing new schools with a budget of 5 billion rubles."
),
]

@tool
def territory_by_budget(is_best_one: bool, budget: int | None, service_type: str) -> str:
def territory_by_budget(
is_best_one: bool, budget: int | None, service_type: str
) -> str:
"""
Get potential territories for building a new service of a given type, considering the budget (amount in
rubles). This function should be used if the discussion involves placement, creation, construction, or erection
Expand Down Expand Up @@ -102,7 +108,9 @@ def function_call_example_with_dicts(url_with_name: str):
model = create_llm_connector(url_with_name)
mssgs = [
SystemMessage(content=""),
HumanMessage(content="Build a plan for placing new schools with a budget of 5 billion rubles."),
HumanMessage(
content="Build a plan for placing new schools with a budget of 5 billion rubles."
),
]

tools_as_dicts = [
Expand Down Expand Up @@ -220,6 +228,7 @@ def structured_output_example_with_pydantic(url_with_name: str):

class Joke(BaseModel):
"""Joke to tell user."""

setup: str = Field(description="The setup of the joke")
punchline: str = Field(description="The punchline to the joke")
rating: Optional[int] = Field(
Expand All @@ -235,18 +244,19 @@ class Joke(BaseModel):


if __name__ == "__main__":
load_dotenv("../config.env") # Change path to your config file if needed or pass URL with name directly

load_dotenv(
"../config.env"
) # Change path to your config file if needed or pass URL with name directly

# model_url_and_name = os.getenv("LLAMA_URL")
# model_url_and_name = os.getenv("GIGACHAT_URL")
model_url_and_name = os.getenv("DEEPSEEK_URL")
# model_url_and_name = os.getenv("DEEPSEEK_R1_URL")
# model_url_and_name = os.getenv("GPT4_URL")

# Uncomment the example you want to run
basic_call_example(model_url_and_name)
function_call_example_with_functions(model_url_and_name)
function_call_example_with_dicts(model_url_and_name)
structured_output_example_with_dict(model_url_and_name)
structured_output_example_with_pydantic(model_url_and_name)

15 changes: 10 additions & 5 deletions examples/llama31_usage_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
tool,
)
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_core.prompts import (
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)

from protollm.agents.llama31_agents.llama31_agent import Llama31ChatModel

Expand All @@ -15,16 +18,18 @@ def add_numbers(a: int, b: int) -> int:
"""Adds two numbers."""
return a + b


# NOTE(review): the docstring below is presumably consumed by the @tool
# decorator as the tool description, so its wording is kept as-is — confirm.
@tool
def multiply_numbers(a: int, b: int) -> int:
    """Multiplies two numbers."""
    product = a * b
    return product


# List of tools
tools = [add_numbers, multiply_numbers]

# Create the system and human prompts
system_prompt = '''Respond to the human as helpfully and accurately as possible. You have access to the following tools:
system_prompt = """Respond to the human as helpfully and accurately as possible. You have access to the following tools:

{tools}

Expand All @@ -48,11 +53,11 @@ def multiply_numbers(a: int, b: int) -> int:
Action: {{ "action": "Final Answer", "action_input": "Final response to human" }}


Begin! Reminder to ALWAYS respond with a valid JSON blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB``` then Observation'''
Begin! Reminder to ALWAYS respond with a valid JSON blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB``` then Observation"""

human_prompt = '''{input}
human_prompt = """{input}
{agent_scratchpad}
(Reminder to respond in a JSON blob no matter what)'''
(Reminder to respond in a JSON blob no matter what)"""

system_message = SystemMessagePromptTemplate.from_template(
system_prompt,
Expand Down
21 changes: 13 additions & 8 deletions examples/metrics_usage_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@

from protollm.metrics import correctness_metric, model_for_metrics

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

answer_relevancy = AnswerRelevancyMetric(model=model_for_metrics, async_mode=False)
tool_correctness = ToolCorrectnessMetric()
Expand All @@ -31,27 +33,30 @@
test_case = LLMTestCase(
input="What if these shoes don't fit?",
actual_output="We offer a 30-day full refund at no extra cost.",
expected_output="You are eligible for a 30 day full refund at no extra cost."
expected_output="You are eligible for a 30 day full refund at no extra cost.",
)

answer_relevancy.measure(test_case) # Evaluate metric
answer_relevancy.measure(test_case) # Evaluate metric
logging.info(f"Answer relevancy score {answer_relevancy.score}")
logging.info(f"Answer relevancy reason: {answer_relevancy.reason}")
correctness_metric.measure(test_case) # Evaluate metric

correctness_metric.measure(test_case) # Evaluate metric
logging.info(f"Correctness score {correctness_metric.score}")
logging.info(f"Correctness reason: {correctness_metric.reason}")

# ===================================metrics not using LLM=========================================
# Create test case for metric
test_case = LLMTestCase(
input="What if these shoes don't fit?",
actual_output="We offer a 30-day full refund at no extra cost.",
# Replace this with the tools that were actually used by your LLM agent
tools_called=[ToolCall(name="WebSearch", input_parameters={}), ToolCall(name="ToolQuery", input_parameters={})],
tools_called=[
ToolCall(name="WebSearch", input_parameters={}),
ToolCall(name="ToolQuery", input_parameters={}),
],
expected_tools=[ToolCall(name="WebSearch", input_parameters={})],
)

tool_correctness.measure(test_case)
logging.info(f"Tool correctness score {tool_correctness.score}")
logging.info(f"Tool correctness reason: {tool_correctness.reason}")
Loading
Loading