aimclub · jrzkaminski · Dec 24, 2024 · Mar 18, 2025 · Mar 18, 2025 · Mar 18, 2025
diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml
@@ -0,0 +1,10 @@
+name: Lint  # Workflow name as displayed in the GitHub Actions UI
+
+on: [push, pull_request]  # Trigger on every push and on every pull request event
+
+jobs:
+  lint:  # Single job in this workflow
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4  # Fetch the repository contents into the runner workspace
+      - uses: psf/black@stable  # Black's official action; NOTE(review): with no `with:` options it should default to `--check --diff` on the repo root, confirm against Black docs
diff --git a/.github/workflows/integration-build.yml b/.github/workflows/integration-build.yml
@@ -38,12 +38,12 @@ jobs:
         uses: actions/setup-python@v2
         with:
           python-version: ${{ matrix.python-version }}
-      - name: Install llm-api dependencies
+      - name: Install llm_api dependencies
         run: |
           python -m pip install --upgrade pip
           pip install pytest
           pip install pytest-asyncio
-          pip install -r ./protollm_tools/llm-api/requirements.txt
-      - name: Test llm-api with pytest
+          pip install -r ./protollm_tools/llm_api/requirements.txt
+      - name: Test llm_api with pytest
         run: |
-          pytest -s ./protollm_tools/llm-api/tests/integration
+          pytest -s ./protollm_tools/llm_api/tests/integration
diff --git a/.github/workflows/unit-build.yml b/.github/workflows/unit-build.yml
@@ -33,11 +33,11 @@ jobs:
     - name: Test with pytest
       run: |
         pytest -s tests
-    - name: Install llm-api dependencies
+    - name: Install llm_api dependencies
       run: |
         pip install pytest-asyncio
-        pip install -r ./protollm_tools/llm-api/requirements.txt
-    - name: Test llm-api with pytest
+        pip install -r ./protollm_tools/llm_api/requirements.txt
+    - name: Test llm_api with pytest
       run: |
-        pytest -s ./protollm_tools/llm-api/tests/unit
+        pytest -s ./protollm_tools/llm_api/tests/unit
 
diff --git a/examples/connector_creator_usage_example.py b/examples/connector_creator_usage_example.py
@@ -9,7 +9,9 @@
 
 from protollm.connectors import create_llm_connector
 
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
 
 
 def basic_call_example(url_with_name: str):
@@ -25,7 +27,7 @@ def basic_call_example(url_with_name: str):
         logging.info(res.content)
     except Exception as e:
         logging.error(f"An error occurred: {e}")
-    
+
 
 # Some models do not support explicit function calls, so the system prompt will be used for this. If it is not
 # specified, it will be generated from the tool description and response format. If specified, it will be
@@ -41,11 +43,15 @@ def function_call_example_with_functions(url_with_name: str):
     model = create_llm_connector(url_with_name)
     mssgs = [
         SystemMessage(content=""),
-        HumanMessage(content="Build a plan for placing new schools with a budget of 5 billion rubles."),
+        HumanMessage(
+            content="Build a plan for placing new schools with a budget of 5 billion rubles."
+        ),
     ]
 
     @tool
-    def territory_by_budget(is_best_one: bool, budget: int | None, service_type: str) -> str:
+    def territory_by_budget(
+        is_best_one: bool, budget: int | None, service_type: str
+    ) -> str:
         """
         Get potential territories for building a new service of a given type, considering the budget (amount in
         rubles).  This function should be used if the discussion involves placement, creation, construction, or erection
@@ -102,7 +108,9 @@ def function_call_example_with_dicts(url_with_name: str):
     model = create_llm_connector(url_with_name)
     mssgs = [
         SystemMessage(content=""),
-        HumanMessage(content="Build a plan for placing new schools with a budget of 5 billion rubles."),
+        HumanMessage(
+            content="Build a plan for placing new schools with a budget of 5 billion rubles."
+        ),
     ]
 
     tools_as_dicts = [
@@ -220,6 +228,7 @@ def structured_output_example_with_pydantic(url_with_name: str):
 
     class Joke(BaseModel):
         """Joke to tell user."""
+
         setup: str = Field(description="The setup of the joke")
         punchline: str = Field(description="The punchline to the joke")
         rating: Optional[int] = Field(
@@ -235,18 +244,19 @@ class Joke(BaseModel):
 
 
 if __name__ == "__main__":
-    load_dotenv("../config.env") # Change path to your config file if needed or pass URL with name directly
-
+    load_dotenv(
+        "../config.env"
+    )  # Change the path to your config file if needed, or pass the model URL and name directly
+
     # model_url_and_name = os.getenv("LLAMA_URL")
     # model_url_and_name = os.getenv("GIGACHAT_URL")
     model_url_and_name = os.getenv("DEEPSEEK_URL")
     # model_url_and_name = os.getenv("DEEPSEEK_R1_URL")
     # model_url_and_name = os.getenv("GPT4_URL")
-    
+
     # Uncomment the example you want to run
     basic_call_example(model_url_and_name)
     function_call_example_with_functions(model_url_and_name)
     function_call_example_with_dicts(model_url_and_name)
     structured_output_example_with_dict(model_url_and_name)
     structured_output_example_with_pydantic(model_url_and_name)
-
diff --git a/examples/llama31_usage_example.py b/examples/llama31_usage_example.py
@@ -4,7 +4,10 @@
     tool,
 )
 from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
-from langchain_core.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
+from langchain_core.prompts import (
+    SystemMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+)
 
 from protollm.agents.llama31_agents.llama31_agent import Llama31ChatModel
 
@@ -15,16 +18,18 @@ def add_numbers(a: int, b: int) -> int:
     """Adds two numbers."""
     return a + b
 
+
 @tool
 def multiply_numbers(a: int, b: int) -> int:
     """Multiplies two numbers."""
     return a * b
 
+
 # List of tools
 tools = [add_numbers, multiply_numbers]
 
 # Create the system and human prompts
-system_prompt = '''Respond to the human as helpfully and accurately as possible. You have access to the following tools:
+system_prompt = """Respond to the human as helpfully and accurately as possible. You have access to the following tools:
 
 {tools}
 
@@ -48,11 +53,11 @@ def multiply_numbers(a: int, b: int) -> int:
 Action: {{ "action": "Final Answer", "action_input": "Final response to human" }}
 
 
-Begin! Reminder to ALWAYS respond with a valid JSON blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB``` then Observation'''
+Begin! Reminder to ALWAYS respond with a valid JSON blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB``` then Observation"""
 
-human_prompt = '''{input}
+human_prompt = """{input}
 {agent_scratchpad}
-(Reminder to respond in a JSON blob no matter what)'''
+(Reminder to respond in a JSON blob no matter what)"""
 
 system_message = SystemMessagePromptTemplate.from_template(
     system_prompt,

diff --git a/examples/metrics_usage_examples.py b/examples/metrics_usage_examples.py
@@ -20,7 +20,9 @@
 
 from protollm.metrics import correctness_metric, model_for_metrics
 
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
 
 answer_relevancy = AnswerRelevancyMetric(model=model_for_metrics, async_mode=False)
 tool_correctness = ToolCorrectnessMetric()
@@ -31,27 +33,30 @@
     test_case = LLMTestCase(
         input="What if these shoes don't fit?",
         actual_output="We offer a 30-day full refund at no extra cost.",
-        expected_output="You are eligible for a 30 day full refund at no extra cost."
+        expected_output="You are eligible for a 30 day full refund at no extra cost.",
     )
 
-    answer_relevancy.measure(test_case) # Evaluate metric
+    answer_relevancy.measure(test_case)  # Evaluate metric
     logging.info(f"Answer relevancy score {answer_relevancy.score}")
     logging.info(f"Answer relevancy reason: {answer_relevancy.reason}")
-    
-    correctness_metric.measure(test_case) # Evaluate metric
+
+    correctness_metric.measure(test_case)  # Evaluate metric
     logging.info(f"Correctness score {correctness_metric.score}")
     logging.info(f"Correctness reason: {correctness_metric.reason}")
-    
+
     # ===================================metrics not using LLM=========================================
     # Create test case for metric
     test_case = LLMTestCase(
         input="What if these shoes don't fit?",
         actual_output="We offer a 30-day full refund at no extra cost.",
         # Replace this with the tools that was actually used by your LLM agent
-        tools_called=[ToolCall(name="WebSearch", input_parameters={}), ToolCall(name="ToolQuery", input_parameters={})],
+        tools_called=[
+            ToolCall(name="WebSearch", input_parameters={}),
+            ToolCall(name="ToolQuery", input_parameters={}),
+        ],
         expected_tools=[ToolCall(name="WebSearch", input_parameters={})],
     )
-    
+
     tool_correctness.measure(test_case)
     logging.info(f"Tool correctness score {tool_correctness.score}")
     logging.info(f"Tool correctness reason: {tool_correctness.reason}")