Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 52 additions & 10 deletions augur/util/inspect_without_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,59 @@
#while the config needs the phase names before population
#The solution is to either make the user define the phase names separately or to do this,
#which is to read the .py file as text and parse the function names.
def get_phase_names_without_import():
raw_file = open("augur/tasks/start_tasks.py")
lines = raw_file.readlines()
raw_file.close()

phase_names = []
"""
This module lets us inspect Python files without actually importing them.

It uses Python's Abstract Syntax Tree (AST) to parse source files and extract specific
function names. This is much safer and more robust than simple string parsing because
it handles things like indentation, whitespace, comments, and decorators correctly.
"""

import ast
from pathlib import Path
from typing import List

for line in lines:
if "def " in line and "_phase(" in line:
without_def = line.split()[1]
phase_names.append(without_def.split('(')[0])


def get_phase_names_without_import() -> List[str]:
"""
Grabs the names of phase functions from start_tasks.py.

Instead of importing the file (which runs code), we parse it strictly as text
using the AST module. We're looking for any function definition whose
name ends with '_phase'.

Returns:
List[str]: A list of found function names, like 'prelim_phase' or 'secondary_repo_collect_phase'.

Raises:
FileNotFoundError: If we can't locate the start_tasks.py file.
SyntaxError: If start_tasks.py has broken Python syntax.
"""
current_file = Path(__file__).resolve()
start_tasks_path = current_file.parent.parent / "tasks" / "start_tasks.py"

try:
source_code = start_tasks_path.read_text(encoding='utf-8')
except FileNotFoundError as e:
raise FileNotFoundError(
f"We couldn't find start_tasks.py at {start_tasks_path}"
) from e

try:
tree = ast.parse(source_code, filename=str(start_tasks_path))
except SyntaxError as e:
raise SyntaxError(
f"The file exists, but it contains invalid Python syntax: {e}"
) from e

phase_names = []

# We walk through every node in the abstract syntax tree
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef):
# We want any function that identifies itself as a 'phase'
if node.name.endswith('_phase'):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of the edge case tests use '_phase' in node.name (contains) instead of endswith('_phase'). That means the tests are validating a broader matching rule than what this actually ships. More on that in the test file comments.

Also, this only checks ast.FunctionDef. If someone ever adds an async def phase function, it'd be silently skipped. Probably fine for now since there aren't any, but worth a comment or including ast.AsyncFunctionDef in the check.

phase_names.append(node.name)

return phase_names
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ addopts = "-ra -s"
testpaths = [
"tests/test_classes",
"tests/test_application/test_cli/test_csv_utils.py",
"tests/test_util/test_inspect_without_import.py",
# "tests/test_routes", # runs, but needs a fixture for connecting to the web interface of Augur
# "tests/test_metrics",
# "tests/test_tasks",
Expand Down
1 change: 1 addition & 0 deletions tests/test_util/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Tests for the augur.util package."""
Loading
Loading