Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
Lists all the files where performance variables can be found.

directory [*str*]::
Common directory where files containing performance variables can be found.
Common directory where files containing performance variables can be found. This field is optional **only if all stages extract from `stdout`**.
If at least one stage extracts from a file, `directory` must be provided.


clean_directory [*bool*] (Optional)::
If true, it will delete the contents of inside `directory`.
Expand All @@ -13,16 +15,16 @@ clean_directory [*bool*] (Optional)::
stages [*List[Stage]*]::
Describes the files containing performance variables, and how to extract them.

-name [*str*]:::
-name [*str* (Optional)]:::
Name to describe the stage. It is used as prefix to add to the performance variables found in the file.
If no prefix is needed, the name can be "".
If no prefix is needed, the name can be ommited.

-filepath [*str*]:::
Relative filepath of the file containing performance variables, relative to the `directory` field.
-filepath [*str*|"stdout"]:::
Can be either "stdout" or relative filepath of the file containing performance variables, relative to the `directory` field.

-format [*str*]:::
Format of the stage file.
Supported values are "csv" and "json".
Supported values are "regex", "csv" and "json".

-units [*Dict[str,str]*] (Optional):::
Custom units for certain performance variables.
Expand All @@ -35,6 +37,16 @@ stages [*List[Stage]*]::
Only valid if format is "json".
Defines where, in the JSON hierrarchy, performance variables will be found. Supports the use of one or multiple wildcards (`*`).

-pattern [*str*]:::
Required if format is `regex`. The regular expression applied to each line. Accepts named and arbitrary capture groups.

-variable_value_group[*str|int*]:::
Required if format is `regex`
The capture group containing the performance value to extract. Can be named or an integer.

-variable_name_group[*str|int* (Optional)]:::
The capture group containing the performance variable name to extract. If ommited, variables are named automatically as `match_0`, `match_1`, ...

custom_variables [*List[Dict[str,str]]*] (Optional)::
Contains a list of objects describing custom performance variables to create, based on extracted ones (from stages). An aggregation will be performed using provided columns and valid operations.
For more information, see the xref:tutorial:advancedConfiguration.adoc[advanced Configuration]
Expand Down Expand Up @@ -63,6 +75,24 @@ Recursive creation of custom_variables is supported!
Deeply nested and complex JSON scalability files are supported, using multiple wildcard syntax!
====


== Extracting from standard output

Stages may extract performance variables directly from the application standard output by setting:

[source,json]
----
"filepath": "stdout"
----

This works with any supported file format (e.g. logging a csv on the stdout).

The top-level directory field is not required if ALL stages extract from stdout.

[TIP]
Mixing `stdout` and file-based stages is allowed but requires `directory` to be set


== Examples

Let's assume our application exports the following files:
Expand Down Expand Up @@ -199,4 +229,40 @@ If a full path is passed, the variable name corresponds to the key of the leaf
[TIP]
====
`variables_path` can be a list.
====
====


=== Extracting performance variables using `regex`

Assume the application prints the following lines to standard output:

[source,text]
----
assembly: 0.012
solve: 1.42
postprocess: 0.08
----

A minimal `regex` stage extracting these values from `stdout` is shown below.

[source,json]
----
"scalability": {
"stages": [
{
"name": "timers",
"filepath": "stdout",
"format": "regex",
"pattern": "^(?P<name>[^:\\n]+):\\s*(?P<value>[-+]?[\\d.]+(?:[eE][-+]?\\d+)?)$",
"variable_name_group": "name",
"variable_value_group": "value"
}
]
}
----

This configuration extracts the following performance variables:

- timers_assembly : 0.012
- timers_solve : 1.42
- timers_postprocess : 0.08
8 changes: 8 additions & 0 deletions examples/fibonacci/benchmark.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@
"name":"",
"filepath":"output.csv",
"units":{ "fibonacci_number":"" }
},
{
"name":"",
"filepath":"stdout",
"format":"regex",
"pattern": "^(?P<name>[^:\\n]+):\\s*(?P<value>[-+]?[\\d.]+(?:[eE][-+]?\\d+)?)$",
"variable_name_group":"name",
"variable_value_group":"value"
}
]
},
Expand Down
6 changes: 3 additions & 3 deletions examples/fibonacci/fibonacci.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def fibonacciIterative(n):
os.makedirs(dirpath)

with open(args.out,'w') as f:
f.write(f"elapsed,fibonacci_number\n{elapsed_time},{fib_number}")
f.write(f"fibonacci_number\n{fib_number}")

print(f"elapsed: {elapsed_time}")

print(f"Elapsed time: {elapsed_time}")
print(f"Fibonacci number: {fib_number}")
print("Done!")
4 changes: 2 additions & 2 deletions src/feelpp/benchmarking/reframe/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class RegressionTest(ReframeSetup):
def initHandlers(self):
self.validation_handler = ValidationHandler(self.app_reader.config.sanity)
if self.app_reader.config.scalability:
self.scalability_handler = ScalabilityHandler(self.app_reader.config.scalability)
self.scalability_handler = ScalabilityHandler(self.app_reader.config.scalability, os.path.join(self.stagedir,self.stdout.evaluate()))
else:
self.scalability_handler = None

Expand Down Expand Up @@ -84,7 +84,7 @@ def setPerfVars(self):

@run_before("cleanup")
def removeDirectories(self):
if self.app_reader.config.scalability and self.app_reader.config.scalability.clean_directory:
if self.app_reader.config.scalability and self.app_reader.config.scalability.clean_directory and self.app_reader.config.scalability.directory:
FileHandler.cleanupDirectory(self.app_reader.config.scalability.directory)
if self.machine_reader.config.input_user_dir and self.app_reader.config.input_file_dependencies:
DEBUG("REMOVING INPUT FILE DEPENDENCIES...")
Expand Down
48 changes: 44 additions & 4 deletions src/feelpp/benchmarking/reframe/scalability.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ def __init__(self,filepath,stage_name, units):
self.stage_name = stage_name
self.units = units

@staticmethod
def _tryCastFloat(x):
try:
return float(x)
except ValueError:
return x

def _getPerfVars(self,columns,vars):
perf_variables = {}
nb_rows = len(vars.evaluate())
Expand Down Expand Up @@ -120,27 +127,60 @@ def _extractVariables(self):
return items.keys(),sn.defer([[sn.defer(v) for v in items.values()]])


class RegexExtractor(Extractor):
def __init__(self, filepath, stage_name, units, pattern, variable_name_group, variable_value_group):
super().__init__(filepath, stage_name, units)
self.pattern = pattern
self.variable_name_group = variable_name_group
self.variable_value_group = variable_value_group

def _extractVariables(self):
if self.variable_name_group:
tags = (self.variable_name_group, self.variable_value_group)
conv = (str,self._tryCastFloat)
else:
tags = self.variable_value_group
conv = self._tryCastFloat

raw_results = sn.extractall(rf"{self.pattern}", self.filepath, tags, conv=conv)

if self.variable_name_group:
columns = [x[0].strip() for x in raw_results]
matches = [x[1] for x in raw_results]
else:
matches = [x for x in raw_results]
columns = [f"match_{i}" for i in range(len(matches))]

return columns, sn.defer([matches])

class ExtractorFactory:
"""Factory class for extractor strategies"""
@staticmethod
def create(stage,directory,index=None):
filepath = os.path.join(directory,stage.filepath)
def create(stage,directory,index=None, stdout = None):
if stage.filepath == "stdout":
filepath = stdout
else:
filepath = os.path.join(directory,stage.filepath)

if stage.format == "csv":
return CsvExtractor(filepath=filepath, stage_name = stage.name, units=stage.units)
elif stage.format == "tsv":
return TsvExtractor(filepath=filepath,stage_name = stage.name,index=index, units=stage.units)
elif stage.format == "json":
return JsonExtractor(filepath=filepath,stage_name = stage.name, variables_path=stage.variables_path, units=stage.units)
elif stage.format == "regex":
return RegexExtractor(filepath=filepath,stage_name = stage.name, pattern=stage.pattern, units=stage.units, variable_name_group=stage.variable_name_group, variable_value_group=stage.variable_value_group)
else:
raise NotImplementedError


class ScalabilityHandler:
""" Class to handle scalability related attributes"""
def __init__(self,scalability_config):
def __init__(self,scalability_config, stdout = None):
self.directory = scalability_config.directory
self.stages = scalability_config.stages
self.custom_variables = scalability_config.custom_variables
self.stdout = stdout

def getPerformanceVariables(self,index=None):
""" Opens and parses the performance variable values depending on the config setup.
Expand All @@ -150,7 +190,7 @@ def getPerformanceVariables(self,index=None):
"""
perf_variables = {}
for stage in self.stages:
extractor = ExtractorFactory.create(stage,self.directory,index)
extractor = ExtractorFactory.create(stage,self.directory,index, self.stdout)
perf_variables.update( extractor.extract() )

return perf_variables
Expand Down
34 changes: 30 additions & 4 deletions src/feelpp/benchmarking/reframe/schemas/scalability.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,23 @@


class Stage(BaseModel):
name:str
name:Optional[str] = None
filepath:str
format:Optional[Literal["csv","tsv","json"]] = None
format:Optional[Literal["csv","tsv","json","regex"]] = None
variables_path:Optional[Union[str,List[str]]] = []
units: Optional[Dict[str,str]] = {}

pattern: Optional[str] = None
variable_value_group: Optional[Union[str,int]] = None
variable_name_group: Optional[Union[str,int]] = None

@field_validator("name",mode="after")
@classmethod
def defaultName(cls,v):
if v is None:
return ""
return v

@field_validator("units",mode="before")
@classmethod
def parseUnits(cls,v):
Expand All @@ -28,7 +39,12 @@ def checkFormatOptions(self):
raise ValueError("variables_path must be specified if format == json")
if type(self.variables_path) == str:
self.variables_path = [self.variables_path]
elif self.format != "json":
elif self.format == "regex":
if not self.pattern:
raise ValueError("regex must be specified if format == regex")
if not self.variable_value_group:
raise ValueError("variable_value_group must be specified if format == regex")
else:
if self.variables_path:
raise ValueError("variables_path cannot be specified with other format than json")
return self
Expand All @@ -40,7 +56,17 @@ class CustomVariable(BaseModel):
unit: str

class Scalability(BaseModel):
directory: str
directory: Optional[str] = None
stages: List[Stage]
custom_variables:Optional[List[CustomVariable]] = []
clean_directory: Optional[bool] = False

@model_validator(mode = "after")
def checkOptionalDirectory(self):
if self.directory is None:
#Directory should be specified if any stage has filename other than stdout
for stage in self.stages:
if stage.filepath != "stdout":
raise ValueError("Directory should be specified for non-stdout output files")

return self
2 changes: 1 addition & 1 deletion src/feelpp/benchmarking/reframe/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def setResources(self):

@run_before('run')
def cleanupDirectories(self):
if self.app_reader.config.scalability:
if self.app_reader.config.scalability and self.app_reader.config.scalability.directory:
FileHandler.cleanupDirectory(self.app_reader.config.scalability.directory)

@run_before('run')
Expand Down
29 changes: 28 additions & 1 deletion tests/scalability/test_scalabilityHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest
import tempfile, json
from feelpp.benchmarking.reframe.scalability import ScalabilityHandler, CsvExtractor,TsvExtractor,JsonExtractor,Extractor,ExtractorFactory
from feelpp.benchmarking.reframe.scalability import ScalabilityHandler, CsvExtractor,TsvExtractor,JsonExtractor,RegexExtractor,Extractor,ExtractorFactory
import numpy as np

class StageMocker:
Expand Down Expand Up @@ -83,6 +83,33 @@ def test_extractTsv(self):
file.close()



def test_extractRegex(self):
""" Test extracting performance variables using regex from a file """

file = tempfile.NamedTemporaryFile(mode="w+")
content = "assembly: 0.012\nsolve: 1.42\npostprocess: 0.08"
file.write(content)
file.flush()

pattern = r"^(?P<name>[^:]+):\s*(?P<value>[\d.]+)$"
extractor = RegexExtractor(
filepath=file.name,
stage_name="timers",
pattern=pattern,
variable_name_group="name",
variable_value_group="value",
units={"*":"s"}
)
perfvars = extractor.extract()
assert perfvars["timers_assembly"].evaluate() == 0.012
assert perfvars["timers_solve"].evaluate() == 1.42
assert perfvars["timers_postprocess"].evaluate() == 0.08

file.close()



def test_extractJson(self):
""" Test performance variable extraction for JSON files"""
file = tempfile.NamedTemporaryFile()
Expand Down
24 changes: 24 additions & 0 deletions tests/scalability/test_scalabilityValidation.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,29 @@ def test_format(self):
stage = Stage(**{"name":"test_stage","filepath":"test_filepath","format":"csv"})
assert stage.variables_path == []

def test_regex_validation(self):
""" Tests mandatory regex fields and named/numbered groups """
# Missing pattern
with pytest.raises(ValidationError, match="regex must be specified if format == regex"):
Stage(**{"name": "r_stage", "filepath": "file.txt", "format": "regex", "variable_value_group": "value"})

# Missing variable_value_group
with pytest.raises(ValidationError, match="variable_value_group must be specified if format == regex"):
Stage(**{"name": "r_stage", "filepath": "file.txt", "format": "regex", "pattern": ".*"})

# Valid named capture groups
stage = Stage(
name="r_stage",
filepath="file.txt",
format="regex",
pattern="^(?P<name>[^:]+):\\s*(?P<value>[\\d.]+)$",
variable_name_group="name",
variable_value_group="value"
)
assert stage.format == "regex"
assert stage.variable_name_group == "name"
assert stage.variable_value_group == "value"


class TestAppOutput:
pass