Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CONTRIBUTING.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ To resolve this, manually link the upstream and pull all tags::
git remote add upstream https://github.com/con/duct
git fetch upstream



Testing
-------
Expand Down
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ usage: con-duct run [-h] [-l {NONE,CRITICAL,ERROR,WARNING,INFO,DEBUG}] [-q]
[-o {all,none,stdout,stderr}]
[-t {all,system-summary,processes-samples}] [-m MESSAGE]
[--mode {new-session,current-session}]
[--instruments INSTRUMENTS]
[--gpu-sample-interval GPU_SAMPLE_INTERVAL]
[--gpu-timeout GPU_TIMEOUT]
command [command_args ...] ...

duct is a lightweight wrapper that collects execution data for an arbitrary
Expand Down Expand Up @@ -105,6 +108,9 @@ environment variables:
DUCT_REPORT_INTERVAL: see --report-interval
DUCT_CAPTURE_OUTPUTS: see --capture-outputs
DUCT_MESSAGE: see --message
DUCT_INSTRUMENTS: see --instruments (e.g., "cpu,mem,gpu" or "all")
DUCT_GPU_SAMPLE_INTERVAL: see --gpu-sample-interval
DUCT_GPU_TIMEOUT: see --gpu-timeout
DUCT_CONFIG_PATHS: paths to .env files separated by platform path separator
(':' on Unix) (see below)

Expand Down Expand Up @@ -235,6 +241,21 @@ options:
current session instead of starting a new one. Useful
for tracking slurm jobs or other commands that should
run in the current session. (default: new-session)
--instruments INSTRUMENTS
Comma-separated list of instruments to enable: cpu,
mem, gpu, or 'all'. You can also provide value via
DUCT_INSTRUMENTS env variable. (default: cpu,mem)
--gpu-sample-interval GPU_SAMPLE_INTERVAL
Interval in seconds between GPU status checks. If not
specified or 0, uses --sample-interval. Useful when
nvidia-smi calls are slow. You can also provide value
via DUCT_GPU_SAMPLE_INTERVAL env variable. (default:
0.0)
--gpu-timeout GPU_TIMEOUT
Timeout in seconds for nvidia-smi calls. (default:
5.0)

```
<!-- END HELP -->
Expand Down
29 changes: 29 additions & 0 deletions src/con_duct/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
from con_duct.duct_main import (
DUCT_OUTPUT_PREFIX,
EXECUTION_SUMMARY_FORMAT,
GPU_SAMPLE_TIMEOUT,
Outputs,
RecordTypes,
SessionMode,
)
from con_duct.duct_main import execute as duct_execute
from con_duct.duct_main import instruments_from_str
from con_duct.ls import LS_FIELD_CHOICES, ls
from con_duct.plot import matplotlib_plot
from con_duct.pprint_json import pprint_json
Expand Down Expand Up @@ -146,6 +148,9 @@ def _replay_early_logs(log_buffer: List[tuple[str, str]]) -> None:
DUCT_REPORT_INTERVAL: see --report-interval
DUCT_CAPTURE_OUTPUTS: see --capture-outputs
DUCT_MESSAGE: see --message
DUCT_INSTRUMENTS: see --instruments (e.g., "cpu,mem,gpu" or "all")
DUCT_GPU_SAMPLE_INTERVAL: see --gpu-sample-interval
DUCT_GPU_TIMEOUT: see --gpu-timeout
DUCT_CONFIG_PATHS: paths to .env files separated by platform path separator
(':' on Unix) (see below)

Expand Down Expand Up @@ -366,6 +371,30 @@ def _create_run_parser() -> argparse.ArgumentParser:
"'current-session' tracks the current session instead of starting a new one. "
"Useful for tracking slurm jobs or other commands that should run in the current session.",
)
parser.add_argument(
"--instruments",
type=instruments_from_str,
default=instruments_from_str(os.getenv("DUCT_INSTRUMENTS", "cpu,mem")),
help="Comma-separated list of instruments to enable: cpu, mem, gpu, or 'all'. "
"You can also provide value via DUCT_INSTRUMENTS env variable. "
"(default: cpu,mem)",
)
parser.add_argument(
"--gpu-sample-interval",
type=float,
default=float(os.getenv("DUCT_GPU_SAMPLE_INTERVAL", "0")),
help="Interval in seconds between GPU status checks. "
"If not specified or 0, uses --sample-interval. "
"Useful when nvidia-smi calls are slow. "
"You can also provide value via DUCT_GPU_SAMPLE_INTERVAL env variable.",
)
parser.add_argument(
"--gpu-timeout",
type=float,
default=float(os.getenv("DUCT_GPU_TIMEOUT", str(GPU_SAMPLE_TIMEOUT))),
help="Timeout in seconds for nvidia-smi calls. "
f"(default: {GPU_SAMPLE_TIMEOUT})",
)
return parser


Expand Down
Loading
Loading