diff --git a/.github/workflows/build-and-publish.yml b/.github/workflows/build-and-publish.yml new file mode 100644 index 0000000..754ea65 --- /dev/null +++ b/.github/workflows/build-and-publish.yml @@ -0,0 +1,83 @@ +name: Build and Publish Package + +on: + push: + branches: + - main + - master + release: + types: [created] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + - name: Install uv + run: | + curl -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + - name: Install dependencies + run: | + # Create a virtual environment for uv + uv venv + # Install pip inside the virtual environment + uv pip install pip + # Install build tools + uv pip install build twine setuptools wheel + - name: Build package + run: | + # Activate the virtual environment + source .venv/bin/activate + # Build the package using pyproject.toml + python -m build + - name: Check package + run: | + # Activate the virtual environment + source .venv/bin/activate + twine check dist/* + - name: Store built package + uses: actions/upload-artifact@v3 + with: + name: dist + path: dist/ + retention-days: 7 + + publish: + needs: build + # Only run on release + if: github.event_name == 'release' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + - name: Install uv + run: | + curl -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + - name: Install dependencies + run: | + # Create a virtual environment for uv + uv venv + # Install pip inside the virtual environment + uv pip install pip + # Install twine + uv pip install twine + - name: Download built package + uses: actions/download-artifact@v3 + with: + name: dist + path: dist/ + - name: Publish to PyPI + run: | + # Activate the virtual environment + source .venv/bin/activate + # Use twine to upload to PyPI + twine upload dist/* --username __token__ --password ${{ secrets.PYPI_API_TOKEN }} --skip-existing diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 9c562d6..2c48ba8 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -5,28 +5,53 @@ on: branches: - '**' # Match all branches including those with slashes pull_request: - branches: + branches: - main - master # Include master branch as well jobs: - # lint: - # # Ruff checks are commented out for now. Uncomment to restore linting/formatting checks. - # runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v3 - # - name: Set up Python - # uses: actions/setup-python@v4 - # with: - # python-version: '3.10' - # - name: Install dependencies - # run: | - # python -m pip install --upgrade pip - # pip install ruff - # - name: Run Ruff - # run: | - # ruff check . - # ruff format --check . 
+ lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # Fetch all history for proper file comparison + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + - name: Install uv + run: | + curl -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + - name: Install dependencies + run: | + # Create a virtual environment for uv + uv venv + # Install pre-commit + uv pip install pre-commit + - name: Get changed files + id: changed-files + run: | + # Get the list of changed files + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For pull requests, compare with the base branch + CHANGED_FILES=$(git diff --name-only --diff-filter=ACMRT ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | tr '\n' ' ') + else + # For pushes, compare with the previous commit + CHANGED_FILES=$(git diff --name-only --diff-filter=ACMRT HEAD^ HEAD | tr '\n' ' ') + fi + echo "CHANGED_FILES=$CHANGED_FILES" >> $GITHUB_ENV + - name: Run pre-commit on changed files + run: | + # Activate the virtual environment + source .venv/bin/activate + if [ -n "$CHANGED_FILES" ]; then + echo "Checking files: $CHANGED_FILES" + pre-commit run --color always --files $CHANGED_FILES --show-diff-on-failure + else + echo "No files changed. Skipping pre-commit checks." + fi test: runs-on: ubuntu-latest @@ -43,7 +68,7 @@ jobs: uses: actions/cache@v4 with: path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }} restore-keys: | ${{ runner.os }}-pip- - name: Install uv @@ -54,21 +79,14 @@ jobs: run: | # Create a virtual environment for uv uv venv - # Install PyTorch first to avoid conflicts - uv pip install torch --index-url https://download.pytorch.org/whl/cpu - # Install all dependencies from requirements.txt - uv pip install -r requirements.txt - # Install package in development mode - uv pip install -e . 
- # Explicitly install pytest and pytest-cov in the virtual environment - uv pip install pytest pytest-cov - - name: Run tests + # Install the package with test dependencies + uv pip install -e ".[test]" + - name: Test with pytest run: | - # Use the Python from the virtual environment + # Activate the virtual environment source .venv/bin/activate - # Only run unit tests for now, integration tests will be handled in a separate MR - #python -m pytest tests --cov=dsipts --cov-report=xml - python -m pytest tests/unit/ --cov=dsipts --cov-report=xml + # Run unit tests with coverage + pytest tests/unit/ --cov=dsipts --cov-report=xml docs: needs: test runs-on: ubuntu-latest @@ -78,41 +96,26 @@ uses: actions/setup-python@v4 with: python-version: '3.12' + - name: Install uv + run: | + curl -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.cargo/bin" >> $GITHUB_PATH - name: Install dependencies run: | - python -m pip install --upgrade pip + # Create a virtual environment for uv + uv venv + # Install pip inside the virtual environment + uv pip install pip # Install package with docs extras - pip install -e ".[docs]" + uv pip install -e ".[docs]" # Explicitly install Sphinx and required extensions (version specifiers are quoted so the shell does not treat ">" as a redirection) - pip install sphinx>=7.0.0 - pip install sphinx_pdj_theme>=0.4.0 + uv pip install "sphinx>=7.0.0" + uv pip install "sphinx_pdj_theme>=0.4.0" # Install sphinx_mdinclude directly from GitHub to ensure compatibility - pip install git+https://github.com/omnilib/sphinx-mdinclude.git + uv pip install git+https://github.com/omnilib/sphinx-mdinclude.git - name: Build documentation run: | + # Activate the virtual environment + source .venv/bin/activate cd docs make html - build: - needs: [test, docs] - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.12' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build twine setuptools wheel - - name: Build package - run: | - # Create a requirements.txt file if it doesn't exist - if [ ! -f requirements.txt ]; then - echo "Creating requirements.txt from setup.py core_requirements" - python -c "import re; from setup import core_requirements; print('\n'.join(core_requirements))" > requirements.txt - fi - # Build the package - python -m build - - name: Check package - run: twine check dist/* \ No newline at end of file diff --git a/.gitignore b/.gitignore index bc6a83a..3f77b03 100644 --- a/.gitignore +++ b/.gitignore @@ -1,14 +1,175 @@ -*.ipynb_checkpoints -*.pyc -*.ckpt +# Byte-compiled / optimized / DLL files __pycache__/ -*.rst -*.pkl -*config_used* -logs -multirun -tmp -*.db -dist* -/.venv -/dsipts.egg-info \ No newline at end of file +*.py[cod] +*$py.class + +# C extensions +*.so +*.c + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg + +#TSC results +results/ + +MANIFEST + +#downloaded datasets +sktime/datasets/local_data/ + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +pip-wheel-metadata/ + +# Training logs +lightning_logs/ + +# folder created by `make test` +testdir/ + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +docs/source/api_reference/auto_generated/ +docs/estimator_overview_table.md + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +sktime-dev/ + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ +.pyre_configuration + +# Pytype type checker +.pytype/ + +# IDE files +*.prefs +.pydevproject +.idea +.vscode +.spyderproject +.spyproject +.ropeproject + +# scikit-learn specific +doc/_build/ +doc/auto_examples/ +doc/modules/generated/ +doc/datasets/generated/ + +# vim swap files +*.swp + +# autogen stuff +/documentation/source/autogen + +# macOS files +.DS_Store + +# dask-worker-space +dask-worker-space + +# documentation build files +build_doc_site/ + +# dev files +envs/ +*.html +sktime/_contrib/debug.py +sktime/_contrib/local_code.py +sktime/_contrib/main.py +sktime/_contrib/temp/ + +# example mlflow runs +examples/local/ + +# py-spy profiler output +profile.svg +test_output/* +mlruns/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..c29a201 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,34 @@ +# Pre-commit configuration +# By default, pre-commit only runs on staged files (not the entire codebase) + +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version + rev: v0.3.0 + hooks: + - id: ruff + name: ruff (check) + # Only run on staged files (default behavior) + args: [--fix, --exit-non-zero-on-fix] + # Skip very large files to avoid performance issues + exclude: '^(docs/|data/|notebooks/)' + - id: ruff-format + name: ruff (format) + # Skip very large files to avoid performance issues + exclude: '^(docs/|data/|notebooks/)' + +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + # Skip binary files and very large files + exclude: '(^binary/|\.(png|jpg|jpeg|gif|svg|ico|woff|woff2|ttf|eot|pdf|zip|tar|gz|db|pkl))$' + - id: end-of-file-fixer + # Skip binary files and very large files + exclude: '(^binary/|\.(png|jpg|jpeg|gif|svg|ico|woff|woff2|ttf|eot|pdf|zip|tar|gz|db|pkl))$' + - id: check-yaml + - id: check-toml + - id: check-added-large-files + args: ['--maxkb=500'] + - id: debug-statements + - id: check-merge-conflict diff --git a/README.md b/README.md index 4c43450..8437056 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ This frist block maybe is common between several architectures: - **future_steps** = int. THIS IS CRUCIAL and self explanatory - **past_channels** = len(ts.num_var). THIS IS CRUCIAL and self explanatory - **future_channels** = len(ts.future_variables). THIS IS CRUCIAL and self explanatory -- **embs** = [ts.dataset[c].nunique() for c in ts.cat_var]. THIS IS CRUCIAL and self explanatory. 
+- **embs** = [ts.dataset[c].nunique() for c in ts.cat_var]. THIS IS CRUCIAL and self explanatory. - **out_channels** = len(ts.target_variables). THIS IS CRUCIAL and self explanatory - **cat_emb_dim** = int. Dimension of embedded categorical variables, the choice here is to use a constant value and let the user choose whether to concatenate or sum the variables - **sum_emb** = boolean. If true the contribution of each categorical variable is summed @@ -92,16 +92,41 @@ or attention based models: - **n_layer_decoder** = int. decoder layers --- -## How to -Clone the repo (gitlab or github) -In a pre-generated environment install pytorch and pytorch-lightning (`pip install pytorch-lightning==1.9.4`) then go inside the lib folder and execute: +## Installation +To get started, first clone the repository: +```bash +git clone https://github.com/DSIP-FBK/DSIPTS_PTF.git +cd DSIPTS_PTF +``` + +Note: This project uses `pyproject.toml` for dependency management. For a smooth installation, it's recommended to first install `torch` and `pytorch-lightning` matching your system's configuration (e.g., with specific CUDA versions if applicable). + +```bash +pip install torch pytorch-lightning==1.9.4 +``` + +### Dependency Groups + +The project dependencies are organized into logical groups. You can install what you need. + +- **Core**: Minimal requirements to use the package. + ```bash + pip install . + ``` + +- **Development**: All dependencies for development, including tests, docs, and other tools. (Recommended for contributors.) + ```bash + pip install -e ".[dev]" + ``` -`` -python setup.py install --force -`` -In the gitlab repository it is possible to find the documentation (pages) and the package in the package registry. As soon as possible the CI/CD pipeline will update for working also in the github mirrored repository. +- **Specific Groups**: You can also install specific optional groups like `docs`, `test`, `optim`, or `web`: + ```bash + pip install -e ".[test,docs]" + ``` + +See the `pyproject.toml` file for a complete list of dependencies in each group. ## AIM DSIPTS uses AIM for tracking losses, parameters and other useful information. The first time you use DSIPTS you may need to initialize aim executing: ``` aim init ``` -## Test +## Test You can test your model using a toy timeseries ``` @@ -158,7 +183,7 @@ from dsipts import Monash, get_freq, TimeSeries, RNN import pandas as pd m = Monash(filename='monash',baseUrl='https://forecastingdata.org/', rebuild=True) ``` -This code will scrap the website and save the URLs connected to the dataset. After downloading it will save a file using the `filename` and, the next time you use it you can set `rebuild=False` avoinding the scraping procedure. +This code will scrape the website and save the URLs connected to the dataset. After downloading, it will save a file using the `filename`; the next time you use it you can set `rebuild=False`, avoiding the scraping procedure. After that `m.table` contains the table. Each dataset has an ID, you can download the data: ``` @@ -174,8 +199,8 @@ and create a timeseries object using the auxiliary function `get_freq`: ``` serie = pd.DataFrame({'signal':loaded_data.series_value.iloc[0]}) serie['time'] = pd.date_range(start = loaded_data.start_timestamp.iloc[0], periods= serie.shape[0],freq=get_freq(frequency)) -serie['cum'] = serie.time.dt.minute + serie.time.dt.hour -starting_point = {'cum':0} ##this can be used for creating the dataset: only samples with cum=0 in the first future lag will be used as samples! +serie['cum'] = serie.time.dt.minute + serie.time.dt.hour +starting_point = {'cum':0} ##this can be used for creating the dataset: only samples with cum=0 in the first future lag will be used as samples!
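+# NOTE: cum = minute + hour equals 0 only at midnight (minute 0, hour 0), so this
+# starting_point keeps only windows whose first future step falls at 00:00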
+serie['cum'] = serie.time.dt.minute + serie.time.dt.hour +starting_point = {'cum':0} ##this can be used for creating the dataset: only samples with cum=0 in the first future lag will be used as samples! ts = TimeSeries('4656144') ts.load_signal(serie.iloc[0:8000],enrich_cat=['dow','hour'],target_variables=['signal']) ts.plot(); @@ -212,7 +237,7 @@ config = dict(model_configs =dict( remove_last= True, use_bn = False, optim= 'torch.optim.Adam', - activation= 'torch.nn.GELU', + activation= 'torch.nn.GELU', verbose = True, out_channels = len(ts.target_variables)), scheduler_config = dict(gamma=0.1,step_size=100), @@ -222,14 +247,14 @@ ts.set_model(model_sum,config=config ) ``` Once the model is selected, it will display some information like follows: ``` -Can handle multivariate output +Can handle multivariate output Can handle future covariates Can handle categorical covariates Can handle Quantile loss function ``` This can help you knowing which models can be used for multioutput prediction and also if the quantile loss can be used and provide the confidence interval of the predictions. -Notice that there are some free parameters: `cat_emb_dim` for example represent the dimension of the embedded categorical variable, `sum_embs` will sum all the categorical contribution otherwise it will concatenate them. It is possible to use a quantile loss, specify some parameters of the scheduler (StepLR) and optimizer parameters (Adam). +Notice that there are some free parameters: `cat_emb_dim` for example represent the dimension of the embedded categorical variable, `sum_embs` will sum all the categorical contribution otherwise it will concatenate them. It is possible to use a quantile loss, specify some parameters of the scheduler (StepLR) and optimizer parameters (Adam). Now we are ready to split and train our model using: @@ -259,11 +284,11 @@ res.head() ##it contains something like 3 1 2006-02-15 03:50:01 -2.009074e-07 -8.994338 -0.003175 0.987681 2006-02-15 03:40:01 4 1 2006-02-15 04:00:01 -2.009074e-07 -8.994338 -0.003175 1.006510 2006-02-15 03:50:01 ``` -Where signal is the target variable (same name). If a quantile loss has been selected the model generares three signals `_low, _median, _high`, if not the output the model is indicated with `_pred`. Lag indicates wich step the prediction is referred (eg. lag=1 is the frist output of the model along the sequence output). +Where signal is the target variable (same name). If a quantile loss has been selected the model generares three signals `_low, _median, _high`, if not the output the model is indicated with `_pred`. Lag indicates wich step the prediction is referred (eg. lag=1 is the frist output of the model along the sequence output). ``` import matplotlib.pyplot as plt -mask = res.prediction_time=='2006-02-14 12:30:01' +mask = res.prediction_time=='2006-02-14 12:30:01' plt.plot(res.lag[mask],res.signal[mask],label='real') plt.plot(res.lag[mask],res.signal_median[mask],label='median') plt.legend() @@ -284,10 +309,10 @@ This example can be found in the [first notebook](/notebooks/1-monash_timeseries Most of the models implemented can deal with categorical variables. In particulare there are some variables that you don't need to computed. When declaring a `ts` obejct you can pass also the parameter `enrich_cat=['dow']` that will add to the dataframe (and to the dataloader) the day of the week. Since now you can automatically add `hour, dow, month and minute`. 
# Models -A description of each model can be found in the class documentation [here](https://dsip.pages.fbk.eu/dsip_dlresearch/timeseries/). +A description of each model can be found in the class documentation [here](https://dsip.pages.fbk.eu/dsip_dlresearch/timeseries/). It is possible to use one of the following architectures: -- **RNN** (GRU, LSTM or xLSTM) models, (xLSTM)[https://arxiv.org/pdf/2405.04517] are taken from the [official repo](https://github.com/muditbhargava66/PyxLSTM) +- **RNN** (GRU, LSTM or xLSTM) models; [xLSTM](https://arxiv.org/pdf/2405.04517) is taken from the [official repo](https://github.com/muditbhargava66/PyxLSTM) - **Linear** models based on the [official repository](https://github.com/cure-lab/LTSF-Linear), [paper](https://arxiv.org/pdf/2205.13504.pdf). An alternative model (alinear) has been implemented that drops the autoregressive part and uses only covariates - **Crossformer** [official repository](https://github.com/cheerss/CrossFormer), [paper](https://openreview.net/forum?id=vSVLM2j9eie) - **Informer** [official repository](https://github.com/zhouhaoyi/Informer2020), [paper](https://arxiv.org/abs/2012.07436) @@ -306,20 +331,20 @@ It is possible to use one of the following architectures: - **Samformer** [paper](https://arxiv.org/pdf/2402.10198) [official repo](https://github.com/romilbert/samformer/tree/main?tab=MIT-1-ov-) ## Metrics -In some cases the persistence model is hard to beat and even the more complex model can fall in the persistence trap that propagates the last seen values. +In some cases the persistence model is hard to beat, and even more complex models can fall into the persistence trap of propagating the last seen values. For this reason a set of metrics can be used to try to keep the model from getting stuck in the trap. In particular we implemented: MSE, L1, sinkhorn divergence, dilated loss, quantile loss, MDA and a couple of experimental losses for minimizing the variance or penalizing the persistency. See the base model definition in `dsipts/models/base.py` for more details. -# Usage +# Usage In the folder `bash_examples` you can find an example in which the library is used for training a model from the command line using OmegaConf and Hydra, with more updated models and examples. Please read the documentation [here](/bash_examples/README.md) # Modifiers -The VVA model is composed by two steps: the first is a clusterting procedure that divides the input time series in smaller segments an performs a clustering procedure in order to associate a label for each segment. A this point the GPT models works on the sequence of labels trying to predict the next cluster id. Using the centroids of the clusters (and the variace) the final ouput is reconstructed. This pipeline is quite unusual and does not fit with the automation pipeline, but it is possible to use a `Modifier` an abstract class that has 3 methods: +The VVA model is composed of two steps: the first is a clustering procedure that divides the input time series into smaller segments and associates a label to each segment. At this point a GPT model works on the sequence of labels, trying to predict the next cluster id. Using the centroids of the clusters (and the variance) the final output is reconstructed. This pipeline is quite unusual and does not fit the automation pipeline, but it is possible to use a `Modifier`, an abstract class that has 3 methods (a minimal sketch follows this list): - **fit_transform**: called before starting the training process; returns the train/validation pytorch datasets. In the aforementioned model this is where the clustering model is trained. - **transform**: used during the inference phase. It is similar to fit_transform but without the training step. - **inverse_transform**: the output of the model is reverted to the original shape. In the VVA model the centroids are used to reconstruct the predicted timeseries.
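As an illustration, a minimal pass-through `Modifier` might look like the sketch below (the import path is assumed from the file mentioned in this README; the exact base-class signatures may differ):

```python
from dsipts.data_structure.modifiers import Modifier  # assumed import path


class IdentityModifier(Modifier):
    def fit_transform(self, train, validation):
        # called before training starts; auxiliary models
        # (e.g. the clustering step in VVA) would be fitted here
        return train, validation

    def transform(self, data):
        # inference-time counterpart of fit_transform, without any fitting
        return data

    def inverse_transform(self, output):
        # map the model output back to the original shape; VVA uses the
        # cluster centroids here to reconstruct the predicted series
        return output
```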
@@ -330,7 +355,7 @@ You can find the documentation [here](https://dsip.pages.fbk.eu/dsip_dlresearch/ or in the folder `docs/_build/html/index.html` If you need to generate the documentation after some modifications, just run: ``` -./make_doc.sh +./make_doc.sh ``` For users only: be sure that the CI file has pages enabled, see [public pages](https://roneo.org/en/gitlab-public-pages-private-repo/) @@ -339,7 +364,7 @@ If you want to add a model: - extend the `Base` class in `dsipts/models` -- add the export line in the `dsipts/__init__.py` +- add the export line in the `dsipts/__init__.py` - add a full configuration file in `bash_examples/config_test/architecture` - optional: add in `bash_script/utils.py` the section to initialize and load the new model - add the modifier in `dsipts/data_structure/modifiers.py` if it is required @@ -347,6 +372,56 @@ If you want to add a model: # Testing See [here](/bash_examples/README.md) for the testing session. +# Development Tools + +## Pre-commit Hooks + +This project uses pre-commit hooks to ensure code quality and consistency. The hooks automatically run before each commit to catch issues early. + +### Setup + +To set up pre-commit hooks: + +```bash +# Install pre-commit +pip install pre-commit + +# Install the git hooks +pre-commit install +``` + +### Available Hooks + +The following checks are performed automatically before each commit **only on staged files** (not the entire codebase): + +- **Ruff**: Lints and formats Python code (excludes docs, data, and notebooks directories) +- **Trailing whitespace**: Removes trailing whitespace (excludes binary and large files) +- **End of file fixer**: Ensures files end with a newline (excludes binary and large files) +- **YAML/TOML checker**: Validates syntax of configuration files +- **Large file checker**: Prevents committing large files +- **Debug statement checker**: Catches forgotten debug statements +- **Merge conflict checker**: Detects unresolved merge conflicts + +This focused approach ensures that only files you're actively changing are checked, making the process efficient even with a large codebase. + +### Manual Usage + +You can manually run all pre-commit hooks on all files: + +```bash +pre-commit run --all-files +``` + +Or run on specific files: + +```bash +pre-commit run --files path/to/file1.py path/to/file2.py +``` + +## Continuous Integration + +The GitHub Actions workflow runs Ruff only on changed Python files to speed up CI checks. See `.github/workflows/python-package.yml` for details. + # Logging From version 1.1.0, Aim is used for logging all the experiments and metrics. It is quite easy to install and to use. Just go inside the main folder (`bash_examples`) and run: ``` aim up ``` @@ -360,29 +435,29 @@ and then open the url (http://127.0.0.1:43800)[http://127.0.0.1:43800].
It will ## TODO -[ ] reduce test time +[ ] reduce test time -[ ] add pre-commit hook for code checking (`ruff check --ignore E501,E722 .`) +[x] add pre-commit hook for code checking with Ruff -[ ] add pre-commit hook testing +[x] add pre-commit hook testing [ ] clean code and standardize documentation [ ] add more synthetic data -[x] add TIDE and iTransformer +[x] add TIDE and iTransformer [ ] clean some old functions -[ ] check all the code in the README +[ ] check all the code in the README -[ ] check architecture description (which model can be used under certain assumption) +[ ] check architecture description (which model can be used under certain assumptions) [ ] complete the classification part (loss function + inference step) -[x] add mirror to git if possible +[x] add mirror to git if possible -[x] fix dependencies +[x] fix dependencies [ ] check D3VAE, it seems broken in some configurations diff --git a/pyproject.toml b/pyproject.toml index 4332796..9912e41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,25 +1,108 @@ [build-system] -requires = ["setuptools>=45", "wheel"] +requires = ["setuptools>=61.0", "wheel"] build-backend = "setuptools.build_meta" [project] name = "dsipts" -dynamic = ["version"] +version = "1.1.3" +authors = [ + {name = "Andrea Gobbi", email = "agobbi@fbk.eu"}, +] +description = "Python library for time series forecasting" +readme = "README.md" +requires-python = ">=3.8" +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development :: Libraries :: Python Modules", +] +keywords = ["time series", "forecasting", "deep learning", "pytorch", "machine learning"] + +# Core dependencies - only what's needed for the package to function +dependencies = [ + "numpy>=1.24.0", + "torch>=2.0.0,<2.7.0", + "scipy>=1.10.0", + "pandas>=2.0.0", + "scikit-learn>=1.2.0", + "omegaconf>=2.3.0", + "einops>=0.6.0", + "matplotlib>=3.7.0", + "requests>=2.28.0", + "pydantic>=1.10.0,<3.0.0", + "plotly>=5.14.0", + "beautifulsoup4==4.13.4", + "html5lib>=1.1", + "pytorch-lightning==1.9.4", + "torchmetrics>=0.11.0", + "lightning_utilities>=0.8.0", + "aim>=3.29.1", + "numba>=0.57.0", +] [project.optional-dependencies] +# Additional deep learning and optimization dependencies +deep = [ + # Currently empty as core dependencies cover basic needs +] + +# Hyperparameter optimization and experiment tracking +optim = [ + "hydra-core>=1.3.2", + "hydra-joblib-launcher>=1.2.0", + "hydra-optuna-sweeper>=1.2.0", +] + +# Web and API dependencies +web = [ + "starlette>=0.30.0,<0.47.0", + "html-table-parser-python3==0.3.1", +] + +# Testing dependencies test = [ "pytest>=7.0.0", "pytest-cov>=4.0.0", ] + +# Documentation dependencies docs = [ "sphinx>=7.0.0", - "sphinx-rtd-theme>=1.0.0", + "sphinx_pdj_theme>=0.4.0", + "sphinx_mdinclude>=0.5.0", + "sphinx_rtd_theme>=1.0.0", ] +# Development dependencies (combines all optional dependencies) +dev = [ + "dsipts[deep,optim,web,test,docs]", +] + +# Complete installation with all runtime features (excludes test/docs tooling) +all = [ + "dsipts[deep,optim,web]", +] + +[project.urls] +Homepage = "https://github.com/DSIP-FBK/DSIPTS_PTF" +Repository = "https://github.com/DSIP-FBK/DSIPTS_PTF" +Documentation = "https://github.com/DSIP-FBK/DSIPTS_PTF" +"Bug Tracker" = "https://github.com/DSIP-FBK/DSIPTS_PTF/issues" +
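+# Package discovery: include every package found in the repo root, excluding tests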
+[tool.setuptools.packages.find] +where = ["."] +exclude = ["tests*"] + [tool.ruff] select = ["E", "F", "I"] ignore = [] line-length = 100 +target-version = "py38" [tool.ruff.format] quote-style = "double" @@ -34,4 +117,38 @@ testpaths = ["tests/unit"] markers = [ "unit: marks tests as unit tests", "integration: marks tests as integration tests", -] \ No newline at end of file +] +addopts = "-v --tb=short" +minversion = "7.0" + +[tool.coverage.run] +source = ["dsipts"] +omit = [ + "*/tests/*", + "*/test_*", + "setup.py", +] + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "def __repr__", + "raise AssertionError", + "raise NotImplementedError", + "if __name__ == .__main__.:", +] + +[tool.mypy] +python_version = "3.8" +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = false +disallow_incomplete_defs = false +check_untyped_defs = true +disallow_untyped_decorators = false +no_implicit_optional = true +warn_redundant_casts = true +warn_unused_ignores = true +warn_no_return = true +warn_unreachable = true +strict_equality = true
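+
+# With the settings above, running `mypy dsipts` (assuming mypy is installed,
+# e.g. `pip install mypy`) type-checks the package against Python 3.8 semantics.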