Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,7 @@ Apart from that there are other options whose names speak for themselves: ``'sta
`stl_arima`,STL Decomposition with ARIMA,Forecasting
`ts_naive_average`,Naive Average,Forecasting
`tabpfn`,TabPFN classifier,Classification
`tabpfnreg`,TabPFN regressor,Regression,
`autotabpfn`,AutoTabPFN classifier,Classification
`autotabpfnreg`,AutoTabPFN regressor,Regression,
`tabpfnreg`,TabPFN regressor,Regression


.. csv-table:: Available models implementations
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import numpy as np
from tabpfn import TabPFNClassifier, TabPFNRegressor
from tabpfn_extensions.post_hoc_ensembles.sklearn_interface import AutoTabPFNClassifier, AutoTabPFNRegressor
from typing import Optional
from fedot.core.data.data import InputData, OutputData
from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import ModelImplementation
Expand All @@ -26,10 +25,14 @@ def __init__(self, params: Optional[OperationParameters] = None):
}

model_path = self.params.get('model_path', None)

if model_path == "auto":
self.model_params['model_path'] = os.path.join(default_fedot_data_dir(), 'tabpfn')
model_path = os.path.join(default_fedot_data_dir(), 'tabpfn')
if not os.path.exists(model_path):
os.makedirs(model_path, exist_ok=True)
os.environ["TABPFN_MODEL_CACHE_DIR"] = model_path
elif model_path is not None:
self.model_params['model_path'] = model_path
os.environ["TABPFN_MODEL_CACHE_DIR"] = model_path

self.model = None
self.classes_ = None
Expand Down Expand Up @@ -82,19 +85,3 @@ class FedotTabPFNRegressionImplementation(FedotTabPFNImplementation):
def __init__(self, params: Optional[OperationParameters] = None):
super().__init__(params)
self.model = TabPFNRegressor(**self.model_params)


class FedotAutoTabPFNClassificationImplementation(FedotTabPFNImplementation):
def __init__(self, params: Optional[OperationParameters] = None):
super().__init__(params)
self.model = AutoTabPFNClassifier(**self.model_params)

def fit(self, input_data: InputData):
self.classes_ = np.unique(np.array(input_data.target))
return super().fit(input_data=input_data)


class FedotAutoTabPFNRegressionImplementation(FedotTabPFNImplementation):
def __init__(self, params: Optional[OperationParameters] = None):
super().__init__(params)
self.model = AutoTabPFNRegressor(**self.model_params)
11 changes: 4 additions & 7 deletions fedot/core/operations/evaluation/tabpfn.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
from fedot.core.data.data import InputData, OutputData
from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy
from fedot.core.operations.evaluation.operation_implementations.models.tabpfn import \
FedotTabPFNClassificationImplementation, FedotTabPFNRegressionImplementation, \
FedotAutoTabPFNClassificationImplementation, FedotAutoTabPFNRegressionImplementation
FedotTabPFNClassificationImplementation, FedotTabPFNRegressionImplementation
from fedot.core.operations.operation_parameters import OperationParameters
from fedot.core.repository.tasks import TaskTypesEnum
from fedot.utilities.random import ImplementationRandomStateHandler
Expand All @@ -14,16 +13,14 @@ class TabPFNStrategy(EvaluationStrategy):
_operations_by_types = {
'tabpfn': FedotTabPFNClassificationImplementation,
'tabpfnreg': FedotTabPFNRegressionImplementation,
'autotabpfn': FedotAutoTabPFNClassificationImplementation,
'autotabpfnreg': FedotAutoTabPFNRegressionImplementation,
}

def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):
self.operation_impl = self._convert_to_operation(operation_type)
super().__init__(operation_type, params)
self.device = params.get('device', 'auto')
self.max_samples = params.get('max_samples', 1000)
self.max_features = params.get('max_features', 500)
self.device = params.get('device', 'auto') if params else 'auto'
self.max_samples = params.get('max_samples', 1000) if params else 1000
self.max_features = params.get('max_features', 500) if params else 500

def fit(self, train_data: InputData):
check_data_size(
Expand Down
40 changes: 0 additions & 40 deletions fedot/core/repository/data/default_operation_params.json
Original file line number Diff line number Diff line change
Expand Up @@ -256,26 +256,6 @@
"max_samples": 1000,
"max_features": 500
},
"autotabpfn": {
"max_time": 30,
"preset": "default",
"ges_scoring_string": "roc",
"device": "cpu",
"ignore_pretraining_limits": false,
"enable_categorical": true,
"max_samples": 1000,
"max_features": 500
},
"autotabpfnreg": {
"max_time": 30,
"preset": "default",
"ges_scoring_string": "mse",
"device": "cpu",
"ignore_pretraining_limits": false,
"enable_categorical": true,
"max_samples": 1000,
"max_features": 500
},
"tabpfn_gpu": {
"n_jobs": 1,
"n_estimators": 4,
Expand Down Expand Up @@ -308,25 +288,5 @@
"enable_categorical": true,
"max_samples": 10000,
"max_features": 500
},
"autotabpfn_gpu": {
"max_time": 30,
"preset": "default",
"ges_scoring_string": "roc",
"device": "cuda",
"ignore_pretraining_limits": false,
"enable_categorical": true,
"max_samples": 10000,
"max_features": 500
},
"autotabpfnreg_gpu": {
"max_time": 30,
"preset": "default",
"ges_scoring_string": "mse",
"device": "cuda",
"ignore_pretraining_limits": false,
"enable_categorical": true,
"max_samples": 10000,
"max_features": 500
}
}
12 changes: 0 additions & 12 deletions fedot/core/repository/data/gpu_models_repository.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@
"TabPFNClassificationStrategy"
],
"tags": [
"ml",
"neural",
"non_linear",
"tabpfn",
Expand All @@ -96,7 +95,6 @@
"TabPFNRegressionStrategy"
],
"tags": [
"ml",
"neural",
"non_linear",
"tabpfn",
Expand Down Expand Up @@ -188,16 +186,6 @@
"meta": "tabpfn_gpu_regr",
"presets": ["gpu"],
"tags": ["non_auto"]
},
"autotabpfn_gpu": {
"meta": "tabpfn_gpu_class",
"presets": ["gpu"],
"tags": ["auto"]
},
"autotabpfnreg_gpu": {
"meta": "tabpfn_gpu_regr",
"presets": ["gpu"],
"tags": ["auto"]
}
}
}
10 changes: 0 additions & 10 deletions fedot/core/repository/data/model_repository.json
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@
"TabPFNClassificationStrategy"
],
"tags": [
"ml",
"neural",
"non_linear",
"tabpfn",
Expand All @@ -164,7 +163,6 @@
"TabPFNRegressionStrategy"
],
"tags": [
"ml",
"neural",
"non_linear",
"tabpfn",
Expand Down Expand Up @@ -565,14 +563,6 @@
"tabpfnreg": {
"meta": "tabpfn_regr",
"tags": ["non_auto"]
},
"autotabpfn": {
"meta": "tabpfn_class",
"tags": ["auto"]
},
"autotabpfnreg": {
"meta": "tabpfn_regr",
"tags": ["auto"]
}
}
}
1 change: 0 additions & 1 deletion other_requirements/extra.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
tensorflow >= 2.8.0; python_version >= '3.8'
torch >= 1.9.0
tabpfn >= 2.0.0
tabpfn-extensions >= 0.0.4

# Images
opencv-python >= 4.5.5.64
Expand Down
2 changes: 0 additions & 2 deletions test/integration/models/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,8 +477,6 @@ def test_models_does_not_fall_on_constant_data(operation):
'class_decompose',
'tabpfn',
'tabpfnreg',
'autotabpfn',
'autotabpfnreg',
}
if operation.id in to_skip:
return
Expand Down
16 changes: 10 additions & 6 deletions test/integration/models/test_strategy.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import roc_auc_score as roc_auc, mean_squared_error
from sklearn.metrics import roc_auc_score as roc_auc, mean_squared_error, r2_score

from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
Expand Down Expand Up @@ -83,18 +83,22 @@ def run_tabpfn(
model_name: str,
train_data: pd.DataFrame,
test_data: pd.DataFrame,
task: str,
):
pipeline = PipelineBuilder().add_node(model_name).build()
pipeline.fit(train_data)
predicted_output = pipeline.predict(test_data, output_mode='labels')
metric = roc_auc(test_data.target, predicted_output.predict)
if task == 'classification':
metric = roc_auc(test_data.target, predicted_output.predict)
else:
metric = r2_score(test_data.target, predicted_output.predict)

assert isinstance(pipeline, Pipeline)
assert metric > 0.5


def test_tabpfn_classification_operation():
n_samples = 20
n_samples = 100
train_data, test_data = get_classification_data(
classes_amount=2,
samples_amount=n_samples,
Expand All @@ -106,11 +110,11 @@ def test_tabpfn_classification_operation():
)

for model_name in model_names:
run_tabpfn(model_name, train_data, test_data)
run_tabpfn(model_name, train_data, test_data, task='classification')


def test_tabpfn_regression_operation():
n_samples = 20
n_samples = 100
data = get_synthetic_regression_data(n_samples=n_samples, n_features=4, random_state=42)
train_data, test_data = train_test_data_setup(data)

Expand All @@ -119,4 +123,4 @@ def test_tabpfn_regression_operation():
)

for model_name in model_names:
run_tabpfn(model_name, train_data, test_data)
run_tabpfn(model_name, train_data, test_data, task='regression')