-
Notifications
You must be signed in to change notification settings - Fork 91
feat: TabPFN implementation #1390
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4e172f3
b79c8d1
42c6180
d7e62b5
0fe2e69
f046413
d448bb9
6a23dcd
ca181af
7525d03
63c5ce3
fe89d46
75054b2
a3fad43
245e9b4
83d6608
51af4e9
e1f29d2
bfd8a08
921eae3
e179182
0d0e389
49f1877
360758e
d41aff1
f984f63
811c617
b4f0fe0
120341f
061378f
f052e9e
e62b5b2
254b8f6
5c67de9
68d1ad3
1b89dae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,6 @@ | ||
| language: python | ||
|
|
||
| python: | ||
| - "3.8" | ||
| - "3.9" | ||
| - "3.10" | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,100 @@ | ||
| import os | ||
|
|
||
| import numpy as np | ||
| from tabpfn import TabPFNClassifier, TabPFNRegressor | ||
| from tabpfn_extensions.post_hoc_ensembles.sklearn_interface import AutoTabPFNClassifier, AutoTabPFNRegressor | ||
| from typing import Optional | ||
| from fedot.core.data.data import InputData, OutputData | ||
| from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import ModelImplementation | ||
| from fedot.core.operations.operation_parameters import OperationParameters | ||
| from fedot.core.utils import default_fedot_data_dir | ||
|
|
||
|
|
||
class FedotTabPFNImplementation(ModelImplementation):
    """Common FEDOT wrapper for TabPFN-style estimators.

    The constructor only assembles ``model_params`` (the keyword arguments
    for the wrapped estimator). ``self.model`` is left as ``None`` here, so
    concrete subclasses are expected to assign the estimator before ``fit``,
    ``predict`` or ``predict_proba`` is used.
    """

    # Keys handled on the FEDOT side; they are filtered out of the keyword
    # arguments collected in ``model_params``.
    __operation_params = [
        'enable_categorical',
        'max_samples',
        'max_features',
        'model_path'
    ]

    def __init__(self, params: Optional[OperationParameters] = None):
        super().__init__(params)

        fedot_only_keys = self.__operation_params
        self.model_params = {
            name: value
            for name, value in self.params.to_dict().items()
            if name not in fedot_only_keys
        }

        # 'model_path' is filtered out above and re-added here, so it reaches
        # the wrapped estimator only through ``model_params``. The special
        # value "auto" is replaced by a 'tabpfn' folder inside the FEDOT data
        # directory; any other non-None value is forwarded unchanged.
        requested_path = self.params.get('model_path', None)
        if requested_path is not None:
            if requested_path == "auto":
                requested_path = os.path.join(default_fedot_data_dir(), 'tabpfn')
            self.model_params['model_path'] = requested_path

        self.model = None
        self.classes_ = None

    def _select_model_input(self, input_data: InputData) -> InputData:
        """Return the data variant whose features are passed to the model.

        When the 'enable_categorical' parameter is truthy, the result of
        ``input_data.get_not_encoded_data()`` is used; otherwise the data is
        returned untouched.
        """
        if self.params.get('enable_categorical'):
            return input_data.get_not_encoded_data()
        return input_data

    def fit(self, input_data: InputData):
        """Fit the wrapped estimator on ``input_data`` and return it."""
        # The categorical indices are read from the data as it was passed in,
        # before it is possibly swapped for the not-encoded variant.
        self.model.categorical_features_indices = input_data.categorical_idx

        model_input = self._select_model_input(input_data)
        self.model.fit(X=model_input.features, y=model_input.target)

        return self.model

    def predict(self, input_data: InputData) -> OutputData:
        """Run ``predict`` of the wrapped estimator and wrap the result as output data."""
        model_input = self._select_model_input(input_data)
        prediction = self.model.predict(model_input.features)

        return self._convert_to_output(
            input_data=model_input,
            predict=prediction
        )

    def predict_proba(self, input_data: InputData):
        """Run ``predict_proba`` of the wrapped estimator and wrap the result as output data."""
        model_input = self._select_model_input(input_data)
        probabilities = self.model.predict_proba(model_input.features)

        return self._convert_to_output(
            input_data=model_input,
            predict=probabilities
        )
|
|
||
|
|
||
class FedotTabPFNClassificationImplementation(FedotTabPFNImplementation):
    """Classification implementation that wraps ``TabPFNClassifier``."""

    def __init__(self, params: Optional[OperationParameters] = None):
        super().__init__(params)
        # ``model_params`` is prepared by the parent constructor.
        estimator_kwargs = self.model_params
        self.model = TabPFNClassifier(**estimator_kwargs)

    def fit(self, input_data: InputData):
        """Record the distinct target values in ``classes_``, then fit via the parent class."""
        target_values = np.array(input_data.target)
        self.classes_ = np.unique(target_values)
        return super().fit(input_data=input_data)
|
|
||
|
|
||
class FedotTabPFNRegressionImplementation(FedotTabPFNImplementation):
    """Regression implementation that wraps ``TabPFNRegressor``."""

    def __init__(self, params: Optional[OperationParameters] = None):
        super().__init__(params)
        # ``model_params`` is prepared by the parent constructor.
        estimator_kwargs = self.model_params
        self.model = TabPFNRegressor(**estimator_kwargs)
|
|
||
|
|
||
class FedotAutoTabPFNClassificationImplementation(FedotTabPFNImplementation):
    """Classification implementation that wraps ``AutoTabPFNClassifier``."""

    def __init__(self, params: Optional[OperationParameters] = None):
        super().__init__(params)
        # ``model_params`` is prepared by the parent constructor.
        estimator_kwargs = self.model_params
        self.model = AutoTabPFNClassifier(**estimator_kwargs)

    def fit(self, input_data: InputData):
        """Record the distinct target values in ``classes_``, then fit via the parent class."""
        target_values = np.array(input_data.target)
        self.classes_ = np.unique(target_values)
        return super().fit(input_data=input_data)
|
|
||
|
|
||
class FedotAutoTabPFNRegressionImplementation(FedotTabPFNImplementation):
    """Regression implementation that wraps ``AutoTabPFNRegressor``."""

    def __init__(self, params: Optional[OperationParameters] = None):
        super().__init__(params)
        # ``model_params`` is prepared by the parent constructor.
        estimator_kwargs = self.model_params
        self.model = AutoTabPFNRegressor(**estimator_kwargs)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,94 @@ | ||
| from typing import Optional | ||
|
|
||
| from fedot.core.data.data import InputData, OutputData | ||
| from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy | ||
| from fedot.core.operations.evaluation.operation_implementations.models.tabpfn import \ | ||
| FedotTabPFNClassificationImplementation, FedotTabPFNRegressionImplementation, \ | ||
| FedotAutoTabPFNClassificationImplementation, FedotAutoTabPFNRegressionImplementation | ||
| from fedot.core.operations.operation_parameters import OperationParameters | ||
| from fedot.core.repository.tasks import TaskTypesEnum | ||
| from fedot.utilities.random import ImplementationRandomStateHandler | ||
|
|
||
|
|
||
class TabPFNStrategy(EvaluationStrategy):
    """Base evaluation strategy for the TabPFN family of operations.

    Maps FEDOT operation names to their implementation classes, validates the
    size of the training data against the configured limits and fits the
    selected implementation. ``predict`` is left to the task-specific
    subclasses.
    """

    # Operation name -> implementation class.
    _operations_by_types = {
        'tabpfn': FedotTabPFNClassificationImplementation,
        'tabpfnreg': FedotTabPFNRegressionImplementation,
        'autotabpfn': FedotAutoTabPFNClassificationImplementation,
        'autotabpfnreg': FedotAutoTabPFNRegressionImplementation,
    }

    def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):
        """Resolve the implementation class and read the data-size limits.

        :param operation_type: name of the operation to build
        :param params: operation parameters; may be omitted, in which case the
            defaults ('auto' device, 1000 samples, 500 features) are used
        """
        # NOTE(review): `_convert_to_operation` is not defined in this class;
        # presumably inherited from EvaluationStrategy - confirm.
        self.operation_impl = self._convert_to_operation(operation_type)
        super().__init__(operation_type, params)

        # `params` defaults to None, so calling `.get` on it directly would
        # raise AttributeError. An empty mapping yields the defaults instead.
        limits_source = params if params is not None else {}
        self.device = limits_source.get('device', 'auto')
        self.max_samples = limits_source.get('max_samples', 1000)
        self.max_features = limits_source.get('max_features', 500)

    def fit(self, train_data: InputData):
        """Validate ``train_data`` and fit a new implementation instance on it.

        :param train_data: data used for fitting
        :return: the fitted implementation object
        :raises ValueError: for time series forecasting tasks, or when the
            data exceeds the configured sample/feature limits
        """
        # Reject the unsupported task before the size check: the size check
        # indexes `features.shape[1]`, which would fail with an unrelated
        # IndexError on features that are not two-dimensional.
        if train_data.task.task_type == TaskTypesEnum.ts_forecasting:
            raise ValueError('Time series forecasting not supported for TabPFN')

        check_data_size(
            data=train_data,
            device=self.device,
            max_samples=self.max_samples,
            max_features=self.max_features,
        )

        operation_implementation = self.operation_impl(self.params_for_fit)

        with ImplementationRandomStateHandler(implementation=operation_implementation):
            operation_implementation.fit(train_data)

        return operation_implementation

    def predict(self, trained_operation, predict_data: InputData) -> OutputData:
        """Not implemented here; task-specific subclasses provide prediction."""
        raise NotImplementedError()
|
|
||
|
|
||
class TabPFNClassificationStrategy(TabPFNStrategy):
    """Prediction logic for TabPFN classification operations."""

    def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):
        super().__init__(operation_type, params)

    def predict(self, trained_operation, predict_data: InputData) -> OutputData:
        """Predict labels or class probabilities depending on ``self.output_mode``.

        'labels' returns the result of ``trained_operation.predict``. 'probs',
        'full_probs' and 'default' return the result of ``predict_proba``; for
        exactly two classes (and any mode except 'full_probs') a prediction
        with more than one dimension is reduced to its column with index 1.

        :raises ValueError: if fewer than two target classes were seen or the
            output mode is not one of the supported values
        """
        mode = self.output_mode

        if mode == 'labels':
            return trained_operation.predict(predict_data)

        if mode not in ['probs', 'full_probs', 'default']:
            raise ValueError(f'Output model {self.output_mode} is not supported')

        class_count = len(trained_operation.classes_)
        output = trained_operation.predict_proba(predict_data)

        if class_count < 2:
            raise ValueError('Data set contain only 1 target class. Please reformat your data.')

        # The shape is inspected only when the first two conditions hold.
        if (class_count == 2 and mode != 'full_probs'
                and len(output.predict.shape) > 1):
            output.predict = output.predict[:, 1]

        return output
|
|
||
|
|
||
class TabPFNRegressionStrategy(TabPFNStrategy):
    """Prediction logic for TabPFN regression operations."""

    def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):
        super().__init__(operation_type, params)

    def predict(self, trained_operation, predict_data: InputData) -> OutputData:
        """Return whatever ``trained_operation.predict`` produces for ``predict_data``."""
        regression_output = trained_operation.predict(predict_data)
        return regression_output
|
|
||
|
|
||
def check_data_size(
    data: InputData,
    device: str = "auto",
    max_samples: int = 1000,
    max_features: int = 500,
) -> bool:
    """Check that ``data.features`` fits within the sample and feature limits.

    :param data: object whose ``features.shape`` gives (samples, features)
    :param device: device name; used only in the error message
    :param max_samples: largest accepted ``features.shape[0]``
    :param max_features: largest accepted ``features.shape[1]``
    :return: ``True`` when both limits are respected
    :raises ValueError: when either limit is exceeded
    """
    sample_count = data.features.shape[0]
    if sample_count > max_samples:
        message = (
            f"Input data has too many samples ({sample_count}), "
            f"maximum is {max_samples} for device '{device}'"
        )
        raise ValueError(message)

    # The second dimension is read only after the sample check has passed.
    feature_count = data.features.shape[1]
    if feature_count > max_features:
        message = (
            f"Input data has too many features ({feature_count}), "
            f"maximum is {max_features}"
        )
        raise ValueError(message)

    return True
Uh oh!
There was an error while loading. Please reload this page.