diff --git a/dao/.dockerignore b/dao/.dockerignore index 2bfa6a4d..786f7a74 100644 --- a/dao/.dockerignore +++ b/dao/.dockerignore @@ -1 +1,2 @@ tests/ +pred/ diff --git a/dao/CHANGELOG.md b/dao/CHANGELOG.md index daa7b7de..2d9bec6e 100644 --- a/dao/CHANGELOG.md +++ b/dao/CHANGELOG.md @@ -1,5 +1,36 @@ # Changelog 刀 DAO # Day Ahead Optimizer +# 2026.03.0 +### New features: +- Add Fast Forward and Fast Reverse to web interface Home page (PR from @tomvandepoel3) +- Improve error handling. Got "could not convert string to float: unavailable" exception without a reference to the problem HA entity. +This change should help locate and fix such issues. +- All used data of the ml-training are output in debug-level of logging (take care: much data!) +### Change +Changed entity hp heat demand from input_boolean (values "on"/"off") to input_select values "off"/"eco"/"max" (="on") +### Fixes: +- Corrected logging when there are no data in wp-sensor(s) +- Fixed error when retrieving wp-data (reported by @rescla) +- Fixed error that supplied only zero's for missing sensor data of the solar inverter after the first record. 
+- Fixed error when checking runtime hours heatpump and there are no data (reported by @rescla) +- Fixed error and better warning when no data for actual soc level battery (reported by @tonvanboven) +- Fixed error when optional "entity calculated end" (machine) is not defined (reported by @Xelaph) +- Fixed error when hp-stages are not sorted ascending max_power (reported by @Mvdw) +- Update several python modules +- Fixed error report/api with period "morgen" +- Fixed error "reduce hours" with interval "1hour" (pr by @bramgradussen) +- Fixed error missing inverter values at the begin/end of period (reported by @DaBit) +- Fixed error when reducing power during charging at high soc and during discharging +at low soc, taking the mean value of the soc at the start and the soc at the end of the interval (reported by @bartzzz) +- Fixed error with flex setting of "dc_to_bat max power" or "bat_to_dc max power" (reported by @DaBit) +- Fixed error not planning heatpump in first interval when not in run-mode (reported by @f.welvering) +- Missing hour-values (solar-inverters) are filled up by zero's (suggested by @DaBit) +- Fixed error when "-" is used in name of solar-devices (reported by @patrickvorgers and @Asclepius8) +- Made optional battery settings "bat_to_dc max power" and "dc_to_bat max power" flex-setting (feature request by @DaBit) +- Reduce power during charging at high soc and during discharging at low soc (feature requests from @bartzzz and @arjenhiemstra) +- Made check "optimal lower level" lower than "lower limit" (feature request of @mistral2) + # 2026.02.2 - Fix error in calculating heating window boiler - Fixed error in reports and api with interval "vandaag en morgen" @@ -17,7 +48,7 @@ Added missing module tzdata ### Breaking change -The file-format ofthe calculated model is changed (update of module pandas). +The file-format of the calculated model is changed (update of module pandas). The ml_prediction works only after a new training of the models.
### Changes: - Update several python modules @@ -211,7 +242,7 @@ Fix error api prognose pv_dc - You can configure the meteo-model for your data (option, default **harmonie**) - You can configure the max number of attempts (option, default 2)
More info in DOCS.md -- Fixed index-error when more than one batteries are used (reported by @PSMGoossens) +- Fixed index-error when more than one battery is used (reported by @PSMGoossens) - Improved graphical presentation received meteodata - Improved logging getting meteodata - Fixed error handling getting meteo-data diff --git a/dao/config.yaml b/dao/config.yaml index c5dfa35c..7415f38d 100644 --- a/dao/config.yaml +++ b/dao/config.yaml @@ -1,6 +1,6 @@ --- name: 刀 Day Ahead Optimizer -version: 2026.02.2 +version: 2026.03.0 slug: day_ahead_opt description: Home Assistant Community Add-ons for day ahead optimizations url: https://github.com/corneel27/day-ahead diff --git a/dao/lib/__init__.py b/dao/lib/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dao/prog/da_config.py b/dao/lib/da_config.py similarity index 95% rename from dao/prog/da_config.py rename to dao/lib/da_config.py index bf305c70..7d847855 100644 --- a/dao/prog/da_config.py +++ b/dao/lib/da_config.py @@ -2,8 +2,10 @@ import logging import os +from pandas.io.common import file_exists + # from logging import raiseExceptions -from dao.prog.db_manager import DBmanagerObj +from dao.lib.db_manager import DBmanagerObj import sqlalchemy_utils @@ -24,7 +26,10 @@ def __init__(self, file_name: str): self.options = self.parse(file_name) datapath = os.path.dirname(file_name) file_secrets = datapath + "/secrets.json" - self.secrets = self.parse(file_secrets) + if file_exists(file_secrets): + self.secrets = self.parse(file_secrets) + else: + self.secrets = {} def get( self, keys: list, options: dict = None, default=None diff --git a/dao/prog/da_graph.py b/dao/lib/da_graph.py similarity index 100% rename from dao/prog/da_graph.py rename to dao/lib/da_graph.py diff --git a/dao/prog/da_meteo.py b/dao/lib/da_meteo.py similarity index 99% rename from dao/prog/da_meteo.py rename to dao/lib/da_meteo.py index 281fd0f6..7efe1162 100644 --- a/dao/prog/da_meteo.py +++ b/dao/lib/da_meteo.py @@ -8,9 
+8,9 @@ from requests import get import matplotlib.pyplot as plt import knmi -from dao.prog.da_graph import GraphBuilder -from dao.prog.da_config import Config -from dao.prog.db_manager import DBmanagerObj +from dao.lib.da_graph import GraphBuilder +from dao.lib.da_config import Config +from dao.lib.db_manager import DBmanagerObj from sqlalchemy import Table, select, func, and_ diff --git a/dao/prog/da_prices.py b/dao/lib/da_prices.py similarity index 98% rename from dao/prog/da_prices.py rename to dao/lib/da_prices.py index d60dd0d9..4e65a180 100644 --- a/dao/prog/da_prices.py +++ b/dao/lib/da_prices.py @@ -1,18 +1,16 @@ -from dao.prog.da_config import Config +from dao.lib.da_config import Config import pandas as pd -from dao.prog.db_manager import DBmanagerObj +from dao.lib.db_manager import DBmanagerObj from entsoe import EntsoePandasClient import datetime import sys from requests import get, post from nordpool.elspot import Prices import pytz -import tzdata import json import math import pprint as pp import logging -from sqlalchemy import Table, select, and_ class DaPrices: diff --git a/dao/prog/db_manager.py b/dao/lib/db_manager.py similarity index 100% rename from dao/prog/db_manager.py rename to dao/lib/db_manager.py diff --git a/dao/pred/__init__.py b/dao/pred/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dao/pred/check_db_dap.py b/dao/pred/check_db_dap.py new file mode 100644 index 00000000..60844e9f --- /dev/null +++ b/dao/pred/check_db_dap.py @@ -0,0 +1,216 @@ +import datetime + +from sqlalchemy import ( + Table, + Column, + Integer, + DateTime, + String, + BigInteger, + Float, + ForeignKey, + UniqueConstraint, + select, + desc, + insert, + update, + and_, + delete, + literal_column, +) +import pandas as pd + +# from da_base import DaBase +# sys.path.append("../") +from dao.lib.da_config import Config +from version import __version__ +from utils import version_number + + +class CheckDAPDB: + def __init__(self, file_name: str | 
None = None): + self.file_name = file_name + self.config = Config(self.file_name) + self.version = __version__ + self.last_version = None + self.db_da = self.config.get_db_da(key="database_dap", check_create=True) + self.engine = self.db_da.engine + + def upsert_variabel(self, variabel_table, record): + select_variabel = select(variabel_table.c.id).where( + variabel_table.c.id == record[0] + ) + with self.engine.connect() as connection: + variabel_result = connection.execute(select_variabel).first() + if variabel_result: + query = ( + update(variabel_table) + .where(variabel_table.c.id == record[0]) + .values(code=record[1], name=record[2], dim=record[3]) + ) + else: + query = insert(variabel_table).values( + id=record[0], code=record[1], name=record[2], dim=record[3] + ) + with self.engine.connect() as connection: + connection.execute(query) + connection.commit() + return + + def get_all_var_data( + self, + tablename: str, + column_name: str, + ): + """ + Retourneert een dataframe + :param tablename: de naam van de tabel "prognoses" of "values" + :param column_name: de code van het veld + :return: + """ + + variabel_table = Table( + "variabel", self.db_da.metadata, autoload_with=self.engine + ) + values_table = Table(tablename, self.db_da.metadata, autoload_with=self.engine) + query = select( + values_table.c.time.label("time"), + literal_column("'" + column_name + "'").label("code"), + values_table.c.value.label("value"), + ).where( + and_( + variabel_table.c.code == column_name, + values_table.c.variabel == variabel_table.c.id, + ) + ) + query = query.order_by("time") + + with self.engine.connect() as connection: + df = pd.read_sql(query, connection) + return df + + def delete_all_var_data( + self, + tablename: str, + variabel_id: int, + ): + values_table = Table(tablename, self.db_da.metadata, autoload_with=self.engine) + delete_stmt = delete(values_table).where( + values_table.c.variabel == variabel_id, + ) + with self.engine.connect() as connection: + 
connection.execute(delete_stmt) + connection.commit() + return + + def update_db_da(self): + # Defining the Engine + # Create the Metadata Object + metadata = self.db_da.metadata + # Define the version table + version_table = Table( + "version", + metadata, + Column("id", Integer, primary_key=True, autoincrement=True), + Column("moment", DateTime, unique=True), + Column("value", String(20), unique=True), + ) + # Create the version table (if not exists) + metadata.create_all(self.engine) + l_version = 20251201 + + query = select(version_table.c.moment, version_table.c.value).order_by( + desc(version_table.c.moment) + ) + with self.engine.connect() as connection: + rows = pd.read_sql(query, connection) + if len(rows) >= 1: + self.last_version = rows.iloc[0]["value"] + l_version = version_number(self.last_version) + n_version = version_number(self.version) + + variabel_tabel = Table( + "variabel", + metadata, + Column("id", Integer, primary_key=True, autoincrement=True), + Column("code", String(15), unique=True, nullable=False), + Column("name", String(50), unique=True, nullable=False), + Column("dim", String(10), nullable=False), + sqlite_autoincrement=True, # Ensure SQLite uses AUTOINCREMENT + ) + + if l_version <= 20260101: + # check variabel + # Create the version table (if not exists) + variabel_tabel.create(self.engine) + records = [ + [1, "cons", "Verbruik", "MWh"], + [2, "prod_zon", "Productie zon", "MWh"], + [3, "prod_wind", "Productie wind (land)", "MWh"], + [4, "prod_zeewind", "Productie wind (zee)", "MWh"], + [5, "da", "Day Ahead prijs epex", "euro/kWh"], + ] + + for i in range(len(records)): + record = records[i] + self.upsert_variabel(variabel_tabel, record) + + print('Table "variabel" met inhoud gecreeerd.') + + # table "values" maken + values_tabel = Table( + "values", + metadata, + Column("id", Integer, primary_key=True, autoincrement=True), + Column( + "variabel", + Integer, + ForeignKey("variabel.id", ondelete="CASCADE"), + nullable=False, + ), + 
Column("time", BigInteger, nullable=False), + Column("value", Float), + UniqueConstraint("variabel", "time"), + sqlite_autoincrement=True, # Ensure SQLite uses AUTOINCREMENT + ) + values_tabel.create(self.engine) + + print('Table "values" gecreeerd.') + prognoses_tabel = Table( + "prognoses", + metadata, + Column("id", Integer, primary_key=True, autoincrement=True), + Column( + "variabel", + Integer, + ForeignKey("variabel.id", ondelete="CASCADE"), + nullable=False, + ), + Column("time", BigInteger, nullable=False), + Column("value", Float), + UniqueConstraint("variabel", "time"), + sqlite_autoincrement=True, # Ensure SQLite uses AUTOINCREMENT + ) + prognoses_tabel.create(self.engine) + print('Table "prognoses" gecreeerd.') + + if l_version < n_version: + # update version number database + moment = datetime.datetime.fromtimestamp( + round(datetime.datetime.now().timestamp()) + ) + insert_query = insert(version_table).values( + moment=moment, value=self.version + ) + with self.engine.connect() as connection: + connection.execute(insert_query) + connection.commit() + + +def main(): + checkdb = CheckDAPDB("options_dap.json") + checkdb.update_db_da() + + +if __name__ == "__main__": + main() diff --git a/dao/pred/da_predictor.py b/dao/pred/da_predictor.py new file mode 100644 index 00000000..2ff1478b --- /dev/null +++ b/dao/pred/da_predictor.py @@ -0,0 +1,1362 @@ +""" +Solar Production Prediction Module + +This module provides functionality to train XGBoost models for predicting +hourly solar production based on weather data and historical solar output. 
+""" + +import pandas as pd +import numpy as np +import joblib +import os +import sys +import warnings +from typing import Union, Dict, Any +import datetime as dt +import logging +import copy +import json +import requests + +from dao.lib.da_config import Config +from dao.lib.da_prices import DaPrices + +# ML imports +from xgboost import XGBRegressor +from sklearn.model_selection import GridSearchCV +from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error +from scipy import stats + + +warnings.filterwarnings("ignore") + +# constants +API_URL = "https://api.ned.nl/v1/utilizations" +CONF_FORECAST_HOURS = 120 +# NED API Data Types +DATA_TYPE_WIND_ONSHORE = 1 +DATA_TYPE_SOLAR = 2 +DATA_TYPE_WIND_OFFSHORE = 51 +DATA_TYPE_CONSUMPTION = 59 + +# NED API Classifications +CLASSIFICATION_FORECAST = 1 +CLASSIFICATION_CURRENT = 2 + +# NED API Activities +ACTIVITY_PRODUCTION = 1 +ACTIVITY_CONSUMPTION = 2 + +# NED API Granularity +GRANULARITY_HOURLY = 5 +GRANULARITY_TIMEZONE_CET = 1 + +# Sensor definitions +DATA_TYPES = { + "wind_onshore": { + "name": "NED Forecast Wind Onshore", + "icon": "mdi:wind-turbine", + "type_id": DATA_TYPE_WIND_ONSHORE, + "activity": ACTIVITY_PRODUCTION, + "unit": "MW", + "code": "prod_wind", + }, + "wind_offshore": { + "name": "NED Forecast Wind Offshore", + "icon": "mdi:wind-turbine", + "type_id": DATA_TYPE_WIND_OFFSHORE, + "activity": ACTIVITY_PRODUCTION, + "unit": "MW", + "code": "prod_zeewind", + }, + "solar": { + "name": "NED Forecast Solar", + "icon": "mdi:solar-power", + "type_id": DATA_TYPE_SOLAR, + "activity": ACTIVITY_PRODUCTION, + "unit": "MW", + "code": "prod_zon", + }, + "consumption": { + "name": "NED Forecast Consumption", + "icon": "mdi:transmission-tower", + "type_id": DATA_TYPE_CONSUMPTION, + "activity": ACTIVITY_CONSUMPTION, + "unit": "MW", + "code": "cons", + }, +} + +""" +"lng_price": { + "name": "Aardgas prijs", + "icon": "mdi:transmission-tower", + "type_id": DATA_TYPE_CONSUMPTION, + "activity": 
ACTIVITY_CONSUMPTION, + "unit": "euro/m3", + "code": "da_gas", +}, +""" + + +class DAPredictor: + """ + A comprehensive solar production prediction system using XGBoost. + + This class handles data preprocessing, outlier detection, feature engineering, + model training, and prediction for hourly solar production forecasting. + """ + + def __init__(self, random_state: int = 42, file_name: str = None): + """ + Initialize the DaPredictor. + """ + self.file_name = file_name + self.random_state = random_state + self.ned_nl_api_key = ( + "36e4cd847cc428f204c58b19a3d626be19237414deb55be44ace33809750b223" + # "78fdc0ef7a33f13f410051ec9593a80968d1acff349c9df55a3a52842d9f2b7f" + # "307a6562a5d99ebaede09d043b7ec66cce06f0ab16a147c7b42d671baddf0f9b" + ) + self.model_save_path = "../data/prediction/models/da_prediction.pkl" + self.model = None + self.log_level = logging.INFO + logging.getLogger().setLevel(self.log_level) + self.feature_columns = [ + "day_of_week", + "hour", + "quarter", + "month", + "season", + "week_nr", + ] + self.is_trained = False + self.training_stats = {} + self.forecast_hours: int = 96 + self.config = Config(self.file_name) + self.db_da = self.config.get_db_da(key="database_dap") + + def _fetch_ned_nl_data( + self, + type_id: int, + activity: int, + classification: int, + db_code: str, + start_date: dt.datetime, + end_date: dt.datetime, + ) -> pd.DataFrame: + """ + Haalt data op ij ned.nl en slaat op in database + :param type_id: + :param activity: + :param classification: + :param start_date: + :param end_date: + :return: + """ + url = API_URL + # url = "https://api.ned.nl/v1/utilizations" + headers = { + "X-AUTH-TOKEN": self.ned_nl_api_key, + "accept": "application/ld+json", + } + + params = { + "point": 0, + "type": type_id, + "granularity": GRANULARITY_HOURLY, + "granularitytimezone": GRANULARITY_TIMEZONE_CET, + "classification": classification, + "activity": activity, + "validfrom[after]": start_date.strftime("%Y-%m-%d"), + 
"validfrom[strictly_before]": end_date.strftime("%Y-%m-%d"), + } + """ + Point: 0 - Netherlands + Type: 2 - Solar + Granularity 3 - 10 min + Timezone 1 - UTC + Classification 2 – current + Activity 1 - providing + Validfromstrictlybefore 2020-11-17 + Validfromafter 2020-11-16 + """ + + try: + response = requests.get( + url, headers=headers, params=params, allow_redirects=False + ) + if response.status_code == 401: + raise ValueError("Invalid API key") + if response.status_code == 403: + raise ValueError("API access forbidden - check your API key") + + if response.status_code != 200: + error_text = response.text + logging.error( + "NED API returned status %s for type %s: %s", + response.status_code, + type_id, + error_text, + ) + return [] + + data = json.loads(response.text) + records = data.get("hydra:member", []) + + if not records: + logging.warning("No data returned for type %s", type_id) + return [] + + save_df = pd.DataFrame(columns=["time", "tijd", "code", "value"]) + for record in records: + tijd = pd.to_datetime(record["validfrom"]) + time = pd.Timestamp(tijd).timestamp() + time = str(round(time)) + row = [time, tijd, db_code, record["volume"] / 1000.0] + save_df.loc[save_df.shape[0]] = row + + table = ( + "values" if classification == CLASSIFICATION_CURRENT else "prognoses" + ) + self.db_da.savedata(save_df, table) + return tijd + + except ConnectionError as ex: + logging.exception("Unexpected error fetching data for type %s", type_id) + return None + + def update_data(self, classification: int, tot: dt.datetime = None): + if tot is None: + if classification == CLASSIFICATION_CURRENT: + tot = dt.date.today() + else: + tot = dt.date.today() + dt.timedelta(days=7) + + for key, data in DATA_TYPES.items(): + # laatste record + if classification == CLASSIFICATION_CURRENT: + table = "values" + latest_record = self.db_da.get_time_border_record( + data["code"], latest=True, table_name=table + ) + else: + table = "prognoses" + latest_record = dt.datetime.now() - 
dt.timedelta(days=1) + if latest_record is None: + if classification == CLASSIFICATION_CURRENT: + latest_record = dt.datetime(year=2025, month=1, day=1) + else: + latest_record = dt.datetime.now() - dt.timedelta(days=1) + logging.info( + f"Data van {data['code']} {classification} aanwezig tot en met {latest_record}" + ) + first_date = (latest_record + dt.timedelta(days=1)).date() + while first_date < tot: + latest_record = self._fetch_ned_nl_data( + data["type_id"], + data["activity"], + classification, + data["code"], + first_date, + tot, + ) + if ( + pd.Timestamp(latest_record).timestamp() + < pd.Timestamp(first_date).timestamp() + ): + break + logging.info( + f"Data ned.nl opgehaald {data['code']} {classification} vanaf {first_date} " + f"tot en met {latest_record}" + ) + first_date = (latest_record + dt.timedelta(days=1)).date() + + def import_knmi_df(self, start: dt.datetime, end: dt.datetime): + """ + haalt data op bij knmi en slaat deze op in dao-database + :param start: begin-datum waarvan data aanwezig moeten zijn + :parame end: datum tot data aanwezig meten zijn + :return: + """ + """ + # import and delete meteo-files + meteo_files = [] + map = "../data/prediction/meteo/" + for f in os.listdir(map): + if not f ==".keep" and os.path.isfile(map+f): + meteo_files.append(map+f) + for meteo_file in meteo_files: + self.import_weatherdata(meteo_file) + """ + # get dataframe with knmi-py + # datetime of latest data-reord + logging.info( + f"KNMI-weerstation: {self.knmi_station} {knmi.stations[int(self.knmi_station)].name}" + ) + first_dt = self.db_da.get_time_border_record("gr", latest=False) + latest_dt = self.db_da.get_time_border_record("gr", latest=True) + if latest_dt is None: # er zijn nog geen data + logging.info(f"Er zijn nog geen knmi-data aanwezig") + self.get_and_save_knmi_data(start, end) + first_dt = self.db_da.get_time_border_record("gr", latest=False) + latest_dt = self.db_da.get_time_border_record("gr", latest=True) + else: + logging.info(f"Er 
zijn knmi-data aanwezig vanaf {first_dt} tot {latest_dt}") + if first_dt <= start and latest_dt >= end: + logging.info(f"Er worden geen knmi-data opgehaald") + return None + if first_dt > start: + self.get_and_save_knmi_data(start, first_dt) + if latest_dt < end: + self.get_and_save_knmi_data(latest_dt, end) + return None + + def get_weatherdata( + self, start: dt.datetime, end: dt.datetime | None = None, prognose: bool = False + ) -> pd.DataFrame: + """ + vult database aan met ontbrekende data + load ned_nl_data from dao-database + :param start: begindatum laden vanaf + :param end: einddatum if None: tot gisteren 00:00 + :param prognose: boolean, False: meetdata ophalen + True: prognoses ophalen + :return: dataframe with weatherdata + """ + # haal ontbrekende data op bij knmi + + if end is None: + end = dt.datetime.now() + if not prognose: + # knmi data evt aanvullen + self.import_knmi_df(start, end) + + if prognose: + table_name = "prognoses" + else: + table_name = "values" + start = dt.datetime(start.year, start.month, start.day, start.hour) + # get weather-dataframe from database + weather_data = pd.DataFrame(columns=["utc", "gr", "temp"]) + for weather_item in weather_data.columns[1:]: + df_item = self.db_da.get_column_data( + table_name, weather_item, start=start, end=end + ) + if len(weather_data) == 0: + weather_data["utc"] = df_item["utc"] + weather_data[weather_item] = df_item["value"] + weather_data["utc"] = pd.to_datetime(weather_data["utc"], unit="s", utc=True) + weather_data = weather_data.set_index(weather_data["utc"]) + weather_data = weather_data.rename( + columns={"utc": "datetime", "gr": "irradiance", "temp": "temperature"} + ) + return weather_data + + def import_ned_nl_files( + self, + ): + files = { + "2025_zon": { + "file_name": "zon-2025-uur-data.csv", + "dap_code": "prod_zon", + "data_type": DATA_TYPE_SOLAR, + "classification": CLASSIFICATION_CURRENT, + }, + "2026_zon": { + "file_name": "zon-2026-uur-data.csv", + "dap_code": "prod_zon", + 
"data_type": DATA_TYPE_SOLAR, + "classification": CLASSIFICATION_CURRENT, + }, + "2025_wind": { + "file_name": "wind-2025-uur-data.csv", + "dap_code": "prod_wind", + "data_type": DATA_TYPE_WIND_ONSHORE, + "classification": CLASSIFICATION_CURRENT, + }, + "2026_wind": { + "file_name": "wind-2026-uur-data.csv", + "dap_code": "prod_wind", + "data_type": DATA_TYPE_WIND_ONSHORE, + "classification": CLASSIFICATION_CURRENT, + }, + "2025_zeewind": { + "file_name": "zeewind-2025-uur-data.csv", + "dap_code": "prod_zeewind", + "data_type": DATA_TYPE_WIND_OFFSHORE, + "classification": CLASSIFICATION_CURRENT, + }, + "2026_zeewind": { + "file_name": "zeewind-2026-uur-data.csv", + "dap_code": "prod_zeewind", + "data_type": DATA_TYPE_WIND_OFFSHORE, + "classification": CLASSIFICATION_CURRENT, + }, + "2025_prod": { + "file_name": "electriciteitsmix-2025-uur-data.csv", + "dap_code": "prod_totaal", + "data_type": DATA_TYPE_CONSUMPTION, + "classification": CLASSIFICATION_CURRENT, + }, + "2026_prod": { + "file_name": "electriciteitsmix-2026-uur-data.csv", + "dap_code": "prod_totaal", + "data_type": DATA_TYPE_CONSUMPTION, + "classification": CLASSIFICATION_CURRENT, + }, + } + for key, file_data in files.items(): + csv_data = pd.read_csv("../data/" + file_data["file_name"]) + csv_df = csv_data.filter( + ["validfrom (UTC)", "volume (kWh)", "clasification"] + ) + csv_df.rename( + {"validfrom (UTC)": "datetime", "volume (kWh)": "value"}, + axis=1, + inplace=True, + ) + csv_df["time"] = pd.to_datetime(csv_df["datetime"]).astype(int) / 10**6 + csv_df["time"] = csv_df["time"].astype(int).astype(str) + csv_df["code"] = file_data["dap_code"] + csv_df["value"] = csv_df["value"] / 1000 + table_name = ( + "values" + if file_data["classification"] == CLASSIFICATION_CURRENT + else "prognoses" + ) + self.db_da.savedata(csv_df, table_name) + return None + + def import_gas_prijzen(self): + csv_data = pd.read_csv("../data/dynamische_gasprijzen.csv", delimiter=";") + df = pd.DataFrame(columns=["time", 
"tijd", "code", "value"]) + for row in csv_data.itertuples(): + value = float(row.prijs_excl_belastingen.replace(",", ".")) + datum = pd.to_datetime(row.datum) + for uur in range(24): + tijd = datum + dt.timedelta(hours=uur) + time = str(int(tijd.timestamp())) + values = [time, tijd, "da_gas", value] + df.loc[df.shape[0]] = values + self.db_da.savedata(df, "values") + + def fetch_recent_ned_nl_data( + self, classification: int, start: dt.datetime, end: dt.datetime + ) -> pd.DataFrame: + files = { + "zon": { + "dap_code": "prod_zon", + "data_type": DATA_TYPE_SOLAR, + "classification": CLASSIFICATION_CURRENT, + }, + "wind": { + "dap_code": "prod_wind", + "data_type": DATA_TYPE_WIND_ONSHORE, + "classification": CLASSIFICATION_CURRENT, + }, + "zeewind": { + "dap_code": "prod_zeewind", + "data_type": DATA_TYPE_WIND_OFFSHORE, + "classification": CLASSIFICATION_CURRENT, + }, + "prod": { + "dap_code": "prod_totaal", + "data_type": DATA_TYPE_CONSUMPTION, + "classification": CLASSIFICATION_CURRENT, + }, + } + + count = 0 + start_utc = pd.to_datetime(start, utc=True) + end_utc = pd.to_datetime(end, utc=True) + for key, data_type in DATA_TYPES.items(): + csv_df = pd.DataFrame() + if classification == CLASSIFICATION_CURRENT: + if "file_name" in data_type: + csv_data = pd.read_csv(data_type["file_name"]) + csv_df = csv_data.filter(["validfrom (UTC)", "volume (kWh)"]) + csv_df.rename( + {"validfrom (UTC)": "datetime", "volume (kWh)": key}, + axis=1, + inplace=True, + ) + csv_df["datetime"] = pd.to_datetime(csv_df["datetime"], utc=True) + csv_df = csv_df[csv_df["datetime"] >= start_utc] + csv_df = csv_df[csv_df["datetime"] <= end_utc] + if len(csv_df) > 0: + start = max(start_utc, csv_df["datetime"].iloc[-1]) + csv_df = csv_df.set_index(csv_df["datetime"], drop=False) + else: + start = start_utc + csv_df = pd.DataFrame() + fetch_count = 0 + start = start_utc + while start < end_utc: + start_date = start.strftime("%Y-%m-%d") + end_date = end.strftime("%Y-%m-%d") + if end_date <= 
start_date: + break + data_slice = self._fetch_ned_nl_data( + key, + data_type["type_id"], + data_type["activity"], + classification, + start_date, + end_date, + ) + if len(data_slice) > 0: + start = pd.to_datetime(data_slice["datetime"].iloc[-1]).tz_convert( + "CET" + ) + dt.timedelta(hours=1) + else: + break + if fetch_count == 0: + data = data_slice + else: + data = pd.concat([data, data_slice]) + fetch_count += 1 + if len(csv_df) > 0: + data = pd.concat([csv_df, data]) + if count == 0: + result = data + else: + data.drop_duplicates(inplace=True) + result[key] = data[key] + count += 1 + return result + + def updata_gasprices(self): + url = "https://enever.nl/apiv3/gasprijs_laatste30dagen.php?token=3762b807802f28b4fb1dafeda4340c35" + """ + { + "status": "true", + "data": [ + { + "datum": "2026-02-21T06:00:00+01:00", + "prijsEGSI": "0.307337", + "prijsEOD": "0.299780", + "prijsANWB": "1.157788", + ..... + "prijsZP": "1.178678" + }, + { + "datum": "2026-02-20T06:00:00+01:00", + "prijsEGSI": "0.323671", + "prijsEOD": "0.320250", + "prijsANWB": "1.177553", + ..... +""" + response = requests.get(url) + if response.status_code == 200: + data = json.loads(response.text) + if data["status"] == "true": + df = pd.DataFrame(columns=["time", "tijd", "code", "value"]) + for record in data["data"]: + datum = pd.to_datetime(record["datum"]) + value = float(record["prijsEGSI"]) + for uur in range(24): + tijd = datum + dt.timedelta(hours=uur) + time = str(int(tijd.timestamp())) + values = [time, tijd, "da_gas", value] + df.loc[df.shape[0]] = values + self.db_da.savedata(df, "values") + + def update_prices(self, start: dt.datetime): + da_prices = DaPrices(self.config, self.db_da) + da_prices.get_prices("nordpool", _start=start) + + def _create_features(self, df: pd.DataFrame) -> pd.DataFrame: + """ + Perform feature engineering on a time-indexed DataFrame for energy modeling. 
+ + This function generates time-based and weather-related features from + the input DataFrame, which is assumed to have a DatetimeIndex and + columns for 'temperature' and 'irradiance'. It also computes derived + performance metrics for solar energy. + + Parameters + ---------- + df : pandas.DataFrame + Input DataFrame with at least the following columns: + - 'temperature' : float, ambient temperature in °C + - 'irradiance' : float, solar irradiance in J/cm²/h + The DataFrame index must be a pandas.DatetimeIndex. + + Returns + ------- + pandas.DataFrame + A new DataFrame with the following additional columns: + - 'day_of_week' : int, day of the week (0=Monday, 6=Sunday) + - 'hour' : int, hour of the day (0–23) + - 'quarter' : int, quarter of the year (1–4) + - 'month' : int, month of the year (1–12) + - 'season' : int, mapped season (0=winter, 1=spring, 2=summer, 3=autumn) + - weeknr: int + + """ + df = copy.deepcopy(df) + df["day_of_week"] = df.index.dayofweek + df["hour"] = df.index.hour + df["quarter"] = df.index.quarter + df["month"] = df.index.month + df["season"] = df.index.month.map( + { + 12: 0, + 1: 0, + 2: 0, # winter + 3: 1, + 4: 1, + 5: 1, # spring + 6: 2, + 7: 2, + 8: 2, # summer + 9: 3, + 10: 3, + 11: 3, # autumn + } + ) + df["week_nr"] = df.index.isocalendar().week + df["time"] = pd.to_datetime(df.index) + df.drop("time", axis=1, inplace=True) + logging.debug(f"Data with all features\n{df.to_string()}") + self.feature_columns = df.columns + return df + + def get_ned_nl_data(self, classification, start, end): + count = 0 + table_name = ( + "values" if classification == CLASSIFICATION_CURRENT else "prognoses" + ) + for key, data in DATA_TYPES.items(): + df_data = self.db_da.get_column_data(table_name, data["code"], start, end) + if count == 0: + result_df = df_data + result_df.rename(columns={"value": data["code"]}, inplace=True) + else: + result_df[data["code"]] = df_data["value"] + count += 1 + result_df["utc"] = pd.to_datetime(result_df["utc"], 
unit="s", utc=True) + result_df = result_df.set_index(result_df["utc"]) + result_df.rename(columns={"utc": "datetime"}, inplace=True) + result_df.drop(["uur", "time", "datasoort"], axis=1, inplace=True) + return result_df + + def _load_and_process_ned_nl_data(self, ned_nl_data: pd.DataFrame) -> pd.DataFrame: + """ + Load and process ned_nl data. + + Args: + ned_nl_data: Path to CSV file or pandas DataFrame + + Returns: + Processed weather DataFrame with features + """ + ned_nl_df = ned_nl_data.copy() + + # Ensure datetime column exists + if "datetime" in ned_nl_df.columns: + ned_nl_df["datetime"] = pd.to_datetime(ned_nl_df["datetime"]) + ned_nl_df = ned_nl_df.set_index("datetime") + elif not isinstance(ned_nl_df.index, pd.DatetimeIndex): + raise ValueError( + "Weather data must have a 'datetime' column or DatetimeIndex" + ) + + # Validate required columns + # required_cols = ['temperature', 'irradiance'] + # missing_cols = [col for col in required_cols if col not in weather_df.columns] + # if missing_cols: + # raise ValueError(f"Weather data missing required columns: {missing_cols}") + + # Create features + return self._create_features(ned_nl_df) + + def _load_and_process_da_data(self, da_data: pd.DataFrame) -> pd.DataFrame: + """ + Load and process solar production data. + + Args: + da_data: pandas DataFrame + + Returns: + Processed da DataFrame + """ + da_df = da_data.copy() + + # Process datetime index + if "datetime" in da_df.columns: + da_df["datetime"] = pd.to_datetime(da_df["datetime"]) + da_df = da_df.set_index("datetime") + elif not isinstance(da_df.index, pd.DatetimeIndex): + raise ValueError("DA data must have a 'datetime' column or DatetimeIndex") + + # Ensure solar_kwh column exists + if "da" not in da_df.columns: + raise ValueError("DA data must contain 'da' column") + + return da_df + + def _detect_outliers(self, merged_data: pd.DataFrame) -> pd.DataFrame: + """ + Comprehensive outlier detection for solar production data. 
+ + Uses a three-method approach to identify and remove outliers: + + 1. **Statistical outliers**: Z-score > 3 (values more than 3 standard deviations from mean) + 2. **IQR outliers**: Values outside Q1 - 1.5*IQR or Q3 + 1.5*IQR range + 3. **Physics-based outliers**: Values exceeding theoretical maximum production by hour + + A data point is flagged as an outlier only if detected by 2+ methods, + reducing false positives while catching genuine anomalies. + + Additionally applies seasonal context outlier detection based on the + correlation between solar production and irradiance within season-hour groups. + + Args: + merged_data: Merged weather and solar data + + Returns: + Clean data with outliers removed + """ + logging.info("Detecting outliers...") + original_size = len(merged_data) + + # 1. Context-aware outlier detection by hour + outlier_mask = pd.Series(False, index=merged_data.index) + + for hour in range(24): + hour_data = merged_data[merged_data["hour"] == hour] + if len(hour_data) < 10: + continue + + solar_values = hour_data["solar_kwh"] + + # Statistical outliers (Z-score > 3) + z_scores = np.abs(stats.zscore(solar_values)) + statistical_outliers = z_scores > 3 + + # IQR method + Q1 = solar_values.quantile(0.25) + Q3 = solar_values.quantile(0.75) + IQR = Q3 - Q1 + iqr_outliers = (solar_values < (Q1 - 1.5 * IQR)) | ( + solar_values > (Q3 + 1.5 * IQR) + ) + + # Physics-based constraints: Maximum reasonable solar production by hour + # These values represent theoretical upper bounds for a typical residential + # solar installation (4-6kW system) under ideal conditions. 
+ # + # Logic: + # - Night hours (20-05): Virtually no production (0.1 kWh max for measurement noise) + # - Dawn/Dusk (6, 19): Low production as sun is at low angles + # - Morning ramp (7-9): Increasing production as sun rises + # - Peak hours (10-14): Maximum production when sun is highest + # - Afternoon decline (15-18): Decreasing as sun sets + # + # Note: These are conservative estimates and may need adjustment for: + # - Larger installations (scale proportionally) + # - Different latitudes (seasonal variation) + # - Local climate conditions + + # Use configurable physics-based constraints + physics_outliers = solar_values > self.max_hourly_production.get(hour, 5.5) + + # Combine methods (outlier if flagged by 2+ methods) + combined_outliers = ( + statistical_outliers.astype(int) + + iqr_outliers.astype(int) + + physics_outliers.astype(int) + ) >= 2 + + outlier_mask.loc[hour_data.index] = combined_outliers + + # 2. Seasonal context outlier detection + seasonal_outlier_mask = pd.Series(False, index=merged_data.index) + clean_data = merged_data[~outlier_mask] + + for season in clean_data["season"].unique(): + for hour in range(6, 20): # Daylight hours only + mask = (clean_data["season"] == season) & (clean_data["hour"] == hour) + season_hour_data = clean_data[mask] + + if len(season_hour_data) < 20: + continue + + if season_hour_data["irradiance"].std() > 0: + irradiance_corr = season_hour_data["solar_kwh"].corr( + season_hour_data["irradiance"] + ) + + if irradiance_corr > 0.5: + # Use direct irradiance vs solar production ratio for outlier detection + irradiance_ratio = season_hour_data["solar_kwh"] / ( + season_hour_data["irradiance"] + 1e-6 + ) + Q1 = irradiance_ratio.quantile(0.25) + Q3 = irradiance_ratio.quantile(0.75) + IQR = Q3 - Q1 + ratio_outliers = (irradiance_ratio < (Q1 - 2.0 * IQR)) | ( + irradiance_ratio > (Q3 + 2.0 * IQR) + ) + seasonal_outlier_mask.loc[season_hour_data.index] = ( + ratio_outliers + ) + + # Apply outlier removal + final_clean_data 
= clean_data[~seasonal_outlier_mask] + + outliers_removed = original_size - len(final_clean_data) + if outliers_removed > 0: + logging.info( + f"Outliers removed: {outliers_removed} " + f"({outliers_removed / original_size * 100:.1f}%)" + ) + if self.log_level >= logging.DEBUG: + outliers = merged_data[~merged_data.isin(final_clean_data).all(axis=1)] + logging.debug(f"Detectted outliers:\n{outliers.to_string()}") + return final_clean_data + + def train_model( + self, + feature_data: pd.DataFrame | None, + da_data: pd.DataFrame, + test_size: float = 0.0, + remove_outliers: bool = False, + tune_hyperparameters: bool = True, + ) -> Dict[str, Any]: + """ + Train the solar prediction model. + + Args: + feature_data: DataFrame with columns: + ['datetime', sev types] + da_data: DataFrame with columns: + ['datetime', 'da'] + test_size: Fraction of data to use for testing + remove_outliers: Whether to apply outlier detection + tune_hyperparameters: Whether to perform hyperparameter tuning + + Returns: + Dictionary with training statistics + """ + logging.info(f"Starting da prediction model da-prices training...") + + # Load and process data + logging.info("Loading and processing data...") + ned_nl_df = self._load_and_process_ned_nl_data(feature_data) + da_df = self._load_and_process_da_data(da_data) + + # Merge datasets + logging.info("Merging ned_nl and da data...") + # Align timezone information + start_ts = ned_nl_df.index[0].timestamp() + if ned_nl_df.index.tz is None: + ned_nl_df.index = ned_nl_df.index.tz_localize("UTC", ambiguous="NaT") + if da_df.index.tz is None: + da_df.index = da_df.index.tz_localize("UTC", ambiguous="NaT") + + ned_nl_df = ned_nl_df.dropna() + da_df = da_df.dropna() + + # historic weighting + def weight(val): + wf = (val.timestamp() - start_ts) / (30 * 24 * 60) + return 1 # wf + + ned_nl_df["weight"] = ned_nl_df.index.to_series().apply(weight) + + merged_data = ned_nl_df.join(da_df, how="inner") + merged_data = merged_data.dropna() + + 
logging.info(f"Merged dataset: {len(merged_data)} records") + logging.info( + f"Date range: {merged_data.index.min()} to {merged_data.index.max()}" + ) + + # Outlier detection + if remove_outliers: + merged_data = self._detect_outliers(merged_data) + logging.info(f"Clean dataset: {len(merged_data)} records") + + # Prepare features and target + X = merged_data[self.feature_columns].copy() + y = merged_data["da"].copy() + X["weight"] = X.index.to_series().apply(weight) + + # Remove any remaining NaN values + mask = ~(X.isnull().any(axis=1) | y.isnull()) + X = X[mask] + y = y[mask] + + split_idx = int((1 - test_size) * len(X)) + if test_size != 0.0: + X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:] + y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:] + else: + X_train = X + y_train = y + X_test = X + y_test = y + + weight_factors = X_train["weight"] + X_train.drop("weight", axis=1, inplace=True) + + logging.debug(f"Training samples: {len(X_train)}") + logging.debug(f"Testing samples: {len(X_test)}") + + tune_hyperparameters = ( + self.config.get( + ["xgboost", "tune_hyperparameters"], None, f"{tune_hyperparameters}" + ).lower() + == "true" + ) + + logging.info(f"Tune hyperparameters: {tune_hyperparameters}") + # Model training + if tune_hyperparameters: + logging.info("Tuning hyperparameters...") + param_grid = { + "n_estimators": [100, 200, 300], + "max_depth": [3, 4, 6], + "learning_rate": [0.05, 0.1, 0.15], + "subsample": [0.8, 0.9], + } + param_grid = self.config.get(["xgboost", "param_grid"], None, param_grid) + logging.info(f"Parameter grid: {param_grid}") + + # Use subset for faster grid search + subset_size = min(5000, len(X_train)) + X_train_subset = X_train.iloc[:subset_size] + y_train_subset = y_train.iloc[:subset_size] + wf_train_subset = weight_factors.iloc[:subset_size] + + grid_search = GridSearchCV( + estimator=XGBRegressor( + random_state=self.random_state, + objective="reg:squarederror", + weight_factors=wf_train_subset, + ), + 
param_grid=param_grid, + cv=3, + scoring="neg_mean_absolute_error", + n_jobs=-1, + ) + + grid_search.fit(X_train_subset, y_train_subset) + best_params = grid_search.best_params_ + logging.info(f"Best parameters: {best_params}") + else: + # Use default parameters + best_params = { + "n_estimators": 200, + "max_depth": 6, + "learning_rate": 0.1, + "subsample": 0.8, + } + best_params = self.config.get(["xgboost", "parameters"], None, best_params) + + # Train final model + logging.info("Training final model...") + logging.info(f"Parameters: {best_params}") + self.model = XGBRegressor( + **best_params, + random_state=self.random_state, + objective="reg:squarederror", + ) + self.model.fit(X_train, y_train, sample_weight=weight_factors) + + # Evaluate model + y_train_pred = self.model.predict(X_train) + y_test_pred = self.model.predict(X_test) + + # Calculate metrics + train_mae = mean_absolute_error(y_train, y_train_pred) + test_mae = mean_absolute_error(y_test, y_test_pred) + train_r2 = r2_score(y_train, y_train_pred) + test_r2 = r2_score(y_test, y_test_pred) + train_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred)) + test_rmse = np.sqrt(mean_squared_error(y_test, y_test_pred)) + + # Store training statistics + self.training_stats = { + "train_mae": train_mae, + "test_mae": test_mae, + "train_r2": train_r2, + "test_r2": test_r2, + "train_rmse": train_rmse, + "test_rmse": test_rmse, + "training_samples": len(X_train), + "testing_samples": len(X_test), + "feature_importance": dict( + zip(self.feature_columns, self.model.feature_importances_) + ), + "mean_target": y.mean(), + "std_target": y.std(), + "best_params": best_params if tune_hyperparameters else "default", + } + + # Save model + os.makedirs( + os.path.dirname(self.model_save_path) + if os.path.dirname(self.model_save_path) + else ".", + exist_ok=True, + ) + joblib.dump(self.model, self.model_save_path) + self.is_trained = True + + logging.info(f"Model training van da-prediction complete") + 
logging.info(f"Model saved to: {self.model_save_path}") + logging.info(f"Training MAE: {train_mae:.4f} eur/kWh") + logging.info(f"Testing MAE: {test_mae:.4f} eur/kWh") + logging.info(f"Training R²: {train_r2:.4f}") + logging.info(f"Testing R²: {test_r2:.4f}") + logging.info("Sorted features:") + importance = self.training_stats["feature_importance"] + sorted_features = sorted(importance.items(), key=lambda x: x[1], reverse=True) + for i, (feature, score) in enumerate(sorted_features): + logging.info(f" {i + 1}. {feature}: {score:.3f}") + return self.training_stats + + def predict( + self, ned_nl_data: Union[Dict[str, float], pd.DataFrame] + ) -> Union[float, np.ndarray]: + """ + Make predictions using the trained model. + + Args: + ned_nl_data: Either a dictionary with single prediction data or DataFrame with multiple predictions + For single prediction (dict), required keys: temperature, irradiance, datetime + For batch prediction (DataFrame), required columns: temperature, irradiance, datetime + + Returns: + Predicted solar production in kWh + """ + if not self.is_trained or self.model is None: + raise ValueError( + "Model must be trained before making predictions. Call train() first." 
+ ) + + if isinstance(ned_nl_data, dict): + # Single prediction - convert to DataFrame and process + if "datetime" not in ned_nl_data: + raise ValueError("Single prediction requires 'datetime' key") + + # Create single-row DataFrame + single_df = pd.DataFrame([ned_nl_data]) + single_df["datetime"] = pd.to_datetime(single_df["datetime"]) + single_df = single_df.set_index("datetime") + + # Process through feature engineering + processed_df = self._create_features(single_df) + + # Extract features and make prediction + features = processed_df[self.feature_columns].iloc[0:1] + prediction = self.model.predict(features)[0] + return max(0, prediction) # Ensure non-negative + + else: + # Multiple predictions + if not isinstance(ned_nl_data, pd.DataFrame): + raise ValueError("ned_nl_data must be a dictionary or pandas DataFrame") + + # Process weather data using the standard method + ned_nl_data = self._load_and_process_ned_nl_data(ned_nl_data) + + # Select required features + featured_df = ned_nl_data[self.feature_columns] + if len(featured_df) == 0: + prediction = [] + else: + prediction = self.model.predict(featured_df) + prediction = np.maximum(0, prediction) # Ensure non-negative + result = pd.DataFrame( + {"date_time": featured_df.index, "prediction": prediction} + ) + result.set_index("date_time", inplace=True, drop=False) + return result + + def load_model(self, model_path: str): # -> 'SolarPredictor': + """ + Load a trained model from disk. + + Args: + model_path: Path to the saved model file + + Returns: + None # SolarPredictor instance with loaded model + """ + if not os.path.exists(model_path): + raise FileNotFoundError(f"Model file not found: {model_path}") + + # predictor = cls() + self.model = joblib.load(model_path) + self.is_trained = True + + return None + + def get_feature_importance(self) -> Dict[str, float]: + """ + Get feature importance from the trained model. 
+ + Returns: + Dictionary mapping feature names to importance scores + """ + if not self.is_trained or self.model is None: + raise ValueError("Model must be trained before getting feature importance") + + return dict(zip(self.feature_columns, self.model.feature_importances_)) + + def get_da_data( + self, classification: int, start: dt.datetime, end: dt.datetime + ) -> pd.DataFrame: + """ + haalt da_data op uit DAO database + :param start: begindatum + :param entities: list van sensoren van ha + :return: + """ + + if classification == CLASSIFICATION_FORECAST: + table_name = "prognoses" + else: + table_name = "values" + result_df = self.db_da.get_column_data( + table_name, "da", start, end, agg_func="avg" + ) + result_df["utc"] = pd.to_datetime(result_df["utc"], unit="s", utc=True) + result_df = result_df.set_index(result_df["utc"]) + result_df = result_df.rename(columns={"utc": "datetime", "value": "da"}) + result_df.drop(["uur", "time", "datasoort"], axis=1, inplace=True) + return result_df + + def calc_netto_fossile(self, ned_nl_df: pd.DataFrame) -> pd.DataFrame: + result = ned_nl_df.copy() + result["fossile"] = ( + ned_nl_df["cons"] + - ned_nl_df["prod_wind"] + - ned_nl_df["prod_zeewind"] + - ned_nl_df["prod_zon"] + ) + return result + + def run_train(self): + """ + traint alle gedefinieerde ml-objecten + :param start: optionele begindatum om te trainen, anders een jaar geleden + :return: + """ + classification = CLASSIFICATION_CURRENT + now = dt.datetime.today() + + end = now + start = end - dt.timedelta(days=180) + """ + start = now + end = now + dt.timedelta(days=6) + + start_date = start.strftime("%Y-%m-%d") + end_date = end.strftime("%Y-%m-%d") + + if start is None: + start = now + if end is None: + end = now + dt.timedelta(hours=self.forecast_hours) + else: + if end is None: + + if start is None: + start = end - dt.timedelta(days=365) + start_date = start.strftime("%Y-%m-%d") + end_date = end.strftime("%Y-%m-%d") + """ + + ned_nl_df = self.get_ned_nl_data( 
+ classification=classification, + start=start, + end=end, + ) + ned_nl_df = self.calc_netto_fossile(ned_nl_df) + + da_df = self.get_da_data( + classification, + start=start, + end=end, + ) + self.train_model(ned_nl_df, da_df) + + def predict_da_price(self, start: dt.datetime, end: dt.datetime) -> pd.DataFrame: + """ + berekent de voorspelling voor een pv-installatie + :param start: start-tijdstip voorspelling + :param end: eind-tijdstip voorspelling + :return: dataframe met berekende voorspellingen per uur + """ + + if os.path.isfile(self.model_save_path): + self.load_model(model_path=self.model_save_path) + else: + raise FileNotFoundError( + f"Er is geen model aanwezig voor {self.solar_name},svp eerst trainen." + ) + # latest_dt = self.db_da.get_time_border_record("gr", latest=True, table_name="prognoses") + # prognose = latest_dt < end + ned_nl_data = self.get_ned_nl_data( + classification=CLASSIFICATION_FORECAST, start=start, end=end + ) + ned_nl_data = self.calc_netto_fossile(ned_nl_data) + + prediction = self.predict(ned_nl_data) + # prediction["datetime"] = prediction["date_time"].apply(lambda x: x - dt.timedelta(seconds=1)) + # prediction["datetime"] = prediction["datetime"].tz_localize(GRANULARITY_TIMEZONE_CET) + ned_nl_data["da_prediction"] = prediction["prediction"] + prediction_df = ned_nl_data + da_data = self.get_da_data( + classification=CLASSIFICATION_CURRENT, start=start, end=end + ) + prediction_df["da_epex"] = da_data["da"] + logging.debug(f"ML prediction: \n{prediction_df.to_string()}") + logging.info(prediction) + return prediction, prediction_df + + def show_prediction(self, start, end): + prediction, result_df = self.predict_da_price(start, end) + from dao.lib.da_graph import GraphBuilder + + result_df["time"] = pd.to_datetime(result_df.index).tz_convert( + tz="Europe/Amsterdam" + ) + result_df.reset_index(drop=True, inplace=True) + uur = [] + year = 0 + for row in result_df.itertuples(): + moment = row.time + if moment.hour == 0: + if 
moment.year != year: + uur.append(moment.strftime("%Y-%m-%d %H")) + year = moment.year + month = moment.month + else: + if moment.month != month: + uur.append(moment.strftime("%Y-%m-%d %H")) + month = moment.month + else: + uur.append(moment.strftime("%Y-%m-%d %H")) + elif moment.hour % 6 == 0: + uur.append(moment.hour) + else: + uur.append(None) + result_df["uur"] = uur + style = self.config.get(["graphics", "style"], None, "") + graph_options = { + "title": f"Prognose day_ahead prijzen vanaf {start.strftime('%Y-%m-%d %H:%M')}", + "style": style, + "haxis": {"values": "uur", "title": "uren"}, + "graphs": [ + { + "vaxis": [{"title": "MWh"}], + "series": [ + { + "column": "cons", + "name": "Verbruik", + "type": "bar", + "color": "yellow", + "width": 1, + }, + { + "column": "prod_wind", + "name": "Wind op land", + "type": "stacked", + "color": "#00bfff", + "width": 1, + }, + { + "column": "prod_zeewind", + "name": "Wind op zee", + "type": "stacked", + "color": "blue", + "width": 1, + }, + { + "column": "prod_zon", + "name": "Zon", + "type": "stacked", + "color": "orange", + "width": 1, + }, + ], + }, + { + "vaxis": [{"title": "eur/kwh", "format": "%.2f"}], + "series": [ + { + "column": "da_prediction", + "name": "DA voorspelling", + "type": "step", + "color": "purple", + }, + { + "column": "da_epex", + "name": "DA epex", + "type": "step", + "color": "green", + }, + ], + }, + ], + } + g_builder = GraphBuilder() + plot = g_builder.build(result_df, graph_options) + now = dt.datetime.now() + plot.savefig( + f"../data/images/da_prediction_{now.strftime('%Y-%m-%d %H:%M')}.png" + ) + return plot + + +def main(): + if len(sys.argv) > 1: + arg = sys.argv[1] + else: + arg = None + if len(sys.argv) > 2: + arg2 = sys.argv[2] + start_dt = dt.datetime.strptime(arg2, "%Y-%m-%d") + else: + start_dt = dt.date.today() + if len(sys.argv) > 3: + arg3 = sys.argv[3] + end_dt = dt.datetime.strptime(arg3, "%Y-%m-%d") + else: + end_dt = start_dt + dt.timedelta(days=7) + da_predictor = 
DAPredictor(file_name="options_dap.json") + if arg.lower() == "train": + da_predictor.run_train() + if arg.lower() == "predict": + da_predictor.show_prediction( + start=start_dt, + end=end_dt, + ) + if arg.lower() == "import": + # da_predictor.import_ned_nl_files() + da_predictor.import_gas_prijzen() + if arg.lower() == "update": + da_predictor.update_data(classification=CLASSIFICATION_CURRENT) + da_predictor.update_data(classification=CLASSIFICATION_FORECAST) + da_predictor.updata_gasprices() + da_predictor.update_prices( + dt.datetime(start_dt.year, start_dt.month, start_dt.day) + + dt.timedelta(days=1) + ) + da_predictor.run_train() + da_predictor.show_prediction(start=start_dt, end=end_dt) + if arg.lower() == "show": + da_predictor.show_prediction(start=start_dt, end=end_dt) + if arg.lower() == "prices": + da_predictor.update_prices(start_dt) + + +if __name__ == "__main__": + main() diff --git a/dao/pred/options_dap.json b/dao/pred/options_dap.json new file mode 100644 index 00000000..be9776cf --- /dev/null +++ b/dao/pred/options_dap.json @@ -0,0 +1,20 @@ +{ + "time_zone": "Europe/Amsterdam", + "database_dap": { + "engine": "sqlite", + "db_path": "../data", + "database": "day_ahead_pred.db" + }, + "tibber": { + "api_token": "!secret tibber_api_token" + }, + "prices": { + "//source day ahead": "entsoe", + "source day ahead": "nordpool", + "\\source day ahead": "tibber", + "entsoe-api-key": "!secret entsoe-api-key" + }, + "graphics": { + "style": "Solarize_Light2" + } +} \ No newline at end of file diff --git a/dao/prog/check_db.py b/dao/prog/check_db.py index 060af95e..5b98e432 100644 --- a/dao/prog/check_db.py +++ b/dao/prog/check_db.py @@ -24,7 +24,7 @@ # from da_base import DaBase # sys.path.append("../") -from da_config import Config +from dao.lib.da_config import Config from version import __version__ from utils import version_number diff --git a/dao/prog/da_base.py b/dao/prog/da_base.py index 9362bbb8..a04197bc 100644 --- a/dao/prog/da_base.py +++ 
b/dao/prog/da_base.py @@ -17,9 +17,9 @@ # from dao.prog.solar_predictor import SolarPredictor from dao.prog.utils import get_tibber_data, error_handling from dao.prog.version import __version__ -from dao.prog.da_config import Config -from dao.prog.da_meteo import Meteo -from dao.prog.da_prices import DaPrices +from dao.lib.da_config import Config +from dao.lib.da_meteo import Meteo +from dao.lib.da_prices import DaPrices from dao.prog.utils import interpolate # from db_manager import DBmanagerObj diff --git a/dao/prog/da_report.py b/dao/prog/da_report.py index d99ec552..c2565836 100644 --- a/dao/prog/da_report.py +++ b/dao/prog/da_report.py @@ -9,8 +9,8 @@ from dateutil.relativedelta import relativedelta from pandas.core.dtypes.inference import is_number -from dao.prog.da_config import Config -from dao.prog.da_graph import GraphBuilder +from dao.lib.da_config import Config +from dao.lib.da_graph import GraphBuilder from dao.prog.da_base import DaBase from dao.prog.utils import get_value_from_dict import math @@ -1091,7 +1091,7 @@ def get_sensor_data( # Print the raw DataFrame # fill 0 for missing records - if agg == "uur": + if (agg == "uur") and (sensor_type == "quantity"): columns = list(df_raw.columns.values) df_insert = pd.DataFrame(columns=columns) """ @@ -1104,7 +1104,7 @@ def get_sensor_data( row = None border = self.get_time_border_ha_record(sensor, latest=False) - border = max(border, vanaf) + border = vanaf if border is None else max(border, vanaf) prev_time = pd.to_datetime(border - datetime.timedelta(hours=1)) for row in df_raw.itertuples(): new_tijd = prev_time + datetime.timedelta(hours=1) @@ -3147,7 +3147,6 @@ def calc_solar_data(self, device: dict, day: datetime.date, active_view: str): result["prognose_dao"] = pred_dao # voorspelling ML - from dao.prog.solar_predictor import SolarPredictor # solar_predictor = SolarPredictor() # solar_prog = solar_predictor.predict_solar_device(device, start, end) diff --git a/dao/prog/day_ahead.py 
b/dao/prog/day_ahead.py index 3be09496..aebc728c 100644 --- a/dao/prog/day_ahead.py +++ b/dao/prog/day_ahead.py @@ -625,13 +625,20 @@ def calc_optimum( penalty_low_soc.append(penalty) if _start_soc is None: - start_soc_str = self.get_state( - self.battery_options[b]["entity actual level"] - ).state - if start_soc_str.lower() == "unavailable": + try: + start_soc_str = "" + start_soc_str = self.get_state( + self.battery_options[b]["entity actual level"] + ).state + start_soc_num = float(start_soc_str) + start_soc.append(start_soc_num) + except Exception as ex: + logging.warning(f"{ex} :" + f"No actual level info recieved from " + f"{self.battery_options[b]["entity actual level"]}, " + f"but recieved '{start_soc_str}', " + f"assumed 50%") start_soc.append(50) - else: - start_soc.append(float(start_soc_str)) else: start_soc.append(_start_soc) logging.info( @@ -2261,6 +2268,7 @@ def calc_optimum( f"Warmtepomp met power-regeling/stooklijnverschuiving wordt ingepland." ) hp_stages = self.heating_options["stages"] + hp_stages = sorted(hp_stages, key=lambda d: d['max_power']) if hp_stages[0]["max_power"] != 0.0: hp_stages = [ {"max_power": 0, "cop": 8}, @@ -2634,7 +2642,7 @@ def calc_optimum( planned_start_dt = dt.datetime.strptime( planned_start_str, "%Y-%m-%d %H:%M:%S" ) - if ma_entity_plan_end is not None: + if ma_entity_plan_end[m] is not None: planned_end_str = self.get_state(ma_entity_plan_end[m]).state planned_end_dt = dt.datetime.strptime( planned_end_str, "%Y-%m-%d %H:%M:%S" @@ -4223,7 +4231,7 @@ def calc_optimum( # grafiek 1 import numpy as np - from dao.prog.da_graph import GraphBuilder + from dao.lib.da_graph import GraphBuilder gr1_df = pd.DataFrame() gr1_df["index"] = np.arange(U) diff --git a/dao/prog/utils.py b/dao/prog/utils.py index 1d45af9d..560b4a97 100644 --- a/dao/prog/utils.py +++ b/dao/prog/utils.py @@ -1,5 +1,3 @@ -from calendar import month - from dateutil import easter import datetime import bisect @@ -94,8 +92,7 @@ def convert_timestr(time_str: 
str, now_dt: datetime.datetime) -> datetime.dateti def get_tibber_data(): - from da_config import Config - from db_manager import DBmanagerObj + from dao.lib.da_config import Config def get_datetime_from_str(s): # "2022-09-01T01:00:00.000+02:00" @@ -409,8 +406,7 @@ def tst_interpolate(): def interpolate_prognose_data(): - from da_config import Config - from db_manager import DBmanagerObj + from dao.lib.da_config import Config config = Config("../data/options.json") db_da = config.get_db_da() diff --git a/dao/prog/utils2.py b/dao/prog/utils2.py index 1915b957..15a10a26 100644 --- a/dao/prog/utils2.py +++ b/dao/prog/utils2.py @@ -1,4 +1,4 @@ -from dao.prog.da_config import Config +from dao.lib.da_config import Config import datetime diff --git a/dao/webserver/app/__init__.py b/dao/webserver/app/__init__.py index c25ffec6..d1dc0fca 100644 --- a/dao/webserver/app/__init__.py +++ b/dao/webserver/app/__init__.py @@ -1,6 +1,4 @@ from flask import Flask -import sys -from dao.prog.da_config import Config # sys.path.append("../") diff --git a/dao/webserver/app/routes.py b/dao/webserver/app/routes.py index a8d1c3ae..afec64e6 100644 --- a/dao/webserver/app/routes.py +++ b/dao/webserver/app/routes.py @@ -1,6 +1,5 @@ import collections import datetime -import time # from sqlalchemy.sql.coercions import expect_col_expression_collection @@ -11,7 +10,7 @@ from subprocess import PIPE, run import logging from logging.handlers import TimedRotatingFileHandler -from dao.prog.da_config import Config +from dao.lib.da_config import Config from dao.prog.da_report import Report from dao.prog.version import __version__ diff --git a/dao/webserver/gunicorn_config.py b/dao/webserver/gunicorn_config.py index 1f12e361..3807b8fa 100644 --- a/dao/webserver/gunicorn_config.py +++ b/dao/webserver/gunicorn_config.py @@ -1,7 +1,7 @@ import sys sys.path.append("../../") -from dao.prog.da_config import get_config +from dao.lib.da_config import get_config app_datapath = "app/static/data/" port = 
get_config(app_datapath + "options.json", ["dashboard", "port"], 5000) diff --git a/release-testing/CHANGELOG.md b/release-testing/CHANGELOG.md index 2a109750..1554daf2 100644 --- a/release-testing/CHANGELOG.md +++ b/release-testing/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog 刀 DAO # Day Ahead Optimizer +# 2026.03.0.rc6 +- Fixed error when checking runtime hours heatpump and there no data (reported by @rescla) +- Fixed error and better warning when no data for actual soc level battery (reported by @tonvanboven) +- Fixed error when optional "entity calculated end" (machine) is not defined (reported by @Xelaph) +- Fixed error when hp-stages are not sorted ascending max_power (reported by @Mvdw) +- Fix error reduced hours with interval = 15min +- Update several python modules + # 2026.03.0.rc5 ### New feature: - Add Fast Forward and Fast Reverse to web interface Home page (PR from @tomvandepoel3) diff --git a/release-testing/config.yaml b/release-testing/config.yaml index 184417e2..945cca73 100644 --- a/release-testing/config.yaml +++ b/release-testing/config.yaml @@ -1,6 +1,6 @@ --- name: 刀 Day Ahead Optimizer (TESTING) -version: 2026.03.0.rc5 +version: 2026.03.0.rc6 stage: experimental slug: day_ahead_opt-testing description: Beta version of DAO. Use only for testing!