Add FlexMeasures plugins, USEF protocol, and Cariflex simulator

- flexmeasures-entsoe: ENTSO-E data plugin
- flexmeasures-weather: Weather data plugin
- USEF Flex Trading Protocol PDF (2.4MB)
- Cariflex simulator (publishes to Redis)
- Grafana dashboard updated with correct InfluxDB queries
- All tools extracted in /tools/
This commit is contained in:
Eric F
2026-06-08 07:38:57 -04:00
parent 3fb90a8033
commit d4974e3241
72 changed files with 5185 additions and 0 deletions

View File

@@ -0,0 +1,54 @@
import os
import sys
from flask import Blueprint
# Put this file's directory at the front of the module search path.
HERE = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, HERE)

# Defaults that the settings messages below announce as fallbacks.
DEFAULT_COUNTRY_CODE = "NL"
DEFAULT_COUNTRY_TIMEZONE = "Europe/Amsterdam"  # This is what we receive, even if ENTSO-E documents Europe/Brussels
DEFAULT_DATA_SOURCE_NAME = "ENTSO-E"
DEFAULT_DERIVED_DATA_SOURCE = "FlexMeasures ENTSO-E"

__version__ = "0.9"

# Per-setting metadata: an optional description, a level, and an optional
# message for when the setting is missing.
# NOTE(review): presumably consumed by the FlexMeasures plugin loader — confirm.
__settings__ = {
    "ENTSOE_AUTH_TOKEN": {
        "description": "You can generate this token after you made an account at ENTSO-E.",
        "level": "error",
    },
    "ENTSOE_COUNTRY_CODE": {
        "level": "warning",
        "message_if_missing": f"'{DEFAULT_COUNTRY_CODE}' will be used as a default.",
    },
    "ENTSOE_COUNTRY_TIMEZONE": {
        "description": "IANA timezone name used to localize ENTSO-E sensors.",
        "level": "info",
        "message_if_missing": f"'{DEFAULT_COUNTRY_TIMEZONE}' will be used as a default.",
    },
    "ENTSOE_USE_TEST_SERVER": {
        "description": "Boolean to indicate whether to use the ENTSO-E's iop test server instead of their production server",
        "level": "debug",
    },
    "ENTSOE_AUTH_TOKEN_TEST_SERVER": {
        "description": "You can generate this token after you made an account at ENTSO-E.",
        "level": "debug",
    },
    "ENTSOE_DERIVED_DATA_SOURCE": {
        "description": "String used to name the source of data that this plugin derives from ENTSO-E data, like a CO₂ signal.",
        "level": "info",
        "message_if_missing": f"'{DEFAULT_DERIVED_DATA_SOURCE}' will be used as a default.",
    },
    "ENTSOE_DATA_SOURCE_NAME": {
        "description": "String used to name the ENTSO-E data source and the account associated with it.",
        "level": "info",
        "message_if_missing": f"'{DEFAULT_DATA_SOURCE_NAME}' will be used as a default.",
    },
}
# Flask blueprint named "entsoe"; commands registered on it live in the "entsoe" CLI group.
entsoe_data_bp = Blueprint("entsoe", __name__, cli_group="entsoe")
# Help text attached to the blueprint's CLI group.
entsoe_data_bp.cli.help = "ENTSO-E Data commands"
from .generation import day_ahead as day_ahead_generation # noqa: E402,F401
from .prices import day_ahead as day_ahead_prices # noqa: E402,F401

View File

@@ -0,0 +1,10 @@
from datetime import timedelta
# Each entry: (sensor_name, unit, event_resolution, data sourced directly by ENTSO-E or not (i.e. derived))
_QUARTER_HOURLY = timedelta(minutes=15)
_HOURLY = timedelta(hours=1)

generation_sensors = (
    ("Scheduled generation", "MW", _QUARTER_HOURLY, True),
    ("Solar", "MW", _HOURLY, True),
    ("Wind Onshore", "MW", _HOURLY, True),
    ("Wind Offshore", "MW", _HOURLY, True),
    ("CO₂ intensity", "kg/MWh", _QUARTER_HOURLY, False),
)

View File

@@ -0,0 +1,214 @@
from typing import Optional
from datetime import datetime
import click
from flask.cli import with_appcontext
from flask import current_app
# from entsoe.entsoe import URL
import pandas as pd
from flexmeasures.data.transactional import task_with_status_report
from .. import (
entsoe_data_bp,
) # noqa: E402
from . import generation_sensors
from ..utils import (
create_entsoe_client,
ensure_country_code_and_timezone,
ensure_data_source,
ensure_data_source_for_derived_data,
abort_if_data_empty,
parse_from_and_to_dates,
save_entsoe_series,
ensure_sensors,
resample_if_needed,
start_import_log,
)
"""
Get the CO₂ content from tomorrow's generation forecasts.
We get the overall forecast and the solar&wind forecast, so we know the share of green energy.
For now, we'll compute the CO₂ mix from some assumptions.
"""
# TODO: Decide which sources to use ― https://github.com/SeitaBV/flexmeasures-entsoe/issues/2
# Source for these ratios: https://ourworldindata.org/energy/country/netherlands#what-sources-does-the-country-get-its-electricity-from (2020 data)
grey_energy_mix = {
    "gas": 0.598,
    "oil": 0.045,
    "coal": 0.0718,
}
# Source for kg CO₂ per MWh: https://energy.utexas.edu/news/nuclear-and-wind-power-estimated-have-lowest-levelized-co2-emissions
kg_CO2_per_MWh = {
    "coal": 870,  # lignite
    "gas": 464,  # natural
    "solar": 44.5,  # mix of utility/residential, difference isn't large
    "oil": 652,  # ca. 75% of coal, see https://www.volker-quaschning.de/datserv/CO2-spez/index_e.php
    "wind_onshore": 14,
    "wind_offshore": 17,  # factor of ~ 1.1, see https://www.mdpi.com/2071-1050/10/6/2022
}
@entsoe_data_bp.cli.command("import-day-ahead-generation")
@click.option(
    "--from-date",
    required=False,
    type=click.DateTime(["%Y-%m-%d"]),
    help="Query data from this date onwards. If not specified, defaults to today",
)
@click.option(
    "--to-date",
    required=False,
    type=click.DateTime(["%Y-%m-%d"]),
    help="Query data until this date (inclusive). If not specified, defaults to tomorrow.",
)
@click.option(
    "--dryrun/--no-dryrun",
    default=False,
    help="In dry run mode, do not save the data to the db.",
)
@click.option(
    "--country",
    "country_code",
    required=False,
    help="ENTSO-E country code (such as BE, DE, FR or NL).",
)
@click.option(
    "--timezone",
    "country_timezone",
    required=False,
    help="Timezone for the country (such as 'Europe/Amsterdam').",
)
@click.option(
    "--for",
    "default_import_timerange",
    required=False,
    default="today-and-tomorrow",
    type=click.Choice(["today", "tomorrow", "today-and-tomorrow"]),
    help="Easy-to-use time range setting, only used if --from-date and --to-date are not used. If set to 'today' or 'tomorrow' or 'today-and-tomorrow', only import data for thes days. The default is today-and-tomorrow.",
)
@with_appcontext
@task_with_status_report("entsoe-import-day-ahead-generation")
def import_day_ahead_generation(
    dryrun: bool = False,
    from_date: Optional[datetime] = None,
    to_date: Optional[datetime] = None,
    country_code: Optional[str] = None,
    country_timezone: Optional[str] = None,
    default_import_timerange: str = "today-and-tomorrow",
):
    """
    Import forecasted generation for any date range, defaulting to today and tomorrow.
    This will save overall generation, solar, offshore and onshore wind, and the estimated CO₂ content per hour.
    Possibly best to run this script somewhere around or maybe two or three hours after 13:00,
    when tomorrow's prices are announced.
    """
    # NOTE: the docstring above doubles as the command's help text, so it is kept as-is.

    # Resolve the zone, then make sure sources and sensors exist.
    country_code, country_timezone = ensure_country_code_and_timezone(
        country_code, country_timezone
    )
    entsoe_data_source = ensure_data_source()
    derived_data_source = ensure_data_source_for_derived_data()
    sensors = ensure_sensors(generation_sensors, country_code, country_timezone)

    # Turn the CLI date options (or the --for default) into a query window.
    from_time, until_time = parse_from_and_to_dates(
        from_date, to_date, country_timezone, default_to=default_import_timerange
    )

    client = create_entsoe_client()
    log, now = start_import_log(
        "day-ahead generation", from_time, until_time, country_code, country_timezone
    )

    # Scheduled generation is assumed not to include the (unscheduled) solar & wind generation.
    log.info("Getting scheduled generation ...")
    scheduled_generation: pd.Series = client.query_generation_forecast(
        country_code, start=from_time, end=until_time
    )
    abort_if_data_empty(scheduled_generation)
    log.debug("Overall aggregated generation: \n%s" % scheduled_generation)
    scheduled_generation = resample_if_needed(
        scheduled_generation, sensors["Scheduled generation"]
    )

    log.info("Getting green generation ...")
    green_generation_df: pd.DataFrame = client.query_wind_and_solar_forecast(
        country_code, start=from_time, end=until_time, psr_type=None
    )
    abort_if_data_empty(green_generation_df)
    log.debug("Green generation: \n%s" % green_generation_df)

    log.info("Aggregating green energy columns ...")
    all_green_generation = green_generation_df.sum(axis="columns")
    log.debug("Aggregated green generation: \n%s" % all_green_generation)

    log.info("Computing combined generation forecast ...")
    all_generation = scheduled_generation + all_green_generation
    log.debug("Combined generation: \n%s" % all_generation)

    log.info("Computing CO₂ content from the MWh values ...")
    co2_in_kg = calculate_CO2_content_in_kg(scheduled_generation, green_generation_df)
    log.debug("Overall CO₂ content (kg): \n%s" % co2_in_kg)
    forecasted_kg_CO2_per_MWh = co2_in_kg / all_generation
    log.debug("Overall CO₂ content (kg/MWh): \n%s" % forecasted_kg_CO2_per_MWh)

    if dryrun:
        return

    # Lazy lookups: a column is only read once its sensor is actually being saved.
    series_by_sensor_name = {
        "Scheduled generation": lambda: scheduled_generation,
        "Solar": lambda: green_generation_df["Solar"],
        "Wind Onshore": lambda: green_generation_df["Wind Onshore"],
        "Wind Offshore": lambda: green_generation_df["Wind Offshore"],
        "CO₂ intensity": lambda: forecasted_kg_CO2_per_MWh,
    }
    for sensor in sensors.values():
        fetch_series = series_by_sensor_name.get(sensor.name)
        if fetch_series is None:
            log.error(f"Cannot connect data to sensor {sensor.name}.")
            raise click.Abort
        series = fetch_series()
        log.info(f"Saving {len(series)} beliefs for Sensor {sensor.name} ...")
        # Data taken directly from ENTSO-E and data derived from it get different sources.
        source_for_sensor = (
            entsoe_data_source if sensor.data_by_entsoe else derived_data_source
        )
        save_entsoe_series(series, sensor, source_for_sensor, country_timezone, now)
def calculate_CO2_content_in_kg(
    grey_generation: pd.Series, green_generation: pd.DataFrame
) -> pd.Series:
    """
    Estimate the CO₂ content (in kg) of grey plus green generation.

    Grey generation is multiplied by one overall intensity factor, built from the
    module-level `grey_energy_mix` shares and `kg_CO2_per_MWh` factors. Each green
    column is multiplied by its own `kg_CO2_per_MWh` factor.

    All terms use the kg-per-MWh factors directly, so every term is in kg.
    (Dividing the solar term by 1000 would put it in tonnes and mix units in the sum.)

    :param grey_generation: Series of scheduled ("grey") generation values.
    :param green_generation: DataFrame with the columns "Solar", "Wind Onshore" and
                             "Wind Offshore". It is not modified.
    :returns: Series with the summed CO₂ content of grey and green generation, in kg.
    :raises KeyError: if one of the three green columns is missing.
    """
    grey_CO2_intensity_factor = (  # TODO: a factor per hour of the day
        (grey_energy_mix["coal"] * kg_CO2_per_MWh["coal"])
        + (grey_energy_mix["gas"] * kg_CO2_per_MWh["gas"])
        + (grey_energy_mix["oil"] * kg_CO2_per_MWh["oil"])
    )
    current_app.logger.debug(f"Grey intensity factor: {grey_CO2_intensity_factor}")
    grey_CO2_content = grey_generation * grey_CO2_intensity_factor
    current_app.logger.debug("Grey CO₂ content (kg): \n%s" % grey_CO2_content)

    # assign() returns a new frame, so the caller's DataFrame keeps its original columns.
    green_with_CO2 = green_generation.assign(
        **{
            "solar CO₂": green_generation["Solar"] * kg_CO2_per_MWh["solar"],
            "wind_onshore CO₂": green_generation["Wind Onshore"]
            * kg_CO2_per_MWh["wind_onshore"],
            "wind_offshore CO₂": green_generation["Wind Offshore"]
            * kg_CO2_per_MWh["wind_offshore"],
        }
    )
    current_app.logger.debug(
        "Green generation and CO₂ content: \n%s" % green_with_CO2
    )
    return (
        grey_CO2_content
        + green_with_CO2["solar CO₂"]
        + green_with_CO2["wind_onshore CO₂"]
        + green_with_CO2["wind_offshore CO₂"]
    )

View File

@@ -0,0 +1,57 @@
import pandas as pd
def determine_net_emission_factors(shares: pd.DataFrame) -> pd.Series:
    """Given production shares, determine the net emission factors.

    Or given production by type, determine the net emissions.
    Use column headers that match production types listed below.
    Use any index. The input frame is left untouched.

    For example:

        print(shares)
              fossil_gas     other  fossil_hard_coal     waste   nuclear
        hour
        0       0.443685  0.206033          0.237596  0.050915  0.059455
        1       0.443910  0.205065          0.235022  0.052614  0.060987

        print(determine_net_emission_factors(shares))
        hour
        0    644.753221
        1    641.410093
        Name: Average emissions from Dutch electricity production (kg CO₂ eq/MWh), dtype: float64

    :param shares: DataFrame with one column per production type.
    :returns: Series (same index as `shares`) with the row-wise sum of each column
              multiplied by its emission factor.
    :raises KeyError: if a column is not a known production type.
    """
    emission_factors = dict(
        biomass=50.4,
        fossil_brown_coal_or_lignite=None,  # unknown
        fossil_coal_derived_gas=None,  # unknown
        fossil_gas=464,
        fossil_hard_coal=1030,
        fossil_oil=1010,
        fossil_oil_shale=None,  # unknown
        fossil_peat=None,  # unknown
        geothermal=0.00664,
        hydro_pumped_storage=611,
        hydro_run_of_river_and_poundage=0.0253,
        hydro_water_reservoir=8.13,
        marine=None,  # unknown
        nuclear=10.1,
        other=927,  # for EU28
        other_renewable=None,  # unknown
        solar=0.00591,
        waste=None,  # unknown
        wind_offshore=0.133,
        wind_onshore=0.133,
    )  # supplementary material from "Real-time carbon accounting method for the European electricity markets, Tranberg et al. (2019)"
    # todo: substitute placeholder for unknown emission factor of waste
    emission_factors["waste"] = emission_factors["biomass"]

    # Weight a copy, so the caller's shares are not overwritten with emissions.
    # NOTE(review): types whose factor is still None cannot be weighted; the
    # multiplication is expected to fail for such columns — confirm.
    weighted = shares.copy()
    for production_type in weighted.columns:
        weighted[production_type] = (
            weighted[production_type] * emission_factors[production_type]
        )
    return weighted.sum(axis=1).rename(
        "Average emissions from Dutch electricity production (kg CO₂ eq/MWh)"
    )

View File

@@ -0,0 +1,4 @@
from datetime import timedelta
# Each entry: (sensor_name, unit, event_resolution, data sourced directly by ENTSO-E or not (i.e. derived))
pricing_sensors = (
    (
        "Day-ahead prices",
        "EUR/MWh",
        timedelta(minutes=15),
        True,
    ),
)

View File

@@ -0,0 +1,155 @@
from typing import Optional
from datetime import datetime
import click
from flask.cli import with_appcontext
import pandas as pd
from flexmeasures import Source, Sensor
from flexmeasures.data.transactional import task_with_status_report
from flexmeasures.data.schemas import SensorIdField
from flexmeasures.data.schemas.sources import DataSourceIdField
from . import pricing_sensors
from .. import (
entsoe_data_bp,
) # noqa: E402
from ..utils import (
create_entsoe_client,
ensure_country_code_and_timezone,
ensure_data_source,
parse_from_and_to_dates,
ensure_sensors,
save_entsoe_series,
abort_if_data_empty,
abort_if_data_incomplete,
resample_if_needed,
start_import_log,
)
@entsoe_data_bp.cli.command("import-day-ahead-prices")
@click.option(
    "--from-date",
    required=False,
    type=click.DateTime(["%Y-%m-%d"]),
    help="Query data from this date onwards. If not specified, defaults to today",
)
@click.option(
    "--to-date",
    required=False,
    type=click.DateTime(["%Y-%m-%d"]),
    help="Query data until this date (inclusive). If not specified, defaults to tomorrow.",
)
@click.option(
    "--dryrun/--no-dryrun",
    default=False,
    help="In dry run mode, do not save the data to the db.",
)
@click.option(
    "--country",
    "country_code",
    required=False,
    help="ENTSO-E country code (such as BE, DE, FR or NL).",
)
@click.option(
    "--timezone",
    "country_timezone",
    required=False,
    help="Timezone for the country (such as 'Europe/Amsterdam').",
)
@click.option(
    "--sensor",
    "sensor",
    type=SensorIdField(),
    required=False,
    help="Sensor to store the data into. If not provided, the sensor `Day-ahead prices` is used.",
)
@click.option(
    "--source",
    "source",
    type=DataSourceIdField(),
    required=False,
    help="Source of the price data. If not provided, the source `ENTSO-E` is used.",
)
@click.option(
    "--for",
    "default_import_timerange",
    required=False,
    default="today-and-tomorrow",
    type=click.Choice(["today", "tomorrow", "today-and-tomorrow"]),
    help="Easy-to-use time range setting, which defines the defaults for start and end to be used when --from-date and/or --to-date are not used. Can be set to 'today' or 'tomorrow' or 'today-and-tomorrow' (which is the default value).",
)
@click.option(
    "--fail-on-incomplete-data",
    "fail_on_incomplete_data",
    is_flag=True,
    default=False,
    help="If set, the import will abort if the data received is incomplete.",
)
@with_appcontext
@task_with_status_report("entsoe-import-day-ahead-prices")
def import_day_ahead_prices(
    dryrun: bool = False,
    from_date: Optional[datetime] = None,
    to_date: Optional[datetime] = None,
    country_code: Optional[str] = None,
    country_timezone: Optional[str] = None,
    sensor: Optional[Sensor] = None,
    source: Optional[Source] = None,
    default_import_timerange: str = "today-and-tomorrow",
    fail_on_incomplete_data: bool = False,
):
    """
    Import forecasted prices for any date range, defaulting to today and tomorrow.
    Possibly best to run this script somewhere around or maybe two or three hours after 13:00,
    when tomorrow's prices are announced.
    """
    # NOTE: the docstring above doubles as the command's help text, so it is kept as-is.

    country_code, country_timezone = ensure_country_code_and_timezone(
        country_code, country_timezone
    )

    # An explicitly passed source wins; otherwise fall back to the ENTSO-E source.
    entsoe_data_source = ensure_data_source() if source is None else source

    # Same for the sensor: use the one given, or make sure the default one exists.
    if sensor is not None:
        pricing_sensor = sensor
    else:
        # For now, we only have one pricing sensor ...
        default_sensors = ensure_sensors(
            pricing_sensors, country_code, country_timezone
        )
        pricing_sensor = default_sensors["Day-ahead prices"]
        assert pricing_sensor.name == "Day-ahead prices"

    # Turn the CLI date options (or the --for default) into a query window.
    from_time, until_time = parse_from_and_to_dates(
        from_date, to_date, country_timezone, default_to=default_import_timerange
    )

    client = create_entsoe_client()
    log, now = start_import_log(
        "day-ahead price", from_time, until_time, country_code, country_timezone
    )

    log.info("Getting prices ...")
    prices: pd.Series = client.query_day_ahead_prices(
        country_code, start=from_time, end=until_time
    )
    abort_if_data_empty(prices)
    if fail_on_incomplete_data:
        # Completeness is checked on the raw response, before any resampling.
        abort_if_data_incomplete(
            prices, from_time, until_time, pricing_sensor.event_resolution
        )
    prices = resample_if_needed(prices, pricing_sensor)
    log.debug("Prices: \n%s" % prices)

    if dryrun:
        return

    log.info(f"Saving {len(prices)} beliefs for Sensor {pricing_sensor.name} ...")
    save_entsoe_series(
        prices, pricing_sensor, entsoe_data_source, country_timezone, now
    )

View File

@@ -0,0 +1,308 @@
from datetime import datetime, timedelta
from types import SimpleNamespace
import click
import pandas as pd
import pytz
import pytest
from flexmeasures_entsoe import DEFAULT_DATA_SOURCE_NAME, DEFAULT_DERIVED_DATA_SOURCE
from flexmeasures_entsoe.utils import (
FM_SUPPORTS_ACCOUNT_LINKED_SOURCES,
_ensure_entsoe_source,
abort_if_data_incomplete,
ensure_data_source,
ensure_data_source_for_derived_data,
parse_from_and_to_dates,
)
def test_abort_if_data_incomplete():
    """
    Check abort_if_data_incomplete() against a 24-hour window at hourly resolution.

    - 24 rows: the call returns without raising.
    - 20 rows: click.Abort is raised.
    """
    window_start = pd.Timestamp("2025-01-01 00:00")
    window_end = pd.Timestamp("2025-01-02 00:00")
    hourly = pd.Timedelta(hours=1)

    # Complete: one row per expected hour
    full_day = pd.DataFrame({"val": range(24)})
    try:
        abort_if_data_incomplete(full_day, window_start, window_end, hourly)
    except click.Abort:
        pytest.fail("Function raised Abort unexpectedly on complete data")

    # Incomplete: four rows short
    short_day = pd.DataFrame({"val": range(20)})
    with pytest.raises(click.Abort):
        abort_if_data_incomplete(short_day, window_start, window_end, hourly)
def test_parse_from_and_to_dates():
    """
    Check how parse_from_and_to_dates() builds the query window (in UTC):

    1. Explicit dates are localized and the end date is treated as inclusive.
    2. default_to="tomorrow" gives tomorrow 00:00 until the day after.
    3. Without dates or default_to, the window is today plus tomorrow.
    4. With only until_date given, the start still comes from the default.
    """
    tz_str = "UTC"
    tz = pytz.timezone(tz_str)
    now = datetime.now(tz)
    today = datetime(now.year, now.month, now.day, tzinfo=tz)
    one_day = timedelta(days=1)
    two_days = timedelta(days=2)

    # Case 1: explicit inputs
    explicit_start, explicit_end = parse_from_and_to_dates(
        from_date=datetime(2025, 5, 1),
        until_date=datetime(2025, 5, 2),
        country_timezone=tz_str,
    )
    assert explicit_start.tzinfo.zone == tz.zone
    assert (explicit_end - explicit_start) == two_days
    assert explicit_end == datetime(2025, 5, 3, tzinfo=tz)

    # Case 2: default_to="tomorrow"
    tomorrow_start, tomorrow_end = parse_from_and_to_dates(
        from_date=None,
        until_date=None,
        country_timezone=tz_str,
        default_to="tomorrow",
    )
    assert tomorrow_end - tomorrow_start == one_day
    assert tomorrow_start == today + one_day
    assert tomorrow_end == today + two_days

    # Case 3: default_to="today-and-tomorrow"
    default_start, default_end = parse_from_and_to_dates(
        from_date=None, until_date=None, country_timezone=tz_str
    )
    assert default_end - default_start == two_days
    assert default_start == today
    assert default_end == today + two_days

    # Case 4: only providing until_date (today midnight == start of tomorrow), while start comes from "today-and-tomorrow"
    today_midnight = datetime(now.year, now.month, now.day) + one_day
    mixed_start, mixed_end = parse_from_and_to_dates(
        from_date=None, until_date=today_midnight, country_timezone=tz_str
    )
    assert mixed_end - mixed_start == two_days
    assert mixed_start == today
    assert mixed_end == today + two_days
# The version-branch tests below still use monkeypatching to isolate source
# creation side effects and to simulate upgrade reuse of legacy ENTSO-E
# sources without requiring multiple FlexMeasures installs in one test run.
@pytest.mark.skipif(
    not FM_SUPPORTS_ACCOUNT_LINKED_SOURCES,
    reason="Account-linked ENTSO-E sources are only supported on FlexMeasures >= 0.32.",
)
def test_ensure_data_source_passes_entsoe_account_when_supported(monkeypatch):
    """Test that ensure_data_source() creates a market-type source and passes the ENTSO-E account."""
    from flask import Flask

    recorded_call = {}
    stub_account = SimpleNamespace(name=DEFAULT_DATA_SOURCE_NAME)

    def record_get_or_create_source(source, source_type, account, flush):
        # Remember what the code under test asked for, then hand back a stand-in source.
        recorded_call.update(
            source=source,
            source_type=source_type,
            account=account,
            flush=flush,
        )
        return SimpleNamespace(type=source_type, account=account, name=source)

    # No existing source is found, so the creation path is taken.
    replacements = {
        "flexmeasures_entsoe.utils.get_or_create_source": record_get_or_create_source,
        "flexmeasures_entsoe.utils._find_existing_source": lambda source_name, source_type: None,
        "flexmeasures_entsoe.utils.get_or_create_entsoe_account": lambda: stub_account,
    }
    for target, replacement in replacements.items():
        monkeypatch.setattr(target, replacement)

    with Flask(__name__).app_context():
        data_source = ensure_data_source()

    assert data_source.type == "market"
    assert recorded_call["source"] == DEFAULT_DATA_SOURCE_NAME
    assert recorded_call["account"].name == DEFAULT_DATA_SOURCE_NAME
@pytest.mark.skipif(
    not FM_SUPPORTS_ACCOUNT_LINKED_SOURCES,
    reason="Account-linked ENTSO-E sources are only supported on FlexMeasures >= 0.32.",
)
def test_ensure_data_source_for_derived_data_passes_entsoe_account_when_supported(
    monkeypatch,
):
    """Test that ensure_data_source_for_derived_data() passes the ENTSO-E account."""
    from flask import Flask

    recorded_call = {}
    stub_account = SimpleNamespace(name=DEFAULT_DATA_SOURCE_NAME)

    def record_get_or_create_source(source, source_type, account, flush):
        # Remember what the code under test asked for, then hand back a stand-in source.
        recorded_call.update(
            source=source,
            source_type=source_type,
            account=account,
            flush=flush,
        )
        return SimpleNamespace(type=source_type, account=account, name=source)

    # No existing source is found, so the creation path is taken.
    replacements = {
        "flexmeasures_entsoe.utils.get_or_create_source": record_get_or_create_source,
        "flexmeasures_entsoe.utils._find_existing_source": lambda source_name, source_type: None,
        "flexmeasures_entsoe.utils.get_or_create_entsoe_account": lambda: stub_account,
    }
    for target, replacement in replacements.items():
        monkeypatch.setattr(target, replacement)

    with Flask(__name__).app_context():
        data_source = ensure_data_source_for_derived_data()

    assert data_source.type == "forecasting script"
    assert recorded_call["source"] == DEFAULT_DERIVED_DATA_SOURCE
    assert recorded_call["account"].name == DEFAULT_DATA_SOURCE_NAME
@pytest.mark.skipif(
    FM_SUPPORTS_ACCOUNT_LINKED_SOURCES,
    reason="Legacy get_data_source fallback is only used on FlexMeasures < 0.32.",
)
def test_ensure_data_source_omits_account_when_not_supported(monkeypatch):
    """Test that ensure_data_source() falls back to the legacy source factory without an account."""
    from flask import Flask

    recorded_call = {}

    def record_get_data_source(data_source_name, data_source_type):
        # Remember the arguments, then hand back a stand-in source.
        recorded_call["data_source_name"] = data_source_name
        recorded_call["data_source_type"] = data_source_type
        return SimpleNamespace(name=data_source_name, type=data_source_type)

    # No existing source is found, so the legacy factory is used.
    replacements = {
        "flexmeasures_entsoe.utils._find_existing_source": lambda source_name, source_type: None,
        "flexmeasures_entsoe.utils.get_data_source": record_get_data_source,
    }
    for target, replacement in replacements.items():
        monkeypatch.setattr(target, replacement)

    with Flask(__name__).app_context():
        data_source = ensure_data_source()

    assert data_source.type == "market"
    assert recorded_call == {
        "data_source_name": DEFAULT_DATA_SOURCE_NAME,
        "data_source_type": "market",
    }
@pytest.mark.skipif(
    FM_SUPPORTS_ACCOUNT_LINKED_SOURCES,
    reason="Legacy get_data_source fallback is only used on FlexMeasures < 0.32.",
)
def test_ensure_data_source_for_derived_data_omits_account_when_not_supported(
    monkeypatch,
):
    """Test that ensure_data_source_for_derived_data() falls back to the legacy source factory."""
    from flask import Flask

    recorded_call = {}

    def record_get_data_source(data_source_name, data_source_type):
        # Remember the arguments, then hand back a stand-in source.
        recorded_call["data_source_name"] = data_source_name
        recorded_call["data_source_type"] = data_source_type
        return SimpleNamespace(name=data_source_name, type=data_source_type)

    # No existing source is found, so the legacy factory is used.
    replacements = {
        "flexmeasures_entsoe.utils._find_existing_source": lambda source_name, source_type: None,
        "flexmeasures_entsoe.utils.get_data_source": record_get_data_source,
    }
    for target, replacement in replacements.items():
        monkeypatch.setattr(target, replacement)

    with Flask(__name__).app_context():
        data_source = ensure_data_source_for_derived_data()

    assert data_source.type == "forecasting script"
    assert recorded_call == {
        "data_source_name": DEFAULT_DERIVED_DATA_SOURCE,
        "data_source_type": "forecasting script",
    }
@pytest.mark.skipif(
    not FM_SUPPORTS_ACCOUNT_LINKED_SOURCES,
    reason="Legacy source upgrade reuse matters in the account-linked source path only.",
)
def test_ensure_entsoe_source_reuses_legacy_source_and_sets_account(monkeypatch):
    """Test that a legacy-typed source is reused, retyped and linked to the ENTSO-E account."""
    legacy_source = SimpleNamespace(
        name=DEFAULT_DATA_SOURCE_NAME,
        type="forecasting script",
        account=None,
    )
    stub_account = SimpleNamespace(name=DEFAULT_DATA_SOURCE_NAME)

    def lookup_only_legacy(source_name, source_type):
        # Only the lookup under the legacy type finds something.
        return legacy_source if source_type == "forecasting script" else None

    def refuse_to_create(**kwargs):
        return pytest.fail(
            "Should reuse a legacy ENTSO-E source before creating a new one."
        )

    replacements = {
        "flexmeasures_entsoe.utils._find_existing_source": lookup_only_legacy,
        "flexmeasures_entsoe.utils.get_or_create_entsoe_account": lambda: stub_account,
        "flexmeasures_entsoe.utils.get_or_create_source": refuse_to_create,
    }
    for target, replacement in replacements.items():
        monkeypatch.setattr(target, replacement)

    data_source = _ensure_entsoe_source(
        source_name=DEFAULT_DATA_SOURCE_NAME,
        source_type="market",
        legacy_source_type="forecasting script",
    )

    assert data_source is legacy_source
    assert data_source.type == "market"
    assert data_source.account is stub_account

View File

@@ -0,0 +1,369 @@
from typing import Dict, Optional, Tuple, Union
from datetime import datetime, timedelta
from logging import Logger
from entsoe import EntsoePandasClient
from flask import current_app
from packaging import version
from pandas.tseries.frequencies import to_offset
import pandas as pd
import click
import pytz
import entsoe
from flexmeasures.data.utils import get_data_source, save_to_db
from flexmeasures import Asset, AssetType, Sensor, Source, __version__ as flexmeasures_version
from flexmeasures.data import db
from flexmeasures.utils.time_utils import server_now
from timely_beliefs import BeliefsDataFrame
from flexmeasures.cli.utils import MsgStyle
from . import (
DEFAULT_DATA_SOURCE_NAME,
DEFAULT_DERIVED_DATA_SOURCE,
DEFAULT_COUNTRY_CODE,
DEFAULT_COUNTRY_TIMEZONE,
) # noqa: E402
# Feature flag: True when the installed FlexMeasures version is at least 0.32.
FM_SUPPORTS_ACCOUNT_LINKED_SOURCES = version.parse(
flexmeasures_version
) >= version.parse("0.32")
# Account and get_or_create_source are only imported when the flag is set,
# so code using these names has to check the flag first.
if FM_SUPPORTS_ACCOUNT_LINKED_SOURCES:
from flexmeasures import Account
from flexmeasures.data.services.data_sources import get_or_create_source
def _find_existing_source(source_name: str, source_type: str) -> Optional[Source]:
    """Return the first source (ordered by id) with this name and type, as given by `.first()`."""
    matching_sources = Source.query.filter(
        Source.name == source_name,
        Source.type == source_type,
    )
    return matching_sources.order_by(Source.id).first()
def get_or_create_entsoe_account():
    """
    Make sure we have an account for the ENTSO-E provider service.

    The account is named after the ENTSOE_DATA_SOURCE_NAME setting (or its default).
    A new account is added to the session and flushed, not committed.
    """
    account_name = current_app.config.get(
        "ENTSOE_DATA_SOURCE_NAME", DEFAULT_DATA_SOURCE_NAME
    )
    existing_account = Account.query.filter(
        Account.name == account_name,
    ).one_or_none()
    if existing_account is not None:
        return existing_account

    new_account = Account(name=account_name)
    db.session.add(new_account)
    db.session.flush()
    return new_account
def _ensure_entsoe_source(
    source_name: str,
    source_type: str,
    legacy_source_type: Optional[str] = None,
) -> Source:
    """
    Return a source with the given name and type, reusing an existing one if possible.

    Lookup order:
    1. a source with this name and `source_type`;
    2. if `legacy_source_type` is given, a source with this name and that type,
       which is then switched over to `source_type`.

    When account-linked sources are supported, a reused source without an account
    gets the ENTSO-E account. If nothing can be reused, a source is created through
    `get_or_create_source` (account-linked) or `get_data_source` (older FlexMeasures).
    """
    linked_account = (
        get_or_create_entsoe_account() if FM_SUPPORTS_ACCOUNT_LINKED_SOURCES else None
    )

    reusable_source = _find_existing_source(source_name, source_type)
    if reusable_source is None and legacy_source_type is not None:
        reusable_source = _find_existing_source(source_name, legacy_source_type)
        if reusable_source is not None:
            # Upgrade the legacy source to the current type
            reusable_source.type = source_type

    if reusable_source is not None:
        lacks_account = (
            linked_account is not None
            and getattr(reusable_source, "account", None) is None
        )
        if lacks_account:
            reusable_source.account = linked_account
        return reusable_source

    if FM_SUPPORTS_ACCOUNT_LINKED_SOURCES:
        creation_kwargs = {
            "source": source_name,
            "source_type": source_type,
            "flush": False,
        }
        if linked_account is not None:
            creation_kwargs["account"] = linked_account
        return get_or_create_source(**creation_kwargs)

    return get_data_source(
        data_source_name=source_name,
        data_source_type=source_type,
    )
def ensure_data_source() -> Source:
    """
    Make sure we have a raw ENTSO-E data source of type "market".

    The name comes from the ENTSOE_DATA_SOURCE_NAME setting (or its default).
    A source of the same name with type "forecasting script" counts as a legacy source to reuse.
    """
    raw_source_name = current_app.config.get(
        "ENTSOE_DATA_SOURCE_NAME", DEFAULT_DATA_SOURCE_NAME
    )
    return _ensure_entsoe_source(
        source_name=raw_source_name,
        source_type="market",
        legacy_source_type="forecasting script",
    )
def ensure_data_source_for_derived_data() -> Source:
    """
    Make sure we have a data source for data derived from ENTSO-E data.

    The name comes from the ENTSOE_DERIVED_DATA_SOURCE setting (or its default);
    the source type is "forecasting script".
    """
    derived_source_name = current_app.config.get(
        "ENTSOE_DERIVED_DATA_SOURCE", DEFAULT_DERIVED_DATA_SOURCE
    )
    return _ensure_entsoe_source(
        source_name=derived_source_name,
        source_type="forecasting script",
    )
def ensure_transmission_zone_asset(country_code: str) -> Asset:
"""
Ensure an Asset exists to model the transmission zone for which this plugin gathers data.

The asset is looked up by the name "<country_code> transmission zone". The asset and its
"transmission zone" AssetType are added to the session when they are missing.
"""
# Look up the asset type by name; one_or_none() yields None when there is no match.
transmission_zone_type = AssetType.query.filter(
AssetType.name == "transmission zone"
).one_or_none()
if not transmission_zone_type:
current_app.logger.info("Adding transmission zone type ...")
transmission_zone_type = AssetType(
name="transmission zone",
description="A grid regulated & balanced as a whole, usually a national grid.",
)
db.session.add(transmission_zone_type)
# The asset itself is identified by name only (no filter on type or account).
ga_name = f"{country_code} transmission zone"
transmission_zone = Asset.query.filter(Asset.name == ga_name).one_or_none()
if not transmission_zone:
current_app.logger.info(f"Adding {ga_name} ...")
transmission_zone = Asset(
name=ga_name,
generic_asset_type=transmission_zone_type,
account_id=None, # public
)
db.session.add(transmission_zone)
# NOTE(review): indentation is not visible in this view, so it cannot be told whether
# this commit runs on every call or only when a new asset was added — confirm.
db.session.commit()
return transmission_zone
def ensure_sensors(
    sensor_specifications: Tuple,
    country_code: str,
    timezone: str,
) -> Dict[str, Sensor]:
    """
    Make sure the transmission zone asset for this country has all specified sensors.

    Each specification is a tuple (sensor_name, unit, event_resolution, data_by_entsoe).
    Sensors are matched on name, unit and asset; missing ones are added to the session.
    An existing sensor with a different resolution is kept, and a warning is logged.
    Every returned sensor carries a `data_by_entsoe` attribute set from its specification.

    :returns: the sensors, keyed by sensor name. The session is flushed
              if at least one sensor was created.
    """
    transmission_zone = ensure_transmission_zone_asset(country_code)
    sensors = {}
    sensors_created: bool = False

    for sensor_name, unit, event_resolution, data_by_entsoe in sensor_specifications:
        sensor = Sensor.query.filter(
            Sensor.name == sensor_name,
            Sensor.unit == unit,
            Sensor.generic_asset == transmission_zone,
        ).one_or_none()
        if sensor:
            if sensor.event_resolution != event_resolution:
                current_app.logger.warning(
                    f"The {sensor_name} sensor exists, but has a resolution of {sensor.event_resolution} instead of {event_resolution}. Please refer the 'October 1st 2025 go-live' instructions in `README.md`."
                )
        else:
            current_app.logger.info(f"Adding sensor {sensor_name} ...")
            sensor = Sensor(
                name=sensor_name,
                unit=unit,
                generic_asset=transmission_zone,
                timezone=timezone,
                event_resolution=event_resolution,
            )
            db.session.add(sensor)
            sensors_created = True
        # Tag the sensor, so callers can pick the matching data source later.
        sensor.data_by_entsoe = data_by_entsoe
        sensors[sensor_name] = sensor

    if sensors_created:
        db.session.flush()
    return sensors
def get_auth_token_from_config_and_set_server_url() -> str:
    """
    Read the ENTSO-E auth token from config and point entsoe-py at the matching server.

    If ENTSOE_USE_TEST_SERVER is set, the token is read from
    ENTSOE_AUTH_TOKEN_TEST_SERVER and the iop test server URL is used.
    Otherwise the token is read from ENTSOE_AUTH_TOKEN and the production URL is used.

    :returns: the auth token.
    :raises click.Abort: if the relevant token setting is missing or empty.
                         The message names the setting that was actually read.
    """
    use_test_server = current_app.config.get("ENTSOE_USE_TEST_SERVER", False)
    if use_test_server:
        token_setting = "ENTSOE_AUTH_TOKEN_TEST_SERVER"
        server_url = "https://iop-transparency.entsoe.eu/api"
    else:
        token_setting = "ENTSOE_AUTH_TOKEN"
        server_url = "https://web-api.tp.entsoe.eu/api"

    auth_token = current_app.config.get(token_setting)
    # The URL is a module-level variable in entsoe-py, so it is set for the whole process.
    entsoe.entsoe.URL = server_url

    if not auth_token:
        click.echo(f"Setting {token_setting} seems empty!")
        raise click.Abort
    return auth_token
def ensure_country_code_and_timezone(
    country_code: Optional[str] = None,
    country_timezone: Optional[str] = None,
) -> Tuple[str, str]:
    """
    Fill in country code and timezone from the app config where they are not given.

    A value of None is replaced by the ENTSOE_COUNTRY_CODE / ENTSOE_COUNTRY_TIMEZONE
    setting, or by the plugin default if that setting is absent.
    Values that are passed in are returned unchanged.

    :returns: tuple of (country_code, country_timezone)
    """
    resolved_code = (
        country_code
        if country_code is not None
        else current_app.config.get("ENTSOE_COUNTRY_CODE", DEFAULT_COUNTRY_CODE)
    )
    resolved_timezone = (
        country_timezone
        if country_timezone is not None
        else current_app.config.get(
            "ENTSOE_COUNTRY_TIMEZONE", DEFAULT_COUNTRY_TIMEZONE
        )
    )
    return resolved_code, resolved_timezone
def create_entsoe_client() -> EntsoePandasClient:
    """
    Build an EntsoePandasClient authenticated with the configured token.

    Looking up the token also sets the server URL (production or test server) as a side effect.
    """
    return EntsoePandasClient(
        api_key=get_auth_token_from_config_and_set_server_url()
    )
def abort_if_data_empty(data: Union[pd.DataFrame, pd.Series]):
    """
    Echo a message and raise click.Abort if the given data is empty; otherwise do nothing.
    """
    if not data.empty:
        return
    click.echo(
        "Result is empty. Probably ENTSO-E does not provide these forecasts yet ..."
    )
    raise click.Abort
def abort_if_data_incomplete(
    data: Union[pd.DataFrame, pd.Series],
    from_time: pd.Timestamp,
    until_time: pd.Timestamp,
    resolution: pd.Timedelta,
):
    """
    Echo an error and raise click.Abort if the data has fewer rows than expected.

    The expected number of periods is how many times the resolution fits
    into the window from from_time to until_time (truncated to an integer).
    """
    window = until_time - from_time
    expected_periods = int(window / resolution)
    received_periods = len(data)
    if received_periods >= expected_periods:
        return
    click.secho(
        f"Result is incomplete. Expected {expected_periods} periods but got {received_periods}. Probably ENTSO-E does not provide these forecasts yet ...",
        **MsgStyle.ERROR,
    )
    raise click.Abort
def parse_from_and_to_dates(
    from_date: Optional[datetime],
    until_date: Optional[datetime],
    country_timezone: str,
    default_to: str = "today-and-tomorrow",  # Can be "tomorrow" or "today"
) -> Tuple[pd.Timestamp, pd.Timestamp]:
    """
    Parse CLI options for start and end date (or set default to today and tomorrow) for input to entsoe-py

    Note: we expect only dates as input here, and until_date is inclusive, so we extend it by one
          calendar day - so if from_date is equal to until_date, we return 00:00 and 24:00 of that day.
    Note: entsoe-py expects time params as pd.Timestamp
    Note: all day arithmetic is done in local wall-clock time, so the returned timestamps fall on
          local midnight also on days with a daylight-saving transition (which last 23 or 25 hours).

    Raises ValueError if default_to is not one of 'today', 'tomorrow' or 'today-and-tomorrow'.
    """
    # DateOffset(days=...) keeps the wall-clock time across DST changes, whereas
    # Timedelta/timedelta add absolute hours (or keep a stale UTC offset on pytz datetimes).
    one_day = pd.DateOffset(days=1)
    today_start = pd.Timestamp.now(tz=country_timezone).normalize()
    if default_to == "today":
        default_start = today_start
        default_end = today_start + one_day
    elif default_to == "tomorrow":
        default_start = today_start + one_day
        default_end = default_start + one_day
    elif default_to == "today-and-tomorrow":
        default_start = today_start
        default_end = default_start + pd.DateOffset(days=2)
    else:
        raise ValueError(
            f"Invalid default_to value: {default_to}. Expected 'today', 'tomorrow' or 'today-and-tomorrow'."
        )
    if from_date is None:
        start_date = default_start
    else:
        start_date = pd.Timestamp(from_date, tz=country_timezone)
    if until_date is None:
        end_date = default_end
    else:
        # The until_date provided is considered inclusive, so we add one calendar day to include the entire day
        end_date = pd.Timestamp(until_date, tz=country_timezone) + one_day
    return start_date, end_date
def resample_if_needed(s: pd.Series, sensor: Sensor) -> pd.Series:
    """
    Bring a series to the event resolution of the given sensor.

    The resolution of the data is inferred from its index. If it already matches
    sensor.event_resolution, the series is returned unchanged. Coarser data is upsampled
    by forward-filling each value over its original period; finer data is downsampled
    by taking the mean.

    Raises ValueError if no frequency can be inferred from the index.
    """
    inferred_frequency = pd.infer_freq(s.index)
    if inferred_frequency is None:
        raise ValueError(
            "Data has no discernible frequency from which to derive an event resolution."
        )
    inferred_resolution = pd.to_timedelta(to_offset(inferred_frequency))
    target_resolution = sensor.event_resolution
    if inferred_resolution == target_resolution:
        return s
    elif inferred_resolution > target_resolution:
        current_app.logger.debug(f"Upsampling data for {sensor.name} ...")
        # Extend the index by one original period, so the last value is also spread over its full period
        index = pd.date_range(
            s.index[0],
            s.index[-1] + inferred_resolution,
            freq=target_resolution,
            inclusive="left",
        )
        # ffill() replaces Series.pad(), which is deprecated since pandas 2.0
        s = s.reindex(index).ffill()
    elif inferred_resolution < target_resolution:
        current_app.logger.debug(f"Downsampling data for {sensor.name} ...")
        s = s.resample(target_resolution).mean()
    # Pass values as logging args: a '%' in the sensor name must not be treated as a format directive
    current_app.logger.debug("Resampled data for %s: \n%s", sensor.name, s)
    return s
def save_entsoe_series(
    series: pd.Series,
    sensor: Sensor,
    entsoe_source: Source,
    country_timezone: str,
    now: Optional[datetime] = None,
):
    """
    Store a series obtained from ENTSO-E as beliefs in the FlexMeasures database.

    Belief times are set to 6 hours before the start of each event's day,
    but never later than `now` (which defaults to the server time in the country's timezone).
    An info message is logged if (some of) the beliefs had already been saved before.
    """
    if not now:
        now = server_now().astimezone(pytz.timezone(country_timezone))

    # published no later than D-1 18:00 Brussels time
    day_starts = series.index.floor("D")
    publication_times = day_starts - pd.Timedelta("6h")
    clipped_frame = publication_times.to_frame(name="clipped_belief_times").clip(
        upper=now
    )
    belief_times = clipped_frame.set_index("clipped_belief_times").index

    bdf = BeliefsDataFrame(
        series,
        source=entsoe_source,
        sensor=sensor,
        belief_time=belief_times,
    )
    # TODO: evaluate some traits of the data via FlexMeasures, see https://github.com/SeitaBV/flexmeasures-entsoe/issues/3
    outcome = save_to_db(bdf)
    already_saved_notes = (
        (
            "success_but_nothing_new",
            "Done. These beliefs had already been saved before.",
        ),
        (
            "success_with_unchanged_beliefs_skipped",
            "Done. Some beliefs had already been saved before.",
        ),
    )
    for known_status, note in already_saved_notes:
        if outcome == known_status:
            current_app.logger.info(note)
            break
def start_import_log(
    import_type: str,
    from_time: pd.Timestamp,
    until_time: pd.Timestamp,
    country_code: str,
    country_timezone: str,
) -> Tuple[Logger, datetime]:
    """
    Log that an import from ENTSO-E is starting.

    Returns the app logger together with the current server time, expressed in the country's timezone.
    """
    app_logger = current_app.logger
    announcement = f"Importing {import_type} data for {country_code} (timezone {country_timezone}), starting at {from_time}, up until {until_time}, from ENTSO-E at {entsoe.entsoe.URL} ..."
    app_logger.info(announcement)
    current_time = server_now()
    return app_logger, current_time.astimezone(pytz.timezone(country_timezone))