Uploading New Exposures to Existing Dataset and Updating the Risk Dataset

Uploading New Exposures to Existing Dataset and Updating the Risk Dataset#

Use this notebook to append new exposure data to an existing upload and update an existing risk dataset.

import datetime as dt
from pathlib import Path

from tqdm import tqdm

from bayesline.apiclient import BayeslineApiClient
from bayesline.api.equity import (
    ContinuousExposureGroupSettings,
    ExposureSettings, 
    UniverseSettings,
)
# Create the API client. The [ENDPOINT] and [API-KEY] values are placeholders
# and must be replaced with real values before running.
bln = BayeslineApiClient.new_client(
    endpoint="https://[ENDPOINT]",
    api_key="[API-KEY]",
)

Uploading New Exposure Files

# Folder that is scanned for exposure csv files; placeholder path, replace it.
exposure_dir = Path("PATH/TO/EXPOSURES")
# Name of the exposure dataset the uploader is fetched for.
exposure_dataset_name = "My-Exposures"

The cell below gets the exposure uploader for the chosen dataset name My-Exposures. This dataset is assumed to already exist, since we are demonstrating a catch-up upload. See the Uploaders Tutorial for a deep dive into the Uploaders API.

# Uploader API for the "exposures" data type.
exposure_uploader = bln.equity.uploaders.get_data_type("exposures")
# Handle for the named dataset.
# NOTE(review): presumably this requires the dataset to exist already — confirm.
uploader = exposure_uploader.get_dataset(dataset=exposure_dataset_name)

Below we list the existing files in the provided folder and filter out all dates for which we already processed files in a previous run.

# List all csv files in the exposure folder and work out which dates are new.
# Expects the file pattern "*_YYYY-MM-DD.csv"; a file name that does not match
# makes the date parsing below raise a ValueError.

files = list(exposure_dir.glob("*.csv"))
# e.g. "exposures_2025-06-01.csv" -> "20250601"
file_date_strs = [file.name.split("_")[-1].replace("-", "")[:8] for file in files]

# Parse with the standard library instead of hand-slicing year/month/day.
available_dates = [
    dt.datetime.strptime(d, "%Y%m%d").date() for d in file_date_strs
]

# Dates that are already present in the uploaded dataset.
existing_dates = (
    uploader.get_data(columns=["date"], unique=True).collect().to_series().to_list()
)

# Keep only dates that have a file on disk but are not in the dataset yet.
new_dates = sorted(set(available_dates) - set(existing_dates))

print(f"Got {len(new_dates)} new dates")
Got 30 new dates
# Look-up table from the date encoded in each file name to the file itself.
files_by_date = {}
for date_str, path in zip(file_date_strs, files):
    year, month, day = int(date_str[:4]), int(date_str[4:6]), int(date_str[6:8])
    files_by_date[dt.date(year, month, day)] = path

As a next step, we iterate over the csv files and stage each one. Note that for a large number of files it is much faster to upload zip files instead. See this recipe for details.

See the Uploaders Tutorial for more details on the staging and commit concepts.

# Stage the file of every new date, then commit everything staged as an append.
for date in tqdm(new_dates):
    file = files_by_date[date]
    result = uploader.stage_file(file)
    # Raise explicitly instead of asserting: asserts are skipped under
    # `python -O`, which would let a failed staging reach the commit below.
    if not result.success:
        raise RuntimeError(f"Staging failed for {file}: {result!r}")
uploader.commit(mode="append")
UploadCommitResult(version=2, committed_names=['exposures_2025-06-01', 'exposures_2025-06-02', 'exposures_2025-06-03', 'exposures_2025-06-04', 'exposures_2025-06-05', 'exposures_2025-06-06', 'exposures_2025-06-07', 'exposures_2025-06-08', 'exposures_2025-06-09', 'exposures_2025-06-10', 'exposures_2025-06-11', 'exposures_2025-06-12', 'exposures_2025-06-13', 'exposures_2025-06-14', 'exposures_2025-06-15', 'exposures_2025-06-16', 'exposures_2025-06-17', 'exposures_2025-06-18', 'exposures_2025-06-19', 'exposures_2025-06-20', 'exposures_2025-06-21', 'exposures_2025-06-22', 'exposures_2025-06-23', 'exposures_2025-06-24', 'exposures_2025-06-25', 'exposures_2025-06-26', 'exposures_2025-06-27', 'exposures_2025-06-28', 'exposures_2025-06-29', 'exposures_2025-06-30'])
# Per-date summary of the dataset (asset counts and exposure statistics);
# the dates that were just committed should now be listed.
uploader.get_data_detail_summary()
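shape: (61, 6)
| date       | n_assets | min_exposure | max_exposure | mean_exposure | std_exposure |
|------------|----------|--------------|--------------|---------------|--------------|
| date       | i64      | f32          | f32          | f64           | f64          |
| 2025-05-01 | 46773    | -4.171875    | 4.28125      | 0.23534       | 1.023091     |
| 2025-05-02 | 46766    | -4.171875    | 4.277344     | 0.235877      | 1.022765     |
| 2025-05-03 | 46763    | -4.171875    | 4.277344     | 0.235797      | 1.022699     |
| 2025-05-04 | 46763    | -4.171875    | 4.277344     | 0.235795      | 1.022698     |
| 2025-05-05 | 46766    | -4.171875    | 4.277344     | 0.236549      | 1.022354     |
| …          | …        | …            | …            | …             | …            |
| 2025-06-26 | 46656    | -4.140625    | 4.285156     | 0.236344      | 1.021307     |
| 2025-06-27 | 46645    | -4.140625    | 4.2890625    | 0.236652      | 1.021136     |
| 2025-06-28 | 46637    | -4.140625    | 4.2890625    | 0.236683      | 1.021083     |
| 2025-06-29 | 46637    | -4.140625    | 4.2890625    | 0.236683      | 1.021083     |
| 2025-06-30 | 46646    | -4.140625    | 4.296875     | 0.236964      | 1.021116     |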

Updating the Risk Dataset#

To bring the newly uploaded exposures into the pre-existing risk dataset, we need to update it. The update pulls the most recent version of all referenced datasets and re-creates the risk dataset.

# Name of the existing risk dataset to refresh.
risk_dataset_name = "My-Risk-Dataset"
risk_datasets = bln.equity.riskdatasets
risk_dataset = risk_datasets.load(risk_dataset_name)
# Update the loaded risk dataset so that it picks up the newly committed exposures.
risk_dataset.update()
RiskDatasetUpdateResult()
# Show the settings menus of the risk dataset (exchanges, id types,
# categorical and continuous hierarchies, model construction weights).
risk_dataset.describe()
RiskDatasetProperties(factor_risk_model_settings_menu=FactorRiskModelSettingsMenu(exposure_settings_menu=ExposureSettingsMenu(universe_settings_menu=UniverseSettingsMenu(calendar_settings_menu=CalendarSettingsMenu(exchanges=['ARCX', 'BVCA', 'BVMF', 'DIFX', 'DSMD', 'ETFP', 'FRAB', 'HSTC', 'JBUL', 'PFTS', 'ROCO', 'SHSC', 'SZSC', 'WBDM', 'XADS', 'XAMM', 'XAMS', 'XASE', 'XASX', 'XATH', 'XBAH', 'XBEL', 'XBEY', 'XBKF', 'XBKK', 'XBOG', 'XBOM', 'XBOS', 'XBRA', 'XBRU', 'XBRV', 'XBUD', 'XBUE', 'XCAI', 'XCAN', 'XCAS', 'XCSE', 'XCYS', 'XDUB', 'XEQY', 'XETB', 'XHEL', 'XHKG', 'XHNX', 'XICE', 'XIDX', 'XJAM', 'XJAS', 'XJSE', 'XKAR', 'XKLS', 'XKOS', 'XKRX', 'XKUW', 'XLIM', 'XLIS', 'XLIT', 'XLJU', 'XLON', 'XLUX', 'XMAD', 'XMAL', 'XMAU', 'XMEX', 'XMUS', 'XNAI', 'XNAM', 'XNAS', 'XNCM', 'XNSA', 'XNSE', 'XNYS', 'XNZE', 'XOSL', 'XPAE', 'XPAR', 'XPHS', 'XPRM', 'XPSX', 'XQUI', 'XRIS', 'XSAU', 'XSEC', 'XSES', 'XSGO', 'XSHE', 'XSHG', 'XSSC', 'XSTC', 'XSTO', 'XSWX', 'XTAE', 'XTAI', 'XTAL', 'XTKS', 'XTSE', 'XTSX', 'XTUN', 'XWAR', 'XZAG', 'XZIM']), id_types=['bayesid'], categorical_hierarchies={'industry': ['Academic & Educational Services', 'Basic Materials', 'Consumer Cyclicals', 'Consumer Non-Cyclicals', 'Energy', 'Financials', 'Government Activity', 'Healthcare', 'Industrials', 'Institutions, Associations & Organizations', 'Real Estate', 'Technology', 'Utilities'], 'region': ['Argentina', 'Australia', 'Austria', 'Bahrain', 'Belgium', 'Bermuda', 'Brazil', 'British Virgin Islands', 'Bulgaria', 'Cambodia', 'Canada', 'Cayman Islands', 'Chile', 'China', 'Colombia', "Cote d'Ivoire", 'Croatia', 'Cyprus', 'Czechia', 'Denmark', 'Ecuador', 'Egypt', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Guernsey', 'Hong Kong', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Ireland', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jersey', 'Jordan', 'Kazakhstan', 'Kenya', 'Kuwait', 'Latvia', 'Lebanon', 'Lithuania', 'Luxembourg', 'Macau', 'Malaysia', 'Malta', 'Mauritius', 'Mexico', 'Monaco', 'Morocco', 'Namibia', 
'Netherlands', 'New Zealand', 'Nigeria', 'Norway', 'Oman', 'Pakistan', 'Palestine', 'Papua New Guinea', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Qatar', 'Romania', 'Saudi Arabia', 'Serbia', 'Singapore', 'Slovakia', 'Slovenia', 'South Africa', 'South Korea', 'Spain', 'Sweden', 'Switzerland', 'Taiwan', 'Thailand', 'Trinidad and Tobago', 'Tunisia', 'Türkiye', 'Ukraine', 'United Arab Emirates', 'United Kingdom', 'United States', 'Venezuela', 'Vietnam']}, categorical_hierarchies_labels={'industry': {'Academic & Educational Services': 'Academic & Educational Services', 'Basic Materials': 'Basic Materials', 'Consumer Cyclicals': 'Consumer Cyclicals', 'Consumer Non-Cyclicals': 'Consumer Non-Cyclicals', 'Energy': 'Energy', 'Financials': 'Financials', 'Government Activity': 'Government Activity', 'Healthcare': 'Healthcare', 'Industrials': 'Industrials', 'Institutions, Associations & Organizations': 'Institutions, Associations & Organizations', 'Real Estate': 'Real Estate', 'Technology': 'Technology', 'Utilities': 'Utilities'}, 'region': {'Argentina': 'Argentina', 'Australia': 'Australia', 'Austria': 'Austria', 'Bahrain': 'Bahrain', 'Belgium': 'Belgium', 'Bermuda': 'Bermuda', 'Brazil': 'Brazil', 'British Virgin Islands': 'British Virgin Islands', 'Bulgaria': 'Bulgaria', 'Cambodia': 'Cambodia', 'Canada': 'Canada', 'Cayman Islands': 'Cayman Islands', 'Chile': 'Chile', 'China': 'China', 'Colombia': 'Colombia', "Cote d'Ivoire": "Cote d'Ivoire", 'Croatia': 'Croatia', 'Cyprus': 'Cyprus', 'Czechia': 'Czechia', 'Denmark': 'Denmark', 'Ecuador': 'Ecuador', 'Egypt': 'Egypt', 'Estonia': 'Estonia', 'Finland': 'Finland', 'France': 'France', 'Germany': 'Germany', 'Greece': 'Greece', 'Guernsey': 'Guernsey', 'Hong Kong': 'Hong Kong', 'Hungary': 'Hungary', 'Iceland': 'Iceland', 'India': 'India', 'Indonesia': 'Indonesia', 'Ireland': 'Ireland', 'Israel': 'Israel', 'Italy': 'Italy', 'Jamaica': 'Jamaica', 'Japan': 'Japan', 'Jersey': 'Jersey', 'Jordan': 'Jordan', 'Kazakhstan': 'Kazakhstan', 
'Kenya': 'Kenya', 'Kuwait': 'Kuwait', 'Latvia': 'Latvia', 'Lebanon': 'Lebanon', 'Lithuania': 'Lithuania', 'Luxembourg': 'Luxembourg', 'Macau': 'Macau', 'Malaysia': 'Malaysia', 'Malta': 'Malta', 'Mauritius': 'Mauritius', 'Mexico': 'Mexico', 'Monaco': 'Monaco', 'Morocco': 'Morocco', 'Namibia': 'Namibia', 'Netherlands': 'Netherlands', 'New Zealand': 'New Zealand', 'Nigeria': 'Nigeria', 'Norway': 'Norway', 'Oman': 'Oman', 'Pakistan': 'Pakistan', 'Palestine': 'Palestine', 'Papua New Guinea': 'Papua New Guinea', 'Peru': 'Peru', 'Philippines': 'Philippines', 'Poland': 'Poland', 'Portugal': 'Portugal', 'Qatar': 'Qatar', 'Romania': 'Romania', 'Saudi Arabia': 'Saudi Arabia', 'Serbia': 'Serbia', 'Singapore': 'Singapore', 'Slovakia': 'Slovakia', 'Slovenia': 'Slovenia', 'South Africa': 'South Africa', 'South Korea': 'South Korea', 'Spain': 'Spain', 'Sweden': 'Sweden', 'Switzerland': 'Switzerland', 'Taiwan': 'Taiwan', 'Thailand': 'Thailand', 'Trinidad and Tobago': 'Trinidad and Tobago', 'Tunisia': 'Tunisia', 'Türkiye': 'Türkiye', 'Ukraine': 'Ukraine', 'United Arab Emirates': 'United Arab Emirates', 'United Kingdom': 'United Kingdom', 'United States': 'United States', 'Venezuela': 'Venezuela', 'Vietnam': 'Vietnam'}}), continuous_hierarchies={'market': ['Market'], 'style': ['Dividend', 'Growth', 'Leverage', 'Momentum', 'Size', 'Value', 'Volatility']}, continuous_hierarchies_labels={'market': {'Market': 'Market'}, 'style': {'Dividend': 'Dividend', 'Growth': 'Growth', 'Leverage': 'Leverage', 'Momentum': 'Momentum', 'Size': 'Size', 'Value': 'Value', 'Volatility': 'Volatility'}}), modelconstruction_settings_menu=ModelConstructionSettingsMenu(weights=['SqrtCap', 'InvIdioVar'])))
# Request the two continuous factor groups. The hierarchy names "market" and
# "style" tie out with the continuous hierarchies reported by describe() above.
factor_groups = [
    ContinuousExposureGroupSettings(hierarchy=name) for name in ("market", "style")
]
exposures_api = bln.equity.exposures.load(ExposureSettings(exposures=factor_groups))

# Fetch the exposures for the universe of the updated risk dataset.
universe = UniverseSettings(dataset=risk_dataset_name)
df = exposures_api.get(universe, standardize_universe=None)

# Show the last rows of the exposures frame; they should fall on the most
# recently uploaded date.
df.tail()
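shape: (5, 10)
| date       | bayesid      | market.Market | style.Dividend | style.Growth | style.Leverage | style.Momentum | style.Size | style.Value | style.Volatility |
|------------|--------------|---------------|----------------|--------------|----------------|----------------|------------|-------------|------------------|
| date       | str          | f32           | f32            | f32          | f32            | f32            | f32        | f32         | f32              |
| 2025-06-30 | "ICFFE54368" | 1.0           | -0.67334       | -0.94873     | 0.129272       | -0.859375      | 0.436279   | -2.851562   | 0.599609         |
| 2025-06-30 | "ICFFE60191" | 0.0           | -0.017334      | -0.406982    | -0.300781      | -0.221558      | -0.955078  | 0.287598    | 0.30835          |
| 2025-06-30 | "ICFFE94AED" | 0.0           | -0.012909      | -0.395996    | -0.292725      | -0.215698      | -0.929688  | 0.280029    | 0.300049         |
| 2025-06-30 | "ICFFEBBB38" | 1.0           | 0.693359       | 0.486328     | 0.187134       | 0.692871       | 0.083679   | 0.051361    | -1.095703        |
| 2025-06-30 | "ICFFF2F5AD" | 0.0           | -0.039185      | -0.460449    | -0.340332      | -0.250732      | -1.081055  | 0.325439    | 0.348877         |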