Initial Exposures Upload and Risk Dataset Creation

Initial Exposures Upload and Risk Dataset Creation#

Use this notebook for the initial upload and risk dataset creation of a set of factor exposures using daily CSV files.

import datetime as dt
import itertools as it
import shutil
import tempfile
import zipfile

from pathlib import Path

from tqdm import tqdm

from bayesline.apiclient import BayeslineApiClient
from bayesline.api.equity import (
    ExposureSettings, 
    IndustrySettings, 
    RegionSettings,
    RiskDatasetSettings,
    RiskDatasetUploadedExposureSettings,
    UniverseSettings,
)
# Create the API client used by all cells below. "[ENDPOINT]" and "[API-KEY]"
# are placeholders that must be replaced with real values before running.
bln = BayeslineApiClient.new_client(
    endpoint="https://[ENDPOINT]",
    api_key="[API-KEY]",
)

Exposure Upload#

# directory that holds the daily exposure CSV files (replace the placeholder path)
exposure_dir = Path("PATH/TO/EXPOSURES")
# fail early with a clear message; a bare `assert` gives no context and is
# stripped entirely when Python runs with -O
if not exposure_dir.exists():
    raise FileNotFoundError(f"Exposure directory does not exist: {exposure_dir}")
# name of the exposure dataset the files are uploaded into
exposure_dataset_name = "My-Exposures"

The code below creates a new exposure uploader for the chosen dataset name My-Exposures. See the Uploaders Tutorial for a deep dive into the Uploaders API.

exposure_uploader = bln.equity.uploaders.get_data_type("exposures")
uploader = exposure_uploader.get_or_create_dataset(exposure_dataset_name)
# list all csv files and group them by year
# expects file pattern "*_YYYY-MM-DD.csv"


def _file_year(path: Path) -> int:
    """Return the year encoded in a file name of the form ``*_YYYY-MM-DD.csv``."""
    # use the text after the LAST underscore so that prefixes which contain
    # underscores themselves (e.g. "my_exposures_2025-05-01.csv") still parse
    date_part = path.stem.rsplit("_", 1)[-1]
    return int(date_part.split("-")[0])


all_files = sorted(exposure_dir.glob("*.csv"))
# NOTE(review): not used by any cell visible in this notebook; kept so that the
# name stays available — confirm whether it can be dropped
existing_files = uploader.get_staging_results().keys()

# itertools.groupby only merges ADJACENT items with equal keys, so the input
# must be ordered by year. Sorting by path alone is not enough when file
# prefixes differ: the same year could then show up in several runs and the
# dict comprehension would silently keep only the last one. sorted() is stable,
# so files within a year stay in path order.
files_by_year = {
    year: list(group)
    for year, group in it.groupby(sorted(all_files, key=_file_year), key=_file_year)
}

print(f"Found {len(all_files)} files.")
print("Years:", ", ".join(map(str, files_by_year)))
Found 31 files.
Years: 2025

Below we batch the daily CSV files into annual ZIP files. Creating batched ZIP files is recommended as it will be much faster to upload and process compared to individually uploading daily files.

# scratch directory for the annual ZIP archives; it is removed explicitly
# with shutil.rmtree once the archives have been staged
temp_dir = Path(tempfile.mkdtemp())
print(f"Created temp directory: {temp_dir}")
Created temp directory: /tmp/tmphtk46llo
# bundle the daily CSV files of each year into one compressed ZIP archive
for year, files in tqdm(files_by_year.items()):
    zip_path = temp_dir / f"exposures_{year}.zip"

    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
        for file in files:
            # ZipFile.write adds the file straight from disk, so the CSV does
            # not have to be read into memory first; arcname keeps the archive
            # flat (entry name = bare file name, as before)
            zipf.write(file, arcname=file.name)

Next, we iterate over the annual ZIP files and stage them in the uploader. See the Uploaders Tutorial for more details on the staging and commit concepts.

# stage every annual archive with the uploader
for year in files_by_year:
    zip_path = temp_dir / f"exposures_{year}.zip"
    result = uploader.stage_file(zip_path)
    # raise explicitly instead of using `assert`: asserts are skipped under
    # `python -O`, which would let a failed staging go unnoticed, and the
    # message tells the user which archive failed
    if not result.success:
        raise RuntimeError(f"Staging {zip_path.name} failed: {result!r}")
# the archives are no longer needed once everything has been staged
shutil.rmtree(temp_dir)

Data Commit#

Lastly we can collect some descriptive statistics as a sense check before committing the data to the uploader.

# per-file summary statistics (date, asset count, exposure min/max/mean/median/std)
# of the staged data, inspected before the commit below
staging_data_summary_df = uploader.get_staging_data_detail_summary()
staging_data_summary_df
shape: (31, 8)
_namedaten_assetsmin_exposuremax_exposuremean_exposuremedian_exposurestd_exposure
strdateu32f32f32f32f32f32
"exposures_2025-05-01"2025-05-0135804-4.06254.093750.2417190.497071.013115
"exposures_2025-05-02"2025-05-0235798-4.06254.093750.241690.4975591.013026
"exposures_2025-05-03"2025-05-0335793-4.06254.093750.2417010.4975591.012994
"exposures_2025-05-04"2025-05-0435793-4.06254.093750.241720.4975591.013005
"exposures_2025-05-05"2025-05-0535796-4.06254.093750.2416750.497071.012967
"exposures_2025-05-27"2025-05-2735767-4.0468754.0976560.2451580.4985351.010661
"exposures_2025-05-28"2025-05-2835763-4.0468754.093750.2431360.4956051.01173
"exposures_2025-05-29"2025-05-2935761-4.0468754.093750.2431730.495851.011687
"exposures_2025-05-30"2025-05-3035756-4.0468754.093750.2431290.4960941.011694
"exposures_2025-05-31"2025-05-3135742-4.0468754.093750.2431450.495851.011691
# commit the staged files; the result reports the version and the committed names
uploader.commit(mode="append")
UploadCommitResult(version=1, committed_names=['exposures_2025-05-28', 'exposures_2025-05-08', 'exposures_2025-05-19', 'exposures_2025-05-04', 'exposures_2025-05-11', 'exposures_2025-05-15', 'exposures_2025-05-23', 'exposures_2025-05-02', 'exposures_2025-05-22', 'exposures_2025-05-20', 'exposures_2025-05-07', 'exposures_2025-05-25', 'exposures_2025-05-05', 'exposures_2025-05-31', 'exposures_2025-05-29', 'exposures_2025-05-30', 'exposures_2025-05-27', 'exposures_2025-05-21', 'exposures_2025-05-06', 'exposures_2025-05-17', 'exposures_2025-05-26', 'exposures_2025-05-24', 'exposures_2025-05-09', 'exposures_2025-05-01', 'exposures_2025-05-14', 'exposures_2025-05-18', 'exposures_2025-05-16', 'exposures_2025-05-12', 'exposures_2025-05-13', 'exposures_2025-05-10', 'exposures_2025-05-03'])

Risk Dataset Creation#

The code below creates a new risk dataset from the exposures uploaded above. See the Risk Datasets Tutorial for a deep dive into the Risk Datasets API.

risk_datasets = bln.equity.riskdatasets
# existing datasets which can be used as reference datasets
risk_datasets.get_dataset_names()
['Bayesline-US-500-1y', 'Bayesline-US-All-1y']
# name of the risk dataset that is created further below
risk_dataset_name = "My-Risk-Dataset"
# remove a dataset of that name if one already exists — presumably so the
# notebook can be re-run from scratch
risk_datasets.delete_dataset_if_exists(risk_dataset_name)

We need to specify which of the uploaded exposures are style factors, region factors, etc. The cell below lists the factor groups as they were extracted from the uploaded exposures.

# distinct values of the "factor_group" column in the uploaded exposures
uploader.get_data(columns=["factor_group"], unique=True).collect()
shape: (4, 1)
factor_group
str
"industry"
"market"
"style"
"region"

See API docs for RiskDatasetSettings and RiskDatasetUploadedExposureSettings for other potential settings.

# assign each factor group found in the uploaded exposures to its role
uploaded_exposure_settings = RiskDatasetUploadedExposureSettings(
    exposure_source=exposure_dataset_name,
    market_factor_group="market",
    style_factor_group="style",
    industry_factor_group="industry",
    region_factor_group="region",
    style_factor_fill_miss=True,
)

# the trim start date equals the first date of the uploaded exposures
first_exposure_date = dt.date(2025, 5, 1)

settings = RiskDatasetSettings(
    reference_dataset="Bayesline-US-All-1y",
    exposures=[uploaded_exposure_settings],
    trim_start_date=first_exposure_date,
)

Lastly, we create the new dataset and then describe its properties.

# create the risk dataset under the chosen name using the settings defined above
my_risk_dataset = risk_datasets.create_dataset(risk_dataset_name, settings)
# show the universe settings menu of the new dataset (id types, exchanges,
# industry and region hierarchies with their labels)
my_risk_dataset.describe().universe_settings_menu
UniverseSettingsMenu(id_types=['bayesid'], exchanges=['ARCX', 'BVCA', 'BVMF', 'DIFX', 'DSMD', 'ETFP', 'FRAB', 'HSTC', 'JBUL', 'PFTS', 'ROCO', 'SHSC', 'SZSC', 'WBDM', 'XADS', 'XAMM', 'XAMS', 'XASE', 'XASX', 'XATH', 'XBAH', 'XBEL', 'XBEY', 'XBKF', 'XBKK', 'XBOG', 'XBOM', 'XBOS', 'XBRA', 'XBRU', 'XBRV', 'XBUD', 'XBUE', 'XCAI', 'XCAN', 'XCAS', 'XCSE', 'XCYS', 'XDUB', 'XEQY', 'XETB', 'XHEL', 'XHKG', 'XHNX', 'XICE', 'XIDX', 'XJAM', 'XJAS', 'XJSE', 'XKAR', 'XKLS', 'XKOS', 'XKRX', 'XKUW', 'XLIM', 'XLIS', 'XLIT', 'XLJU', 'XLON', 'XLUX', 'XMAD', 'XMAL', 'XMAU', 'XMEX', 'XMUS', 'XNAI', 'XNAM', 'XNAS', 'XNCM', 'XNSA', 'XNSE', 'XNYS', 'XNZE', 'XOSL', 'XPAE', 'XPAR', 'XPHS', 'XPRM', 'XPSX', 'XQUI', 'XRIS', 'XSAU', 'XSEC', 'XSES', 'XSGO', 'XSHE', 'XSHG', 'XSSC', 'XSTC', 'XSTO', 'XSWX', 'XTAE', 'XTAI', 'XTAL', 'XTKS', 'XTSE', 'XTSX', 'XTUN', 'XWAR', 'XZAG', 'XZIM'], industry={'industry': {'ALL': ['industry.Academic & Educational Services', 'industry.Basic Materials', 'industry.Consumer Cyclicals', 'industry.Consumer Non-Cyclicals', 'industry.Energy', 'industry.Financials', 'industry.Government Activity', 'industry.Healthcare', 'industry.Industrials', 'industry.Institutions, Associations & Organizations', 'industry.Real Estate', 'industry.Technology', 'industry.Utilities']}}, region={'region': {'ALL': ['region.Australia', 'region.Austria', 'region.Belgium', 'region.Bermuda', 'region.Brazil', 'region.British Virgin Islands', 'region.Bulgaria', 'region.Canada', 'region.Cayman Islands', 'region.China', 'region.Croatia', 'region.Cyprus', 'region.Czechia', 'region.Denmark', 'region.Estonia', 'region.Finland', 'region.France', 'region.Germany', 'region.Greece', 'region.Guernsey', 'region.Hong Kong', 'region.Hungary', 'region.Iceland', 'region.India', 'region.Indonesia', 'region.Ireland', 'region.Israel', 'region.Italy', 'region.Japan', 'region.Jersey', 'region.Kazakhstan', 'region.Latvia', 'region.Lithuania', 'region.Luxembourg', 'region.Macau', 'region.Malaysia', 'region.Mexico', 
'region.Netherlands', 'region.New Zealand', 'region.Norway', 'region.Pakistan', 'region.Papua New Guinea', 'region.Philippines', 'region.Poland', 'region.Portugal', 'region.Romania', 'region.Serbia', 'region.Singapore', 'region.Slovakia', 'region.Slovenia', 'region.South Korea', 'region.Spain', 'region.Sweden', 'region.Switzerland', 'region.Taiwan', 'region.Thailand', 'region.Türkiye', 'region.Ukraine', 'region.United Arab Emirates', 'region.United Kingdom', 'region.United States', 'region.Vietnam']}}, industry_labels={'industry': {'ALL': 'ALL', 'industry.Academic & Educational Services': 'industry.Academic & Educational Services', 'industry.Basic Materials': 'industry.Basic Materials', 'industry.Consumer Cyclicals': 'industry.Consumer Cyclicals', 'industry.Consumer Non-Cyclicals': 'industry.Consumer Non-Cyclicals', 'industry.Energy': 'industry.Energy', 'industry.Financials': 'industry.Financials', 'industry.Government Activity': 'industry.Government Activity', 'industry.Healthcare': 'industry.Healthcare', 'industry.Industrials': 'industry.Industrials', 'industry.Institutions, Associations & Organizations': 'industry.Institutions, Associations & Organizations', 'industry.Real Estate': 'industry.Real Estate', 'industry.Technology': 'industry.Technology', 'industry.Utilities': 'industry.Utilities'}}, region_labels={'region': {'ALL': 'ALL', 'region.Australia': 'region.Australia', 'region.Austria': 'region.Austria', 'region.Belgium': 'region.Belgium', 'region.Bermuda': 'region.Bermuda', 'region.Brazil': 'region.Brazil', 'region.British Virgin Islands': 'region.British Virgin Islands', 'region.Bulgaria': 'region.Bulgaria', 'region.Canada': 'region.Canada', 'region.Cayman Islands': 'region.Cayman Islands', 'region.China': 'region.China', 'region.Croatia': 'region.Croatia', 'region.Cyprus': 'region.Cyprus', 'region.Czechia': 'region.Czechia', 'region.Denmark': 'region.Denmark', 'region.Estonia': 'region.Estonia', 'region.Finland': 'region.Finland', 'region.France': 
'region.France', 'region.Germany': 'region.Germany', 'region.Greece': 'region.Greece', 'region.Guernsey': 'region.Guernsey', 'region.Hong Kong': 'region.Hong Kong', 'region.Hungary': 'region.Hungary', 'region.Iceland': 'region.Iceland', 'region.India': 'region.India', 'region.Indonesia': 'region.Indonesia', 'region.Ireland': 'region.Ireland', 'region.Israel': 'region.Israel', 'region.Italy': 'region.Italy', 'region.Japan': 'region.Japan', 'region.Jersey': 'region.Jersey', 'region.Kazakhstan': 'region.Kazakhstan', 'region.Latvia': 'region.Latvia', 'region.Lithuania': 'region.Lithuania', 'region.Luxembourg': 'region.Luxembourg', 'region.Macau': 'region.Macau', 'region.Malaysia': 'region.Malaysia', 'region.Mexico': 'region.Mexico', 'region.Netherlands': 'region.Netherlands', 'region.New Zealand': 'region.New Zealand', 'region.Norway': 'region.Norway', 'region.Pakistan': 'region.Pakistan', 'region.Papua New Guinea': 'region.Papua New Guinea', 'region.Philippines': 'region.Philippines', 'region.Poland': 'region.Poland', 'region.Portugal': 'region.Portugal', 'region.Romania': 'region.Romania', 'region.Serbia': 'region.Serbia', 'region.Singapore': 'region.Singapore', 'region.Slovakia': 'region.Slovakia', 'region.Slovenia': 'region.Slovenia', 'region.South Korea': 'region.South Korea', 'region.Spain': 'region.Spain', 'region.Sweden': 'region.Sweden', 'region.Switzerland': 'region.Switzerland', 'region.Taiwan': 'region.Taiwan', 'region.Thailand': 'region.Thailand', 'region.Türkiye': 'region.Türkiye', 'region.Ukraine': 'region.Ukraine', 'region.United Arab Emirates': 'region.United Arab Emirates', 'region.United Kingdom': 'region.United Kingdom', 'region.United States': 'region.United States', 'region.Vietnam': 'region.Vietnam'}})

We can pull some exposures from the new risk dataset to verify.

# load the exposures API with industries and regions left unset (None)
exposure_settings = ExposureSettings(
    industries=None,
    regions=None,
)
exposures_api = bln.equity.exposures.load(exposure_settings)

# note that the industry and region hierarchy names tie out with the factor groups we specified above
industry_settings = IndustrySettings(hierarchy="industry", include="All")
region_settings = RegionSettings(hierarchy="region", include="All")
universe_settings = UniverseSettings(
    dataset=risk_dataset_name,
    industry=industry_settings,
    region=region_settings,
)

# pull the exposures for the new risk dataset and look at the last rows
df = exposures_api.get(universe_settings)

df.tail()
shape: (5, 10)
datebayesidmarket.market.Marketstyle.style.Dividendstyle.style.Growthstyle.style.Leveragestyle.style.Momentumstyle.style.Sizestyle.style.Valuestyle.style.Volatility
datestrf32f32f32f32f32f32f32f32
2025-05-31"ZSPC"1.0-0.5683590.364502-0.083862-0.183472-1.768555-1.721681.799805
2025-05-31"ZTR"1.01.834961-0.286621-0.3212891.28125-0.7382811.160156-1.362305
2025-05-31"ZUMZ"1.0-1.166016-2.34375-0.848145-1.31543-0.7827151.06250.819824
2025-05-31"ZVIA"1.0-0.069031-1.301758-1.9755861.680664-1.431641-0.6962891.248047
2025-05-31"ZVVT"1.0-0.533203-0.9077151.4541020.082458-1.37207-1.2773442.097656