{
  "cells": [
    {
      "cell_type": "markdown",
      "id": "992223c7",
      "metadata": {},
      "source": [
        "# Portfolio Aligned Universes\n",
        "\n",
        "In this notebook we'll demonstrate how to define universes based on existing portfolio data. \n",
        "\n",
        "For this purpose we will first upload some sample portfolio holdings and subsequently use them to define a universe filter."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "id": "112d39e5",
      "metadata": {},
      "outputs": [],
      "source": [
        "import io\n",
        "\n",
        "import polars as pl\n",
        "\n",
        "from bayesline.api.equity import (\n",
        "    PortfolioOrganizerSettings,\n",
        "    UniverseSettings,\n",
        ")\n",
        "from bayesline.apiclient import BayeslineApiClient"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "7fc19806",
      "metadata": {
        "tags": [
          "skip-execution"
        ]
      },
      "outputs": [],
      "source": [
        "bln = BayeslineApiClient.new_client(\n",
        "    endpoint=\"https://[ENDPOINT]\",\n",
        "    api_key=\"[API-KEY]\",\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "d43df41f",
      "metadata": {},
      "source": [
        "## Uploading Portfolios\n",
        "\n",
        "We'll upload two portfolios `PORT_1` and `PORT_2` which contain assets *Apple* (`IC83A1B819`), *Microsoft* (`ICF982536B`) and *Alphabet* (`ICA17F00B9`)."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "id": "42971f80",
      "metadata": {},
      "outputs": [],
      "source": [
        "portfolio_uploaders = bln.equity.uploaders.get_data_type(\"portfolios\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "id": "fa9144b9",
      "metadata": {},
      "outputs": [],
      "source": [
        "portfolio_uploader = portfolio_uploaders.create_or_replace_dataset(\"US-Portfolios\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "id": "564efafe",
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/html": [
              "<div><style>\n",
              ".dataframe > thead > tr,\n",
              ".dataframe > tbody > tr {\n",
              "  text-align: right;\n",
              "  white-space: pre-wrap;\n",
              "}\n",
              "</style>\n",
              "<small>shape: (4, 5)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>date</th><th>portfolio_id</th><th>IC83A1B819</th><th>ICA17F00B9</th><th>ICF982536B</th></tr><tr><td>date</td><td>str</td><td>f64</td><td>f64</td><td>f64</td></tr></thead><tbody><tr><td>2026-01-01</td><td>&quot;PORT_1&quot;</td><td>0.3</td><td>0.7</td><td>0.0</td></tr><tr><td>2026-01-01</td><td>&quot;PORT_2&quot;</td><td>0.4</td><td>0.0</td><td>0.6</td></tr><tr><td>2026-02-01</td><td>&quot;PORT_1&quot;</td><td>1.0</td><td>0.0</td><td>0.0</td></tr><tr><td>2026-02-01</td><td>&quot;PORT_2&quot;</td><td>0.0</td><td>0.0</td><td>1.0</td></tr></tbody></table></div>"
            ],
            "text/plain": [
              "shape: (4, 5)\n",
              "┌────────────┬──────────────┬────────────┬────────────┬────────────┐\n",
              "│ date       ┆ portfolio_id ┆ IC83A1B819 ┆ ICA17F00B9 ┆ ICF982536B │\n",
              "│ ---        ┆ ---          ┆ ---        ┆ ---        ┆ ---        │\n",
              "│ date       ┆ str          ┆ f64        ┆ f64        ┆ f64        │\n",
              "╞════════════╪══════════════╪════════════╪════════════╪════════════╡\n",
              "│ 2026-01-01 ┆ PORT_1       ┆ 0.3        ┆ 0.7        ┆ 0.0        │\n",
              "│ 2026-01-01 ┆ PORT_2       ┆ 0.4        ┆ 0.0        ┆ 0.6        │\n",
              "│ 2026-02-01 ┆ PORT_1       ┆ 1.0        ┆ 0.0        ┆ 0.0        │\n",
              "│ 2026-02-01 ┆ PORT_2       ┆ 0.0        ┆ 0.0        ┆ 1.0        │\n",
              "└────────────┴──────────────┴────────────┴────────────┴────────────┘"
            ]
          },
          "execution_count": 5,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "data_csv = \"\"\"\n",
        "portfolio_id\tdate\tasset_id\tasset_id_type\tcurrency\tshare_qty\tnav\n",
        "PORT_1\t2026-01-01\tIC83A1B819\tbayesid\t\t\t.3\n",
        "PORT_1\t2026-01-01\tICA17F00B9\tbayesid\t\t\t.7\n",
        "PORT_2\t2026-01-01\tIC83A1B819\tbayesid\t\t\t.4\n",
        "PORT_2\t2026-01-01\tICF982536B\tbayesid\t\t\t.6\n",
        "PORT_1\t2026-02-01\tIC83A1B819\tbayesid\t\t\t1\n",
        "PORT_2\t2026-02-01\tICF982536B\tbayesid\t\t\t1\n",
        "\"\"\"\n",
        "\n",
        "portfolios_df = pl.read_csv(\n",
        "    io.StringIO(data_csv.strip()), \n",
        "    separator=\"\\t\", \n",
        "    try_parse_dates=True,\n",
        "    schema_overrides={\n",
        "        \"currency\": pl.String,\n",
        "        \"share_qty\": pl.Float64,\n",
        "        \"nav\": pl.Float64,\n",
        "    },\n",
        ")\n",
        "\n",
        "portfolios_df.pivot(index=[\"date\", \"portfolio_id\"], on=\"asset_id\", values=\"nav\").fill_null(0)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "id": "b80b8359",
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "UploadCommitResult(version=1, committed_names=[])"
            ]
          },
          "execution_count": 6,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "portfolio_uploader.fast_commit(portfolios_df, mode=\"append\")"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "7d5dfb26",
      "metadata": {},
      "source": [
        "## Creating the Universe\n",
        "\n",
        "Note that below we could still use other filters in addition to the portfolio filter. This way we could for instance express *the energy sector within the Russell 3000*."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "id": "3b83a604",
      "metadata": {},
      "outputs": [],
      "source": [
        "universe_settings = UniverseSettings(\n",
        "    dataset=\"Bayesline-US-All-1y\",\n",
        "    portfolio_filter=PortfolioOrganizerSettings(\n",
        "        # filters against the point in time superset \n",
        "        # of all portfolios contained in `US-Portfolios`\n",
        "        enabled_portfolios=\"US-Portfolios\"\n",
        "    ),\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "id": "73b8a7ae",
      "metadata": {},
      "outputs": [],
      "source": [
        "universe_api = bln.equity.universes.load(universe_settings)"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "b6c628a4",
      "metadata": {},
      "source": [
        "When obtaining the universe data note how even though we only specified holdings for `2026-01-01` and `2026-02-01` (which could be rebalance dates) the resulting universe contains entries for each day. Underneath the holdings are forward filled and delisted assets are dropped."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "id": "3074fb96",
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead tr th {\n",
              "        text-align: left;\n",
              "    }\n",
              "\n",
              "    .dataframe thead tr:last-of-type th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr>\n",
              "      <th></th>\n",
              "      <th colspan=\"3\" halign=\"left\">value</th>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>bayesid</th>\n",
              "      <th>IC83A1B819</th>\n",
              "      <th>ICA17F00B9</th>\n",
              "      <th>ICF982536B</th>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>date</th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>2026-01-01</th>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2026-01-02</th>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2026-01-03</th>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2026-01-04</th>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2026-01-05</th>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2026-03-27</th>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2026-03-28</th>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2026-03-29</th>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2026-03-30</th>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2026-03-31</th>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>1.0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>90 rows × 3 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "                value                      \n",
              "bayesid    IC83A1B819 ICA17F00B9 ICF982536B\n",
              "date                                       \n",
              "2026-01-01        1.0        1.0        1.0\n",
              "2026-01-02        1.0        1.0        1.0\n",
              "2026-01-03        1.0        1.0        1.0\n",
              "2026-01-04        1.0        1.0        1.0\n",
              "2026-01-05        1.0        1.0        1.0\n",
              "...               ...        ...        ...\n",
              "2026-03-27        1.0        NaN        1.0\n",
              "2026-03-28        1.0        NaN        1.0\n",
              "2026-03-29        1.0        NaN        1.0\n",
              "2026-03-30        1.0        NaN        1.0\n",
              "2026-03-31        1.0        NaN        1.0\n",
              "\n",
              "[90 rows x 3 columns]"
            ]
          },
          "execution_count": 9,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "universe_api.get().to_pandas().assign(value=1.).set_index([\"date\", \"bayesid\"]).unstack()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": ".venv",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.11.15"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}