mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-24 02:43:55 +00:00
289 lines
9.5 KiB
Python
289 lines
9.5 KiB
Python
# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
"""Scenario generation module for portfolio optimization.
|
|
|
|
Provides tools for generating synthetic financial data using Geometric Brownian Motion
|
|
and other stochastic processes. Used to create forward-looking scenarios for
|
|
risk assessment and portfolio optimization.
|
|
"""
|
|
|
|
import matplotlib.pyplot as plt
|
|
import numpy as np
|
|
import pandas as pd
|
|
import seaborn as sns
|
|
|
|
|
|
class ForwardPathSimulator:
    """Generates synthetic forward paths for financial assets.

    Uses Geometric Brownian Motion to simulate asset price paths based on
    historical data calibration.

    Parameters
    ----------
    fitting_data : pd.DataFrame
        Historical price data for calibration (dates x assets). It must
        contain a row labelled ``generation_dates[0]``; that row is used as
        the starting value of every simulated path.
    generation_dates : pd.DatetimeIndex or list
        Date range for synthetic data generation.
    n_paths : int
        Number of scenarios/forward paths to generate.
    method : str, default "log_gbm"
        Generation method (currently only "log_gbm" supported). Compared
        case-insensitively.

    Attributes
    ----------
    fitting_data : pd.DataFrame
        Historical data used for calibration.
    dates : pd.DatetimeIndex or list
        Generation date range.
    n_steps : int
        Number of time steps for simulation (``len(generation_dates) - 1``).
    n_paths : int
        Number of scenarios generated.
    generation_method : str
        Lower-cased method used for generation.
    simulated_paths : np.ndarray
        Generated synthetic paths (n_paths x n_steps+1 x n_assets).
        Only available after :meth:`generate` has been called.
    """

    def __init__(self, fitting_data, generation_dates, n_paths, method="log_gbm"):
        """Initialize scenario generator with data and parameters."""
        self.fitting_data = fitting_data
        self.dates = generation_dates
        # One simulated point per generation date; the first one is the
        # (known) starting value, hence one step fewer than dates.
        self.n_steps = len(generation_dates) - 1
        self.n_paths = n_paths
        self.generation_method = method.lower()

    def generate(self, plot_paths=False, n_plots=0):
        """Generate synthetic forward paths and store them on the instance.

        Parameters
        ----------
        plot_paths : bool, default False
            Whether to plot generated paths.
        n_plots : int, default 0
            Number of paths to plot if plot_paths is True.

        Raises
        ------
        ValueError
            If generation method is not recognized.
        """
        if self.generation_method != "log_gbm":
            raise ValueError("Unrecognized generation method.")

        mu, sigma, L = self._calibrate_log_process()
        self.simulated_paths = self._generate_via_log_gbm(mu, sigma, L)

        if plot_paths:
            self._plot_generated_paths(n_plots)

    def _calibrate_log_process(self):
        """Calibrate log-normal process parameters from historical data.

        Returns
        -------
        mu : np.ndarray
            Per-step drift parameter for each asset, i.e. the mean log
            return plus half the log-return variance.
        sigma : np.ndarray
            Covariance matrix of log returns.
        L : np.ndarray
            Transposed (upper-triangular) Cholesky factor of ``sigma``,
            satisfying ``L.T @ L == sigma``.
        """
        log_returns = np.log(self.fitting_data / self.fitting_data.shift(1)).dropna()

        # Estimate covariance matrix of log returns
        sigma = log_returns.cov().values

        # Cholesky factor of the covariance matrix. The transpose is taken so
        # that row vectors of independent normals Z give Z @ L with
        # covariance L.T @ L == sigma.
        L = np.linalg.cholesky(sigma).T

        # Estimate drift: the mean per-step log return equals
        # (log P_last - log P_first) / number_of_fitting_steps.
        mean_log_return = log_returns.mean().values

        # GBM drift = mean log return + sigma_i^2 / 2. The variance is read
        # straight from the covariance diagonal so that it is exactly the
        # term removed again in _generate_via_log_gbm.
        mu = mean_log_return + 0.5 * np.diag(sigma)

        return mu, sigma, L

    def _generate_via_log_gbm(self, mu, sigma, L, dt=1):
        """Generate paths using log-normal Geometric Brownian Motion.

        Parameters
        ----------
        mu : np.ndarray
            Drift parameters for each asset.
        sigma : np.ndarray
            Covariance matrix of log returns.
        L : np.ndarray
            Transposed Cholesky factor of the covariance matrix
            (``L.T @ L == sigma``).
        dt : float, default 1
            Time step size.

        Returns
        -------
        np.ndarray
            Simulated paths (n_paths x n_steps+1 x n_assets).

        Raises
        ------
        KeyError
            If the first generation date is not present in the fitting data.
        """
        # Starting value: the observed row at the start of the generation period
        start_values = self.fitting_data.loc[self.dates[0]].values
        n_assets = len(mu)

        # Correlated Brownian increments for every path, step and asset
        Z = np.random.normal(size=(self.n_paths, self.n_steps, n_assets))
        dW = np.matmul(Z, L) * np.sqrt(dt)

        # Deterministic part of each log increment. diag(sigma) already holds
        # the variances, so it must not be squared again. The value does not
        # depend on the step, so it is computed once.
        drift = (mu - 0.5 * np.diag(sigma)) * dt

        simulated_paths = np.empty((self.n_paths, self.n_steps + 1, n_assets))
        simulated_paths[:, 0, :] = start_values
        # S_t = S_0 * exp(sum of log increments up to t)
        simulated_paths[:, 1:, :] = start_values * np.exp(
            np.cumsum(drift + dW, axis=1)
        )

        return simulated_paths

    def _plot_generated_paths(self, n_plots):
        """Plot randomly selected generated paths, one figure per path.

        Parameters
        ----------
        n_plots : int
            Number of paths to plot. Capped at the number of generated paths.
        """
        # simulated_paths has shape (n_paths, n_steps + 1, n_assets)
        n_paths = self.simulated_paths.shape[0]

        # Sampling without replacement cannot return more paths than exist
        n_plots = min(n_plots, n_paths)

        # Randomly select indices for the scenarios to plot
        random_indices = np.random.choice(n_paths, n_plots, replace=False)
        plt.rcParams.update({"font.size": 8})
        sns.set(rc={"figure.dpi": 100, "savefig.dpi": 300})
        sns.set_palette(palette="tab10")
        sns.set_style("white")

        # One figure per selected scenario
        for i, idx in enumerate(random_indices):
            _, ax = plt.subplots(figsize=(10, 7))

            # Rows are the generation dates (n_steps + 1 of them), not the
            # fitting dates.
            selected_paths = pd.DataFrame(
                self.simulated_paths[idx, :, :],
                index=self.dates,
                columns=self.fitting_data.columns,
            )

            # Draw on the axes created above; without ``ax`` pandas would
            # open a second figure and leave this one empty.
            selected_paths.plot(ax=ax)

            ax.set_title(f"Scenario {i + 1} - Path {idx + 1}")
            ax.tick_params(axis="x", labelrotation=50, labelsize=8)
            ax.set_ylabel("Forward Rate")
            ax.legend()

        plt.show()

    def get_simulated_paths_ccy_pair(self, ccy_pair):
        """Extract simulated paths for a specific asset.

        Parameters
        ----------
        ccy_pair : str
            Asset identifier (a column of the fitting data) to extract
            paths for.

        Returns
        -------
        pd.DataFrame
            Simulated paths for the specified asset (dates x n_paths).

        Raises
        ------
        ValueError
            If ``ccy_pair`` is not a column of the fitting data.
        """
        ccy_pair_idx = list(self.fitting_data.columns).index(ccy_pair)

        # Slice is (n_paths, n_steps + 1); transpose so rows line up with
        # the generation dates and each column is one path.
        simulated_paths_ccy_pair = self.simulated_paths[:, :, ccy_pair_idx].T

        return pd.DataFrame(simulated_paths_ccy_pair, index=self.dates)
|
|
|
|
|
|
def generate_synthetic_stock_data(
    dataset_directory, num_synthetic, fit_range, generate_range
):
    """Augment a historical price table with GBM-simulated price series.

    The CSV is loaded with its first column as the index. A
    ``ForwardPathSimulator`` is calibrated on the rows inside ``fit_range``
    and asked for ``num_synthetic`` paths over the index labels that fall
    inside ``generate_range``. The simulated series are appended as extra
    columns next to the original data for the generation period.

    Parameters
    ----------
    dataset_directory : str
        Path to CSV file containing historical stock data.
    num_synthetic : int
        Number of simulated paths. Every path contributes one synthetic
        column per asset, so ``num_synthetic * num_assets`` columns are added.
    fit_range : tuple of str
        Start and end labels (both inclusive) of the calibration period.
    generate_range : tuple of str
        Start and end labels (both inclusive) of the generation period.

    Returns
    -------
    pd.DataFrame
        Original columns restricted to the generation period, followed by
        the synthetic columns. Synthetic columns are named 'ticker-idx',
        where idx is the zero-based path number, and are ordered path by
        path with the assets of each path in their original order.
    """
    history = pd.read_csv(dataset_directory, index_col=0)

    calibration_window = history.loc[fit_range[0] : fit_range[1]]
    generation_window = history.loc[generate_range[0] : generate_range[1]]
    generation_index = generation_window.index
    asset_count = len(calibration_window.columns)

    simulator = ForwardPathSimulator(
        fitting_data=calibration_window,
        generation_dates=generation_index,
        n_paths=num_synthetic,
        method="log_gbm",
    )
    simulator.generate()

    # (paths, steps, assets) -> (steps, paths, assets) -> (steps, paths * assets):
    # flattening keeps all assets of path 0 first, then path 1, and so on.
    by_step = simulator.simulated_paths.transpose(1, 0, 2)
    flattened = by_step.reshape(
        simulator.n_steps + 1, simulator.n_paths * asset_count
    )
    synthetic_frame = pd.DataFrame(flattened, index=generation_index)

    combined = pd.concat([generation_window, synthetic_frame], axis=1)

    # Column labels follow the same path-major, asset-minor order as the
    # flattened array above.
    original_names = list(history.columns)
    synthetic_names = []
    for path_number in range(simulator.n_paths):
        for ticker in original_names:
            synthetic_names.append(ticker + "-" + str(path_number))

    combined.columns = original_names + synthetic_names

    return combined
|