mirror of
https://github.com/NVIDIA/dgx-spark-playbooks.git
synced 2026-04-23 18:33:54 +00:00
1718 lines
62 KiB
Python
1718 lines
62 KiB
Python
|
|
# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa
|
||
|
|
# SPDX-License-Identifier: Apache-2.0
|
||
|
|
#
|
||
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
|
# you may not use this file except in compliance with the License.
|
||
|
|
# You may obtain a copy of the License at
|
||
|
|
#
|
||
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
|
#
|
||
|
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
|
# See the License for the specific language governing permissions and
|
||
|
|
# limitations under the License.
|
||
|
|
|
||
|
|
import os
|
||
|
|
import time
|
||
|
|
from typing import Union
|
||
|
|
|
||
|
|
import matplotlib.pyplot as plt
|
||
|
|
import numpy as np
|
||
|
|
import pandas as pd
|
||
|
|
import seaborn as sns
|
||
|
|
import sklearn.neighbors
|
||
|
|
|
||
|
|
from . import scenario_generation, utils
|
||
|
|
from .cvar_data import CvarData
|
||
|
|
from .cvar_parameters import CvarParameters
|
||
|
|
from .portfolio import Portfolio
|
||
|
|
|
||
|
|
# Note: cvar_optimizer and cuml are imported lazily within functions to avoid
|
||
|
|
# circular imports and loading CUDA libraries at module import time
|
||
|
|
|
||
|
|
def generate_samples_kde(
    num_scen: int,
    returns_data: np.ndarray,
    kde_settings: dict = None,
    verbose: bool = False,
):
    """Fit a KernelDensity model to historical returns and draw new samples.

    Args:
        num_scen (int): Number of scenarios to generate.
        returns_data (np.ndarray): Historical returns data for fitting,
            shape (n_observations, n_features).
        kde_settings (dict, optional): Dictionary with keys "device"
            ("CPU" or "GPU"), "bandwidth" (float), and "kernel" (str).
            Defaults to {"device": "CPU", "bandwidth": 0.05, "kernel": "gaussian"}.
        verbose (bool, optional): Whether to print verbose output. Defaults to False.

    Returns:
        np.ndarray: Array of generated samples with shape (num_scen, n_features).

    Raises:
        ValueError: If kde_settings["device"] is not "CPU" or "GPU".

    Example:
        >>> import numpy as np
        >>> # Historical returns for 3 assets over 100 days
        >>> returns_data = np.random.randn(100, 3) * 0.02
        >>> # Generate 50 new scenarios using KDE
        >>> new_scenarios = generate_samples_kde(
        ...     num_scen=50,
        ...     returns_data=returns_data,
        ...     kde_settings={"device": "CPU", "bandwidth": 0.01, "kernel": "gaussian"},
        ... )
        >>> print(new_scenarios.shape)  # (50, 3)
    """
    # The docstring advertises "Defaults to None", but subscripting None below
    # raised a TypeError. Fall back to the same defaults generate_cvar_data uses.
    if kde_settings is None:
        kde_settings = {"device": "CPU", "bandwidth": 0.05, "kernel": "gaussian"}

    kde_device = kde_settings["device"]
    bandwidth = kde_settings["bandwidth"]
    kernel = kde_settings["kernel"]

    if kde_device == "CPU":
        kde = sklearn.neighbors.KernelDensity(kernel=kernel, bandwidth=bandwidth).fit(
            returns_data
        )
        new_samples = kde.sample(num_scen)
        if verbose:
            print("KDE fitting on CPU")

    elif kde_device == "GPU":
        # Lazy import to avoid loading CUDA libraries on module import
        import cuml.neighbors

        kde = cuml.neighbors.KernelDensity(kernel=kernel, bandwidth=bandwidth).fit(
            returns_data
        )
        new_samples = kde.sample(num_scen).get()  # convert to numpy array
        if verbose:
            print("KDE fitting on GPU")

    else:
        raise ValueError("Invalid Device: CPU or GPU!")

    return new_samples
|
||
|
|
|
||
|
|
|
||
|
|
def generate_cvar_data(returns_dict: dict, scenario_generation_settings: dict):
    """Generate CvarData dataclass for CVaR optimization.

    This function creates the CvarData dataclass containing scenarios and
    probabilities based on the specified fit type (Gaussian, KDE, or historical).

    Args:
        returns_dict (dict): Dictionary containing returns data with keys
            "mean", "covariance", and "returns" (a pandas DataFrame).
        scenario_generation_settings (dict): Scenario generation settings with
            required keys "num_scen" and "fit_type"; optional keys
            "kde_settings" and "verbose".

    Returns:
        dict: Updated returns_dict with added 'cvar_data' containing the
            CvarData dataclass.

    Raises:
        ValueError: If fit_type is not "gaussian", "kde", or "no_fit".

    Example:
        >>> import numpy as np
        >>> import pandas as pd
        >>> # Prepare returns data ("returns" must support .to_numpy())
        >>> returns_dict = {
        ...     "mean": np.array([0.08, 0.10, 0.12]),
        ...     "covariance": np.eye(3) * 0.01,
        ...     "returns": pd.DataFrame(np.random.randn(100, 3) * 0.02),
        ... }
        >>> scenario_generation_settings = {"num_scen": 50, "fit_type": "gaussian"}
        >>> result = generate_cvar_data(returns_dict, scenario_generation_settings)
        >>> print(type(result["cvar_data"]))
        <class 'cufolio.cvar_data.CvarData'>
        >>> print(result["cvar_data"].R.shape)  # (3, 50)
    """
    return_mean = returns_dict["mean"]
    returns_data = returns_dict["returns"].to_numpy()
    num_scen = scenario_generation_settings["num_scen"]
    fit_type = scenario_generation_settings["fit_type"]

    # Default KDE settings mirror the fallback in generate_samples_kde.
    kde_settings = scenario_generation_settings.get(
        "kde_settings", {"device": "CPU", "bandwidth": 0.05, "kernel": "gaussian"}
    )

    if fit_type == "gaussian":  # Gaussian distribution
        covariance = returns_dict["covariance"]
        R_log = np.random.multivariate_normal(return_mean, covariance, size=num_scen)
        # Scenarios are stored column-wise: R has shape (n_assets, num_scen).
        R = np.transpose(R_log)
        p = np.ones(num_scen) / num_scen  # probability of each scenario

    elif fit_type == "kde":  # kde distribution
        R_log = generate_samples_kde(
            num_scen,
            returns_data,
            kde_settings=kde_settings,
            # Previously a hard KeyError when "verbose" was omitted; default it.
            verbose=scenario_generation_settings.get("verbose", False),
        )
        R = np.transpose(R_log)
        p = np.ones(num_scen) / num_scen  # probability of each scenario

    elif fit_type == "no_fit":  # use input data directly
        R = np.transpose(returns_data)
        # Scenario count equals the number of historical observations.
        num_scen = R.shape[1]
        p = np.ones(num_scen) / num_scen

    else:
        raise ValueError("Unsupported fit type: must be from gaussian, kde, or no_fit.")

    cvar_data = CvarData(mean=return_mean, R=R, p=p)

    returns_dict["cvar_data"] = cvar_data

    return returns_dict
|
||
|
|
|
||
|
|
|
||
|
|
def optimize_market_regimes(
    input_file_name: str,
    returns_compute_settings: dict,
    scenario_generation_settings: dict,
    all_regimes: dict,
    cvar_params: CvarParameters,
    solver_settings_list: list[dict],
    results_csv_file_name: str = None,
    num_synthetic: int = 0,
    print_results: bool = True,
):
    """
    Compare CVaR optimization performance across different regimes and solvers.

    Tests multiple solvers across different market regimes and collects
    performance metrics (objective, solve time, return, CVaR, portfolio).
    A solver failure for one regime is caught and recorded as None so the
    remaining solver/regime combinations still run.

    Args:
        input_file_name (str): Path to input data file.
        returns_compute_settings (dict): Dictionary containing returns calculation settings.
        scenario_generation_settings (dict): Dictionary containing scenario generation settings.
        all_regimes (dict): Dictionary of regimes to test with format
            {'regime_name': regime_range}.
        cvar_params (CvarParameters): CVaR optimization parameters.
        solver_settings_list (list[dict]): List of solver settings to test.
            Each dict contains solver-specific settings
            (e.g., {'solver': cp.CLARABEL, 'verbose': False}).
        results_csv_file_name (str, optional): CSV filename to save results.
            Defaults to None (results are not written to disk).
        num_synthetic (int, optional): Number of synthetic data copies to generate.
            0 means no generation. Defaults to 0.
        print_results (bool, optional): Whether to print optimization results.
            Defaults to True.

    Returns:
        pd.DataFrame: Results dataframe with columns:
            - 'regime': Regime name
            - '{solver_name}-obj': Objective value for each solver
            - '{solver_name}-solve_time': Solve time for each solver
            - '{solver_name}-optimal_portfolio': Optimal portfolio for each solver
            - '{solver_name}-return': Expected return for each solver
            - '{solver_name}-CVaR': CVaR value for each solver

    Raises:
        FileNotFoundError: If the input data file doesn't exist.
        ValueError: If solver_settings_list is empty or a settings dict
            has no 'solver' key.

    Example:
        >>> solver_settings_list = [
        ...     {'solver': cp.CLARABEL, 'verbose': False},
        ...     {'solver': cp.HIGHS, 'verbose': False},
        ... ]
        >>> results = optimize_market_regimes(
        ...     'sp500.csv',
        ...     returns_compute_settings,
        ...     scenario_generation_settings,
        ...     all_regimes,
        ...     cvar_params,
        ...     solver_settings_list,
        ... )
    """
    from . import cvar_optimizer  # Lazy import

    if len(solver_settings_list) == 0:
        raise ValueError("Please provide at least one solver settings!")

    # Helper function to extract solver name from settings
    def get_solver_name(settings):
        """Extract solver name from solver settings dict."""
        if "solver" in settings:
            # CVXPY solver - extract name from solver object
            solver_obj = settings["solver"]
            return str(solver_obj).replace("cp.", "").replace("solvers.", "")
        else:
            raise ValueError(f"Unsupported solver settings: {settings}")

    # Build column names dynamically based on solvers
    columns = ["regime"]
    solver_names = []
    for settings in solver_settings_list:
        solver_name = get_solver_name(settings)
        solver_names.append(solver_name)
        columns.extend(
            [
                f"{solver_name}-obj",
                f"{solver_name}-solve_time",
                f"{solver_name}-return",
                f"{solver_name}-CVaR",
                f"{solver_name}-optimal_portfolio",
            ]
        )

    # One dict per regime; converted to a DataFrame at the end.
    result_rows = []

    for regime_name, regime_range in all_regimes.items():
        print("=" * 70)
        print(f"Processing Regime: {regime_name}")
        print("=" * 70)

        # Create synthetic datasets on the fly if requested; otherwise
        # read the raw input file directly.
        input_data_directory = (
            create_synthetic_stock_dataset(
                input_file_name, regime_name, regime_range, num_synthetic
            )
            if num_synthetic > 0
            else input_file_name
        )

        # create the returns_dict for the current regime
        curr_regime = {"name": regime_name, "range": regime_range}
        returns_dict = utils.calculate_returns(
            input_data_directory, curr_regime, returns_compute_settings
        )
        returns_dict = generate_cvar_data(returns_dict, scenario_generation_settings)

        # Initialize result row for this regime
        result_row = {"regime": regime_name}

        # Solve with each solver
        for idx, solver_settings in enumerate(solver_settings_list):
            solver_name = solver_names[idx]
            print(f"\n--- Testing Solver: {solver_name} ---")

            # Set up optimization problem (rebuilt per solver so each solve
            # starts from a fresh problem instance)
            cvar_problem = cvar_optimizer.CVaR(
                returns_dict=returns_dict, cvar_params=cvar_params
            )

            # Solve optimization problem
            try:
                result, portfolio = cvar_problem.solve_optimization_problem(
                    solver_settings, print_results=print_results
                )

                # Store results with solver-specific column names
                result_row[f"{solver_name}-obj"] = result["obj"]
                result_row[f"{solver_name}-solve_time"] = result["solve time"]
                result_row[f"{solver_name}-return"] = result["return"]
                result_row[f"{solver_name}-CVaR"] = result["CVaR"]
                result_row[f"{solver_name}-optimal_portfolio"] = portfolio.print_clean(
                    verbose=False
                )

                print(
                    f" ✓ {solver_name} - Objective: {result['obj']:.6f}, "
                    f"Time: {result['solve time']:.4f}s"
                    f"--------------------------------"
                )

            except Exception as e:
                # A failing solver should not abort the whole comparison run.
                print(f" ✗ {solver_name} failed: {str(e)}")
                # Store None for failed solvers
                result_row[f"{solver_name}-obj"] = None
                result_row[f"{solver_name}-solve_time"] = None
                result_row[f"{solver_name}-return"] = None
                result_row[f"{solver_name}-CVaR"] = None
                result_row[f"{solver_name}-optimal_portfolio"] = None

        # Add this regime's results to list
        result_rows.append(result_row)

    # Create DataFrame from collected rows
    result_dataframe = pd.DataFrame(result_rows, columns=columns)

    print("\n" + "=" * 70)
    print("Optimization Complete!")
    print("=" * 70)
    print("\n")

    if results_csv_file_name:
        result_dataframe.to_csv(results_csv_file_name, index=False)
        print(f"Results saved to: {results_csv_file_name}")

    return result_dataframe
|
||
|
|
|
||
|
|
|
||
|
|
def create_synthetic_stock_dataset(
    training_directory: str, regime_name: str, regime_range: tuple, num_synthetic: int
):
    """Generate a synthetic stock dataset from training data and save it as CSV.

    Fits the synthetic-data generator to the given regime's date range and
    writes the generated dataset next to the training file.

    Args:
        training_directory (str): Path to the training data file.
        regime_name (str): Name of the market regime (used in the output
            file name).
        regime_range (tuple): Date range (start_date, end_date) used both for
            fitting and for generation.
        num_synthetic (int): Number of synthetic datasets to generate; must
            be positive.

    Returns:
        str: Path to the saved synthetic dataset CSV.

    Raises:
        ValueError: If num_synthetic is less than or equal to 0.

    Example:
        >>> save_path = create_synthetic_stock_dataset(
        ...     "data/stock_data/sp500.csv",
        ...     "bull_market",
        ...     ("2020-01-01", "2021-12-31"),
        ...     num_synthetic=100,
        ... )
        >>> print(save_path)  # data/stock_data/synthetic-bull_market-size_500.csv
    """
    if num_synthetic <= 0:
        raise ValueError("Please provide a valid integer for num_synthetic!")

    generated_frame = scenario_generation.generate_synthetic_stock_data(
        dataset_directory=training_directory,
        num_synthetic=num_synthetic,
        fit_range=regime_range,
        generate_range=regime_range,
    )

    # Encode the total column count into the output file name.
    total_columns = len(generated_frame.columns)
    output_file = f"synthetic-{regime_name}-size_{total_columns}.csv"
    output_path = os.path.join(os.path.dirname(training_directory), output_file)
    generated_frame.to_csv(output_path)

    return output_path
|
||
|
|
|
||
|
|
|
||
|
|
def evaluate_portfolio_performance(
    cvar_data: CvarData,
    portfolio: Portfolio,
    confidence_level: float,
    covariance: np.ndarray,
):
    """Compute return, variance, and CVaR metrics for a fixed portfolio.

    Unlike the optimizer, this evaluates an arbitrary (non-optimized)
    portfolio against the provided scenario data and confidence level.

    Args:
        cvar_data (CvarData): CVaR data containing mean returns and scenarios.
        portfolio (Portfolio): Portfolio object with weights already set.
        confidence_level (float): Confidence level for CVaR calculation
            (e.g., 0.95).
        covariance (np.ndarray): Covariance matrix of asset returns.

    Returns:
        dict: Performance metrics with keys:
            - 'portfolio': the Portfolio object itself
            - 'return': expected portfolio return
            - 'variance': portfolio variance
            - 'CVaR': Conditional Value at Risk

    Example:
        >>> performance = evaluate_portfolio_performance(
        ...     cvar_data, portfolio, 0.95, covariance
        ... )
        >>> print(f"Return: {performance['return']:.4f}")
        >>> print(f"CVaR: {performance['CVaR']:.4f}")
    """
    metrics = {"portfolio": portfolio}
    metrics["return"] = portfolio.calculate_portfolio_expected_return(cvar_data.mean)
    metrics["variance"] = portfolio.calculate_portfolio_variance(covariance)
    metrics["CVaR"] = compute_CVaR(cvar_data, portfolio.weights, confidence_level)
    return metrics
|
||
|
|
|
||
|
|
|
||
|
|
def compute_CVaR(cvar_data: CvarData, weights: np.ndarray, confidence_level: float):
    """Compute the Conditional Value at Risk (CVaR) of a portfolio.

    Calculates the expected magnitude of scenario returns at or below the
    empirical Value at Risk (VaR) threshold for the given confidence level.

    NOTE(review): the percentile computation treats all scenarios as equally
    likely — cvar_data.p is not used here; confirm this is intended for
    non-uniform scenario probabilities.

    Args:
        cvar_data (CvarData): CVaR data containing scenarios and probabilities.
        weights (np.ndarray): Portfolio weights vector.
        confidence_level (float): Confidence level for CVaR calculation
            (e.g., 0.95).

    Returns:
        float: CVaR value representing the expected loss beyond VaR.

    Example:
        >>> weights = np.array([0.4, 0.3, 0.3])
        >>> cvar_95 = compute_CVaR(cvar_data, weights, 0.95)
        >>> print(f"95% CVaR: {cvar_95:.4f}")
        95% CVaR: 0.0234
    """
    # Per-scenario portfolio return: weights @ R == R.T @ weights.
    scenario_returns = weights @ cvar_data.R
    # VaR is the lower-tail percentile corresponding to the confidence level.
    tail_percentile = (1 - confidence_level) * 100
    var_threshold = np.percentile(scenario_returns, tail_percentile)
    # Average over the scenarios at or below VaR; report as a magnitude.
    tail_returns = scenario_returns[scenario_returns <= var_threshold]
    return np.abs(np.mean(tail_returns))
|
||
|
|
|
||
|
|
|
||
|
|
def evaluate_single_asset_portfolios(cvar_problem):
    """Build a performance table for one-asset-at-max-weight portfolios.

    For every ticker in the problem, constructs a portfolio holding only that
    asset at its maximum allowed weight (remainder in cash) and records its
    return, variance, and CVaR.

    Args:
        cvar_problem: CVaR optimization problem object exposing tickers,
            params (w_max, confidence), data, covariance, and regime_range.

    Returns:
        pd.DataFrame: Indexed by ticker symbol with columns
            ['portfolio', 'return', 'variance', 'CVaR'] containing the
            performance metrics for each single-asset portfolio.

    Example:
        >>> single_asset_df = evaluate_single_asset_portfolios(cvar_problem)
        >>> print(single_asset_df.head())
    """
    metric_columns = ["portfolio", "return", "variance", "CVaR"]
    performance_table = pd.DataFrame(
        index=cvar_problem.tickers,
        columns=metric_columns,
    )

    for asset_idx, ticker in enumerate(cvar_problem.tickers):
        max_weight = cvar_problem.params.w_max[asset_idx]

        # All capital goes into this one asset (up to its cap); rest is cash.
        single_asset_portfolio = Portfolio(
            tickers=cvar_problem.tickers, time_range=cvar_problem.regime_range
        )
        single_asset_portfolio.portfolio_from_dict(
            ticker + "_single_portfolio", {ticker: max_weight}, 1 - max_weight
        )

        metrics = evaluate_portfolio_performance(
            cvar_problem.data,
            single_asset_portfolio,
            cvar_problem.params.confidence,
            cvar_problem.covariance,
        )

        # Cell-by-cell assignment sidesteps pandas dtype inference on rows.
        for column in metric_columns:
            performance_table.loc[ticker, column] = metrics[column]

    return performance_table
|
||
|
|
|
||
|
|
|
||
|
|
def generate_user_input_portfolios(
    portfolios_dict: dict, returns_dict: dict, existing_portfolios: list = None
):
    """Build Portfolio objects from user specifications.

    Converts user-provided portfolio specifications into Portfolio objects
    and appends them to the list of existing portfolios.

    Args:
        portfolios_dict (dict): Portfolio specifications with format
            {portfolio_name: (weight_dict, cash_amount)}.
        returns_dict (dict): Dictionary containing returns data and ticker
            information.
        existing_portfolios (list or pd.DataFrame, optional): Existing
            portfolios to append to — either a list of Portfolio objects
            (appended in place) or a DataFrame with a 'portfolio' column.
            Defaults to an empty list.

    Returns:
        list: Portfolio objects — the existing ones plus the newly created.

    Raises:
        ValueError: If existing_portfolios is neither a list nor a DataFrame.

    Example:
        >>> portfolios_dict = {
        ...     "Tech_Heavy": ({"AAPL": 0.4, "GOOGL": 0.3, "MSFT": 0.2}, 0.1),
        ... }
        >>> portfolios = generate_user_input_portfolios(portfolios_dict, returns_dict)
    """
    # Normalize the third argument to a plain list of Portfolio objects.
    if existing_portfolios is None:
        portfolio_list = []
    elif isinstance(existing_portfolios, pd.DataFrame):
        if existing_portfolios.empty:
            portfolio_list = []
        else:
            portfolio_list = existing_portfolios["portfolio"].tolist()
    elif isinstance(existing_portfolios, list):
        # Deliberately keep the caller's list object so appends are visible.
        portfolio_list = existing_portfolios
    else:
        raise ValueError(
            "Existing portfolios type not supported - it has to be a list of "
            "Portfolios or a DataFrame with portfolio performance."
        )

    for name, (weight_map, cash_fraction) in portfolios_dict.items():
        new_portfolio = Portfolio(
            tickers=returns_dict["tickers"], time_range=returns_dict["regime"]["range"]
        )
        new_portfolio.portfolio_from_dict(name, weight_map, cash_fraction)
        portfolio_list.append(new_portfolio)

    return portfolio_list
|
||
|
|
|
||
|
|
|
||
|
|
def evaluate_user_input_portfolios(
    cvar_problem,
    portfolios_dict: dict,
    returns_dict: dict,
    custom_portfolios=None,
):
    """Create DataFrame of portfolios with performance metrics.

    Evaluates user-provided portfolios and creates a DataFrame containing
    performance metrics for efficient frontier plotting or comparison.
    Portfolios whose name already exists in custom_portfolios are skipped
    (with a printed notice) rather than duplicated.

    Args:
        cvar_problem: CVaR optimization problem object containing data
            and parameters.
        portfolios_dict (dict): Dictionary of portfolio specifications with format:
            {portfolio_name: (weight_dict, cash_amount)}
        returns_dict (dict): Dictionary containing returns data and ticker
            information.
        custom_portfolios (pd.DataFrame, optional): Existing custom portfolios
            DataFrame. Must have columns: ['portfolio_name', 'portfolio',
            'return', 'variance', 'CVaR']. Defaults to None.

    Returns:
        pd.DataFrame: DataFrame containing portfolio performance metrics
            for all portfolios, with a fresh 0..n-1 index.

    Example:
        >>> performance_df = evaluate_user_input_portfolios(
        ...     cvar_problem, portfolios_dict, returns_dict
        ... )
        >>> print(performance_df[['portfolio_name', 'return', 'CVaR']])
    """
    if custom_portfolios is None:
        custom_portfolios = pd.DataFrame(
            [], columns=["portfolio_name", "portfolio", "return", "variance", "CVaR"]
        )

    # Existing rows (if any) are converted back to Portfolio objects and
    # re-evaluated alongside the newly specified portfolios.
    existing_portfolios = generate_user_input_portfolios(
        portfolios_dict, returns_dict, custom_portfolios
    )

    for portfolio in existing_portfolios:
        portfolio_performance = evaluate_portfolio_performance(
            cvar_problem.data,
            portfolio,
            cvar_problem.params.confidence,
            cvar_problem.covariance,
        )
        portfolio_performance["portfolio_name"] = portfolio.name

        # Build a one-row frame; indexing the Series by custom_portfolios.columns
        # forces the metric dict's keys into the DataFrame's column order.
        portfolio_dataframe = (
            pd.Series(portfolio_performance, index=custom_portfolios.columns)
            .to_frame()
            .T
        )
        if custom_portfolios.shape[0] > 0:
            # Only append rows whose portfolio_name is not already present.
            if portfolio.name not in custom_portfolios["portfolio_name"].values:
                custom_portfolios = pd.concat(
                    [custom_portfolios, portfolio_dataframe], ignore_index=False
                )
            else:
                print(
                    f"{portfolio_dataframe['portfolio_name'].values} already "
                    "exists or please change to a different portfolio name."
                )
        else:
            # First row: the one-row frame becomes the accumulator.
            custom_portfolios = portfolio_dataframe

    # Drop the duplicated 0-indices produced by the row-wise concats.
    custom_portfolios.reset_index(drop=True, inplace=True)

    return custom_portfolios
|
||
|
|
|
||
|
|
|
||
|
|
def create_efficient_frontier(
|
||
|
|
returns_dict: dict,
|
||
|
|
cvar_params: CvarParameters,
|
||
|
|
solver_settings: dict,
|
||
|
|
notional: float = 1e7,
|
||
|
|
figsize: tuple = (12, 8),
|
||
|
|
style: str = "publication",
|
||
|
|
color_scheme: str = "modern",
|
||
|
|
ra_num: int = 25,
|
||
|
|
min_risk_aversion: float = -3,
|
||
|
|
max_risk_aversion: float = 1,
|
||
|
|
custom_portfolios_dict: dict = None,
|
||
|
|
benchmark_portfolios: bool = True,
|
||
|
|
show_discretized_portfolios: bool = True,
|
||
|
|
discretization_params: dict = None,
|
||
|
|
save_path: str = None,
|
||
|
|
title: str = None,
|
||
|
|
print_portfolio_results: bool = False,
|
||
|
|
show_plot: bool = True,
|
||
|
|
dpi: int = 300,
|
||
|
|
) -> tuple:
|
||
|
|
"""Create an efficient frontier plot with visualization features.
|
||
|
|
|
||
|
|
This function generates an efficient frontier plot with styling,
|
||
|
|
annotations, and portfolio analysis.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
returns_dict (dict): Dictionary containing returns data and ticker
|
||
|
|
information.
|
||
|
|
cvar_params (CvarParameters): CVaR optimization parameters.
|
||
|
|
solver_settings (dict): Solver configuration for optimization.
|
||
|
|
notional (float, optional): Notional amount (in USD) for scaling
|
||
|
|
returns display.
|
||
|
|
Defaults to 1e7 (10 million USD).
|
||
|
|
figsize (tuple, optional): Figure size (width, height). Defaults to (12, 8).
|
||
|
|
style (str, optional): Plot style ("publication", "presentation", "minimal").
|
||
|
|
Defaults to "publication".
|
||
|
|
color_scheme (str, optional): Color scheme ("modern", "classic", "vibrant").
|
||
|
|
Defaults to "modern".
|
||
|
|
ra_num (int, optional): Number of risk aversion levels. Defaults to 25.
|
||
|
|
min_risk_aversion (float, optional): Minimum risk aversion (log scale).
|
||
|
|
Defaults to -3.
|
||
|
|
max_risk_aversion (float, optional): Maximum risk aversion (log scale).
|
||
|
|
Defaults to 1.
|
||
|
|
custom_portfolios_dict (dict, optional): Custom portfolios to highlight.
|
||
|
|
Format: {name: (weights_dict, cash)}. Defaults to None.
|
||
|
|
benchmark_portfolios (bool, optional): Include benchmark portfolios
|
||
|
|
(min variance, max Sharpe, max return). Defaults to True.
|
||
|
|
show_discretized_portfolios (bool, optional): Show discretized
|
||
|
|
portfolio combinations. Defaults to True.
|
||
|
|
discretization_params (dict, optional): Parameters for discretized
|
||
|
|
portfolios. Dict with keys: weight_discretization, max_assets,
|
||
|
|
min_weight, max_weight, sum_to_one. Defaults to
|
||
|
|
{"weight_discretization": 10, "max_assets": 5}.
|
||
|
|
save_path (str, optional): Path to save the figure. Defaults to None.
|
||
|
|
title (str, optional): Custom plot title. Defaults to auto-generated.
|
||
|
|
print_portfolio_results (bool, optional): Whether to print the portfolio
|
||
|
|
results. Defaults to False.
|
||
|
|
show_plot (bool, optional): Whether to display the plot. Defaults to True.
|
||
|
|
dpi (int, optional): Resolution for saved figure. Defaults to 300.
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
tuple: (results_df, fig, ax) containing the optimization results DataFrame,
|
||
|
|
matplotlib figure, and axes objects.
|
||
|
|
|
||
|
|
Example:
|
||
|
|
>>> regime = {"name": "full_period", "range": ("2020-01-01", "2023-12-31")}
|
||
|
|
>>> results_df, fig, ax = create_efficient_frontier(
|
||
|
|
... returns_dict,
|
||
|
|
... cvar_params,
|
||
|
|
... {"solver": "CLARABEL", "verbose": False}
|
||
|
|
... )
|
||
|
|
"""
|
||
|
|
from . import cvar_optimizer # Lazy import
|
||
|
|
|
||
|
|
if custom_portfolios_dict is None:
|
||
|
|
custom_portfolios_dict = {}
|
||
|
|
|
||
|
|
if discretization_params is None:
|
||
|
|
discretization_params = {
|
||
|
|
"weight_discretization": 10,
|
||
|
|
"max_assets": 5,
|
||
|
|
"min_weight": 0.0,
|
||
|
|
"max_weight": 1.0,
|
||
|
|
"sum_to_one": True,
|
||
|
|
}
|
||
|
|
|
||
|
|
# Color schemes
|
||
|
|
color_schemes = {
|
||
|
|
"modern": {
|
||
|
|
"frontier": "#7cd7fe",
|
||
|
|
"benchmark": ["#ef9100", "#ff8181", "#0d8473"], #NVIDIA orange, red, dark teal
|
||
|
|
"assets": "#c359ef",
|
||
|
|
"custom": "#fc79ca",
|
||
|
|
"background": "#FFFFFF",
|
||
|
|
"grid": "#E0E0E0",
|
||
|
|
}
|
||
|
|
}
|
||
|
|
colors = color_schemes[color_scheme]
|
||
|
|
|
||
|
|
# Set style
|
||
|
|
if style == "publication":
|
||
|
|
plt.style.use("seaborn-v0_8-whitegrid")
|
||
|
|
sns.set_context("paper", font_scale=1.2)
|
||
|
|
elif style == "presentation":
|
||
|
|
plt.style.use("seaborn-v0_8-whitegrid")
|
||
|
|
sns.set_context("talk", font_scale=1.1)
|
||
|
|
else: # minimal
|
||
|
|
plt.style.use("seaborn-v0_8-white")
|
||
|
|
sns.set_context("notebook")
|
||
|
|
|
||
|
|
# Initialize optimization problem
|
||
|
|
cvar_problem = cvar_optimizer.CVaR(
|
||
|
|
returns_dict=returns_dict, cvar_params=cvar_params
|
||
|
|
)
|
||
|
|
|
||
|
|
# Generate risk aversion range
|
||
|
|
risk_aversion_list = np.logspace(
|
||
|
|
start=min_risk_aversion, stop=max_risk_aversion, num=ra_num
|
||
|
|
)[::-1]
|
||
|
|
|
||
|
|
# Containers for results
|
||
|
|
results_data = []
|
||
|
|
portfolios = []
|
||
|
|
|
||
|
|
print(f"Computing efficient frontier with {ra_num} portfolios...")
|
||
|
|
|
||
|
|
for i, ra_value in enumerate(risk_aversion_list):
|
||
|
|
cvar_problem.params.update_risk_aversion(ra_value)
|
||
|
|
cvar_problem.risk_aversion_param.value = ra_value
|
||
|
|
|
||
|
|
result_row, portfolio = cvar_problem.solve_optimization_problem(
|
||
|
|
solver_settings, print_results=print_portfolio_results
|
||
|
|
)
|
||
|
|
|
||
|
|
result_row["risk_aversion"] = ra_value
|
||
|
|
result_row["variance"] = portfolio.calculate_portfolio_variance(
|
||
|
|
cvar_problem.covariance
|
||
|
|
)
|
||
|
|
result_row["volatility"] = np.sqrt(result_row["variance"])
|
||
|
|
result_row["sharpe"] = (
|
||
|
|
result_row["return"] / result_row["volatility"] * np.sqrt(252)
|
||
|
|
)
|
||
|
|
|
||
|
|
results_data.append(result_row)
|
||
|
|
portfolios.append(portfolio)
|
||
|
|
|
||
|
|
if (i + 1) % 10 == 0:
|
||
|
|
print(f" ✓ Completed {i + 1}/{ra_num} portfolios")
|
||
|
|
|
||
|
|
# Create results DataFrame
|
||
|
|
results_df = pd.DataFrame(results_data)
|
||
|
|
|
||
|
|
# Identify key portfolios
|
||
|
|
min_var_idx = results_df["variance"].idxmin()
|
||
|
|
max_sharpe_idx = results_df["sharpe"].idxmax()
|
||
|
|
max_return_idx = results_df["return"].idxmax()
|
||
|
|
|
||
|
|
key_portfolios = {
|
||
|
|
"Min Variance": min_var_idx,
|
||
|
|
"Max Sharpe": max_sharpe_idx,
|
||
|
|
"Max Return": max_return_idx,
|
||
|
|
}
|
||
|
|
|
||
|
|
# Create the plot
|
||
|
|
fig, ax = plt.subplots(figsize=figsize, dpi=dpi, facecolor=colors["background"])
|
||
|
|
ax.set_facecolor(colors["background"])
|
||
|
|
|
||
|
|
# Plot efficient frontier with notional scaling and percentage CVaR
|
||
|
|
ax.plot(
|
||
|
|
results_df["CVaR"] * 100, # Convert CVaR to percentage
|
||
|
|
results_df["return"] * notional, # Scale returns by notional
|
||
|
|
linewidth=3,
|
||
|
|
color=colors["frontier"],
|
||
|
|
label="Efficient Frontier",
|
||
|
|
zorder=3,
|
||
|
|
alpha=0.9,
|
||
|
|
)
|
||
|
|
|
||
|
|
# Add gradient fill under the frontier
|
||
|
|
ax.fill_between(
|
||
|
|
results_df["CVaR"] * 100, # Convert CVaR to percentage
|
||
|
|
results_df["return"] * notional, # Scale returns by notional
|
||
|
|
alpha=0.1,
|
||
|
|
color=colors["frontier"],
|
||
|
|
zorder=1,
|
||
|
|
)
|
||
|
|
|
||
|
|
# Plot benchmark portfolios
|
||
|
|
if benchmark_portfolios:
|
||
|
|
benchmark_markers = ["o", "^", "s"]
|
||
|
|
|
||
|
|
for i, (name, idx) in enumerate(key_portfolios.items()):
|
||
|
|
ax.scatter(
|
||
|
|
results_df.loc[idx, "CVaR"] * 100, # Convert CVaR to percentage
|
||
|
|
results_df.loc[idx, "return"] * notional, # Scale returns by notional
|
||
|
|
s=120,
|
||
|
|
color=colors["benchmark"][i],
|
||
|
|
marker=benchmark_markers[i],
|
||
|
|
edgecolor="white",
|
||
|
|
linewidth=2,
|
||
|
|
label=name,
|
||
|
|
zorder=4,
|
||
|
|
)
|
||
|
|
|
||
|
|
# Add annotations for key portfolios
|
||
|
|
ax.annotate(
|
||
|
|
f"{name}\nReturn: ${results_df.loc[idx, 'return'] * notional:,.0f}\n"
|
||
|
|
+ f"CVaR: {results_df.loc[idx, 'CVaR'] * 100:.1f}%",
|
||
|
|
(
|
||
|
|
results_df.loc[idx, "CVaR"] * 100,
|
||
|
|
results_df.loc[idx, "return"] * notional,
|
||
|
|
),
|
||
|
|
xytext=(10, 10),
|
||
|
|
textcoords="offset points",
|
||
|
|
bbox=dict(
|
||
|
|
boxstyle="round,pad=0.3",
|
||
|
|
facecolor=colors["benchmark"][i],
|
||
|
|
alpha=0.8,
|
||
|
|
edgecolor="white",
|
||
|
|
),
|
||
|
|
fontsize=9,
|
||
|
|
color="white",
|
||
|
|
ha="left",
|
||
|
|
zorder=5,
|
||
|
|
)
|
||
|
|
|
||
|
|
# Discretized portfolios (if requested)
|
||
|
|
if show_discretized_portfolios:
|
||
|
|
discretized_portfolios = evaluate_all_linear_combinations(
|
||
|
|
returns_dict, cvar_params, **discretization_params
|
||
|
|
)
|
||
|
|
|
||
|
|
# Plot discretized portfolios with variance as hue
|
||
|
|
scatter = ax.scatter(
|
||
|
|
discretized_portfolios["CVaR"] * 100, # Convert CVaR to percentage
|
||
|
|
discretized_portfolios["return"] * notional, # Scale returns by notional
|
||
|
|
s=40,
|
||
|
|
c=discretized_portfolios["variance"],
|
||
|
|
cmap="plasma",
|
||
|
|
alpha=0.6,
|
||
|
|
edgecolor="white",
|
||
|
|
linewidth=0.5,
|
||
|
|
label="Discretized Portfolios",
|
||
|
|
zorder=2,
|
||
|
|
)
|
||
|
|
|
||
|
|
# Add colorbar for portfolio variance
|
||
|
|
cbar = plt.colorbar(scatter, ax=ax, shrink=0.8, pad=0.02)
|
||
|
|
cbar.set_label("Portfolio Variance", rotation=270, labelpad=15)
|
||
|
|
|
||
|
|
# Custom portfolios
|
||
|
|
if custom_portfolios_dict:
|
||
|
|
custom_portfolios = evaluate_user_input_portfolios(
|
||
|
|
cvar_problem, custom_portfolios_dict, returns_dict
|
||
|
|
)
|
||
|
|
|
||
|
|
for _idx, row in custom_portfolios.iterrows():
|
||
|
|
ax.scatter(
|
||
|
|
row["CVaR"] * 100, # Convert CVaR to percentage
|
||
|
|
row["return"] * notional, # Scale returns by notional
|
||
|
|
s=100,
|
||
|
|
color=colors["custom"],
|
||
|
|
marker="D",
|
||
|
|
edgecolor="white",
|
||
|
|
linewidth=2,
|
||
|
|
label=f"Custom: {row['portfolio_name']}",
|
||
|
|
zorder=4,
|
||
|
|
)
|
||
|
|
|
||
|
|
# Styling and labels
|
||
|
|
ax.set_xlabel(
|
||
|
|
f"{cvar_params.confidence:.0%} CVaR (percentage)",
|
||
|
|
fontsize=12,
|
||
|
|
fontweight="bold",
|
||
|
|
)
|
||
|
|
ax.set_ylabel(
|
||
|
|
f"Expected Return (${notional / 1e6:.0f}M Notional)",
|
||
|
|
fontsize=12,
|
||
|
|
fontweight="bold",
|
||
|
|
)
|
||
|
|
|
||
|
|
if title is None:
|
||
|
|
title = f"Efficient Frontier - {ra_num} portfolios"
|
||
|
|
|
||
|
|
ax.set_title(title, fontsize=14, fontweight="bold", pad=20)
|
||
|
|
|
||
|
|
# Grid and styling
|
||
|
|
ax.grid(True, alpha=0.3, color=colors["grid"])
|
||
|
|
ax.spines["top"].set_visible(False)
|
||
|
|
ax.spines["right"].set_visible(False)
|
||
|
|
ax.spines["left"].set_color("#E0E0E0")
|
||
|
|
ax.spines["bottom"].set_color("#E0E0E0")
|
||
|
|
|
||
|
|
# Legend
|
||
|
|
ax.legend(
|
||
|
|
loc="upper left",
|
||
|
|
frameon=True,
|
||
|
|
fancybox=True,
|
||
|
|
shadow=True,
|
||
|
|
framealpha=0.9,
|
||
|
|
fontsize=10,
|
||
|
|
)
|
||
|
|
|
||
|
|
plt.tight_layout()
|
||
|
|
|
||
|
|
# Save the figure
|
||
|
|
if save_path:
|
||
|
|
plt.savefig(
|
||
|
|
save_path,
|
||
|
|
dpi=dpi,
|
||
|
|
bbox_inches="tight",
|
||
|
|
facecolor=colors["background"],
|
||
|
|
edgecolor="none",
|
||
|
|
)
|
||
|
|
print(f"💾 Plot saved to: {save_path}")
|
||
|
|
|
||
|
|
# Show the plot
|
||
|
|
if show_plot:
|
||
|
|
plt.show()
|
||
|
|
|
||
|
|
print("Efficient frontier analysis complete!")
|
||
|
|
|
||
|
|
return results_df, fig, ax
|
||
|
|
|
||
|
|
|
||
|
|
def evaluate_all_linear_combinations(
    returns_dict: dict,
    cvar_params: "CvarParameters",
    weight_discretization: int = 20,
    max_assets: int = None,
    min_weight: float = 0.0,
    max_weight: float = 1.0,
    use_gpu: bool = True,
    sum_to_one: bool = True,
):
    """
    Discretize dataset and evaluate all linear combinations of stocks for
    returns and CVaR using GPU acceleration and parallel computing.

    This function creates discrete weight combinations for all stocks and
    evaluates portfolio performance using vectorized operations with CuPy Numeric
    for enhanced NumPy compatibility and potential GPU acceleration.

    **Mean Return Constraints:**
    - Assets with negative mean returns: only zero weight allowed (no long positions)
    - Assets with positive mean returns: full weight range allowed (long positions only)

    Args:
        returns_dict (dict): Dictionary containing returns data and market
            information. Must provide keys "cvar_data" (with ``.mean`` and
            ``.R`` arrays), "covariance", and "tickers".
        cvar_params (CvarParameters): CVaR optimization parameters. Reads
            ``c_min``, ``c_max`` (cash bounds) and ``confidence``.
        weight_discretization (int, optional): Number of discrete weight
            levels for each asset. Defaults to 20.
        max_assets (int, optional): Maximum number of assets to consider.
            If None, uses all assets. Defaults to None.
        min_weight (float, optional): Minimum weight for any asset.
            Defaults to 0.0.
        max_weight (float, optional): Maximum weight for any asset.
            Defaults to 1.0.
        use_gpu (bool, optional): Whether to use GPU acceleration with CuPy.
            When available, CuPy Numeric is used for all operations for
            better performance. Defaults to True.
        sum_to_one (bool, optional): Whether to rescale each portfolio's
            weights and cash so they sum to exactly 1. Accepted so callers
            may pass it inside ``discretization_params`` (as
            ``create_efficient_frontier`` documents). Defaults to True,
            which preserves the historical behavior.

    Returns:
        pd.DataFrame: DataFrame containing all portfolio combinations with
            their performance metrics:
            - 'weights': Portfolio weights as a dictionary
            - 'return': Expected portfolio return
            - 'variance': Portfolio variance
            - 'volatility': Portfolio volatility (standard deviation)
            - 'CVaR': Conditional Value at Risk
            - 'sharpe': Sharpe ratio (return/CVaR)
            - 'num_assets': Number of non-zero assets in portfolio
            - 'cash': Portfolio cash allocation
            plus rank columns ('return_rank', 'cvar_rank', 'sharpe_rank'),
            sorted by Sharpe ratio descending.

    Raises:
        ValueError: If parameters are invalid or the weight/cash constraints
            admit no feasible combination.

    Example:
        >>> results = evaluate_all_linear_combinations(
        ...     returns_dict,
        ...     cvar_params,
        ...     weight_discretization=10,
        ...     max_assets=5,
        ...     use_gpu=True
        ... )
    """
    if weight_discretization < 2:
        raise ValueError("weight_discretization must be at least 2")

    # Try to import CuPy Numeric for enhanced performance; fall back to NumPy.
    try:
        import cupynumeric as cnp

        cupynumeric_available = True
        print("Using CuPy Numeric for enhanced NumPy operations")
    except ImportError:
        import numpy as cnp  # Fallback to regular NumPy

        cupynumeric_available = False
        print("CuPy Numeric not available, using standard NumPy")

    # GPU use requires both the user opt-in and cupynumeric being importable.
    gpu_available = use_gpu and cupynumeric_available
    # Single dispatch point for array operations: device module when the GPU
    # path is active, plain NumPy otherwise. This replaces the previously
    # duplicated if/else branches around every array call.
    xp = cnp if gpu_available else np

    # Extract data
    cvar_data = returns_dict["cvar_data"]
    covariance = returns_dict["covariance"]
    tickers = returns_dict["tickers"]

    if max_assets is None:
        max_assets = len(tickers)
    max_assets = min(max_assets, len(tickers))

    # Validate constraint feasibility before generating combinations.
    # Only assets with non-negative mean returns may carry long weight.
    mean_returns = cvar_data.mean[:max_assets]
    positive_mean_assets = sum(1 for i in range(max_assets) if mean_returns[i] >= 0)
    negative_mean_assets = max_assets - positive_mean_assets

    # Possible weight-sum range given that negative-mean assets contribute 0.
    min_possible_weight_sum = positive_mean_assets * min_weight
    max_possible_weight_sum = positive_mean_assets * max_weight
    # Required weight-sum range implied by the cash bounds (cash = 1 - sum).
    min_required_weight_sum = 1.0 - cvar_params.c_max
    max_allowed_weight_sum = 1.0 - cvar_params.c_min

    print("Mean return analysis:")
    print(f"  Assets with positive mean returns: {positive_mean_assets}")
    print(
        f"  Assets with negative mean returns: {negative_mean_assets} "
        "(will have zero weight)"
    )
    print("Constraint validation:")
    print(
        f"  Possible weight sum range: "
        f"[{min_possible_weight_sum:.3f}, {max_possible_weight_sum:.3f}]"
    )
    print(
        f"  Required weight sum range: "
        f"[{min_required_weight_sum:.3f}, {max_allowed_weight_sum:.3f}]"
    )

    # Check if constraints are feasible
    if min_possible_weight_sum > max_allowed_weight_sum:
        raise ValueError(
            f"Impossible constraints: minimum possible weight sum "
            f"({min_possible_weight_sum:.3f}) exceeds maximum allowed "
            f"({max_allowed_weight_sum:.3f}). "
            f"Try reducing w_min ({min_weight}) or "
            f"increasing c_min ({cvar_params.c_min}). "
            f"Note: {negative_mean_assets} assets with negative mean returns "
            "are excluded from long positions."
        )

    if max_possible_weight_sum < min_required_weight_sum:
        raise ValueError(
            f"Impossible constraints: maximum possible weight sum "
            f"({max_possible_weight_sum:.3f}) is below minimum required "
            f"({min_required_weight_sum:.3f}). "
            f"Try increasing w_max ({max_weight}) or "
            f"reducing c_max ({cvar_params.c_max}). "
            f"Note: Only {positive_mean_assets} assets with positive "
            "mean returns can have non-zero weights."
        )

    # Create per-asset discrete weight levels based on the sign of each
    # asset's mean return.
    asset_weight_levels = []
    for i in range(max_assets):
        asset_name = tickers[i] if i < len(tickers) else f"Asset_{i}"
        if mean_returns[i] < 0:
            # Negative mean return: only allow zero weight
            levels = xp.array([0.0])
            print(
                f"Asset {i} ({asset_name}): negative mean return, "
                "only zero weight allowed"
            )
        else:
            # Positive mean return: allow full weight range
            levels = xp.linspace(min_weight, max_weight, weight_discretization)
            print(
                f"Asset {i} ({asset_name}): positive mean return, "
                "full weight range allowed"
            )
        asset_weight_levels.append(levels)

    if gpu_available:
        print("Using CuPy Numeric for weight generation")
    else:
        print("Using standard NumPy for weight generation")

    # Total combinations = product of each asset's number of weight levels.
    total_combinations = 1
    for levels in asset_weight_levels:
        total_combinations *= len(levels)

    print(
        f"Generating {total_combinations:,} combinations "
        "based on mean return constraints..."
    )

    # Use meshgrid for efficient combination generation with asset-specific
    # weight levels; each row of all_weights is one candidate portfolio.
    grids = xp.meshgrid(*asset_weight_levels, indexing="ij")
    all_weights = xp.stack([grid.ravel() for grid in grids], axis=1)
    weight_sums = xp.sum(all_weights, axis=1)

    # Allow for some flexibility in the constraints
    tolerance = 1e-6
    valid_mask = (weight_sums >= min_required_weight_sum - tolerance) & (
        weight_sums <= max_allowed_weight_sum + tolerance
    )

    valid_weights = all_weights[valid_mask]
    valid_combinations = len(valid_weights)

    if valid_combinations == 0:
        # Provide detailed error message
        actual_min = xp.min(weight_sums)
        actual_max = xp.max(weight_sums)
        raise ValueError(
            f"No valid weight combinations found. "
            f"Generated weight sums range: "
            f"[{actual_min:.3f}, {actual_max:.3f}], "
            f"but required range is: "
            f"[{min_required_weight_sum:.3f}, {max_allowed_weight_sum:.3f}]. "
            f"Try adjusting weight bounds "
            f"(w_min={min_weight}, w_max={max_weight}) "
            f"or cash constraints "
            f"(c_min={cvar_params.c_min}, c_max={cvar_params.c_max})"
        )

    print(f"Found {valid_combinations:,} valid combinations after filtering")

    # Move data to GPU if available; on failure, fall back to the CPU path.
    if gpu_available:
        try:
            print("Moving data to GPU...")
            valid_weights_dev = cnp.asarray(valid_weights)
            # Create copies of sliced arrays to avoid view issues with cuPyNumeric
            mean_returns_dev = cnp.asarray(cvar_data.mean[:max_assets].copy())
            covariance_dev = cnp.asarray(covariance[:max_assets, :max_assets].copy())
            scenarios_dev = cnp.asarray(cvar_data.R[:max_assets, :].copy())
        except Exception as e:
            print(f"GPU memory error: {e}. Falling back to CPU.")
            gpu_available = False
            xp = np

    if not gpu_available:
        # CPU fallback: operate on the host arrays directly.
        valid_weights_dev = valid_weights
        mean_returns_dev = cvar_data.mean[:max_assets]
        covariance_dev = covariance[:max_assets, :max_assets]
        scenarios_dev = cvar_data.R[:max_assets, :]

    # Process all portfolios at once using vectorized operations
    print(f"Processing {valid_combinations:,} portfolios...")

    # Vectorized calculations for all portfolios:
    #   return_i  = w_i . mu
    #   var_i     = w_i . Sigma . w_i
    #   scen_i    = w_i . R  (per-scenario portfolio returns)
    portfolio_returns = xp.dot(valid_weights_dev, mean_returns_dev)
    temp = xp.dot(valid_weights_dev, covariance_dev)
    portfolio_variances = xp.sum(temp * valid_weights_dev, axis=1)
    portfolio_returns_scenarios = xp.dot(valid_weights_dev, scenarios_dev)

    # Calculate empirical CVaR per portfolio: mean magnitude of scenario
    # returns at or below the (1 - confidence) percentile.
    portfolio_cvars = xp.zeros(valid_combinations)
    confidence_percentile = (1 - cvar_params.confidence) * 100

    for i in range(valid_combinations):
        scenario_returns = portfolio_returns_scenarios[i]
        var_threshold = xp.percentile(scenario_returns, confidence_percentile)
        tail_losses = scenario_returns[scenario_returns <= var_threshold]
        if len(tail_losses) > 0:
            portfolio_cvars[i] = xp.abs(xp.mean(tail_losses))
        else:
            portfolio_cvars[i] = 0.0

    # Move results back to CPU for DataFrame construction.
    if gpu_available:
        # NOTE(review): `asnumpy` mirrors the CuPy API — confirm cupynumeric
        # exposes it; otherwise np.asarray(...) would be the conversion.
        weights_cpu = cnp.asnumpy(valid_weights_dev)
        returns_cpu = cnp.asnumpy(portfolio_returns)
        variances_cpu = cnp.asnumpy(portfolio_variances)
        cvars_cpu = cnp.asnumpy(portfolio_cvars)
    else:
        weights_cpu = valid_weights_dev
        returns_cpu = portfolio_returns
        variances_cpu = portfolio_variances
        cvars_cpu = portfolio_cvars

    # Derived metrics (host side; inputs are already NumPy arrays here).
    volatilities = np.sqrt(variances_cpu)
    # Sharpe here is return/CVaR (not return/volatility); zero CVaR -> 0.
    sharpe_ratios = np.where(cvars_cpu > 0, returns_cpu / cvars_cpu, 0.0)
    num_assets = np.sum(weights_cpu > 1e-10, axis=1)

    # Cash is the residual up to a full allocation of 1 (never negative);
    # total_sums is the normalization denominator for each portfolio.
    weight_sums = np.sum(weights_cpu, axis=1)
    cash_amounts = np.maximum(0, 1.0 - weight_sums)
    total_sums = weight_sums + cash_amounts

    # Create results list
    results_list = []
    for i in range(valid_combinations):
        weights_raw = weights_cpu[i]
        cash_raw = cash_amounts[i]
        total_sum = total_sums[i]

        if sum_to_one:
            # Normalize weights + cash to sum to 1 (historical behavior).
            normalized_weights = weights_raw / total_sum
            normalized_cash = cash_raw / total_sum
        else:
            normalized_weights = weights_raw
            normalized_cash = cash_raw

        # Create weights dictionary (only include first max_assets)
        weights_dict = {
            tickers[j]: float(normalized_weights[j])
            for j in range(min(max_assets, len(tickers)))
        }

        result_row = {
            "combination_id": i,
            "weights": weights_dict,
            "weights_array": normalized_weights.copy(),
            "return": float(returns_cpu[i]),
            "variance": float(variances_cpu[i]),
            "volatility": float(volatilities[i]),
            "CVaR": float(cvars_cpu[i]),
            "sharpe": float(sharpe_ratios[i]),
            "num_assets": int(num_assets[i]),
            "cash": float(normalized_cash),
        }

        results_list.append(result_row)

    print(f"Completed processing {valid_combinations:,} portfolios")

    # Create results DataFrame
    results_df = pd.DataFrame(results_list)

    # Add ranking columns
    results_df["return_rank"] = results_df["return"].rank(ascending=False)
    results_df["cvar_rank"] = results_df["CVaR"].rank(
        ascending=True
    )  # Lower CVaR is better
    results_df["sharpe_rank"] = results_df["sharpe"].rank(ascending=False)

    # Sort by Sharpe ratio (descending)
    results_df = results_df.sort_values("sharpe", ascending=False).reset_index(
        drop=True
    )

    return results_df
|
||
|
|
|
||
|
|
|
||
|
|
def normalize_portfolio_weights_to_one(weights_dict: dict, cash: float):
    """
    Normalize portfolio weights and cash to sum to 1.

    Args:
        weights_dict (dict): Dictionary mapping tickers to portfolio weights.
        cash (float): Portfolio cash amount.

    Returns:
        tuple: (normalized_weights_dict, normalized_cash) where:
            - normalized_weights_dict (dict): Normalized portfolio weights
            - normalized_cash (float): Normalized portfolio cash

    Raises:
        ValueError: If the weights and cash sum to zero, since no
            normalization is possible in that case.

    Example:
        >>> weights_dict = {"AAPL": 0.3, "GOOGL": 0.4, "MSFT": 0.2}
        >>> cash = 0.2
        >>> normalized_weights, normalized_cash = normalize_portfolio_weights_to_one(
        ...     weights_dict, cash
        ... )
        >>> print(normalized_weights)
        {'AAPL': 0.272..., 'GOOGL': 0.363..., 'MSFT': 0.181...}
        >>> print(normalized_cash)  # 0.181...
        >>> # Verify sum equals 1
        >>> total = sum(normalized_weights.values()) + normalized_cash
        >>> print(f"{total:.10f}")  # 1.0000000000
    """
    raw_sum = float(np.sum(list(weights_dict.values())) + cash)
    # Guard against a degenerate all-zero portfolio: dividing by zero would
    # otherwise silently produce NaN/inf weights downstream.
    if raw_sum == 0:
        raise ValueError("Cannot normalize: weights and cash sum to zero")
    normalized_weights_dict = {
        ticker: weight / raw_sum for ticker, weight in weights_dict.items()
    }
    return normalized_weights_dict, cash / raw_sum
|
||
|
|
|
||
|
|
|
||
|
|
def compare_cvxpy_vs_cuopt(
    returns_dict: dict,
    cvar_params: CvarParameters,
    cvxpy_solver_settings: dict = None,
    cuopt_solver_settings: dict = None,
    print_results: bool = True,
) -> dict:
    """
    Compare CVXPY and cuOpt implementations for setup time and solve results.

    Creates separate CVaR optimizer instances for each API to compare performance.

    Parameters
    ----------
    returns_dict : dict
        Input data containing regime info and CvarData instance
    cvar_params : CvarParameters
        Constraint parameters and optimization settings
    cvxpy_solver_settings : dict, optional
        Solver settings for CVXPY
    cuopt_solver_settings : dict, optional
        Solver settings for cuOpt
    print_results : bool, default True
        Whether to print comparison results

    Returns
    -------
    dict
        Comparison results including setup times, solve times, and solution
        differences. On failure, contains an "error" key with the exception
        message (possibly alongside partial results gathered before the
        failure).

    Examples
    --------
    >>> import cvxpy as cp
    >>> # Prepare data and parameters
    >>> regime = {"name": "bull_market", "range": ("2020-01-01", "2021-12-31")}
    >>> returns_dict = calculate_returns(
    ...     "data/stock_data/sp500.csv", regime, "LOG", cvar_params
    ... )
    >>> cvar_params = CvarParameters(num_scen=100, confidence=0.95)
    >>>
    >>> # Compare CVXPY and cuOpt
    >>> cvxpy_settings = {"solver": cp.CLARABEL, "verbose": False}
    >>> cuopt_settings = {"api": "cuopt_python", "verbose": False}
    >>> results = compare_cvxpy_vs_cuopt(
    ...     returns_dict,
    ...     cvar_params,
    ...     cvxpy_settings,
    ...     cuopt_settings,
    ...     print_results=True
    ... )
    >>>
    >>> # Access comparison results
    >>> print(f"cuOpt speedup: {results['comparison']['total_speedup']:.2f}x")
    cuOpt speedup: 15.34x
    >>> print(f"Objective difference: {results['comparison']['objective_diff']:.8f}")
    Objective difference: 0.00000123
    >>> print(f"Max weight diff: {results['comparison']['max_weight_diff']:.8f}")
    Max weight diff: 0.00000045
    """
    from . import cvar_optimizer  # Lazy import

    # Default both solver-settings dicts to empty (avoids mutable defaults).
    if cvxpy_solver_settings is None:
        cvxpy_solver_settings = {}
    if cuopt_solver_settings is None:
        cuopt_solver_settings = {}

    print(f"{'=' * 70}")
    print("CVXPY vs cuOpt Performance Comparison")
    print(f"{'=' * 70}")

    results = {}
    # Pre-initialize so the except handler / later checks never hit NameError
    # if a solve fails before assignment.
    cvxpy_portfolio = None
    cuopt_portfolio = None

    try:
        # ===============================
        # CVXPY Setup and Solve
        # ===============================
        print("\nCreating CVXPY optimizer instance...")
        cvxpy_optimizer = cvar_optimizer.CVaR(
            returns_dict=returns_dict, cvar_params=cvar_params, api_choice="cvxpy"
        )

        print("Solving with CVXPY...")
        cvxpy_result_row, cvxpy_portfolio = cvxpy_optimizer.solve_optimization_problem(
            cvxpy_solver_settings, print_results=False
        )
        # Setup time comes from the optimizer; solve time from the result row.
        cvxpy_setup_time = cvxpy_optimizer.set_up_time
        cvxpy_solve_time = cvxpy_result_row["solve time"]

        # Store CVXPY results (objective/status read off the CVXPY problem).
        cvxpy_objective = cvxpy_optimizer.optimization_problem.value
        cvxpy_status = cvxpy_optimizer.optimization_problem.status

        if cvxpy_portfolio is None:
            print(
                f"Warning: CVXPY optimization failed or returned no solution. "
                f"Status: {cvxpy_status}"
            )

        results["cvxpy"] = {
            "setup_time": cvxpy_setup_time,
            "solve_time": cvxpy_solve_time,
            "total_time": cvxpy_setup_time + cvxpy_solve_time,
            "portfolio": cvxpy_portfolio,
            "objective_value": cvxpy_objective,
            "status": cvxpy_status,
        }

        # ===============================
        # cuOpt Setup and Solve
        # ===============================
        print("\nCreating cuOpt optimizer instance...")
        cuopt_optimizer = cvar_optimizer.CVaR(
            returns_dict=returns_dict,
            cvar_params=cvar_params,
            api_choice="cuopt_python",
        )

        print("Solving with cuOpt...")
        cuopt_result_row, cuopt_portfolio = cuopt_optimizer.solve_optimization_problem(
            cuopt_solver_settings, print_results=False
        )
        cuopt_solve_time = cuopt_result_row["solve time"]
        cuopt_setup_time = cuopt_optimizer.set_up_time

        # Store cuOpt results — objective/status read from the private
        # cuOpt problem handle on the optimizer.
        cuopt_objective = cuopt_optimizer._cuopt_problem.ObjValue
        cuopt_status = cuopt_optimizer._cuopt_problem.Status.name

        if cuopt_portfolio is None:
            print(
                f"Warning: cuOpt optimization failed or returned no solution. "
                f"Status: {cuopt_status}"
            )

        results["cuopt"] = {
            "setup_time": cuopt_setup_time,
            "solve_time": cuopt_solve_time,
            "total_time": cuopt_setup_time + cuopt_solve_time,
            "portfolio": cuopt_portfolio,
            "objective_value": cuopt_objective,
            "status": cuopt_status,
        }

        # ===============================
        # Calculate Differences
        # ===============================
        # Speedups are CVXPY time / cuOpt time; guard each zero denominator
        # by reporting infinity.
        setup_speedup = (
            cvxpy_setup_time / cuopt_setup_time
            if cuopt_setup_time > 0
            else float("inf")
        )
        solve_speedup = (
            cvxpy_solve_time / cuopt_solve_time
            if cuopt_solve_time > 0
            else float("inf")
        )
        # NOTE(review): no zero guard here — if both cuOpt timings are 0 this
        # raises ZeroDivisionError, which the broad except below converts
        # into results["error"].
        total_speedup = (cvxpy_setup_time + cvxpy_solve_time) / (
            cuopt_setup_time + cuopt_solve_time
        )

        # Portfolio weight differences (only if both portfolios exist)
        if cvxpy_portfolio is not None and cuopt_portfolio is not None:
            weight_diff = np.abs(cvxpy_portfolio.weights - cuopt_portfolio.weights)
            max_weight_diff = np.max(weight_diff)
            mean_weight_diff = np.mean(weight_diff)
        else:
            # Sentinel value: comparison not possible.
            max_weight_diff = float("inf")
            mean_weight_diff = float("inf")

        # Objective value difference (absolute, and relative in percent).
        if cvxpy_objective is not None and cuopt_objective is not None:
            obj_diff = abs(cvxpy_objective - cuopt_objective)
            obj_rel_diff = (
                obj_diff / abs(cvxpy_objective) * 100
                if cvxpy_objective != 0
                else float("inf")
            )
        else:
            obj_diff = float("inf")
            obj_rel_diff = float("inf")

        results["comparison"] = {
            "setup_speedup": setup_speedup,
            "solve_speedup": solve_speedup,
            "total_speedup": total_speedup,
            "max_weight_diff": max_weight_diff,
            "mean_weight_diff": mean_weight_diff,
            "objective_diff": obj_diff,
            "objective_rel_diff_pct": obj_rel_diff,
        }

        if print_results:
            _print_comparison_results(results)

    except Exception as e:
        # Best-effort comparison: record the failure and return whatever
        # partial results were collected rather than propagating.
        print(f"Error during comparison: {str(e)}")
        results["error"] = str(e)

    return results
|
||
|
|
|
||
|
|
|
||
|
|
def _print_comparison_results(results):
|
||
|
|
"""Print formatted comparison results.
|
||
|
|
|
||
|
|
Args:
|
||
|
|
results (dict): Results dictionary from compare_cvxpy_vs_cuopt() containing
|
||
|
|
'cvxpy', 'cuopt', and 'comparison' keys with timing and solution data.
|
||
|
|
"""
|
||
|
|
cvxpy = results["cvxpy"]
|
||
|
|
cuopt = results["cuopt"]
|
||
|
|
comp = results["comparison"]
|
||
|
|
|
||
|
|
print(f"\n{'=' * 70}")
|
||
|
|
print("PERFORMANCE COMPARISON RESULTS")
|
||
|
|
print(f"{'=' * 70}")
|
||
|
|
|
||
|
|
# Timing comparison table
|
||
|
|
print("\nTIMING COMPARISON")
|
||
|
|
print(f"{'-' * 50}")
|
||
|
|
print(f"{'Metric':<20} {'CVXPY':<12} {'cuOpt':<12} {'Speedup':<10}")
|
||
|
|
print(f"{'-' * 50}")
|
||
|
|
print(
|
||
|
|
f"{'Setup Time':<20} {cvxpy['setup_time']:<12.4f} "
|
||
|
|
f"{cuopt['setup_time']:<12.4f} {comp['setup_speedup']:<10.2f}x"
|
||
|
|
)
|
||
|
|
print(
|
||
|
|
f"{'Solve Time':<20} {cvxpy['solve_time']:<12.4f} "
|
||
|
|
f"{cuopt['solve_time']:<12.4f} {comp['solve_speedup']:<10.2f}x"
|
||
|
|
)
|
||
|
|
print(
|
||
|
|
f"{'Total Time':<20} {cvxpy['total_time']:<12.4f} "
|
||
|
|
f"{cuopt['total_time']:<12.4f} {comp['total_speedup']:<10.2f}x"
|
||
|
|
)
|
||
|
|
|
||
|
|
# Solution quality comparison
|
||
|
|
print("\nSOLUTION QUALITY COMPARISON")
|
||
|
|
print(f"{'-' * 50}")
|
||
|
|
print(f"{'Status':<25} CVXPY: {cvxpy['status']:<15} cuOpt: {cuopt['status']}")
|
||
|
|
|
||
|
|
# Handle objective values that might be None
|
||
|
|
cvxpy_obj_str = (
|
||
|
|
f"{cvxpy['objective_value']:.6f}"
|
||
|
|
if cvxpy["objective_value"] is not None
|
||
|
|
else "N/A"
|
||
|
|
)
|
||
|
|
cuopt_obj_str = (
|
||
|
|
f"{cuopt['objective_value']:.6f}"
|
||
|
|
if cuopt["objective_value"] is not None
|
||
|
|
else "N/A"
|
||
|
|
)
|
||
|
|
print(f"{'Objective Value':<25} CVXPY: {cvxpy_obj_str:<15} cuOpt: {cuopt_obj_str}")
|
||
|
|
|
||
|
|
# Handle differences that might be infinite
|
||
|
|
if comp["objective_diff"] == float("inf"):
|
||
|
|
print(f"{'Objective Difference':<25} N/A (one solver failed)")
|
||
|
|
else:
|
||
|
|
print(
|
||
|
|
f"{'Objective Difference':<25} {comp['objective_diff']:.8f} "
|
||
|
|
f"({comp['objective_rel_diff_pct']:.4f}%)"
|
||
|
|
)
|
||
|
|
|
||
|
|
if comp["max_weight_diff"] == float("inf"):
|
||
|
|
print(f"{'Max Weight Difference':<25} N/A (one solver failed)")
|
||
|
|
print(f"{'Mean Weight Difference':<25} N/A (one solver failed)")
|
||
|
|
else:
|
||
|
|
print(f"{'Max Weight Difference':<25} {comp['max_weight_diff']:.8f}")
|
||
|
|
print(f"{'Mean Weight Difference':<25} {comp['mean_weight_diff']:.8f}")
|
||
|
|
|
||
|
|
# Summary
|
||
|
|
print("\nSUMMARY")
|
||
|
|
print(f"{'-' * 50}")
|
||
|
|
if comp["total_speedup"] > 1:
|
||
|
|
print(f"cuOpt is {comp['total_speedup']:.2f}x faster overall")
|
||
|
|
else:
|
||
|
|
print(f"CVXPY is {1 / comp['total_speedup']:.2f}x faster overall")
|
||
|
|
|
||
|
|
# Only compare solutions if both solvers succeeded
|
||
|
|
if comp["objective_rel_diff_pct"] == float("inf"):
|
||
|
|
print("Cannot compare solution quality - one or both solvers failed")
|
||
|
|
elif comp["objective_rel_diff_pct"] < 0.01:
|
||
|
|
print("Solutions match within 0.01% tolerance")
|
||
|
|
elif comp["objective_rel_diff_pct"] < 1.0:
|
||
|
|
print(f"Solutions differ by {comp['objective_rel_diff_pct']:.4f}%")
|
||
|
|
else:
|
||
|
|
print(f"Significant solution difference: {comp['objective_rel_diff_pct']:.4f}%")
|
||
|
|
|
||
|
|
print(f"{'=' * 70}\n")
|