Introduction¶
This report presents modeling and optimization strategies for determining the optimal asset allocation of a portfolio. We use daily returns of seven assets (AAPL, NVDA, TSLA, XOM, REGN, LLY, JPM) from January 1, 2023 to June 30, 2025.
First, we provide descriptive statistics for the selected assets, then determine the optimal portfolio weights with and without short selling, and with the restriction that no single asset represents more than 20% of the portfolio.
In Part 2 we test whether the 1/N portfolio is the better option. To do this we select three years of data, split into two parts: two years for parameter estimation and one year for testing the portfolio. We then compute (1) the mean-variance optimal portfolio without short selling and (2) the 1/N portfolio, and compare the returns each earns over the test period.
We repeat the process using Monte Carlo simulation with 5,000 iterations, and plot the resulting return distributions together with summary statistics.
In Parts 3 and 4 we compute weights using the Black-Litterman model and interpret the results under the half-Kelly and double-Kelly strategies, respectively.
Part 1¶
To solve this question, we annualized the daily returns assuming 252 trading days per year in order to calculate the expected return, volatility, skewness ($S$) and kurtosis ($K$), using the well-known formulas:
$$S = \frac{E[(R - \mu)^3]}{\sigma^3}, \quad K = \frac{E[(R - \mu)^4]}{\sigma^4}$$
where $R$ is the vector of asset returns, $\mu$ the expected return and $\sigma$ the standard deviation.
The analysis of these distribution moments is essential because financial time series rarely follow a Gaussian distribution (Cont, 224).
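As a quick sanity check, these moment formulas can be verified directly against `scipy.stats` on a simulated heavy-tailed sample (the Student-t data below are illustrative, not the asset returns). Note that scipy's `kurtosis` reports excess kurtosis, $K - 3$, which is the convention shown in Table 1.

```python
import numpy as np
from scipy.stats import skew, kurtosis

# Simulated heavy-tailed "returns" (Student-t with 5 df), purely illustrative
rng = np.random.default_rng(42)
r = rng.standard_t(df=5, size=10_000)

mu, sigma = r.mean(), r.std()
S = np.mean((r - mu) ** 3) / sigma ** 3   # skewness: E[(R - mu)^3] / sigma^3
K = np.mean((r - mu) ** 4) / sigma ** 4   # raw kurtosis: E[(R - mu)^4] / sigma^4

# The plain-formula values match scipy's estimators
assert np.isclose(S, skew(r))
assert np.isclose(K - 3, kurtosis(r))     # scipy returns excess kurtosis by default
print(round(S, 3), round(K, 3))
```

A symmetric $t(5)$ sample gives $S$ near 0 and $K$ well above the Gaussian value of 3, the same heavy-tail signature visible in Table 1.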
%pip install yfinance pandas numpy scipy matplotlib seaborn PyPortfolioOpt --quiet
import yfinance as yf
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import skew, kurtosis, norm
import scipy.stats as stats
sns.set_style('whitegrid')
np.random.seed(42)
assets = ['AAPL', 'NVDA', 'TSLA', 'XOM', 'REGN', 'LLY', 'JPM']
start_date = '2023-01-01'
end_date = '2025-06-30'
data = yf.download(assets, start=start_date, end=end_date)['Close']
returns = data.pct_change().dropna()
summary_stats = pd.DataFrame({
    'Annualized Mean (%)': returns.mean() * 252 * 100,
    'Annualized Volatility (%)': returns.std() * np.sqrt(252) * 100,
    'Skewness': returns.apply(skew),
    'Kurtosis': returns.apply(kurtosis)
}).T
print("\n Table 1: Asset Summary Statistics")
display(summary_stats.round(2))
plt.figure(figsize=(10, 8))
corr_matrix = returns.corr()
sns.heatmap(corr_matrix, annot=True, cmap='RdYlGn', center=0,
            fmt=".2f", linewidths=0.5, cbar_kws={"shrink": .8})
plt.title('Figure 1: Asset Correlation Heatmap (Daily Returns)', fontsize=14, pad=15)
plt.tight_layout()
plt.show()
Table 1: Asset Summary Statistics
| Ticker | AAPL | JPM | LLY | NVDA | REGN | TSLA | XOM |
|---|---|---|---|---|---|---|---|
| Annualized Mean (%) | 23.21 | 35.96 | 36.70 | 111.31 | -8.53 | 63.70 | 7.26 |
| Annualized Volatility (%) | 26.38 | 23.71 | 32.85 | 53.16 | 29.52 | 62.49 | 23.38 |
| Skewness | 0.84 | 0.21 | 0.74 | 0.79 | -2.10 | 0.59 | -0.23 |
| Kurtosis | 13.92 | 10.02 | 9.65 | 7.39 | 20.61 | 4.01 | 1.76 |
Step 1: Mean-variance optimization¶
To determine the optimal weights without allowing short selling, we used quadratic programming to find the tangency portfolio that maximizes the Sharpe ratio:
$$\max_{w} \quad \text{Sharpe} = \frac{w^T \mu - R_f}{\sqrt{w^T \Sigma w}}$$
subject to:
$$\sum_{i=1}^{N} w_i = 1 \quad \text{and} \quad w_i \ge 0 \quad \forall i$$
where $w$ is the vector of portfolio weights; $\mu$ the vector of expected returns; $\Sigma$ the variance-covariance matrix of asset returns; $R_f$ the risk-free rate; and $w_i \ge 0$ rules out short selling. Maximizing the Sharpe ratio yields the portfolio with the highest excess return per unit of risk (Markowitz, 79).
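For intuition, the unconstrained version of this problem has a closed-form solution, $w^* \propto \Sigma^{-1}(\mu - R_f\mathbf{1})$, rescaled so the weights sum to one. The sketch below evaluates it on made-up inputs (the $\mu$, $\Sigma$ and $R_f$ values are hypothetical, not estimated from the data); once the long-only bound binds, a numerical optimizer's answer can differ from this formula.

```python
import numpy as np

# Hypothetical inputs -- not estimated from the report's data
mu = np.array([0.12, 0.10, 0.08])          # expected annual returns
Sigma = np.array([[0.040, 0.006, 0.004],
                  [0.006, 0.030, 0.005],
                  [0.004, 0.005, 0.020]])  # covariance matrix
rf = 0.02                                  # risk-free rate

# Unconstrained tangency portfolio: w* proportional to inv(Sigma) @ (mu - rf)
raw = np.linalg.solve(Sigma, mu - rf)
w_tan = raw / raw.sum()                    # rescale so sum(w) = 1

def sharpe(w):
    return (w @ mu - rf) / np.sqrt(w @ Sigma @ w)

print(w_tan.round(3), round(sharpe(w_tan), 3))
```

By construction, no fully invested portfolio on these inputs (equal weight, single asset, etc.) can achieve a higher Sharpe ratio than `w_tan`.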
from pypfopt import EfficientFrontier, risk_models, expected_returns
mu = expected_returns.mean_historical_return(data)
S = risk_models.sample_cov(data)
ef1 = EfficientFrontier(mu, S, weight_bounds=(0, 1))
ef1.max_sharpe()
weights_step1 = ef1.clean_weights()
weights_step1
OrderedDict([('AAPL', 0.0),
('JPM', 0.34879),
('LLY', 0.14029),
('NVDA', 0.51091),
('REGN', 0.0),
('TSLA', 0.0),
('XOM', 0.0)])
Step 2: Optimization with a maximum-weight diversification constraint¶
To solve this question we used the same objective function described above, maximizing the Sharpe ratio but imposing the following restriction:
$$0 \le w_i \le 0.20 \quad \forall i$$
because unrestricted mean-variance optimization produces corner portfolios, i.e., portfolios that concentrate capital in a small number of assets with marginally higher historical returns. Imposing an upper bound helps mitigate estimation error in the expected returns and the variance-covariance matrix, enforcing structural diversification and improving out-of-sample robustness (Jagannathan and Ma, 1653).
ef2 = EfficientFrontier(mu, S, weight_bounds=(0, 0.20))
ef2.max_sharpe()
weights_step2 = ef2.clean_weights()
weights_step2
OrderedDict([('AAPL', 0.19021),
('JPM', 0.2),
('LLY', 0.2),
('NVDA', 0.2),
('REGN', 0.0),
('TSLA', 0.00979),
('XOM', 0.2)])
Step 3: Optimization allowing short selling¶
Here we used the following restriction:
$$-1 \le w_i \le 1 \quad \forall i$$
where a negative weight ($w_i < 0$) means short selling, with the proceeds used to finance long positions in other assets. Note that the weights still sum to 1 ($\sum w_i = 1$) (Jacobs and Levy, 53).
To assess goodness of fit we used a detrended Q-Q plot, known as a worm plot. First we standardized the empirical returns using the formula:
$$Z_{empirical} = \frac{R - \mu}{\sigma}$$
followed by calculating the theoretical quantiles of the standard normal distribution ($Z_{theoretical}$) and the deviations:
$$\text{Deviation} = Z_{empirical} - Z_{theoretical}$$
The worm plot removes the linear trend present in a Q-Q plot, making deviations from the normal distribution easier to detect (van Buuren and Fredriks, 1261).
ef3 = EfficientFrontier(mu, S, weight_bounds=(-1, 1))
ef3.max_sharpe()
weights_step3 = ef3.clean_weights()
weights_df = pd.DataFrame({
    'Step 1: Long Only (%)': pd.Series(weights_step1) * 100,
    'Step 2: Diversified Max 20% (%)': pd.Series(weights_step2) * 100,
    'Step 3: Aggressive w/ Shorting (%)': pd.Series(weights_step3) * 100
}).fillna(0)
display(weights_df.round(2))
weights_df.plot(kind='bar', figsize=(14, 7), colormap='viridis', width=0.8, edgecolor='black')
plt.title('Figure 2: Optimal portfolio allocations across methodologies', fontsize=16, pad=15)
plt.ylabel('Allocation Weight (%)', fontsize=12)
plt.xlabel('Assets', fontsize=12)
plt.axhline(0, color='black', linewidth=1)
plt.axhline(20, color='red', linestyle='--', alpha=0.5, label='20% Constraint (Step 2)')
plt.legend(fontsize=10)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()
test_asset = 'NVDA'
actual_returns = returns[test_asset]
mu_daily = actual_returns.mean()
sigma_daily = actual_returns.std()
z_empirical = np.sort((actual_returns - mu_daily) / sigma_daily)
n = len(z_empirical)
p_i = (np.arange(1, n + 1) - 0.5) / n
z_theoretical = stats.norm.ppf(p_i)
deviations = z_empirical - z_theoretical
se = (1 / stats.norm.pdf(z_theoretical)) * np.sqrt(p_i * (1 - p_i) / n)
ci_upper = 1.96 * se
ci_lower = -1.96 * se
fig, ax = plt.subplots(1, 2, figsize=(16, 6))
sns.histplot(actual_returns, kde=True, stat="density", bins=50, ax=ax[0], color='skyblue', label='Empirical Returns')
x_axis = np.linspace(actual_returns.min(), actual_returns.max(), 100)
ax[0].plot(x_axis, norm.pdf(x_axis, mu_daily, sigma_daily), color='red', linestyle='dashed', linewidth=2, label='Normal Fit')
ax[0].set_title(f'Figure 3: {test_asset} return distribution vs. Normal fit', fontsize=14)
ax[0].set_xlabel('Daily Returns')
ax[0].set_ylabel('Density')
ax[0].legend()
ax[1].plot(z_theoretical, deviations, 'o', markerfacecolor='skyblue', markeredgecolor='black', alpha=0.7, label='Deviations (Worm)')
ax[1].plot(z_theoretical, np.zeros_like(z_theoretical), 'r--', linewidth=2, label='Baseline (Perfect Normal)')
ax[1].plot(z_theoretical, ci_upper, 'r:', linewidth=1.5, label='95% CI')
ax[1].plot(z_theoretical, ci_lower, 'r:', linewidth=1.5)
ax[1].set_title(f'Figure 4: Worm Plot (Detrended Q-Q) for {test_asset}', fontsize=14)
ax[1].set_xlabel('Theoretical Normal Quantiles')
ax[1].set_ylabel('Deviation (Empirical - Theoretical)')
ax[1].legend()
plt.tight_layout()
plt.show()
Table 2: Optimal portfolio weights across the three scenarios

| Ticker | Step 1: Long Only (%) | Step 2: Diversified Max 20% (%) | Step 3: Aggressive w/ Shorting (%) |
|---|---|---|---|
| AAPL | 0.00 | 19.02 | -26.61 |
| JPM | 34.88 | 20.00 | 73.08 |
| LLY | 14.03 | 20.00 | 34.32 |
| NVDA | 51.09 | 20.00 | 79.29 |
| REGN | 0.00 | 0.00 | -50.38 |
| TSLA | 0.00 | 0.98 | -9.88 |
| XOM | 0.00 | 20.00 | 0.19 |
Step 4: Results¶
The descriptive statistics (Table 1) show high dispersion in risk and return over the analyzed period. For example, NVDA and LLY posted strong returns accompanied by high volatility and excess kurtosis, which produces heavy tails (Figures 3 and 4) and a higher probability of extreme events. The correlation matrix (Figure 1) reflects these dynamics: assets such as XOM and JPM are weakly correlated with the rest, which is beneficial for portfolio diversification.
In the mean-variance optimization (Table 2), the unrestricted weights maximize the Sharpe ratio but have the disadvantage of producing concentrated portfolios, allocating capital to a small number of assets with high historical performance. To mitigate this fragility, the second scenario imposes an upper limit of 20% per asset, ensuring diversification and protecting the investor against idiosyncratic shocks, at the cost of lower absolute returns (Figure 2).
The third scenario, which allows short selling, achieved the highest risk-adjusted efficiency, but it exposes the investor's capital to unlimited losses and is highly sensitive to small errors in the estimation of the variance-covariance matrix.
Part 2¶
To test the hypothesis that the 1/N portfolio is superior, we used Monte Carlo simulation with out-of-sample validation.
First we selected 20 highly liquid assets and three years of data, segmented as follows:
- In-sample (2 years): used to estimate the statistical parameters, such as the expected returns and the variance-covariance matrix.
- Out-of-sample (1 year): used to test the realized performance of the constructed portfolios.

This strategy helps mitigate overfitting: the financial literature holds that the true test of an optimization is its out-of-sample performance (DeMiguel et al., 1916).
%pip install yfinance pandas numpy scipy matplotlib seaborn PyPortfolioOpt --quiet
import yfinance as yf
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats
from pypfopt import EfficientFrontier, risk_models, expected_returns, objective_functions
import warnings
warnings.filterwarnings("ignore")
np.random.seed(42)
universe = [
    'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META', 'TSLA', 'BRK-B', 'JNJ', 'JPM', 'V',
    'PG', 'UNH', 'HD', 'MA', 'BAC', 'DIS', 'CVX', 'XOM', 'KO', 'PEP'
]
train_start = '2021-01-01'
train_end = '2022-12-31'
test_start = '2023-01-01'
test_end = '2023-12-31'
print(f"Fetching data for {len(universe)} assets...")
data = yf.download(universe, start=train_start, end=test_end)['Close']
train_data = data.loc[train_start:train_end]
test_data = data.loc[test_start:test_end]
oos_asset_returns = (test_data.iloc[-1] / test_data.iloc[0]) - 1
Mean-variance optimization¶
For each simulation, 5 assets were selected randomly without replacement. We used the in-sample data to estimate the expected returns ($\mu$) and the sample variance-covariance matrix ($\Sigma$), then solved for the weights that maximize the Sharpe ratio subject to no short selling. The quadratic optimization is formulated as follows:
$$\max_{w} \quad \frac{w^T \mu - R_f}{\sqrt{w^T \Sigma w}}$$
subject to:
$$\sum_{i=1}^{N} w_i = 1 \quad \text{and} \quad w_i \ge 0 \quad \forall i$$
where $w$ is the $N \times 1$ vector of asset weights; $\mu$ the $N \times 1$ vector of historical expected returns; $\Sigma$ the $N \times N$ variance-covariance matrix of asset returns; $R_f$ the risk-free rate; and $w_i \ge 0$ the long-only restriction.
This structure follows modern portfolio theory, which states that a rational investor seeks the tangency portfolio offering the highest expected return per unit of standard deviation (Markowitz, 79).
1/N portfolio¶
Using the same 5 assets, we simultaneously computed the 1/N portfolio, in which capital is allocated in equal proportions, ignoring historical data. The weights are:
$$w_i = \frac{1}{N} \quad \forall i=1, \dots, N$$
where $N = 5$, giving a constant weight of $0.20$ (20%) per asset. The fixed weights serve as a benchmark: because no historical data is used, the 1/N portfolio carries zero estimation risk, which makes it hard for mean-variance optimization to beat in scenarios where the variance-covariance matrix is unstable or noisy (DeMiguel et al., 1920).
We then ran 5,000 Monte Carlo iterations. In each iteration $k$ we selected 5 assets from the set of 20 and computed the weight vectors $w_{MVO, k}$ and $w_{1/N, k}$ from the in-sample data. We then multiplied each weight vector by the one-year cumulative out-of-sample asset returns ($R_{test, k}$) to obtain the final portfolio return:
$$R_{portfolio, k} = \sum_{i=1}^{N} w_{i, k} \cdot R_{i, test, k}$$
N_SIMULATIONS = 5000
N_ASSETS_PER_PORTFOLIO = 5
np.random.seed(12)
mvo_outcomes = []
ew_outcomes = []
for i in range(N_SIMULATIONS):
    selected_assets = np.random.choice(universe, N_ASSETS_PER_PORTFOLIO, replace=False)
    train_subset = train_data[selected_assets]
    mu = expected_returns.mean_historical_return(train_subset)
    S = risk_models.sample_cov(train_subset)
    try:
        ef = EfficientFrontier(mu, S, weight_bounds=(0, 1))
        ef.add_objective(objective_functions.L2_reg, gamma=0.1)
        ef.max_sharpe()
        w_mvo_dict = ef.clean_weights()
        w_mvo = np.array([w_mvo_dict[ticker] for ticker in selected_assets])
    except Exception:
        try:
            # Fall back to the minimum-volatility portfolio if max_sharpe fails
            ef = EfficientFrontier(mu, S, weight_bounds=(0, 1))
            ef.min_volatility()
            w_mvo_dict = ef.clean_weights()
            w_mvo = np.array([w_mvo_dict[ticker] for ticker in selected_assets])
        except Exception:
            # Last resort: equal weights
            w_mvo = np.repeat(1/N_ASSETS_PER_PORTFOLIO, N_ASSETS_PER_PORTFOLIO)
    w_ew = np.repeat(1/N_ASSETS_PER_PORTFOLIO, N_ASSETS_PER_PORTFOLIO)
    asset_test_returns = oos_asset_returns[selected_assets].values
    mvo_oos_ret = np.dot(w_mvo, asset_test_returns)
    ew_oos_ret = np.dot(w_ew, asset_test_returns)
    mvo_outcomes.append(mvo_oos_ret)
    ew_outcomes.append(ew_oos_ret)
mvo_outcomes = np.array(mvo_outcomes)
ew_outcomes = np.array(ew_outcomes)
results_df = pd.DataFrame({
    'Metric': [
        'Mean 1-Year OOS Return',
        'Median 1-Year OOS Return',
        'Standard Deviation (Volatility of Outcomes)',
        'Max Return Achieved',
        'Min Return Achieved',
        'Skewness',
        'Kurtosis'
    ],
    'MVO Portfolio': [
        np.mean(mvo_outcomes),
        np.median(mvo_outcomes),
        np.std(mvo_outcomes),
        np.max(mvo_outcomes),
        np.min(mvo_outcomes),
        stats.skew(mvo_outcomes),
        stats.kurtosis(mvo_outcomes)
    ],
    '1/N (Equal Weight) Portfolio': [
        np.mean(ew_outcomes),
        np.median(ew_outcomes),
        np.std(ew_outcomes),
        np.max(ew_outcomes),
        np.min(ew_outcomes),
        stats.skew(ew_outcomes),
        stats.kurtosis(ew_outcomes)
    ]
})
mvo_win_rate = np.mean(mvo_outcomes > ew_outcomes) * 100
print("\n Table 3: Statistical Comparison")
display(results_df.round(4))
print(f"\n Mena-variance optimization outperformance frequency (Win Rate): {mvo_win_rate:.2f}%")
Table 3: Statistical Comparison
| Metric | MVO Portfolio | 1/N (Equal Weight) Portfolio |
|---|---|---|
| Mean 1-Year OOS Return | 0.0577 | 0.3238 |
| Median 1-Year OOS Return | 0.0300 | 0.2998 |
| Standard Deviation (Volatility of Outcomes) | 0.0923 | 0.1943 |
| Max Return Achieved | 0.5834 | 1.0115 |
| Min Return Achieved | -0.0785 | -0.0466 |
| Skewness | 1.4643 | 0.4960 |
| Kurtosis | 2.3455 | -0.2717 |
Mean-variance optimization outperformance frequency (Win Rate): 2.18%
fig = plt.figure(figsize=(18, 16))
grid = plt.GridSpec(3, 2, figure=fig, hspace=0.4)
ax1 = fig.add_subplot(grid[0, 0])
sns.kdeplot(mvo_outcomes, fill=True, color='blue', label='MVO Portfolio', ax=ax1, alpha=0.4)
sns.kdeplot(ew_outcomes, fill=True, color='orange', label='1/N Portfolio', ax=ax1, alpha=0.4)
ax1.axvline(np.mean(mvo_outcomes), color='blue', linestyle='--', label='MVO Mean')
ax1.axvline(np.mean(ew_outcomes), color='orange', linestyle='--', label='1/N Mean')
ax1.set_title('Figure 4: 1-Year Out-of-Sample Return Distributions (5,000 Sims)', fontsize=14)
ax1.set_xlabel('Cumulative Return (1 Year)')
ax1.set_ylabel('Density')
ax1.legend()
ax2 = fig.add_subplot(grid[0, 1])
sns.violinplot(data=[mvo_outcomes, ew_outcomes], palette=['blue', 'orange'], ax=ax2)
ax2.set_xticks([0, 1])
ax2.set_xticklabels(['MVO Portfolio', '1/N Portfolio'])
ax2.set_title('Figure 5: Return Dispersion and Quartiles', fontsize=14)
ax2.set_ylabel('1-Year Return')
ax3 = fig.add_subplot(grid[1, 0])
sns.ecdfplot(mvo_outcomes, color='blue', label='MVO Portfolio', ax=ax3, linewidth=2)
sns.ecdfplot(ew_outcomes, color='orange', label='1/N Portfolio', ax=ax3, linewidth=2)
ax3.set_title('Figure 6: Empirical CDF (Stochastic Dominance Analysis)', fontsize=14)
ax3.set_xlabel('Cumulative Return (1 Year)')
ax3.set_ylabel('Cumulative Probability')
ax3.legend()
ax4 = fig.add_subplot(grid[1, 1])
stats.probplot(mvo_outcomes, dist="norm", plot=ax4)
ax4.get_lines()[0].set_markerfacecolor('blue')
ax4.get_lines()[0].set_markeredgecolor('black')
ax4.get_lines()[0].set_alpha(0.3)
ax4.get_lines()[1].set_color('red')
ax4.set_title('Figure 7: Goodness of Fit (Q-Q Plot): MVO Results', fontsize=14)
mvo_sorted = np.sort(mvo_outcomes)
mu_mvo = np.mean(mvo_sorted)
std_mvo = np.std(mvo_sorted)
z_empirical = (mvo_sorted - mu_mvo) / std_mvo
n = len(z_empirical)
p_i = (np.arange(1, n + 1) - 0.5) / n
z_theoretical = stats.norm.ppf(p_i)
deviations = z_empirical - z_theoretical
se = (1 / stats.norm.pdf(z_theoretical)) * np.sqrt(p_i * (1 - p_i) / n)
ci_upper = 1.96 * se
ci_lower = -1.96 * se
ax5 = fig.add_subplot(grid[2, :])
ax5.plot(z_theoretical, deviations, 'o', markerfacecolor='blue', markeredgecolor='black', alpha=0.3, label='MVO Deviations (Worm)')
ax5.plot(z_theoretical, np.zeros_like(z_theoretical), 'r--', linewidth=2, label='Baseline (Perfect Normality)')
ax5.plot(z_theoretical, ci_upper, 'r:', linewidth=1.5, label='95% Confidence Interval')
ax5.plot(z_theoretical, ci_lower, 'r:', linewidth=1.5)
ax5.set_title('Figure 8: Worm Plot (Detrended Q-Q): Simulated MVO Portfolio Returns', fontsize=14)
ax5.set_xlabel('Theoretical Normal Quantiles')
ax5.set_ylabel('Deviation (Empirical - Theoretical)')
ax5.set_ylim(min(deviations)*1.2, max(deviations)*1.2)
ax5.legend()
plt.show()
The results (Table 3) show that the 1/N portfolio, which benefits from the absence of parameter estimation error, outperformed the long-only mean-variance portfolio in the vast majority of simulations: the MVO win rate was only 2.18%.
The one-year cumulative return distributions (Figure 4) show that the 1/N portfolio centers its returns around the market mean, since it allocates 20% of capital to each asset regardless of how volatile any asset is relative to the others.
The mean-variance optimization (MVO) distribution, on the other hand, shows positive skewness: most simulations produce modest returns, with a small probability of much higher ones. This reflects its tendency to penalize assets with disproportionate variance and to concentrate weight in the assets that looked most efficient in-sample.
Regarding the goodness of fit of the simulated results, the worm plot and Q-Q plot (Figures 7 and 8) show heavy tails, meaning that market shocks occur more frequently than the normal distribution predicts. Ignoring historical data may lead to inaccurate return predictions; however, this does not imply that the 1/N portfolio is inferior. Rather, it highlights the common trade-off between robustness to estimation error and sensitivity to distributional assumptions.
Part 3 and 4¶
We continued with the same assets described above; the implied equilibrium returns were obtained using the following formula:
$$\Pi = \delta \Sigma w_{mkt}$$
where $\Pi$ is the $N \times 1$ vector of equilibrium returns; $\delta$ the market risk-aversion coefficient; $\Sigma$ the $N \times N$ variance-covariance matrix of returns; and $w_{mkt}$ the vector of market-capitalization weights.
The posterior expected return ($E[R]$) was found using:
$$E[R] = [(\tau \Sigma)^{-1} + P^T \Omega^{-1} P]^{-1} [(\tau \Sigma)^{-1} \Pi + P^T \Omega^{-1} Q]$$
where $\tau$ is a scalar representing the uncertainty of the market prior distribution; $P$ the $K \times N$ matrix identifying the $K$ analyst views on the $N$ assets; $Q$ the $K \times 1$ column vector of expected returns associated with the views; and $\Omega$ the $K \times K$ diagonal matrix of view error variances (Black and Litterman, 30).
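To make the posterior formula concrete, the sketch below evaluates $E[R]$ directly in NumPy on a two-asset, one-view toy example (all numbers are hypothetical, chosen only for illustration). With a confident view, the posterior spread between the two assets moves from the prior value toward the view $Q$.

```python
import numpy as np

tau = 0.05
Sigma = np.array([[0.04, 0.01],
                  [0.01, 0.09]])   # toy covariance matrix
Pi = np.array([0.06, 0.08])        # equilibrium (prior) returns
P = np.array([[1.0, -1.0]])        # one relative view: asset 1 minus asset 2
Q = np.array([0.02])               # view: asset 1 outperforms asset 2 by 2%
Omega = np.array([[0.001]])        # small variance = confident view

# E[R] = [(tau*Sigma)^-1 + P' Omega^-1 P]^-1 [(tau*Sigma)^-1 Pi + P' Omega^-1 Q]
tS_inv = np.linalg.inv(tau * Sigma)
Om_inv = np.linalg.inv(Omega)
A = tS_inv + P.T @ Om_inv @ P
b = tS_inv @ Pi + P.T @ Om_inv @ Q
posterior = np.linalg.solve(A, b)
print(posterior.round(4))
```

The prior spread $\Pi_1 - \Pi_2 = -2\%$ is pulled toward the $+2\%$ view without overshooting it: exactly the blending behavior the PyPortfolioOpt call produces for the seven-asset case.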
%pip install yfinance pandas numpy scipy matplotlib seaborn PyPortfolioOpt --quiet
import yfinance as yf
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pypfopt import black_litterman, risk_models, EfficientFrontier
from pypfopt.black_litterman import BlackLittermanModel
assets = ['AAPL', 'NVDA', 'TSLA', 'XOM', 'REGN', 'LLY', 'JPM']
start_date = '2023-01-01'
end_date = '2025-06-30'
data = yf.download(assets, start=start_date, end=end_date)['Close']
returns = data.pct_change().dropna()
mcaps = {'AAPL': 2800, 'NVDA': 2200, 'TSLA': 550, 'XOM': 450, 'REGN': 100, 'LLY': 700, 'JPM': 500}
S = risk_models.sample_cov(data)
delta = 2.5
market_prior = black_litterman.market_implied_prior_returns(mcaps, delta, S)
asset_cols = list(data.columns)
P = np.zeros((4, len(assets)))
Q = np.array([0.25, 0.05, 0.02, 0.04])
nvda_idx = asset_cols.index('NVDA')
lly_idx = asset_cols.index('LLY')
regn_idx = asset_cols.index('REGN')
aapl_idx = asset_cols.index('AAPL')
jpm_idx = asset_cols.index('JPM')
xom_idx = asset_cols.index('XOM')
tsla_idx = asset_cols.index('TSLA')
P[0, nvda_idx] = 1.0
P[1, lly_idx] = 1.0; P[1, regn_idx] = -1.0
P[2, aapl_idx] = 1.0; P[2, jpm_idx] = -1.0
P[3, xom_idx] = 1.0; P[3, tsla_idx] = -1.0
bl = BlackLittermanModel(S, pi=market_prior, P=P, Q=Q, omega="idzorek", view_confidences=[0.8, 0.6, 0.4, 0.6])
posterior_rets = bl.bl_returns()
posterior_cov = bl.bl_cov()
ef_bl = EfficientFrontier(posterior_rets, posterior_cov, weight_bounds=(0, 1))
ef_bl.max_sharpe()
bl_weights = ef_bl.clean_weights()
total_mcap = sum(mcaps.values())
market_weights = {ticker: cap/total_mcap for ticker, cap in mcaps.items()}
comparison_df = pd.DataFrame({
'Market Cap Weights (Prior)': pd.Series(market_weights),
'Black-Litterman (Posterior)': pd.Series(bl_weights)
}).fillna(0) * 100
comparison_df.plot(kind='bar', figsize=(12, 6), color=['#A9A9A9', '#1F77B4'], edgecolor='black')
plt.title('Figure 9: Black-Litterman Portfolio Optimization: Prior vs. Posterior Allocations', fontsize=14)
plt.ylabel('Allocation (%)')
plt.axhline(0, color='black', linewidth=1)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()
print("\n Table 4: Black-Litterman Optimal Weights")
display(comparison_df.round(2))
Table 4: Black-Litterman Optimal Weights
| Ticker | Market Cap Weights (Prior) | Black-Litterman (Posterior) |
|---|---|---|
| AAPL | 38.36 | 26.50 |
| JPM | 6.85 | 7.44 |
| LLY | 9.59 | 14.42 |
| NVDA | 30.14 | 29.35 |
| REGN | 1.37 | 0.00 |
| TSLA | 7.53 | 0.00 |
| XOM | 6.16 | 22.31 |
Assets on which positive views were formulated, such as LLY and XOM, were relatively underweighted in the market equilibrium portfolio; after the analyst views were incorporated, their weights increased accordingly. Conversely, assets on which negative views were formulated, such as TSLA and REGN, were overweighted relative to the expressed expectations and had their allocations reduced to zero to accommodate the views. This behavior shows that the final result recalibrates the market equilibrium, incorporating the information provided by specialized investors and generating a portfolio that reflects the analysts' views adjusted for the uncertainty of each prediction.
Step 4: Optimal growth and risk management using the multivariate Kelly criterion¶
Here we used fractional leverage, i.e., weight vectors scaled to optimize the long-run growth rate, using the formula:
$$f^* = \Sigma^{-1} (\mu - r_f \mathbf{1})$$
See Merton (399) and MacLean, Thorp, and Ziemba (685) for further explanation.
import scipy.stats as stats
r_f = 0.04
annualized_returns = returns.mean() * 252
annualized_cov = returns.cov() * 252
excess_returns = annualized_returns - r_f
inv_cov = np.linalg.inv(annualized_cov.values)
full_kelly_weights = inv_cov.dot(excess_returns.values)
half_kelly_weights = 0.5 * full_kelly_weights
double_kelly_weights = 2.0 * full_kelly_weights
def calculate_daily_kelly_path(weights, daily_returns, rf_annual=0.04):
    rf_daily = rf_annual / 252
    weight_sum = np.sum(weights)
    cash_weight = 1.0 - weight_sum   # residual position earns the risk-free rate
    port_returns = daily_returns.dot(weights) + (cash_weight * rf_daily)
    return port_returns
ret_full = calculate_daily_kelly_path(full_kelly_weights, returns)
ret_half = calculate_daily_kelly_path(half_kelly_weights, returns)
ret_double = calculate_daily_kelly_path(double_kelly_weights, returns)
wealth_full = (1 + ret_full).cumprod()
wealth_half = (1 + ret_half).cumprod()
wealth_double = (1 + ret_double).cumprod()
fig = plt.figure(figsize=(18, 12))
grid = plt.GridSpec(2, 2, figure=fig, hspace=0.3)
ax1 = fig.add_subplot(grid[0, 0])
ax1.plot(wealth_half.index, wealth_half, label='Half Kelly (0.5f*)', color='green', linewidth=2)
ax1.plot(wealth_full.index, wealth_full, label='Full Kelly (1.0f*)', color='blue', linewidth=2)
ax1.plot(wealth_double.index, wealth_double, label='Double Kelly (2.0f*)', color='red', linewidth=2)
ax1.set_yscale('log')
ax1.set_title('Figure 10: Kelly Strategies Cumulative Wealth (Log Scale)', fontsize=14)
ax1.set_ylabel('Portfolio Value (Log $)')
ax1.legend()
ax1.grid(True, which="both", ls="--", alpha=0.5)
def get_drawdown(wealth_path):
    peak = wealth_path.cummax()
    return (wealth_path - peak) / peak
ax2 = fig.add_subplot(grid[0, 1])
ax2.fill_between(wealth_half.index, get_drawdown(wealth_half), color='green', alpha=0.3, label='Half Kelly')
ax2.fill_between(wealth_full.index, get_drawdown(wealth_full), color='blue', alpha=0.3, label='Full Kelly')
ax2.fill_between(wealth_double.index, get_drawdown(wealth_double), color='red', alpha=0.3, label='Double Kelly')
ax2.set_title('Figure 11: Strategy Drawdowns', fontsize=14)
ax2.set_ylabel('Drawdown (%)')
ax2.legend()
mvo_sorted = np.sort(ret_full)
mu_mvo = np.mean(mvo_sorted)
std_mvo = np.std(mvo_sorted)
z_empirical = (mvo_sorted - mu_mvo) / std_mvo
n = len(z_empirical)
p_i = (np.arange(1, n + 1) - 0.5) / n
z_theoretical = stats.norm.ppf(p_i)
deviations = z_empirical - z_theoretical
se = (1 / stats.norm.pdf(z_theoretical)) * np.sqrt(p_i * (1 - p_i) / n)
ci_upper = 1.96 * se
ci_lower = -1.96 * se
ax3 = fig.add_subplot(grid[1, 0])
stats.probplot(ret_full, dist="norm", plot=ax3)
ax3.get_lines()[0].set_markerfacecolor('blue')
ax3.get_lines()[0].set_markeredgecolor('black')
ax3.get_lines()[0].set_alpha(0.5)
ax3.get_lines()[1].set_color('red')
ax3.set_title('Figure 12: Goodness of Fit (Q-Q): Full Kelly Returns', fontsize=14)
ax4 = fig.add_subplot(grid[1, 1])
ax4.plot(z_theoretical, deviations, 'o', markerfacecolor='blue', markeredgecolor='black', alpha=0.4, label='Deviations (Worm)')
ax4.plot(z_theoretical, np.zeros_like(z_theoretical), 'r--', linewidth=2, label='Perfect Normal Baseline')
ax4.plot(z_theoretical, ci_upper, 'r:', linewidth=1.5, label='95% Confidence Interval')
ax4.plot(z_theoretical, ci_lower, 'r:', linewidth=1.5)
ax4.set_title('Figure 13: Worm Plot (Detrended Q-Q): Diagnostic of Kelly Tail Risks', fontsize=14)
ax4.set_xlabel('Theoretical Normal Quantiles')
ax4.set_ylabel('Deviation (Empirical - Theoretical)')
ax4.legend()
plt.show()
print("\n Table 5: Kelly Leverage Weights (Fraction of Equity)")
kelly_df = pd.DataFrame({
    'Half Kelly': half_kelly_weights,
    'Full Kelly': full_kelly_weights,
    'Double Kelly': double_kelly_weights
}, index=returns.columns).round(3)
display(kelly_df)
Table 5: Kelly Leverage Weights (Fraction of Equity)
| Ticker | Half Kelly | Full Kelly | Double Kelly |
|---|---|---|---|
| AAPL | -0.503 | -1.005 | -2.010 |
| JPM | 2.235 | 4.471 | 8.941 |
| LLY | 1.323 | 2.647 | 5.294 |
| NVDA | 1.661 | 3.322 | 6.645 |
| REGN | -1.763 | -3.525 | -7.050 |
| TSLA | 0.061 | 0.122 | 0.244 |
| XOM | -0.157 | -0.314 | -0.629 |
Evaluating cumulative wealth over time on a logarithmic scale (Figure 10), we see that Double Kelly showed a strong initial rise and outperformed the other strategies in several high-volatility periods; however, this leverage led to a catastrophic collapse at the beginning of 2025, losing practically all capital.
Full Kelly, in turn, delivered the best cumulative returns for much of the period, up to the end of 2024, but with large fluctuations. The impact of these fluctuations is shown in Figure 11, where Double Kelly reaches a drawdown of -100% while Full Kelly suffers drawdowns of up to 80%.
The Half Kelly strategy proved the most resilient, even though it never reached the peaks achieved by Full Kelly. Figures 12 and 13 show that the Full Kelly returns exhibit heavy tails and a poor fit to the normal distribution.
Conclusion¶
This project illustrated the evolution and complexity of quantitative asset-allocation strategies, highlighting the trade-off between maximizing returns and managing risk. Traditional mean-variance optimization proved fragile when used to maximize the Sharpe ratio alone, making weight restrictions essential for a diversified portfolio.
The Monte Carlo simulations showed that, out of sample, the naive 1/N portfolio outperformed the restricted optimization in the vast majority of draws, underscoring the cost of estimation error. The Black-Litterman model proved effective for recalibrating the market-equilibrium weights with analyst views.
Finally, the Kelly analysis confirmed that the fractional (half) Kelly strategy offers the best balance between long-run growth and drawdown risk.
References¶
Jacobs, Bruce I., and Kenneth N. Levy. "Long/Short Equity Investing." The Journal of Portfolio Management, vol. 20, no. 1, 1993, pp. 52-63.
Jagannathan, Ravi, and Tongshu Ma. "Risk Reduction in Large Portfolios: Why Imposing the Wrong Constraints Helps." The Journal of Finance, vol. 58, no. 4, 2003, pp. 1651-1683.
Markowitz, Harry. "Portfolio Selection." The Journal of Finance, vol. 7, no. 1, 1952, pp. 77-91.
van Buuren, Stef, and Miranda Fredriks. "Worm plot: a simple diagnostic device for modelling growth reference curves." Statistics in Medicine, vol. 20, no. 8, 2001, pp. 1259-1277.
Cont, Rama. "Empirical properties of asset returns: stylized facts and statistical issues." Quantitative Finance, vol. 1, no. 2, 2001, pp. 223-236.
DeMiguel, Victor, Lorenzo Garlappi, and Raman Uppal. "Optimal versus naive diversification: How inefficient is the 1/N portfolio strategy?." The Review of Financial Studies, vol. 22, no. 5, 2009, pp. 1915-1953.
Michaud, Richard O. "The Markowitz Optimization Enigma: Is 'Optimized' Optimal?" Financial Analysts Journal, vol. 45, no. 1, 1989, pp. 31-42.
Black, Fischer, and Robert Litterman. "Global Portfolio Optimization." Financial Analysts Journal, vol. 48, no. 5, 1992, pp. 28-43.
MacLean, Leonard C., Edward O. Thorp, and William T. Ziemba. "Long-term capital growth: the good and bad properties of the Kelly and fractional Kelly capital growth criteria." Quantitative Finance, vol. 4, no. 6, 2004, pp. 681-690.
Merton, Robert C. "Optimum consumption and portfolio rules in a continuous-time model." Journal of Economic Theory, vol. 3, no. 4, 1971, pp. 373-413.