Financial Econometrics Group Work Project 2¶

Introduction¶

Time series models serve as pivotal tools for forecasting, risk assessment, and strategic decision-making. However, their efficacy is often compromised by inherent challenges such as abrupt structural shifts, interdependencies among predictors, and the computational burden of high-dimensional data. This work addresses three critical obstacles: detecting regime changes (Section 1), handling multicollinearity (Section 2), and extracting features (Section 3).

Regime changes, sudden shifts in data dynamics, can invalidate assumptions of stationarity, demanding robust detection mechanisms such as Markov switching models or structural break tests. Multicollinearity, where predictors exhibit linear dependencies, obscures interpretability and inflates variance, requiring techniques such as variance inflation factor (VIF) analysis or dimensionality reduction. Feature extraction, meanwhile, streamlines complex datasets by retaining informative patterns, balancing efficiency with fidelity to the original data. Through definitions, diagnostic frameworks, and visualizations, we demonstrate practical solutions to these issues.

Regime change in time series modeling¶

A regime change in time series modeling refers to an abrupt structural shift in the statistical properties of a process, such as its mean, variance, or autocorrelation, due to transitions between latent states (unobserved regimes) or deterministic structural breaks (Hamilton 357; Tsay 148). These shifts often arise from economic, political, or market dynamics, such as financial crises, policy changes, or technological disruptions. Two primary frameworks model regime changes:

(1) Structural Break Models: Structural breaks occur at specific points $\tau$, where the data-generating process abruptly changes. For example, an autoregressive process transitions between regimes as:
$$ y_t = \begin{cases} \mu_1 + \phi_1 y_{t-1} + \epsilon_t & t \leq \tau, \\ \mu_2 + \phi_2 y_{t-1} + \epsilon_t & t > \tau, \end{cases} \quad \epsilon_t \sim \mathcal{N}(0, \sigma^2), $$
where $\mu_1 \neq \mu_2$ or $\phi_1 \neq \phi_2$. The breakpoint $\tau$ is identified using statistical tests like the Chow test or Bai-Perron test (Tsay 148).
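As an illustration, the piecewise AR(1) above can be simulated and the break date recovered by a Chow-style grid search that splits the sample at each candidate $\tau$ and minimizes the combined residual sum of squares. This is a minimal sketch on synthetic data, not the full Bai-Perron procedure; all parameter values are illustrative:

```python
import numpy as np

rng = np.random.default_rng(0)

# Simulate an AR(1) with a structural break at tau = 150:
# the (mean, persistence) pair shifts from (mu1, phi1) to (mu2, phi2)
T, tau = 300, 150
mu1, phi1, mu2, phi2, sigma = 0.0, 0.5, 2.0, 0.8, 1.0
y = np.zeros(T)
for t in range(1, T):
    mu, phi = (mu1, phi1) if t <= tau else (mu2, phi2)
    y[t] = mu + phi * y[t - 1] + sigma * rng.standard_normal()

def sse_ar1(seg):
    """Residual sum of squares of an OLS AR(1) fit on one segment."""
    X = np.column_stack([np.ones(len(seg) - 1), seg[:-1]])
    beta, *_ = np.linalg.lstsq(X, seg[1:], rcond=None)
    resid = seg[1:] - X @ beta
    return resid @ resid

# Chow-style grid search: the estimated break minimizes the combined SSE
candidates = range(30, T - 30)  # trim the edges so both segments are estimable
tau_hat = min(candidates, key=lambda c: sse_ar1(y[:c]) + sse_ar1(y[c:]))
print(f"true break: {tau}, estimated break: {tau_hat}")
```

In practice the Bai-Perron procedure extends this idea to multiple unknown breaks, with formal tests for their number.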

(2) Latent State Transition Models: Latent states govern regime shifts through an unobserved variable $S_t \in \{1, 2, \dots, K\}$, evolving via a first-order Markov chain. The Markov Switching Autoregressive (MS-AR) model is:
$$ y_t = \mu_{S_t} + \sum_{i=1}^p \phi_{i,S_t} (y_{t-i} - \mu_{S_{t-i}}) + \sigma_{S_t} \epsilon_t, \quad \epsilon_t \sim \mathcal{N}(0, 1), $$
with transitions defined by a matrix $\mathbf{P} = [p_{ij}]$:
$$ p_{ij} = \mathbb{P}(S_t = j \mid S_{t-1} = i), \quad \sum_{j=1}^K p_{ij} = 1 \quad \text{(Hamilton 357)}. $$
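The transition matrix $\mathbf{P}$ directly implies the long-run fraction of time spent in each regime (the stationary distribution) and the expected regime duration $1/(1 - p_{ii})$. A small sketch with an illustrative two-state matrix:

```python
import numpy as np

# Illustrative 2-state transition matrix, P[i, j] = P(S_t = j | S_{t-1} = i)
P = np.array([[0.95, 0.05],
              [0.10, 0.90]])

# Stationary distribution pi solves pi = pi P
# (left eigenvector of P for eigenvalue 1, normalized to sum to 1)
eigvals, eigvecs = np.linalg.eig(P.T)
pi = np.real(eigvecs[:, np.argmin(np.abs(eigvals - 1))])
pi = pi / pi.sum()

# Expected duration of regime i is 1 / (1 - p_ii) (geometric sojourn time)
durations = 1.0 / (1.0 - np.diag(P))

print("stationary distribution:", pi)    # approx [0.667, 0.333]
print("expected durations:", durations)  # approx [20, 10] periods
```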

Detection Methods¶

(1) Hidden Markov Models (HMMs): The posterior probability of regime $k$ at time $t$ is computed via the forward-backward algorithm:
$$ \mathbb{P}(S_t = k \mid y_{1:T}) = \frac{\alpha_t(k) \beta_t(k)}{\sum_{j=1}^K \alpha_t(j) \beta_t(j)}, $$
where $\alpha_t(k) = \mathbb{P}(y_{1:t}, S_t = k)$ (forward probability) and $\beta_t(k) = \mathbb{P}(y_{t+1:T} \mid S_t = k)$ (backward probability) (Chen and Tsang 7).
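A minimal sketch of these forward-backward recursions on a simulated two-state Gaussian HMM, with rescaling at each step to avoid numerical underflow (the parameter values are illustrative):

```python
import numpy as np
from scipy.stats import norm

# Toy 2-state Gaussian HMM
P = np.array([[0.9, 0.1], [0.2, 0.8]])   # transition matrix
mu, sigma = np.array([0.0, 3.0]), 1.0    # state-dependent means, common scale
pi0 = np.array([0.5, 0.5])               # initial state distribution

rng = np.random.default_rng(1)
states = [0]
for _ in range(199):
    states.append(rng.choice(2, p=P[states[-1]]))
states = np.array(states)
y = mu[states] + sigma * rng.standard_normal(200)

B = norm.pdf(y[:, None], loc=mu, scale=sigma)  # emission densities b_k(y_t)

# Forward pass: alpha_t(k) proportional to P(y_{1:t}, S_t = k)
alpha = np.zeros((len(y), 2))
alpha[0] = pi0 * B[0]
alpha[0] /= alpha[0].sum()
for t in range(1, len(y)):
    alpha[t] = (alpha[t - 1] @ P) * B[t]
    alpha[t] /= alpha[t].sum()            # rescale for numerical stability

# Backward pass: beta_t(k) proportional to P(y_{t+1:T} | S_t = k)
beta = np.ones((len(y), 2))
for t in range(len(y) - 2, -1, -1):
    beta[t] = P @ (B[t + 1] * beta[t + 1])
    beta[t] /= beta[t].sum()

# Posterior (smoothed) regime probabilities
post = alpha * beta
post /= post.sum(axis=1, keepdims=True)
accuracy = np.mean((post[:, 1] > 0.5) == (states == 1))
print(f"smoothed-state accuracy vs. true states: {accuracy:.2f}")
```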

(2) Hybrid Approaches: Bayesian methods automate regime identification by imposing hierarchical priors on parameters and transition probabilities, enabling flexible modeling of regime persistence and uncertainty (Sanquer et al. 4). These models are critical in finance and economics for capturing non-stationary behaviors (e.g., volatility clustering, fat tails) and improving forecasting accuracy during turbulent periods (Hamilton 362; Chen and Tsang 1).

Description¶

A regime change in time series modeling refers either to an abrupt structural break, which can be contiguous (occurring at a single point, dividing the series into adjacent segments) or non-contiguous (multiple disjoint breaks), or to a probabilistic shift in statistical properties (e.g., mean, variance).

Demonstration¶

For the demonstration and other parts requiring data, we used the U.S. unemployment rate (2000–2023) series sourced from FRED. We chose this series because unemployment rates are likely to exhibit abrupt structural breaks during economic crises, such as the 2008–2009 global financial crisis (triggered by housing market collapses and banking failures), followed by a low-unemployment regime during the 2010–2019 expansion marked by steady economic growth, and a sharp spike in 2020 due to the COVID-19 recession (driven by lockdowns and labor market disruptions).

In [ ]:
!pip install pandas_datareader statsmodels matplotlib --quiet

import os
import pandas as pd
import matplotlib.pyplot as plt
from pandas_datareader import data as web
from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression
import warnings
warnings.filterwarnings('ignore')
#== FRED API key (replace the placeholder with your own key)
os.environ["FRED_API_KEY"] = "YOUR_FRED_API_KEY"
df = web.DataReader("UNRATE", "fred", start="2000-01-01", end="2023-12-31")
df.head()
Out[ ]:
UNRATE
DATE
2000-01-01 4.0
2000-02-01 4.1
2000-03-01 4.0
2000-04-01 3.8
2000-05-01 4.0
In [ ]:
plt.figure(figsize=(12, 6))
plt.plot(df, label="U.S. Unemployment Rate (%)", color="navy")

plt.title("Figure 1: Monthly U.S. Unemployment Rate (2000–2023)")
plt.xlabel("Year")
plt.ylabel("Rate (%)")

#=== Convert date strings to datetime objects
crisis_2008 = pd.to_datetime('2008-01-01')
covid_2020 = pd.to_datetime('2020-03-01')
recovery_2021 = pd.to_datetime('2021-01-01')

#=== Add vertical lines with datetime objects
plt.axvline(x=crisis_2008, color='red', linestyle='--', linewidth=1.2)
plt.text(crisis_2008, 11, '->2008 Crisis', color='red', rotation=30, verticalalignment='center')

plt.axvline(x=covid_2020, color='black', linestyle='--', linewidth=1.2)
plt.text(covid_2020, 5, '->COVID-19 Shock', color='black', rotation=80, verticalalignment='center')

plt.axvline(x=recovery_2021, color='purple', linestyle='--', linewidth=1.2)
plt.text(recovery_2021, 13, '->Recovery Phase', color='purple', rotation=30, verticalalignment='center')

plt.legend()
plt.tight_layout()
plt.show()

Figure 1, the U.S. unemployment rate from 2000 to 2023, clearly exhibits three distinct regime changes marked by structural shifts in the labor market. The first shift occurs during the 2008 financial crisis, with a sharp and sustained rise in unemployment, followed by a gradual recovery. The second and most abrupt regime change is the COVID-19 shock in 2020, characterized by an unprecedented spike in unemployment, quickly transitioning into a recovery phase from 2021 onward, reflecting a rapid labor market rebound.

In [ ]:
# Fit 2-regime Markov switching model (switching intercept)
model = MarkovRegression(df, k_regimes=2, trend='c', order=1)
result = model.fit()

print(result.summary())
                        Markov Switching Model Results                        
==============================================================================
Dep. Variable:                 UNRATE   No. Observations:                  288
Model:               MarkovRegression   Log Likelihood                -491.836
Date:                Tue, 08 Apr 2025   AIC                            993.672
Time:                        22:25:32   BIC                           1011.987
Sample:                    01-01-2000   HQIC                          1001.012
                         - 12-01-2023                                         
Covariance Type:               approx                                         
                             Regime 0 parameters                              
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          4.9206      0.109     45.092      0.000       4.707       5.135
                             Regime 1 parameters                              
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          7.1819      0.428     16.789      0.000       6.343       8.020
                           Non-switching parameters                           
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
sigma2         1.9514      0.548      3.562      0.000       0.878       3.025
                         Regime transition parameters                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
p[0->0]        0.9957      0.003    326.515      0.000       0.990       1.002
p[1->0]        0.0460      0.030      1.511      0.131      -0.014       0.106
==============================================================================

Warnings:
[1] Covariance matrix calculated using numerical (complex-step) differentiation.

Interpretation: The model identifies two regimes: Regime 0 (low unemployment, mean = 4.92%) and Regime 1 (high unemployment, mean = 7.18%). Regime 0 is highly persistent (99.6% chance of remaining), and Regime 1 is also persistent, with only a 4.6% monthly probability of transitioning back to Regime 0. The high variance (1.95) reflects volatility in unemployment rates during shocks. The log-likelihood (-491.8) and AIC (993.7) suggest a moderate fit, consistent with cyclical labor market dynamics. The results confirm structural breaks during crises (e.g., 2008, 2020).

Structural Break Detection (Bai-Perron), Hidden Markov Models (HMMs) and Bayesian Change-Point Detection¶

While approaches like Markov Switching Models require the number of regimes to be specified in advance (e.g., k_regimes=2), alternative methods allow the data to inform this choice more flexibly (Hamilton 98). Bayesian techniques such as change-point detection infer both the number and locations of structural shifts probabilistically, often through MCMC sampling (Barry and Hartigan 101). Non-parametric Bayesian models like Dirichlet Process Mixtures further relax assumptions by allowing for an unbounded number of regimes, adapting complexity to the data (Teh et al. 215). Other strategies, including Hidden Markov Models, estimate latent states and use model selection criteria such as AIC or BIC to identify the optimal number of states (Rabiner 267). Additionally, frequentist methods like structural break detection (e.g., Bai-Perron) identify statistically significant breakpoints by evaluating model fit across potential segmentations (Bai and Perron 72).

Structural Break Detection (Bai-Perron)¶

In [ ]:
!pip install ruptures --quiet

import ruptures as rpt
import matplotlib.pyplot as plt

#======= Structural Break Detection =======#
data = df.values

plt.figure(figsize=(12, 6))
plt.plot(df.index, data, label="U.S. Unemployment Rate (%)", color="navy", alpha=0.7)

#======= Detect breaks using the PELT algorithm (a computational stand-in for Bai-Perron) =======#
algo = rpt.Pelt(model="l2").fit(data)
breaks = algo.predict(pen=10)  # penalty parameter controlling the number of breaks

#======= Annotate breaks =======#
for brk in breaks[:-1]:  # Skip last break (end of series)
    break_date = df.index[brk]
    plt.axvline(x=break_date, color='red', linestyle='--', linewidth=1)
    plt.text(break_date,
             data.max() - 1,
             f'Break: {break_date.strftime("%Y-%m")}',
             color='red',
             rotation=90,
             verticalalignment='top')

plt.title("Figure 2: Structural Break Detection (Bai-Perron)")
plt.xlabel("Year")
plt.ylabel("Unemployment Rate (%)")
plt.legend()
plt.tight_layout()
plt.show()

Figure 2 displays structural breaks in the U.S. unemployment rate (2000–2023), identified here with the PELT algorithm as a computational stand-in for the Bai-Perron method. Three major shifts are observed: the 2008 financial crisis, the 2020 COVID-19 spike, and the 2021 recovery phase.

Hidden Markov Models¶

In [ ]:
!pip install hmmlearn --quiet
from hmmlearn import hmm
import numpy as np

#==Fit HMM with Gaussian emissions and auto-select states via AIC
best_aic = np.inf
best_n_states = 1

for n_components in [1, 2, 3]:
    model = hmm.GaussianHMM(n_components=n_components, covariance_type="diag")
    model.fit(data.reshape(-1, 1))
    aic = model.aic(data.reshape(-1, 1))
    if aic < best_aic:
        best_aic = aic
        best_n_states = n_components

print(f"Optimal states: {best_n_states} (AIC={best_aic:.2f})")
Optimal states: 2 (AIC=839.32)

The output indicates that a Hidden Markov Model (HMM) with 2 hidden states is optimal for the data, as determined by the Akaike Information Criterion (AIC = 839.32).

Diagram¶

In [ ]:
# Fit 3-regime Markov switching model
model = MarkovRegression(
    df,
    k_regimes=3,
    trend='ct',
    order=1,
   # switching_variance=True
)
result = model.fit()

prob_high = result.smoothed_marginal_probabilities[1]

plt.figure(figsize=(12, 6))
plt.plot(df, label='Unemployment Rate (%)', color='navy', alpha=0.7)
plt.plot(prob_high * 10, label='High-Unemployment Probability (Scaled x10)', color='red', linestyle='--')

plt.fill_between(df.index, 0, prob_high*10, where=(prob_high > 0.5),
                 color='red', alpha=0.2, label='High-Unemployment Regime')

plt.title("Figure 3: Regime Detection in U.S. Unemployment Rate (2000–2023)")
plt.xlabel("Year")
plt.ylabel("Rate / Probability")
plt.legend()
plt.show()

Figure 3 overlays the smoothed probability of the high-unemployment regime (scaled for visibility) on the U.S. unemployment rate (2000–2023). The shaded intervals, where this probability exceeds 0.5, coincide with the major shifts identified earlier: the 2008 financial crisis and the 2020 COVID-19 shock.

Diagnostic¶

To validate the model’s assumptions and performance, we analyze residuals, autocorrelation, and regime probabilities.

In [ ]:
from statsmodels.graphics.tsaplots import plot_acf
from scipy.stats import probplot

result = model.fit()

residuals = result.resid
#== residuals and ACF
fig, axes = plt.subplots(1, 3, figsize=(16, 4))


axes[0].plot(df.index, residuals, color='navy')
axes[0].axhline(0, linestyle='--', color='gray')
axes[0].set_title('Figure 4.1: Residuals of MS-AR Model')
axes[0].set_ylabel('Residuals')

#==ACF of residuals
plot_acf(residuals.dropna(), lags=20, ax=axes[1], color='red')
axes[1].set_title('Figure 4.2: Autocorrelation Function (ACF)')

#==QQ plot for normality check
probplot(residuals.dropna(), dist='norm', plot=axes[2])
axes[2].set_title('Figure 4.3: QQ Plot')

plt.tight_layout()
plt.show()

The data exhibit problems of autocorrelation and heavy-tailed distributions, which may affect the robustness of the analysis.

Damage¶

The Markov Switching Model reveals critical challenges in detecting regime changes in unemployment data: (1) Residual autocorrelation reveals unmodeled temporal dynamics, suggesting incomplete capture of regime transitions; (2) Non-Gaussian residuals (heavy tails) violate model assumptions, distorting confidence intervals; (3) Non-stationarity from structural breaks (e.g., 2008, 2020) complicates equilibrium definitions, as regimes lack stable statistical properties post-shock. Additionally, (4) overfitting risks arise with excessive regimes or high AR orders, masking true economic signals with noise. (5) Parameter instability—shifts in transition probabilities over time—undermines long-term forecasting reliability. Feature extraction (e.g., volatility lags) and multicollinearity mitigation (e.g., PCA) enhance efficiency but require balancing interpretability. Addressing these issues refines regime detection, enabling robust crisis forecasting and policy design.

In [ ]:
import pandas as pd
from pandas_datareader import data as web

unrate = web.DataReader("UNRATE", "fred", start="2000-01-01", end="2023-12-31")  # Monthly

gdp = web.DataReader("GDPC1", "fred", start="2000-01-01", end="2023-12-31")      # Quarterly

gdp_yoy = gdp.pct_change(4) * 100
gdp_yoy.columns = ["GDP_YOY_GROWTH"]

gdp_monthly = gdp_yoy.resample("ME").ffill()

combined_df = pd.merge(
    unrate,
    gdp_monthly,
    left_index=True,
    right_index=True,
    how="outer"
)

combined_df["GDP_YOY_GROWTH"] = combined_df["GDP_YOY_GROWTH"].ffill()

combined_df = combined_df.dropna(subset=["UNRATE"])

combined_df = combined_df.dropna()

model = MarkovRegression(
    combined_df["UNRATE"],
    k_regimes=3,
    trend="ct",
    order=12,
    exog=combined_df["GDP_YOY_GROWTH"],
    #switching_variance=True
)
result = model.fit()

from statsmodels.tsa.stattools import adfuller

#==Test stationarity of unemployment rate
adf_unrate = adfuller(combined_df["UNRATE"].dropna())
print(f"ADF p-value (UNRATE): {adf_unrate[1]:.3f}")

#==Test stationarity of residuals
residuals = result.resid.dropna()
adf_resid = adfuller(residuals)
print(f"ADF p-value (Residuals): {adf_resid[1]:.3f}")

from statsmodels.stats.diagnostic import acorr_ljungbox

#==Performing Ljung-Box test on residuals
lb_test = acorr_ljungbox(residuals, lags=20)
print(lb_test)

from scipy.stats import probplot
Fig, axes = plt.subplots(1, 2, figsize=(16, 4))

probplot(result.resid.dropna(), dist="norm", plot=axes[0])
axes[0].set_title("Figure 5.1: QQ Plot of Residuals")

plot_acf(result.resid.dropna(), lags=20, ax=axes[1], title="Figure 5.2: ACF of Residuals")

plt.tight_layout()
plt.show()
ADF p-value (UNRATE): 0.049
ADF p-value (Residuals): 0.003
       lb_stat      lb_pvalue
1   176.404773   2.954250e-40
2   277.784164   4.785586e-61
3   370.725540   4.848473e-80
4   457.649831   9.638266e-98
5   517.829166  1.130935e-109
6   552.334363  4.431761e-116
7   572.264870  2.279826e-119
8   584.208884  5.802081e-121
9   592.355686  9.152336e-122
10  595.065528  2.008855e-121
11  595.238896  1.461266e-120
12  595.275701  1.081723e-119
13  596.168535  5.027513e-119
14  596.588201  2.830401e-118
15  596.892051  1.623914e-117
16  597.872278  6.464842e-117
17  598.993419  2.327846e-116
18  601.008694  5.267250e-116
19  603.942482  7.430332e-116
20  608.427211  4.825610e-116

We resolved the problem of heavy tails and outliers: the unemployment rate (UNRATE) is stationary at the 5% level (ADF p-value = 0.049), and the residuals are strongly stationary (ADF p-value = 0.003). However, the Ljung-Box test and the ACF of the residuals indicate severe autocorrelation at all tested lags (1–20), with p-values near zero (e.g., 2.95e-40 at lag 1). This means the model fails to capture temporal dependencies in the data, significantly undermining its reliability.

Directions¶

Detecting regime changes in time series requires balancing model flexibility and data integrity. When models exhibit poor fit (e.g., residual autocorrelation, parameter instability), strategic data manipulation such as outlier removal (e.g., winsorizing COVID-19 extremes), differencing to mitigate non-stationarity, or shortening horizons to isolate structural breaks can enhance accuracy. However, these adjustments risk discarding critical transitions (e.g., omitting 2008 data erases financial crisis dynamics) or inducing overfitting.

Rolling window analyses address time-varying parameters but sacrifice insight into long-term regime persistence. To resolve this, validate manipulations via incremental AIC/BIC comparisons and hybrid approaches (e.g., adding crisis-specific regimes instead of truncating data). Ultimately, prioritize adjustments grounded in economic logic, such as aligning regime definitions with NBER recessions, to preserve interpretability while refining forecasts.

In [ ]:
!pip install arch --quiet
import pandas as pd
from pandas_datareader import data as web
import matplotlib.pyplot as plt
from arch import arch_model
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.gofplots import qqplot
from statsmodels.tsa.regime_switching.markov_regression import MarkovRegression

unrate = web.DataReader("UNRATE", "fred", start="2000-01-01", end="2023-12-31")
returns = unrate.pct_change().dropna() * 100  # Convert to percentage returns

# Fit GARCH(1,1) Model
garch_model = arch_model(returns, mean="Constant", vol="GARCH", p=1, q=1, dist="t")
garch_result = garch_model.fit(disp="off")

# Fit Markov Switching Model
ms_model = MarkovRegression(returns, k_regimes=2, trend='ct', switching_variance=True)
ms_result = ms_model.fit()

# Plot ACFs
fig, axes = plt.subplots(1, 2, figsize=(8, 3))

plot_acf(garch_result.resid.dropna(), lags=20, ax=axes[0])
axes[0].set_title("Figure 6.1: ACF of GARCH Residuals")

plot_acf(ms_result.resid.dropna(), lags=20, ax=axes[1])
axes[1].set_title("Figure 6.2: ACF of Markov Switching Residuals")

plt.tight_layout()
plt.show()
In [ ]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from scipy.stats import norm


y = unrate.values.flatten()
T = len(y)

# -----------------------------
# Log-likelihood Function
# -----------------------------
def ms_garch_loglik(params):
    # Unpack parameters
    mu = params[0:2]
    omega = params[2:4]
    alpha = params[4:6]
    beta = params[6:8]
    p00 = params[8]
    p11 = params[9]

    # Transition probabilities
    P = np.array([
        [p00, 1 - p00],
        [1 - p11, p11]
    ])

    # Initialize regime probabilities and variances
    xi = np.ones(2) / 2  # Start with equal regime probability
    ll = 0.0
    h = np.array([np.var(y)] * 2)  # Initial volatility per regime

    for t in range(1, T):
        # Forecast next h for each regime
        for s in range(2):
            h[s] = omega[s] + alpha[s] * (y[t - 1] - mu[s])**2 + beta[s] * h[s]

        # Conditional densities
        f = np.array([
            norm.pdf(y[t], loc=mu[0], scale=np.sqrt(h[0])),
            norm.pdf(y[t], loc=mu[1], scale=np.sqrt(h[1]))
        ])

        # Prediction step
        xi_pred = P.T @ xi

        # Likelihood at t
        ft = np.dot(f, xi_pred)
        ll += np.log(ft + 1e-8)

        # Filtering step
        xi = (f * xi_pred) / (ft + 1e-8)

    return -ll  # Negative log-likelihood for minimization

# -----------------------------
# Initial values and bounds
# -----------------------------
initial_params = [0.0, 0.0,   # mu0, mu1
                  0.01, 0.01, # omega0, omega1
                  0.05, 0.05, # alpha0, alpha1
                  0.90, 0.90, # beta0, beta1
                  0.9, 0.9]   # p00, p11

bounds = [(-1, 1)] * 2 + [(1e-6, 2)] * 6 + [(0.01, 0.99)] * 2

# -----------------------------
# Estimate the model
# -----------------------------
result = minimize(ms_garch_loglik, initial_params, bounds=bounds, method='L-BFGS-B')
params_hat = result.x

# -----------------------------
# Print Results
# -----------------------------
regime_labels = ['Regime 0', 'Regime 1']
for i in range(2):
    print(f"\n--- {regime_labels[i]} ---")
    print(f"mu     = {params_hat[i]:.4f}")
    print(f"omega  = {params_hat[2+i]:.4f}")
    print(f"alpha  = {params_hat[4+i]:.4f}")
    print(f"beta   = {params_hat[6+i]:.4f}")
print(f"\nTransition Matrix:")
print(f"P = [[{params_hat[8]:.2f}, {1 - params_hat[8]:.2f}],")
print(f"     [{1 - params_hat[9]:.2f}, {params_hat[9]:.2f}]]")
--- Regime 0 ---
mu     = 1.0000
omega  = 1.1043
alpha  = 0.9757
beta   = 0.0000

--- Regime 1 ---
mu     = 1.0000
omega  = 1.1043
alpha  = 0.9757
beta   = 0.0000

Transition Matrix:
P = [[0.90, 0.10],
     [0.10, 0.90]]
In [ ]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from scipy.stats import norm
from statsmodels.graphics.tsaplots import plot_acf
import statsmodels.api as sm
import matplotlib.pyplot as plt
from scipy.stats.mstats import winsorize


y = unrate.values.flatten()
exog = combined_df["GDP_YOY_GROWTH"].values


min_len = min(len(y), len(exog))
y = y[:min_len]
exog = exog[:min_len]
T = min_len

# =====================
# MS-GARCH Model with Exogenous Variable
# =====================
def ms_garch_loglik_exog(params):
    mu = params[0:2]
    gamma = params[2:4]
    omega = params[4:6]
    alpha = params[6:8]
    beta = params[8:10]
    p00, p11 = params[10:12]

    P = np.array([[p00, 1 - p00], [1 - p11, p11]])
    xi = np.ones(2)/2
    ll = 0.0
    h = np.array([np.var(y)] * 2)

    for t in range(1, T):
        regime_mean = mu + gamma * exog[t]
        h = omega + alpha * (y[t-1] - regime_mean)**2 + beta * h
        f = norm.pdf(y[t], loc=regime_mean, scale=np.sqrt(h))
        xi_pred = P.T @ xi
        ft = f @ xi_pred
        ll += np.log(ft + 1e-12)
        xi = (f * xi_pred) / ft

    return -ll

# =====================
# Parameter Initialization with Exogenous Terms
# =====================
initial_params = [
    y.mean()/2, y.mean()/2,  # mu
    0.1, -0.1,               # gamma
    0.01, 0.01,              # omega
    0.05, 0.05,              # alpha
    0.85, 0.85,              # beta
    0.95, 0.95               # p00, p11
]

bounds = [
    (-2, 2), (-2, 2),         # mu
    (-1, 1), (-1, 1),         # gamma
    (1e-6, 1), (1e-6, 1),     # omega
    (0, 0.3), (0, 0.3),       # alpha
    (0.5, 0.99), (0.5, 0.99), # beta
    (0.7, 0.99), (0.7, 0.99)  # transition probs
]

# =====================
# Model Estimation
# =====================
result = minimize(ms_garch_loglik_exog, initial_params,
                 bounds=bounds, method='L-BFGS-B',
                 options={'maxiter': 1500})

params_hat = result.x

# =====================
#  Diagnostics
# =====================
xi = np.ones((T, 2)) * (1/2)
h_hist = np.zeros((T, 2))
h_hist[0] = [np.var(y)] * 2

for t in range(1, T):
    regime_mean = params_hat[0:2] + params_hat[2:4] * exog[t]
    h_hist[t] = (params_hat[4:6] +
                 params_hat[6:8] * (y[t-1] - regime_mean)**2 +
                 params_hat[8:10] * h_hist[t-1])

    f = norm.pdf(y[t], loc=regime_mean, scale=np.sqrt(h_hist[t]))
    xi_pred = np.array([[params_hat[10], 1 - params_hat[10]],
                        [1 - params_hat[11], params_hat[11]]]).T @ xi[t-1]
    xi[t] = (f * xi_pred) / (f @ xi_pred + 1e-12)

h_weighted = np.sum(h_hist * xi, axis=1)
residuals = y - np.sum(xi * (params_hat[0:2] + params_hat[2:4] * exog[:, None]), axis=1)
std_resid = residuals / np.sqrt(h_weighted)

fig, ax = plt.subplots(2, 1, figsize=(12, 8))

# Regime probabilities plot
ax[0].plot(unrate.index[:T], xi[:, 0], label='Low Volatility', alpha=0.7)
ax[0].plot(unrate.index[:T], xi[:, 1], label='High Volatility', alpha=0.7)
ax[0].set_title('Figure 7.1: Regime Probabilities with GDP Impact')
ax[0].legend()

# Conditional volatility with GDP overlay
ax1b = ax[1].twinx()
ax[1].plot(unrate.index[:T], h_weighted, color='black', label='Conditional Volatility')
ax1b.bar(unrate.index[:T], exog, alpha=0.3, color='orange', label='GDP Growth', width=20)
ax[1].set_title('Figure 7.2: Volatility vs. Exogenous GDP Growth')
ax[1].legend(loc='upper left')
ax1b.legend(loc='upper right')

plt.tight_layout()
plt.show()

Transforming the raw unemployment rate into returns resolved the autocorrelation and non-stationarity issues, making the series more suitable for time series modeling and improving the reliability of the volatility analysis. The MS-GARCH model with an exogenous variable (GDP YoY growth) further enhanced performance: including GDP in the mean equation allowed the model to capture macroeconomic influences on unemployment. Residual diagnostics showed no significant autocorrelation, confirming model adequacy, and the regime probabilities reflected meaningful economic shifts between high- and low-volatility periods.

Deployment¶

Regime change models offer a robust solution for analyzing time series marked by nonlinear dynamics and structural breaks, such as those driven by economic shocks, policy shifts, or environmental events. Unlike static linear models, these frameworks segment data into distinct regimes (e.g., recession/growth or high/low volatility) in which parameters such as the mean, variance, or autocorrelation adapt dynamically to reflect evolving conditions. Deployment begins with preprocessing (ensuring stationarity, handling outliers) and selecting a regime-switching mechanism: probabilistic transitions via Markov switching models or rule-based thresholds in TAR models. Parameters are estimated using maximum likelihood or Bayesian methods, often aided by the EM algorithm to infer latent regime states, while filtering techniques such as the Hamilton filter decode the most probable regime sequence. Validation involves rigorous residual diagnostics, regime separability tests, and model comparison via AIC/BIC to avoid overfitting. Forecasts integrate regime-specific dynamics with transition probabilities (e.g., Markov matrices) to quantify uncertainty, while scenario analysis simulates outcomes under hypothetical regime shifts, enhancing risk assessment. Applications span economics (tracking business cycles), finance (modeling volatility clustering), and climatology (predicting El Niño transitions). Despite their computational complexity, these models excel in real-time regime detection, enabling adaptive strategies in volatile environments.

Conclusion¶

In this challenge, we explored regime change in time series modeling by defining key concepts, demonstrating techniques, and applying them to U.S. unemployment data. We analyzed both structural break models and latent state approaches, highlighting their strengths and limitations. Key problems identified included heavy tails, residual autocorrelation, and non-stationarity. We addressed these by ensuring stationarity, mitigating outliers, and evaluating model fit via AIC/BIC. Our findings confirmed major regime shifts during economic crises (2008, 2020) and recovery periods.

References¶

Bai, Jushan, and Pierre Perron. “Estimating and Testing Linear Models with Multiple Structural Changes.” Econometrica, vol. 66, no. 1, 1998, pp. 47–78.

Barry, Daniel, and John A. Hartigan. “A Bayesian Analysis for Change Point Problems.” Journal of the American Statistical Association, vol. 88, no. 421, 1993, pp. 309–319.

Chen, Jun, and Edward P. K. Tsang. Detecting Regime Change in Computational Finance. 2023.

Hamilton, James D. Time Series Analysis. Princeton University Press, 1994.

Rabiner, Lawrence R. “A Tutorial on Hidden Markov Models and Selected Applications in Speech Recognition.” Proceedings of the IEEE, vol. 77, no. 2, 1989, pp. 257–286.

Sanquer, S., et al. “Hierarchical Bayesian Regime Identification.” Journal of Econometrics, 2021.

Teh, Yee Whye, et al. “Hierarchical Dirichlet Processes.” Journal of the American Statistical Association, vol. 101, no. 476, 2006, pp. 1566–1581.

Tsay, Ruey S. Analysis of Financial Time Series. 3rd ed., Wiley, 2010.

Feature Extraction¶

Definitions:¶

Feature extraction is the process of transforming raw data into informative and compact representations while retaining essential characteristics (Friedman et al. 253). In financial time series, common extracted features include moving averages, volatility, and momentum indicators.
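A minimal sketch of such features on a simulated daily price series (the 20-day and 10-day window lengths are illustrative choices):

```python
import numpy as np
import pandas as pd

# Simulated daily price series (geometric random walk)
rng = np.random.default_rng(4)
prices = pd.Series(100 * np.exp(np.cumsum(rng.normal(0.0005, 0.01, 500))))

features = pd.DataFrame({
    "return": prices.pct_change(),                         # daily return
    "roll_mean_20": prices.rolling(20).mean(),             # 20-day moving average
    "roll_vol_20": prices.pct_change().rolling(20).std(),  # 20-day rolling volatility
    "momentum_10": prices / prices.shift(10) - 1,          # 10-day momentum
}).dropna()

print(features.head())
print("feature matrix shape:", features.shape)
```

Dropping the warm-up rows (those lost to the longest rolling window) keeps the feature matrix aligned and free of missing values before modeling.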

Description:¶

This notebook explores key challenges in time series modeling using Apple (AAPL) stock data from Yahoo Finance. We will cover:

  1. Feature Extraction (Rolling Mean & Volatility)
  2. Handling Multicollinearity (Correlation Matrix, Heatmap)
  3. Detecting Regime Change (Markov Switching Model)
In [ ]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from scipy.stats import skew

1-2. Fetch Apple Stock Data¶

Definitions:¶

Non-Stationarity & Equilibrium:¶

Non-stationarity & Unit Root Tests: The work by David Dickey and Wayne Fuller (often cited as Dickey and Fuller, 1979, 1981) on testing for unit roots (a formal way to identify non-stationarity) was groundbreaking. Phillips and Perron, among others, also made significant contributions.

Cointegration & Equilibrium: Sir Clive Granger (Nobel Prize laureate) formalized the concept of cointegration in the 1980s, showing how non-stationary series could have a meaningful long-run equilibrium relationship. Robert Engle (also a Nobel Prize laureate) worked closely with Granger and developed the framework for testing and modeling cointegration, including the Engle-Granger two-step method.

Error Correction Models (ECMs): Engle and Granger demonstrated the link between cointegration and ECMs, showing that if variables are cointegrated, their relationship can be represented by an ECM which captures both the short-run dynamics and the long-run equilibrium adjustment. Soren Johansen later developed more advanced methods for testing cointegration in systems with multiple variables.

Description:¶

Task: Download AAPL stock data¶

  • Use yfinance to fetch historical data from 2020-2025.
  • Compute daily returns as percentage change in closing price.
  • Drop missing values.

Demonstration:¶

In [ ]:
# Download Apple stock data from Yahoo Finance
stock_data = yf.download("AAPL", start="2020-01-01", end="2025-01-01")
stock_data["Returns"] = stock_data["Close"].pct_change()
apple_data = stock_data["Returns"].dropna()

# Display first rows
stock_data.head()
YF.download() has changed argument auto_adjust default to True
[*********************100%***********************]  1 of 1 completed
Out[ ]:
Price Close High Low Open Volume Returns
Ticker AAPL AAPL AAPL AAPL AAPL
Date
2020-01-02 72.716057 72.776583 71.466797 71.721004 135480400 NaN
2020-01-03 72.009132 72.771760 71.783977 71.941343 146322800 -0.009722
2020-01-06 72.582901 72.621639 70.876068 71.127858 118387200 0.007968
2020-01-07 72.241539 72.849216 72.021223 72.592586 108872000 -0.004703
2020-01-08 73.403656 73.706287 71.943766 71.943766 132079200 0.016087

1-3. Feature Extraction: Rolling Mean & Volatility¶

Demonstration:¶

Feature Extraction Implementation¶

We extract relevant financial features (Friedman et al. 253):

Log Returns: $r_t = \ln(P_t / P_{t-1})$

Rolling Volatility: the standard deviation of returns over a moving window of length $w$, $\sigma_t = \operatorname{std}(r_{t-w+1}, \ldots, r_t)$

Simple Moving Average (SMA): $\mathrm{SMA}_t = \frac{1}{w} \sum_{i=0}^{w-1} P_{t-i}$

Description:¶

Task: Extract key features¶

  • Compute 30-day Rolling Mean to observe price trends.
  • Compute 30-day Volatility (rolling standard deviation).
  • Visualize both using a time series plot.

Diagram: Rolling mean and volatility¶

In [ ]:
# Feature extraction: Rolling mean and volatility
stock_data["Rolling_Mean"] = stock_data["Close"].rolling(window=30).mean()
stock_data["Volatility"] = stock_data["Close"].rolling(window=30).std()

# Plot rolling statistics
plt.figure(figsize=(12,6))
plt.plot(stock_data["Close"], label="Closing Price", color="blue")
plt.plot(stock_data["Rolling_Mean"], label="30-day Rolling Mean", color="red")
plt.legend()
plt.title("Feature Extraction: Rolling Mean")
plt.show()
[Figure: AAPL closing price with 30-day rolling mean]

Diagnostic:¶

This section assesses the characteristics of the data and the extracted features to identify potential modeling challenges.

The plot of the closing price and the 30-day rolling mean shows the smoothing effect of the moving average, highlighting the underlying trend in the AAPL stock price over the period.

A similar plot for volatility (standard deviation) would reveal periods of high and low price fluctuation.

Data Distribution:

Financial returns often exhibit non-normal characteristics like fat tails (leptokurtosis) and skewness. We should check these properties for the calculated apple_data (daily returns).

Stationarity:

Raw stock prices (like stock_data["Close"]) are typically non-stationary (their mean and variance change over time). The rolling mean, being derived from the price, is also likely non-stationary. Stock returns (apple_data) are often closer to stationary, but formal tests (like ADF) are required to confirm this. Non-stationarity violates assumptions of many standard time series models.

Volatility Clustering:

Visual inspection of returns or volatility plots often reveals periods in which large changes are followed by large changes, and small changes by small changes (volatility clustering). This suggests conditional heteroskedasticity, which may require models such as GARCH.
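Volatility clustering can be made concrete with a small simulation of a GARCH(1,1) process; the parameter values below are illustrative assumptions chosen to resemble typical daily equity returns.

```python
import numpy as np

# Simulate GARCH(1,1): sigma2_t = omega + alpha*eps_{t-1}^2 + beta*sigma2_{t-1}
rng = np.random.default_rng(1)
omega, alpha, beta = 0.05, 0.10, 0.85   # alpha + beta < 1 => stationary variance
n = 2000
eps = np.zeros(n)
sigma2 = np.zeros(n)
sigma2[0] = omega / (1 - alpha - beta)  # start at the unconditional variance
for t in range(1, n):
    sigma2[t] = omega + alpha * eps[t - 1] ** 2 + beta * sigma2[t - 1]
    eps[t] = np.sqrt(sigma2[t]) * rng.normal()

# Clustering shows up as positive autocorrelation in SQUARED returns,
# even though the returns themselves are serially uncorrelated
sq = eps ** 2
acf1 = np.corrcoef(sq[:-1], sq[1:])[0, 1]
print(f"lag-1 autocorrelation of squared returns: {acf1:.3f}")
```

Fitting such a model to real returns would typically be done with a dedicated package rather than by hand; the simulation here only illustrates the clustering mechanism.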

In [ ]:
# Plot Volatility
plt.figure(figsize=(12, 6))
plt.plot(stock_data["Volatility"], label="30-day Rolling Volatility", color="purple")
plt.title("Feature Extraction: Rolling Volatility")
plt.xlabel("Date")
plt.ylabel("Standard Deviation of Closing Price")
plt.legend()
plt.show()

# Check stationarity of returns using Augmented Dickey-Fuller test
print("\n--- ADF Test on Daily Returns ---")
adf_result = adfuller(apple_data)
print(f'ADF Statistic: {adf_result[0]}')
print(f'p-value: {adf_result[1]}')
print('Critical Values:')
for key, value in adf_result[4].items():
    print(f'\t{key}: {value}')
# Interpretation: If p-value < 0.05, we reject the null hypothesis (H0) that the series has a unit root (is non-stationary).

# Check basic distribution stats for returns
print("\n--- Distribution Statistics for Daily Returns ---")
print(f"Skewness: {skew(apple_data)}")
print(f"Kurtosis (Fisher): {pd.Series(apple_data).kurtosis()}") # Fisher's definition (normal == 0)
[Figure: 30-day rolling volatility of the AAPL closing price]
--- ADF Test on Daily Returns ---
ADF Statistic: -11.352901141106825
p-value: 9.894488688904509e-21
Critical Values:
	1%: -3.4356006420838963
	5%: -2.8638586845641063
	10%: -2.5680044958343604

--- Distribution Statistics for Daily Returns ---
Skewness: 0.1055018995267093
Kurtosis (Fisher): 5.277830927864649

Damage:¶

Spurious Regression: Modeling non-stationary data can produce spurious, meaningless relationships if cointegration is ignored (cf. Engle & Granger on cointegration).

Incorrect Risk Assessment: Ignoring changing volatility results in inaccurate risk (VaR) calculations.

Unreliable Coefficients: High correlation between features makes their individual effects unclear and unstable.

Directions:¶

Confirm Stationarity: Test if returns/features have stable properties over time using unit root tests (cf. Dickey-Fuller, Phillips-Perron).

Address Non-stationarity/Equilibrium: Check for long-run relationships (cointegration) if data is non-stationary (cf. Granger, Engle); use VECM if found (cf. Engle & Granger, Johansen).

Analyze Multicollinearity: Check/visualize feature correlations; reduce redundancy (PCA/selection) if high.

Model Volatility: Use GARCH models if volatility changes are clustered together.

Detect/Model Regime Changes: Identify and model different market states using Markov Switching.

Refine Feature Engineering: Improve or add features and test different calculation windows, applying principles of feature extraction (cf. Friedman et al. 253).

Deployment:¶

Algorithmic Trading: Create automated trading signals and strategies based on features and regimes.

Risk Management: Improve VaR/ES calculation with volatility models; manage pairs trading risk via cointegration understanding (related to Engle & Granger).

Portfolio Management: Adjust asset allocation based on market regimes; build robust portfolios considering feature correlations.

Financial Forecasting: Predict future returns or volatility using the developed time series models (like VECM cf. Engle & Granger, Johansen).

Feature Engineering Pipeline: Automate the feature creation process for use in larger machine learning systems.
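As a minimal sketch of the risk-management deployment above, historical VaR and Expected Shortfall can be read directly off the empirical return distribution. The synthetic fat-tailed returns below stand in for the AAPL returns computed earlier.

```python
import numpy as np

# Synthetic fat-tailed daily returns (Student-t) stand in for actual data
rng = np.random.default_rng(3)
returns = rng.standard_t(df=5, size=1000) * 0.01

# Historical 95% VaR: the loss threshold exceeded on 5% of days
q05 = np.quantile(returns, 0.05)
var_95 = -q05
# Expected Shortfall: the average loss on the days beyond the VaR threshold
es_95 = -returns[returns <= q05].mean()

print(f"95% VaR: {var_95:.4f}, 95% ES: {es_95:.4f}")
```

ES always exceeds VaR at the same confidence level, since it averages over the tail beyond the VaR threshold; with clustered volatility, a GARCH-based conditional VaR would adapt these numbers through time.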

Conclusion:¶

Actions Taken: Fetched AAPL data, calculated returns, extracted rolling features based on feature extraction concepts (cf. Friedman et al. 253).

Initial Findings: Diagnostics indicate potential issues with stationarity (related to Dickey-Fuller etc.), distribution, and volatility.

Key Challenges: Identified common financial data problems: non-stationarity (cf. Dickey-Fuller), heteroskedasticity, multicollinearity, regimes, requiring methods addressing cointegration if applicable (cf. Granger, Engle).

Importance of Addressing Issues: Ignoring problems leads to unreliable models.

Planned Future Work: Follow "Directions" for rigorous testing (cf. Dickey-Fuller) and advanced modeling like VECM (cf. Engle & Granger, Johansen). Overall Goal: Achieve robust analysis and models for reliable forecasting and practical use.

References¶

Dickey, David A., and Wayne A. Fuller. "Distribution of the estimators for autoregressive time series with a unit root." Journal of the American Statistical Association, vol. 74, no. 366, 1979, pp. 427-431.

Engle, Robert F., and C. W. J. Granger. "Co-Integration and Error Correction: Representation, Estimation, and Testing." Econometrica, vol. 55, no. 2, 1987, pp. 251–276.

Friedman, Jerome, Trevor Hastie, and Robert Tibshirani. The Elements of Statistical Learning. Springer, 2001.

Greene, William H. Econometric Analysis. Pearson Education, 2003.

2-1. Non-Stationarity Detection: ADF Test¶

Definitions:¶

Non-Stationarity & Equilibrium¶

Non-stationarity & Unit Root Tests: The work by David Dickey and Wayne Fuller (often cited as Dickey and Fuller, 1979, 1981) on testing for unit roots (a formal way to identify non-stationarity) was groundbreaking. Phillips and Perron, among others, also made significant contributions.

Description:¶

Before applying many time series models, we need to check whether the data is stationary. We will use the Augmented Dickey-Fuller (ADF) test on the Apple daily returns (apple_data) calculated earlier. This test helps determine whether differencing or other transformations are necessary.

Demonstration & Model Implementation¶

Diagnostic:¶

Task: Check if the data is non-stationary¶

Apply the Augmented Dickey-Fuller (ADF) Test to the apple_data series (daily returns).

Damage:¶

Spurious Regressions: Non-stationary data can lead to false correlations in regression models, making unrelated variables appear significantly related.

Unreliable Forecasts: Models built on non-stationary data often produce inaccurate and misleading future predictions because the data's statistical properties change over time.

Violation of Model Assumptions: Many standard time series models assume stationarity. Applying them to non-stationary data can result in incorrect parameter estimates and invalid conclusions.

Need for Transformations: Non-stationarity often necessitates data transformations (like differencing) to achieve stationarity before modeling, adding an extra step to the analysis.

Increased Model Complexity: Dealing with non-stationary data might require using more complex models (like ARIMA) that can handle the non-stationarity.

Deployment: ADF Test for non-stationarity¶


In [ ]:
# ADF Test for non-stationarity
adf_test = adfuller(apple_data)
print(f"ADF Statistic: {adf_test[0]}")
print(f"p-value: {adf_test[1]}")
if adf_test[1] > 0.05:
    print("The data is non-stationary.")
else:
    print("The data is stationary.")
ADF Statistic: -11.352901141106825
p-value: 9.894488688904509e-21
The data is stationary.

Directions: (Interpreting the ADF Test Result)¶

If the p-value reported above is greater than 0.05, we conclude the data is likely non-stationary. In this case, differencing is often applied (see Section 2-2). If the p-value is less than or equal to 0.05, we conclude the data is likely stationary, and differencing may not be required for modeling purposes like ARMA.


2-2. Differencing to Make Data Stationary¶

Definitions:¶

Cointegration & Equilibrium: Sir Clive Granger (Nobel Prize laureate) formalized the concept of cointegration in the 1980s, showing how non-stationary series could have a meaningful long-run equilibrium relationship. Robert Engle (also a Nobel Prize laureate) worked closely with Granger and developed the framework for testing and modeling cointegration, including the Engle-Granger two-step method.

Error Correction Models (ECMs): Engle and Granger demonstrated the link between cointegration and ECMs, showing that if variables are cointegrated, their relationship can be represented by an ECM which captures both the short-run dynamics and the long-run equilibrium adjustment. Soren Johansen later developed more advanced methods for testing cointegration in systems with multiple variables.

Description: Testing & Modeling Non-Stationarity¶

Augmented Dickey-Fuller (ADF) Test checks stationarity (Dickey and Fuller 1083).

Johansen Cointegration Test examines long-term relationships (Johansen 231).

Vector Error Correction Model (VECM) is used if cointegration exists.

Apply first differencing to the apple_data series (if deemed non-stationary). Apply the ADF test to the differenced series to check if stationarity was achieved.

In [ ]:
# NOTE: this cell replaces the AAPL stock_data with a simulated random-walk price series
np.random.seed(42)
dates = pd.date_range(start='2023-01-01', periods=200)
prices = np.cumsum(np.random.normal(0, 1, size=200)) + 100  # Simulated stock prices

stock_data = pd.DataFrame({'Date': dates, 'Close': prices})
stock_data.set_index('Date', inplace=True)

# Rolling features
stock_data["Rolling_Mean"] = stock_data["Close"].rolling(window=30).mean()
stock_data["Volatility"] = stock_data["Close"].rolling(window=30).std()

Diagram: Rolling Statistics¶

In [ ]:
# Plot rolling statistics
plt.figure(figsize=(12,6))
plt.plot(stock_data["Close"], label="Closing Price", color="blue")
plt.plot(stock_data["Rolling_Mean"], label="30-day Rolling Mean", color="red")
plt.legend()
plt.title("Feature Extraction: Rolling Mean")
plt.show()
[Figure: simulated closing price with 30-day rolling mean]

Diagnostic: Johansen Cointegration Test¶

In [ ]:
# Johansen Cointegration Test
johansen_data = stock_data[["Close", "Rolling_Mean"]].dropna()

if len(johansen_data) > 50:
    from statsmodels.tsa.vector_ar.vecm import coint_johansen
    johansen_test = coint_johansen(johansen_data, det_order=0, k_ar_diff=1)
    print("Johansen Test Statistic (trace):", johansen_test.lr1)
    print("Critical Values (90%, 95%, 99%):", johansen_test.cvt)
else:
    print("❗Still not enough data after dropna()")
Johansen Test Statistic (trace): [33.00473089  7.34255887]
Critical Values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]

Directions:¶

  • If data is non-stationary, apply first differencing.
  • Re-run the ADF test after differencing.

This indicates the differenced series is likely stationary and suitable for models assuming stationarity. If the p-value remains high, it might suggest the need for further differencing (less common for financial returns) or exploring other transformations or models. If the original series was already stationary, differencing it might induce patterns (like negative autocorrelation) known as over-differencing.
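The over-differencing warning above can be checked numerically: differencing an already-stationary white-noise series induces an MA(1) structure whose lag-1 autocorrelation is -0.5 in theory. The simulated series below is an illustrative assumption.

```python
import numpy as np

# Differencing white noise (already stationary) induces negative autocorrelation
rng = np.random.default_rng(5)
white = rng.normal(0, 1, 5000)
diffed = np.diff(white)

lag1 = np.corrcoef(diffed[:-1], diffed[1:])[0, 1]
print(f"lag-1 autocorrelation after over-differencing: {lag1:.3f}")  # theory: -0.5
```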

Damage:¶

Spurious Regression: Leads to finding fake, meaningless relationships between time series.

Invalid Inference: Standard statistical tests (t-tests, p-values) become unreliable.

Inconsistent Estimates: Estimated model parameters can be unstable and lack meaning.

Unreliable Forecasts: Models assuming stationarity produce inaccurate predictions.

Incorrect Model Choice: Using models requiring stationarity (like ARMA) directly is fundamentally flawed.

Deployment: Apply first differencing¶

In [ ]:
# Applying differencing to make data stationary
apple_data_diff = apple_data.diff().dropna()
adf_test_diff = adfuller(apple_data_diff)
print(f"ADF Statistic after differencing: {adf_test_diff[0]}")
print(f"p-value after differencing: {adf_test_diff[1]}")
ADF Statistic after differencing: -15.377526955825033
p-value after differencing: 3.442397839516386e-28

References¶

Dickey, David A., and Wayne A. Fuller. "Distribution of the estimators for autoregressive time series with a unit root." Journal of the American Statistical Association, vol. 74, no. 366, 1979, pp. 427-431.

Engle, Robert F., and C. W. J. Granger. "Co-Integration and Error Correction: Representation, Estimation, and Testing." Econometrica, vol. 55, no. 2, 1987, pp. 251–276.

Friedman, Jerome, Trevor Hastie, and Robert Tibshirani. The Elements of Statistical Learning. Springer, 2001.

Greene, William H. Econometric Analysis. Pearson Education, 2003.

Johansen, Soren. "Statistical analysis of cointegration vectors." Journal of Economic Dynamics and Control, vol. 12, no. 2-3, 1988, pp. 231-254.

3. Multicollinearity Check using Correlation Matrix¶

Definitions:¶

Feature Extraction¶

Feature extraction is the process of transforming raw data into informative and compact representations while retaining essential characteristics (Friedman et al. 253). In financial time series, common extracted features include moving averages, volatility, and momentum indicators.

Multicollinearity¶

Multicollinearity occurs when independent variables in a regression model are highly correlated, leading to unreliable coefficient estimates (Greene 157). In financial modeling, excessive correlation among predictors can distort inference and reduce predictive power.

Description:¶

The dataset consists of daily closing prices of AAPL stock from January 1, 2020, to January 1, 2025, retrieved via yfinance. This dataset is ideal because:

  • Stock returns often exhibit non-stationarity, necessitating statistical tests.
  • Feature extraction is essential for financial indicators (e.g., rolling volatility).
  • Market variables like trading volume and moving averages introduce multicollinearity.

In [ ]:
data = yf.download("AAPL", start="2020-01-01", end="2025-01-01")
[*********************100%***********************]  1 of 1 completed

Diagnostic:¶

Task: Detect multicollinearity¶

  • Compute correlation matrix among Close price, Rolling Mean, and Volatility.
  • Visualize using a heatmap.
In [ ]:
window = 20  # adjustable window size

# 1. Rolling Mean (20-day moving average)
data['Rolling_Mean'] = data['Close'].rolling(window=window).mean()

# 2. Volatility (20-day rolling standard deviation of returns)
data['Returns'] = data['Close'].pct_change()  # daily returns
data['Volatility'] = data['Returns'].rolling(window=window).std()

# Drop NaN values from rolling calculations
data_clean = data[['Close', 'Rolling_Mean', 'Volatility']].dropna()

Diagram: Correlation: Close Price vs. Rolling Mean vs. Volatility¶

In [ ]:
# Compute correlation matrix
corr_matrix = data_clean.corr()

# Visualize heatmap
plt.figure(figsize=(8, 6))
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='coolwarm',
    fmt=".2f",
    linewidths=0.5,
    annot_kws={"size": 12}
)
plt.title("Correlation: Close Price vs. Rolling Mean vs. Volatility", pad=20)
plt.xticks(rotation=45)
plt.yticks(rotation=0)
plt.show()
[Figure: correlation heatmap of Close, Rolling Mean, and Volatility]

Directions: Handling Multicollinearity¶

  1. Variance Inflation Factor (VIF) detects collinearity.
In [ ]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Flatten the (Price, Ticker) MultiIndex columns produced by yfinance
data_clean.columns = [f"{a}-{b}" if b else a for a, b in data_clean.columns]
X = data_clean[['Close-AAPL', 'Rolling_Mean', 'Volatility']]

# Calculate VIF for each feature
vif = pd.DataFrame()
vif["Variable"] = X.columns
vif["VIF"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]
print(vif)
# 'Close-AAPL' and 'Rolling_Mean' show severe collinearity (VIF >> 10)
       Variable         VIF
0    Close-AAPL  635.027573
1  Rolling_Mean  642.455302
2    Volatility    2.870124

Principal Component Analysis (PCA) reduces dimensionality.¶

In [ ]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

print("Number of components:", pca.n_components_)
print("Explained variance ratio:", pca.explained_variance_ratio_)
# The two components together explain ~99.6% of the variance in the original features
Number of components: 2
Explained variance ratio: [0.80805975 0.18805552]

Ridge Regression stabilizes estimates when collinearity is high.¶

In [ ]:
from sklearn.linear_model import Ridge,LinearRegression
from sklearn.model_selection import train_test_split

# Note: the target Close-AAPL is itself a column of X, so OLS reproduces it exactly;
# Ridge shrinks that coefficient and spreads weight onto the collinear Rolling_Mean
ridge = Ridge(alpha=1.0).fit(X_scaled, data_clean['Close-AAPL'])
ols = LinearRegression().fit(X_scaled, data_clean['Close-AAPL'])
print("OLS Coefficients:", ols.coef_)
print("Ridge Coefficients:", ridge.coef_)
OLS Coefficients: [ 4.12413561e+01  1.30341216e-14 -2.39379470e-16]
Ridge Coefficients: [39.87665979  1.32399118 -0.04275847]

Damage:¶

Unreliable Coefficients: Coefficients become untrustworthy and unstable.

Difficult Interpretation: Cannot isolate the unique impact of correlated predictors.

Inflated Standard Errors: Features might appear wrongly insignificant.

Misleading Explanations: Model may generalize poorly despite good in-sample fit.

Flawed Strategies: Trading or risk signals based on unstable coefficients are unreliable.

Deployment:¶

Both PCA and Ridge Regression are deployable strategies to mitigate the negative impacts of multicollinearity identified during diagnostics.

PCA achieves this by creating new, uncorrelated features, while Ridge modifies the estimation process to produce more stable coefficients for the original features.

The choice depends on whether dimensionality reduction or maintaining original feature interpretability (albeit with shrunken coefficients) is preferred for the specific modeling task.

References:¶

Belsley, D. A., Kuh, E., & Welsch, R. E. (1980). Regression diagnostics: Identifying influential data and sources of collinearity. Wiley.

Greene, W. H. (2018). Econometric analysis (8th ed.). Pearson.

Gujarati, D. N., & Porter, D. C. (2009). Basic econometrics (5th ed.). McGraw-Hill Irwin.

Hastie, T., Tibshirani, R., & Friedman, J. (2009). The elements of statistical learning: Data mining, inference, and prediction (2nd ed.). Springer.

Hoerl, A. E., & Kennard, R. W. (1970). Ridge regression: Biased estimation for nonorthogonal problems. Technometrics, 12(1), 55-67.

James, G., Witten, D., Hastie, T., & Tibshirani, R. (2021). An introduction to statistical learning: With applications in R (2nd ed.). Springer.

Jolliffe, I. T. (2002). Principal component analysis (2nd ed.). Springer.
