Introduction¶

In the intricate domain of derivatives trading and risk management, accurate volatility modeling stands as a cornerstone for pricing instruments, assessing risk, and ensuring regulatory compliance. The reliability of these models hinges on a nuanced understanding of the statistical properties governing financial data: skewness, kurtosis, heteroscedasticity, outlier sensitivity, and temporal misalignments in time series. This work is structured to systematically address these critical elements, bridging theoretical foundations with pragmatic solutions to enhance model robustness and decision-making efficacy.

First, we explore kurtosis (Section 1) and heteroscedasticity (Section 2). Next, we analyze skewness (Section 3), followed by a discussion of sensitivity to outliers (Section 4). Lastly, we confront the complexities of integrating time series with mismatched frequencies, such as combining daily stock prices and quarterly earnings (Section 5). In each section, we provide definitions, detailed descriptions, example demonstrations, visualizations (plots/diagrams), diagnostic methods, and an analysis of the risks posed by each issue.

Kurtosis/Heteroscedasticity¶

1.1 Kurtosis¶

Before defining kurtosis, it is essential to understand the concept of moments in statistics.

Moments: In statistics, moments are quantitative measures (numerical summaries) that describe key characteristics of a probability distribution or dataset: central tendency (mean), dispersion (variance), asymmetry (skewness), and tail behavior (kurtosis).

Let $\{X_{i}\}_{i=1}^{N}$ be a set of random variables. Moments can be classified as either population moments ($\mu_r$) or sample moments ($m_r$). Both can be further classified into three types: raw moments, centered moments, and standardized moments (Prokop and Reeves 439–443).

a) Raw moments can be defined as $\mu_{r} = E(X^r), \text{and } m_r= \frac{1}{n} \sum_{i=1}^{n} (x_{i}^r)$, where $r$ is the order of the moment. We observe that for $r = 1$: $\mu_{1} = E(X) = \mu$ and $m_1= \frac{1}{n} \sum_{i=1}^{n} x_{i} = \bar{x}$, that is, the first population (sample) raw moment is the population (sample) mean.

b) Centered moments measure the deviation of values from the mean and are defined as: $\mu_{r} = E[(X - \mu)^r], \text{ and } m_r= \frac{1}{n} \sum_{i=1}^{n} (x_{i} - \bar{x} )^r$, where $\mu$ is the population mean and $\bar{x}$ the sample mean.

If $r = 2$, then: $ \begin{aligned} \mu_{2} &= E[(X - \mu)^2] = E[X^2 - 2\mu X + \mu^2] = E(X^2) - 2\mu E(X) + \mu^2 \\ &= E(X^2) - 2E^2(X) + E^2(X) = E(X^2) - E^2(X) = \operatorname{Var}(X), \\ m_2 &= \frac{1}{n} \sum_{i=1}^{n} (x_{i} - \bar{x})^2 = \operatorname{Var}(x) \quad \text{(sample variance without bias correction)}. \end{aligned} $

c) Standardized moments are dimensionless quantities obtained by dividing the centered populational (sample) moments by the populational (sample - $s$) standard deviation raised to the corresponding power $r$: $\tilde{\mu}_{r} = \frac{\mu_{r}}{\sigma^r} \text{ and }\tilde{m}=\frac{m_r}{s^r}.$
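The three classes of moments can be illustrated with a short numerical sketch (not part of the original text; the synthetic sample and its parameters are arbitrary assumptions for illustration):

```python
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(loc=2.0, scale=3.0, size=100_000)  # arbitrary synthetic sample

def raw_moment(x, r):
    """r-th sample raw moment: m_r = (1/n) * sum(x_i^r)."""
    return np.mean(x ** r)

def central_moment(x, r):
    """r-th sample central moment: m_r = (1/n) * sum((x_i - x_bar)^r)."""
    return np.mean((x - x.mean()) ** r)

def standardized_moment(x, r):
    """r-th standardized moment: central moment divided by s^r."""
    s = x.std()  # no bias correction, consistent with m_2 above
    return central_moment(x, r) / s ** r

print(raw_moment(x, 1))           # first raw moment = sample mean (~2)
print(central_moment(x, 2))       # second central moment = sample variance (~9)
print(standardized_moment(x, 3))  # third standardized moment = skewness (~0)
print(standardized_moment(x, 4))  # fourth standardized moment = kurtosis (~3)
```

The last line anticipates Section 1.1.1: the fourth standardized moment of a normal sample is close to 3.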

1.1.1 Definition¶

Kurtosis quantifies the tailedness of a distribution, not peakedness, a common misconception. As Anderson and Kenton (2024) note, "A distribution can be infinitely peaked with low kurtosis, and a distribution can be perfectly flat-topped with infinite kurtosis". While early interpretations (Lee 425, Finucan 112) associated kurtosis with peak sharpness, modern research emphasizes that it reflects tail heaviness and the likelihood of outliers (Bai and Ng 49-50).

Kurtosis is defined as the 4th standardized population moment (Balanda and MacGillivray 7, Ruppert 1-2):
$K(X) = \tilde{\mu}_{4} = \frac{\mu_{4}}{\sigma^4}=\frac{E[(X - \mu)^4]}{\sigma^4} = E \left[ \frac{(X - \mu)}{\sigma} \right]^4.$ For a sample, kurtosis is estimated using the 4th standardized sample moment: $ \hat{K}(X) = \tilde{m}_{4} = \frac{m_{4}}{s^4}=\frac{1}{n} \sum_{i=1}^{n} \left(\frac{x_{i} - \bar{x}}{s}\right)^4, \: s^2=\frac{1}{n} \sum_{i=1}^{n} (x_{i} - \bar{x} )^2. $

1.1.2 Excess Kurtosis¶

Kallner (245) defines excess kurtosis as $\hat{K}(X) - 3$, where: a) $\hat{K}(X) = 3$: the distribution is mesokurtic (e.g., the normal distribution) (Figure 1.1); b) $\hat{K}(X) > 3$: the distribution is leptokurtic (lepto meaning "thin") (Figure 1.1); and c) $\hat{K}(X) < 3$: the distribution is platykurtic (platy meaning "broad").
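These three cases can be checked quickly with scipy.stats.kurtosis, which returns excess kurtosis when fisher=True (a sketch on simulated data, not taken from the source):

```python
import numpy as np
from scipy import stats

rng = np.random.default_rng(42)
normal_s = rng.normal(size=50_000)    # mesokurtic: excess kurtosis ~ 0
laplace_s = rng.laplace(size=50_000)  # leptokurtic: excess kurtosis ~ +3
uniform_s = rng.uniform(size=50_000)  # platykurtic: excess kurtosis ~ -1.2

for name, sample in [("normal", normal_s), ("laplace", laplace_s), ("uniform", uniform_s)]:
    excess = stats.kurtosis(sample, fisher=True)  # K(X) - 3
    print(f"{name:8s} excess kurtosis = {excess:+.2f}")
```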

1.1.3 Multivariate Extension of Kurtosis¶

For multivariate distributions, kurtosis can be extended using methods outlined by Mardia (115–128).
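Mardia's kurtosis measure $b_{2,d}$ can be sketched as follows (an illustrative implementation, not from the source): it is the average squared Mahalanobis distance of the observations from the sample mean, and it tends to $d(d+2)$ for a $d$-variate normal distribution.

```python
import numpy as np

def mardia_kurtosis(X):
    """Mardia's multivariate kurtosis b_{2,d}: the mean of squared
    Mahalanobis distances of the rows of X from the sample mean."""
    centered = X - X.mean(axis=0)
    S_inv = np.linalg.inv(np.cov(X, rowvar=False, bias=True))
    d2 = np.einsum('ij,jk,ik->i', centered, S_inv, centered)  # Mahalanobis^2
    return np.mean(d2 ** 2)

rng = np.random.default_rng(1)
X = rng.multivariate_normal(mean=[0, 0], cov=[[1, 0.3], [0.3, 2]], size=20_000)
print(mardia_kurtosis(X))  # ~ d(d+2) = 8 for a bivariate normal
```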

In [475]:
!pip install numpy scipy matplotlib statsmodels pandas --quiet
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.stats.api as sms
import sys
import os
import warnings
warnings.filterwarnings('ignore')

class HiddenPrints:
    def __enter__(self):
        self._original_stdout = sys.stdout
        sys.stdout = open(os.devnull, 'w')

    def __exit__(self, exc_type, exc_val, exc_tb):
        sys.stdout.close()
        sys.stdout = self._original_stdout
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

def ignore_exceptions(func):
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception:
            pass
    return wrapper


#--------------------------------
np.random.seed(42)
x = np.linspace(-4, 4, 1000)

# Function to compute sample kurtosis
def compute_kurtosis(data):
    n = len(data)
    mean = np.mean(data)
    std_dev = np.std(data, ddof=1)
    return np.sum(((data - mean) / std_dev) ** 4) / n


normal_data = np.random.normal(0, 1, 10000)
normal_pdf = stats.norm.pdf(x, 0, 1)
normal_kurtosis = compute_kurtosis(normal_data)


leptokurtic_data = np.random.laplace(0, 0.5, 10000)
leptokurtic_pdf = stats.laplace.pdf(x, 0, 0.5)
leptokurtic_kurtosis = compute_kurtosis(leptokurtic_data)


# Platykurtic example: a uniform distribution (kurtosis = 1.8 < 3).
# Note: a rescaled normal such as N(0, 2) would still be mesokurtic (K = 3).
platykurtic_data = np.random.uniform(-3.5, 3.5, 10000)
platykurtic_pdf = stats.uniform.pdf(x, -3.5, 7)
platykurtic_kurtosis = compute_kurtosis(platykurtic_data)


plt.figure(figsize=(8, 4))
plt.plot(x, normal_pdf, label=f'Mesokurtic (K=3; sample K={normal_kurtosis:.1f})', color='blue', linewidth=2)
plt.plot(x, leptokurtic_pdf, label=f'Leptokurtic (K>3; sample K={leptokurtic_kurtosis:.1f})', color='orange', linewidth=2)
plt.plot(x, platykurtic_pdf, label=f'Platykurtic (K<3; sample K={platykurtic_kurtosis:.1f})', color='red', linewidth=2)


plt.title("Figure 1.1- Types of Kurtosis")
plt.xlabel("Random data")
plt.ylabel("Density")
plt.legend()
plt.show()

1.2 Diagnosis of Kurtosis¶

Kurtosis can be diagnosed through three main approaches:

a) Kurtosis calculation: estimate the kurtosis $K(X)$ and interpret the excess kurtosis $K(X)-3$.

b) Visual inspection, using box plots, histograms, and Q-Q plots (Figure 1.1). Box plots: leptokurtic distributions show more outliers, while platykurtic ones show fewer. Histograms: overlay a density curve to compare with the target distribution (Figure 1.1). Q-Q plots: deviations from the line at the extremes suggest heavy or light tails. Comparison with a uniform distribution: (1) overlay the density function of $U(M-\sigma\sqrt{3},\, M+\sigma\sqrt{3})$, which matches the target's mean and standard deviation; (2) identify excess peak and tails (for unimodal distributions) or missing areas (for U-shaped distributions); (3) visualize differences in the CDF by measuring deviations from the uniform CDF (Figure 1.2) (Kotz and Seier 348-350).

c) Hypothesis testing, using D'Agostino's $K^{2}$ test, the Jarque-Bera test, or the Anscombe-Glynn test (Bai and Ng 49, 51).

Jarque-Bera (JB) test: the test statistic is $\text{JB} = N \left( \frac{\hat{g}_{1}^2}{6} + \frac{[\hat{K}(X) - 3]^2}{24} \right) \stackrel{d}{\longrightarrow} \chi^2_2$, where $g_{1} = \frac{\mu_{3}}{\sigma^3}$ and $\hat{g}_{1} = \frac{\hat{\mu}_{3}}{S^3}$ denote the population and sample skewness, respectively; $\mu_{3}$ is the third central moment and $S$ the sample standard deviation (Kim and White 3, Bastianin 634). Null hypothesis: the data follow a mesokurtic (normal) distribution ($g_{1} = 0$, $K(X) = 3$). Reject $H_0$ if $JB > 5.99$ at the $5\%$ significance level, or use the p-value.

D'Agostino's $K^2$ test combines skewness ($g_1$) and kurtosis ($K(X)$) into a chi-squared statistic: $K^2 = Z_1^2 + Z_2^2 \stackrel{d}{\longrightarrow} \chi^2_2$, where $Z_1 = \frac{\hat{g}_1}{\sqrt{6/N}}$ and $Z_2 = \frac{\hat{K}(X) - 3}{\sqrt{24/N}}$. The null hypothesis is the same as in the Jarque-Bera test.

Anscombe-Glynn test (Anscombe and Glynn 228): the test statistic is $AG = \frac{\hat{K}(X) - 3}{\sqrt{24/N}} \stackrel{d}{\longrightarrow} N(0,1)$. Reject $H_0$ if $|AG| > 1.96$.

1.3 Demonstration¶

In [476]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import beta, uniform, norm

sns.set_theme(style="whitegrid", font_scale=1.1)
plt.rcParams.update({'font.size': 12, 'figure.dpi': 100})

fig, axs = plt.subplots(2, 2, figsize=(16, 4))
plt.subplots_adjust(hspace=0.3, wspace=0.3, top=0.85)

# ================================================
# Beta(0.5,0.5) PDF vs uniform
# ================================================
a, b = 0.5, 0.5
x = np.linspace(0, 1, 1000)
unif_a = 0.5 - np.sqrt(3/8)
unif_b = 0.5 + np.sqrt(3/8)

axs[0, 0].plot(x, beta.pdf(x, a, b), label='Beta(0.5,0.5)', color='navy')
axs[0, 0].plot(x, uniform.pdf(x, unif_a, unif_b - unif_a), 'r--', label=f'Uniform({unif_a:.2f},{unif_b:.2f})')
axs[0, 0].fill_between(x, beta.pdf(x, a, b), uniform.pdf(x, unif_a, unif_b - unif_a),
                        where=(beta.pdf(x, a, b) < uniform.pdf(x, unif_a, unif_b - unif_a)),
                        color='gray', alpha=0.3, label='Missing area')
axs[0, 0].set_xlabel('x')
axs[0, 0].set_ylabel('Density')
axs[0, 0].set_title('a)')
axs[0, 0].legend()

# ================================================
# Beta(0.5,0.5) CDF vs uniform
# ================================================
axs[0, 1].plot(x, beta.cdf(x, a, b), label='Beta CDF', color='navy')
axs[0, 1].plot(x, uniform.cdf(x, unif_a, unif_b - unif_a), 'r--', label='Uniform CDF')
axs[0, 1].fill_between(x, beta.cdf(x, a, b), uniform.cdf(x, unif_a, unif_b - unif_a),
                        where=(beta.cdf(x, a, b) < uniform.cdf(x, unif_a, unif_b - unif_a)),
                        color='gray', alpha=0.3, label='Missing area')
axs[0, 1].set_xlabel('x')
axs[0, 1].set_ylabel('Cumulative probability')
axs[0, 1].set_title('b)')
axs[0, 1].legend()

# ================================================
# Normal PDF Peak/Tails
# ================================================
mu, sigma = 0, 1
x = np.linspace(mu - 4 * sigma, mu + 4 * sigma, 1000)
unif_a = mu - sigma * np.sqrt(3)
unif_b = mu + sigma * np.sqrt(3)

axs[1, 0].plot(x, norm.pdf(x, mu, sigma), label='Normal PDF', color='darkgreen')
axs[1, 0].plot(x, uniform.pdf(x, unif_a, unif_b - unif_a), 'r--', label=f'Uniform({unif_a:.2f},{unif_b:.2f})')
axs[1, 0].fill_between(x, norm.pdf(x, mu, sigma), uniform.pdf(x, unif_a, unif_b - unif_a),
                        where=(norm.pdf(x, mu, sigma) > uniform.pdf(x, unif_a, unif_b - unif_a)),
                        color='skyblue', alpha=0.3, label='Excess Area')
axs[1, 0].set_ylabel('Density')
axs[1, 0].set_title('c)')
axs[1, 0].legend()

# ================================================
# Normal CDF comparison
# ================================================
cdf = norm.cdf(x, mu, sigma)
uniform_cdf = np.clip((x - unif_a) / (unif_b - unif_a), 0, 1)

axs[1, 1].plot(x, cdf, 'b-', label='$F(x)$')
axs[1, 1].plot(x, uniform_cdf, 'r--', label='Uniform CDF')

axs[1, 1].annotate('Position of peak\nto the right of $M$', xy=(mu + sigma * 0.5, 0.65),
                    xytext=(mu + 2, 0.3),
                    arrowprops=dict(arrowstyle="->", color='k'),
                    ha='center')

axs[1, 1].set_xlabel('$x$')
axs[1, 1].set_ylabel('Cumulative Probability')
axs[1, 1].set_title('d)')
axs[1, 1].legend()


fig.suptitle('Figure 1.2- Diagnosis of Kurtosis based on Comparison of CDF with a Uniform Distribution', fontsize=14)

plt.show()

1.4 Damage Caused by excess Kurtosis¶

Excess kurtosis in financial returns, characterized by "fat tails" and a sharper peak than the normal distribution, leads to significant systemic and practical damages:

Underestimated tail risk: Traditional models (e.g., Gaussian) fail to account for the heightened probability of extreme returns, causing institutions to undervalue risks. This results in flawed Value at Risk (VaR) calculations and insufficient capital buffers, exposing firms to catastrophic losses during crises (López Martín et al. 2033); Mispricing of derivatives: Options and other tail-sensitive instruments are often priced using Black-Scholes models, which assume normality. Excess kurtosis leads to mispriced volatility (e.g., undervalued out-of-the-money options), distorting hedging strategies and amplifying systemic risk (Hull 158); Ineffective portfolio optimization: Modern portfolio theory relies on mean-variance frameworks, ignoring tail risks. Portfolios optimized under Gaussian assumptions are overly concentrated in high-kurtosis assets, increasing vulnerability to market shocks (Taleb 72); Regulatory shortcomings: Basel accords and stress-testing frameworks often use simplified distributions, underestimating the likelihood of extreme events. This creates regulatory blind spots, as seen in the 2020 COVID-19 market crash (Danielsson et al.).

1.5 Directions to address excess Kurtosis¶

Bounded mixture distributions: The U-GBC (Uniform Generalized Bicubic) and U-TSP (Uniform Two-Sided Power) models proposed by López Martín et al. explicitly recover empirical kurtosis while operating within finite domains. These avoid the infinite-variance pitfalls of stable Paretian/Lévy models and closely match financial data (2041); Extreme value theory (EVT): Focuses on tail behavior using generalized Pareto distributions (GPD) to model exceedances beyond thresholds. EVT improves VaR and expected shortfall (ES) estimates by directly addressing fat tails (Embrechts et al. 109); Volatility clustering models: GARCH and stochastic volatility models capture time-varying volatility, indirectly reducing kurtosis by accounting for clustered extreme returns (Bollerslev 542); Non-extensive statistical mechanics: q-Gaussian distributions with $1 < q < 3$ flexibly model fat tails, though they face challenges in finite-variance regimes (Tsallis et al. 2040); Robust risk metrics: Replace VaR with ES or entropic risk measures, which better capture tail risk severity (Rockafellar and Uryasev); Student’s t-distribution: While it accommodates heavier tails than the normal distribution, its effectiveness diminishes for datasets requiring very low degrees of freedom (Blattberg and Gonedes 2038).
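The EVT direction can be sketched with a peaks-over-threshold fit (an illustrative example on simulated heavy-tailed losses; the data-generating process and the 95% threshold are assumptions, not from the source):

```python
import numpy as np
from scipy import stats

rng = np.random.default_rng(7)
losses = -rng.standard_t(df=3, size=50_000)  # simulated fat-tailed losses

# Peaks-over-threshold: model exceedances above a high quantile with a GPD
u = np.quantile(losses, 0.95)                # threshold (95th percentile)
exceedances = losses[losses > u] - u
xi, _, beta = stats.genpareto.fit(exceedances, floc=0)

# Tail-based 99% VaR: solve P(L > x) = p_u * (1 - F_GPD(x - u)) = 1%
p_u = (losses > u).mean()
var_99 = u + stats.genpareto.ppf(1 - 0.01 / p_u, xi, loc=0, scale=beta)
print(f"shape xi = {xi:.2f} (xi > 0 indicates a heavy tail), 99% VaR = {var_99:.2f}")
```

A positive fitted shape parameter $\xi$ signals a Pareto-type tail; for $t$-distributed data, $\xi \approx 1/\text{df}$.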

In [477]:
import numpy as np
import pandas as pd
from scipy import stats


np.random.seed(42)

# Generate normal and fat-tailed (Student's t) data
normal_data = np.random.normal(loc=0, scale=1, size=1000)
t_data = np.random.standard_t(df=3, size=1000)

# Anscombe-Glynn test function
def anscombe_glynn_test(data):
    excess_kurtosis = stats.kurtosis(data, fisher=True)
    n = len(data)
    ag = excess_kurtosis / np.sqrt(24 / n)
    p_value = 2 * (1 - stats.norm.cdf(np.abs(ag)))
    return ag, p_value

# Diagnostic tests
def run_tests(data, label):
    jb = stats.jarque_bera(data)
    k2 = stats.normaltest(data)
    ag, p_ag = anscombe_glynn_test(data)
    return {
        "Dataset": label,
        "Jarque-Bera Stat": jb.statistic,
        "Jarque-Bera p-value": jb.pvalue,
        "D’Agostino K² Stat": k2.statistic,
        "D’Agostino K² p-value": k2.pvalue,
        "Anscombe-Glynn Stat": ag,
        "Anscombe-Glynn p-value": p_ag
    }

results = pd.DataFrame([
    run_tests(normal_data, "Normal"),
    run_tests(t_data, "Student's t (df=3)")
])

# VaR
empirical_var = np.percentile(t_data, 5)
mean_t, std_t = np.mean(t_data), np.std(t_data)
normal_var = stats.norm.ppf(0.05, loc=mean_t, scale=std_t)

# Fit Student's t-distribution and calculate VaR
df, loc, scale = stats.t.fit(t_data)
t_var = stats.t.ppf(0.05, df=df, loc=loc, scale=scale)

var_results = pd.DataFrame({
    "Model": ["Normal VaR (5%)", "Empirical VaR (5%)", "t-Based VaR (5%)"],
    "Value": [normal_var, empirical_var, t_var]
})

print("Diagnostic Tests Results:")
print(results)
print("\nVaR Estimates:")
print(var_results)
Diagnostic Tests Results:
              Dataset  Jarque-Bera Stat  Jarque-Bera p-value  \
0              Normal          2.456373             0.292823   
1  Student's t (df=3)       1565.864886             0.000000   

   D’Agostino K² Stat  D’Agostino K² p-value  Anscombe-Glynn Stat  \
0            2.575518           2.758884e-01             0.427357   
1          146.183757           1.805513e-32            39.443466   

   Anscombe-Glynn p-value  
0                0.669119  
1                0.000000  

VaR Estimates:
                Model     Value
0     Normal VaR (5%) -2.575878
1  Empirical VaR (5%) -2.353898
2    t-Based VaR (5%) -2.304689

The diagnostic tests applied to synthetic normal data (Jarque-Bera: Stat=2.46, p=0.293; D’Agostino’s K²: Stat=2.58, p=0.276; Anscombe-Glynn: AG=0.43, p=0.669) confirm adherence to normality, while the fat-tailed Student’s t data exhibit significant deviations (JB=1565.86, $p\approx 0$; $K^2=146.18$, $p\approx 0$; AG=39.44, $p\approx 0$), rejecting normality due to excess kurtosis. Although the normal model inflates the VaR estimate (-2.58 vs. the empirical -2.35 at the $5\%$ level, a consequence of the overestimated standard deviation drawn from the t-data’s heavy-tailed variance), fitting a Student’s t-distribution (ν=3.42, scale=1.04) aligns the t-based VaR (-2.30) closely with the empirical result, demonstrating its efficacy in correcting kurtosis-induced miscalculations.

2. Heteroscedasticity¶

Heteroscedasticity occurs when the variance of the error term $e_i$ in a regression model is nonconstant across observations. Formally, it is defined as: $E(e_i^2) = \sigma_i^2 \quad \text{(varying with } i\text{)}$. This violates the classical linear regression model assumption of homoscedasticity, where $E(e_i^2) = \sigma^2$ (constant for all $i$) (Gujarati and Porter 1–2).

Heteroscedasticity arises when the dispersion of residuals changes systematically with an explanatory variable or over time, often observed in cross-sectional data due to scale effects (e.g., income vs. savings) (Gujarati and Porter 3).
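The scale effect can be made concrete by simulating a regression whose error standard deviation grows with the regressor (a minimal sketch; the proportionality $\sigma_i = 0.5\,x_i$ is an assumption for illustration):

```python
import numpy as np

rng = np.random.default_rng(0)
n = 200
x = np.linspace(1, 50, n)            # e.g., income
e = rng.normal(0, 0.5 * x)           # error sd grows with x: sigma_i = 0.5 * x_i
y = 10 + 2 * x + e                   # e.g., savings

# Rough check: residual spread for large x vs small x
lower = np.std(e[x < x.mean()])
upper = np.std(e[x >= x.mean()])
print(f"sd(e | low x) = {lower:.2f},  sd(e | high x) = {upper:.2f}")
```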

2.1 Diagnosis¶

Residual Plots: Visual inspection of residuals vs. predictors or fitted values (fan-shaped pattern); Formal Tests: Park Test: Regress $\ln(\text{residuals}^2)$ on $\ln(X)$ (Gujarati and Porter 285–286); Glejser Test: Regress absolute residuals on $X$ (Adegbilero-Iwari et al. 287–288); White Test: Auxiliary regression of squared residuals on predictors, their squares, and cross-products (Gujarati and Porter 289–290); Breusch-Pagan Test: Checks linear dependence of variance on predictors (Adegbilero-Iwari et al. 287).

2.2 Damage¶

OLS estimators remain unbiased but inefficient (higher variance); Biased standard errors lead to incorrect $t$-statistics and $p$-values, increasing Type I/II errors (Gujarati and Porter 280–281); Confidence intervals and hypothesis tests become unreliable.

2.3 Directions¶

Weighted Least Squares (WLS): Transform data using weights $1/\sqrt{X}$ or $1/X$ (Gujarati and Porter 293–294); Logarithmic transformation: Compress scales to stabilize variance (e.g., $\ln(Y)$ vs. $\ln(X)$) (Gujarati and Porter 297); Robust Standard Errors: Use White’s heteroscedasticity-consistent estimators (Gujarati and Porter 298–299); ARCH/GARCH Models: For time-series data with volatility clustering (Hsieh 307–308).

2.4 Demonstration¶

In [478]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf

np.random.seed(42)
n = 50

x = np.linspace(1, 50, n)

# NOTE: y is an exact linear function of x, so the OLS residuals below are
# floating-point rounding errors whose magnitude grows with the fitted values,
# producing the (numerically driven) heteroscedastic pattern the tests detect
y = np.linspace(20, 500, n)

data = pd.DataFrame({'x': x, 'y': y})

# ---------------------------
# Fit the OLS Model and Conduct Formal Tests
# ---------------------------
X = sm.add_constant(data['x'])
ols_model = sm.OLS(data['y'], X).fit()

print("Table 1.1: OLS Regression Summary:")
print(ols_model.summary())

# ------------------------------------------
# 3. Correcting Heteroscedasticity using WLS
# ------------------------------------------
# Compute absolute residuals and fitted values
data["abs_residuals"] = np.abs(ols_model.resid)
data["fitted_values"] = ols_model.fittedvalues
Table 1.1: OLS Regression Summary:
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 1.147e+32
Date:                Sun, 23 Mar 2025   Prob (F-statistic):               0.00
Time:                        22:49:05   Log-Likelihood:                 1430.2
No. Observations:                  50   AIC:                            -2856.
Df Residuals:                      48   BIC:                            -2853.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         10.2041   2.68e-14   3.81e+14      0.000      10.204      10.204
x              9.7959   9.15e-16   1.07e+16      0.000       9.796       9.796
==============================================================================
Omnibus:                        4.966   Durbin-Watson:                   0.097
Prob(Omnibus):                  0.083   Jarque-Bera (JB):                2.236
Skew:                          -0.188   Prob(JB):                        0.327
Kurtosis:                       2.034   Cond. No.                         59.5
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [479]:
from statsmodels.stats.diagnostic import het_breuschpagan

# Breusch-Pagan Test
bp_test = het_breuschpagan(ols_model.resid, X)
labels = ['LM Statistic', 'LM-Test p-value', 'F-Statistic', 'F-Test p-value']
print("\n OLS Breusch-Pagan Test Results:")
for label, value in zip(labels, bp_test):
    print(f"{label}: {value:.4f}")
 OLS Breusch-Pagan Test Results:
LM Statistic: 41.6877
LM-Test p-value: 0.0000
F-Statistic: 240.7305
F-Test p-value: 0.0000
In [480]:
# Fit auxiliary OLS model to get weights
model_temp = smf.ols("abs_residuals ~ fitted_values", data=data).fit()

# Compute weights as the inverse squared fitted |residuals|
weights = model_temp.fittedvalues ** -2
data["weights"] = weights

# Fit WLS model with the estimated weights
Y = data["y"]
X = sm.add_constant(data["x"])  # add an intercept
model_WLS = sm.WLS(Y, X, weights=data["weights"]).fit()
In [481]:
# Diagnostic Plots
# ---------------------------
fig, axs = plt.subplots(1, 2, figsize=(11, 5))

# Plot 1: OLS Residuals vs. Fitted Values
axs[0].scatter(ols_model.fittedvalues, ols_model.resid, alpha=0.7, edgecolors='k')
axs[0].axhline(y=0, color='red', linestyle='--')
axs[0].set_title("a) OLS Residuals vs. Fitted Values")
axs[0].set_xlabel("Fitted Values")
axs[0].set_ylabel("Residuals")

# Plot 2: WLS Residuals vs. Fitted Values
axs[1].scatter(model_WLS.fittedvalues, model_WLS.resid, alpha=0.7, edgecolors='k', color='green')
axs[1].axhline(y=0, color='red', linestyle='--')
axs[1].set_title("b) WLS Residuals vs. Fitted Values")
axs[1].set_xlabel("Fitted Values")
axs[1].set_ylabel("Residuals")

plt.tight_layout()
fig.suptitle('Figure 1.3 - Diagnostic Plots: a) OLS Residuals vs Fitted Values, b) WLS Residuals vs Fitted Values', fontsize=14)

plt.show()
In [482]:
import pandas as pd
from statsmodels.stats.api import het_breuschpagan

# Perform Breusch-Pagan Test for OLS
bp_test = het_breuschpagan(ols_model.resid, X)
bp_test_results = {
    'Statistic': bp_test[0],
    'LM-Test p-value': bp_test[1],
    'F-Statistic': bp_test[2],
    'F-Test p-value': bp_test[3]
}

# Perform Breusch-Pagan Test for WLS
WLSbp_test = het_breuschpagan(model_WLS.resid, X)
WLSbp_test_results = {
    'Statistic': WLSbp_test[0],
    'LM-Test p-value': WLSbp_test[1],
    'F-Statistic': WLSbp_test[2],
    'F-Test p-value': WLSbp_test[3]
}

bp_results_df = pd.DataFrame({
    'OLS': bp_test_results,
    'WLS': WLSbp_test_results
})

print("\nBreusch-Pagan Test Results DataFrame:")
print(bp_results_df)
Breusch-Pagan Test Results DataFrame:
                          OLS       WLS
Statistic        4.168775e+01  0.001889
LM-Test p-value  1.070779e-10  0.965334
F-Statistic      2.407305e+02  0.001813
F-Test p-value   2.481934e-20  0.966210

Interpretation¶

The Breusch-Pagan test results show significant heteroscedasticity in the OLS model, with a p-value of $1.0708 \times 10^{-10} < 0.05$, indicating that the residuals have non-constant variance. In contrast, the WLS model's results suggest no heteroscedasticity, as evidenced by a p-value of $0.9653 > 0.05$. These findings imply that WLS successfully addresses heteroscedasticity, making it more suitable for the data; OLS, by contrast, may not be appropriate for data with heteroscedastic residuals (see also Figure 1.3).

2.5 Conclusion¶

Kurtosis quantifies tail heaviness, not peakedness, with excess kurtosis in financial data leading to underestimated tail risks, mispriced derivatives, and flawed portfolio optimization. Diagnosis combines standardized moments, visual tools (Q-Q plots), and tests (Jarque-Bera). Remedies include bounded mixtures, EVT, and GARCH models to address fat-tailed distributions. Heteroscedasticity violates regression assumptions, causing inefficient OLS estimates and biased inference, detected via Breusch-Pagan tests or residual plots. Corrective measures like WLS, robust errors, and ARCH/GARCH restore model validity by stabilizing variance.

References¶

Adegbilero-Iwari, Oluwaseun, et al. "Comparison of Different Tests for Detecting Heteroscedasticity in Datasets." Annals. Computer Science Series, vol. 18, no. 2, 2020, pp. 77–85.

Anderson, S., and W. Kenton. "Kurtosis." Investopedia, 2024, https://www.investopedia.com/terms/k/kurtosis.asp.

Anscombe, F. J., and W. J. Glynn. "Distribution of the Kurtosis Statistic b2 for Normal Samples." Biometrika, vol. 70, no. 1, 1983, pp. 227–234, https://doi.org/10.1093/biomet/70.1.227.

Bai, Jushan, and Serena Ng. "Tests for Skewness, Kurtosis, and Normality for Time Series Data." Journal of Business & Economic Statistics 23.1 (2005): 49-60.

Balanda, Kevin P., and Helen L. MacGillivray. "Kurtosis and Spread." Canadian Journal of Statistics 18.1 (1990): 17-30.

Balanda, Kevin P., and H. L. MacGillivray. "Kurtosis: A Critical Review." The American Statistician 42.2 (1988): 111-119.

Bastianin, Andrea. "Robust Measures of Skewness and Kurtosis for Macroeconomic and Financial Time Series." Applied Economics 52.7 (2020): 637-670.

Blattberg, Robert, and Nicholas Gonedes. “A Comparison of the Stable and Student Distributions as Statistical Models for Stock Prices.” Journal of Business, vol. 47, no. 2, 1974, pp. 244–280.

Bollerslev, Tim. “A Conditionally Heteroskedastic Time Series Model for Speculative Prices and Rates of Return.” Review of Economics and Statistics, vol. 69, no. 3, 1987, pp. 542–547.

Danielsson, Jon, et al. “Model Risk of Risk Models.” Journal of Financial Stability, vol. 23, 2016, pp. 79–91.

Embrechts, Paul, et al. Modelling Extremal Events for Insurance and Finance. Springer, 1997.

Finucan, H. M. "A Note on Kurtosis." Journal of the Royal Statistical Society: Series B (Methodological) 26.1 (1964): 111-112.

Gujarati, Damodar N., and Dawn C. Porter. Essentials of Econometrics. 5th ed., McGraw-Hill, 2009.

Hsieh, David A. "Modeling Heteroscedasticity in Daily Foreign-Exchange Rates." Journal of Business & Economic Statistics, vol. 7, no. 3, 1989, pp. 307–317.

Hull, John C. Options, Futures, and Other Derivatives. 10th ed., Pearson, 2017.

Kallner, Anders. Laboratory Statistics: Methods in Chemistry and Health Sciences. Elsevier, 2017.

Kim, Tae-Hwan, and Halbert White. "On More Robust Estimation of Skewness and Kurtosis." Finance Research Letters 1.1 (2004): 56-73.

Kotz, Samuel, and Edith Seier. "Visualizing Peak and Tails to Introduce Kurtosis." The American Statistician 62.4 (2008): 348-354.

Lee, Jay. "Statistics, Descriptive." (2009): 422-428.

López Martín, María del Mar, et al. “Treatment of Kurtosis in Financial Markets.” Physica A: Statistical Mechanics and Its Applications, vol. 391, no. 9, 2012, pp. 2032–2045.

Mardia, Kanti V. "Applications of Some Measures of Multivariate Skewness and Kurtosis in Testing Normality and Robustness Studies." Sankhyā: The Indian Journal of Statistics, Series B (1974): 115-128.

Prokop, Richard J., and Anthony P. Reeves. "A Survey of Moment-Based Techniques for Unoccluded Object Representation and Recognition." CVGIP: Graphical Models and Image Processing 54.5 (1992): 438-460.

Rockafellar, R. Tyrrell, and Stanislav Uryasev. “Optimization of Conditional Value-at-Risk.” Journal of Risk, vol. 2, no. 3, 2000, pp. 21–41.

Ruppert, David. "What Is Kurtosis? An Influence Function Approach." The American Statistician 41.1 (1987): 1-5.

Taleb, Nassim Nicholas. The Black Swan: The Impact of the Highly Improbable. Random House, 2007.

Tsallis, Constantino, et al. “Nonextensive Statistical Mechanics and Its Applications.” Lecture Notes in Physics, Springer, 2001.

"D'Agostino's K-squared Test." Wikipedia, https://en.wikipedia.org/wiki/D%27Agostino%27s_K-squared_test.

"Jarque–Bera Test." Wikipedia, https://en.wikipedia.org/wiki/Jarque%E2%80%93Bera_test.

3. Skewness¶

3.1 Definition:¶

Skewness is a statistical measure that quantifies the asymmetry of a data distribution. A perfectly symmetric distribution (such as the normal distribution) has a skewness of zero, meaning the left and right sides of the distribution mirror each other.

For a univariate dataset $Y_1$,$Y_2$,...,$Y_N$, the Fisher-Pearson coefficient of skewness is defined as:

$$ g_1 = \frac{m_3}{m_2^{3/2}} $$

where

$$ m_i = \frac{1}{N} \sum_{n=1}^{N} (Y_n - \bar{Y})^i $$

is the biased sample central moment of order $i$,

  • $\bar{Y}$ is the sample mean

  • $N$ is the number of data points

Many software programs compute the Adjusted Fisher-Pearson Coefficient of Skewness (e.g., scipy.stats.skew with bias=False; scipy's default, bias=True, returns $g_1$), defined as:

$$G_1 = \frac{\sqrt{N(N-1)}}{N-2} \, g_1$$

where,

  • $g_1$ is the Fisher-Pearson coefficient of skewness defined above
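The relation between $g_1$ and $G_1$ can be verified with scipy's `bias` flag (a small sketch; the sample is an arbitrary assumption):

```python
import numpy as np
from scipy.stats import skew

rng = np.random.default_rng(0)
data = rng.lognormal(size=30)   # small right-skewed sample
N = len(data)

g1 = skew(data, bias=True)      # Fisher-Pearson g_1 (scipy's default)
G1 = skew(data, bias=False)     # adjusted Fisher-Pearson G_1
adjusted = np.sqrt(N * (N - 1)) / (N - 2) * g1

print(g1, G1, adjusted)  # G1 and the manual adjustment agree
```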

Interpreting Skewness

  • Zero skew: the data are symmetric (a necessary, but not sufficient, condition for normality)
  • Positive skew (right skew): the right tail is longer
  • Negative skew (left skew): the left tail is longer
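The sign convention can be checked on simulated samples (a sketch, not from the source):

```python
import numpy as np
from scipy.stats import skew

rng = np.random.default_rng(0)
right = rng.lognormal(size=100_000)  # long right tail
left = -right                        # mirror image: long left tail
sym = rng.normal(size=100_000)       # symmetric

print(f"right-skewed sample: skew = {skew(right):+.2f}")  # positive
print(f"left-skewed sample:  skew = {skew(left):+.2f}")   # negative
print(f"symmetric sample:    skew = {skew(sym):+.2f}")    # near zero
```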

3.2 Description:¶

Skewness measures the symmetry of data distribution, especially in financial markets, where asset returns are often not completely symmetric. This means that the distribution of data on both sides of the mean may be biased, which can affect investor decisions, the robustness of financial models, and the predictive power of machine learning models.

For example, in the stock market:

Individual stock returns are often right-skewed (skewness > 0):

  • Small increases are common, but there are occasional large jumps (e.g., Tesla or Apple)

Returns may be left-skewed (skewness < 0) when the market crashes:

  • Returns are stable most of the time, but there are occasional large drops

Skewness not only affects investors' return expectations, but also the robustness of financial models. In option pricing, risk management, and machine learning modeling, data are often assumed to be normally distributed; if the data are strongly skewed, this assumption can lead to poor predictions.

3.3 Demonstration:¶

In [483]:
# !pip install yfinance
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew
import yfinance as yf

stock_data = yf.download("AAPL", start="2024-01-01", end="2025-01-01")
stock_data["Returns"] = stock_data["Close"].pct_change()
data_apple= stock_data["Returns"].dropna()
returns_skewness = skew(data_apple)
print("\n")
print(f"AAPL Skewness(return data from 2024-01-01 to 2025-01-01): {returns_skewness:.2f}")

AAPL Skewness(return data from 2024-01-01 to 2025-01-01): 0.50

Diagram:

In [484]:
plt.figure(figsize=(8, 4))
sns.histplot(stock_data["Returns"].dropna(), bins=50, kde=True, color='purple')
plt.title(f"AAPL Daily Returns (Skewness = {returns_skewness:.2f})")
plt.show()

3.4 Diagnosis¶

Observe the histogram: see whether the distribution is symmetrical (as in the demonstration above).

Calculate values: use scipy.stats.skew() to compute the coefficient:

  • If greater than 0, the data is right-skewed
  • If less than 0, the data is left-skewed
In [485]:
import scipy
from scipy.stats import probplot

print('Calculate values - scipy skew: ',scipy.stats.skew(data_apple))
Calculate values - scipy skew:  0.5013931445602677

Interpreting Skewness in a Box Plot

  1. Observe the relative position of the mean and median
  • If the mean is greater than the median, the data is right-skewed (positively skewed);
  • If the mean is less than the median, the data is left-skewed (negatively skewed);
  • If the mean is almost the same as the median, the data is approximately symmetric.

Sample: As can be seen in the figure below for Apple returns, the mean (green line) and the median (red line) almost overlap, indicating that the data is fairly symmetrical and the skewness is close to 0.

  2. Observe the length of the whiskers
  • If the data is strongly right-skewed (Skewness > 0), the upper whisker will be significantly longer than the lower whisker, and vice versa.

Sample: The upper whisker in the figure below (the thin upward line) is slightly longer than the lower whisker, indicating that the data may have a slight positive skewness (Skewness > 0), though it is not pronounced.

  3. Observe outliers: the data has several outliers extending upward, indicating some extremely large positive return values, which may slightly increase the mean and make the data slightly positively skewed.
In [486]:
# Boxplot
plt.figure(figsize=(6, 6))
sns.boxplot(y=stock_data["Returns"].dropna(), color="lightblue")
plt.title("Boxplot of stock AAPL returns")
plt.ylabel("Return Rate")
plt.axhline(stock_data["Returns"].mean(), color="green", linestyle="--", label="Mean")
plt.axhline(stock_data["Returns"].median(), color="red", linestyle="-.", label="Median")
plt.legend()
plt.show()

Interpreting Skewness in QQ Plot

Right-skewed (Positive Skewness):

  • Points on the right tail (upper right corner) deviate above the line.
  • Points on the left tail (lower left corner) deviate below the line.

Left-skewed (Negative Skewness):

  • Points on the right tail (upper right corner) fall below the line.
  • Points on the left tail (lower left corner) deviate above the line.

Normal Distribution:

  • Points roughly follow the diagonal line.
In [487]:
# QQ Plot
data = stock_data["Returns"].dropna()
fig, ax = plt.subplots(figsize=(6, 6))
probplot(data, dist="norm", plot=ax)
plt.title("QQ Plot of AAPL Returns 2024- 2025")
plt.show()

3.6 Damage:¶

  • Impact on the stability of the mean: the mean is strongly affected by extreme values (outliers), leading to misleading analysis results
  • Misleading risk assessment: when skewness is large, traditional VaR (value at risk) may underestimate extreme risks
  • Impact on machine learning models: some regression models assume symmetric data, and their performance degrades when skewness is too large

3.7 Directions¶

In finance, addressing skewness is crucial as it impacts risk assessment and model accuracy. When encountering skewed data, we can consider the following approaches:

3.7.1. Data Transformation: Apply logarithmic, square root, or Box-Cox transformations to reduce skewness. For example, the Box-Cox transformation is defined as:
$$ y(\lambda) = \begin{cases} \frac{x^\lambda - 1}{\lambda} & \text{if } \lambda \neq 0, \\ \ln(x) & \text{if } \lambda = 0. \end{cases}$$ 3.7.2. Robust Statistical Methods: Use non-parametric techniques (e.g., bootstrapping) to estimate parameters without assuming normality (Doane and Seward 6-15).
3.7.3. Alternative Distributions: Model data using skewed distributions like the skew-normal or generalized hyperbolic distribution (Azzalini 160).
3.7.4. Adjust Financial Models: Modify models such as CAPM or Value-at-Risk (VaR) to incorporate skewness, as traditional models often underestimate tail risk (Fama and French 21).
3.7.5. Diagnostic Tools: Combine skewness statistics (e.g., Pearson’s $Sk_2 = 3 \frac{(\bar{x} - m)}{s}$) with visualizations (histograms, Q-Q plots) to assess asymmetry (Doane and Seward 2-8).
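The transformations in 3.7.1 can be sketched in a few lines. The lognormal sample below is an assumed stand-in for a positive, right-skewed financial quantity; scipy.stats.boxcox estimates $\lambda$ by maximum likelihood.

```python
import numpy as np
from scipy import stats

rng = np.random.default_rng(1)
x = rng.lognormal(mean=0.0, sigma=0.8, size=2000)  # positive, right-skewed sample

x_log = np.log(x)              # log transform (the Box-Cox lambda = 0 case)
x_bc, lam = stats.boxcox(x)    # Box-Cox transform, lambda fitted by MLE

print(f"skew raw:     {stats.skew(x):.3f}")
print(f"skew log:     {stats.skew(x_log):.3f}")
print(f"skew Box-Cox: {stats.skew(x_bc):.3f}  (lambda = {lam:.3f})")
```

Because the sample is lognormal, the fitted $\lambda$ lands near 0 and both transforms bring the skewness close to zero. Note that Box-Cox requires strictly positive inputs, so returns (which can be negative) are usually shifted first or the transform is applied to prices instead.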

Conclusion¶

Skewness is an important statistical measure of the asymmetry of data, with applications in finance, economics, and statistical modeling.

  • The skewness of a normal distribution is 0, but real-world data often does not have zero skewness.
  • Right-skewed data (Skewness > 0) means gains are typically small but occasionally surge, a classic example being stock market returns.
  • Left-skewed data (Skewness < 0) means large declines are rare but have far-reaching consequences, such as the distribution of returns during a market crash.

References

Azzalini, Adelchi. “The Skew-Normal Distribution and Related Multivariate Families.” Scandinavian Journal of Statistics, vol. 32, no. 2, 2005, pp. 159–88.

Doane, David P., and Lori E. Seward. “Measuring Skewness: A Forgotten Statistic?” Journal of Statistics Education, vol. 19, no. 2, 2011, https://doi.org/10.1080/10691898.2011.11889611.

Fama, Eugene F., and Kenneth R. French. “Common Risk Factors in the Returns on Stocks and Bonds.” Journal of Financial Economics, vol. 33, no. 1, 1993, pp. 3–56.

NIST/SEMATECH. “1.3.5.11. Measures of Skewness and Kurtosis.” e-Handbook of Statistical Methods, NIST.

SciPy. “scipy.stats.skew.” SciPy v1.11.3 Manual.

Illowsky, Barbara, and Susan Dean. “2.6 Skewness and the Mean, Median, and Mode.” Statistics, OpenStax, 2020.

Zwillinger, Daniel, and Stephen Kokoska. CRC Standard Probability and Statistics Tables and Formulae. Chapman & Hall, 2000.

4 Addressing sensitivity to outliers¶

4.1 Definition:¶

  • Outliers are data points that significantly deviate from the general pattern of the dataset.

  • These extreme values can disproportionately influence statistical measures and models, leading to misleading conclusions.

  • Outliers can arise due to errors in data collection, recording mistakes, or represent legitimate but rare occurrences in a dataset (Aguinis et al. 271).

4.2 Description:¶

Outliers are broadly classified into three categories:

  • Error Outliers: These result from inaccuracies such as data entry mistakes, measurement errors, or sampling issues. They are non-legitimate observations that should be corrected or removed (Aguinis et al. 282).

  • Interesting Outliers: These are valid but extreme observations that might offer valuable insights. Instead of removing them, researchers should analyze them separately to identify patterns or novel findings (Aguinis et al. 284).

  • Influential Outliers: These points significantly impact model fit and parameter estimates. Their presence can change substantive conclusions, making it necessary to assess their influence carefully (Aguinis et al. 281).

4.3 Demonstration:¶

To illustrate the impact of outliers, we use Apple Inc. (AAPL) stock return data and apply a percentile-based capping method to handle extreme values.

In [488]:
import numpy as np
import pandas as pd
import yfinance as yf
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import skew

# 1. Get Apple stock data
stock_data = yf.download("AAPL", start="2024-01-01", end="2025-01-01")
stock_data["Returns"] = stock_data["Close"].pct_change()
apple_data = stock_data["Returns"].dropna()

# 2. Demonstrate the effect of outliers on skewness
original_skewness = skew(apple_data)
print(f"Original Skewness: {original_skewness:.2f}")

# 3. Introduce an artificial outlier
apple_data_with_outlier = apple_data.copy()
apple_data_with_outlier.iloc[0] = 0.20  # Adding a large outlier
outlier_skewness = skew(apple_data_with_outlier)
print(f"Skewness with Outlier: {outlier_skewness:.2f}")

# 4. Demonstrate outlier capping using percentiles
lower_bound = np.percentile(apple_data, 5)
upper_bound = np.percentile(apple_data, 95)
capped_data = np.clip(apple_data, lower_bound, upper_bound)
capped_skewness = skew(capped_data)
print(f"Skewness after Capping: {capped_skewness:.2f}")
Original Skewness: 0.50
Skewness with Outlier: 4.75
Skewness after Capping: -0.19

This method limits extreme values within a specified range, thereby reducing their impact without complete removal. As Aguinis et al. (2013) suggest, outlier handling should be transparent and context-driven to ensure accurate interpretations of results (281).

4.4 Diagnosis:¶

  • Original Returns: The dataset contains noticeable outliers, primarily in the right tail, indicating unusually high positive returns.

  • Capped Returns: After applying the capping technique, the distribution appears more symmetrical, demonstrating reduced skewness and a more stable dataset (Aguinis et al. 284).

4.5 Damage:¶

  • Original Returns: The presence of outliers can distort statistical measures like the mean and standard deviation, potentially misleading financial analysis and predictions (Aguinis et al. 275).

  • Capped Returns: By capping extreme values, we mitigate the distortion while preserving the underlying distribution, leading to more robust financial models (Aguinis et al. 280).

4.7 Directions¶

To mitigate the impact of outliers, we can follow these steps:

4.7.1. Define Outliers

  • Error Outliers: Caused by data entry errors, misreporting, or technical glitches (e.g., erroneous stock prices) (Aguinis et al. 270).
  • Influential Outliers: Legitimate but extreme observations affecting model parameters (e.g., Black Swan events impacting VaR calculations) (Aguinis et al. 281).

4.7.2. Identify Outliers:

  • Visual Methods: Use time-series plots or box plots to detect anomalies (Aguinis et al. 276).

  • Statistical Tests:

    • Grubbs’ Test: Identify outliers in univariate data:
      $ G = \frac{\max |Y_i - \bar{Y}|}{s}$ where $Y_i$ is a data point, $\bar{Y}$ is the sample mean, and $s$ is the standard deviation (Grubbs 1).
    • Mahalanobis Distance: For multivariate outliers in portfolio returns:
      $D^2 = (\mathbf{x} - \mathbf{\mu})^T \mathbf{\Sigma}^{-1} (\mathbf{x} - \mathbf{\mu})$ where $\mathbf{x}$ is the observation vector, $\mathbf{\mu}$ is the mean vector, and $\mathbf{\Sigma}$ is the covariance matrix (Aguinis et al. 277).
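Both test statistics are straightforward to compute. The sketch below plants one univariate outlier for Grubbs' statistic and flags multivariate points by Mahalanobis distance on synthetic data; all values are illustrative.

```python
import numpy as np
from scipy.stats import chi2

rng = np.random.default_rng(2)

# --- Grubbs' statistic: one planted outlier in otherwise standard-normal data
y = np.append(rng.normal(0, 1, 99), 8.0)
G = np.max(np.abs(y - y.mean())) / y.std(ddof=1)
print(f"Grubbs G = {G:.2f}")  # compare against the critical value for n = 100

# --- Mahalanobis distance: squared distances for correlated bivariate data
X = rng.multivariate_normal([0, 0], [[1, 0.8], [0.8, 1]], size=500)
mu = X.mean(axis=0)
cov_inv = np.linalg.inv(np.cov(X, rowvar=False))
diff = X - mu
d2 = np.einsum('ij,jk,ik->i', diff, cov_inv, diff)  # D^2 per observation

# Under normality D^2 ~ chi-squared with d.o.f. = dimension; flag the top tail
flags = np.where(d2 > chi2.ppf(0.975, df=2))[0]
print(f"{len(flags)} of {len(X)} points flagged as multivariate outliers")
```

The Mahalanobis rule matters for portfolios because it respects correlation: a point can be unremarkable in each coordinate yet far from the cloud once covariance is accounted for.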

4.7.3. Handle Outliers:

  • Winsorization: Cap extreme values at the 1st and 99th percentiles to reduce skewness (Aguinis et al. 279).
  • Robust Models: Use quantile regression or M-estimation, which minimizes the influence of outliers (Rousseeuw and Leroy 2003).
  • Respecification: Adjust models by adding dummy variables for crisis periods or nonlinear terms (Aguinis et al. 289).

4.7.4 Transparency: Report analyses with and without outliers to assess robustness (Aguinis et al. 289).
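Winsorization from 4.7.3 maps directly onto scipy.stats.mstats.winsorize; the heavy-tailed Student-t draws below are an assumed stand-in for return data.

```python
import numpy as np
from scipy.stats.mstats import winsorize

rng = np.random.default_rng(3)
r = rng.standard_t(df=3, size=1000) * 0.01  # heavy-tailed synthetic "returns"

# Cap the bottom 1% and top 1% of values at the corresponding percentiles
r_w = np.asarray(winsorize(r, limits=[0.01, 0.01]))

print(f"std    before: {r.std():.5f}   after: {r_w.std():.5f}")
print(f"max|r| before: {np.abs(r).max():.5f}   after: {np.abs(r_w).max():.5f}")
```

Unlike trimming, winsorization keeps the sample size intact, which matters when downstream models expect an unbroken time index.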

4.8 Outliers vs. Extreme Values:¶

  • Outliers: Deviate due to measurement errors or data entry mistakes. They should be corrected or removed (Aguinis et al. 283).
  • Extreme Values: Represent natural variability in the data and may be meaningful, requiring careful interpretation (Aguinis et al. 284).

To differentiate between outliers and extreme values, we use statistical techniques like:¶

  • Z-score method: Identifies points beyond ±3 standard deviations as potential outliers (Aguinis et al. 278).
  • Interquartile Range (IQR): Defines outliers as values below Q1 - 1.5IQR or above Q3 + 1.5IQR (Aguinis et al. 277).
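Both rules above take only a few lines of NumPy. The sketch plants two obvious outliers so each rule has something to flag; the indices and magnitudes are illustrative.

```python
import numpy as np

rng = np.random.default_rng(4)
x = np.append(rng.normal(0, 1, 200), [6.0, -7.0])  # two planted outliers

# Z-score rule: flag |z| > 3
z = (x - x.mean()) / x.std(ddof=1)
z_flags = np.where(np.abs(z) > 3)[0]

# IQR rule: flag values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR]
q1, q3 = np.percentile(x, [25, 75])
iqr = q3 - q1
iqr_flags = np.where((x < q1 - 1.5 * iqr) | (x > q3 + 1.5 * iqr))[0]

print(f"Z-score flags: {z_flags}")
print(f"IQR flags:     {iqr_flags}")
```

The IQR rule is the more robust of the two: extreme points inflate the mean and standard deviation that the z-score depends on, which can mask the very outliers being hunted.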
In [489]:
# Create boxplots for original and capped returns
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
sns.boxplot(y=apple_data, color='purple')
plt.title('Original Returns')

plt.subplot(1, 2, 2)
sns.boxplot(y=capped_data, color='green')
plt.title('Capped Returns')

plt.show()

The boxplots clearly show that capping reduces the presence of extreme values, making the data more manageable for analysis (Aguinis et al. 281).

Conclusion:¶

  • Handling outliers appropriately is critical in financial analysis and statistical modeling.

  • The choice of method should align with the dataset's characteristics and the research objective.

  • As Aguinis et al. emphasize, transparency in defining, identifying, and handling outliers is essential for ensuring reliable and replicable results (281).

References¶

Aguinis, Herman, et al. “Best-Practice Recommendations for Defining, Identifying, and Handling Outliers.” Organizational Research Methods, vol. 16, no. 2, 2013, pp. 270–301.

Grubbs, Frank E. “Procedures for Detecting Outlying Observations in Samples.” Technometrics, vol. 11, no. 1, 1969, pp. 1–21.
Rousseeuw, Peter J., and Annick M. Leroy. Robust Regression and Outlier Detection. Wiley, 2003.

5 Joining Time Series with Different Frequencies¶

5.1 Definition¶

Joining time series of different frequencies involves merging datasets where observations are recorded at mismatched intervals, such as daily stock prices and quarterly earnings reports. Formally, let $Y_t$ represent a high-frequency series (e.g., daily data) and $X_s$ a low-frequency series (e.g., quarterly data). The alignment challenge is to reconcile $X_s$ with $Y_t$ such that:

$$Y_t = f(X_s) + \varepsilon_t,$$

where $f(\cdot)$ is a function that bridges the frequency gap, often through interpolation, aggregation, or state-space synchronization (Casals, Jerez, and Sotoca 5). State-space models formalize this alignment by decomposing time series into latent components governed by transition equations. For instance, a quarterly series $\mathbf{z}_t$ might follow:

$$\begin{aligned} \mathbf{x}_{t+1} &= \Phi \mathbf{x}_t + \Gamma \mathbf{u}_t + E \mathbf{a}_t \quad &\text{(State equation)} \\ \mathbf{z}_t &= H \mathbf{x}_t + D \mathbf{u}_t + \mathbf{a}_t \quad &\text{(Observation equation)}, \end{aligned}$$

where $\mathbf{x}_t$ represents unobserved states and $\mathbf{a}_t$ is noise (Casals, Jerez, and Sotoca 8). Aggregation matrices, such as summing quarterly values into annual totals, further complicate alignment by altering observability (Casals, Jerez, and Sotoca 10).

5.2 Description¶

Mismatched frequencies are pervasive in fields like economics (daily stock prices vs. quarterly GDP) and environmental science (hourly temperature vs. monthly $CO_2$ levels). Key challenges include:

a) Structural Misalignment: Direct merging creates missing values (e.g., monthly GDP repeated daily).
b) Loss of Information: Aggregation obscures high-frequency trends. For example, summing daily sales into monthly totals masks intra-month volatility.
c) Temporal Causality: Using future low-frequency data (e.g., annual earnings) to explain past high-frequency values introduces look-ahead bias (Fama 385).
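A toy pandas example makes points (a) and (c) concrete: a naive index join leaves NaNs on every unmatched day, and a forward fill propagates each low-frequency value only from its release date onward (a back fill would leak future data into the past). All dates and numbers here are illustrative.

```python
import numpy as np
import pandas as pd

# Ten daily observations and one "quarterly" figure released mid-sample
daily = pd.Series(np.arange(10.0),
                  index=pd.date_range("2024-01-01", periods=10, freq="D"),
                  name="price")
release = pd.Series([100.0], index=pd.to_datetime(["2024-01-05"]), name="earnings")

merged = pd.concat([daily, release], axis=1)
print(f"NaNs after naive join: {merged['earnings'].isna().sum()} of {len(merged)}")

# Forward-fill: the value is visible only on and after its release date
merged["earnings_ffill"] = merged["earnings"].ffill()
print(merged)
```

Days before the release stay NaN even after the fill, which is exactly the look-ahead-safe behavior wanted for causal analysis.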

5.3 Demonstration¶

Consider an analysis of daily sales data $Y_t$ and a monthly advertising budget $X_s$. A naive approach to merging would align $X_s$ only on month-end dates, leaving missing values for all other days. A common solution is forward-filling, where the monthly budget is carried forward to every day in that month. However, this assumes a constant daily impact of advertising, which may not reflect reality and can introduce biases in causal inference. To illustrate the same frequency mismatch with market data, we use AAPL stock data from yfinance, combining daily returns (high frequency) with annual returns (low frequency). This highlights the importance of properly handling time series with different frequencies to ensure accurate conclusions.

In [490]:
"""AAPL Stock Analysis (2010-2024) - Demonstration"""
!pip install yfinance --quiet
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# =============================================================================
# Data Acquisition
# =============================================================================
stock = yf.Ticker("AAPL")
data = stock.history(start='2010-01-01', end='2024-12-31')[['Close']]

# =============================================================================
# Returns Calculation
# =============================================================================
# Daily returns
data['Daily Return'] = data['Close'].pct_change() * 100

# Yearly returns calculation
def calculate_yearly_returns(df):
    years = df.index.year.unique()
    yearly_data = []
    for year in years:
        year_df = df[df.index.year == year]
        if not year_df.empty:
            start = year_df.iloc[0]['Close']
            end = year_df.iloc[-1]['Close']
            yearly_data.append((year_df.index[-1], ((end/start)-1)*100))
    return pd.DataFrame(yearly_data, columns=['Date', 'Return']).set_index('Date')

yearly_returns = calculate_yearly_returns(data)

data['yearly_returns']=yearly_returns
data.head(10)
Out[490]:
Close Daily Return yearly_returns
Date
2010-01-04 00:00:00-05:00 6.440333 NaN NaN
2010-01-05 00:00:00-05:00 6.451466 0.172867 NaN
2010-01-06 00:00:00-05:00 6.348847 -1.590626 NaN
2010-01-07 00:00:00-05:00 6.337108 -0.184911 NaN
2010-01-08 00:00:00-05:00 6.379241 0.664859 NaN
2010-01-11 00:00:00-05:00 6.322965 -0.882164 NaN
2010-01-12 00:00:00-05:00 6.251043 -1.137477 NaN
2010-01-13 00:00:00-05:00 6.339215 1.410516 NaN
2010-01-14 00:00:00-05:00 6.302501 -0.579151 NaN
2010-01-15 00:00:00-05:00 6.197174 -1.671203 NaN
In [491]:
# =============================================================================
# Returns Visualization
# =============================================================================
def plot_returns():
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))

    # Daily returns plot
    ax1.plot(data['Daily Return'], color='#1f77b4', linewidth=0.8)
    ax1.set_title('Daily Returns (2010-2024)', fontsize=14)
    ax1.set_ylabel('Return (%)', fontsize=12)
    ax1.grid(True, alpha=0.3)

    # Yearly returns plot
    colors = ['#2ca02c' if x>0 else '#d62728' for x in yearly_returns['Return']]
    ax2.scatter(yearly_returns.index, yearly_returns['Return'], c=colors, s=100)
    ax2.plot(yearly_returns.index, yearly_returns['Return'], color='gray', alpha=0.3)
    for date, ret in zip(yearly_returns.index, yearly_returns['Return']):
        ax2.text(date, ret, f'{ret:.1f}%', ha='center', va='bottom')
    ax2.set_title('Annual Returns', fontsize=14)
    ax2.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    plt.tight_layout()
    plt.show()

plot_returns()

5.4. Diagnosis¶

Detect frequency mismatches via:

a) Metadata Checks: Compare series intervals (e.g., daily vs. quarterly).
b) Missing Values: High NaN counts post-merge indicate misalignment.
c) Visual Inspection: Abrupt changes in low-frequency data when plotted against high-frequency series.
d) Statistical Tests: Ljung-Box tests for residual autocorrelation after merging (Hyndman and Athanasopoulos 145).

In [492]:
# =============================================================================
# Model Diagnostics
# =============================================================================
from statsmodels.tsa.statespace.sarimax import SARIMAX

def run_diagnostics():
    model = SARIMAX(data['Close'], order=(1,1,1)).fit(disp=False)
    print("Model Summary:")
    print(model.summary())

    print("\nDiagnostic Plots:")
    model.plot_diagnostics(figsize=(10,8))
    plt.tight_layout()
    plt.show()

run_diagnostics()
Model Summary:
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                 3773
Model:               SARIMAX(1, 1, 1)   Log Likelihood               -7079.711
Date:                Sun, 23 Mar 2025   AIC                          14165.422
Time:                        22:49:15   BIC                          14184.129
Sample:                             0   HQIC                         14172.073
                               - 3773                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.5421      0.254      2.138      0.033       0.045       1.039
ma.L1         -0.5623      0.251     -2.243      0.025      -1.054      -0.071
sigma2         2.4991      0.023    106.676      0.000       2.453       2.545
===================================================================================
Ljung-Box (L1) (Q):                   0.15   Jarque-Bera (JB):             17145.13
Prob(Q):                              0.70   Prob(JB):                         0.00
Heteroskedasticity (H):             119.88   Skew:                             0.11
Prob(H) (two-sided):                  0.00   Kurtosis:                        13.44
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

Diagnostic Plots:

5.5 Damage¶

Ignoring frequency mismatches leads to:

a) Look-Ahead Bias: Future quarterly earnings influencing past daily stock returns (Fama 410).
b) Overstated Correlations: Artificial inflation from repeated low-frequency values.
c) Model Instability: Violations of equidistant interval assumptions in models like ARIMA (Hyndman and Athanasopoulos 72).
d) Resource Misallocation: Flawed business decisions due to incorrect causal links (e.g., misattributing sales spikes to advertising).

5.6 Directions¶

5.6.1 Temporal Aggregation/Disaggregation¶

a) Aggregation: Convert high-frequency data to low-frequency via averaging or summing. For example, daily sales become monthly averages.
b) Disaggregation: Use linear interpolation or splines to estimate high-frequency values from low-frequency data. However, this risks overfitting if the underlying process is nonlinear.
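Both directions map onto pandas resample. The synthetic daily series below is illustrative, using month-start anchors (the "MS" alias) for aggregation and linear interpolation for disaggregation.

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(5)
daily = pd.Series(rng.normal(0.05, 1.0, 90),
                  index=pd.date_range("2024-01-01", periods=90, freq="D"))

# a) Aggregation: daily -> monthly means (labelled at each month start)
monthly = daily.resample("MS").mean()

# b) Disaggregation: monthly anchors -> daily values by linear interpolation
monthly_as_daily = monthly.resample("D").interpolate(method="linear")

print(monthly)
print(monthly_as_daily.head())
```

As the text warns, the interpolated daily path is an estimate: it reproduces the monthly anchors exactly but invents the intra-month shape, so it should not be mistaken for observed high-frequency data.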

5.6.2 Mixed-Frequency Models¶

MIDAS (Mixed Data Sampling) regression incorporates variables sampled at different frequencies without aggregation. For example, daily stock returns $ Y_t$ can be modeled as:

$Y_t = \beta_0 + \sum_{k=0}^K \beta_k X_{s-k} + \varepsilon_t$,

where $X_{s-k}$ represents lagged quarterly earnings (Ghysels, Santa-Clara, and Valkanov 63).

5.6.3 State-Space Models¶

Kalman filters in state-space frameworks estimate latent states that align both series. For example, a quarterly earnings series $X_s$ can be embedded into a daily state-space model:

$$\begin{aligned} \text{Daily state: } \mathbf{x}_t &= \Phi \mathbf{x}_{t-1} + \mathbf{w}_t \\ \text{Observation: } Y_t &= H \mathbf{x}_t + \varepsilon_t, \end{aligned} $$

where $\mathbf{x}_t$ captures latent factors influencing both daily prices and quarterly earnings (Casals, Jerez, and Sotoca 14).

In [493]:
# Aggregate daily returns to yearly mean
data['Year'] = data.index.year
yearly_avg_return = data.groupby('Year')['Daily Return'].mean()

# Merge with yearly returns. Note the index mismatch: yearly_returns is indexed
# by year-end timestamps while yearly_avg_return is indexed by integer years,
# so concat cannot align them and the merged column comes out all NaN -- the
# post-merge missing-value symptom flagged in the Diagnosis section (5.4)
combined_yearly = pd.concat([yearly_returns, yearly_avg_return], axis=1)
combined_yearly.columns = ['Yearly Return (%)', 'Avg Daily Return (%)']
print(combined_yearly.head())
                           Yearly Return (%)  Avg Daily Return (%)
2010-12-31 00:00:00-05:00          50.721916                   NaN
2011-12-30 00:00:00-05:00          22.887422                   NaN
2012-12-31 00:00:00-05:00          30.558623                   NaN
2013-12-31 00:00:00-05:00           4.750874                   NaN
2014-12-31 00:00:00-05:00          42.628382                   NaN
In [494]:
# Disaggregate yearly returns to daily frequency
yearly_returns_daily = yearly_returns.resample('D').ffill().reindex(data.index)
data_combined = data.join(yearly_returns_daily.rename(columns={'Return': 'Yearly Return'}))

# Plot results
plt.figure(figsize=(10, 4))
plt.plot(data_combined['Daily Return'], label='Daily Return', alpha=0.5)
plt.plot(data_combined['Yearly Return'], label='Yearly Return (FFill)', color='red')
plt.legend()
plt.show()
In [495]:
from sklearn.linear_model import LinearRegression

yearly_returns_lagged = yearly_returns.shift(1)
data_midas = data.join(yearly_returns_lagged.rename(columns={'Return': 'Lagged Yearly Return'}))


data_midas_clean = data_midas.dropna()
X = data_midas_clean[['Lagged Yearly Return']]
y = data_midas_clean['Daily Return']

model = LinearRegression()
model.fit(X, y)
print(f"MIDAS Coefficient: {model.coef_[0]:.4f}, Intercept: {model.intercept_:.4f}")
MIDAS Coefficient: -0.0024, Intercept: -0.0087
In [496]:
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX

data['Year'] = data.index.year
yearly_returns['Year'] = yearly_returns.index.year

data_merged = data.merge(
    yearly_returns[['Year', 'Return']],
    on='Year',
    how='left',
    suffixes=('', '_Yearly')
)

data_merged['Return'] = data_merged['Return'].astype(float)


data_clean = data_merged[['Daily Return', 'Return']].dropna()

# Fit SARIMAX model (Kalman filter)
mod = SARIMAX(
    endog=data_clean['Daily Return'],
    exog=data_clean['Return'],
    order=(1, 0, 0)
)
res = mod.fit(disp=False)
print(res.summary())
                               SARIMAX Results                                
==============================================================================
Dep. Variable:           Daily Return   No. Observations:                 3772
Model:               SARIMAX(1, 0, 0)   Log Likelihood               -7465.057
Date:                Sun, 23 Mar 2025   AIC                          14936.114
Time:                        22:49:17   BIC                          14954.820
Sample:                             0   HQIC                         14942.764
                               - 3772                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Return         0.0034      0.001      5.892      0.000       0.002       0.004
ar.L1         -0.0434      0.011     -3.996      0.000      -0.065      -0.022
sigma2         3.0657      0.040     76.954      0.000       2.988       3.144
===================================================================================
Ljung-Box (L1) (Q):                   0.00   Jarque-Bera (JB):              3951.29
Prob(Q):                              0.98   Prob(JB):                         0.00
Heteroskedasticity (H):               1.39   Skew:                            -0.07
Prob(H) (two-sided):                  0.00   Kurtosis:                         8.01
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [ ]:
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf

fig = res.plot_diagnostics(figsize=(10, 6))
plt.suptitle('SARIMAX Model Diagnostics', y=1.02)
plt.tight_layout()
plt.show()

5.7 Conclusion¶

Joining time series with different frequencies (e.g., daily vs. quarterly) introduces structural misalignment, information loss, and look-ahead bias if handled naively. Risks include inflated correlations, model instability, and flawed causal inference. Solutions like MIDAS regression, temporal aggregation/disaggregation, and state-space models (e.g., Kalman filters) reconcile frequencies while preserving data integrity. Proper alignment mitigates biases and ensures accurate analysis in econometric and financial applications.

References¶

Casals, José, Miguel Jerez, and Sonia Sotoca. Modeling and Forecasting Time Series Sampled at Different Frequencies. European Commission, 2005.

Fama, Eugene F. “Efficient Capital Markets: A Review of Theory and Empirical Work.” The Journal of Finance, vol. 25, no. 2, 1970, pp. 383–417.

Ghysels, Eric, et al. “Predicting Volatility: Getting the Most Out of Return Data Sampled at Different Frequencies.” Journal of Econometrics, vol. 131, no. 1-2, 2006, pp. 59–95.

Hyndman, Rob J., and George Athanasopoulos. Forecasting: Principles and Practice. OTexts, 2018.

Wei, William W. S. “Some Consequences of Temporal Aggregation in Seasonal Time Series Models.” Seasonal Analysis of Economic Time Series, edited by Arnold Zellner, U.S. Bureau of the Census, 1978, pp. 433–448.

In [ ]:
import warnings
warnings.filterwarnings('ignore')
import sys
import os

class HiddenPrints:
    def __enter__(self):
        self._original_stdout = sys.stdout
        sys.stdout = open(os.devnull, 'w')

    def __exit__(self, exc_type, exc_val, exc_tb):
        sys.stdout.close()
        sys.stdout = self._original_stdout
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

def ignore_exceptions(func):
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception:
            pass
    return wrapper

!apt-get install texlive-xetex texlive-fonts-recommended texlive-plain-generic --quiet
!pip install nbconvert --quiet
!apt-get install pandoc --quiet

#!jupyter nbconvert --to pdf --no-input "/content/MFE_610_GWP1.ipynb"
!jupyter nbconvert --to html --no-input "/content/MFE_610_GWP1.ipynb"
from google.colab import files
#files.download("/content/MFE_610_GWP1.pdf")
files.download("/content/MFE_610_GWP1.html")