Introduction¶
In time series analysis, beyond simply studying market dynamics, economists are primarily interested in forecasting financial behavior for the days ahead. To achieve this, a variety of models have historically been employed. These range from standard Autoregressive (AR), Moving Average (MA), and ARMA/ARIMA models, to those that model variability alongside the mean, such as GARCH (for normal distributions) or GARMA and GLARMA (for distributions in the exponential family). However, these traditional models impose rigid structures and often fail to accurately capture the complex patterns found in real-world time series.
Consequently, models based on neural networks have gained traction due to their higher predictive power. These operate on the assumption that any unknown function can be approximated by a neural network composed of inputs, hidden layers, activation functions, weights, and error terms, similar in structure to a basic Generalized Linear Model (Leshno et al., 1993; Funahashi, 1989). Beyond standard neural networks, there are Convolutional Neural Networks (CNNs). While typically famous for image analysis, Oates and Wang (2) suggest that we can leverage the power of CNNs for finance by transforming time series data into images using Gramian Angular Fields (GAF) to generate forecasts.
Regardless of the model chosen, accurate forecasting generally requires the data’s basic properties such as mean, variance, and autocorrelation to remain stable over time (Prado 78). This property, known as stationarity, is crucial for identifying the right model. Traditionally, stationarity is achieved through integer differentiation (like taking log returns). However, as Prado (78) argues, this brute-force approach often wipes out the series' memory, leaving behind nothing but noise. This necessitates methods like fractional differentiation, which achieves stationarity while still preserving the underlying patterns and signal of the original time series.
This project explores these concepts, demonstrating their implementation in Python, interpreting the results, and discussing the trade-offs of modeling time series both with and without differentiation across different Deep Learning architectures.
Step 1¶
Let's consider a financial asset price $Y(t)$, where $t$ can be discrete or continuous time. Financial price series are stochastic processes that exhibit behaviors like Brownian motion or a random walk, making them non-stationary.
In a non-stationary process, statistical properties such as the mean ($\mu$), variance ($\sigma^2$), and autocovariance are not constant but change over time (Prado 75). For reliable statistical inference and forecasting, however, it is best to work with stationary processes: stationarity ensures that our estimators are consistent and that standard asymptotic convergence theorems hold, making predictions valid. To assess and achieve stationarity of the time series $Y(t)$, we work with three tools:
Augmented Dickey Fuller (ADF) test¶
This test evaluates the null hypothesis ($H_0$) that the price $Y(t)$ has a unit root, i.e., that the series is non-stationary. It is based on regressing the first difference ($\Delta Y_t = Y_t - Y_{t-1}$) of the time series $(Y_t = Y(t))$ on a drift ($\beta_0$), a deterministic time trend ($\beta t$), the force of mean reversion acting on the lagged level ($\gamma Y_{t-1}$), lagged differences ($\delta_1 \Delta Y_{t-1} + \dots + \delta_{p-1} \Delta Y_{t-p+1}$), and an error term ($\varepsilon_t \sim N(0, \sigma^2)$), $$\Delta Y_t = \beta_0 + \beta t + \gamma Y_{t-1} + \delta_1 \Delta Y_{t-1} + \dots + \delta_{p-1} \Delta Y_{t-p+1} + \varepsilon_t .$$ If the p-value $<\alpha$, where $\alpha$ is the significance level chosen by the researcher, we reject $H_0$ (Dickey and Fuller 431).
Integer Differencing (log Returns)¶
As described by Prado (75), this transformation works by taking the first difference of log-prices, $$r_t = \ln(Y_t) - \ln(Y_{t-1}) = \ln\left(\frac{Y_t}{Y_{t-1}}\right).$$
Fractional Differencing¶
According to Prado (75), arbitrage forces cause financial time series to exhibit low signal-to-noise ratios. To make matters worse, standard stationarity transformations such as integer differencing reduce the signal further by removing memory; once these transformations wipe out the memory of the data, statisticians are forced to apply complex mathematical techniques to extract whatever residual signal remains.
However, applying such techniques to memory-erased series often leads to false discoveries (Prado 75). To address this issue, Prado (75) suggests using fractional differencing, introduced by Granger and Joyeux (1980) and Hosking (1981). Instead of applying integer differencing, this method differentiates the time series by a degree $d \in (0,1)$ and searches for the minimum value of $d$ required to pass the ADF test. Prado (75) argues that this preserves the maximum amount of memory/signal while still achieving stationarity.
To define the transformation, we use the backshift operator $B$, where $B^k Y_t = Y_{t-k}$. From time series analysis we know that:
$$ \begin{aligned} BY_t &= Y_{t-1}\\ (1 - B)Y_t &= Y_t - Y_{t-1}\\ (1 - B)^2 Y_t &= (Y_t - Y_{t-1}) - (Y_{t-1} - Y_{t-2})\\ \vdots\\ (1-B)^d Y_t &= \Delta^d Y_t.\\ (1-B)^d Y_t &=\sum_{k=0}^{\infty} {d \choose k}(-B)^k Y_t\\ &=\sum_{k=0}^{\infty}\frac{d(d-1)\cdots(d-k+1)}{k!}(-B)^k Y_t\\ \Delta^d Y_t&=\sum_{k=0}^\infty \frac{\prod_{i=0}^{k-1}(d-i)}{k!}(-1)^k B^k Y_t\\ \underbrace{\Delta^d Y_t}_{\widetilde{Y}_t} &=\sum_{k=0}^\infty \underbrace{\frac{\prod_{i=0}^{k-1}(d-i)}{k!}(-1)^k}_{w_k= -w_{k-1}\frac{d-k+1}{k}} Y_{t-k}\\ \widetilde{Y}_t&= \sum_{k=0}^\infty w_k Y_{t-k} \end{aligned} $$ Prado (80) states that to implement Fractional Differencing, we can use two alternatives: (1) an expanding window, or (2) a fixed-width window (fracdiff), since we do not have infinite data to apply infinitely long weights.
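The weight recursion $w_k = -w_{k-1}\frac{d-k+1}{k}$ can be verified in a few lines of Python; here $d = 0.5$ is an illustrative value, not the optimized one found later, and $d = 1$ is included to show that integer differencing is the special case where memory is discarded:

```python
def frac_weights(d, n):
    """First n+1 weights of (1 - B)^d via the recursion w_k = -w_{k-1}(d-k+1)/k."""
    w = [1.0]
    for k in range(1, n + 1):
        w.append(-w[-1] * (d - k + 1) / k)
    return w

w_half = frac_weights(0.5, 5)  # slow hyperbolic decay -> long memory preserved
w_one = frac_weights(1.0, 5)   # [1, -1, 0, 0, ...] -> ordinary first difference

print([round(x, 4) for x in w_half])  # [1.0, -0.5, -0.125, -0.0625, -0.0391, -0.0273]
print([round(x, 4) for x in w_one])   # [1.0, -1.0, 0.0, 0.0, 0.0, 0.0]
```

The slowly decaying weights for $d = 0.5$ are exactly why distant past values keep contributing to $\widetilde{Y}_t$.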
In the code below, we implement these mathematical expressions to solve step 1.
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import scipy.stats as stats
np.random.seed(42)
#
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (14, 5)
# ---------------------------------------------------------
# 1. DATA ACQUISITION (LEVELS)
# ---------------------------------------------------------
ticker = "BTC-USD"
start_date = "2022-01-01"
end_date = "2023-12-31"
print(f"Acquiring financial data for {ticker}...")
# Download data
data = yf.download(ticker, start=start_date, end=end_date, progress=False, auto_adjust=True)
# Handle MultiIndex columns if present (common with newer yfinance versions)
if isinstance(data.columns, pd.MultiIndex):
try:
prices = data.xs('Close', level=0, axis=1)[ticker]
except KeyError:
# Fallback if structure is different
prices = data.iloc[:, 0] # Take first column assuming it's close
else:
prices = data['Close']
prices = prices.dropna()
print(f"Data loaded successfully. Observations: {len(prices)}")
# 1. Visual Inspection
plt.figure(figsize=(14, 5))
plt.plot(prices, color='navy', linewidth=1.5)
plt.title(f"Figure 1: {ticker} Prices (Levels)", fontsize=14, fontweight='bold')
plt.ylabel("Price (USD)")
plt.xlabel("Date")
plt.tight_layout()
plt.show()
# 2. Autocorrelation Analysis (ACF & PACF)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4))
plot_acf(prices, lags=40, ax=ax1, title='Figure 1.1: ACF-Levels (Slow Decay indicates Non-Stationarity)')
plot_pacf(prices, lags=40, ax=ax2, title='Figure 1.2: PACF - Levels')
plt.tight_layout()
plt.show()
# 3. Statistical Summary & Stationarity Test
desc_stats = prices.describe()
skewness = stats.skew(prices)
kurtosis = stats.kurtosis(prices)
adf_result = adfuller(prices)
print("\n" + "-"*60)
print("Table 1.a). STATISTICAL CHARACTERIZATION: LEVELS")
print("-"*60)
print(f"Mean: {desc_stats['mean']:.4f}")
print(f"Std Dev: {desc_stats['std']:.4f}")
print(f"Skewness: {skewness:.4f}")
print(f"Kurtosis: {kurtosis:.4f}")
print("-" * 30)
print(f"ADF Statistic: {adf_result[0]:.4f}")
print(f"p-value: {adf_result[1]:.4f}")
print("Conclusion: " + ("STATIONARY" if adf_result[1] < 0.05 else "NON-STATIONARY (Fail to reject H0)"))
print("-"*60)
# ---------------------------------------------------------
# B. ANALYSIS OF TRANSFORMED SERIES (INTEGER DIFFERENCING)
# ---------------------------------------------------------
# 1. Transformation (Log Returns)
log_returns = np.log(prices / prices.shift(1)).dropna()
# 2. Visual Inspection
plt.figure(figsize=(14, 5))
plt.plot(log_returns, color='darkgreen', linewidth=1)
plt.title(f"Figure 2: Differenced Series: {ticker} Log Returns (d=1)", fontsize=14, fontweight='bold')
plt.ylabel("Log Return")
plt.tight_layout()
plt.show()
# 3. Autocorrelation Analysis
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4))
plot_acf(log_returns, lags=40, ax=ax1, title='Figure 2.1: ACF- Log Returns (Memory Loss)')
plot_pacf(log_returns, lags=40, ax=ax2, title='Figure 2.2: PACF - Log Returns')
plt.tight_layout()
plt.show()
# 4. Statistical Summary
desc_stats_log = log_returns.describe()
skewness_log = stats.skew(log_returns)
kurtosis_log = stats.kurtosis(log_returns)
adf_result_log = adfuller(log_returns)
print("\n" + "-"*60)
print("Table 2b.) STATISTICAL CHARACTERIZATION: LOG RETURNS")
print("-"*60)
print(f"Mean: {desc_stats_log['mean']:.4f}")
print(f"Std Dev: {desc_stats_log['std']:.4f}")
print(f"Skewness: {skewness_log:.4f}")
print(f"Kurtosis: {kurtosis_log:.4f}")
print("-" * 30)
print(f"ADF Statistic: {adf_result_log[0]:.4f}")
print(f"p-value: {adf_result_log[1]:.4e}") # Scientific notation for very small p
print("Conclusion: " + ("STATIONARY" if adf_result_log[1] < 0.05 else "NON-STATIONARY"))
print("-"*60)
# ---------------------------------------------------------
# C. ANALYSIS OF FRACTIONAL DIFFERENCING
# ---------------------------------------------------------
def get_weights_ffd(d, thres=1e-5):
"""Calculate weights for fractional differencing using binomial expansion."""
w, k = [1.], 1
while True:
w_k = -w[-1] / k * (d - k + 1)
if abs(w_k) < thres:
break
w.append(w_k)
k += 1
return np.array(w[::-1])
def frac_diff_ffd(series, d, thres=1e-5):
"""Apply fixed-window fractional differencing."""
# 1. Compute weights
w = get_weights_ffd(d, thres)
width = len(w) - 1
# 2. Apply weights (Vectorized rolling dot product)
# Note: We apply to the raw series values
vals = series.values
if len(vals) < width:
return pd.Series(dtype=float)
res = np.full(len(vals), np.nan)
# Iterative application (robust for variable window sizes)
# For high performance on large data, stride_tricks can be used,
# but loop is sufficient for <2000 obs.
for i in range(width, len(vals)):
res[i] = np.dot(vals[i-width:i+1], w)
return pd.Series(res, index=series.index)
# 1. Optimization: Find minimum d for stationarity
print("\nOptimizing Fractional Differentiation parameter (d)...")
possible_ds = np.linspace(0.1, 0.9, 17) # Check 0.1, 0.15 ... 0.9
optimal_d = 1.0
frac_diff_series = None
for d_val in possible_ds:
temp_series = frac_diff_ffd(prices, d_val).dropna()
# Run ADF check
if len(temp_series) > 50: # Ensure sufficient data
p_val = adfuller(temp_series)[1]
if p_val < 0.05:
optimal_d = d_val
frac_diff_series = temp_series
print(f" Found stationary d = {optimal_d:.2f} (p-value: {p_val:.4f})")
break
if frac_diff_series is None:
print(" No stationary d < 1.0 found. Using d=1.0 (Log Returns equivalent).")
frac_diff_series = log_returns
optimal_d = 1.0
# 2. Visual Inspection
plt.figure(figsize=(14, 5))
plt.plot(frac_diff_series, color='darkred', linewidth=1)
plt.title(f"Figure 3: Fractionally Differenced Series (d={optimal_d:.2f})", fontsize=14, fontweight='bold')
plt.ylabel("Transformed Value")
plt.tight_layout()
plt.show()
# 3. Autocorrelation Analysis
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4))
plot_acf(frac_diff_series, lags=40, ax=ax1, title=f'Figure 3.1: ACF- Frac Diff d={optimal_d:.2f} (Memory Preserved)')
plot_pacf(frac_diff_series, lags=40, ax=ax2, title='Figure 3.2: PACF - Frac Diff')
plt.tight_layout()
plt.show()
# 4. Statistical Summary
desc_stats_frac = frac_diff_series.describe()
skewness_frac = stats.skew(frac_diff_series)
kurtosis_frac = stats.kurtosis(frac_diff_series)
adf_result_frac = adfuller(frac_diff_series)
print("\n" + "-"*60)
print(f"Table 3c.) STATISTICAL CHARACTERIZATION: FRAC DIFF (d={optimal_d:.2f})")
print("-"*60)
print(f"Mean: {desc_stats_frac['mean']:.4f}")
print(f"Std Dev: {desc_stats_frac['std']:.4f}")
print(f"Skewness: {skewness_frac:.4f}")
print(f"Kurtosis: {kurtosis_frac:.4f}")
print("-" * 30)
print(f"ADF Statistic: {adf_result_frac[0]:.4f}")
print(f"p-value: {adf_result_frac[1]:.4f}")
print("Conclusion: " + ("STATIONARY" if adf_result_frac[1] < 0.05 else "NON-STATIONARY"))
print("-"*60)
Acquiring financial data for BTC-USD...
Data loaded successfully. Observations: 729
------------------------------------------------------------
Table 1.a). STATISTICAL CHARACTERIZATION: LEVELS
------------------------------------------------------------
Mean: 28509.7581
Std Dev: 8321.9629
Skewness: 0.4995
Kurtosis: -0.8028
------------------------------
ADF Statistic: -1.5208
p-value: 0.5232
Conclusion: NON-STATIONARY (Fail to reject H0)
------------------------------------------------------------
------------------------------------------------------------
Table 2b.) STATISTICAL CHARACTERIZATION: LOG RETURNS
------------------------------------------------------------
Mean: -0.0002
Std Dev: 0.0288
Skewness: -0.4091
Kurtosis: 5.5382
------------------------------
ADF Statistic: -26.7875
p-value: 0.0000e+00
Conclusion: STATIONARY
------------------------------------------------------------
Optimizing Fractional Differentiation parameter (d)...
  Found stationary d = 0.60 (p-value: 0.0174)
------------------------------------------------------------
Table 3c.) STATISTICAL CHARACTERIZATION: FRAC DIFF (d=0.60)
------------------------------------------------------------
Mean: 817.3111
Std Dev: 973.0975
Skewness: 0.6413
Kurtosis: 1.1414
------------------------------
ADF Statistic: -3.2466
p-value: 0.0174
Conclusion: STATIONARY
------------------------------------------------------------
d) Comment on the 3 representations of the data¶
When we look at the BTC-USD price series (Figure 1 and Table 1.a), it behaves much like a typical financial price: it drifts around, shows long trends, and doesn't settle around any fixed mean. The ACF confirms this: instead of dropping quickly, it fades out very slowly, which is a classic sign of a non-stationary random walk.
The PACF also shows a strong spike at lag 1, telling us that today’s price is basically yesterday’s price plus noise. The ADF test gives a very high p-value (0.5232), so statistically there’s no evidence against a unit root. The series keeps a lot of memory, but that also means its average and variance are unstable (Mean ≈ 28,509; SD ≈ 8,321), which makes it hard to use directly in models that assume stationarity.
When we transform the data into log returns (Figure 2 and Table 1.b), the behavior changes completely. The series now fluctuates around a constant level near zero, and the ACF drops to almost nothing right away, which basically shows that the transformation wiped out the long-term dependence.
The ADF test strongly confirms stationarity (p-value = 0.0000). The downside is that the distribution becomes much more heavy-tailed (kurtosis = 5.5382), meaning we get more extreme values than what a normal distribution would predict. So although the log-return series is easy to handle statistically, it loses the historical patterns that might have some predictive value.
The fractionally differenced series (with d = 0.60), shown in Figure 3 and Table 1.c, sits somewhere between these two extremes. Visually it looks stationary, but unlike the regular returns it still shows some structure; it is not just noise bouncing around zero.
The ADF test supports stationarity (p-value = 0.0174). What really stands out is that the ACF keeps some meaningful correlation at the early lags, which means the long-term memory hasn’t been completely destroyed. At the same time, the transformation removes the non-stationary drift we saw in the raw prices. Its kurtosis (1.1414) is also lower than that of the log returns, so the distribution has fewer extreme jumps.
Overall, this version keeps the useful memory while still meeting stationarity requirements, which is consistent with what Prado (75) describes in his book.
Step 2¶
To work with a stationary series, we first converted the raw price levels $Y_t$ into log returns using $$r_t = \ln(Y_t / Y_{t-1}).$$ This transformation helps keep the mean and variance more stable, which is essential before fitting any model (we did this in Step 1). After that, we reshaped the return series into a supervised format using a sliding window: each sample uses the last 10 returns ($r_{t-1}, r_{t-2}, \dots, r_{t-10}$) to predict the current one, $r_t$. Because neural networks train faster when features are on similar scales, we standardized the inputs with Z-score normalization, $$z_t = \frac{r_t - \mu}{\sigma},$$ where $\mu$ and $\sigma$ are the sample mean and standard deviation of the returns. The model itself is a basic MLP. It takes the 10 lagged inputs, passes them through two hidden layers (64 and 32 units), both with ReLU activations, $$\text{ReLU}(z) = \max(0, z),$$ and then outputs a single value with a linear activation since we are predicting a continuous return.
For training, we used the Adam optimizer and minimized the MSE, $$MSE = \frac{1}{n} \sum_{t=1}^{n} (r_t - \hat{r}_t)^2.$$ At the end, we evaluated how well the model did using the RMSE, $RMSE = \sqrt{MSE}$, and the $R^2$ score.
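The sliding-window construction described above can be sketched on a toy return series (the numbers, and the reduced lag count of 3 instead of 10, are purely illustrative; the full version is `create_supervised_dataset` in the code below):

```python
# Toy sketch of the sliding-window supervised format: lags -> next return.
import numpy as np

returns = np.array([0.01, -0.02, 0.03, 0.00, 0.015, -0.01])  # illustrative values
n_lags = 3

# Each row of X holds the n_lags previous returns; y holds the current return.
X = np.array([returns[i - n_lags:i] for i in range(n_lags, len(returns))])
y = returns[n_lags:]

# Z-score normalization of the inputs (mean 0, std 1)
X_z = (X - X.mean()) / X.std()

print(X.shape, y.shape)  # (3, 3) (3,)
```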
The theoretical foundation behind the MLP is universal approximation: any unknown function $f$ can be approximated by a network $\tilde{f}$. The Universal Approximation Theorem, proved by Funahashi (1989), shows that a three-layer feedforward neural network (that is, one hidden layer) can approximate any continuous function to any desired level of accuracy. Suppose we have a compact set $K \subset \mathbb{R}^n$ and a continuous function $f : K \rightarrow \mathbb{R}$. If the activation function $\sigma : \mathbb{R} \to \mathbb{R}$ is non-constant, bounded, increasing, and continuous, then for any approximation error $\varepsilon > 0$ it is possible to build a network with some number $N$ of hidden neurons, weights $w_{ij} \in \mathbb{R}$ $(i = 1,\ldots,N;\ j = 1,\ldots,n)$, biases $b_i \in \mathbb{R}$, and output weights $c_i \in \mathbb{R}$, such that the network's output $$\tilde{f}(x_1,\ldots,x_n) = \sum_{i=1}^{N} c_i \, \sigma\!\left( \sum_{j=1}^{n} w_{ij} x_j + b_i \right)$$ gets arbitrarily close to $f$. Formally, $$\max_{x \in K} \left| f(x_1,\ldots,x_n) - \tilde{f}(x_1,\ldots,x_n) \right| < \varepsilon,$$ meaning $\tilde{f}$ approximates $f$ as closely as we want.
A later result by Leshno et al. (1993) extended this idea. They showed that a one-hidden-layer network $\tilde{f}$ whose activation function $\sigma$ is locally bounded and piecewise continuous can approximate any continuous function on a compact set $K \subset \mathbb{R}^n$, that is, any $f \in C(\mathbb{R}^n)$, if and only if $\sigma$ is not a polynomial.
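As a tiny concrete illustration of why a non-polynomial activation such as ReLU suffices: a hand-built network with one hidden layer of just two ReLU units represents the non-smooth continuous function $|x|$ exactly (a toy example, not the project's model):

```python
# |x| = relu(x) + relu(-x): a two-neuron, one-hidden-layer ReLU network
# representing a non-smooth continuous function exactly.
import numpy as np

def relu(z):
    return np.maximum(0.0, z)

def tiny_net(x):
    # hidden weights w = [1, -1], biases b = [0, 0], output weights c = [1, 1]
    return 1.0 * relu(1.0 * x) + 1.0 * relu(-1.0 * x)

xs = np.linspace(-2, 2, 9)
print(np.allclose(tiny_net(xs), np.abs(xs)))  # True
```

No polynomial activation could do this exactly, since a fixed-degree polynomial network is itself a polynomial while $|x|$ is not.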
In the code below, we implement these mathematical expressions to solve step 2.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
import yfinance as yf
from statsmodels.tsa.stattools import adfuller
import warnings
# Suppress warnings for cleaner output
warnings.filterwarnings("ignore")
#
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (18, 6)
# ---------------------------------------------------------
# 1. DATA ACQUISITION & PREPARATION (Re-running Step 1 logic)
# ---------------------------------------------------------
ticker = "BTC-USD"
start_date = "2022-01-01"
end_date = "2023-12-31"
data = yf.download(ticker, start=start_date, end=end_date, progress=False, auto_adjust=True)
if isinstance(data.columns, pd.MultiIndex):
prices = data.xs('Close', level=0, axis=1)[ticker].dropna()
else:
prices = data['Close'].dropna()
# Transformations
# 1. Log Returns
log_returns = np.log(prices / prices.shift(1)).dropna()
# 2. Fractional Differencing
def get_weights_ffd(d, thres=1e-5, max_len=1000):
w, k = [1.0], 1
while True:
w_k = -w[-1] * (d - k + 1) / k
if abs(w_k) < thres or k >= max_len: break
w.append(w_k)
k += 1
return np.array(w[::-1])
def frac_diff_ffd(series, d, thres=1e-5):
w = get_weights_ffd(d, thres)
width = len(w) - 1
# Apply weights to series values
vals = series.values
res = np.full(len(vals), np.nan)
for i in range(width, len(vals)):
res[i] = np.dot(vals[i-width:i+1], w)
return pd.Series(res, index=series.index)
# Optimize d
best_d = 1.0
frac_diff_series = None
possible_ds = np.linspace(0.1, 0.9, 17)
for d_val in possible_ds:
temp_series = frac_diff_ffd(prices, d_val).dropna()
if len(temp_series) > 50:
p_val = adfuller(temp_series)[1]
if p_val < 0.05:
best_d = d_val
frac_diff_series = temp_series
break
if frac_diff_series is None:
frac_diff_series = log_returns # Fallback
best_d = 1.0
print(f"Data Prepared. Optimal d for FracDiff: {best_d:.2f}")
# ---------------------------------------------------------
# 2. MODELING FUNCTIONS
# ---------------------------------------------------------
def create_supervised_dataset(series, n_lags=10):
"""Creates lagged features (X) and target (y)"""
vals = series.values
X, y = [], []
for i in range(n_lags, len(vals)):
X.append(vals[i-n_lags:i])
y.append(vals[i])
return np.array(X), np.array(y)
def build_mlp_model(input_dim):
"""Constructs the MLP architecture"""
model = Sequential([
Dense(64, activation='relu', input_shape=(input_dim,)),
Dense(32, activation='relu'),
Dense(1, activation='linear')
])
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
return model
def train_and_evaluate(series, name, n_lags=10, epochs=100, batch_size=32):
"""Trains MLP and returns results + history for plotting"""
# 1. Data Prep
X, y = create_supervised_dataset(series, n_lags)
# Train/Test Split (Chronological)
split = int(len(X) * 0.8)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]
test_dates = series.index[n_lags + split:]
# Scaling
scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1))
y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1))
# 2. Model Training
model = build_mlp_model(n_lags)
es = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
history = model.fit(
X_train_scaled, y_train_scaled,
validation_split=0.2,
epochs=epochs,
batch_size=batch_size,
callbacks=[es],
verbose=0
)
# 3. Prediction
pred_scaled = model.predict(X_test_scaled, verbose=0)
y_pred = scaler_y.inverse_transform(pred_scaled).flatten()
# 4. Metrics
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
return {
"name": name,
"rmse": rmse,
"mae": mae,
"r2": r2,
"y_test": y_test,
"y_pred": y_pred,
"dates": test_dates,
"history": history.history
}
# ---------------------------------------------------------
# 3. EXECUTION & VISUALIZATION
# ---------------------------------------------------------
# Run experiments
results_a = train_and_evaluate(prices, "a) Levels (Non-Stationary)")
results_b = train_and_evaluate(log_returns, "b) Log Returns (Stationary)")
results_c = train_and_evaluate(frac_diff_series, f"c) Frac Diff (d={best_d:.2f})")
experiments = [results_a, results_b, results_c]
# Visualization Loop
for res in experiments:
fig, axes = plt.subplots(1, 3, figsize=(20, 5))
plt.suptitle(f"Model Performance: {res['name']}", fontsize=16, y=1.05, fontweight='bold')
# Plot 1: Observed vs Fitted (Time Series)
# We zoom in on the last 100 points for clarity if series is long
display_len = 150
ax1 = axes[0]
ax1.plot(res['dates'][-display_len:], res['y_test'][-display_len:], label='Observed', color='black', linewidth=1.5, alpha=0.7)
ax1.plot(res['dates'][-display_len:], res['y_pred'][-display_len:], label='Fitted (Pred)', color='red', linestyle='--', linewidth=1.5)
ax1.set_title("Figure 4: Observed vs Fitted (Last 150 Days)", fontsize=12, fontweight='bold')
ax1.legend()
ax1.grid(True, alpha=0.3)
# Plot 2: Loss Function (Training vs Validation)
ax2 = axes[1]
loss = res['history']['loss']
val_loss = res['history']['val_loss']
ax2.plot(loss, label='Training Loss', color='blue')
ax2.plot(val_loss, label='Validation Loss', color='orange')
ax2.set_title("Figure 4.1: Learning Curve (MSE Loss)", fontsize=12, fontweight='bold')
ax2.set_xlabel("Epochs")
ax2.set_ylabel("Loss (MSE)")
ax2.legend()
ax2.grid(True, alpha=0.3)
# Plot 3: Scatter Plot (Actual vs Predicted) - Good for spotting bias
ax3 = axes[2]
ax3.scatter(res['y_test'], res['y_pred'], alpha=0.5, color='purple', edgecolors='w')
# Perfect prediction line
min_val = min(res['y_test'].min(), res['y_pred'].min())
max_val = max(res['y_test'].max(), res['y_pred'].max())
ax3.plot([min_val, max_val], [min_val, max_val], 'k--', lw=2, label='Perfect Fit')
ax3.set_title(f"Figure 4.2: Prediction Accuracy (R² = {res['r2']:.4f})", fontsize=12, fontweight='bold')
ax3.set_xlabel("Observed Values")
ax3.set_ylabel("Predicted Values")
ax3.legend()
ax3.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# ---------------------------------------------------------
# 4. SUMMARY STATISTICS TABLE
# ---------------------------------------------------------
summary_data = []
for res in experiments:
summary_data.append({
"Model": res['name'],
"RMSE": res['rmse'],
"MAE": res['mae'],
"R² Score": res['r2']
})
summary_df = pd.DataFrame(summary_data)
print("\n" + "-"*60)
print("STEP 2: PREDICTION PERFORMANCE SUMMARY")
print("-"*60)
print(summary_df.to_string(index=False, float_format="%.6f"))
print("-"*60)
Acquiring data for BTC-USD...
Data Prepared. Optimal d for FracDiff: 0.60
------------------------------------------------------------
STEP 2: PREDICTION PERFORMANCE SUMMARY
------------------------------------------------------------
Model RMSE MAE R² Score
a) Levels (Non-Stationary) 881.596759 627.592312 0.981129
b) Log Returns (Stationary) 0.022090 0.014657 -0.073436
c) Frac Diff (d=0.60) 1132.062242 847.689191 -0.234766
------------------------------------------------------------
d) Prediction performance of the three models¶
When comparing the MLP models, the one using raw price levels appears to do extremely well: in Figure 4 (left panel), the fitted values follow the actual prices almost perfectly, and the scatter plot in Figure 4.2 (right panel) shows an almost straight line with an $R^2$ of 0.9811.
With log returns, we get a fully stationary series, but we also lose almost all the useful signal. In Figure 4 for this model, the fitted curve is practically flat, and in Figure 4.2 (middle row of plots) the points form a horizontal cloud with an $R^2 = -0.0734$.
The fractionally differenced series (bottom row of plots, with $d = 0.60$) sits theoretically between the two extremes, but in practice it did not behave well here. The learning curve in Figure 4.1 shows the training loss going down while the validation loss goes up, a classic overfitting pattern. That mismatch explains the weak fit in Figure 4 and the negative $R^2$ of -0.2348 in Figure 4.2. Even though fractional differencing preserved some of the long-term dependence, the model still failed to generalize on the test set.
These results were expected. Once the series is made stationary, the model must minimize a loss surface with many local minima, which makes optimization difficult; with non-stationary levels, by contrast, there is a clear trend pattern, and a model that avoids getting stuck in a local minimum will fit it well. It is also worth noting that the high $R^2$ on levels largely reflects the strong persistence of prices (a prediction close to yesterday's price already looks accurate), not necessarily genuine forecasting skill.
Step 3¶
To solve this step we used the same data as in steps 1 and 2, namely Bitcoin's price movements. We processed daily closing prices $Y_t$ from 2017 through 2023 using distinct transformations to handle the inherent non-stationarity of financial data. While we used standard log returns, defined as $$r_t = \ln(Y_t / Y_{t-1}),$$ as a baseline, the more advanced preprocessing involved fractional differentiation to preserve long-term memory. This technique expands the difference operator $(1-B)^d$ using a binomial series, resulting in a transformed series, $$\tilde{Y}_t = \sum_{k=0}^{\infty} w_k Y_{t-k}.$$ The weights $w_k$ are determined iteratively by $$w_k = -w_{k-1} \frac{d - k + 1}{k},$$ where $d$ represents the fractional order (the script defaults to 0.4 before searching) and $B$ is the backshift operator (see Step 1 for more details).
Once the data was prepared, we converted the rolling time-series windows into 2D images using Gramian Angular Fields (GAF) to leverage the pattern recognition capabilities of Convolutional Neural Networks (CNNs) (Oates, Wang 2). We first normalized the series within each window to $\tilde{x}_i$ in the range $[-1, 1]$ and computed the temporal correlation matrix using the trigonometric identity $$G_{i,j} = \cos(\phi_i + \phi_j).$$ By substituting $\phi_i = \arccos(\tilde{x}_i)$, this expression expands to $$G_{i,j} = \tilde{x}_i \tilde{x}_j - \sqrt{1 - \tilde{x}_i^2} \sqrt{1 - \tilde{x}_j^2},$$ effectively encoding the time dependencies into a spatial structure.
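A minimal numerical check of the GAF identity above, on a hypothetical window already rescaled to $[-1, 1]$ (the values are illustrative, not taken from the data):

```python
# Verifies cos(phi_i + phi_j) = x_i x_j - sqrt(1 - x_i^2) sqrt(1 - x_j^2)
# for a toy normalized window; phi = arccos(x) lies in [0, pi], so sin(phi) >= 0.
import numpy as np

x = np.array([-0.8, -0.2, 0.4, 0.9])  # normalized window in [-1, 1]
phi = np.arccos(x)

G_angle = np.cos(phi[:, None] + phi[None, :])            # angular form
G_algebra = np.outer(x, x) - np.outer(np.sqrt(1 - x**2),  # algebraic form
                                      np.sqrt(1 - x**2))

print(np.allclose(G_angle, G_algebra))  # True
```

Each window of the series thus becomes a symmetric matrix that a CNN can treat as an image.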
These images were then fed into a CNN trained with the Huber Loss function (Huber 492–518), which was chosen for its robustness against the extreme outliers often found in cryptocurrency markets. This function behaves quadratically for small errors but linearly for large ones, defined as $$L_\delta(y, f(x)) = \begin{cases} \frac{1}{2}(y - f(x))^2 & \text{for } |y - f(x)| \le \delta \\ \delta |y - f(x)| - \frac{1}{2}\delta^2 & \text{otherwise} \end{cases}$$. To conclude the analysis, we measured the model's performance using the Root Mean Squared Error, calculated as $$RMSE = \sqrt{\frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2},$$ which aggregates the differences between the predicted values $\hat{y}_i$ and the actual target prices $y_i$.
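The piecewise Huber definition can be sketched directly in NumPy (here $\delta = 1$ is an arbitrary illustrative threshold; the script itself uses the Keras `Huber` loss):

```python
# Minimal NumPy version of the piecewise Huber loss defined above.
import numpy as np

def huber(residual, delta=1.0):
    r = np.abs(residual)
    quad = 0.5 * residual**2              # quadratic regime, |r| <= delta
    lin = delta * r - 0.5 * delta**2      # linear regime, |r| > delta
    return np.where(r <= delta, quad, lin)

print(huber(np.array([0.5, 3.0])))  # [0.125 2.5]: quadratic for small errors, linear for large
```

The linear regime is what damps the influence of extreme crypto price jumps on the gradient.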
The code below implements this.
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from statsmodels.tsa.stattools import adfuller
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout, BatchNormalization, Input, LeakyReLU, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.losses import Huber
from tensorflow.keras import regularizers
# ---------------------------------------------------------
# GLOBAL CONFIGURATION
# ---------------------------------------------------------
import warnings
warnings.filterwarnings("ignore")
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (20, 6)
# Set seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)
# HYPERPARAMETERS OPTIMIZED
WINDOW_SIZE = 48
BATCH_SIZE = 32
EPOCHS = 100
LEARNING_RATE = 0.0005
# ---------------------------------------------------------
# 1. DATA LOADING & ADVANCED PREPARATION
# ---------------------------------------------------------
ticker = "BTC-USD"
data = yf.download(ticker, start="2017-01-01", end="2023-12-31", progress=False, auto_adjust=True)
if isinstance(data.columns, pd.MultiIndex):
    prices = data.xs('Close', level=0, axis=1)[ticker].dropna()
else:
    prices = data['Close'].dropna()
# A. Log Returns (Stationary)
log_returns = np.log(prices / prices.shift(1)).dropna()
# B. Optimized Fractional Differencing (Find best 'd')
def get_weights_ffd(d, thres=1e-5, limit=1000):
    w, k = [1.0], 1
    while True:
        w_k = -w[-1] * (d - k + 1) / k
        if abs(w_k) < thres or k >= limit:
            break
        w.append(w_k)
        k += 1
    return np.array(w[::-1])
def frac_diff_ffd(series, d, thres=1e-5):
    # Forward fill to handle tiny gaps before calculation
    series = series.ffill()
    w = get_weights_ffd(d, thres)
    width = len(w) - 1
    if len(series) <= width:
        return pd.Series(dtype=float)
    return series.rolling(window=width + 1).apply(lambda x: np.dot(x, w), raw=True).dropna()
best_d = 0.4 # Default
min_p_value = 1.0
# Search for the minimum 'd' that makes the series stationary (p-value < 0.05)
for d_val in np.linspace(0.1, 0.9, 9):
    temp_diff = frac_diff_ffd(prices, d_val)
    if len(temp_diff) > 100:
        p_val = adfuller(temp_diff.dropna())[1]
        if p_val < 0.05:
            best_d = d_val
            print(f"Best d found: {best_d:.2f} (ADF p-value: {p_val:.4f})")
            break
frac_diff_series = frac_diff_ffd(prices, best_d)
# Alignment
common_index = prices.index.intersection(log_returns.index).intersection(frac_diff_series.index)
prices = prices.loc[common_index]
log_returns = log_returns.loc[common_index]
frac_diff_series = frac_diff_series.loc[common_index]
# ---------------------------------------------------------
# 2. GAF
# ---------------------------------------------------------
def series_to_gaf(window_data):
    # Local min-max scaling per window to [-1, 1]; this captures the *shape*
    # of the trend regardless of the absolute price level
    min_val = np.min(window_data)
    max_val = np.max(window_data)
    if max_val == min_val:
        x_scaled = np.zeros_like(window_data)
    else:
        x_scaled = ((window_data - min_val) / (max_val - min_val)) * 2 - 1
    x_cos = x_scaled
    x_sin = np.sqrt(1 - np.clip(x_scaled**2, 0, 1))
    # Gramian Angular Summation Field
    gaf = np.outer(x_cos, x_cos) - np.outer(x_sin, x_sin)
    return gaf
def create_dataset(series, window_size, flatten=False):
    vals = series.values
    X, y = [], []
    for i in range(window_size, len(vals)):
        window = vals[i-window_size:i]
        target = vals[i]
        img = series_to_gaf(window)
        if flatten:
            X.append(img.flatten())
        else:
            X.append(img)  # shape: (window, window)
        y.append(target)
    X = np.array(X)
    y = np.array(y)
    if not flatten:
        X = X.reshape(-1, window_size, window_size, 1)
    return X, y
# ---------------------------------------------------------
# 3. HIGH-PERFORMANCE MODEL ARCHITECTURES
# ---------------------------------------------------------
def build_cnn_optimized(input_shape):
    """
    Improved CNN with LeakyReLU, L2 regularization, and residual-like depth
    to capture complex GAF patterns.
    """
    model = Sequential([
        Input(shape=input_shape),
        # Block 1: Feature Extraction
        Conv2D(32, (3, 3), padding='same', kernel_regularizer=regularizers.l2(0.001)),
        LeakyReLU(alpha=0.1),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        # Block 2: Abstract Features
        Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(0.001)),
        LeakyReLU(alpha=0.1),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        # Block 3: Deep Features
        Conv2D(128, (3, 3), padding='same'),
        LeakyReLU(alpha=0.1),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        Flatten(),
        # Dense Layers
        Dense(128),
        LeakyReLU(alpha=0.1),
        Dropout(0.4),  # Robust dropout
        Dense(64),
        LeakyReLU(alpha=0.1),
        Dropout(0.3),
        Dense(1, activation='linear')
    ])
    # Huber loss is robust to outliers (spikes in Bitcoin)
    model.compile(optimizer=Adam(learning_rate=LEARNING_RATE), loss=Huber(delta=1.0))
    return model
def build_mlp_optimized(input_dim):
    """Deep MLP for flattened inputs"""
    model = Sequential([
        Input(shape=(input_dim,)),
        Dense(512),
        LeakyReLU(alpha=0.1),
        BatchNormalization(),
        Dropout(0.5),
        Dense(256),
        LeakyReLU(alpha=0.1),
        BatchNormalization(),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dense(1, activation='linear')
    ])
    model.compile(optimizer=Adam(learning_rate=LEARNING_RATE), loss='mse')
    return model
# ---------------------------------------------------------
# 4. TRAINING LOOP
# ---------------------------------------------------------
configs = [
    (prices, "a) Levels (CNN)", "CNN"),
    (log_returns, "b) Log Returns (MLP)", "MLP"),
    (frac_diff_series, f"c) FracDiff (d={best_d:.2f}) (CNN)", "CNN")
]
results = []
print("\n--- STARTING OPTIMIZED TRAINING ---")
for series_data, name, model_type in configs:
    print(f"\n>> Training: {name}")
    # 1. Dataset generation
    is_mlp = (model_type == "MLP")
    X, y = create_dataset(series_data, WINDOW_SIZE, flatten=is_mlp)
    # Chronological train/test split
    split_idx = int(len(X) * 0.8)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]
    dates_test = series_data.index[WINDOW_SIZE + split_idx:]
    # 2. TARGET SCALING (critical fix for Model A)
    # MinMaxScaler bounds the level targets to [0, 1], which is easier for the NN to predict
    if "Levels" in name:
        scaler_y = MinMaxScaler(feature_range=(0, 1))
    else:
        scaler_y = StandardScaler()
    y_train_s = scaler_y.fit_transform(y_train.reshape(-1, 1))
    y_test_s = scaler_y.transform(y_test.reshape(-1, 1))
    # 3. Model
    if is_mlp:
        model = build_mlp_optimized(WINDOW_SIZE * WINDOW_SIZE)
    else:
        model = build_cnn_optimized((WINDOW_SIZE, WINDOW_SIZE, 1))
    # 4. Training callbacks
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=0),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6, verbose=0)
    ]
    # 5. Fit
    history = model.fit(
        X_train, y_train_s,
        validation_split=0.2,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        callbacks=callbacks,
        verbose=0
    )
    # 6. Predict and invert the target scaling
    pred_s = model.predict(X_test, verbose=0)
    pred = scaler_y.inverse_transform(pred_s).flatten()
    # Metrics
    rmse = np.sqrt(mean_squared_error(y_test, pred))
    r2 = r2_score(y_test, pred)
    # Save for plotting
    img_sample = X_test[-1].reshape(WINDOW_SIZE, WINDOW_SIZE)
    results.append({
        "Model": name,
        "RMSE": rmse,
        "R2": r2,
        "y_true": y_test,
        "y_pred": pred,
        "dates": dates_test,
        "hist": history.history,
        "img": img_sample
    })
# ---------------------------------------------------------
# 5. VISUALIZATION (Step 3d)
# ---------------------------------------------------------
for res in results:
    fig, axes = plt.subplots(1, 3, figsize=(24, 6))
    # Plot 1: prediction vs actual (zoomed in on the last 200 points for detail)
    ax1 = axes[0]
    zoom = 200
    dates = res['dates'][-zoom:]
    real = res['y_true'][-zoom:]
    pred = res['y_pred'][-zoom:]
    ax1.plot(dates, real, 'k-', linewidth=1.5, label='Actual')
    ax1.plot(dates, pred, 'r--', linewidth=1.5, label='Predicted')
    ax1.set_title(f"{res['Model']}", fontsize=14, fontweight='bold')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    ax1.tick_params(axis='x', rotation=45)
    # Plot 2: loss curves
    ax2 = axes[1]
    hist = res['hist']
    ax2.plot(hist['loss'], label='Train Loss', color='blue')
    ax2.plot(hist['val_loss'], label='Val Loss', color='orange')
    ax2.set_title("Training Convergence (Huber Loss)", fontsize=14, fontweight='bold')
    ax2.set_xlabel("Epochs")
    ax2.legend()
    ax2.grid(True, linestyle='--')
    # Plot 3: GAF representation of the last test window
    ax3 = axes[2]
    im = ax3.imshow(res['img'], cmap='Spectral', origin='lower', vmin=-1, vmax=1)
    ax3.set_title("GAF Input (Correlation Structure)", fontsize=14, fontweight='bold')
    fig.colorbar(im, ax=ax3, fraction=0.046, pad=0.04)
    ax3.axis('off')
    plt.tight_layout()
    plt.show()
# Summary Table
summary = pd.DataFrame([{k: v for k, v in r.items() if k in ['Model', 'RMSE']} for r in results])
print(summary.to_string(index=False))
Best d found: 0.50 (ADF p-value: 0.0155)

--- STARTING OPTIMIZED TRAINING ---

>> Training: a) Levels (CNN)
>> Training: b) Log Returns (MLP)
>> Training: c) FracDiff (d=0.50) (CNN)
Model RMSE
a) Levels (CNN) 16083.763002
b) Log Returns (MLP) 0.029398
c) FracDiff (d=0.50) (CNN) 1467.682251
d) Prediction performance of the three models¶
When we look at the model trained on the raw price levels (figure below), we see that it falls into a persistence trap. In the prediction plot, the CNN does not really predict anything; it simply repeats the last value it saw, which is why the red line sits almost perfectly on top of the black one, shifted slightly forward. This one-step lag also explains the huge RMSE of roughly 16,084.
The convergence plot backs this up: the validation loss jumps around and stays consistently higher than the training loss, a sign that the model is not learning anything useful beyond memorizing the previous value. The GAF image is mostly a smooth gradient with very little contrast; because raw prices are highly non-stationary and strongly autocorrelated, the image ends up with almost no structure for the CNN to latch onto.
The model built on log returns (shown below) behaves completely differently. Once the series is made stationary, the prediction plot flattens around zero. The model manages to follow the general level of volatility, but it cannot really capture the large jumps, which is expected, since log returns often look like white noise. Here, at least, the convergence plots look healthy: the training and validation losses drop together, suggesting the MLP learned the basic statistical properties of the series without overfitting. The GAF image looks like random noise because, by fully removing the trend and most of the memory, the transformation also destroys the spatial structure the image is supposed to capture.
The fractionally differenced model (shown below) sits between the first two approaches. In the prediction plot, it follows the overall trend better than the log-returns model and avoids the heavy lag we saw with the raw levels. It still misses some of the sharper turning points, but overall it behaves more sensibly. The convergence curve, however, starts off rough: the large early spike in validation loss suggests the model was unstable at first, possibly due to exploding gradients, before it eventually settled down. The GAF image shows a much richer, more complex pattern. This makes sense, since fractional differencing keeps part of the long-term memory while still making the series stationary.
Step 4: Discussion of the different results obtained between the CNN and MLP architectures¶
The differences in our results can be explained as follows:
The MLP treats the time lags as a flat vector of independent features, which works well enough for simple regression but struggles when the underlying distribution shifts, as we saw with the non-stationary raw levels. It essentially overfits the specific price range it trained on and fails when the market moves. The CNN, by contrast, hunts for spatial dependencies, edges, textures, and shapes within the GAF representation. This explains why it failed so badly on the raw levels: because the prices are so highly autocorrelated, the resulting GAF image is just a smooth, featureless gradient. There were no "edges" for the kernels to detect, so the model simply converged to a persistence baseline, repeating the last known value.
That is why the fractionally differenced data was the most interesting case. Unlike the log returns, which looked like static noise to the CNN, fractional differencing preserved enough memory to create a textured, checkerboard-like pattern in the GAF plots. We think this is the only scenario where the CNN has a genuine theoretical edge, because it can spatially map the complex correlation structures that the MLP misses by flattening everything. Essentially, the MLP hits a ceiling because it is too simple for the complex dynamics, while the CNN is more powerful but needs the input "image" to have contrast and structure, which we only really achieved with fractional differentiation.
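The contrast argument can be illustrated with a small standalone sketch on toy series (not our BTC data): a smooth trending window yields a low-contrast GAF image, while a noisy window yields a textured one. The `contrast` helper below is a crude proxy we introduce purely for illustration (mean absolute difference between neighboring pixels).

```python
import numpy as np

def gaf_image(window):
    # Min-max scale to [-1, 1], then build the Gramian Angular Summation Field
    x = (window - window.min()) / (window.max() - window.min()) * 2 - 1
    s = np.sqrt(1 - np.clip(x**2, 0, 1))
    return np.outer(x, x) - np.outer(s, s)

def contrast(img):
    # Hypothetical "texture" proxy: mean absolute difference between
    # vertically adjacent pixels of the GAF image
    return np.mean(np.abs(np.diff(img, axis=0)))

rng = np.random.default_rng(0)
trend = np.linspace(0, 1, 48)    # stand-in for smooth, trending prices
noise = rng.standard_normal(48)  # stand-in for white-noise-like returns

print(f"trend contrast: {contrast(gaf_image(trend)):.4f}")
print(f"noise contrast: {contrast(gaf_image(noise)):.4f}")
```

On these toy inputs the noisy window produces a markedly higher contrast value, matching the intuition that the CNN's kernels have edges to detect only when the series retains irregular structure.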
Conclusion¶
Over the course of these three steps, we learned how different transformations of a time series can completely change both the statistical properties of the data and the way machine-learning models behave. The raw BTC price levels are highly non-stationary and full of long trends, which makes them statistically messy but curiously easy for models to predict, since the models mostly just follow the drift. Once we convert the data to log returns, it becomes nicely stationary, but at the cost of losing almost all signal, so models end up predicting noise. Fractional differencing gave us a useful middle ground: it stabilized the series while keeping some memory.
When we train MLPs and CNNs on each of these versions, the results reflect this trade-off: models trained on raw prices look good on paper but are really just copying yesterday's value, while models trained on log returns are stable but have nothing meaningful to learn. The fractionally differenced version behaved consistently better, as the models capture more structure than with returns while avoiding the misleading drift in the raw levels.
The project demonstrates a more general point: how we transform the data is just as important as the model we use. Good predictive performance comes from balancing the removal of non-stationarity with the retention of enough structure for a model to learn from.
References¶
Granger, Clive W. J., and Roselyne Joyeux. "An Introduction to Long-Memory Time Series Models and Fractional Differencing." Journal of Time Series Analysis, vol. 1, no. 1, 1980, pp. 15–29.
Hosking, J. R. M. "Fractional Differencing." Biometrika, vol. 68, no. 1, 1981, pp. 165–176.
López de Prado, Marcos. Advances in Financial Machine Learning. John Wiley & Sons, 2018.
Leshno, Moshe, et al. “Multilayer Feedforward Networks with a Non-Polynomial Activation Function Can Approximate Any Function.” Neural Networks, vol. 6, no. 6, 1993, pp. 861–867.
Funahashi, Ken-Ichi. “On the Approximate Realization of Continuous Mappings by Neural Networks.” Neural Networks, vol. 2, no. 3, 1989, pp. 183–192.
Huber, Peter J. “Robust Estimation of a Location Parameter.” Breakthroughs in Statistics: Methodology and Distribution, Springer, 1992, pp. 492–518.
Wang, Zhiguang, and Tim Oates. “Imaging Time-Series to Improve Classification and Imputation.” arXiv, 2015, arXiv:1506.00327.