bitcoin-model/model.py
Sam Fredrickson ea8c2092a6 Mega-squash
Implement some basic backtesting.

Switch to simpler log-based projection.

Add improved vol calculation.

Implement trend smoothing.

Add Claude's notes on current model.

Run projections and backtests in parallel.

Add market maturity projection adjustments.

Improve volume handling in market maturity calculations.

New backtest suite proposed by Claude.

Fix create_plots() output.

Merge branch 'new-backtests' into market-maturity-backtests

Add era-aware market maturity adjustments.

Add updated notes from Claude.

Add justfile to simplify organizing results for comparison.

Run more projections from various start dates.

Tuning session; removed market maturity.

The market maturity score only complicated the model with no clear
benefit. Still working on getting the various backtests tuned.

Add Claude's notes from recent session.

More helpful additions to workflow.

New systematic backtest framework.

Add notes on new backtesting framework.

Use ruff linter.

Tweak the backtests.

* Start from 2011 instead of 2013.
* Validate over two years instead of one.

Improve uncertainty estimation.

Add Claude's notes from the last revision.

Add adaptive volatility window.

New, streamlined NOTES.

Fix projection plot bugs.

Update prices.csv.

Actually use long-term vol in adaptive calculation.

Use more conservative 1e-6 to prevent division by zero.

It's an error not to provide halving dates.

Warn when validation period is shorter than projection period.

Update prices.csv

Manage output files in less-janky fashion.

Use market fundamentals instead of empirical era adjustments.

Improve CI coverage.

Use S2F metrics for trend analysis.

Update prices.

Merge branch 'next' into mkt-fndm

Update prices.

Merge branch 'next' into mkt-fndm

Add CDPR plot.

Merge branch 'next' into mkt-fndm

Update prices.

Slight optimization to CDPR plot generation.

Update prices.

Merge branch 'next' into mkt-fndm

Add price to CDPR plot.

Update prices.

Add .private to .gitignore.

Update prices.

import argparse
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns
import shutil
import warnings
from datetime import timedelta
from multiprocessing import Pool
from typing import TextIO
# Output
class Output:
"""
Output ensures result files get written to the right directory.
Example:
output = Output("output", "test-1")
with output.create("summary.txt") as f:
# path to f is "output/test-1/summary.txt"
"""
out_dir: str
def __init__(self, base_dir: str, name: str):
"""
Initialize the output manager. This will both fully delete any existing
output directory, and then create an empty directory for the output.
Args:
base_dir: The root directory for outputs
name: The subdir of the root, where the results files go
"""
self.out_dir = os.path.join(base_dir, name)
shutil.rmtree(self.out_dir, ignore_errors=True)
os.makedirs(self.out_dir)
    def create(self, filename) -> TextIO:
"""
Create a new file for writing in the output directory.
"""
full_path = self.named(filename)
f = open(full_path, "w")
return f
def named(self, filename) -> str:
"""
Get the full path within the output directory for a named results file.
"""
full_path = os.path.join(self.out_dir, filename)
return full_path
# Utility functions
def get_halving_dates():
"""Return known and projected Bitcoin halving dates"""
return pd.to_datetime(
[
"2008-01-03", # Bitcoin genesis block (treat as cycle start)
"2012-11-28", # First halving
"2016-07-09", # Second halving
"2020-05-11", # Third halving
"2024-04-19", # Fourth halving
"2028-04-20", # Fifth halving (projected)
]
)
def get_cycle_position(date, halving_dates):
"""
Calculate position in halving cycle (0 to 1) for a given date.
0 represents a halving event, 1 represents just before the next halving.
"""
if len(halving_dates) == 0:
        raise ValueError("halving dates cannot be empty")
# Convert date to datetime if it's not already
date = pd.to_datetime(date)
# Find the most recent halving before this date
prev_halving = halving_dates[halving_dates <= date].max()
if pd.isna(prev_halving):
return 0.0 # For dates before first halving
# Find next halving
future_halvings = halving_dates[halving_dates > date]
if len(future_halvings) == 0:
# For dates after last known halving, use same cycle length as last known cycle
last_cycle_length = (halving_dates[-1] - halving_dates[-2]).days
days_since_halving = (date - halving_dates[-1]).days
return min(days_since_halving / last_cycle_length, 1.0)
next_halving = future_halvings.min()
# Calculate position as fraction between halvings
days_since_halving = (date - prev_halving).days
cycle_length = (next_halving - prev_halving).days
return min(days_since_halving / cycle_length, 1.0)
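# Illustrative sketch (assumption, not part of the original model): a small
# sanity check of get_cycle_position(). The probe dates below are chosen only
# to land near the start and end of the 2020-2024 cycle.
def _example_cycle_position():
    halvings = get_halving_dates()
    # Shortly after the 2020 halving: position should be near 0.
    early = get_cycle_position("2020-06-11", halvings)
    # Shortly before the 2024 halving: position should be near 1.
    late = get_cycle_position("2024-03-19", halvings)
    assert 0.0 <= early < 0.1 and 0.9 < late <= 1.0
    return early, late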
def format_price(x, pos):
    """Format large numbers in K/M/B form (matplotlib FuncFormatter callback; pos is unused)."""
if abs(x) >= 1e9:
return f"${x/1e9:.1f}B"
if abs(x) >= 1e6:
return f"${x/1e6:.1f}M"
if abs(x) >= 1e3:
return f"${x/1e3:.1f}K"
if abs(x) >= 1:
return f"${x:.0f}"
return f"${x:.2f}" # For values less than $1, show cents
def get_nice_price_points(min_price, max_price):
"""
Generate a reasonable set of price points for the y-axis that look clean
and cover the range without cluttering the chart.
"""
# Handle zero or negative prices
min_price = max(min_price, 0.0001) # Set minimum price to $0.0001
log_min = np.floor(np.log10(min_price))
log_max = np.ceil(np.log10(max_price))
price_points = []
# For very large ranges (spanning more than 4 orders of magnitude),
# only use powers of 10 and mid-points
if log_max - log_min > 4:
for exp in range(int(log_min), int(log_max + 1)):
base = 10**exp
# Add main power of 10
if min_price <= base <= max_price:
price_points.append(base)
# Add mid-point if range is large enough
if min_price <= base * 5 <= max_price and exp > log_min:
price_points.append(base * 5)
else:
# For smaller ranges, use 1, 2, 5 sequence
for exp in range(int(log_min), int(log_max + 1)):
for mult in [1, 2, 5]:
point = mult * 10**exp
if min_price <= point <= max_price:
price_points.append(point)
return np.array(price_points)
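# Illustrative sketch (assumption, not part of the original model): what the two
# axis helpers above produce for a wide log-scale price range.
def _example_price_ticks():
    points = get_nice_price_points(0.5, 250_000)  # spans ~6 orders of magnitude
    labels = [format_price(p, None) for p in points]
    # On a range this wide the helper returns powers of ten plus 5x midpoints
    # (1, 5, 10, 50, ..., 100_000), formatted as "$1", "$5", ..., "$100.0K".
    return list(zip(points, labels))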
# Market metrics
class MarketFundamentals:
"""
Calculate and track fundamental market metrics for Bitcoin.
Designed to be extensible for additional metrics.
"""
def __init__(self):
# Constants
self.GENESIS_DATE = pd.Timestamp("2009-01-03")
self.BLOCKS_PER_DAY = 144
self.HALVING_INTERVAL = 210000 # blocks
# Volatility adjustment parameters (from our tuning)
self.VOLUME_SCALE = 70
self.DEPTH_SCALE = 7
self.BASE_ADJUSTMENT = 0.68
def calculate_total_supply(self, date):
"""Calculate total Bitcoin supply at a given date."""
days_since_genesis = (date - self.GENESIS_DATE).days
if days_since_genesis < 0:
return 0
total_supply = 0
remaining_blocks = days_since_genesis * self.BLOCKS_PER_DAY
current_reward = 50
while remaining_blocks > 0 and current_reward >= 0.01:
blocks_at_this_reward = min(remaining_blocks, self.HALVING_INTERVAL)
total_supply += blocks_at_this_reward * current_reward
remaining_blocks -= blocks_at_this_reward
current_reward /= 2
# Adjust for missed blocks and lost coins
total_supply *= 0.95 # Account for varying block times
total_supply *= 0.93 # Estimate for lost/inaccessible coins
return total_supply
def get_block_reward(self, date):
"""Get Bitcoin block reward at a given date."""
days_since_genesis = (date - self.GENESIS_DATE).days
if days_since_genesis < 0:
return 0
halvings = days_since_genesis // (4 * 365) # Approximate halving periods
return 50 / (2**halvings)
def calculate_supply_metrics(self, date):
"""Calculate supply-related metrics."""
total_supply = self.calculate_total_supply(date)
block_reward = self.get_block_reward(date)
daily_new_supply = block_reward * self.BLOCKS_PER_DAY
return {
"total_supply": total_supply,
"daily_new_supply": daily_new_supply,
"supply_growth_rate": daily_new_supply / total_supply,
"stock_to_flow": total_supply / (daily_new_supply * 365), # Annualized
}
def calculate_market_metrics(self, df, date, window=30):
"""Calculate market activity metrics."""
recent_data = df[df["Date"] <= date].tail(window)
if len(recent_data) < window:
return {"avg_volume": 0, "price_volatility": 0, "price_impact": 0}
avg_volume = recent_data["Volume"].mean()
price_volatility = recent_data["Close"].pct_change().std()
price_impact = recent_data["Close"].std() / recent_data["Close"].mean()
return {
"avg_volume": avg_volume,
"price_volatility": price_volatility,
"price_impact": price_impact,
}
def get_market_maturity_metrics(self, df, date, window=30):
"""
Combine supply and market metrics to assess market maturity.
"""
supply_metrics = self.calculate_supply_metrics(date)
market_metrics = self.calculate_market_metrics(df, date, window)
# Calculate combined metrics
volume_to_supply = market_metrics["avg_volume"] / supply_metrics["total_supply"]
market_depth = volume_to_supply / (market_metrics["price_impact"] + 0.001)
return {
"volume_to_supply": volume_to_supply,
"supply_growth_rate": supply_metrics["supply_growth_rate"],
"market_depth": market_depth,
"stock_to_flow": supply_metrics["stock_to_flow"],
"price_impact": market_metrics["price_impact"],
}
def calculate_volatility_adjustment(self, metrics):
"""
Calculate volatility adjustment based on market metrics.
"""
# Supply-based component
supply_based_vol = np.sqrt(metrics["supply_growth_rate"] * 365 * 100)
# Market maturity component
maturity_factor = 1 - np.clip(
metrics["volume_to_supply"] * self.VOLUME_SCALE, 0, 0.6
)
# Market depth component
depth_factor = np.clip(
1 / np.sqrt(1 + metrics["market_depth"] * self.DEPTH_SCALE), 0.7, 1.3
)
# Combine factors
adjustment = (
self.BASE_ADJUSTMENT
* (1 + supply_based_vol)
* maturity_factor
* depth_factor
)
# Ensure reasonable bounds
return np.clip(adjustment, 0.65, 0.75)
def calculate_confidence_adjustment(self, metrics, level):
"""Calculate confidence interval adjustments with more sensitive market metrics."""
# More granular depth impact
if metrics["market_depth"] > 0.1:
depth_factor = 0.5
elif metrics["market_depth"] > 0.05:
depth_factor = 0.7
else:
depth_factor = 1.0
# More granular volume impact
if metrics["volume_to_supply"] > 0.015:
volume_factor = 0.5
elif metrics["volume_to_supply"] > 0.008:
volume_factor = 0.7
else:
volume_factor = 1.0
depth_impact = np.clip(metrics["market_depth"] * 0.15 * depth_factor, 0, 0.15)
vol_impact = np.clip(metrics["volume_to_supply"] * 20 * volume_factor, 0, 0.15)
total_adjustment = (depth_impact + vol_impact) * 0.4
if level >= 0.95:
total_adjustment *= 0.4
return level + (1 - level) * total_adjustment
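# Illustrative sketch (assumption, not part of the original model): the supply-side
# metrics are pure functions of the date, so they can be inspected without any price
# data. Note the block reward here comes from an approximate 4-year schedule, so it
# can lag the real halving dates by a few months.
def _example_supply_metrics():
    fundamentals = MarketFundamentals()
    metrics = fundamentals.calculate_supply_metrics(pd.Timestamp("2024-06-01"))
    # Returns a dict with total_supply, daily_new_supply, supply_growth_rate,
    # and stock_to_flow (annualized flow).
    return metrics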
def compare_adjustments(df, fundamentals):
"""
Compare fundamental-based adjustments with original era-based ones.
"""
# Sample dates for comparison
date_range = pd.date_range(start=df["Date"].min(), end=df["Date"].max(), freq="30D")
results = []
for date in date_range:
# Calculate era-based adjustment
if date < pd.Timestamp("2017-12-10"):
era_adj = 0.71 # early era
elif date < pd.Timestamp("2020-01-01"):
era_adj = 0.69 # transition era
else:
era_adj = 0.67 # mature era
# Calculate fundamental-based adjustment
metrics = fundamentals.get_market_maturity_metrics(df, date)
fund_adj = fundamentals.calculate_volatility_adjustment(metrics)
results.append(
{
"date": date,
"era_adjustment": era_adj,
"fundamental_adjustment": fund_adj,
"metrics": metrics,
}
)
return pd.DataFrame(results)
# Analysis functions
def analyze_trends(df):
"""
Analyze Bitcoin price trends using log returns with S2F awareness.
"""
df = df.copy()
fundamentals = MarketFundamentals()
# Get halving dates and calculate cycle position
halving_dates = get_halving_dates()
df["Cycle_Position"] = df["Date"].apply(
lambda x: get_cycle_position(x, halving_dates)
)
df["Cycle_Days"] = (df["Cycle_Position"] * 4 * 365).round().astype(int)
# Calculate S2F metrics for each date
supply_metrics = [
fundamentals.calculate_supply_metrics(date) for date in df["Date"]
]
df["S2F_Ratio"] = [m["stock_to_flow"] for m in supply_metrics]
df["S2F_Change"] = df["S2F_Ratio"].pct_change()
# Calculate log returns and basic cycle returns
df["Log_Price"] = np.log(df["Close"])
df["Log_Return"] = df["Log_Price"].diff()
# Calculate cycle-based returns
position_returns = df.groupby("Cycle_Days")["Log_Return"].mean()
# Calculate S2F impact on returns
s2f_impact = df.groupby("Cycle_Days")["S2F_Change"].mean()
# Smooth both components
window = 60
smoothed_cycle_returns = position_returns.rolling(
window=window,
center=True,
min_periods=int(window / 2),
).mean()
smoothed_s2f_impact = s2f_impact.rolling(
window=window,
center=True,
min_periods=int(window / 2),
).mean()
# Fill NaN values
    smoothed_cycle_returns = smoothed_cycle_returns.bfill().ffill()
    smoothed_s2f_impact = smoothed_s2f_impact.bfill().ffill()
# Combine cycle returns with S2F impact
s2f_weight = 0.3 # Adjustable parameter
combined_returns = (
smoothed_cycle_returns * (1 - s2f_weight) + smoothed_s2f_impact * s2f_weight
)
# Apply dampening using current market metrics
latest_metrics = fundamentals.get_market_maturity_metrics(df, df["Date"].max())
market_adjustment = fundamentals.calculate_volatility_adjustment(latest_metrics)
def adaptive_dampen(x):
if x > 2 * combined_returns.std():
return x * (0.6 * market_adjustment)
elif x < -2 * combined_returns.std():
return x * (0.7 * market_adjustment)
return x * market_adjustment
return combined_returns.map(adaptive_dampen)
def calculate_adaptive_volatility(
df,
short_window=30,
medium_window=90,
long_window=180,
vol_clip_min=0.5,
vol_clip_max=2.0,
):
"""
Calculate volatility with adaptive window sizes based on market conditions.
Returns a single volatility value for the most recent period.
Incorporates long-term volatility as a stability baseline and additional
reference point for regime detection.
"""
df = df.copy()
df["Log_Return"] = np.log(df["Close"]).diff()
# Remove any NaN values that could cause issues
df = df.dropna()
if len(df) < long_window:
# Not enough data, fall back to simple volatility
return df["Log_Return"].std()
# Get recent data for efficiency
lookback = max(long_window * 2, 360) # Use enough data for stable estimates
recent_df = df.iloc[-lookback:].copy() if len(df) > lookback else df.copy()
try:
# Initial volatility estimate using base windows
short_vol = recent_df["Log_Return"].ewm(span=short_window, adjust=False).std()
medium_vol = recent_df["Log_Return"].ewm(span=medium_window, adjust=False).std()
long_vol = recent_df["Log_Return"].ewm(span=long_window, adjust=False).std()
# Ensure we have valid volatility values
if short_vol.iloc[-1] == 0 or np.isnan(short_vol.iloc[-1]):
return df["Log_Return"].std() # Fallback to simple volatility
# Calculate regime indicators for recent period
medium_vol_mean = medium_vol.rolling(min(90, len(recent_df))).mean()
long_vol_mean = long_vol.rolling(min(180, len(recent_df))).mean()
if medium_vol_mean.iloc[-1] == 0:
vol_regime = pd.Series([1.0] * len(recent_df))
else:
# Compare short-term to both medium and long-term volatility
medium_regime = short_vol / medium_vol_mean
long_regime = short_vol / long_vol_mean
# Use the more conservative (higher) regime indicator
vol_regime = pd.concat([medium_regime, long_regime], axis=1).max(axis=1)
vol_regime = vol_regime.clip(vol_clip_min, vol_clip_max)
# Get most recent regime reading
latest_regime = vol_regime.iloc[-1]
# Adjust window sizes based on current regime
adj_factor = 1 / latest_regime
adj_short = max(10, int(short_window * adj_factor)) # Minimum window of 10
adj_medium = max(30, int(medium_window * adj_factor))
adj_long = max(60, int(long_window * adj_factor))
# Calculate final volatilities using adjusted windows
final_short = recent_df["Log_Return"].iloc[-adj_short:].std()
final_medium = recent_df["Log_Return"].iloc[-adj_medium:].std()
final_long = recent_df["Log_Return"].iloc[-adj_long:].std()
# If any volatility measure is NaN or 0, fall back to simple volatility
if np.isnan([final_short, final_medium, final_long]).any() or 0 in [
final_short,
final_medium,
final_long,
]:
return df["Log_Return"].std()
# Calculate regime-based weights, now incorporating long-term volatility
high_vol_weight = (latest_regime - vol_clip_min) / (vol_clip_max - vol_clip_min)
base_weights = np.array([0.2, 0.5, 0.3]) # Short, medium, long weights
stress_weights = np.array(
[0.4, 0.4, 0.2]
) # More weight on short-term during stress
# Interpolate between base and stress weights
weights = (
base_weights * (1 - high_vol_weight) + stress_weights * high_vol_weight
)
# Calculate final volatility using all three timeframes
final_vol = (
final_short * weights[0]
+ final_medium * weights[1]
+ final_long * weights[2]
)
# Add uncertainty adjustment based on regime changes
regime_change = abs(vol_regime.diff()).fillna(0)
regime_change_mean = regime_change.rolling(5, min_periods=1).mean().iloc[-1]
if regime_change_mean == 0:
uncertainty_adjustment = 1.0
else:
regime_change_zscore = regime_change.iloc[-1] / regime_change_mean
uncertainty_adjustment = 1 + np.clip(regime_change_zscore / 2, 0, 0.5)
return max(final_vol * uncertainty_adjustment, df["Log_Return"].std() * 0.5)
except Exception as e:
print(f"Error in adaptive volatility calculation: {e}")
# Fall back to simple volatility calculation
return df["Log_Return"].std()
def calculate_volatility(df, short_window=30, medium_window=90, long_window=180):
"""Calculate volatility using fundamental metrics and adaptive windows."""
df = df.copy()
df["Log_Return"] = np.log(df["Close"]).diff()
if len(df) < 30:
return 0.02 # Reasonable default for very short periods
try:
# Initialize fundamentals calculator
fundamentals = MarketFundamentals()
current_date = df["Date"].max()
# Get recent data for efficiency
lookback = max(long_window * 2, 360)
recent_df = df.iloc[-lookback:].copy() if len(df) > lookback else df.copy()
# Calculate base volatilities
short_vol = recent_df["Log_Return"].ewm(span=short_window, adjust=False).std()
medium_vol = recent_df["Log_Return"].ewm(span=medium_window, adjust=False).std()
long_vol = recent_df["Log_Return"].ewm(span=long_window, adjust=False).std()
if short_vol.iloc[-1] == 0 or np.isnan(short_vol.iloc[-1]):
return df["Log_Return"].std()
# Calculate volatility regime indicators
medium_vol_mean = medium_vol.rolling(min(90, len(recent_df))).mean()
long_vol_mean = long_vol.rolling(min(180, len(recent_df))).mean()
# Compare short-term to both medium and long-term volatility
if medium_vol_mean.iloc[-1] == 0:
vol_regime = pd.Series([1.0] * len(recent_df))
else:
medium_regime = short_vol / medium_vol_mean
long_regime = short_vol / long_vol_mean
vol_regime = pd.concat([medium_regime, long_regime], axis=1).max(axis=1)
vol_regime = vol_regime.clip(0.5, 2.0)
latest_regime = vol_regime.iloc[-1]
# Get market metrics
metrics = fundamentals.get_market_maturity_metrics(df, current_date)
# Calculate adaptive weights
high_vol_weight = (latest_regime - 0.5) / 1.5 # 1.5 = 2.0 - 0.5
base_weights = np.array([0.2, 0.5, 0.3])
stress_weights = np.array([0.4, 0.4, 0.2])
weights = (
base_weights * (1 - high_vol_weight) + stress_weights * high_vol_weight
)
# Calculate final volatilities
final_short = recent_df["Log_Return"].iloc[-short_window:].std()
final_medium = recent_df["Log_Return"].iloc[-medium_window:].std()
final_long = recent_df["Log_Return"].iloc[-long_window:].std()
if np.isnan([final_short, final_medium, final_long]).any() or 0 in [
final_short,
final_medium,
final_long,
]:
return df["Log_Return"].std()
# Apply market-based adjustment
market_adjustment = fundamentals.calculate_volatility_adjustment(metrics)
# Calculate final volatility
final_vol = (
final_short * weights[0]
+ final_medium * weights[1]
+ final_long * weights[2]
) * market_adjustment
return max(final_vol, df["Log_Return"].std() * 0.5)
except Exception as e:
print(f"Error in volatility calculation: {e}")
return df["Log_Return"].std()
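# Illustrative sketch (assumption, not part of the original model): exercise
# calculate_volatility() on synthetic data. The random-walk parameters are made
# up purely to produce a well-formed DataFrame with Date, Close, and Volume.
def _example_volatility_on_synthetic_data():
    rng = np.random.default_rng(0)
    dates = pd.date_range("2019-01-01", periods=400, freq="D")
    prices = 10_000 * np.exp(np.cumsum(rng.normal(0.001, 0.03, size=len(dates))))
    df = pd.DataFrame(
        {
            "Date": dates,
            "Close": prices,
            "Volume": rng.uniform(1e4, 5e4, size=len(dates)),
        }
    )
    # Returns a single daily volatility estimate on the log-return scale.
    return calculate_volatility(df)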
def adjust_trend_expectations(expected_returns, cycle_position):
"""
Simple trend adjustment.
"""
if cycle_position > 0.75:
damping_factor = 0.70
else:
damping_factor = 0.85
return expected_returns * damping_factor
def calculate_market_conditions(df, lookback_window=180):
"""
Calculate market condition metrics to inform uncertainty scaling.
"""
df = df.copy() # Avoid modifying original dataframe
metrics = {}
# Use log returns for stability
df["Log_Return"] = np.log(df["Close"]).diff()
# Handle initial NaN values
df["Log_Return"] = df["Log_Return"].fillna(method="bfill")
# Recent vs historical volatility ratio
recent_vol = max(df["Log_Return"].tail(30).std(), 1e-6) # Prevent division by zero
historical_vol = max(df["Log_Return"].tail(lookback_window).std(), 1e-6)
metrics["vol_ratio"] = recent_vol / historical_vol
# Trend strength using log prices
log_prices = np.log(df["Close"])
ma50 = log_prices.rolling(50, min_periods=1).mean()
ma200 = log_prices.rolling(200, min_periods=1).mean()
metrics["trend_strength"] = (ma50.iloc[-1] - ma200.iloc[-1]) / historical_vol
# Drawdown intensity
rolling_max = df["Close"].rolling(lookback_window, min_periods=1).max()
current_drawdown = df["Close"].iloc[-1] / rolling_max.iloc[-1] - 1
metrics["drawdown"] = abs(min(current_drawdown, 0))
return metrics
def get_projection_adjustments(days_forward, current_cycle_position, df):
"""Enhanced projection adjustments using market fundamentals."""
adjustments = np.ones(days_forward)
fundamentals = MarketFundamentals()
# Pre-calculate metrics
lookback = min(365, len(df))
historical_metrics = [
fundamentals.get_market_maturity_metrics(df, date)
for date in df["Date"].tail(lookback)
]
historical_avg_volume_to_supply = np.mean(
[m["volume_to_supply"] for m in historical_metrics]
)
current_metrics = fundamentals.get_market_maturity_metrics(df, df["Date"].max())
base_uncertainty = 0.016 * (1 + current_metrics["supply_growth_rate"] * 365)
vol_factor = 1 + 0.2 * abs(1 - current_metrics["volume_to_supply"] * 50)
market_depth_factor = 1 / np.sqrt(1 + current_metrics["market_depth"] * 5)
s2f_factor = 1 / np.log1p(current_metrics["stock_to_flow"])
regime_scale = np.clip(
current_metrics["volume_to_supply"] / historical_avg_volume_to_supply, 0.88, 1.2
)
for i in range(days_forward):
time_factor = min(
1
+ (i / 365)
* base_uncertainty
* vol_factor
* market_depth_factor
* s2f_factor,
1.20,
)
cycle_position = (current_cycle_position + i / 1460) % 1
local_cycle_factor = 1.15 if cycle_position > 0.75 else 1.0
adjustments[i] = time_factor * local_cycle_factor * regime_scale
adjustments[i] = max(adjustments[i], 1.02 + (i / 365) * 0.01)
return adjustments
def calculate_confidence_intervals(simulated_paths, confidence_levels=[0.95, 0.68]):
"""
Calculate confidence intervals with dynamic quantile selection based on market conditions.
"""
results = {}
for level in confidence_levels:
# Calculate standard error of the median
median_std = np.std(
[np.median(simulated_paths[:, i]) for i in range(simulated_paths.shape[1])]
)
# Adjust quantiles based on estimation uncertainty
adjustment = min(0.1, median_std / np.median(simulated_paths)) # Cap adjustment
# Widen intervals slightly when uncertainty is high
effective_level = level + (1 - level) * adjustment
lower_percentile = (1 - effective_level) * 100 / 2
upper_percentile = 100 - lower_percentile
results[f"Lower_{int(level*100)}"] = np.percentile(
simulated_paths, lower_percentile, axis=1
)
results[f"Upper_{int(level*100)}"] = np.percentile(
simulated_paths, upper_percentile, axis=1
)
return results
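# Illustrative sketch (assumption, not part of the original model): the interval
# helper above expects a (days x simulations) matrix of price paths.
def _example_confidence_intervals():
    rng = np.random.default_rng(1)
    # 90 projected days, 500 Monte Carlo paths of a simple geometric random walk.
    daily_returns = rng.normal(0.0, 0.02, size=(90, 500))
    paths = 30_000 * np.exp(np.cumsum(daily_returns, axis=0))
    bands = calculate_confidence_intervals(paths)
    # Keys look like "Lower_95", "Upper_95", "Lower_68", "Upper_68", each mapping
    # to an array with one value per projected day.
    return bands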
def project_prices(
df, days_forward=365, simulations=1000, confidence_levels=[0.95, 0.68]
):
"""Generate price projections with fundamental-based adjustments."""
df = df.copy()
fundamentals = MarketFundamentals()
df["Log_Price"] = np.log(df["Close"])
df["Log_Return"] = df["Log_Price"].diff()
# Get halving dates and current cycle position
halving_dates = get_halving_dates()
current_date = df["Date"].max()
cycle_position = get_cycle_position(current_date, halving_dates)
current_cycle_days = int(cycle_position * 4 * 365)
# Current price and date
last_price = df["Close"].iloc[-1]
last_date = df["Date"].iloc[-1]
# Generate projection dates
future_dates = pd.date_range(
start=last_date + timedelta(days=1), periods=days_forward, freq="D"
)
# Calculate expected returns
future_cycle_days = [
(current_cycle_days + i) % (4 * 365) for i in range(days_forward)
]
cycle_trends = analyze_trends(df)
expected_returns = np.array(
[cycle_trends.get(day, cycle_trends.mean()) for day in future_cycle_days]
)
# Calculate base volatility
base_volatility = calculate_volatility(df)
# Get projection adjustments
projection_adjustments = get_projection_adjustments(
days_forward, cycle_position, df
)
# Run Monte Carlo simulation
np.random.seed(42) # Restored for reproducibility
simulated_paths = np.zeros((days_forward, simulations))
for sim in range(simulations):
drift = expected_returns
vol = base_volatility
time_scaled_vol = vol * projection_adjustments
returns = np.random.normal(loc=drift, scale=time_scaled_vol, size=days_forward)
cumulative_returns = np.cumsum(returns)
price_path = last_price * np.exp(cumulative_returns)
simulated_paths[:, sim] = price_path
# Calculate results
results = pd.DataFrame(index=future_dates)
results["Median"] = np.percentile(simulated_paths, 50, axis=1)
results["Expected_Trend"] = last_price * np.exp(np.cumsum(drift))
# Calculate confidence intervals
for level in confidence_levels:
metrics = fundamentals.get_market_maturity_metrics(df, current_date)
# Calculate intervals by projection horizon
lower_bounds = []
upper_bounds = []
for day in range(days_forward):
time_factor = (
0.85 if day > 365 else 0.9 if day > 180 else 0.95 if day > 90 else 1.0
)
effective_level = (
fundamentals.calculate_confidence_adjustment(metrics, level)
* time_factor
)
lower_percentile = (1 - effective_level) * 100 / 2
upper_percentile = 100 - lower_percentile
lower_bounds.append(
np.percentile(simulated_paths[day, :], lower_percentile)
)
upper_bounds.append(
np.percentile(simulated_paths[day, :], upper_percentile)
)
results[f"Lower_{int(level*100)}"] = lower_bounds
results[f"Upper_{int(level*100)}"] = upper_bounds
return results
def analyze_bitcoin_prices(csv_path):
"""
Analyze Bitcoin price data to calculate volatility and growth rates.
"""
# Read CSV with proper data types
df = pd.read_csv(csv_path, parse_dates=[0])
# Print first few rows of raw data to inspect
print("\nFirst few rows of raw data:")
print(df.head())
# Print data info to see types and non-null counts
print("\nDataset Info:")
print(df.info())
# Convert price columns to float and handle any potential formatting issues
numeric_columns = ["Price", "Open", "High", "Low", "Vol."] # Added Volume
for col in numeric_columns:
# Remove any commas and 'K'/'M' suffixes
df[col] = df[col].astype(str).str.replace(",", "")
# Convert K to thousands
df[col] = df[col].str.replace("K", "e3")
# Convert M to millions
df[col] = df[col].str.replace("M", "e6")
# Convert B to billions
df[col] = df[col].str.replace("B", "e9")
# Convert to numeric
df[col] = pd.to_numeric(df[col], errors="coerce")
# Rename columns for clarity
df.columns = ["Date", "Close", "Open", "High", "Low", "Volume", "Change"]
# Sort by date in ascending order
df = df.sort_values("Date")
# Print summary statistics after conversion
print("\nPrice Summary After Conversion:")
print(df[["Close", "Open", "High", "Low", "Volume"]].describe())
# Calculate daily returns
df["Daily_Return"] = df["Close"].pct_change()
# Print first few daily returns to verify calculation
print("\nFirst few daily returns:")
print(df[["Date", "Close", "Daily_Return"]].head())
# Check for any infinite or NaN values
print("\nInfinite or NaN value counts:")
print(df.isna().sum())
# Calculate metrics using 365 days for annualization
analysis = {
"period_start": df["Date"].min().strftime("%Y-%m-%d"),
"period_end": df["Date"].max().strftime("%Y-%m-%d"),
"total_days": len(df),
"daily_volatility": df["Daily_Return"].std(),
"annualized_volatility": df["Daily_Return"].std() * np.sqrt(365),
"total_return": (df["Close"].iloc[-1] / df["Close"].iloc[0] - 1) * 100,
"average_daily_return": df["Daily_Return"].mean() * 100,
"average_annual_return": ((1 + df["Daily_Return"].mean()) ** 365 - 1) * 100,
"min_price": df["Low"].min(),
"max_price": df["High"].max(),
"avg_price": df["Close"].mean(),
"start_price": df["Close"].iloc[0],
"end_price": df["Close"].iloc[-1],
}
# Calculate rolling metrics
df["Rolling_Volatility_30d"] = df["Daily_Return"].rolling(
window=30
).std() * np.sqrt(365)
df["Rolling_Return_30d"] = df["Close"].pct_change(periods=30) * 100
return analysis, df
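# Note (assumption, not confirmed by the original repo): the parsing above expects
# an Investing.com-style export in which prices use comma separators and volume
# uses K/M/B suffixes, e.g. a header like
#   "Date","Price","Open","High","Low","Vol.","Change %"
# followed by rows such as (made-up sample values)
#   "12/01/2023","42,100.0","41,800.0","42,500.0","41,200.0","35.20K","0.72%"
# The suffix handling ("K" -> e3, "M" -> e6, "B" -> e9) relies on that format.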
# Main plotting functions
def add_cdpr_plot(df, output: Output):
"""
Add a plot showing the Compounding Daily Periodic Rate (CDPR) over different time periods.
Also includes a logarithmic price axis on the right side.
"""
plt.style.use("seaborn-v0_8")
fig, ax1 = plt.subplots(figsize=(15, 6))
# Calculate CDPR for different time periods
periods = [180, 360, 720]
cdpr = {}
# Find the longest CDPR series length
max_period = max(periods)
daily_returns = df["Close"].pct_change().fillna(0)
for period in periods:
cdpr[f"{period}d CDPR"] = (
daily_returns.rolling(period).apply(
lambda x: (1 + x).prod() ** (1 / period) - 1, raw=True
)
) * 100
        # Drop the warm-up rows so every series covers the same date range
        cdpr[f"{period}d CDPR"] = cdpr[f"{period}d CDPR"][max_period:]
    # Find the non-NaN min and max CDPR values
    cdpr_values = list(cdpr.values())
    min_cdpr = np.nanmin([np.nanmin(values) for values in cdpr_values])
    max_cdpr = np.nanmax([np.nanmax(values) for values in cdpr_values])
    # Align the x-axis with the clipped CDPR and price series (avoids length
    # mismatches when the data is not strictly daily)
    plot_dates = df["Date"].iloc[max_period:]
# Plot CDPR lines on the left axis
for label, values in cdpr.items():
ax1.plot(plot_dates, values, label=label)
# Customize the left axis (CDPR)
ax1.set_xlabel("Date")
ax1.set_ylabel("CDPR (%)")
ax1.grid(True, alpha=0.3)
# Adjust y-axis tick marks and add shaded lines between ticks
yticks = list(np.arange(int(min_cdpr), int(max_cdpr) + 1, 0.5))
ax1.set_yticks(yticks)
ax1.tick_params(axis="y", which="major", labelsize=8)
ax1.set_yticklabels(["{:.1f}%".format(y) for y in yticks])
# Add shaded lines between tick marks
for i in range(1, len(yticks)):
ax1.axhline(
y=yticks[i], color="lightgray", linestyle="--", linewidth=1, alpha=0.5
)
# Create the right axis for price
ax2 = ax1.twinx()
# Plot price on the right axis
price_data = df["Close"].iloc[max_period:]
ax2.semilogy(
plot_dates, price_data, color="#FF6B6B", linewidth=1.5, label="Price (USD)"
)
ax2.set_ylabel("Price (USD)", color="#FF6B6B")
ax2.tick_params(axis="y", labelcolor="#FF6B6B")
# Set human-readable price ticks
price_ticks = [1, 10, 100, 1000, 10000, 100000]
price_labels = ["$1", "$10", "$100", "$1k", "$10k", "$100k"]
ax2.set_yticks(price_ticks)
ax2.set_yticklabels(price_labels)
# Add legends for both axes
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc="upper left")
plt.title("Compounding Daily Periodic Rate (CDPR) and Price")
# Save the plot
filename = output.named("bitcoin_cdpr_plot.png")
plt.tight_layout()
plt.savefig(filename, dpi=150, bbox_inches="tight")
plt.close()
def create_plots(df, output: Output, start=None, end=None, project_days=365):
"""
Create enhanced plots including market maturity visualization.
"""
# Add the new CDPR plot
add_cdpr_plot(df, output)
# Filter data based on date range
mask = pd.Series(True, index=df.index)
if start:
mask &= df["Date"] >= pd.to_datetime(start)
if end:
mask &= df["Date"] <= pd.to_datetime(end)
plot_df = df[mask].copy()
if len(plot_df) == 0:
raise ValueError("No data found for the specified date range")
# Generate projections
projections = project_prices(plot_df, days_forward=project_days)
# Set up the style
plt.style.use("seaborn-v0_8")
# Create figure with adjusted size and spacing
fig = plt.figure(figsize=(15, 15))
# Use GridSpec for better control over subplot spacing
gs = plt.GridSpec(5, 1, height_ratios=[3, 1.5, 1.5, 1.5, 2], hspace=0.4)
# Date range for titles
hist_date_range = f" ({plot_df['Date'].min().strftime('%Y-%m-%d')} to {plot_df['Date'].max().strftime('%Y-%m-%d')})"
# Calculate full date range including projections
full_date_range = pd.date_range(plot_df["Date"].min(), projections.index.max())
# 1. Price history and projections (log scale)
ax1 = fig.add_subplot(gs[0])
# Plot historical prices
ax1.semilogy(plot_df["Date"], plot_df["Close"], "b-", label="Historical Price")
# Plot projections
ax1.semilogy(
projections.index,
projections["Expected_Trend"],
"--",
color="purple",
label="Expected Trend",
)
ax1.semilogy(
projections.index,
projections["Median"],
":",
color="green",
label="Simulated Median",
)
ax1.fill_between(
projections.index,
projections["Lower_95"],
projections["Upper_95"],
alpha=0.2,
color="orange",
label="95% Confidence Interval",
)
ax1.fill_between(
projections.index,
projections["Lower_68"],
projections["Upper_68"],
alpha=0.3,
color="green",
label="68% Confidence Interval",
)
# Customize y-axis
ax1.yaxis.set_major_formatter(plt.FuncFormatter(format_price))
min_price = min(plot_df["Low"].min(), projections["Lower_95"].min())
max_price = max(plot_df["High"].max(), projections["Upper_95"].max())
price_points = get_nice_price_points(min_price, max_price)
ax1.set_yticks(price_points)
ax1.tick_params(axis="y", labelsize=8)
ax1.margins(y=0.02)
ax1.grid(True, which="major", linestyle="-", alpha=0.5)
ax1.grid(True, which="minor", linestyle=":", alpha=0.2)
ax1.set_title("Bitcoin Price History and Projections (Log Scale)" + hist_date_range)
ax1.legend(fontsize=8)
# Set x-axis limits to full range
ax1.set_xlim(full_date_range[0], full_date_range[-1])
ax1.tick_params(axis="x", rotation=45)
# 3. Rolling volatility
ax3 = fig.add_subplot(gs[1])
ax3.plot(
plot_df["Date"],
plot_df["Rolling_Volatility_30d"],
"r-",
label="30-Day Rolling Volatility",
)
# Add empty space to match price plot x-axis
ax3.set_xlim(full_date_range[0], full_date_range[-1])
# Add vertical line to mark start of projections
ax3.axvline(plot_df["Date"].max(), color="gray", linestyle="--", alpha=0.5)
ax3.text(
plot_df["Date"].max(),
ax3.get_ylim()[1],
"Projection Start",
rotation=90,
va="top",
ha="right",
alpha=0.7,
)
ax3.set_title("30-Day Rolling Volatility (Annualized)" + hist_date_range)
ax3.set_ylabel("Volatility")
ax3.grid(True)
ax3.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: "{:.0%}".format(y)))
ax3.legend()
ax3.tick_params(axis="x", rotation=45)
# 4. Returns distribution
ax4 = fig.add_subplot(gs[2])
returns_mean = plot_df["Daily_Return"].mean()
returns_std = plot_df["Daily_Return"].std()
filtered_returns = plot_df["Daily_Return"][
(plot_df["Daily_Return"] > returns_mean - 5 * returns_std)
& (plot_df["Daily_Return"] < returns_mean + 5 * returns_std)
]
sns.histplot(filtered_returns, bins=100, ax=ax4)
ax4.set_title(
"Distribution of Daily Returns (Excluding Extreme Outliers)" + hist_date_range
)
ax4.set_xlabel("Daily Return")
ax4.set_ylabel("Count")
ax4.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: "{:.0%}".format(x)))
# Add mean line
ax4.axvline(filtered_returns.mean(), color="r", linestyle="dashed", linewidth=1)
ax4.text(
filtered_returns.mean(),
ax4.get_ylim()[1],
"Mean",
rotation=90,
va="top",
ha="right",
)
# 5. Projection ranges
ax5 = fig.add_subplot(gs[3:]) # Use last two grid spaces for larger plot
timepoints = np.array(range(30, project_days, 30))
timepoints = timepoints[timepoints <= project_days]
ranges = []
labels = []
positions = []
for t in timepoints:
idx = t - 1
ranges.extend(
[
projections["Lower_95"].iloc[idx],
projections["Lower_68"].iloc[idx],
projections["Median"].iloc[idx],
projections["Upper_68"].iloc[idx],
projections["Upper_95"].iloc[idx],
]
)
labels.extend(["95% Lower", "68% Lower", "Median", "68% Upper", "95% Upper"])
positions.extend([t] * 5)
ax5.scatter(positions, ranges, alpha=0.6)
for t in timepoints:
idx = positions.index(t)
ax5.plot([t] * 5, ranges[idx : idx + 5], "k-", alpha=0.3)
ax5.set_yscale("log")
min_price = min(ranges)
max_price = max(ranges)
price_points = get_nice_price_points(min_price, max_price)
ax5.set_yticks(price_points)
ax5.yaxis.set_major_formatter(plt.FuncFormatter(format_price))
ax5.set_title("Projected Price Ranges at Future Timepoints")
ax5.set_xlabel("Days Forward")
ax5.set_ylabel("Price (USD)")
ax5.grid(True, alpha=0.3)
ax5.set_xticks(timepoints)
# Save the plot
start_str = start if start else plot_df["Date"].min().strftime("%Y-%m-%d")
end_str = end if end else plot_df["Date"].max().strftime("%Y-%m-%d")
filename = output.named(
f"bitcoin_analysis_{start_str}_to_{end_str}_with_projections.png"
)
# Use tight_layout with adjusted parameters
plt.tight_layout(pad=2.0)
plt.savefig(filename, dpi=300, bbox_inches="tight")
plt.close()
return projections
def visualize_cycle_patterns(df, output: Output, cycle_returns, cycle_volatility):
"""
Create enhanced visualization of Bitcoin's behavior across halving cycles.
"""
plt.style.use("seaborn-v0_8")
fig = plt.figure(figsize=(15, 15))
# Create a 3x1 subplot grid with different heights
gs = plt.GridSpec(3, 1, height_ratios=[2, 1, 2], hspace=0.3)
# Plot 1: Returns across cycle with confidence bands
ax1 = plt.subplot(gs[0])
# Convert days to percentage through cycle
x_points = np.array(cycle_returns.index) / (4 * 365) * 100
# Calculate rolling mean and standard deviation for confidence bands
window = 30 # 30-day window
rolling_mean = pd.Series(cycle_returns.values).rolling(window=window).mean()
rolling_std = pd.Series(cycle_returns.values).rolling(window=window).std()
# Plot confidence bands
ax1.fill_between(
x_points,
(rolling_mean - 2 * rolling_std) * 100,
(rolling_mean + 2 * rolling_std) * 100,
alpha=0.2,
color="blue",
label="95% Confidence",
)
ax1.fill_between(
x_points,
(rolling_mean - rolling_std) * 100,
(rolling_mean + rolling_std) * 100,
alpha=0.3,
color="blue",
label="68% Confidence",
)
# Plot average returns
ax1.plot(
x_points,
cycle_returns.values * 100,
"b-",
label="Average Daily Return",
linewidth=2,
)
ax1.axhline(y=0, color="gray", linestyle="--", alpha=0.5)
# Add vertical lines for each year in cycle
for year in range(1, 4):
ax1.axvline(x=year * 25, color="gray", linestyle=":", alpha=0.3)
ax1.text(
year * 25,
ax1.get_ylim()[1],
f"Year {year}",
rotation=90,
va="top",
ha="right",
alpha=0.7,
)
# Highlight halving points
ax1.axvline(x=0, color="red", linestyle="--", alpha=0.5, label="Halving Event")
ax1.axvline(x=100, color="red", linestyle="--", alpha=0.5)
ax1.set_title("Bitcoin Return Patterns Across Halving Cycle", pad=20)
ax1.set_xlabel("Position in Cycle (%)")
ax1.set_ylabel("Average Daily Return (%)")
ax1.grid(True, alpha=0.3)
ax1.legend(loc="upper right")
# Plot 2: Volatility across cycle
ax2 = plt.subplot(gs[1])
# Calculate rolling volatility confidence bands
vol_mean = pd.Series(cycle_volatility.values).rolling(window=window).mean()
vol_std = pd.Series(cycle_volatility.values).rolling(window=window).std()
# Plot volatility with confidence bands
annualized_factor = np.sqrt(365) * 100
ax2.fill_between(
x_points,
(vol_mean - 2 * vol_std) * annualized_factor,
(vol_mean + 2 * vol_std) * annualized_factor,
alpha=0.2,
color="red",
label="95% Confidence",
)
ax2.plot(
x_points,
cycle_volatility.values * annualized_factor,
"r-",
label="Annualized Volatility",
linewidth=2,
)
# Add year markers
for year in range(1, 4):
ax2.axvline(x=year * 25, color="gray", linestyle=":", alpha=0.3)
ax2.axvline(x=0, color="red", linestyle="--", alpha=0.5)
ax2.axvline(x=100, color="red", linestyle="--", alpha=0.5)
ax2.set_xlabel("Position in Cycle (%)")
ax2.set_ylabel("Volatility (%)")
ax2.grid(True, alpha=0.3)
ax2.legend(loc="upper right")
# Plot 3: Average price trajectory within cycles
ax3 = plt.subplot(gs[2])
# Define a color scheme for cycles
cycle_colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd"]
# Calculate average price path for each cycle
halving_dates = get_halving_dates()
cycles = []
for i in range(len(halving_dates) - 1):
cycle_start = halving_dates[i]
cycle_end = halving_dates[i + 1]
cycle_data = df[(df["Date"] >= cycle_start) & (df["Date"] < cycle_end)].copy()
if len(cycle_data) > 0:
cycle_data["Cycle_Pct"] = (
(cycle_data["Date"] - cycle_start).dt.total_seconds()
/ (cycle_end - cycle_start).total_seconds()
* 100
)
cycle_data["Normalized_Price"] = (
cycle_data["Close"] / cycle_data["Close"].iloc[0]
)
cycles.append(cycle_data)
# Plot each historical cycle with distinct colors
for i, cycle in enumerate(cycles):
ax3.semilogy(
cycle["Cycle_Pct"],
cycle["Normalized_Price"],
color=cycle_colors[i],
alpha=0.7,
label=f'Cycle {i+1} ({cycle["Date"].iloc[0].strftime("%Y")}-{cycle["Date"].iloc[-1].strftime("%Y")})',
)
# Calculate and plot average cycle
if cycles:
avg_cycle = pd.concat(
[c.set_index("Cycle_Pct")["Normalized_Price"] for c in cycles], axis=1
)
avg_cycle_mean = avg_cycle.mean(axis=1)
avg_cycle_std = avg_cycle.std(axis=1)
ax3.semilogy(
avg_cycle_mean.index,
avg_cycle_mean.values,
"k-",
linewidth=2,
label="Average Cycle",
)
ax3.fill_between(
avg_cycle_mean.index,
avg_cycle_mean * np.exp(-2 * avg_cycle_std),
avg_cycle_mean * np.exp(2 * avg_cycle_std),
alpha=0.2,
color="gray",
)
# Add year markers
for year in range(1, 4):
ax3.axvline(x=year * 25, color="gray", linestyle=":", alpha=0.3)
ax3.axvline(x=0, color="red", linestyle="--", alpha=0.5)
ax3.axvline(x=100, color="red", linestyle="--", alpha=0.5)
ax3.set_title("Price Performance Across Cycles (Normalized)", pad=20)
ax3.set_xlabel("Position in Cycle (%)")
ax3.set_ylabel("Price (Relative to Cycle Start)")
ax3.grid(True, alpha=0.3)
ax3.legend(loc="center left", bbox_to_anchor=(1.02, 0.5))
# Add current cycle position marker on all plots
current_position = get_cycle_position(df["Date"].max(), halving_dates) * 100
for ax in [ax1, ax2, ax3]:
ax.axvline(
x=current_position,
color="green",
linestyle="-",
alpha=0.5,
label="Current Position",
)
# Main title for the figure
fig.suptitle("Bitcoin Halving Cycle Analysis", fontsize=16, y=0.95)
# Adjust layout to prevent legend cutoff
plt.tight_layout()
# Save the plot
filename = output.named("bitcoin_cycle_patterns.png")
plt.savefig(filename, dpi=300, bbox_inches="tight")
plt.close()
def create_backtest_plot(
df,
output: Output,
backtest_date="2020-05-11",
start_date="2012-11-28",
project_days=1650,
):
"""
Create a plot comparing actual price history against model projections from a historical date.
Returns both the projections and performance metrics.
Args:
df: DataFrame with historical price data
backtest_date: Date to start the backtest from
start_date: Date to start considering historical data
project_days: Number of days to project forward from backtest date
Returns:
tuple: (projections DataFrame, metrics dictionary)
"""
# Convert dates to datetime
backtest_date = pd.to_datetime(backtest_date)
start_date = pd.to_datetime(start_date)
# Validate dates
if start_date >= backtest_date:
raise ValueError("start_date must be earlier than backtest_date")
# Clean the data: remove rows with zero or invalid prices and filter by date
df = df[(df["Close"] > 0) & (df["Date"] >= start_date)].copy()
# Split data into training (before backtest date) and validation (after backtest date)
training_df = df[df["Date"] <= backtest_date].copy()
validation_df = df[df["Date"] > backtest_date].copy()
# Check if we have enough data
if len(training_df) < 30: # Require at least 30 days of training data
raise ValueError("Insufficient training data before backtest date")
if len(validation_df) < project_days:
warnings.warn(
f"Validation period ({len(validation_df)} days) shorter than projection period ({project_days} days)"
)
# Generate historical projections using only training data
historical_projections = project_prices(training_df, days_forward=project_days)
# Set up the plot
plt.style.use("seaborn-v0_8")
    _, ax = plt.subplots(figsize=(15, 10))
# Plot training data
heading_label = f'Historical Price (Training: {start_date.strftime("%Y-%m-%d")} to {backtest_date.strftime("%Y-%m-%d")})'
ax.semilogy(
training_df["Date"],
training_df["Close"],
"b-",
label=heading_label,
alpha=0.7,
)
# Plot validation data
ax.semilogy(
validation_df["Date"],
validation_df["Close"],
"g-",
label=f'Actual Price (Validation: {backtest_date.strftime("%Y-%m-%d")} onwards)',
linewidth=2,
)
# Plot projections
ax.semilogy(
historical_projections.index,
historical_projections["Expected_Trend"],
"--",
color="purple",
label="Model Projection (Expected)",
)
ax.semilogy(
historical_projections.index,
historical_projections["Median"],
":",
color="orange",
label="Model Projection (Median)",
)
# Add confidence intervals
ax.fill_between(
historical_projections.index,
historical_projections["Lower_95"],
historical_projections["Upper_95"],
alpha=0.2,
color="orange",
label="95% Confidence Interval",
)
ax.fill_between(
historical_projections.index,
historical_projections["Lower_68"],
historical_projections["Upper_68"],
alpha=0.3,
color="green",
label="68% Confidence Interval",
)
# Customize y-axis
ax.yaxis.set_major_formatter(plt.FuncFormatter(format_price))
# Set custom y-axis ticks
min_price = min(
df["Low"].min(),
historical_projections["Lower_95"].min(),
0.0001, # Set minimum price floor
)
max_price = max(df["High"].max(), historical_projections["Upper_95"].max())
price_points = get_nice_price_points(min_price, max_price)
ax.set_yticks(price_points)
# Add halving lines
halving_dates = get_halving_dates()
relevant_halvings = halving_dates[
(halving_dates >= start_date) & (halving_dates <= validation_df["Date"].max())
]
for date in relevant_halvings:
ax.axvline(date, color="red", linestyle="--", alpha=0.3)
ax.text(
date,
ax.get_ylim()[1],
"Halving",
rotation=90,
va="top",
ha="right",
alpha=0.7,
)
# Calculate model performance metrics
metrics = {}
if len(validation_df) > 0:
# Create a common date range for comparison
actual_prices = validation_df.set_index("Date")["Close"]
common_dates = actual_prices.index.intersection(historical_projections.index)
if len(common_dates) > 0:
actual_aligned = actual_prices[common_dates]
projections_aligned = historical_projections.loc[common_dates]
# Calculate metrics using aligned data
metrics = {
"mape": np.mean(
np.abs(
(actual_aligned - projections_aligned["Expected_Trend"])
/ actual_aligned
)
)
* 100,
"rmse": np.sqrt(
np.mean(
(actual_aligned - projections_aligned["Expected_Trend"]) ** 2
)
),
"max_error": np.max(
np.abs(actual_aligned - projections_aligned["Expected_Trend"])
),
"coverage_95": np.mean(
(actual_aligned >= projections_aligned["Lower_95"])
& (actual_aligned <= projections_aligned["Upper_95"])
)
* 100,
"coverage_68": np.mean(
(actual_aligned >= projections_aligned["Lower_68"])
& (actual_aligned <= projections_aligned["Upper_68"])
)
* 100,
}
# Add metrics to plot
metrics_text = (
f"Model Performance Metrics:\n"
f"MAPE: {metrics['mape']:.1f}%\n"
f"RMSE: ${metrics['rmse']:,.0f}\n"
f"Max Error: ${metrics['max_error']:,.0f}\n"
f"95% CI Coverage: {metrics['coverage_95']:.1f}%\n"
f"68% CI Coverage: {metrics['coverage_68']:.1f}%"
)
ax.text(
0.02,
0.98,
metrics_text,
transform=ax.transAxes,
verticalalignment="top",
bbox=dict(facecolor="white", alpha=0.8),
)
# Customize plot
ax.set_title(
f'Bitcoin Price: Model Backtest\nTraining: {start_date.strftime("%Y-%m-%d")} to {backtest_date.strftime("%Y-%m-%d")}'
)
ax.set_xlabel("Date")
ax.set_ylabel("Price (USD)")
ax.grid(True, which="major", linestyle="-", alpha=0.5)
ax.grid(True, which="minor", linestyle=":", alpha=0.2)
ax.legend(loc="center left", bbox_to_anchor=(1.02, 0.5))
# Adjust layout and save
plt.tight_layout()
filename = output.named(
f'bitcoin_backtest_{start_date.strftime("%Y%m%d")}_to_{backtest_date.strftime("%Y%m%d")}.png'
)
plt.savefig(filename, dpi=300, bbox_inches="tight")
plt.close()
return historical_projections, metrics
def run_projection(args):
df, start, output = args
_ = create_plots(df, output, start=start, project_days=365 * 4)
def run_projections(df, output: Output):
# Create main projection
projection_starts = [
"2011-01-01",
"2012-01-01",
"2013-01-01",
"2014-01-01",
"2015-01-01",
"2016-07-09",
]
args = [(df, start, output) for start in projection_starts]
with Pool() as pool:
pool.map(run_projection, args)
def run_single_backtest(args):
"""
Run a single backtest with the given parameters.
Must be defined at module level for multiprocessing.
Args:
args: tuple of (params dict, DataFrame)
"""
params, df, output = args
try:
# Create a copy of params without the description
backtest_params = params.copy()
backtest_params.pop("description", None)
projections, metrics = create_backtest_plot(df, output, **backtest_params)
# Ensure metrics has all required keys with default values
if metrics is None:
metrics = {}
default_metrics = {
"mape": 0.0,
"rmse": 0.0,
"max_error": 0.0,
"coverage_95": 0.0,
"coverage_68": 0.0,
}
# Update metrics with defaults for any missing keys
metrics = {**default_metrics, **metrics}
return {
"params": params,
"projections": projections,
"metrics": metrics,
"success": True,
}
except Exception as e:
print(
f"Error in backtest for period {params['description']}: {str(e)}"
) # Debug print
return {"params": params, "error": str(e), "success": False}
def run_systematic_backtests(
df, output: Output, validation_years=2, min_training_years=8
):
"""
Run a comprehensive suite of backtests with consistent validation periods.
Uses sliding windows for both start and end dates.
"""
# Convert years to days
validation_days = validation_years * 365
min_training_days = min_training_years * 365
# Define start date for reliable data
mature_start = pd.Timestamp("2011-01-01")
last_possible_start = df["Date"].max() - pd.Timedelta(
days=min_training_days + validation_days
)
end_date = df["Date"].max() - pd.Timedelta(days=validation_days)
if mature_start >= last_possible_start:
raise ValueError(
f"Insufficient data for backtesting with current parameters:\n"
f"- Data range: {mature_start} to {df['Date'].max()}\n"
f"- Minimum training period: {min_training_years} years\n"
f"- Validation period: {validation_years} years"
)
old_backtests = [
{
"start_date": "2016-07-09",
"backtest_date": "2024-04-19",
"project_days": validation_days,
"description": "Second until fourth halving",
},
{
"start_date": "2013-01-01", # Includes pre-futures for cycle learning
"backtest_date": "2020-05-11",
"project_days": validation_days,
"description": "Post-Futures Window with two cycles of training",
},
{
"start_date": "2014-01-01",
"backtest_date": "2021-12-31",
"project_days": validation_days,
"description": "Cross-Regime Test with two cycles of training",
},
{
"start_date": "2015-01-01",
"backtest_date": "2022-01-01",
"project_days": validation_days,
"description": "Recent Window focusing on post-2022 behavior",
},
]
backtest_periods = []
backtest_periods.extend(old_backtests)
# Generate backtest periods with sliding windows
window_start = mature_start
step = pd.Timedelta(days=180) # 6 month steps
while window_start <= last_possible_start:
backtest_date = window_start + pd.Timedelta(days=min_training_days)
backtest_periods.append(
{
"start_date": window_start.strftime("%Y-%m-%d"),
"backtest_date": backtest_date.strftime("%Y-%m-%d"),
"project_days": validation_days,
"description": f"Training {window_start.strftime('%Y-%m-%d')} to {backtest_date.strftime('%Y-%m-%d')}",
}
)
window_start += step
# Add specific periods of interest
special_periods = []
# Halving-based periods
halving_dates = get_halving_dates()
relevant_halvings = [
h
for h in halving_dates
if h < end_date and h > (mature_start + pd.Timedelta(days=min_training_days))
]
for halving in relevant_halvings:
earliest_start = halving - pd.Timedelta(days=min_training_days)
if earliest_start >= mature_start:
special_periods.append(
{
"start_date": earliest_start.strftime("%Y-%m-%d"),
"backtest_date": halving.strftime("%Y-%m-%d"),
"project_days": validation_days,
"description": f"Pre-halving {halving.strftime('%Y')}",
}
)
# Market structure change periods
important_dates = [
("2017-12-01", "Post-futures introduction"),
("2020-03-01", "Post-COVID crash"),
("2021-11-01", "Post-2021 peak"),
]
for date, description in important_dates:
test_date = pd.Timestamp(date)
if test_date < end_date:
earliest_start = test_date - pd.Timedelta(days=min_training_days)
if earliest_start >= mature_start:
special_periods.append(
{
"start_date": earliest_start.strftime("%Y-%m-%d"),
"backtest_date": date,
"project_days": validation_days,
"description": description,
}
)
# Combine and remove any duplicates
all_periods = backtest_periods + special_periods
unique_periods = []
seen_dates = set()
for period in all_periods:
key = f"{period['start_date']}_{period['backtest_date']}"
if key not in seen_dates:
unique_periods.append(period)
seen_dates.add(key)
if not unique_periods:
raise ValueError("No valid backtest periods found with current parameters")
# Sort periods by backtest date for clearer analysis
unique_periods.sort(key=lambda x: pd.Timestamp(x["backtest_date"]))
print("\nRunning backtests with:")
print(
f"- Start dates range: {unique_periods[0]['start_date']} to {unique_periods[-1]['start_date']}"
)
print(
f"- Backtest dates range: {unique_periods[0]['backtest_date']} to {unique_periods[-1]['backtest_date']}"
)
print(f"- Minimum training period: {min_training_years} years")
print(f"- Validation period: {validation_years} years")
print(f"- Number of test periods: {len(unique_periods)}")
print("\nTest periods:")
for period in unique_periods:
print(f"- {period['description']}")
# Create args tuples with params and DataFrame
args = [(params, df, output) for params in unique_periods]
# Use multiprocessing
with Pool() as pool:
results = pool.map(run_single_backtest, args)
# Analyze results
successful_tests = [r for r in results if r["success"]]
failed_tests = [r for r in results if not r["success"]]
# Define stress periods
stress_periods = {
# COVID crash and recovery
("2020-03-01", "2020-09-01"): "COVID crash period",
# 2021 peak and subsequent crash
("2021-11-01", "2022-06-01"): "2021 peak aftermath",
# Add more stress periods as needed
}
def is_stress_period(test_date):
"""Check if a test date falls in any stress period"""
test_date = pd.Timestamp(test_date)
for (start, end), _ in stress_periods.items():
if pd.Timestamp(start) <= test_date <= pd.Timestamp(end):
return True
return False
# Categorize results
normal_periods = []
stress_periods_results = []
for result in successful_tests:
if is_stress_period(result["params"]["backtest_date"]):
stress_periods_results.append(result)
else:
normal_periods.append(result)
# Calculate metrics for each category
def calculate_category_metrics(results):
if not results:
return None
return {
"count": len(results),
"mape": np.mean([r["metrics"]["mape"] for r in results]),
"rmse": np.mean([r["metrics"]["rmse"] for r in results]),
"max_error": np.mean([r["metrics"]["max_error"] for r in results]),
"coverage_95": np.mean([r["metrics"]["coverage_95"] for r in results]),
"coverage_68": np.mean([r["metrics"]["coverage_68"] for r in results]),
}
normal_metrics = calculate_category_metrics(normal_periods)
stress_metrics = calculate_category_metrics(stress_periods_results)
# Write detailed results
with output.create("bitcoin_backtest_results_summary.txt") as f:
f.write("Systematic Backtest Results\n")
f.write("==========================\n\n")
f.write("Configuration:\n")
f.write(f"- Minimum training period: {min_training_years} years\n")
f.write(f"- Validation period: {validation_years} years\n")
f.write(
f"- Start dates range: {unique_periods[0]['start_date']} to {unique_periods[-1]['start_date']}\n"
)
f.write(
f"- Backtest dates range: {unique_periods[0]['backtest_date']} to {unique_periods[-1]['backtest_date']}\n"
)
f.write(f"- Number of test periods: {len(unique_periods)}\n\n")
# Normal Periods
f.write("Normal Market Periods\n")
f.write("====================\n")
f.write(f"Number of periods: {len(normal_periods)}\n\n")
for result in normal_periods:
f.write("\n" + "=" * 50 + "\n")
f.write(f"Period: {result['params']['description']}\n")
f.write(
f"Training: {result['params']['start_date']} to {result['params']['backtest_date']}\n"
)
f.write(
f"Validation: {result['params']['backtest_date']} to {pd.Timestamp(result['params']['backtest_date']) + pd.Timedelta(days=validation_years*365):%Y-%m-%d}\n"
)
f.write("\nMetrics:\n")
for metric, value in result["metrics"].items():
if metric in ["mape", "coverage_95", "coverage_68"]:
f.write(f"- {metric}: {value:.1f}%\n")
else:
f.write(f"- {metric}: ${value:,.0f}\n")
f.write("\n")
if normal_metrics:
f.write("\nNormal Periods Aggregate Metrics:\n")
f.write(f"MAPE: {normal_metrics['mape']:.1f}%\n")
f.write(f"RMSE: ${normal_metrics['rmse']:,.0f}\n")
f.write(f"Average Max Error: ${normal_metrics['max_error']:,.0f}\n")
f.write(f"95% CI Coverage: {normal_metrics['coverage_95']:.1f}%\n")
f.write(f"68% CI Coverage: {normal_metrics['coverage_68']:.1f}%\n")
# Stress Periods
f.write("\n\nStress Periods\n")
f.write("=============\n")
f.write(f"Number of periods: {len(stress_periods_results)}\n\n")
for result in stress_periods_results:
f.write("\n" + "=" * 50 + "\n")
f.write(f"Period: {result['params']['description']}\n")
f.write(
f"Training: {result['params']['start_date']} to {result['params']['backtest_date']}\n"
)
f.write(
f"Validation: {result['params']['backtest_date']} to {pd.Timestamp(result['params']['backtest_date']) + pd.Timedelta(days=validation_years*365):%Y-%m-%d}\n"
)
f.write("\nMetrics:\n")
for metric, value in result["metrics"].items():
if metric in ["mape", "coverage_95", "coverage_68"]:
f.write(f"- {metric}: {value:.1f}%\n")
else:
f.write(f"- {metric}: ${value:,.0f}\n")
f.write("\n")
if stress_metrics:
f.write("\nStress Periods Aggregate Metrics:\n")
f.write(f"MAPE: {stress_metrics['mape']:.1f}%\n")
f.write(f"RMSE: ${stress_metrics['rmse']:,.0f}\n")
f.write(f"Average Max Error: ${stress_metrics['max_error']:,.0f}\n")
f.write(f"95% CI Coverage: {stress_metrics['coverage_95']:.1f}%\n")
f.write(f"68% CI Coverage: {stress_metrics['coverage_68']:.1f}%\n")
return (
normal_metrics,
stress_metrics,
normal_periods,
stress_periods_results,
failed_tests,
)
# CLI
def get_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(
prog="model",
description="Bitcoin price model",
)
parser.add_argument(
"-o",
"--output",
help="output base directory",
default="./output",
)
parser.add_argument(
"-n",
"--name",
help="subdir of output base directory",
default="baseline",
)
return parser.parse_args()
def main():
args = get_args()
    output = Output(args.output, args.name)
analysis, df = analyze_bitcoin_prices("prices.csv")
run_projections(df, output)
normal_metrics, stress_metrics, normal_results, stress_results, failed_tests = (
run_systematic_backtests(df, output)
)
print("\nAggregate Metrics:")
print(f"Total backtests run: {normal_metrics['count']}")
print(f"Successful tests: {len(normal_results)}")
print(f"Failed tests: {len(failed_tests)}")
print("\nAverage Performance:")
print(f"MAPE: {normal_metrics['mape']:.1f}%")
print(f"RMSE: ${normal_metrics['rmse']:,.0f}")
print(f"95% CI Coverage: {normal_metrics['coverage_95']:.1f}%")
print(f"68% CI Coverage: {normal_metrics['coverage_68']:.1f}%")
print("\nFailed Tests:")
for test in failed_tests:
print(f"Period: {test['params']['description']}")
print(f"Error: {test['error']}\n")
if __name__ == "__main__":
main()