# bitcoin-model/model.py

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm

def analyze_bitcoin_prices(csv_path):
    """
    Analyze Bitcoin price data to calculate volatility and growth rates.

    The CSV must have exactly seven columns whose first column is the date and
    which include headers 'Price', 'Open', 'High' and 'Low'. After numeric
    conversion the columns are renamed *by position* to
    'Date', 'Close', 'Open', 'High', 'Low', 'Volume', 'Change'.
    Several diagnostic tables are printed to stdout along the way.

    Parameters:
    csv_path: Path or file-like object accepted by pandas.read_csv

    Returns:
    (analysis, df) where analysis is a dict of summary metrics (returns are in
    percent, volatilities are fractions) and df is the date-sorted DataFrame
    with 'Daily_Return', 'Rolling_Volatility_30d' and 'Rolling_Return_30d' added.
    """
    # Read CSV, parsing the first column as dates
    df = pd.read_csv(csv_path, parse_dates=[0])

    # Print first few rows of raw data to inspect
    print("\nFirst few rows of raw data:")
    print(df.head())

    # DataFrame.info() writes its report itself and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None" line).
    print("\nDataset Info:")
    df.info()

    # Convert price columns to float; values may be strings with thousands
    # separators. Anything unparseable becomes NaN.
    price_columns = ['Price', 'Open', 'High', 'Low']
    for col in price_columns:
        df[col] = df[col].astype(str).str.replace(',', '')
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Rename columns for clarity (positional: requires exactly seven columns)
    df.columns = ['Date', 'Close', 'Open', 'High', 'Low', 'Volume', 'Change']

    # Sort by date in ascending order so iloc[0]/iloc[-1] are first/last day
    df = df.sort_values('Date')

    # Print summary statistics after conversion
    print("\nPrice Summary After Conversion:")
    print(df[['Close', 'Open', 'High', 'Low']].describe())

    # Daily simple returns (first row is NaN)
    df['Daily_Return'] = df['Close'].pct_change()

    # Print first few daily returns to verify calculation
    print("\nFirst few daily returns:")
    print(df[['Date', 'Close', 'Daily_Return']].head())

    # Report NaN counts per column
    print("\nInfinite or NaN value counts:")
    print(df.isna().sum())

    daily_std = df['Daily_Return'].std()
    daily_mean = df['Daily_Return'].mean()
    first_close = df['Close'].iloc[0]
    last_close = df['Close'].iloc[-1]

    # Calculate metrics using 365 days for annualization
    analysis = {
        'period_start': df['Date'].min().strftime('%Y-%m-%d'),
        'period_end': df['Date'].max().strftime('%Y-%m-%d'),
        'total_days': len(df),
        'daily_volatility': daily_std,
        'annualized_volatility': daily_std * np.sqrt(365),
        'total_return': (last_close / first_close - 1) * 100,
        'average_daily_return': daily_mean * 100,
        'average_annual_return': ((1 + daily_mean) ** 365 - 1) * 100,
        'min_price': df['Low'].min(),
        'max_price': df['High'].max(),
        'avg_price': df['Close'].mean(),
        'start_price': first_close,
        'end_price': last_close
    }

    # Calculate rolling metrics
    df['Rolling_Volatility_30d'] = df['Daily_Return'].rolling(window=30).std() * np.sqrt(365)
    df['Rolling_Return_30d'] = df['Close'].pct_change(periods=30) * 100

    return analysis, df

def _average_log_cycle_path(cycles, grid):
    """
    Average the cycles' normalized price paths in log space on a shared grid.

    Each cycle's 'Normalized_Price' is interpolated onto `grid` (percent of the
    way through the cycle). Grid positions outside a cycle's observed range are
    left as NaN, so partially observed cycles only contribute where they have data.

    Returns:
    (mean_log, std_log): Series indexed by the grid positions covered by at
    least one cycle; std_log is 0 where only a single cycle contributes.
    """
    log_paths = {}
    for number, cycle in enumerate(cycles):
        path = cycle[['Cycle_Pct', 'Normalized_Price']].dropna()
        # log() needs strictly positive prices; np.interp needs increasing x
        path = path[path['Normalized_Price'] > 0].sort_values('Cycle_Pct')
        if path.empty:
            continue
        log_paths[number] = np.interp(
            grid,
            path['Cycle_Pct'].to_numpy(),
            np.log(path['Normalized_Price'].to_numpy()),
            left=np.nan,
            right=np.nan,
        )

    if not log_paths:
        return pd.Series(dtype=float), pd.Series(dtype=float)

    stacked = pd.DataFrame(log_paths, index=grid)
    mean_log = stacked.mean(axis=1).dropna()
    std_log = stacked.std(axis=1).reindex(mean_log.index).fillna(0.0)
    return mean_log, std_log


def visualize_cycle_patterns(df, cycle_returns, cycle_volatility):
    """
    Create enhanced visualization of Bitcoin's behavior across halving cycles.

    Three stacked panels are drawn and saved to 'bitcoin_cycle_patterns.png':
      1. average daily return per cycle day, with bands of +/-1 and +/-2
         rolling (30-point) standard deviations around the rolling mean;
      2. annualized volatility per cycle day with a +/-2 rolling-std band;
      3. each historical cycle's close normalized to its first available
         close, plus the cross-cycle average, on a log scale.
    A green line marks where the latest date in df falls within its cycle.

    Parameters:
    df: DataFrame with 'Date' (datetime) and 'Close' columns
    cycle_returns: Series of mean daily returns indexed by day within the cycle
    cycle_volatility: Series of daily-return standard deviations; it is plotted
        against x values derived from cycle_returns' index, so both Series
        must have the same length

    Returns:
    None. The figure is written to disk and closed.
    """
    plt.style.use('seaborn-v0_8')
    fig = plt.figure(figsize=(15, 15))

    # Create a 3x1 subplot grid with different heights
    gs = plt.GridSpec(3, 1, height_ratios=[2, 1, 2], hspace=0.3)

    # Resolved up front so the current-position marker can be drawn before
    # each legend is built (artists added after legend() are not listed in it).
    halving_dates = get_halving_dates()
    current_position = get_cycle_position(df['Date'].max(), halving_dates) * 100

    # Plot 1: Returns across cycle with confidence bands
    ax1 = plt.subplot(gs[0])

    # Convert days to percentage through a nominal 4-year cycle
    x_points = np.array(cycle_returns.index) / (4 * 365) * 100

    # Calculate rolling mean and standard deviation for confidence bands
    window = 30  # 30-point window
    rolling_mean = pd.Series(cycle_returns.values).rolling(window=window).mean()
    rolling_std = pd.Series(cycle_returns.values).rolling(window=window).std()

    # Plot confidence bands (values converted from fraction to percent)
    ax1.fill_between(x_points,
                     (rolling_mean - 2*rolling_std) * 100,
                     (rolling_mean + 2*rolling_std) * 100,
                     alpha=0.2, color='blue', label='95% Confidence')
    ax1.fill_between(x_points,
                     (rolling_mean - rolling_std) * 100,
                     (rolling_mean + rolling_std) * 100,
                     alpha=0.3, color='blue', label='68% Confidence')

    # Plot average returns
    ax1.plot(x_points, cycle_returns.values * 100, 'b-',
             label='Average Daily Return', linewidth=2)
    ax1.axhline(y=0, color='gray', linestyle='--', alpha=0.5)

    # Add vertical lines for each year in cycle
    for year in range(1, 4):
        ax1.axvline(x=year*25, color='gray', linestyle=':', alpha=0.3)
        ax1.text(year*25, ax1.get_ylim()[1], f'Year {year}',
                rotation=90, va='top', ha='right', alpha=0.7)

    # Highlight halving points
    ax1.axvline(x=0, color='red', linestyle='--', alpha=0.5, label='Halving Event')
    ax1.axvline(x=100, color='red', linestyle='--', alpha=0.5)
    ax1.axvline(x=current_position, color='green', linestyle='-', alpha=0.5,
                label='Current Position')

    ax1.set_title('Bitcoin Return Patterns Across Halving Cycle', pad=20)
    ax1.set_xlabel('Position in Cycle (%)')
    ax1.set_ylabel('Average Daily Return (%)')
    ax1.grid(True, alpha=0.3)
    ax1.legend(loc='upper right')

    # Plot 2: Volatility across cycle
    ax2 = plt.subplot(gs[1])

    # Calculate rolling volatility confidence bands
    vol_mean = pd.Series(cycle_volatility.values).rolling(window=window).mean()
    vol_std = pd.Series(cycle_volatility.values).rolling(window=window).std()

    # Plot volatility with confidence bands (daily std -> annualized percent)
    annualized_factor = np.sqrt(365) * 100
    ax2.fill_between(x_points,
                     (vol_mean - 2*vol_std) * annualized_factor,
                     (vol_mean + 2*vol_std) * annualized_factor,
                     alpha=0.2, color='red', label='95% Confidence')
    ax2.plot(x_points, cycle_volatility.values * annualized_factor, 'r-',
             label='Annualized Volatility', linewidth=2)

    # Add year markers
    for year in range(1, 4):
        ax2.axvline(x=year*25, color='gray', linestyle=':', alpha=0.3)

    ax2.axvline(x=0, color='red', linestyle='--', alpha=0.5)
    ax2.axvline(x=100, color='red', linestyle='--', alpha=0.5)
    ax2.axvline(x=current_position, color='green', linestyle='-', alpha=0.5,
                label='Current Position')

    ax2.set_xlabel('Position in Cycle (%)')
    ax2.set_ylabel('Volatility (%)')
    ax2.grid(True, alpha=0.3)
    ax2.legend(loc='upper right')

    # Plot 3: Average price trajectory within cycles
    ax3 = plt.subplot(gs[2])

    # Define a color scheme for cycles
    cycle_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

    # Slice the data into one frame per halving-to-halving interval
    cycles = []
    for cycle_start, cycle_end in zip(halving_dates[:-1], halving_dates[1:]):
        cycle_data = df[(df['Date'] >= cycle_start) & (df['Date'] < cycle_end)].copy()
        if cycle_data.empty:
            continue

        cycle_data['Cycle_Pct'] = ((cycle_data['Date'] - cycle_start).dt.total_seconds() /
                                   (cycle_end - cycle_start).total_seconds() * 100)
        # Normalized to the first close available in this cycle, which is not
        # necessarily the halving day if the data starts mid-cycle.
        cycle_data['Normalized_Price'] = cycle_data['Close'] / cycle_data['Close'].iloc[0]
        cycles.append(cycle_data)

    # Plot each historical cycle with distinct colors
    for i, cycle in enumerate(cycles):
        first_year = cycle['Date'].iloc[0].strftime('%Y')
        last_year = cycle['Date'].iloc[-1].strftime('%Y')
        ax3.semilogy(cycle['Cycle_Pct'], cycle['Normalized_Price'],
                     color=cycle_colors[i % len(cycle_colors)], alpha=0.7,
                     label=f'Cycle {i+1} ({first_year}-{last_year})')

    # Calculate and plot average cycle. Cycles have different lengths, so their
    # Cycle_Pct values do not line up; they are interpolated onto a common grid
    # and averaged in log space (geometric mean), matching the log y-axis and
    # the multiplicative +/-2 std band.
    if cycles:
        grid = np.linspace(0, 100, 401)
        mean_log, std_log = _average_log_cycle_path(cycles, grid)
        if not mean_log.empty:
            ax3.semilogy(mean_log.index, np.exp(mean_log.values), 'k-',
                         linewidth=2, label='Average Cycle')
            ax3.fill_between(mean_log.index,
                             np.exp(mean_log - 2*std_log),
                             np.exp(mean_log + 2*std_log),
                             alpha=0.2, color='gray')

    # Add year markers
    for year in range(1, 4):
        ax3.axvline(x=year*25, color='gray', linestyle=':', alpha=0.3)

    ax3.axvline(x=0, color='red', linestyle='--', alpha=0.5)
    ax3.axvline(x=100, color='red', linestyle='--', alpha=0.5)
    ax3.axvline(x=current_position, color='green', linestyle='-', alpha=0.5,
                label='Current Position')

    ax3.set_title('Price Performance Across Cycles (Normalized)', pad=20)
    ax3.set_xlabel('Position in Cycle (%)')
    ax3.set_ylabel('Price (Relative to Cycle Start)')
    ax3.grid(True, alpha=0.3)
    ax3.legend(loc='center left', bbox_to_anchor=(1.02, 0.5))

    # Main title for the figure
    fig.suptitle('Bitcoin Halving Cycle Analysis', fontsize=16, y=0.95)

    # Adjust layout to prevent legend cutoff
    plt.tight_layout()

    # Save the plot
    plt.savefig('bitcoin_cycle_patterns.png', dpi=300, bbox_inches='tight')
    plt.close()

def _plot_price_panel(ax, plot_df, projections, title_suffix):
    """Draw historical closes plus projected trend, median and interval bands on a log axis."""
    # Plot historical prices
    ax.semilogy(plot_df['Date'], plot_df['Close'], 'b-', label='Historical Price')

    # Plot projections
    ax.semilogy(projections.index, projections['Expected_Trend'], '--',
                color='purple', label='Expected Trend')
    ax.semilogy(projections.index, projections['Median'], ':',
                color='green', label='Simulated Median')
    ax.fill_between(projections.index,
                    projections['Lower_95'], projections['Upper_95'],
                    alpha=0.2, color='orange', label='95% Confidence Interval')
    ax.fill_between(projections.index,
                    projections['Lower_68'], projections['Upper_68'],
                    alpha=0.3, color='green', label='68% Confidence Interval')

    # Customize y-axis
    ax.yaxis.set_major_formatter(plt.FuncFormatter(format_price))

    # Set custom y-axis ticks at meaningful price points
    min_price = min(plot_df['Low'].min(), projections['Lower_95'].min())
    max_price = max(plot_df['High'].max(), projections['Upper_95'].max())
    price_points = get_nice_price_points(min_price, max_price)
    # A narrow range may contain no "nice" value; keep the default ticks then
    if len(price_points) > 0:
        ax.set_yticks(price_points)

    ax.tick_params(axis='y', labelsize=8)  # Smaller font size
    ax.margins(y=0.02)                     # Padding to prevent label cutoff
    ax.yaxis.set_tick_params(pad=1)        # Label padding to prevent overlap

    # Add grid lines with adjusted opacity
    ax.grid(True, which='major', linestyle='-', alpha=0.5)
    ax.grid(True, which='minor', linestyle=':', alpha=0.2)

    ax.set_title('Bitcoin Price History and Projections (Log Scale)' + title_suffix)
    # Make legend font size smaller too for consistency
    ax.legend(fontsize=8)


def _plot_volatility_panel(ax, plot_df, title_suffix):
    """Draw the 30-day rolling annualized volatility as a percentage axis."""
    ax.plot(plot_df['Date'], plot_df['Rolling_Volatility_30d'], 'r-', label='30-Day Rolling Volatility')
    ax.set_title('30-Day Rolling Volatility (Annualized)' + title_suffix)
    ax.set_xlabel('Date')
    ax.set_ylabel('Volatility')
    ax.grid(True)
    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: '{:.0%}'.format(y)))
    ax.legend()


def _plot_returns_panel(ax, plot_df, title_suffix):
    """Draw a histogram of daily returns within 5 standard deviations of the mean."""
    returns_mean = plot_df['Daily_Return'].mean()
    returns_std = plot_df['Daily_Return'].std()
    filtered_returns = plot_df['Daily_Return'][
        (plot_df['Daily_Return'] > returns_mean - 5 * returns_std) &
        (plot_df['Daily_Return'] < returns_mean + 5 * returns_std)
    ]

    sns.histplot(filtered_returns, bins=100, ax=ax)
    ax.set_title('Distribution of Daily Returns (Excluding Extreme Outliers)' + title_suffix)
    ax.set_xlabel('Daily Return')
    ax.set_ylabel('Count')
    ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: '{:.0%}'.format(x)))

    # Add a vertical line for mean return
    ax.axvline(filtered_returns.mean(), color='r', linestyle='dashed', linewidth=1)
    ax.text(filtered_returns.mean(), ax.get_ylim()[1], 'Mean',
            rotation=90, va='top', ha='right')


def _plot_projection_ranges(ax, projections, project_days):
    """Draw the projected median and 68%/95% bounds at selected future day counts."""
    band_columns = ['Lower_95', 'Lower_68', 'Median', 'Upper_68', 'Upper_95']

    timepoints = np.array([30, 90, 180, 365])
    timepoints = timepoints[timepoints <= project_days]
    if timepoints.size == 0:
        # Horizon shorter than the first checkpoint: show the final projected day
        timepoints = np.array([project_days])

    per_timepoint = []
    ranges = []
    positions = []
    for t in timepoints:
        row = projections.iloc[t - 1]  # Convert day count to 0-based index
        values = [row[col] for col in band_columns]
        per_timepoint.append((t, values))
        ranges.extend(values)
        positions.extend([t] * len(band_columns))

    ax.scatter(positions, ranges, alpha=0.6)

    # Add lines connecting the ranges
    for t, values in per_timepoint:
        ax.plot([t] * len(values), values, 'k-', alpha=0.3)

    # Set log scale first
    ax.set_yscale('log')

    min_price = min(ranges)
    max_price = max(ranges)

    # Create price points in a 1-2-5 sequence per decade
    log_min = np.floor(np.log10(min_price))
    log_max = np.ceil(np.log10(max_price))
    price_points = []
    for exp in range(int(log_min), int(log_max + 1)):
        for mult in [1, 2, 5]:
            point = mult * 10**exp
            if min_price <= point <= max_price:
                price_points.append(point)

    # A narrow range may contain no 1-2-5 value; keep the default ticks then
    if price_points:
        ax.set_yticks(price_points)

    def price_formatter(x, p):
        if x >= 1e6:
            return f'${x/1e6:.1f}M'
        if x >= 1e3:
            return f'${x/1e3:.0f}K'
        return f'${x:.0f}'

    # Apply formatter to major ticks
    ax.yaxis.set_major_formatter(plt.FuncFormatter(price_formatter))

    # Customize the plot
    ax.set_title('Projected Price Ranges at Future Timepoints')
    ax.set_xlabel('Days Forward')
    ax.set_ylabel('Price (USD)')
    ax.grid(True, alpha=0.3)

    # Set x-axis to show only our timepoints
    ax.set_xticks(timepoints)


def create_plots(df, start=None, end=None, project_days=365):
    """
    Create plots including historical data and future projections.

    Two image files are written to the current directory: the halving-cycle
    figure produced by visualize_cycle_patterns, and a four-panel figure
    (price + projections, rolling volatility, return distribution, projected
    ranges) named 'bitcoin_analysis_<start>_to_<end>_with_projections.png'.

    Parameters:
    df: DataFrame with 'Date', 'Close', 'High', 'Low', 'Daily_Return' and
        'Rolling_Volatility_30d' columns
    start: Optional inclusive lower date bound (anything pandas.to_datetime accepts)
    end: Optional inclusive upper date bound
    project_days: Number of days to project forward (at least 1)

    Returns:
    DataFrame of projections returned by project_prices_with_cycles.

    Raises:
    ValueError: if project_days < 1 or no rows fall inside the date range.
    """
    if project_days < 1:
        raise ValueError("project_days must be at least 1")

    # Filter data based on date range
    mask = pd.Series(True, index=df.index)
    if start:
        mask &= df['Date'] >= pd.to_datetime(start)
    if end:
        mask &= df['Date'] <= pd.to_datetime(end)

    plot_df = df[mask].copy()

    if len(plot_df) == 0:
        raise ValueError("No data found for the specified date range")

    # Generate projections
    cycle_returns, cycle_volatility = analyze_cycles_with_halvings(plot_df)
    projections = project_prices_with_cycles(plot_df, days_forward=project_days)

    # Create cycle visualization (saves and closes its own figure)
    visualize_cycle_patterns(plot_df, cycle_returns, cycle_volatility)

    # Set up the style and a fresh figure for the four-panel plot
    plt.style.use('seaborn-v0_8')
    plt.figure(figsize=(15, 15))

    # Date range for titles
    hist_date_range = f" ({plot_df['Date'].min().strftime('%Y-%m-%d')} to {plot_df['Date'].max().strftime('%Y-%m-%d')})"

    # 1. Price history and projections (log scale)
    _plot_price_panel(plt.subplot(4, 1, 1), plot_df, projections, hist_date_range)

    # 2. Rolling volatility
    _plot_volatility_panel(plt.subplot(4, 1, 2), plot_df, hist_date_range)

    # 3. Returns distribution
    _plot_returns_panel(plt.subplot(4, 1, 3), plot_df, hist_date_range)

    # 4. Projection ranges
    _plot_projection_ranges(plt.subplot(4, 1, 4), projections, project_days)

    # Adjust layout
    plt.tight_layout()

    # Save the plot; explicit start/end arguments are used verbatim in the name
    start_str = start if start else plot_df['Date'].min().strftime('%Y-%m-%d')
    end_str = end if end else plot_df['Date'].max().strftime('%Y-%m-%d')
    filename = f'bitcoin_analysis_{start_str}_to_{end_str}_with_projections.png'
    plt.savefig(filename, dpi=300, bbox_inches='tight')
    plt.close()

    return projections

def analyze_cycles(df, cycle_period=4*365):
    """
    Analyze Bitcoin market cycles to understand return patterns.

    Each row is assigned a position equal to the number of days since the
    earliest date in df, modulo cycle_period; daily returns are then
    aggregated per position. The input frame is not modified.

    Parameters:
    df: DataFrame with 'Date' (datetime) and 'Daily_Return' columns
    cycle_period: Cycle length in days (default: four 365-day years)

    Returns:
    (cycle_returns, cycle_volatility): Series of the mean and the standard
    deviation of 'Daily_Return', indexed by 'Cycle_Position'.
    """
    # Where each row sits within the supposed fixed-length cycle
    days_from_start = (df['Date'] - df['Date'].min()).dt.days
    cycle_position = (days_from_start % cycle_period).rename('Cycle_Position')

    # Aggregate daily returns per cycle position
    grouped = df['Daily_Return'].groupby(cycle_position)
    cycle_returns = grouped.mean()
    cycle_volatility = grouped.std()

    return cycle_returns, cycle_volatility

def get_halving_dates():
    """
    Return known and projected Bitcoin halving dates.

    The first entry is the genesis block, used as the start of the first
    cycle; the last entry is an estimate.

    Returns:
    pandas DatetimeIndex in ascending order.
    """
    return pd.to_datetime([
        '2009-01-03',  # Bitcoin genesis block (treat as cycle start)
        '2012-11-28',  # First halving
        '2016-07-09',  # Second halving
        '2020-05-11',  # Third halving
        '2024-04-20',  # Fourth halving (block 840,000, UTC date)
        '2028-04-17',  # Fifth halving (projected)
    ])

def get_cycle_position(date, halving_dates):
    """
    Locate a date within its halving cycle as a fraction from 0 to 1.

    0 means the date falls on a halving (or precedes every listed halving);
    values approach 1 just before the next halving. Dates past the last listed
    halving are measured against the length of the final listed cycle and
    capped at 1.

    Parameters:
    date: Anything pandas.to_datetime accepts
    halving_dates: Ascending DatetimeIndex of halving dates
    """
    when = pd.to_datetime(date)

    # Latest halving on or before the date; NaT when there is none
    cycle_start = halving_dates[halving_dates <= when].max()
    if pd.isna(cycle_start):
        return 0.0

    upcoming = halving_dates[halving_dates > when]
    if len(upcoming) > 0:
        # Ordinary case: measure progress between the surrounding halvings
        elapsed_days = (when - cycle_start).days
        span_days = (upcoming.min() - cycle_start).days
    else:
        # Beyond the table: reuse the length of the last listed cycle
        span_days = (halving_dates[-1] - halving_dates[-2]).days
        elapsed_days = (when - halving_dates[-1]).days

    return min(elapsed_days / span_days, 1.0)

def analyze_cycles_with_halvings(df):
    """
    Analyze Bitcoin market cycles aligned with halving events.

    Every row's date is mapped to its fractional position in the halving cycle
    and rescaled to a nominal 4-year day count (0 to 1460). Daily returns are
    then aggregated per cycle day. The input frame is not modified.

    Parameters:
    df: DataFrame with 'Date' (datetime) and 'Daily_Return' columns

    Returns:
    (cycle_returns, cycle_volatility): Series indexed by 'Cycle_Days'.
    cycle_returns is the per-day mean return, smoothed with a Savitzky-Golay
    filter when more than 91 cycle days are present; cycle_volatility is the
    unsmoothed per-day standard deviation.
    """
    from scipy.signal import savgol_filter

    halving_dates = get_halving_dates()

    # Fractional position (0..1) of each date within its halving cycle
    cycle_position = df['Date'].apply(
        lambda x: get_cycle_position(x, halving_dates)
    )

    # Convert to days within a nominal cycle (0 to ~1460 days)
    cycle_days = (cycle_position * 4 * 365).round().astype(int).rename('Cycle_Days')

    # Group by position in cycle and calculate average returns
    grouped = df['Daily_Return'].groupby(cycle_days)
    cycle_returns = grouped.mean()
    cycle_volatility = grouped.std()

    # Smooth the cycle returns to reduce noise
    window = 91  # About 3 months
    if len(cycle_returns) > window:
        # A cycle day whose only observations are NaN would otherwise spread
        # NaN across the whole filter window, so fill such gaps first. This is
        # a no-op when there are no NaN values.
        smoothing_input = cycle_returns.interpolate(limit_direction='both')
        cycle_returns = pd.Series(
            savgol_filter(smoothing_input.to_numpy(), window, 3),
            index=cycle_returns.index
        )

    return cycle_returns, cycle_volatility


def project_prices_with_cycles(df, days_forward=365, simulations=1000, confidence_levels=(0.95, 0.68)):
    """
    Project future Bitcoin prices using Monte Carlo simulation with halving-aligned cycles.

    The expected return for each future day is looked up from the historical
    per-cycle-day averages, scaled by a 10%-per-year decay, and combined with
    normally distributed noise whose scale is the standard deviation of the
    last 90 daily returns. NumPy's global random seed is reset to 42.

    Parameters:
    df: DataFrame sorted by date, with 'Date', 'Close' and 'Daily_Return' columns
    days_forward: Number of days to project forward
    simulations: Number of Monte Carlo simulations to run
    confidence_levels: Central interval levels as fractions (e.g. 0.95)

    Returns:
    DataFrame indexed by future date with 'Median', a 'Lower_<pct>' and
    'Upper_<pct>' pair per confidence level, and 'Expected_Trend'.
    """
    # Analyze historical cycles
    cycle_returns, _ = analyze_cycles_with_halvings(df)

    # Get current position in halving cycle. Rounded (not truncated) so the
    # day number matches the keys produced by analyze_cycles_with_halvings.
    halving_dates = get_halving_dates()
    current_date = df['Date'].max()
    cycle_position = get_cycle_position(current_date, halving_dates)
    current_cycle_days = int(round(cycle_position * 4 * 365))

    # Current price (last known price)
    last_price = df['Close'].iloc[-1]
    last_date = df['Date'].iloc[-1]

    # Generate dates for projection
    future_dates = pd.date_range(
        start=last_date + timedelta(days=1),
        periods=days_forward,
        freq='D'
    )

    # Expected return per future day; cycle days with no history fall back to
    # the overall mean of the cycle averages
    fallback_return = cycle_returns.mean()
    expected_returns = np.array([
        cycle_returns.get((current_cycle_days + i) % (4 * 365), fallback_return)
        for i in range(days_forward)
    ])

    # Calculate base volatility (recent)
    recent_volatility = df['Daily_Return'].tail(90).std()

    # Add long-term trend component (very gentle decay)
    long_term_decay = 0.9 ** (np.arange(days_forward) / 365)  # 10% reduction per year
    expected_returns = expected_returns * long_term_decay

    # NOTE(review): if 'Daily_Return' holds simple returns (pct_change), then
    # compounding them with exp(cumsum(...)) below treats them as log returns,
    # which biases paths upward by roughly variance/2 per day - confirm intent.

    # Run Monte Carlo simulation
    np.random.seed(42)  # For reproducibility
    simulated_paths = np.zeros((days_forward, simulations))

    for sim in range(simulations):
        # Generate random returns using cycle-aware expected returns
        returns = np.random.normal(
            loc=expected_returns,
            scale=recent_volatility,
            size=days_forward
        )

        # Calculate price path
        simulated_paths[:, sim] = last_price * np.exp(np.cumsum(returns))

    # Calculate percentiles for confidence intervals
    results = pd.DataFrame(index=future_dates)
    results['Median'] = np.percentile(simulated_paths, 50, axis=1)

    for level in confidence_levels:
        lower_percentile = (1 - level) * 100 / 2
        upper_percentile = 100 - lower_percentile

        # ':g' avoids float truncation (int(0.29 * 100) == 28) in the label
        pct_label = f"{level * 100:g}"
        results[f'Lower_{pct_label}'] = np.percentile(simulated_paths, lower_percentile, axis=1)
        results[f'Upper_{pct_label}'] = np.percentile(simulated_paths, upper_percentile, axis=1)

    # Add expected trend line (without randomness)
    results['Expected_Trend'] = last_price * np.exp(np.cumsum(expected_returns))

    return results

def calculate_rolling_metrics(df, window=365):
    """
    Add trailing-window mean and standard deviation of daily returns.

    Works on a copy of df and returns it with two extra columns,
    'Rolling_Daily_Return' and 'Rolling_Daily_Volatility', both computed from
    'Daily_Return' over `window` rows.
    """
    result = df.copy()
    trailing = result['Daily_Return'].rolling(window=window)
    result['Rolling_Daily_Return'] = trailing.mean()
    result['Rolling_Daily_Volatility'] = trailing.std()
    return result

def fit_return_trend(df):
    """
    Fit an exponential decay trend to the rolling returns.

    The model is y = a * exp(-b * x) + c, where x is the number of days since
    the earliest date in df and y is the rolling mean daily return produced by
    calculate_rolling_metrics.

    Parameters:
    df: DataFrame with 'Date' (datetime) and 'Daily_Return' columns

    Returns:
    Array [a, b, c] of fitted parameters, each constrained to be non-negative
    (a <= 1, b <= 1, c <= 0.01).

    Raises:
    ValueError: if no row has a rolling return to fit (too little history).
    """
    from scipy.optimize import curve_fit

    def exp_decay(x, a, b, c):
        return a * np.exp(-b * x) + c

    # Calculate days from start
    df = df.copy()
    df['Days'] = (df['Date'] - df['Date'].min()).dt.days

    # Calculate rolling metrics
    df = calculate_rolling_metrics(df)

    # Only the two fit columns matter; dropping rows that are NaN in any other
    # column would needlessly discard usable points.
    clean_data = df.dropna(subset=['Days', 'Rolling_Daily_Return'])
    if clean_data.empty:
        raise ValueError("Not enough data to fit a return trend: no rolling returns available")

    popt, _ = curve_fit(
        exp_decay,
        clean_data['Days'].to_numpy(),
        clean_data['Rolling_Daily_Return'].to_numpy(),
        p0=[0.01, 0.001, 0.0001],  # Initial guess for parameters
        bounds=([0, 0, 0], [1, 1, 0.01])  # Constraints to keep parameters positive
    )

    return popt

def project_prices_with_trend(df, days_forward=365, simulations=1000, confidence_levels=(0.95, 0.68)):
    """
    Project future Bitcoin prices using Monte Carlo simulation with trend adjustment.

    The expected daily return follows the exponential decay fitted by
    fit_return_trend, extrapolated past the end of the data; noise is normal
    with the standard deviation of the last 365 daily returns. NumPy's global
    random seed is reset to 42.

    Parameters:
    df: DataFrame sorted by date, with 'Date', 'Close' and 'Daily_Return' columns
    days_forward: Number of days to project forward
    simulations: Number of Monte Carlo simulations to run
    confidence_levels: Central interval levels as fractions (e.g. 0.95)

    Returns:
    DataFrame indexed by future date with 'Median', a 'Lower_<pct>' and
    'Upper_<pct>' pair per confidence level, and 'Expected_Trend'.
    """
    def exp_decay(x, a, b, c):
        return a * np.exp(-b * x) + c

    # Fit return trend
    trend_params = fit_return_trend(df)

    # Calculate days from start for projection
    days_from_start = (df['Date'].max() - df['Date'].min()).days

    # Current price (last known price)
    last_price = df['Close'].iloc[-1]
    last_date = df['Date'].iloc[-1]

    # Generate dates for projection
    future_dates = pd.date_range(
        start=last_date + timedelta(days=1),
        periods=days_forward,
        freq='D'
    )

    # Calculate expected returns for future dates using fitted trend
    future_days = np.arange(days_from_start + 1, days_from_start + days_forward + 1)
    expected_returns = exp_decay(future_days, *trend_params)

    # Use recent volatility for projections
    recent_volatility = df['Daily_Return'].tail(365).std()

    # Run Monte Carlo simulation
    np.random.seed(42)  # For reproducibility
    simulated_paths = np.zeros((days_forward, simulations))

    for sim in range(simulations):
        # Generate random returns using trending expected return
        returns = np.random.normal(
            loc=expected_returns,
            scale=recent_volatility,
            size=days_forward
        )

        # Calculate price path
        simulated_paths[:, sim] = last_price * np.exp(np.cumsum(returns))

    # Calculate percentiles for confidence intervals
    results = pd.DataFrame(index=future_dates)
    results['Median'] = np.percentile(simulated_paths, 50, axis=1)

    for level in confidence_levels:
        lower_percentile = (1 - level) * 100 / 2
        upper_percentile = 100 - lower_percentile

        # ':g' avoids float truncation (int(0.29 * 100) == 28) in the label
        pct_label = f"{level * 100:g}"
        results[f'Lower_{pct_label}'] = np.percentile(simulated_paths, lower_percentile, axis=1)
        results[f'Upper_{pct_label}'] = np.percentile(simulated_paths, upper_percentile, axis=1)

    # Add expected trend line (without randomness)
    results['Expected_Trend'] = last_price * np.exp(np.cumsum(expected_returns))

    return results

def get_nice_price_points(min_price, max_price):
    """
    Pick tidy y-axis tick values that lie within [min_price, max_price].

    When the range spans more than four decades, only powers of ten and their
    5x mid-points are used (the mid-point is skipped in the lowest decade).
    Otherwise each decade contributes its 1x, 2x and 5x values.

    Returns:
    numpy array of tick values in ascending order.
    """
    lowest_exp = int(np.floor(np.log10(min_price)))
    highest_exp = int(np.ceil(np.log10(max_price)))
    sparse = highest_exp - lowest_exp > 4

    ticks = []
    for exp in range(lowest_exp, highest_exp + 1):
        decade = 10**exp
        if sparse:
            # Power of ten, plus the mid-point except in the lowest decade
            candidates = [decade]
            if exp > lowest_exp:
                candidates.append(decade * 5)
        else:
            # Denser 1-2-5 sequence for smaller ranges
            candidates = [mult * decade for mult in (1, 2, 5)]

        ticks.extend(c for c in candidates if min_price <= c <= max_price)

    return np.array(ticks)

def format_price(x, p):
    """
    Render a dollar amount compactly with a K, M or B suffix.

    Amounts of a thousand or more get one decimal and a suffix, amounts from 1
    up to 1000 are shown as whole dollars, and anything smaller shows cents.
    `p` (the tick position passed by matplotlib formatters) is ignored.
    """
    magnitude = abs(x)

    # Largest unit first so each value picks up the biggest suffix that fits
    for unit, suffix in ((1e9, 'B'), (1e6, 'M'), (1e3, 'K')):
        if magnitude >= unit:
            return f'${x/unit:.1f}{suffix}'

    if magnitude < 1:
        return f'${x:.2f}'  # Sub-dollar values show cents
    return f'${x:.0f}'

def project_prices(df, days_forward=365, simulations=1000, confidence_levels=(0.95, 0.68)):
    """
    Project future Bitcoin prices using Monte Carlo simulation.

    Each simulated day draws a return from a normal distribution with the
    historical mean and standard deviation of 'Daily_Return'; paths are
    compounded from the last close. NumPy's global random seed is reset to 42.

    Parameters:
    df: DataFrame sorted by date, with 'Date', 'Close' and 'Daily_Return' columns
    days_forward: Number of days to project forward
    simulations: Number of Monte Carlo simulations to run
    confidence_levels: Central interval levels as fractions (e.g. 0.95)

    Returns:
    DataFrame indexed by future date with 'Median' and a 'Lower_<pct>' /
    'Upper_<pct>' pair per confidence level.
    """
    # Calculate daily return parameters
    daily_return = df['Daily_Return'].mean()
    daily_volatility = df['Daily_Return'].std()

    # Current price (last known price)
    last_price = df['Close'].iloc[-1]
    last_date = df['Date'].iloc[-1]

    # Generate dates for projection
    future_dates = pd.date_range(
        start=last_date + timedelta(days=1),
        periods=days_forward,
        freq='D'
    )

    # Run Monte Carlo simulation
    np.random.seed(42)  # For reproducibility
    simulated_paths = np.zeros((days_forward, simulations))

    for sim in range(simulations):
        # Generate random returns using historical parameters
        returns = np.random.normal(
            loc=daily_return,
            scale=daily_volatility,
            size=days_forward
        )

        # Calculate price path
        simulated_paths[:, sim] = last_price * np.exp(np.cumsum(returns))

    # Calculate percentiles for confidence intervals
    results = pd.DataFrame(index=future_dates)
    results['Median'] = np.percentile(simulated_paths, 50, axis=1)

    for level in confidence_levels:
        lower_percentile = (1 - level) * 100 / 2
        upper_percentile = 100 - lower_percentile

        # ':g' avoids float truncation (int(0.29 * 100) == 28) in the label
        pct_label = f"{level * 100:g}"
        results[f'Lower_{pct_label}'] = np.percentile(simulated_paths, lower_percentile, axis=1)
        results[f'Upper_{pct_label}'] = np.percentile(simulated_paths, upper_percentile, axis=1)

    return results

def print_analysis(analysis):
    """
    Print a human-readable report of the metrics in `analysis`.

    `analysis` is the metrics mapping built by analyze_bitcoin_prices. Prices
    are shown with thousands separators, volatilities are formatted from
    fractions to percentages, and return figures are printed as-is followed
    by a percent sign.
    """
    # One template per output line; fields are filled from `analysis` by key
    report_templates = (
        "\nBitcoin Price Analysis ({period_start} to {period_end})",
        "-" * 50,
        "Total Days Analyzed: {total_days}",
        "\nPrice Range:",
        "Starting Price: ${start_price:,.2f}",
        "Ending Price: ${end_price:,.2f}",
        "Minimum Price: ${min_price:,.2f}",
        "Maximum Price: ${max_price:,.2f}",
        "Average Price: ${avg_price:,.2f}",
        "\nVolatility Metrics:",
        "Daily Volatility: {daily_volatility:.2%}",
        "Annualized Volatility: {annualized_volatility:.2%}",
        "\nReturn Metrics:",
        "Total Return: {total_return:,.2f}%",
        "Average Daily Return: {average_daily_return:.2f}%",
        "Average Annual Return: {average_annual_return:,.2f}%",
    )
    for template in report_templates:
        print(template.format_map(analysis))

if __name__ == "__main__":
    # Load the price history and compute the summary metrics
    analysis, df = analyze_bitcoin_prices("prices.csv")

    # create_plots also accepts other ranges, e.g. no bounds for the full
    # history, start='2022-01-01' for 2022 onwards, or
    # start='2023-01-01', end='2023-12-31' for a single year.
    projections = create_plots(df, start='2011-01-01', project_days=365*4)

    # Rows are 0-based, so day N of the projection sits at position N - 1
    checkpoint_days = (30, 90, 180, 365)
    checkpoint_rows = [day - 1 for day in checkpoint_days]

    print("\nProjected Prices at Key Points:")
    print(projections.iloc[checkpoint_rows].round(2))
    print_analysis(analysis)