From 5fa358e6aecaf984d7849ec60bebc9c3c0b5dca9 Mon Sep 17 00:00:00 2001 From: Sam Fredrickson Date: Thu, 14 Nov 2024 20:05:27 -0800 Subject: [PATCH] Clean up the code. - Re-order functions. - Remove older projection methods. - Format with black. --- model.py | 1303 +++++++++++++++++++++++++----------------------------- 1 file changed, 607 insertions(+), 696 deletions(-) diff --git a/model.py b/model.py index 61cd25b..109cbbe 100644 --- a/model.py +++ b/model.py @@ -5,444 +5,23 @@ import matplotlib.pyplot as plt import seaborn as sns from scipy.stats import norm -def analyze_bitcoin_prices(csv_path): - """ - Analyze Bitcoin price data to calculate volatility and growth rates. - """ - # Read CSV with proper data types - df = pd.read_csv(csv_path, parse_dates=[0]) - # Print first few rows of raw data to inspect - print("\nFirst few rows of raw data:") - print(df.head()) +# Utility functions - # Print data info to see types and non-null counts - print("\nDataset Info:") - print(df.info()) - - # Convert price columns to float and handle any potential formatting issues - price_columns = ['Price', 'Open', 'High', 'Low'] - for col in price_columns: - # Remove any commas in numbers - df[col] = df[col].astype(str).str.replace(',', '') - df[col] = pd.to_numeric(df[col], errors='coerce') - - # Rename columns for clarity - df.columns = ['Date', 'Close', 'Open', 'High', 'Low', 'Volume', 'Change'] - - # Sort by date in ascending order - df = df.sort_values('Date') - - # Print summary statistics after conversion - print("\nPrice Summary After Conversion:") - print(df[['Close', 'Open', 'High', 'Low']].describe()) - - # Calculate daily returns - df['Daily_Return'] = df['Close'].pct_change() - - # Print first few daily returns to verify calculation - print("\nFirst few daily returns:") - print(df[['Date', 'Close', 'Daily_Return']].head()) - - # Check for any infinite or NaN values - print("\nInfinite or NaN value counts:") - print(df.isna().sum()) - - # Calculate metrics using 365 days for annualization - analysis = { - 'period_start': df['Date'].min().strftime('%Y-%m-%d'), - 'period_end': df['Date'].max().strftime('%Y-%m-%d'), - 'total_days': len(df), - 'daily_volatility': df['Daily_Return'].std(), - 'annualized_volatility': df['Daily_Return'].std() * np.sqrt(365), - 'total_return': (df['Close'].iloc[-1] / df['Close'].iloc[0] - 1) * 100, - 'average_daily_return': df['Daily_Return'].mean() * 100, - 'average_annual_return': ((1 + df['Daily_Return'].mean()) ** 365 - 1) * 100, - 'min_price': df['Low'].min(), - 'max_price': df['High'].max(), - 'avg_price': df['Close'].mean(), - 'start_price': df['Close'].iloc[0], - 'end_price': df['Close'].iloc[-1] - } - - # Calculate rolling metrics - df['Rolling_Volatility_30d'] = df['Daily_Return'].rolling(window=30).std() * np.sqrt(365) - df['Rolling_Return_30d'] = df['Close'].pct_change(periods=30) * 100 - - return analysis, df - -def visualize_cycle_patterns(df, cycle_returns, cycle_volatility): - """ - Create enhanced visualization of Bitcoin's behavior across halving cycles. - """ - plt.style.use('seaborn-v0_8') - fig = plt.figure(figsize=(15, 15)) - - # Create a 3x1 subplot grid with different heights - gs = plt.GridSpec(3, 1, height_ratios=[2, 1, 2], hspace=0.3) - - # Plot 1: Returns across cycle with confidence bands - ax1 = plt.subplot(gs[0]) - - # Convert days to percentage through cycle - x_points = np.array(cycle_returns.index) / (4 * 365) * 100 - - # Calculate rolling mean and standard deviation for confidence bands - window = 30 # 30-day window - rolling_mean = pd.Series(cycle_returns.values).rolling(window=window).mean() - rolling_std = pd.Series(cycle_returns.values).rolling(window=window).std() - - # Plot confidence bands - ax1.fill_between(x_points, - (rolling_mean - 2*rolling_std) * 100, - (rolling_mean + 2*rolling_std) * 100, - alpha=0.2, color='blue', label='95% Confidence') - ax1.fill_between(x_points, - (rolling_mean - rolling_std) * 100, - (rolling_mean + rolling_std) * 100, - alpha=0.3, color='blue', label='68% Confidence') - - # Plot average returns - ax1.plot(x_points, cycle_returns.values * 100, 'b-', - label='Average Daily Return', linewidth=2) - ax1.axhline(y=0, color='gray', linestyle='--', alpha=0.5) - - # Add vertical lines for each year in cycle - for year in range(1, 4): - ax1.axvline(x=year*25, color='gray', linestyle=':', alpha=0.3) - ax1.text(year*25, ax1.get_ylim()[1], f'Year {year}', - rotation=90, va='top', ha='right', alpha=0.7) - - # Highlight halving points - ax1.axvline(x=0, color='red', linestyle='--', alpha=0.5, label='Halving Event') - ax1.axvline(x=100, color='red', linestyle='--', alpha=0.5) - - ax1.set_title('Bitcoin Return Patterns Across Halving Cycle', pad=20) - ax1.set_xlabel('Position in Cycle (%)') - ax1.set_ylabel('Average Daily Return (%)') - ax1.grid(True, alpha=0.3) - ax1.legend(loc='upper right') - - # Plot 2: Volatility across cycle - ax2 = plt.subplot(gs[1]) - - # Calculate rolling volatility confidence bands - vol_mean = pd.Series(cycle_volatility.values).rolling(window=window).mean() - vol_std = pd.Series(cycle_volatility.values).rolling(window=window).std() - - # Plot volatility with confidence bands - annualized_factor = np.sqrt(365) * 100 - ax2.fill_between(x_points, - (vol_mean - 2*vol_std) * annualized_factor, - (vol_mean + 2*vol_std) * annualized_factor, - alpha=0.2, color='red', label='95% Confidence') - ax2.plot(x_points, cycle_volatility.values * annualized_factor, 'r-', - label='Annualized Volatility', linewidth=2) - - # Add year markers - for year in range(1, 4): - ax2.axvline(x=year*25, color='gray', linestyle=':', alpha=0.3) - - ax2.axvline(x=0, color='red', linestyle='--', alpha=0.5) - ax2.axvline(x=100, color='red', linestyle='--', alpha=0.5) - - ax2.set_xlabel('Position in Cycle (%)') - ax2.set_ylabel('Volatility (%)') - ax2.grid(True, alpha=0.3) - ax2.legend(loc='upper right') - - # Plot 3: Average price trajectory within cycles - ax3 = plt.subplot(gs[2]) - - # Define a color scheme for cycles - cycle_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd'] - - # Calculate average price path for each cycle - halving_dates = get_halving_dates() - cycles = [] - - for i in range(len(halving_dates)-1): - cycle_start = halving_dates[i] - cycle_end = halving_dates[i+1] - cycle_data = df[(df['Date'] >= cycle_start) & (df['Date'] < cycle_end)].copy() - - if len(cycle_data) > 0: - cycle_data['Cycle_Pct'] = ((cycle_data['Date'] - cycle_start).dt.total_seconds() / - (cycle_end - cycle_start).total_seconds() * 100) - cycle_data['Normalized_Price'] = cycle_data['Close'] / cycle_data['Close'].iloc[0] - cycles.append(cycle_data) - - # Plot each historical cycle with distinct colors - for i, cycle in enumerate(cycles): - ax3.semilogy(cycle['Cycle_Pct'], cycle['Normalized_Price'], - color=cycle_colors[i], alpha=0.7, - label=f'Cycle {i+1} ({cycle["Date"].iloc[0].strftime("%Y")}-{cycle["Date"].iloc[-1].strftime("%Y")})') - - # Calculate and plot average cycle - if cycles: - avg_cycle = pd.concat([c.set_index('Cycle_Pct')['Normalized_Price'] for c in cycles], axis=1) - avg_cycle_mean = avg_cycle.mean(axis=1) - avg_cycle_std = avg_cycle.std(axis=1) - - ax3.semilogy(avg_cycle_mean.index, avg_cycle_mean.values, 'k-', - linewidth=2, label='Average Cycle') - ax3.fill_between(avg_cycle_mean.index, - avg_cycle_mean * np.exp(-2*avg_cycle_std), - avg_cycle_mean * np.exp(2*avg_cycle_std), - alpha=0.2, color='gray') - - # Add year markers - for year in range(1, 4): - ax3.axvline(x=year*25, color='gray', linestyle=':', alpha=0.3) - - ax3.axvline(x=0, color='red', linestyle='--', alpha=0.5) - ax3.axvline(x=100, color='red', linestyle='--', alpha=0.5) - - ax3.set_title('Price Performance Across Cycles (Normalized)', pad=20) - ax3.set_xlabel('Position in Cycle (%)') - ax3.set_ylabel('Price (Relative to Cycle Start)') - ax3.grid(True, alpha=0.3) - ax3.legend(loc='center left', bbox_to_anchor=(1.02, 0.5)) - - # Add current cycle position marker on all plots - current_position = get_cycle_position(df['Date'].max(), halving_dates) * 100 - for ax in [ax1, ax2, ax3]: - ax.axvline(x=current_position, color='green', linestyle='-', alpha=0.5, - label='Current Position') - - # Main title for the figure - fig.suptitle('Bitcoin Halving Cycle Analysis', fontsize=16, y=0.95) - - # Adjust layout to prevent legend cutoff - plt.tight_layout() - - # Save the plot - plt.savefig('bitcoin_cycle_patterns.png', dpi=300, bbox_inches='tight') - plt.close() - -def create_plots(df, start=None, end=None, project_days=365): - """ - Create plots including historical data and future projections. - """ - # Filter data based on date range - mask = pd.Series(True, index=df.index) - if start: - mask &= df['Date'] >= pd.to_datetime(start) - if end: - mask &= df['Date'] <= pd.to_datetime(end) - - plot_df = df[mask].copy() - - if len(plot_df) == 0: - raise ValueError("No data found for the specified date range") - - # Generate projections - cycle_returns, cycle_volatility = analyze_cycles_with_halvings(plot_df) - projections = project_prices_with_cycles(plot_df, days_forward=project_days) - - # Create cycle visualization - visualize_cycle_patterns(plot_df, cycle_returns, cycle_volatility) - - # Set up the style - plt.style.use('seaborn-v0_8') - - # Create figure - fig = plt.figure(figsize=(15, 15)) - - # Date range for titles - hist_date_range = f" ({plot_df['Date'].min().strftime('%Y-%m-%d')} to {plot_df['Date'].max().strftime('%Y-%m-%d')})" - - # 1. Price history and projections (log scale) - ax1 = plt.subplot(4, 1, 1) - - # Plot historical prices - ax1.semilogy(plot_df['Date'], plot_df['Close'], 'b-', label='Historical Price') - - # Plot projections - ax1.semilogy(projections.index, projections['Expected_Trend'], '--', - color='purple', label='Expected Trend') - ax1.semilogy(projections.index, projections['Median'], ':', - color='green', label='Simulated Median') - ax1.fill_between(projections.index, - projections['Lower_95'], projections['Upper_95'], - alpha=0.2, color='orange', label='95% Confidence Interval') - ax1.fill_between(projections.index, - projections['Lower_68'], projections['Upper_68'], - alpha=0.3, color='green', label='68% Confidence Interval') - - # Customize y-axis - ax1.yaxis.set_major_formatter(plt.FuncFormatter(format_price)) - - # Set custom y-axis ticks at meaningful price points - min_price = min(plot_df['Low'].min(), projections['Lower_95'].min()) - max_price = max(plot_df['High'].max(), projections['Upper_95'].max()) - - price_points = get_nice_price_points(min_price, max_price) - ax1.set_yticks(price_points) - - # Adjust y-axis label properties - ax1.tick_params(axis='y', labelsize=8) # Smaller font size - - # Add some padding to prevent label cutoff - ax1.margins(y=0.02) - - # Adjust label padding to prevent overlap - ax1.yaxis.set_tick_params(pad=1) - - # Add grid lines with adjusted opacity - ax1.grid(True, which='major', linestyle='-', alpha=0.5) - ax1.grid(True, which='minor', linestyle=':', alpha=0.2) - - ax1.set_title('Bitcoin Price History and Projections (Log Scale)' + hist_date_range) - # Make legend font size smaller too for consistency - ax1.legend(fontsize=8) - - # 2. Rolling volatility - ax2 = plt.subplot(4, 1, 2) - ax2.plot(plot_df['Date'], plot_df['Rolling_Volatility_30d'], 'r-', label='30-Day Rolling Volatility') - ax2.set_title('30-Day Rolling Volatility (Annualized)' + hist_date_range) - ax2.set_xlabel('Date') - ax2.set_ylabel('Volatility') - ax2.grid(True) - ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: '{:.0%}'.format(y))) - ax2.legend() - - # 3. Returns distribution - ax3 = plt.subplot(4, 1, 3) - returns_mean = plot_df['Daily_Return'].mean() - returns_std = plot_df['Daily_Return'].std() - filtered_returns = plot_df['Daily_Return'][ - (plot_df['Daily_Return'] > returns_mean - 5 * returns_std) & - (plot_df['Daily_Return'] < returns_mean + 5 * returns_std) - ] - - sns.histplot(filtered_returns, bins=100, ax=ax3) - ax3.set_title('Distribution of Daily Returns (Excluding Extreme Outliers)' + hist_date_range) - ax3.set_xlabel('Daily Return') - ax3.set_ylabel('Count') - ax3.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: '{:.0%}'.format(x))) - - # Add a vertical line for mean return - ax3.axvline(filtered_returns.mean(), color='r', linestyle='dashed', linewidth=1) - ax3.text(filtered_returns.mean(), ax3.get_ylim()[1], 'Mean', - rotation=90, va='top', ha='right') - -# 4. Projection ranges - ax4 = plt.subplot(4, 1, 4) - - # Calculate and plot price ranges at different future points - timepoints = np.array(range(30,365,30)) - timepoints = timepoints[timepoints <= project_days] - - ranges = [] - labels = [] - positions = [] - - for t in timepoints: - idx = t - 1 # Convert to 0-based index - ranges.extend([ - projections['Lower_95'].iloc[idx], - projections['Lower_68'].iloc[idx], - projections['Median'].iloc[idx], - projections['Upper_68'].iloc[idx], - projections['Upper_95'].iloc[idx] - ]) - labels.extend([ - '95% Lower', - '68% Lower', - 'Median', - '68% Upper', - '95% Upper' - ]) - positions.extend([t] * 5) - - # Plot ranges (removed violin plot) - ax4.scatter(positions, ranges, alpha=0.6) - - # Add lines connecting the ranges - for t in timepoints: - idx = positions.index(t) - ax4.plot([t] * 5, ranges[idx:idx+5], 'k-', alpha=0.3) - - # Set log scale first - ax4.set_yscale('log') - - # Get the current order of magnitude for setting appropriate ticks - min_price = min(ranges) - max_price = max(ranges) - - # Create price points at regular intervals on log scale - log_min = np.floor(np.log10(min_price)) - log_max = np.ceil(np.log10(max_price)) - price_points = [] - for exp in range(int(log_min), int(log_max + 1)): - for mult in [1, 2, 5]: - point = mult * 10**exp - if min_price <= point <= max_price: - price_points.append(point) - - ax4.set_yticks(price_points) - - def price_formatter(x, p): - if x >= 1e6: - return f'${x/1e6:.1f}M' - if x >= 1e3: - return f'${x/1e3:.0f}K' - return f'${x:.0f}' - - # Apply formatter to major ticks - ax4.yaxis.set_major_formatter(plt.FuncFormatter(price_formatter)) - - # Customize the plot - ax4.set_title('Projected Price Ranges at Future Timepoints') - ax4.set_xlabel('Days Forward') - ax4.set_ylabel('Price (USD)') - ax4.grid(True, alpha=0.3) - - # Set x-axis to show only our timepoints - ax4.set_xticks(timepoints) - - # Adjust layout - plt.tight_layout() - - # Save the plot - start_str = start if start else plot_df['Date'].min().strftime('%Y-%m-%d') - end_str = end if end else plot_df['Date'].max().strftime('%Y-%m-%d') - filename = f'bitcoin_analysis_{start_str}_to_{end_str}_with_projections.png' - plt.savefig(filename, dpi=300, bbox_inches='tight') - plt.close() - - return projections - -def analyze_cycles(df, cycle_period=4*365): - """Analyze Bitcoin market cycles to understand return patterns""" - df = df.copy() - - # Calculate rolling returns at different scales - df['Returns_30d'] = df['Close'].pct_change(periods=30) - df['Returns_90d'] = df['Close'].pct_change(periods=90) - df['Returns_365d'] = df['Close'].pct_change(periods=365) - - # Calculate where we are in the supposed 4-year cycle - df['Days_From_Start'] = (df['Date'] - df['Date'].min()).dt.days - df['Cycle_Position'] = df['Days_From_Start'] % cycle_period - - # Group by cycle position and calculate average returns - cycle_returns = df.groupby(df['Cycle_Position'])['Daily_Return'].mean() - cycle_volatility = df.groupby(df['Cycle_Position'])['Daily_Return'].std() - - return cycle_returns, cycle_volatility def get_halving_dates(): """Return known and projected Bitcoin halving dates""" - return pd.to_datetime([ - '2008-01-03', # Bitcoin genesis block (treat as cycle start) - '2012-11-28', # First halving - '2016-07-09', # Second halving - '2020-05-11', # Third halving - '2024-04-19', # Fourth halving - '2028-04-20', # Fifth halving (projected) - ]) + return pd.to_datetime( + [ + "2008-01-03", # Bitcoin genesis block (treat as cycle start) + "2012-11-28", # First halving + "2016-07-09", # Second halving + "2020-05-11", # Third halving + "2024-04-19", # Fourth halving + "2028-04-20", # Fifth halving (projected) + ] + ) + def get_cycle_position(date, halving_dates): """ @@ -472,212 +51,19 @@ def get_cycle_position(date, halving_dates): cycle_length = (next_halving - prev_halving).days return min(days_since_halving / cycle_length, 1.0) -def analyze_cycles_with_halvings(df): - """Analyze Bitcoin market cycles aligned with halving events""" - df = df.copy() - # Get halving dates - halving_dates = get_halving_dates() +def format_price(x, p): + """Format large numbers in K, M, B format with appropriate precision""" + if abs(x) >= 1e9: + return f"${x/1e9:.1f}B" + if abs(x) >= 1e6: + return f"${x/1e6:.1f}M" + if abs(x) >= 1e3: + return f"${x/1e3:.1f}K" + if abs(x) >= 1: + return f"${x:.0f}" + return f"${x:.2f}" # For values less than $1, show cents - # Calculate cycle position for each date - df['Cycle_Position'] = df['Date'].apply( - lambda x: get_cycle_position(x, halving_dates) - ) - - # Convert to days within cycle (0 to ~1460 days) - df['Cycle_Days'] = (df['Cycle_Position'] * 4 * 365).round().astype(int) - - # Calculate returns at different scales - df['Returns_30d'] = df['Close'].pct_change(periods=30) - df['Returns_90d'] = df['Close'].pct_change(periods=90) - df['Returns_365d'] = df['Close'].pct_change(periods=365) - - # Group by position in cycle and calculate average returns - cycle_returns = df.groupby(df['Cycle_Days'])['Daily_Return'].mean() - cycle_volatility = df.groupby(df['Cycle_Days'])['Daily_Return'].std() - - # Smooth the cycle returns to reduce noise - from scipy.signal import savgol_filter - window = 91 # About 3 months - if len(cycle_returns) > window: - cycle_returns = pd.Series( - savgol_filter(cycle_returns, window, 3), - index=cycle_returns.index - ) - - return cycle_returns, cycle_volatility - - -def project_prices_with_cycles(df, days_forward=365, simulations=1000, confidence_levels=[0.95, 0.68]): - """ - Project future Bitcoin prices using Monte Carlo simulation with halving-aligned cycles. - """ - # Analyze historical cycles - cycle_returns, cycle_volatility = analyze_cycles_with_halvings(df) - - # Get current position in halving cycle - halving_dates = get_halving_dates() - current_date = df['Date'].max() - cycle_position = get_cycle_position(current_date, halving_dates) - current_cycle_days = int(cycle_position * 4 * 365) - - # Current price (last known price) - last_price = df['Close'].iloc[-1] - last_date = df['Date'].iloc[-1] - - # Generate dates for projection - future_dates = pd.date_range( - start=last_date + timedelta(days=1), - periods=days_forward, - freq='D' - ) - - # Calculate expected returns for future dates based on cycle position - future_cycle_days = [ - (current_cycle_days + i) % (4 * 365) - for i in range(days_forward) - ] - expected_returns = np.array([ - cycle_returns.get(day, cycle_returns.mean()) - for day in future_cycle_days - ]) - - # Calculate base volatility (recent) - recent_volatility = df['Daily_Return'].tail(90).std() - - # Add long-term trend component (very gentle decay) - long_term_decay = 0.9 ** (np.arange(days_forward) / 365) # 10% reduction per year - expected_returns = expected_returns * long_term_decay - - # Run Monte Carlo simulation - np.random.seed(42) # For reproducibility - simulated_paths = np.zeros((days_forward, simulations)) - - for sim in range(simulations): - # Generate random returns using cycle-aware expected returns - returns = np.random.normal( - loc=expected_returns, - scale=recent_volatility, - size=days_forward - ) - - # Calculate price path - price_path = last_price * np.exp(np.cumsum(returns)) - simulated_paths[:, sim] = price_path - - # Calculate percentiles for confidence intervals - results = pd.DataFrame(index=future_dates) - results['Median'] = np.percentile(simulated_paths, 50, axis=1) - - for level in confidence_levels: - lower_percentile = (1 - level) * 100 / 2 - upper_percentile = 100 - lower_percentile - - results[f'Lower_{int(level*100)}'] = np.percentile(simulated_paths, lower_percentile, axis=1) - results[f'Upper_{int(level*100)}'] = np.percentile(simulated_paths, upper_percentile, axis=1) - - # Add expected trend line (without randomness) - results['Expected_Trend'] = last_price * np.exp(np.cumsum(expected_returns)) - - return results - -def calculate_rolling_metrics(df, window=365): - """Calculate rolling returns and volatility metrics""" - df = df.copy() - df['Rolling_Daily_Return'] = df['Daily_Return'].rolling(window=window).mean() - df['Rolling_Daily_Volatility'] = df['Daily_Return'].rolling(window=window).std() - return df - -def fit_return_trend(df): - """Fit an exponential decay trend to the rolling returns""" - # Calculate days from start - df = df.copy() - df['Days'] = (df['Date'] - df['Date'].min()).dt.days - - # Calculate rolling metrics - df = calculate_rolling_metrics(df) - - # Remove NaN values for fitting - clean_data = df.dropna() - - # Fit exponential decay: y = a * exp(-bx) + c - from scipy.optimize import curve_fit - - def exp_decay(x, a, b, c): - return a * np.exp(-b * x) + c - - popt, _ = curve_fit( - exp_decay, - clean_data['Days'], - clean_data['Rolling_Daily_Return'], - p0=[0.01, 0.001, 0.0001], # Initial guess for parameters - bounds=([0, 0, 0], [1, 1, 0.01]) # Constraints to keep parameters positive - ) - - return popt - -def project_prices_with_trend(df, days_forward=365, simulations=1000, confidence_levels=[0.95, 0.68]): - """ - Project future Bitcoin prices using Monte Carlo simulation with trend adjustment. - """ - # Fit return trend - trend_params = fit_return_trend(df) - - # Calculate days from start for projection - days_from_start = (df['Date'].max() - df['Date'].min()).days - - # Current price (last known price) - last_price = df['Close'].iloc[-1] - last_date = df['Date'].iloc[-1] - - # Generate dates for projection - future_dates = pd.date_range( - start=last_date + timedelta(days=1), - periods=days_forward, - freq='D' - ) - - # Calculate expected returns for future dates using fitted trend - def exp_decay(x, a, b, c): - return a * np.exp(-b * x) + c - - future_days = np.arange(days_from_start + 1, days_from_start + days_forward + 1) - expected_returns = exp_decay(future_days, *trend_params) - - # Use recent volatility for projections - recent_volatility = df['Daily_Return'].tail(365).std() - - # Run Monte Carlo simulation - np.random.seed(42) # For reproducibility - simulated_paths = np.zeros((days_forward, simulations)) - - for sim in range(simulations): - # Generate random returns using trending expected return - returns = np.random.normal( - loc=expected_returns, - scale=recent_volatility, - size=days_forward - ) - - # Calculate price path - price_path = last_price * np.exp(np.cumsum(returns)) - simulated_paths[:, sim] = price_path - - # Calculate percentiles for confidence intervals - results = pd.DataFrame(index=future_dates) - results['Median'] = np.percentile(simulated_paths, 50, axis=1) - - for level in confidence_levels: - lower_percentile = (1 - level) * 100 / 2 - upper_percentile = 100 - lower_percentile - - results[f'Lower_{int(level*100)}'] = np.percentile(simulated_paths, lower_percentile, axis=1) - results[f'Upper_{int(level*100)}'] = np.percentile(simulated_paths, upper_percentile, axis=1) - - # Add expected trend line (without randomness) - results['Expected_Trend'] = last_price * np.exp(np.cumsum(expected_returns)) - - return results def get_nice_price_points(min_price, max_price): """ @@ -709,56 +95,93 @@ def get_nice_price_points(min_price, max_price): return np.array(price_points) -def format_price(x, p): - """Format large numbers in K, M, B format with appropriate precision""" - if abs(x) >= 1e9: - return f'${x/1e9:.1f}B' - if abs(x) >= 1e6: - return f'${x/1e6:.1f}M' - if abs(x) >= 1e3: - return f'${x/1e3:.1f}K' - if abs(x) >= 1: - return f'${x:.0f}' - return f'${x:.2f}' # For values less than $1, show cents -def project_prices(df, days_forward=365, simulations=1000, confidence_levels=[0.95, 0.68]): +# Analysis functions + + +def analyze_cycles_with_halvings(df): + """Analyze Bitcoin market cycles aligned with halving events""" + df = df.copy() + + # Get halving dates + halving_dates = get_halving_dates() + + # Calculate cycle position for each date + df["Cycle_Position"] = df["Date"].apply( + lambda x: get_cycle_position(x, halving_dates) + ) + + # Convert to days within cycle (0 to ~1460 days) + df["Cycle_Days"] = (df["Cycle_Position"] * 4 * 365).round().astype(int) + + # Calculate returns at different scales + df["Returns_30d"] = df["Close"].pct_change(periods=30) + df["Returns_90d"] = df["Close"].pct_change(periods=90) + df["Returns_365d"] = df["Close"].pct_change(periods=365) + + # Group by position in cycle and calculate average returns + cycle_returns = df.groupby(df["Cycle_Days"])["Daily_Return"].mean() + cycle_volatility = df.groupby(df["Cycle_Days"])["Daily_Return"].std() + + # Smooth the cycle returns to reduce noise + from scipy.signal import savgol_filter + + window = 91 # About 3 months + if len(cycle_returns) > window: + cycle_returns = pd.Series( + savgol_filter(cycle_returns, window, 3), index=cycle_returns.index + ) + + return cycle_returns, cycle_volatility + + +def project_prices_with_cycles( + df, days_forward=365, simulations=1000, confidence_levels=[0.95, 0.68] +): """ - Project future Bitcoin prices using Monte Carlo simulation. - - Parameters: - df: DataFrame with historical price data - days_forward: Number of days to project forward - simulations: Number of Monte Carlo simulations to run - confidence_levels: List of confidence levels for the projection intervals - - Returns: - DataFrame with projection results + Project future Bitcoin prices using Monte Carlo simulation with halving-aligned cycles. """ - # Calculate daily return parameters - daily_return = df['Daily_Return'].mean() - daily_volatility = df['Daily_Return'].std() + # Analyze historical cycles + cycle_returns, cycle_volatility = analyze_cycles_with_halvings(df) + + # Get current position in halving cycle + halving_dates = get_halving_dates() + current_date = df["Date"].max() + cycle_position = get_cycle_position(current_date, halving_dates) + current_cycle_days = int(cycle_position * 4 * 365) # Current price (last known price) - last_price = df['Close'].iloc[-1] - last_date = df['Date'].iloc[-1] + last_price = df["Close"].iloc[-1] + last_date = df["Date"].iloc[-1] # Generate dates for projection future_dates = pd.date_range( - start=last_date + timedelta(days=1), - periods=days_forward, - freq='D' + start=last_date + timedelta(days=1), periods=days_forward, freq="D" ) + # Calculate expected returns for future dates based on cycle position + future_cycle_days = [ + (current_cycle_days + i) % (4 * 365) for i in range(days_forward) + ] + expected_returns = np.array( + [cycle_returns.get(day, cycle_returns.mean()) for day in future_cycle_days] + ) + + # Calculate base volatility (recent) + recent_volatility = df["Daily_Return"].tail(90).std() + + # Add long-term trend component (very gentle decay) + long_term_decay = 0.9 ** (np.arange(days_forward) / 365) # 10% reduction per year + expected_returns = expected_returns * long_term_decay + # Run Monte Carlo simulation np.random.seed(42) # For reproducibility simulated_paths = np.zeros((days_forward, simulations)) for sim in range(simulations): - # Generate random returns using historical parameters + # Generate random returns using cycle-aware expected returns returns = np.random.normal( - loc=daily_return, - scale=daily_volatility, - size=days_forward + loc=expected_returns, scale=recent_volatility, size=days_forward ) # Calculate price path @@ -767,42 +190,530 @@ def project_prices(df, days_forward=365, simulations=1000, confidence_levels=[0. # Calculate percentiles for confidence intervals results = pd.DataFrame(index=future_dates) - results['Median'] = np.percentile(simulated_paths, 50, axis=1) + results["Median"] = np.percentile(simulated_paths, 50, axis=1) for level in confidence_levels: lower_percentile = (1 - level) * 100 / 2 upper_percentile = 100 - lower_percentile - results[f'Lower_{int(level*100)}'] = np.percentile(simulated_paths, lower_percentile, axis=1) - results[f'Upper_{int(level*100)}'] = np.percentile(simulated_paths, upper_percentile, axis=1) + results[f"Lower_{int(level*100)}"] = np.percentile( + simulated_paths, lower_percentile, axis=1 + ) + results[f"Upper_{int(level*100)}"] = np.percentile( + simulated_paths, upper_percentile, axis=1 + ) + + # Add expected trend line (without randomness) + results["Expected_Trend"] = last_price * np.exp(np.cumsum(expected_returns)) return results -def print_analysis(analysis): - print(f"\nBitcoin Price Analysis ({analysis['period_start']} to {analysis['period_end']})") - print("-" * 50) - print(f"Total Days Analyzed: {analysis['total_days']}") - print(f"\nPrice Range:") - print(f"Starting Price: ${analysis['start_price']:,.2f}") - print(f"Ending Price: ${analysis['end_price']:,.2f}") - print(f"Minimum Price: ${analysis['min_price']:,.2f}") - print(f"Maximum Price: ${analysis['max_price']:,.2f}") - print(f"Average Price: ${analysis['avg_price']:,.2f}") - print(f"\nVolatility Metrics:") - print(f"Daily Volatility: {analysis['daily_volatility']:.2%}") - print(f"Annualized Volatility: {analysis['annualized_volatility']:.2%}") - print(f"\nReturn Metrics:") - print(f"Total Return: {analysis['total_return']:,.2f}%") - print(f"Average Daily Return: {analysis['average_daily_return']:.2f}%") - print(f"Average Annual Return: {analysis['average_annual_return']:,.2f}%") + +def analyze_bitcoin_prices(csv_path): + """ + Analyze Bitcoin price data to calculate volatility and growth rates. + """ + # Read CSV with proper data types + df = pd.read_csv(csv_path, parse_dates=[0]) + + # Print first few rows of raw data to inspect + print("\nFirst few rows of raw data:") + print(df.head()) + + # Print data info to see types and non-null counts + print("\nDataset Info:") + print(df.info()) + + # Convert price columns to float and handle any potential formatting issues + price_columns = ["Price", "Open", "High", "Low"] + for col in price_columns: + # Remove any commas in numbers + df[col] = df[col].astype(str).str.replace(",", "") + df[col] = pd.to_numeric(df[col], errors="coerce") + + # Rename columns for clarity + df.columns = ["Date", "Close", "Open", "High", "Low", "Volume", "Change"] + + # Sort by date in ascending order + df = df.sort_values("Date") + + # Print summary statistics after conversion + print("\nPrice Summary After Conversion:") + print(df[["Close", "Open", "High", "Low"]].describe()) + + # Calculate daily returns + df["Daily_Return"] = df["Close"].pct_change() + + # Print first few daily returns to verify calculation + print("\nFirst few daily returns:") + print(df[["Date", "Close", "Daily_Return"]].head()) + + # Check for any infinite or NaN values + print("\nInfinite or NaN value counts:") + print(df.isna().sum()) + + # Calculate metrics using 365 days for annualization + analysis = { + "period_start": df["Date"].min().strftime("%Y-%m-%d"), + "period_end": df["Date"].max().strftime("%Y-%m-%d"), + "total_days": len(df), + "daily_volatility": df["Daily_Return"].std(), + "annualized_volatility": df["Daily_Return"].std() * np.sqrt(365), + "total_return": (df["Close"].iloc[-1] / df["Close"].iloc[0] - 1) * 100, + "average_daily_return": df["Daily_Return"].mean() * 100, + "average_annual_return": ((1 + df["Daily_Return"].mean()) ** 365 - 1) * 100, + "min_price": df["Low"].min(), + "max_price": df["High"].max(), + "avg_price": df["Close"].mean(), + "start_price": df["Close"].iloc[0], + "end_price": df["Close"].iloc[-1], + } + + # Calculate rolling metrics + df["Rolling_Volatility_30d"] = df["Daily_Return"].rolling( + window=30 + ).std() * np.sqrt(365) + df["Rolling_Return_30d"] = df["Close"].pct_change(periods=30) * 100 + + return analysis, df + + +# Main plotting functions + + +def create_plots(df, start=None, end=None, project_days=365): + """ + Create plots including historical data and future projections. + """ + # Filter data based on date range + mask = pd.Series(True, index=df.index) + if start: + mask &= df["Date"] >= pd.to_datetime(start) + if end: + mask &= df["Date"] <= pd.to_datetime(end) + + plot_df = df[mask].copy() + + if len(plot_df) == 0: + raise ValueError("No data found for the specified date range") + + # Generate projections + cycle_returns, cycle_volatility = analyze_cycles_with_halvings(plot_df) + projections = project_prices_with_cycles(plot_df, days_forward=project_days) + + # Create cycle visualization + visualize_cycle_patterns(plot_df, cycle_returns, cycle_volatility) + + # Set up the style + plt.style.use("seaborn-v0_8") + + # Create figure + fig = plt.figure(figsize=(15, 15)) + + # Date range for titles + hist_date_range = f" ({plot_df['Date'].min().strftime('%Y-%m-%d')} to {plot_df['Date'].max().strftime('%Y-%m-%d')})" + + # 1. Price history and projections (log scale) + ax1 = plt.subplot(4, 1, 1) + + # Plot historical prices + ax1.semilogy(plot_df["Date"], plot_df["Close"], "b-", label="Historical Price") + + # Plot projections + ax1.semilogy( + projections.index, + projections["Expected_Trend"], + "--", + color="purple", + label="Expected Trend", + ) + ax1.semilogy( + projections.index, + projections["Median"], + ":", + color="green", + label="Simulated Median", + ) + ax1.fill_between( + projections.index, + projections["Lower_95"], + projections["Upper_95"], + alpha=0.2, + color="orange", + label="95% Confidence Interval", + ) + ax1.fill_between( + projections.index, + projections["Lower_68"], + projections["Upper_68"], + alpha=0.3, + color="green", + label="68% Confidence Interval", + ) + + # Customize y-axis + ax1.yaxis.set_major_formatter(plt.FuncFormatter(format_price)) + + # Set custom y-axis ticks at meaningful price points + min_price = min(plot_df["Low"].min(), projections["Lower_95"].min()) + max_price = max(plot_df["High"].max(), projections["Upper_95"].max()) + + price_points = get_nice_price_points(min_price, max_price) + ax1.set_yticks(price_points) + + # Adjust y-axis label properties + ax1.tick_params(axis="y", labelsize=8) # Smaller font size + + # Add some padding to prevent label cutoff + ax1.margins(y=0.02) + + # Adjust label padding to prevent overlap + ax1.yaxis.set_tick_params(pad=1) + + # Add grid lines with adjusted opacity + ax1.grid(True, which="major", linestyle="-", alpha=0.5) + ax1.grid(True, which="minor", linestyle=":", alpha=0.2) + + ax1.set_title("Bitcoin Price History and Projections (Log Scale)" + hist_date_range) + # Make legend font size smaller too for consistency + ax1.legend(fontsize=8) + + # 2. Rolling volatility + ax2 = plt.subplot(4, 1, 2) + ax2.plot( + plot_df["Date"], + plot_df["Rolling_Volatility_30d"], + "r-", + label="30-Day Rolling Volatility", + ) + ax2.set_title("30-Day Rolling Volatility (Annualized)" + hist_date_range) + ax2.set_xlabel("Date") + ax2.set_ylabel("Volatility") + ax2.grid(True) + ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: "{:.0%}".format(y))) + ax2.legend() + + # 3. Returns distribution + ax3 = plt.subplot(4, 1, 3) + returns_mean = plot_df["Daily_Return"].mean() + returns_std = plot_df["Daily_Return"].std() + filtered_returns = plot_df["Daily_Return"][ + (plot_df["Daily_Return"] > returns_mean - 5 * returns_std) + & (plot_df["Daily_Return"] < returns_mean + 5 * returns_std) + ] + + sns.histplot(filtered_returns, bins=100, ax=ax3) + ax3.set_title( + "Distribution of Daily Returns (Excluding Extreme Outliers)" + hist_date_range + ) + ax3.set_xlabel("Daily Return") + ax3.set_ylabel("Count") + ax3.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: "{:.0%}".format(x))) + + # Add a vertical line for mean return + ax3.axvline(filtered_returns.mean(), color="r", linestyle="dashed", linewidth=1) + ax3.text( + filtered_returns.mean(), + ax3.get_ylim()[1], + "Mean", + rotation=90, + va="top", + ha="right", + ) + + # 4. Projection ranges + ax4 = plt.subplot(4, 1, 4) + + # Calculate and plot price ranges at different future points + timepoints = np.array(range(30, 365, 30)) + timepoints = timepoints[timepoints <= project_days] + + ranges = [] + labels = [] + positions = [] + + for t in timepoints: + idx = t - 1 # Convert to 0-based index + ranges.extend( + [ + projections["Lower_95"].iloc[idx], + projections["Lower_68"].iloc[idx], + projections["Median"].iloc[idx], + projections["Upper_68"].iloc[idx], + projections["Upper_95"].iloc[idx], + ] + ) + labels.extend(["95% Lower", "68% Lower", "Median", "68% Upper", "95% Upper"]) + positions.extend([t] * 5) + + # Plot ranges (removed violin plot) + ax4.scatter(positions, ranges, alpha=0.6) + + # Add lines connecting the ranges + for t in timepoints: + idx = positions.index(t) + ax4.plot([t] * 5, ranges[idx : idx + 5], "k-", alpha=0.3) + + # Set log scale first + ax4.set_yscale("log") + + # Get the current order of magnitude for setting appropriate ticks + min_price = min(ranges) + max_price = max(ranges) + + # Create price points at regular intervals on log scale + log_min = np.floor(np.log10(min_price)) + log_max = np.ceil(np.log10(max_price)) + price_points = [] + for exp in range(int(log_min), int(log_max + 1)): + for mult in [1, 2, 5]: + point = mult * 10**exp + if min_price <= point <= max_price: + price_points.append(point) + + ax4.set_yticks(price_points) + + def price_formatter(x, p): + if x >= 1e6: + return f"${x/1e6:.1f}M" + if x >= 1e3: + return f"${x/1e3:.0f}K" + return f"${x:.0f}" + + # Apply formatter to major ticks + ax4.yaxis.set_major_formatter(plt.FuncFormatter(price_formatter)) + + # Customize the plot + ax4.set_title("Projected Price Ranges at Future Timepoints") + ax4.set_xlabel("Days Forward") + ax4.set_ylabel("Price (USD)") + ax4.grid(True, alpha=0.3) + + # Set x-axis to show only our timepoints + ax4.set_xticks(timepoints) + + # Adjust layout + plt.tight_layout() + + # Save the plot + start_str = start if start else plot_df["Date"].min().strftime("%Y-%m-%d") + end_str = end if end else plot_df["Date"].max().strftime("%Y-%m-%d") + filename = f"bitcoin_analysis_{start_str}_to_{end_str}_with_projections.png" + plt.savefig(filename, dpi=300, bbox_inches="tight") + plt.close() + + return projections + + +def visualize_cycle_patterns(df, cycle_returns, cycle_volatility): + """ + Create enhanced visualization of Bitcoin's behavior across halving cycles. + """ + plt.style.use("seaborn-v0_8") + fig = plt.figure(figsize=(15, 15)) + + # Create a 3x1 subplot grid with different heights + gs = plt.GridSpec(3, 1, height_ratios=[2, 1, 2], hspace=0.3) + + # Plot 1: Returns across cycle with confidence bands + ax1 = plt.subplot(gs[0]) + + # Convert days to percentage through cycle + x_points = np.array(cycle_returns.index) / (4 * 365) * 100 + + # Calculate rolling mean and standard deviation for confidence bands + window = 30 # 30-day window + rolling_mean = pd.Series(cycle_returns.values).rolling(window=window).mean() + rolling_std = pd.Series(cycle_returns.values).rolling(window=window).std() + + # Plot confidence bands + ax1.fill_between( + x_points, + (rolling_mean - 2 * rolling_std) * 100, + (rolling_mean + 2 * rolling_std) * 100, + alpha=0.2, + color="blue", + label="95% Confidence", + ) + ax1.fill_between( + x_points, + (rolling_mean - rolling_std) * 100, + (rolling_mean + rolling_std) * 100, + alpha=0.3, + color="blue", + label="68% Confidence", + ) + + # Plot average returns + ax1.plot( + x_points, + cycle_returns.values * 100, + "b-", + label="Average Daily Return", + linewidth=2, + ) + ax1.axhline(y=0, color="gray", linestyle="--", alpha=0.5) + + # Add vertical lines for each year in cycle + for year in range(1, 4): + ax1.axvline(x=year * 25, color="gray", linestyle=":", alpha=0.3) + ax1.text( + year * 25, + ax1.get_ylim()[1], + f"Year {year}", + rotation=90, + va="top", + ha="right", + alpha=0.7, + ) + + # Highlight halving points + ax1.axvline(x=0, color="red", linestyle="--", alpha=0.5, label="Halving Event") + ax1.axvline(x=100, color="red", linestyle="--", alpha=0.5) + + ax1.set_title("Bitcoin Return Patterns Across Halving Cycle", pad=20) + ax1.set_xlabel("Position in Cycle (%)") + ax1.set_ylabel("Average Daily Return (%)") + ax1.grid(True, alpha=0.3) + ax1.legend(loc="upper right") + + # Plot 2: Volatility across cycle + ax2 = plt.subplot(gs[1]) + + # Calculate rolling volatility confidence bands + vol_mean = pd.Series(cycle_volatility.values).rolling(window=window).mean() + vol_std = pd.Series(cycle_volatility.values).rolling(window=window).std() + + # Plot volatility with confidence bands + annualized_factor = np.sqrt(365) * 100 + ax2.fill_between( + x_points, + (vol_mean - 2 * vol_std) * annualized_factor, + (vol_mean + 2 * vol_std) * annualized_factor, + alpha=0.2, + color="red", + label="95% Confidence", + ) + ax2.plot( + x_points, + cycle_volatility.values * annualized_factor, + "r-", + label="Annualized Volatility", + linewidth=2, + ) + + # Add year markers + for year in range(1, 4): + ax2.axvline(x=year * 25, color="gray", linestyle=":", alpha=0.3) + + ax2.axvline(x=0, color="red", linestyle="--", alpha=0.5) + ax2.axvline(x=100, color="red", linestyle="--", alpha=0.5) + + ax2.set_xlabel("Position in Cycle (%)") + ax2.set_ylabel("Volatility (%)") + ax2.grid(True, alpha=0.3) + ax2.legend(loc="upper right") + + # Plot 3: Average price trajectory within cycles + ax3 = plt.subplot(gs[2]) + + # Define a color scheme for cycles + cycle_colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd"] + + # Calculate average price path for each cycle + halving_dates = get_halving_dates() + cycles = [] + + for i in range(len(halving_dates) - 1): + cycle_start = halving_dates[i] + cycle_end = halving_dates[i + 1] + cycle_data = df[(df["Date"] >= cycle_start) & (df["Date"] < cycle_end)].copy() + + if len(cycle_data) > 0: + cycle_data["Cycle_Pct"] = ( + (cycle_data["Date"] - cycle_start).dt.total_seconds() + / (cycle_end - cycle_start).total_seconds() + * 100 + ) + cycle_data["Normalized_Price"] = ( + cycle_data["Close"] / cycle_data["Close"].iloc[0] + ) + cycles.append(cycle_data) + + # Plot each historical cycle with distinct colors + for i, cycle in enumerate(cycles): + ax3.semilogy( + cycle["Cycle_Pct"], + cycle["Normalized_Price"], + color=cycle_colors[i], + alpha=0.7, + label=f'Cycle {i+1} ({cycle["Date"].iloc[0].strftime("%Y")}-{cycle["Date"].iloc[-1].strftime("%Y")})', + ) + + # Calculate and plot average cycle + if cycles: + avg_cycle = pd.concat( + [c.set_index("Cycle_Pct")["Normalized_Price"] for c in cycles], axis=1 + ) + avg_cycle_mean = avg_cycle.mean(axis=1) + avg_cycle_std = avg_cycle.std(axis=1) + + ax3.semilogy( + avg_cycle_mean.index, + avg_cycle_mean.values, + "k-", + linewidth=2, + label="Average Cycle", + ) + ax3.fill_between( + avg_cycle_mean.index, + avg_cycle_mean * np.exp(-2 * avg_cycle_std), + avg_cycle_mean * np.exp(2 * avg_cycle_std), + alpha=0.2, + color="gray", + ) + + # Add year markers + for year in range(1, 4): + ax3.axvline(x=year * 25, color="gray", linestyle=":", alpha=0.3) + + ax3.axvline(x=0, color="red", linestyle="--", alpha=0.5) + ax3.axvline(x=100, color="red", linestyle="--", alpha=0.5) + + ax3.set_title("Price Performance Across Cycles (Normalized)", pad=20) + ax3.set_xlabel("Position in Cycle (%)") + ax3.set_ylabel("Price (Relative to Cycle Start)") + ax3.grid(True, alpha=0.3) + ax3.legend(loc="center left", bbox_to_anchor=(1.02, 0.5)) + + # Add current cycle position marker on all plots + current_position = get_cycle_position(df["Date"].max(), halving_dates) * 100 + for ax in [ax1, ax2, ax3]: + ax.axvline( + x=current_position, + color="green", + linestyle="-", + alpha=0.5, + label="Current Position", + ) + + # Main title for the figure + fig.suptitle("Bitcoin Halving Cycle Analysis", fontsize=16, y=0.95) + + # Adjust layout to prevent legend cutoff + plt.tight_layout() + + # Save the plot + plt.savefig("bitcoin_cycle_patterns.png", dpi=300, bbox_inches="tight") + plt.close() + if __name__ == "__main__": analysis, df = analyze_bitcoin_prices("prices.csv") - #create_plots(df) # Full history - #create_plots(df, start='2022-01-01') # From 2022 onwards - #create_plots(df, start='2023-01-01', end='2023-12-31') # Just 2023 + # create_plots(df) # Full history + # create_plots(df, start='2022-01-01') # From 2022 onwards + # create_plots(df, start='2023-01-01', end='2023-12-31') # Just 2023 # Create plots with different time ranges and projections - projections = create_plots(df, start='2011-01-01', project_days=365*4) + projections = create_plots(df, start="2011-01-01", project_days=365 * 4) print("\nProjected Prices at Key Points:") print(projections.iloc[[29, 89, 179, 364]].round(2)) # 30, 90, 180, 365 days - print_analysis(analysis)