bitcoin-model/model.py

809 lines
30 KiB
Python

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
def analyze_bitcoin_prices(csv_path):
"""
Analyze Bitcoin price data to calculate volatility and growth rates.
"""
# Read CSV with proper data types
df = pd.read_csv(csv_path, parse_dates=[0])
# Print first few rows of raw data to inspect
print("\nFirst few rows of raw data:")
print(df.head())
# Print data info to see types and non-null counts
print("\nDataset Info:")
print(df.info())
# Convert price columns to float and handle any potential formatting issues
price_columns = ['Price', 'Open', 'High', 'Low']
for col in price_columns:
# Remove any commas in numbers
df[col] = df[col].astype(str).str.replace(',', '')
df[col] = pd.to_numeric(df[col], errors='coerce')
# Rename columns for clarity
df.columns = ['Date', 'Close', 'Open', 'High', 'Low', 'Volume', 'Change']
# Sort by date in ascending order
df = df.sort_values('Date')
# Print summary statistics after conversion
print("\nPrice Summary After Conversion:")
print(df[['Close', 'Open', 'High', 'Low']].describe())
# Calculate daily returns
df['Daily_Return'] = df['Close'].pct_change()
# Print first few daily returns to verify calculation
print("\nFirst few daily returns:")
print(df[['Date', 'Close', 'Daily_Return']].head())
# Check for any infinite or NaN values
print("\nInfinite or NaN value counts:")
print(df.isna().sum())
# Calculate metrics using 365 days for annualization
analysis = {
'period_start': df['Date'].min().strftime('%Y-%m-%d'),
'period_end': df['Date'].max().strftime('%Y-%m-%d'),
'total_days': len(df),
'daily_volatility': df['Daily_Return'].std(),
'annualized_volatility': df['Daily_Return'].std() * np.sqrt(365),
'total_return': (df['Close'].iloc[-1] / df['Close'].iloc[0] - 1) * 100,
'average_daily_return': df['Daily_Return'].mean() * 100,
'average_annual_return': ((1 + df['Daily_Return'].mean()) ** 365 - 1) * 100,
'min_price': df['Low'].min(),
'max_price': df['High'].max(),
'avg_price': df['Close'].mean(),
'start_price': df['Close'].iloc[0],
'end_price': df['Close'].iloc[-1]
}
# Calculate rolling metrics
df['Rolling_Volatility_30d'] = df['Daily_Return'].rolling(window=30).std() * np.sqrt(365)
df['Rolling_Return_30d'] = df['Close'].pct_change(periods=30) * 100
return analysis, df
def visualize_cycle_patterns(df, cycle_returns, cycle_volatility):
"""
Create enhanced visualization of Bitcoin's behavior across halving cycles.
"""
plt.style.use('seaborn-v0_8')
fig = plt.figure(figsize=(15, 15))
# Create a 3x1 subplot grid with different heights
gs = plt.GridSpec(3, 1, height_ratios=[2, 1, 2], hspace=0.3)
# Plot 1: Returns across cycle with confidence bands
ax1 = plt.subplot(gs[0])
# Convert days to percentage through cycle
x_points = np.array(cycle_returns.index) / (4 * 365) * 100
# Calculate rolling mean and standard deviation for confidence bands
window = 30 # 30-day window
rolling_mean = pd.Series(cycle_returns.values).rolling(window=window).mean()
rolling_std = pd.Series(cycle_returns.values).rolling(window=window).std()
# Plot confidence bands
ax1.fill_between(x_points,
(rolling_mean - 2*rolling_std) * 100,
(rolling_mean + 2*rolling_std) * 100,
alpha=0.2, color='blue', label='95% Confidence')
ax1.fill_between(x_points,
(rolling_mean - rolling_std) * 100,
(rolling_mean + rolling_std) * 100,
alpha=0.3, color='blue', label='68% Confidence')
# Plot average returns
ax1.plot(x_points, cycle_returns.values * 100, 'b-',
label='Average Daily Return', linewidth=2)
ax1.axhline(y=0, color='gray', linestyle='--', alpha=0.5)
# Add vertical lines for each year in cycle
for year in range(1, 4):
ax1.axvline(x=year*25, color='gray', linestyle=':', alpha=0.3)
ax1.text(year*25, ax1.get_ylim()[1], f'Year {year}',
rotation=90, va='top', ha='right', alpha=0.7)
# Highlight halving points
ax1.axvline(x=0, color='red', linestyle='--', alpha=0.5, label='Halving Event')
ax1.axvline(x=100, color='red', linestyle='--', alpha=0.5)
ax1.set_title('Bitcoin Return Patterns Across Halving Cycle', pad=20)
ax1.set_xlabel('Position in Cycle (%)')
ax1.set_ylabel('Average Daily Return (%)')
ax1.grid(True, alpha=0.3)
ax1.legend(loc='upper right')
# Plot 2: Volatility across cycle
ax2 = plt.subplot(gs[1])
# Calculate rolling volatility confidence bands
vol_mean = pd.Series(cycle_volatility.values).rolling(window=window).mean()
vol_std = pd.Series(cycle_volatility.values).rolling(window=window).std()
# Plot volatility with confidence bands
annualized_factor = np.sqrt(365) * 100
ax2.fill_between(x_points,
(vol_mean - 2*vol_std) * annualized_factor,
(vol_mean + 2*vol_std) * annualized_factor,
alpha=0.2, color='red', label='95% Confidence')
ax2.plot(x_points, cycle_volatility.values * annualized_factor, 'r-',
label='Annualized Volatility', linewidth=2)
# Add year markers
for year in range(1, 4):
ax2.axvline(x=year*25, color='gray', linestyle=':', alpha=0.3)
ax2.axvline(x=0, color='red', linestyle='--', alpha=0.5)
ax2.axvline(x=100, color='red', linestyle='--', alpha=0.5)
ax2.set_xlabel('Position in Cycle (%)')
ax2.set_ylabel('Volatility (%)')
ax2.grid(True, alpha=0.3)
ax2.legend(loc='upper right')
# Plot 3: Average price trajectory within cycles
ax3 = plt.subplot(gs[2])
# Define a color scheme for cycles
cycle_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']
# Calculate average price path for each cycle
halving_dates = get_halving_dates()
cycles = []
for i in range(len(halving_dates)-1):
cycle_start = halving_dates[i]
cycle_end = halving_dates[i+1]
cycle_data = df[(df['Date'] >= cycle_start) & (df['Date'] < cycle_end)].copy()
if len(cycle_data) > 0:
cycle_data['Cycle_Pct'] = ((cycle_data['Date'] - cycle_start).dt.total_seconds() /
(cycle_end - cycle_start).total_seconds() * 100)
cycle_data['Normalized_Price'] = cycle_data['Close'] / cycle_data['Close'].iloc[0]
cycles.append(cycle_data)
# Plot each historical cycle with distinct colors
for i, cycle in enumerate(cycles):
ax3.semilogy(cycle['Cycle_Pct'], cycle['Normalized_Price'],
color=cycle_colors[i], alpha=0.7,
label=f'Cycle {i+1} ({cycle["Date"].iloc[0].strftime("%Y")}-{cycle["Date"].iloc[-1].strftime("%Y")})')
# Calculate and plot average cycle
if cycles:
avg_cycle = pd.concat([c.set_index('Cycle_Pct')['Normalized_Price'] for c in cycles], axis=1)
avg_cycle_mean = avg_cycle.mean(axis=1)
avg_cycle_std = avg_cycle.std(axis=1)
ax3.semilogy(avg_cycle_mean.index, avg_cycle_mean.values, 'k-',
linewidth=2, label='Average Cycle')
ax3.fill_between(avg_cycle_mean.index,
avg_cycle_mean * np.exp(-2*avg_cycle_std),
avg_cycle_mean * np.exp(2*avg_cycle_std),
alpha=0.2, color='gray')
# Add year markers
for year in range(1, 4):
ax3.axvline(x=year*25, color='gray', linestyle=':', alpha=0.3)
ax3.axvline(x=0, color='red', linestyle='--', alpha=0.5)
ax3.axvline(x=100, color='red', linestyle='--', alpha=0.5)
ax3.set_title('Price Performance Across Cycles (Normalized)', pad=20)
ax3.set_xlabel('Position in Cycle (%)')
ax3.set_ylabel('Price (Relative to Cycle Start)')
ax3.grid(True, alpha=0.3)
ax3.legend(loc='center left', bbox_to_anchor=(1.02, 0.5))
# Add current cycle position marker on all plots
current_position = get_cycle_position(df['Date'].max(), halving_dates) * 100
for ax in [ax1, ax2, ax3]:
ax.axvline(x=current_position, color='green', linestyle='-', alpha=0.5,
label='Current Position')
# Main title for the figure
fig.suptitle('Bitcoin Halving Cycle Analysis', fontsize=16, y=0.95)
# Adjust layout to prevent legend cutoff
plt.tight_layout()
# Save the plot
plt.savefig('bitcoin_cycle_patterns.png', dpi=300, bbox_inches='tight')
plt.close()
def create_plots(df, start=None, end=None, project_days=365):
"""
Create plots including historical data and future projections.
"""
# Filter data based on date range
mask = pd.Series(True, index=df.index)
if start:
mask &= df['Date'] >= pd.to_datetime(start)
if end:
mask &= df['Date'] <= pd.to_datetime(end)
plot_df = df[mask].copy()
if len(plot_df) == 0:
raise ValueError("No data found for the specified date range")
# Generate projections
cycle_returns, cycle_volatility = analyze_cycles_with_halvings(plot_df)
projections = project_prices_with_cycles(plot_df, days_forward=project_days)
# Create cycle visualization
visualize_cycle_patterns(plot_df, cycle_returns, cycle_volatility)
# Set up the style
plt.style.use('seaborn-v0_8')
# Create figure
fig = plt.figure(figsize=(15, 15))
# Date range for titles
hist_date_range = f" ({plot_df['Date'].min().strftime('%Y-%m-%d')} to {plot_df['Date'].max().strftime('%Y-%m-%d')})"
# 1. Price history and projections (log scale)
ax1 = plt.subplot(4, 1, 1)
# Plot historical prices
ax1.semilogy(plot_df['Date'], plot_df['Close'], 'b-', label='Historical Price')
# Plot projections
ax1.semilogy(projections.index, projections['Expected_Trend'], '--',
color='purple', label='Expected Trend')
ax1.semilogy(projections.index, projections['Median'], ':',
color='green', label='Simulated Median')
ax1.fill_between(projections.index,
projections['Lower_95'], projections['Upper_95'],
alpha=0.2, color='orange', label='95% Confidence Interval')
ax1.fill_between(projections.index,
projections['Lower_68'], projections['Upper_68'],
alpha=0.3, color='green', label='68% Confidence Interval')
# Customize y-axis
ax1.yaxis.set_major_formatter(plt.FuncFormatter(format_price))
# Set custom y-axis ticks at meaningful price points
min_price = min(plot_df['Low'].min(), projections['Lower_95'].min())
max_price = max(plot_df['High'].max(), projections['Upper_95'].max())
price_points = get_nice_price_points(min_price, max_price)
ax1.set_yticks(price_points)
# Adjust y-axis label properties
ax1.tick_params(axis='y', labelsize=8) # Smaller font size
# Add some padding to prevent label cutoff
ax1.margins(y=0.02)
# Adjust label padding to prevent overlap
ax1.yaxis.set_tick_params(pad=1)
# Add grid lines with adjusted opacity
ax1.grid(True, which='major', linestyle='-', alpha=0.5)
ax1.grid(True, which='minor', linestyle=':', alpha=0.2)
ax1.set_title('Bitcoin Price History and Projections (Log Scale)' + hist_date_range)
# Make legend font size smaller too for consistency
ax1.legend(fontsize=8)
# 2. Rolling volatility
ax2 = plt.subplot(4, 1, 2)
ax2.plot(plot_df['Date'], plot_df['Rolling_Volatility_30d'], 'r-', label='30-Day Rolling Volatility')
ax2.set_title('30-Day Rolling Volatility (Annualized)' + hist_date_range)
ax2.set_xlabel('Date')
ax2.set_ylabel('Volatility')
ax2.grid(True)
ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: '{:.0%}'.format(y)))
ax2.legend()
# 3. Returns distribution
ax3 = plt.subplot(4, 1, 3)
returns_mean = plot_df['Daily_Return'].mean()
returns_std = plot_df['Daily_Return'].std()
filtered_returns = plot_df['Daily_Return'][
(plot_df['Daily_Return'] > returns_mean - 5 * returns_std) &
(plot_df['Daily_Return'] < returns_mean + 5 * returns_std)
]
sns.histplot(filtered_returns, bins=100, ax=ax3)
ax3.set_title('Distribution of Daily Returns (Excluding Extreme Outliers)' + hist_date_range)
ax3.set_xlabel('Daily Return')
ax3.set_ylabel('Count')
ax3.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: '{:.0%}'.format(x)))
# Add a vertical line for mean return
ax3.axvline(filtered_returns.mean(), color='r', linestyle='dashed', linewidth=1)
ax3.text(filtered_returns.mean(), ax3.get_ylim()[1], 'Mean',
rotation=90, va='top', ha='right')
# 4. Projection ranges
ax4 = plt.subplot(4, 1, 4)
# Calculate and plot price ranges at different future points
timepoints = np.array(range(30,365,30))
timepoints = timepoints[timepoints <= project_days]
ranges = []
labels = []
positions = []
for t in timepoints:
idx = t - 1 # Convert to 0-based index
ranges.extend([
projections['Lower_95'].iloc[idx],
projections['Lower_68'].iloc[idx],
projections['Median'].iloc[idx],
projections['Upper_68'].iloc[idx],
projections['Upper_95'].iloc[idx]
])
labels.extend([
'95% Lower',
'68% Lower',
'Median',
'68% Upper',
'95% Upper'
])
positions.extend([t] * 5)
# Plot ranges (removed violin plot)
ax4.scatter(positions, ranges, alpha=0.6)
# Add lines connecting the ranges
for t in timepoints:
idx = positions.index(t)
ax4.plot([t] * 5, ranges[idx:idx+5], 'k-', alpha=0.3)
# Set log scale first
ax4.set_yscale('log')
# Get the current order of magnitude for setting appropriate ticks
min_price = min(ranges)
max_price = max(ranges)
# Create price points at regular intervals on log scale
log_min = np.floor(np.log10(min_price))
log_max = np.ceil(np.log10(max_price))
price_points = []
for exp in range(int(log_min), int(log_max + 1)):
for mult in [1, 2, 5]:
point = mult * 10**exp
if min_price <= point <= max_price:
price_points.append(point)
ax4.set_yticks(price_points)
def price_formatter(x, p):
if x >= 1e6:
return f'${x/1e6:.1f}M'
if x >= 1e3:
return f'${x/1e3:.0f}K'
return f'${x:.0f}'
# Apply formatter to major ticks
ax4.yaxis.set_major_formatter(plt.FuncFormatter(price_formatter))
# Customize the plot
ax4.set_title('Projected Price Ranges at Future Timepoints')
ax4.set_xlabel('Days Forward')
ax4.set_ylabel('Price (USD)')
ax4.grid(True, alpha=0.3)
# Set x-axis to show only our timepoints
ax4.set_xticks(timepoints)
# Adjust layout
plt.tight_layout()
# Save the plot
start_str = start if start else plot_df['Date'].min().strftime('%Y-%m-%d')
end_str = end if end else plot_df['Date'].max().strftime('%Y-%m-%d')
filename = f'bitcoin_analysis_{start_str}_to_{end_str}_with_projections.png'
plt.savefig(filename, dpi=300, bbox_inches='tight')
plt.close()
return projections
def analyze_cycles(df, cycle_period=4*365):
"""Analyze Bitcoin market cycles to understand return patterns"""
df = df.copy()
# Calculate rolling returns at different scales
df['Returns_30d'] = df['Close'].pct_change(periods=30)
df['Returns_90d'] = df['Close'].pct_change(periods=90)
df['Returns_365d'] = df['Close'].pct_change(periods=365)
# Calculate where we are in the supposed 4-year cycle
df['Days_From_Start'] = (df['Date'] - df['Date'].min()).dt.days
df['Cycle_Position'] = df['Days_From_Start'] % cycle_period
# Group by cycle position and calculate average returns
cycle_returns = df.groupby(df['Cycle_Position'])['Daily_Return'].mean()
cycle_volatility = df.groupby(df['Cycle_Position'])['Daily_Return'].std()
return cycle_returns, cycle_volatility
def get_halving_dates():
"""Return known and projected Bitcoin halving dates"""
return pd.to_datetime([
'2008-01-03', # Bitcoin genesis block (treat as cycle start)
'2012-11-28', # First halving
'2016-07-09', # Second halving
'2020-05-11', # Third halving
'2024-04-17', # Fourth halving (projected)
'2028-04-17', # Fifth halving (projected)
])
def get_cycle_position(date, halving_dates):
"""
Calculate position in halving cycle (0 to 1) for a given date.
0 represents a halving event, 1 represents just before the next halving.
"""
# Convert date to datetime if it's not already
date = pd.to_datetime(date)
# Find the most recent halving before this date
prev_halving = halving_dates[halving_dates <= date].max()
if pd.isna(prev_halving):
return 0.0 # For dates before first halving
# Find next halving
future_halvings = halving_dates[halving_dates > date]
if len(future_halvings) == 0:
# For dates after last known halving, use same cycle length as last known cycle
last_cycle_length = (halving_dates[-1] - halving_dates[-2]).days
days_since_halving = (date - halving_dates[-1]).days
return min(days_since_halving / last_cycle_length, 1.0)
next_halving = future_halvings.min()
# Calculate position as fraction between halvings
days_since_halving = (date - prev_halving).days
cycle_length = (next_halving - prev_halving).days
return min(days_since_halving / cycle_length, 1.0)
def analyze_cycles_with_halvings(df):
"""Analyze Bitcoin market cycles aligned with halving events"""
df = df.copy()
# Get halving dates
halving_dates = get_halving_dates()
# Calculate cycle position for each date
df['Cycle_Position'] = df['Date'].apply(
lambda x: get_cycle_position(x, halving_dates)
)
# Convert to days within cycle (0 to ~1460 days)
df['Cycle_Days'] = (df['Cycle_Position'] * 4 * 365).round().astype(int)
# Calculate returns at different scales
df['Returns_30d'] = df['Close'].pct_change(periods=30)
df['Returns_90d'] = df['Close'].pct_change(periods=90)
df['Returns_365d'] = df['Close'].pct_change(periods=365)
# Group by position in cycle and calculate average returns
cycle_returns = df.groupby(df['Cycle_Days'])['Daily_Return'].mean()
cycle_volatility = df.groupby(df['Cycle_Days'])['Daily_Return'].std()
# Smooth the cycle returns to reduce noise
from scipy.signal import savgol_filter
window = 91 # About 3 months
if len(cycle_returns) > window:
cycle_returns = pd.Series(
savgol_filter(cycle_returns, window, 3),
index=cycle_returns.index
)
return cycle_returns, cycle_volatility
def project_prices_with_cycles(df, days_forward=365, simulations=1000, confidence_levels=[0.95, 0.68]):
"""
Project future Bitcoin prices using Monte Carlo simulation with halving-aligned cycles.
"""
# Analyze historical cycles
cycle_returns, cycle_volatility = analyze_cycles_with_halvings(df)
# Get current position in halving cycle
halving_dates = get_halving_dates()
current_date = df['Date'].max()
cycle_position = get_cycle_position(current_date, halving_dates)
current_cycle_days = int(cycle_position * 4 * 365)
# Current price (last known price)
last_price = df['Close'].iloc[-1]
last_date = df['Date'].iloc[-1]
# Generate dates for projection
future_dates = pd.date_range(
start=last_date + timedelta(days=1),
periods=days_forward,
freq='D'
)
# Calculate expected returns for future dates based on cycle position
future_cycle_days = [
(current_cycle_days + i) % (4 * 365)
for i in range(days_forward)
]
expected_returns = np.array([
cycle_returns.get(day, cycle_returns.mean())
for day in future_cycle_days
])
# Calculate base volatility (recent)
recent_volatility = df['Daily_Return'].tail(90).std()
# Add long-term trend component (very gentle decay)
long_term_decay = 0.9 ** (np.arange(days_forward) / 365) # 10% reduction per year
expected_returns = expected_returns * long_term_decay
# Run Monte Carlo simulation
np.random.seed(42) # For reproducibility
simulated_paths = np.zeros((days_forward, simulations))
for sim in range(simulations):
# Generate random returns using cycle-aware expected returns
returns = np.random.normal(
loc=expected_returns,
scale=recent_volatility,
size=days_forward
)
# Calculate price path
price_path = last_price * np.exp(np.cumsum(returns))
simulated_paths[:, sim] = price_path
# Calculate percentiles for confidence intervals
results = pd.DataFrame(index=future_dates)
results['Median'] = np.percentile(simulated_paths, 50, axis=1)
for level in confidence_levels:
lower_percentile = (1 - level) * 100 / 2
upper_percentile = 100 - lower_percentile
results[f'Lower_{int(level*100)}'] = np.percentile(simulated_paths, lower_percentile, axis=1)
results[f'Upper_{int(level*100)}'] = np.percentile(simulated_paths, upper_percentile, axis=1)
# Add expected trend line (without randomness)
results['Expected_Trend'] = last_price * np.exp(np.cumsum(expected_returns))
return results
def calculate_rolling_metrics(df, window=365):
"""Calculate rolling returns and volatility metrics"""
df = df.copy()
df['Rolling_Daily_Return'] = df['Daily_Return'].rolling(window=window).mean()
df['Rolling_Daily_Volatility'] = df['Daily_Return'].rolling(window=window).std()
return df
def fit_return_trend(df):
"""Fit an exponential decay trend to the rolling returns"""
# Calculate days from start
df = df.copy()
df['Days'] = (df['Date'] - df['Date'].min()).dt.days
# Calculate rolling metrics
df = calculate_rolling_metrics(df)
# Remove NaN values for fitting
clean_data = df.dropna()
# Fit exponential decay: y = a * exp(-bx) + c
from scipy.optimize import curve_fit
def exp_decay(x, a, b, c):
return a * np.exp(-b * x) + c
popt, _ = curve_fit(
exp_decay,
clean_data['Days'],
clean_data['Rolling_Daily_Return'],
p0=[0.01, 0.001, 0.0001], # Initial guess for parameters
bounds=([0, 0, 0], [1, 1, 0.01]) # Constraints to keep parameters positive
)
return popt
def project_prices_with_trend(df, days_forward=365, simulations=1000, confidence_levels=[0.95, 0.68]):
"""
Project future Bitcoin prices using Monte Carlo simulation with trend adjustment.
"""
# Fit return trend
trend_params = fit_return_trend(df)
# Calculate days from start for projection
days_from_start = (df['Date'].max() - df['Date'].min()).days
# Current price (last known price)
last_price = df['Close'].iloc[-1]
last_date = df['Date'].iloc[-1]
# Generate dates for projection
future_dates = pd.date_range(
start=last_date + timedelta(days=1),
periods=days_forward,
freq='D'
)
# Calculate expected returns for future dates using fitted trend
def exp_decay(x, a, b, c):
return a * np.exp(-b * x) + c
future_days = np.arange(days_from_start + 1, days_from_start + days_forward + 1)
expected_returns = exp_decay(future_days, *trend_params)
# Use recent volatility for projections
recent_volatility = df['Daily_Return'].tail(365).std()
# Run Monte Carlo simulation
np.random.seed(42) # For reproducibility
simulated_paths = np.zeros((days_forward, simulations))
for sim in range(simulations):
# Generate random returns using trending expected return
returns = np.random.normal(
loc=expected_returns,
scale=recent_volatility,
size=days_forward
)
# Calculate price path
price_path = last_price * np.exp(np.cumsum(returns))
simulated_paths[:, sim] = price_path
# Calculate percentiles for confidence intervals
results = pd.DataFrame(index=future_dates)
results['Median'] = np.percentile(simulated_paths, 50, axis=1)
for level in confidence_levels:
lower_percentile = (1 - level) * 100 / 2
upper_percentile = 100 - lower_percentile
results[f'Lower_{int(level*100)}'] = np.percentile(simulated_paths, lower_percentile, axis=1)
results[f'Upper_{int(level*100)}'] = np.percentile(simulated_paths, upper_percentile, axis=1)
# Add expected trend line (without randomness)
results['Expected_Trend'] = last_price * np.exp(np.cumsum(expected_returns))
return results
def get_nice_price_points(min_price, max_price):
"""
Generate a reasonable set of price points for the y-axis that look clean
and cover the range without cluttering the chart.
"""
log_min = np.floor(np.log10(min_price))
log_max = np.ceil(np.log10(max_price))
price_points = []
# For very large ranges (spanning more than 4 orders of magnitude),
# only use powers of 10 and mid-points
if log_max - log_min > 4:
for exp in range(int(log_min), int(log_max + 1)):
base = 10**exp
# Add main power of 10
if min_price <= base <= max_price:
price_points.append(base)
# Add mid-point if range is large enough
if min_price <= base * 5 <= max_price and exp > log_min:
price_points.append(base * 5)
else:
# For smaller ranges, use 1, 2, 5 sequence
for exp in range(int(log_min), int(log_max + 1)):
for mult in [1, 2, 5]:
point = mult * 10**exp
if min_price <= point <= max_price:
price_points.append(point)
return np.array(price_points)
def format_price(x, p):
"""Format large numbers in K, M, B format with appropriate precision"""
if abs(x) >= 1e9:
return f'${x/1e9:.1f}B'
if abs(x) >= 1e6:
return f'${x/1e6:.1f}M'
if abs(x) >= 1e3:
return f'${x/1e3:.1f}K'
if abs(x) >= 1:
return f'${x:.0f}'
return f'${x:.2f}' # For values less than $1, show cents
def project_prices(df, days_forward=365, simulations=1000, confidence_levels=[0.95, 0.68]):
"""
Project future Bitcoin prices using Monte Carlo simulation.
Parameters:
df: DataFrame with historical price data
days_forward: Number of days to project forward
simulations: Number of Monte Carlo simulations to run
confidence_levels: List of confidence levels for the projection intervals
Returns:
DataFrame with projection results
"""
# Calculate daily return parameters
daily_return = df['Daily_Return'].mean()
daily_volatility = df['Daily_Return'].std()
# Current price (last known price)
last_price = df['Close'].iloc[-1]
last_date = df['Date'].iloc[-1]
# Generate dates for projection
future_dates = pd.date_range(
start=last_date + timedelta(days=1),
periods=days_forward,
freq='D'
)
# Run Monte Carlo simulation
np.random.seed(42) # For reproducibility
simulated_paths = np.zeros((days_forward, simulations))
for sim in range(simulations):
# Generate random returns using historical parameters
returns = np.random.normal(
loc=daily_return,
scale=daily_volatility,
size=days_forward
)
# Calculate price path
price_path = last_price * np.exp(np.cumsum(returns))
simulated_paths[:, sim] = price_path
# Calculate percentiles for confidence intervals
results = pd.DataFrame(index=future_dates)
results['Median'] = np.percentile(simulated_paths, 50, axis=1)
for level in confidence_levels:
lower_percentile = (1 - level) * 100 / 2
upper_percentile = 100 - lower_percentile
results[f'Lower_{int(level*100)}'] = np.percentile(simulated_paths, lower_percentile, axis=1)
results[f'Upper_{int(level*100)}'] = np.percentile(simulated_paths, upper_percentile, axis=1)
return results
def print_analysis(analysis):
print(f"\nBitcoin Price Analysis ({analysis['period_start']} to {analysis['period_end']})")
print("-" * 50)
print(f"Total Days Analyzed: {analysis['total_days']}")
print(f"\nPrice Range:")
print(f"Starting Price: ${analysis['start_price']:,.2f}")
print(f"Ending Price: ${analysis['end_price']:,.2f}")
print(f"Minimum Price: ${analysis['min_price']:,.2f}")
print(f"Maximum Price: ${analysis['max_price']:,.2f}")
print(f"Average Price: ${analysis['avg_price']:,.2f}")
print(f"\nVolatility Metrics:")
print(f"Daily Volatility: {analysis['daily_volatility']:.2%}")
print(f"Annualized Volatility: {analysis['annualized_volatility']:.2%}")
print(f"\nReturn Metrics:")
print(f"Total Return: {analysis['total_return']:,.2f}%")
print(f"Average Daily Return: {analysis['average_daily_return']:.2f}%")
print(f"Average Annual Return: {analysis['average_annual_return']:,.2f}%")
if __name__ == "__main__":
analysis, df = analyze_bitcoin_prices("prices.csv")
#create_plots(df) # Full history
#create_plots(df, start='2022-01-01') # From 2022 onwards
#create_plots(df, start='2023-01-01', end='2023-12-31') # Just 2023
# Create plots with different time ranges and projections
projections = create_plots(df, start='2011-01-01', project_days=365*4)
print("\nProjected Prices at Key Points:")
print(projections.iloc[[29, 89, 179, 364]].round(2)) # 30, 90, 180, 365 days
print_analysis(analysis)