bitcoin-model/model.py

809 lines
30 KiB
Python
Raw Normal View History

2024-11-15 00:53:12 +00:00
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
def analyze_bitcoin_prices(csv_path):
    """
    Analyze Bitcoin price data to calculate volatility and growth rates.

    Parameters:
        csv_path: Path of a CSV file whose first column holds dates and which
            has 'Price', 'Open', 'High' and 'Low' columns. The file must have
            exactly seven columns; they are renamed positionally to
            Date, Close, Open, High, Low, Volume, Change.

    Returns:
        (analysis, df) where ``analysis`` is a dict of summary metrics and
        ``df`` is the date-sorted DataFrame with 'Daily_Return',
        'Rolling_Volatility_30d' and 'Rolling_Return_30d' columns added.
    """
    # Read CSV with proper data types
    df = pd.read_csv(csv_path, parse_dates=[0])

    # Print first few rows of raw data to inspect
    print("\nFirst few rows of raw data:")
    print(df.head())

    # Print data info to see types and non-null counts.
    # DataFrame.info() writes the report itself and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None" line).
    print("\nDataset Info:")
    df.info()

    # Convert price columns to float and handle any potential formatting issues
    price_columns = ['Price', 'Open', 'High', 'Low']
    for col in price_columns:
        # Remove any commas in numbers (thousands separators)
        df[col] = df[col].astype(str).str.replace(',', '')
        df[col] = pd.to_numeric(df[col], errors='coerce')

    # Rename columns for clarity (positional: requires exactly 7 columns)
    df.columns = ['Date', 'Close', 'Open', 'High', 'Low', 'Volume', 'Change']

    # Sort by date in ascending order so iloc[0] / iloc[-1] are first / last day
    df = df.sort_values('Date')

    # Print summary statistics after conversion
    print("\nPrice Summary After Conversion:")
    print(df[['Close', 'Open', 'High', 'Low']].describe())

    # Calculate daily returns
    df['Daily_Return'] = df['Close'].pct_change()

    # Print first few daily returns to verify calculation
    print("\nFirst few daily returns:")
    print(df[['Date', 'Close', 'Daily_Return']].head())

    # Report missing values per column (isna() counts NaN/NaT only)
    print("\nInfinite or NaN value counts:")
    print(df.isna().sum())

    # Compute the return statistics once; they feed several metrics below
    daily_std = df['Daily_Return'].std()
    daily_mean = df['Daily_Return'].mean()
    first_close = df['Close'].iloc[0]
    last_close = df['Close'].iloc[-1]

    # Calculate metrics using 365 days for annualization
    analysis = {
        'period_start': df['Date'].min().strftime('%Y-%m-%d'),
        'period_end': df['Date'].max().strftime('%Y-%m-%d'),
        'total_days': len(df),
        'daily_volatility': daily_std,
        'annualized_volatility': daily_std * np.sqrt(365),
        'total_return': (last_close / first_close - 1) * 100,
        'average_daily_return': daily_mean * 100,
        'average_annual_return': ((1 + daily_mean) ** 365 - 1) * 100,
        'min_price': df['Low'].min(),
        'max_price': df['High'].max(),
        'avg_price': df['Close'].mean(),
        'start_price': first_close,
        'end_price': last_close,
    }

    # Calculate rolling metrics
    df['Rolling_Volatility_30d'] = df['Daily_Return'].rolling(window=30).std() * np.sqrt(365)
    df['Rolling_Return_30d'] = df['Close'].pct_change(periods=30) * 100

    return analysis, df
def visualize_cycle_patterns(df, cycle_returns, cycle_volatility):
    """
    Create enhanced visualization of Bitcoin's behavior across halving cycles.

    Draws three stacked panels (average daily return, annualized volatility
    and normalized price per cycle, all against position in the cycle in
    percent) and saves the figure to 'bitcoin_cycle_patterns.png'.

    Parameters:
        df: DataFrame with 'Date' and 'Close' columns.
        cycle_returns: Series of average daily returns indexed by day within
            a 4*365-day cycle.
        cycle_volatility: Series of daily-return standard deviations; plotted
            against the x positions derived from ``cycle_returns.index``, so
            it must have the same length.
    """
    plt.style.use('seaborn-v0_8')
    fig = plt.figure(figsize=(15, 15))

    # Create a 3x1 subplot grid with different heights
    gs = plt.GridSpec(3, 1, height_ratios=[2, 1, 2], hspace=0.3)

    halving_dates = get_halving_dates()
    current_position = get_cycle_position(df['Date'].max(), halving_dates) * 100

    def mark_current_position(ax):
        # Must be drawn BEFORE ax.legend() is called, otherwise the label
        # never makes it into the legend.
        ax.axvline(x=current_position, color='green', linestyle='-', alpha=0.5,
                   label='Current Position')

    # Plot 1: Returns across cycle with confidence bands
    ax1 = plt.subplot(gs[0])

    # Convert days to percentage through cycle
    x_points = np.array(cycle_returns.index) / (4 * 365) * 100

    # Calculate rolling mean and standard deviation for confidence bands
    window = 30  # 30-day window
    rolling_mean = pd.Series(cycle_returns.values).rolling(window=window).mean()
    rolling_std = pd.Series(cycle_returns.values).rolling(window=window).std()

    # Plot confidence bands
    ax1.fill_between(x_points,
                     (rolling_mean - 2*rolling_std) * 100,
                     (rolling_mean + 2*rolling_std) * 100,
                     alpha=0.2, color='blue', label='95% Confidence')
    ax1.fill_between(x_points,
                     (rolling_mean - rolling_std) * 100,
                     (rolling_mean + rolling_std) * 100,
                     alpha=0.3, color='blue', label='68% Confidence')

    # Plot average returns
    ax1.plot(x_points, cycle_returns.values * 100, 'b-',
             label='Average Daily Return', linewidth=2)
    ax1.axhline(y=0, color='gray', linestyle='--', alpha=0.5)

    # Add vertical lines for each year in cycle
    for year in range(1, 4):
        ax1.axvline(x=year*25, color='gray', linestyle=':', alpha=0.3)
        ax1.text(year*25, ax1.get_ylim()[1], f'Year {year}',
                 rotation=90, va='top', ha='right', alpha=0.7)

    # Highlight halving points
    ax1.axvline(x=0, color='red', linestyle='--', alpha=0.5, label='Halving Event')
    ax1.axvline(x=100, color='red', linestyle='--', alpha=0.5)
    mark_current_position(ax1)

    ax1.set_title('Bitcoin Return Patterns Across Halving Cycle', pad=20)
    ax1.set_xlabel('Position in Cycle (%)')
    ax1.set_ylabel('Average Daily Return (%)')
    ax1.grid(True, alpha=0.3)
    ax1.legend(loc='upper right')

    # Plot 2: Volatility across cycle
    ax2 = plt.subplot(gs[1])

    # Calculate rolling volatility confidence bands
    vol_mean = pd.Series(cycle_volatility.values).rolling(window=window).mean()
    vol_std = pd.Series(cycle_volatility.values).rolling(window=window).std()

    # Plot volatility with confidence bands
    annualized_factor = np.sqrt(365) * 100
    ax2.fill_between(x_points,
                     (vol_mean - 2*vol_std) * annualized_factor,
                     (vol_mean + 2*vol_std) * annualized_factor,
                     alpha=0.2, color='red', label='95% Confidence')
    ax2.plot(x_points, cycle_volatility.values * annualized_factor, 'r-',
             label='Annualized Volatility', linewidth=2)

    # Add year markers
    for year in range(1, 4):
        ax2.axvline(x=year*25, color='gray', linestyle=':', alpha=0.3)
    ax2.axvline(x=0, color='red', linestyle='--', alpha=0.5)
    ax2.axvline(x=100, color='red', linestyle='--', alpha=0.5)
    mark_current_position(ax2)

    ax2.set_xlabel('Position in Cycle (%)')
    ax2.set_ylabel('Volatility (%)')
    ax2.grid(True, alpha=0.3)
    ax2.legend(loc='upper right')

    # Plot 3: Average price trajectory within cycles
    ax3 = plt.subplot(gs[2])

    # Define a color scheme for cycles
    cycle_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

    # Collect the price path of each cycle that has data
    cycles = []
    for cycle_start, cycle_end in zip(halving_dates[:-1], halving_dates[1:]):
        cycle_data = df[(df['Date'] >= cycle_start) & (df['Date'] < cycle_end)].copy()
        if len(cycle_data) > 0:
            cycle_data['Cycle_Pct'] = ((cycle_data['Date'] - cycle_start).dt.total_seconds() /
                                       (cycle_end - cycle_start).total_seconds() * 100)
            cycle_data['Normalized_Price'] = cycle_data['Close'] / cycle_data['Close'].iloc[0]
            cycles.append(cycle_data)

    # Plot each historical cycle with distinct colors
    for i, cycle in enumerate(cycles):
        ax3.semilogy(cycle['Cycle_Pct'], cycle['Normalized_Price'],
                     color=cycle_colors[i % len(cycle_colors)], alpha=0.7,
                     label=f'Cycle {i+1} ({cycle["Date"].iloc[0].strftime("%Y")}-{cycle["Date"].iloc[-1].strftime("%Y")})')

    # Calculate and plot average cycle.
    # Cycles have different lengths, so their Cycle_Pct values do not line up;
    # each path is interpolated onto one shared grid before averaging. The
    # statistics are taken on log prices so that the exp(+/-2*std) band below
    # is in matching units for the log-scaled axis.
    grid = np.linspace(0, 100, 401)
    log_paths = []
    for cycle in cycles:
        pct = cycle['Cycle_Pct'].to_numpy(dtype=float)
        price = cycle['Normalized_Price'].to_numpy(dtype=float)
        usable = np.isfinite(pct) & np.isfinite(price) & (price > 0)
        if usable.sum() < 2:
            continue
        order = np.argsort(pct[usable])  # np.interp needs increasing x
        log_paths.append(np.interp(grid,
                                   pct[usable][order],
                                   np.log(price[usable][order]),
                                   left=np.nan, right=np.nan))
    if log_paths:
        stacked = np.vstack(log_paths)
        counts = np.isfinite(stacked).sum(axis=0)
        covered = counts > 0    # grid points reached by at least one cycle
        repeated = counts > 1   # a spread needs at least two cycles
        log_mean = np.full(grid.shape, np.nan)
        log_mean[covered] = np.nanmean(stacked[:, covered], axis=0)
        log_std = np.zeros(grid.shape)
        log_std[repeated] = np.nanstd(stacked[:, repeated], axis=0, ddof=1)
        avg_price = np.exp(log_mean)
        ax3.semilogy(grid, avg_price, 'k-',
                     linewidth=2, label='Average Cycle')
        ax3.fill_between(grid,
                         avg_price * np.exp(-2*log_std),
                         avg_price * np.exp(2*log_std),
                         alpha=0.2, color='gray')

    # Add year markers
    for year in range(1, 4):
        ax3.axvline(x=year*25, color='gray', linestyle=':', alpha=0.3)
    ax3.axvline(x=0, color='red', linestyle='--', alpha=0.5)
    ax3.axvline(x=100, color='red', linestyle='--', alpha=0.5)
    mark_current_position(ax3)

    ax3.set_title('Price Performance Across Cycles (Normalized)', pad=20)
    ax3.set_xlabel('Position in Cycle (%)')
    ax3.set_ylabel('Price (Relative to Cycle Start)')
    ax3.grid(True, alpha=0.3)
    ax3.legend(loc='center left', bbox_to_anchor=(1.02, 0.5))

    # Main title for the figure
    fig.suptitle('Bitcoin Halving Cycle Analysis', fontsize=16, y=0.95)

    # Adjust layout to prevent legend cutoff
    plt.tight_layout()

    # Save the plot
    plt.savefig('bitcoin_cycle_patterns.png', dpi=300, bbox_inches='tight')
    plt.close()
def create_plots(df, start=None, end=None, project_days=365):
    """
    Create plots including historical data and future projections.

    Saves a four-panel figure (price history with projections, rolling
    volatility, daily-return distribution, projected price ranges) to
    'bitcoin_analysis_<start>_to_<end>_with_projections.png' and also
    triggers the halving-cycle figure via visualize_cycle_patterns().

    Parameters:
        df: DataFrame with 'Date', 'Close', 'High', 'Low', 'Daily_Return' and
            'Rolling_Volatility_30d' columns.
        start: Optional inclusive lower date bound (anything pd.to_datetime accepts).
        end: Optional inclusive upper date bound.
        project_days: Number of days to project forward.

    Returns:
        DataFrame of projections as produced by project_prices_with_cycles().

    Raises:
        ValueError: If no rows fall inside the requested date range.
    """
    # Filter data based on date range
    mask = pd.Series(True, index=df.index)
    if start:
        mask &= df['Date'] >= pd.to_datetime(start)
    if end:
        mask &= df['Date'] <= pd.to_datetime(end)
    plot_df = df[mask].copy()
    if len(plot_df) == 0:
        raise ValueError("No data found for the specified date range")

    # Generate projections
    cycle_returns, cycle_volatility = analyze_cycles_with_halvings(plot_df)
    projections = project_prices_with_cycles(plot_df, days_forward=project_days)

    # Create cycle visualization
    visualize_cycle_patterns(plot_df, cycle_returns, cycle_volatility)

    # Set up the style
    plt.style.use('seaborn-v0_8')

    # Create figure
    fig = plt.figure(figsize=(15, 15))

    # Date range for titles
    hist_date_range = f" ({plot_df['Date'].min().strftime('%Y-%m-%d')} to {plot_df['Date'].max().strftime('%Y-%m-%d')})"

    # 1. Price history and projections (log scale)
    ax1 = plt.subplot(4, 1, 1)

    # Plot historical prices
    ax1.semilogy(plot_df['Date'], plot_df['Close'], 'b-', label='Historical Price')

    # Plot projections
    ax1.semilogy(projections.index, projections['Expected_Trend'], '--',
                 color='purple', label='Expected Trend')
    ax1.semilogy(projections.index, projections['Median'], ':',
                 color='green', label='Simulated Median')
    ax1.fill_between(projections.index,
                     projections['Lower_95'], projections['Upper_95'],
                     alpha=0.2, color='orange', label='95% Confidence Interval')
    ax1.fill_between(projections.index,
                     projections['Lower_68'], projections['Upper_68'],
                     alpha=0.3, color='green', label='68% Confidence Interval')

    # Customize y-axis
    ax1.yaxis.set_major_formatter(plt.FuncFormatter(format_price))

    # Set custom y-axis ticks at meaningful price points
    min_price = min(plot_df['Low'].min(), projections['Lower_95'].min())
    max_price = max(plot_df['High'].max(), projections['Upper_95'].max())
    price_points = get_nice_price_points(min_price, max_price)
    ax1.set_yticks(price_points)

    # Adjust y-axis label properties
    ax1.tick_params(axis='y', labelsize=8)  # Smaller font size

    # Add some padding to prevent label cutoff
    ax1.margins(y=0.02)

    # Adjust label padding to prevent overlap
    ax1.yaxis.set_tick_params(pad=1)

    # Add grid lines with adjusted opacity
    ax1.grid(True, which='major', linestyle='-', alpha=0.5)
    ax1.grid(True, which='minor', linestyle=':', alpha=0.2)
    ax1.set_title('Bitcoin Price History and Projections (Log Scale)' + hist_date_range)

    # Make legend font size smaller too for consistency
    ax1.legend(fontsize=8)

    # 2. Rolling volatility
    ax2 = plt.subplot(4, 1, 2)
    ax2.plot(plot_df['Date'], plot_df['Rolling_Volatility_30d'], 'r-', label='30-Day Rolling Volatility')
    ax2.set_title('30-Day Rolling Volatility (Annualized)' + hist_date_range)
    ax2.set_xlabel('Date')
    ax2.set_ylabel('Volatility')
    ax2.grid(True)
    ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: '{:.0%}'.format(y)))
    ax2.legend()

    # 3. Returns distribution
    ax3 = plt.subplot(4, 1, 3)
    returns_mean = plot_df['Daily_Return'].mean()
    returns_std = plot_df['Daily_Return'].std()
    # Keep only returns within 5 standard deviations so the histogram is readable
    filtered_returns = plot_df['Daily_Return'][
        (plot_df['Daily_Return'] > returns_mean - 5 * returns_std) &
        (plot_df['Daily_Return'] < returns_mean + 5 * returns_std)
    ]
    sns.histplot(filtered_returns, bins=100, ax=ax3)
    ax3.set_title('Distribution of Daily Returns (Excluding Extreme Outliers)' + hist_date_range)
    ax3.set_xlabel('Daily Return')
    ax3.set_ylabel('Count')
    ax3.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: '{:.0%}'.format(x)))

    # Add a vertical line for mean return
    ax3.axvline(filtered_returns.mean(), color='r', linestyle='dashed', linewidth=1)
    ax3.text(filtered_returns.mean(), ax3.get_ylim()[1], 'Mean',
             rotation=90, va='top', ha='right')

    # 4. Projection ranges
    ax4 = plt.subplot(4, 1, 4)

    # Calculate and plot price ranges at different future points
    timepoints = np.array([30, 90, 180, 365])
    timepoints = timepoints[timepoints <= project_days]
    if timepoints.size == 0 and len(projections) > 0:
        # Horizon shorter than 30 days: fall back to the final projected day
        # instead of failing on empty ranges further down.
        timepoints = np.array([len(projections)])

    band_columns = ['Lower_95', 'Lower_68', 'Median', 'Upper_68', 'Upper_95']
    ranges = []
    positions = []
    for t in timepoints:
        idx = t - 1  # Convert to 0-based index
        ranges.extend(projections[column].iloc[idx] for column in band_columns)
        positions.extend([t] * len(band_columns))

    if ranges:
        # Plot ranges (removed violin plot)
        ax4.scatter(positions, ranges, alpha=0.6)

        # Add lines connecting the ranges of each timepoint
        per_point = len(band_columns)
        for k, t in enumerate(timepoints):
            ax4.plot([t] * per_point, ranges[k*per_point:(k+1)*per_point], 'k-', alpha=0.3)

        # Set log scale first
        ax4.set_yscale('log')

        # Get the current order of magnitude for setting appropriate ticks
        min_price = min(ranges)
        max_price = max(ranges)

        # Create price points at regular intervals on log scale
        log_min = np.floor(np.log10(min_price))
        log_max = np.ceil(np.log10(max_price))
        price_points = []
        for exp in range(int(log_min), int(log_max + 1)):
            for mult in [1, 2, 5]:
                point = mult * 10**exp
                if min_price <= point <= max_price:
                    price_points.append(point)
        ax4.set_yticks(price_points)

    def price_formatter(x, p):
        if x >= 1e6:
            return f'${x/1e6:.1f}M'
        if x >= 1e3:
            return f'${x/1e3:.0f}K'
        return f'${x:.0f}'

    # Apply formatter to major ticks
    ax4.yaxis.set_major_formatter(plt.FuncFormatter(price_formatter))

    # Customize the plot
    ax4.set_title('Projected Price Ranges at Future Timepoints')
    ax4.set_xlabel('Days Forward')
    ax4.set_ylabel('Price (USD)')
    ax4.grid(True, alpha=0.3)

    # Set x-axis to show only our timepoints
    ax4.set_xticks(timepoints)

    # Adjust layout
    plt.tight_layout()

    # Save the plot
    start_str = start if start else plot_df['Date'].min().strftime('%Y-%m-%d')
    end_str = end if end else plot_df['Date'].max().strftime('%Y-%m-%d')
    filename = f'bitcoin_analysis_{start_str}_to_{end_str}_with_projections.png'
    plt.savefig(filename, dpi=300, bbox_inches='tight')
    plt.close()

    return projections
def analyze_cycles(df, cycle_period=4*365):
    """
    Analyze Bitcoin market cycles to understand return patterns.

    Each row is assigned a position inside a fixed-length cycle measured from
    the earliest date in ``df``; daily returns are then aggregated per position.

    Parameters:
        df: DataFrame with 'Date' and 'Daily_Return' columns (not modified).
        cycle_period: Cycle length in days.

    Returns:
        (cycle_returns, cycle_volatility): mean and standard deviation of
        'Daily_Return' for each cycle position, indexed by 'Cycle_Position'.
    """
    # Calculate where we are in the supposed 4-year cycle
    days_from_start = (df['Date'] - df['Date'].min()).dt.days
    cycle_position = (days_from_start % cycle_period).rename('Cycle_Position')

    # Group by cycle position and calculate average returns / volatility.
    # Grouping the Series directly avoids copying the whole frame.
    by_position = df['Daily_Return'].groupby(cycle_position)
    return by_position.mean(), by_position.std()
def get_halving_dates():
    """
    Return known and projected Bitcoin halving dates.

    Returns:
        DatetimeIndex in ascending order: the genesis block (used as the start
        of the first cycle), the four halvings that have occurred, and one
        projected future halving.
    """
    return pd.to_datetime([
        '2009-01-03',  # Bitcoin genesis block (treat as cycle start)
        '2012-11-28',  # First halving
        '2016-07-09',  # Second halving
        '2020-05-11',  # Third halving
        '2024-04-20',  # Fourth halving
        '2028-04-17',  # Fifth halving (projected)
    ])
def get_cycle_position(date, halving_dates):
    """
    Locate a date inside its halving cycle as a fraction between 0 and 1.

    0 corresponds to a halving event; values approach 1 just before the next
    halving. Dates earlier than every entry of ``halving_dates`` yield 0.0.
    Dates after the last entry are measured against the length of the last
    known cycle and capped at 1.0.
    """
    when = pd.to_datetime(date)

    # Most recent halving on or before the date (NaT when there is none)
    cycle_start = halving_dates[halving_dates <= when].max()
    if pd.isna(cycle_start):
        return 0.0

    upcoming = halving_dates[halving_dates > when]
    if len(upcoming) > 0:
        # Regular case: the date sits between two known halvings
        elapsed_days = (when - cycle_start).days
        span_days = (upcoming.min() - cycle_start).days
    else:
        # Past the final entry: reuse the length of the last known cycle
        final_halving = halving_dates[-1]
        elapsed_days = (when - final_halving).days
        span_days = (final_halving - halving_dates[-2]).days

    return min(elapsed_days / span_days, 1.0)
def analyze_cycles_with_halvings(df):
    """
    Analyze Bitcoin market cycles aligned with halving events.

    Parameters:
        df: DataFrame with 'Date' and 'Daily_Return' columns (not modified).

    Returns:
        (cycle_returns, cycle_volatility): mean and standard deviation of
        'Daily_Return' per day-within-cycle (position * 4 * 365, rounded).
        ``cycle_returns`` is smoothed with a Savitzky-Golay filter when more
        than 91 distinct cycle days are present.
    """
    from scipy.signal import savgol_filter

    # Get halving dates
    halving_dates = get_halving_dates()

    # Calculate cycle position (0..1) for each date
    cycle_position = df['Date'].apply(
        lambda x: get_cycle_position(x, halving_dates)
    )

    # Convert to days within cycle (0 to ~1460 days)
    cycle_days = (cycle_position * 4 * 365).round().astype(int).rename('Cycle_Days')

    # Group by position in cycle and calculate average returns / volatility
    by_cycle_day = df['Daily_Return'].groupby(cycle_days)
    cycle_returns = by_cycle_day.mean()
    cycle_volatility = by_cycle_day.std()

    # Smooth the cycle returns to reduce noise
    window = 91  # About 3 months
    if len(cycle_returns) > window:
        # A cycle day whose only observation is NaN (e.g. the first row, whose
        # return is undefined) would otherwise turn the whole filter window
        # around it into NaN, so gaps are filled before filtering.
        gap_free = cycle_returns.interpolate(limit_direction='both')
        cycle_returns = pd.Series(
            savgol_filter(gap_free.to_numpy(), window, 3),
            index=cycle_returns.index
        )

    return cycle_returns, cycle_volatility
def project_prices_with_cycles(df, days_forward=365, simulations=1000, confidence_levels=(0.95, 0.68)):
    """
    Project future Bitcoin prices using Monte Carlo simulation with halving-aligned cycles.

    Parameters:
        df: DataFrame with 'Date', 'Close' and 'Daily_Return' columns, sorted by date
        days_forward: Number of days to project forward
        simulations: Number of Monte Carlo simulations to run
        confidence_levels: Iterable of confidence levels for the projection intervals

    Returns:
        DataFrame indexed by future date with 'Median', 'Lower_<pct>' /
        'Upper_<pct>' for each confidence level, and 'Expected_Trend'.
    """
    # Analyze historical cycles (only the average returns are needed here)
    cycle_returns, _ = analyze_cycles_with_halvings(df)

    # Get current position in halving cycle
    halving_dates = get_halving_dates()
    current_date = df['Date'].max()
    cycle_position = get_cycle_position(current_date, halving_dates)
    # Rounded (not truncated) so it matches how cycle days are labelled
    # when the historical averages are built.
    current_cycle_days = int(round(cycle_position * 4 * 365))

    # Current price (last known price)
    last_price = df['Close'].iloc[-1]
    last_date = df['Date'].iloc[-1]

    # Generate dates for projection
    future_dates = pd.date_range(
        start=last_date + timedelta(days=1),
        periods=days_forward,
        freq='D'
    )

    # Calculate expected returns for future dates based on cycle position
    fallback_return = cycle_returns.mean()
    expected_returns = np.array([
        cycle_returns.get((current_cycle_days + i) % (4 * 365), fallback_return)
        for i in range(days_forward)
    ], dtype=float)
    # A single NaN would poison every later price through cumsum, so cycle
    # days without a usable average fall back to the overall mean.
    expected_returns = np.where(np.isnan(expected_returns), fallback_return, expected_returns)

    # Calculate base volatility (recent)
    recent_volatility = df['Daily_Return'].tail(90).std()

    # Add long-term trend component (very gentle decay)
    long_term_decay = 0.9 ** (np.arange(days_forward) / 365)  # 10% reduction per year
    expected_returns = expected_returns * long_term_decay

    # Run Monte Carlo simulation.
    # A private, fixed-seed generator keeps results reproducible without
    # reseeding NumPy's global random state as a side effect.
    # NOTE(review): returns are compounded as log returns (exp of cumsum);
    # confirm 'Daily_Return' is meant to be used that way.
    rng = np.random.RandomState(42)
    returns = rng.normal(
        loc=expected_returns,
        scale=recent_volatility,
        size=(simulations, days_forward)
    )
    # One column per simulation, one row per future day
    simulated_paths = (last_price * np.exp(np.cumsum(returns, axis=1))).T

    # Calculate percentiles for confidence intervals
    results = pd.DataFrame(index=future_dates)
    results['Median'] = np.percentile(simulated_paths, 50, axis=1)
    for level in confidence_levels:
        lower_percentile = (1 - level) * 100 / 2
        upper_percentile = 100 - lower_percentile
        # round() before int(): 0.29 * 100 is 28.999..., which int() alone
        # would label as 28.
        label = int(round(level * 100))
        results[f'Lower_{label}'] = np.percentile(simulated_paths, lower_percentile, axis=1)
        results[f'Upper_{label}'] = np.percentile(simulated_paths, upper_percentile, axis=1)

    # Add expected trend line (without randomness)
    results['Expected_Trend'] = last_price * np.exp(np.cumsum(expected_returns))

    return results
def calculate_rolling_metrics(df, window=365):
    """
    Return a copy of ``df`` extended with rolling return statistics.

    Adds 'Rolling_Daily_Return' (rolling mean) and 'Rolling_Daily_Volatility'
    (rolling standard deviation) of 'Daily_Return' over ``window`` rows.
    The input frame is left untouched.
    """
    rolling_returns = df['Daily_Return'].rolling(window=window)
    return df.assign(
        Rolling_Daily_Return=rolling_returns.mean(),
        Rolling_Daily_Volatility=rolling_returns.std(),
    )
def fit_return_trend(df):
    """
    Fit an exponential decay trend to the rolling returns.

    The model is ``y = a * exp(-b * x) + c`` where x is the number of days
    since the first date in ``df`` and y is the rolling mean daily return
    produced by calculate_rolling_metrics().

    Parameters:
        df: DataFrame with 'Date' and 'Daily_Return' columns (not modified).

    Returns:
        Array ``[a, b, c]`` of fitted parameters.

    Raises:
        ValueError: If fewer than three rows have a valid rolling return.
    """
    from scipy.optimize import curve_fit

    # Calculate days from start
    df = df.copy()
    df['Days'] = (df['Date'] - df['Date'].min()).dt.days

    # Calculate rolling metrics
    df = calculate_rolling_metrics(df)

    # Remove NaN values for fitting. Only the two fitted columns matter; a
    # blanket dropna() would also discard rows that merely have a gap in an
    # unrelated column.
    clean_data = df.dropna(subset=['Days', 'Rolling_Daily_Return'])
    if len(clean_data) < 3:
        raise ValueError(
            "Not enough data to fit the return trend: "
            "need at least 3 rows with a valid rolling return"
        )

    # Fit exponential decay: y = a * exp(-bx) + c
    def exp_decay(x, a, b, c):
        return a * np.exp(-b * x) + c

    popt, _ = curve_fit(
        exp_decay,
        clean_data['Days'],
        clean_data['Rolling_Daily_Return'],
        p0=[0.01, 0.001, 0.0001],  # Initial guess for parameters
        bounds=([0, 0, 0], [1, 1, 0.01])  # Constraints to keep parameters positive
    )
    return popt
def project_prices_with_trend(df, days_forward=365, simulations=1000, confidence_levels=(0.95, 0.68)):
    """
    Project future Bitcoin prices using Monte Carlo simulation with trend adjustment.

    Parameters:
        df: DataFrame with 'Date', 'Close' and 'Daily_Return' columns, sorted by date
        days_forward: Number of days to project forward
        simulations: Number of Monte Carlo simulations to run
        confidence_levels: Iterable of confidence levels for the projection intervals

    Returns:
        DataFrame indexed by future date with 'Median', 'Lower_<pct>' /
        'Upper_<pct>' for each confidence level, and 'Expected_Trend'.
    """
    # Fit return trend
    trend_params = fit_return_trend(df)

    # Calculate days from start for projection
    days_from_start = (df['Date'].max() - df['Date'].min()).days

    # Current price (last known price)
    last_price = df['Close'].iloc[-1]
    last_date = df['Date'].iloc[-1]

    # Generate dates for projection
    future_dates = pd.date_range(
        start=last_date + timedelta(days=1),
        periods=days_forward,
        freq='D'
    )

    # Calculate expected returns for future dates using fitted trend
    def exp_decay(x, a, b, c):
        return a * np.exp(-b * x) + c

    future_days = np.arange(days_from_start + 1, days_from_start + days_forward + 1)
    expected_returns = exp_decay(future_days, *trend_params)

    # Use recent volatility for projections
    recent_volatility = df['Daily_Return'].tail(365).std()

    # Run Monte Carlo simulation.
    # A private, fixed-seed generator keeps results reproducible without
    # reseeding NumPy's global random state as a side effect.
    rng = np.random.RandomState(42)
    returns = rng.normal(
        loc=expected_returns,
        scale=recent_volatility,
        size=(simulations, days_forward)
    )
    # One column per simulation, one row per future day
    simulated_paths = (last_price * np.exp(np.cumsum(returns, axis=1))).T

    # Calculate percentiles for confidence intervals
    results = pd.DataFrame(index=future_dates)
    results['Median'] = np.percentile(simulated_paths, 50, axis=1)
    for level in confidence_levels:
        lower_percentile = (1 - level) * 100 / 2
        upper_percentile = 100 - lower_percentile
        # round() before int(): 0.29 * 100 is 28.999..., which int() alone
        # would label as 28.
        label = int(round(level * 100))
        results[f'Lower_{label}'] = np.percentile(simulated_paths, lower_percentile, axis=1)
        results[f'Upper_{label}'] = np.percentile(simulated_paths, upper_percentile, axis=1)

    # Add expected trend line (without randomness)
    results['Expected_Trend'] = last_price * np.exp(np.cumsum(expected_returns))

    return results
def get_nice_price_points(min_price, max_price):
    """
    Generate a reasonable set of price points for the y-axis that look clean
    and cover the range without cluttering the chart.

    Parameters:
        min_price: Lower bound of the price range (must be positive and finite)
        max_price: Upper bound of the price range (must be positive and finite)

    Returns:
        NumPy array of tick values inside [min_price, max_price]; may be empty
        when no 1/2/5 step falls inside the range.

    Raises:
        ValueError: If either bound is non-positive or not finite, since the
            points are laid out on a logarithmic scale.
    """
    if not (np.isfinite(min_price) and np.isfinite(max_price)) or min_price <= 0 or max_price <= 0:
        raise ValueError(
            f"price bounds must be positive and finite for a log scale, "
            f"got min_price={min_price!r}, max_price={max_price!r}"
        )

    log_min = np.floor(np.log10(min_price))
    log_max = np.ceil(np.log10(max_price))
    exponents = range(int(log_min), int(log_max + 1))
    price_points = []

    if log_max - log_min > 4:
        # For very large ranges (spanning more than 4 orders of magnitude),
        # only use powers of 10 and mid-points
        for exp in exponents:
            base = 10**exp
            # Add main power of 10
            if min_price <= base <= max_price:
                price_points.append(base)
            # Add mid-point, except in the lowest decade
            if exp > log_min and min_price <= base * 5 <= max_price:
                price_points.append(base * 5)
    else:
        # For smaller ranges, use 1, 2, 5 sequence
        for exp in exponents:
            for mult in (1, 2, 5):
                point = mult * 10**exp
                if min_price <= point <= max_price:
                    price_points.append(point)

    return np.array(price_points)
def format_price(x, p):
    """
    Render a dollar amount compactly, using B / M / K suffixes for large values.

    Values of magnitude >= 1000 get one decimal and a suffix, values of
    magnitude >= 1 are shown as whole dollars, anything smaller shows cents.
    ``p`` (the tick position passed by matplotlib formatters) is ignored.
    """
    magnitude = abs(x)
    for threshold, suffix in ((1e9, 'B'), (1e6, 'M'), (1e3, 'K')):
        if magnitude >= threshold:
            return f'${x/threshold:.1f}{suffix}'
    # Below one thousand: whole dollars, or cents for sub-dollar values
    return f'${x:.0f}' if magnitude >= 1 else f'${x:.2f}'
def project_prices(df, days_forward=365, simulations=1000, confidence_levels=(0.95, 0.68)):
    """
    Project future Bitcoin prices using Monte Carlo simulation.

    Parameters:
        df: DataFrame with historical price data ('Date', 'Close' and
            'Daily_Return' columns, sorted by date)
        days_forward: Number of days to project forward
        simulations: Number of Monte Carlo simulations to run
        confidence_levels: Iterable of confidence levels for the projection intervals

    Returns:
        DataFrame with projection results, indexed by future date, with
        'Median' plus 'Lower_<pct>' / 'Upper_<pct>' for each confidence level
    """
    # Calculate daily return parameters
    daily_return = df['Daily_Return'].mean()
    daily_volatility = df['Daily_Return'].std()

    # Current price (last known price)
    last_price = df['Close'].iloc[-1]
    last_date = df['Date'].iloc[-1]

    # Generate dates for projection
    future_dates = pd.date_range(
        start=last_date + timedelta(days=1),
        periods=days_forward,
        freq='D'
    )

    # Run Monte Carlo simulation.
    # A private, fixed-seed generator keeps results reproducible without
    # reseeding NumPy's global random state as a side effect.
    # NOTE(review): returns are compounded as log returns (exp of cumsum);
    # confirm 'Daily_Return' is meant to be used that way.
    rng = np.random.RandomState(42)
    returns = rng.normal(
        loc=daily_return,
        scale=daily_volatility,
        size=(simulations, days_forward)
    )
    # One column per simulation, one row per future day
    simulated_paths = (last_price * np.exp(np.cumsum(returns, axis=1))).T

    # Calculate percentiles for confidence intervals
    results = pd.DataFrame(index=future_dates)
    results['Median'] = np.percentile(simulated_paths, 50, axis=1)
    for level in confidence_levels:
        lower_percentile = (1 - level) * 100 / 2
        upper_percentile = 100 - lower_percentile
        # round() before int(): 0.29 * 100 is 28.999..., which int() alone
        # would label as 28.
        label = int(round(level * 100))
        results[f'Lower_{label}'] = np.percentile(simulated_paths, lower_percentile, axis=1)
        results[f'Upper_{label}'] = np.percentile(simulated_paths, upper_percentile, axis=1)

    return results
def print_analysis(analysis):
    """
    Print a human-readable report of the metrics in ``analysis``.

    ``analysis`` is a mapping providing the period bounds, day count, price
    levels, volatility figures and return figures read below.
    """
    print(f"\nBitcoin Price Analysis ({analysis['period_start']} to {analysis['period_end']})")
    print("-" * 50)

    # (label, key, template) rows; a row whose key is None is a section header
    report_rows = (
        ("Total Days Analyzed", 'total_days', "{}"),
        ("\nPrice Range:", None, None),
        ("Starting Price", 'start_price', "${:,.2f}"),
        ("Ending Price", 'end_price', "${:,.2f}"),
        ("Minimum Price", 'min_price', "${:,.2f}"),
        ("Maximum Price", 'max_price', "${:,.2f}"),
        ("Average Price", 'avg_price', "${:,.2f}"),
        ("\nVolatility Metrics:", None, None),
        ("Daily Volatility", 'daily_volatility', "{:.2%}"),
        ("Annualized Volatility", 'annualized_volatility', "{:.2%}"),
        ("\nReturn Metrics:", None, None),
        ("Total Return", 'total_return', "{:,.2f}%"),
        ("Average Daily Return", 'average_daily_return', "{:.2f}%"),
        ("Average Annual Return", 'average_annual_return', "{:,.2f}%"),
    )
    for label, key, template in report_rows:
        if key is None:
            print(label)
        else:
            print(f"{label}: {template.format(analysis[key])}")
if __name__ == "__main__":
    # Load the price history and compute the summary metrics
    analysis, df = analyze_bitcoin_prices("prices.csv")

    # Alternative date ranges, kept for reference:
    #create_plots(df) # Full history
    #create_plots(df, start='2022-01-01') # From 2022 onwards
    #create_plots(df, start='2023-01-01', end='2023-12-31') # Just 2023

    # Create plots from 2011 onwards with a four-year projection horizon
    projections = create_plots(df, start='2011-01-01', project_days=4 * 365)

    # Show the projection at 30, 90, 180 and 365 days ahead (rows are 0-based)
    key_days = (30, 90, 180, 365)
    print("\nProjected Prices at Key Points:")
    print(projections.iloc[[day - 1 for day in key_days]].round(2))

    print_analysis(analysis)