bitcoin-model/model.py
Sam Fredrickson 5fa358e6ae Clean up the code.
- Re-order functions.
- Remove older projection methods.
- Format with black.
2024-11-14 20:05:27 -08:00

720 lines
23 KiB
Python

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
# Utility functions
def get_halving_dates():
"""Return known and projected Bitcoin halving dates"""
return pd.to_datetime(
[
"2008-01-03", # Bitcoin genesis block (treat as cycle start)
"2012-11-28", # First halving
"2016-07-09", # Second halving
"2020-05-11", # Third halving
"2024-04-19", # Fourth halving
"2028-04-20", # Fifth halving (projected)
]
)
def get_cycle_position(date, halving_dates):
"""
Calculate position in halving cycle (0 to 1) for a given date.
0 represents a halving event, 1 represents just before the next halving.
"""
# Convert date to datetime if it's not already
date = pd.to_datetime(date)
# Find the most recent halving before this date
prev_halving = halving_dates[halving_dates <= date].max()
if pd.isna(prev_halving):
return 0.0 # For dates before first halving
# Find next halving
future_halvings = halving_dates[halving_dates > date]
if len(future_halvings) == 0:
# For dates after last known halving, use same cycle length as last known cycle
last_cycle_length = (halving_dates[-1] - halving_dates[-2]).days
days_since_halving = (date - halving_dates[-1]).days
return min(days_since_halving / last_cycle_length, 1.0)
next_halving = future_halvings.min()
# Calculate position as fraction between halvings
days_since_halving = (date - prev_halving).days
cycle_length = (next_halving - prev_halving).days
return min(days_since_halving / cycle_length, 1.0)
def format_price(x, p):
"""Format large numbers in K, M, B format with appropriate precision"""
if abs(x) >= 1e9:
return f"${x/1e9:.1f}B"
if abs(x) >= 1e6:
return f"${x/1e6:.1f}M"
if abs(x) >= 1e3:
return f"${x/1e3:.1f}K"
if abs(x) >= 1:
return f"${x:.0f}"
return f"${x:.2f}" # For values less than $1, show cents
def get_nice_price_points(min_price, max_price):
"""
Generate a reasonable set of price points for the y-axis that look clean
and cover the range without cluttering the chart.
"""
log_min = np.floor(np.log10(min_price))
log_max = np.ceil(np.log10(max_price))
price_points = []
# For very large ranges (spanning more than 4 orders of magnitude),
# only use powers of 10 and mid-points
if log_max - log_min > 4:
for exp in range(int(log_min), int(log_max + 1)):
base = 10**exp
# Add main power of 10
if min_price <= base <= max_price:
price_points.append(base)
# Add mid-point if range is large enough
if min_price <= base * 5 <= max_price and exp > log_min:
price_points.append(base * 5)
else:
# For smaller ranges, use 1, 2, 5 sequence
for exp in range(int(log_min), int(log_max + 1)):
for mult in [1, 2, 5]:
point = mult * 10**exp
if min_price <= point <= max_price:
price_points.append(point)
return np.array(price_points)
# Analysis functions
def analyze_cycles_with_halvings(df):
"""Analyze Bitcoin market cycles aligned with halving events"""
df = df.copy()
# Get halving dates
halving_dates = get_halving_dates()
# Calculate cycle position for each date
df["Cycle_Position"] = df["Date"].apply(
lambda x: get_cycle_position(x, halving_dates)
)
# Convert to days within cycle (0 to ~1460 days)
df["Cycle_Days"] = (df["Cycle_Position"] * 4 * 365).round().astype(int)
# Calculate returns at different scales
df["Returns_30d"] = df["Close"].pct_change(periods=30)
df["Returns_90d"] = df["Close"].pct_change(periods=90)
df["Returns_365d"] = df["Close"].pct_change(periods=365)
# Group by position in cycle and calculate average returns
cycle_returns = df.groupby(df["Cycle_Days"])["Daily_Return"].mean()
cycle_volatility = df.groupby(df["Cycle_Days"])["Daily_Return"].std()
# Smooth the cycle returns to reduce noise
from scipy.signal import savgol_filter
window = 91 # About 3 months
if len(cycle_returns) > window:
cycle_returns = pd.Series(
savgol_filter(cycle_returns, window, 3), index=cycle_returns.index
)
return cycle_returns, cycle_volatility
def project_prices_with_cycles(
df, days_forward=365, simulations=1000, confidence_levels=[0.95, 0.68]
):
"""
Project future Bitcoin prices using Monte Carlo simulation with halving-aligned cycles.
"""
# Analyze historical cycles
cycle_returns, cycle_volatility = analyze_cycles_with_halvings(df)
# Get current position in halving cycle
halving_dates = get_halving_dates()
current_date = df["Date"].max()
cycle_position = get_cycle_position(current_date, halving_dates)
current_cycle_days = int(cycle_position * 4 * 365)
# Current price (last known price)
last_price = df["Close"].iloc[-1]
last_date = df["Date"].iloc[-1]
# Generate dates for projection
future_dates = pd.date_range(
start=last_date + timedelta(days=1), periods=days_forward, freq="D"
)
# Calculate expected returns for future dates based on cycle position
future_cycle_days = [
(current_cycle_days + i) % (4 * 365) for i in range(days_forward)
]
expected_returns = np.array(
[cycle_returns.get(day, cycle_returns.mean()) for day in future_cycle_days]
)
# Calculate base volatility (recent)
recent_volatility = df["Daily_Return"].tail(90).std()
# Add long-term trend component (very gentle decay)
long_term_decay = 0.9 ** (np.arange(days_forward) / 365) # 10% reduction per year
expected_returns = expected_returns * long_term_decay
# Run Monte Carlo simulation
np.random.seed(42) # For reproducibility
simulated_paths = np.zeros((days_forward, simulations))
for sim in range(simulations):
# Generate random returns using cycle-aware expected returns
returns = np.random.normal(
loc=expected_returns, scale=recent_volatility, size=days_forward
)
# Calculate price path
price_path = last_price * np.exp(np.cumsum(returns))
simulated_paths[:, sim] = price_path
# Calculate percentiles for confidence intervals
results = pd.DataFrame(index=future_dates)
results["Median"] = np.percentile(simulated_paths, 50, axis=1)
for level in confidence_levels:
lower_percentile = (1 - level) * 100 / 2
upper_percentile = 100 - lower_percentile
results[f"Lower_{int(level*100)}"] = np.percentile(
simulated_paths, lower_percentile, axis=1
)
results[f"Upper_{int(level*100)}"] = np.percentile(
simulated_paths, upper_percentile, axis=1
)
# Add expected trend line (without randomness)
results["Expected_Trend"] = last_price * np.exp(np.cumsum(expected_returns))
return results
def analyze_bitcoin_prices(csv_path):
"""
Analyze Bitcoin price data to calculate volatility and growth rates.
"""
# Read CSV with proper data types
df = pd.read_csv(csv_path, parse_dates=[0])
# Print first few rows of raw data to inspect
print("\nFirst few rows of raw data:")
print(df.head())
# Print data info to see types and non-null counts
print("\nDataset Info:")
print(df.info())
# Convert price columns to float and handle any potential formatting issues
price_columns = ["Price", "Open", "High", "Low"]
for col in price_columns:
# Remove any commas in numbers
df[col] = df[col].astype(str).str.replace(",", "")
df[col] = pd.to_numeric(df[col], errors="coerce")
# Rename columns for clarity
df.columns = ["Date", "Close", "Open", "High", "Low", "Volume", "Change"]
# Sort by date in ascending order
df = df.sort_values("Date")
# Print summary statistics after conversion
print("\nPrice Summary After Conversion:")
print(df[["Close", "Open", "High", "Low"]].describe())
# Calculate daily returns
df["Daily_Return"] = df["Close"].pct_change()
# Print first few daily returns to verify calculation
print("\nFirst few daily returns:")
print(df[["Date", "Close", "Daily_Return"]].head())
# Check for any infinite or NaN values
print("\nInfinite or NaN value counts:")
print(df.isna().sum())
# Calculate metrics using 365 days for annualization
analysis = {
"period_start": df["Date"].min().strftime("%Y-%m-%d"),
"period_end": df["Date"].max().strftime("%Y-%m-%d"),
"total_days": len(df),
"daily_volatility": df["Daily_Return"].std(),
"annualized_volatility": df["Daily_Return"].std() * np.sqrt(365),
"total_return": (df["Close"].iloc[-1] / df["Close"].iloc[0] - 1) * 100,
"average_daily_return": df["Daily_Return"].mean() * 100,
"average_annual_return": ((1 + df["Daily_Return"].mean()) ** 365 - 1) * 100,
"min_price": df["Low"].min(),
"max_price": df["High"].max(),
"avg_price": df["Close"].mean(),
"start_price": df["Close"].iloc[0],
"end_price": df["Close"].iloc[-1],
}
# Calculate rolling metrics
df["Rolling_Volatility_30d"] = df["Daily_Return"].rolling(
window=30
).std() * np.sqrt(365)
df["Rolling_Return_30d"] = df["Close"].pct_change(periods=30) * 100
return analysis, df
# Main plotting functions
def create_plots(df, start=None, end=None, project_days=365):
"""
Create plots including historical data and future projections.
"""
# Filter data based on date range
mask = pd.Series(True, index=df.index)
if start:
mask &= df["Date"] >= pd.to_datetime(start)
if end:
mask &= df["Date"] <= pd.to_datetime(end)
plot_df = df[mask].copy()
if len(plot_df) == 0:
raise ValueError("No data found for the specified date range")
# Generate projections
cycle_returns, cycle_volatility = analyze_cycles_with_halvings(plot_df)
projections = project_prices_with_cycles(plot_df, days_forward=project_days)
# Create cycle visualization
visualize_cycle_patterns(plot_df, cycle_returns, cycle_volatility)
# Set up the style
plt.style.use("seaborn-v0_8")
# Create figure
fig = plt.figure(figsize=(15, 15))
# Date range for titles
hist_date_range = f" ({plot_df['Date'].min().strftime('%Y-%m-%d')} to {plot_df['Date'].max().strftime('%Y-%m-%d')})"
# 1. Price history and projections (log scale)
ax1 = plt.subplot(4, 1, 1)
# Plot historical prices
ax1.semilogy(plot_df["Date"], plot_df["Close"], "b-", label="Historical Price")
# Plot projections
ax1.semilogy(
projections.index,
projections["Expected_Trend"],
"--",
color="purple",
label="Expected Trend",
)
ax1.semilogy(
projections.index,
projections["Median"],
":",
color="green",
label="Simulated Median",
)
ax1.fill_between(
projections.index,
projections["Lower_95"],
projections["Upper_95"],
alpha=0.2,
color="orange",
label="95% Confidence Interval",
)
ax1.fill_between(
projections.index,
projections["Lower_68"],
projections["Upper_68"],
alpha=0.3,
color="green",
label="68% Confidence Interval",
)
# Customize y-axis
ax1.yaxis.set_major_formatter(plt.FuncFormatter(format_price))
# Set custom y-axis ticks at meaningful price points
min_price = min(plot_df["Low"].min(), projections["Lower_95"].min())
max_price = max(plot_df["High"].max(), projections["Upper_95"].max())
price_points = get_nice_price_points(min_price, max_price)
ax1.set_yticks(price_points)
# Adjust y-axis label properties
ax1.tick_params(axis="y", labelsize=8) # Smaller font size
# Add some padding to prevent label cutoff
ax1.margins(y=0.02)
# Adjust label padding to prevent overlap
ax1.yaxis.set_tick_params(pad=1)
# Add grid lines with adjusted opacity
ax1.grid(True, which="major", linestyle="-", alpha=0.5)
ax1.grid(True, which="minor", linestyle=":", alpha=0.2)
ax1.set_title("Bitcoin Price History and Projections (Log Scale)" + hist_date_range)
# Make legend font size smaller too for consistency
ax1.legend(fontsize=8)
# 2. Rolling volatility
ax2 = plt.subplot(4, 1, 2)
ax2.plot(
plot_df["Date"],
plot_df["Rolling_Volatility_30d"],
"r-",
label="30-Day Rolling Volatility",
)
ax2.set_title("30-Day Rolling Volatility (Annualized)" + hist_date_range)
ax2.set_xlabel("Date")
ax2.set_ylabel("Volatility")
ax2.grid(True)
ax2.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: "{:.0%}".format(y)))
ax2.legend()
# 3. Returns distribution
ax3 = plt.subplot(4, 1, 3)
returns_mean = plot_df["Daily_Return"].mean()
returns_std = plot_df["Daily_Return"].std()
filtered_returns = plot_df["Daily_Return"][
(plot_df["Daily_Return"] > returns_mean - 5 * returns_std)
& (plot_df["Daily_Return"] < returns_mean + 5 * returns_std)
]
sns.histplot(filtered_returns, bins=100, ax=ax3)
ax3.set_title(
"Distribution of Daily Returns (Excluding Extreme Outliers)" + hist_date_range
)
ax3.set_xlabel("Daily Return")
ax3.set_ylabel("Count")
ax3.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: "{:.0%}".format(x)))
# Add a vertical line for mean return
ax3.axvline(filtered_returns.mean(), color="r", linestyle="dashed", linewidth=1)
ax3.text(
filtered_returns.mean(),
ax3.get_ylim()[1],
"Mean",
rotation=90,
va="top",
ha="right",
)
# 4. Projection ranges
ax4 = plt.subplot(4, 1, 4)
# Calculate and plot price ranges at different future points
timepoints = np.array(range(30, 365, 30))
timepoints = timepoints[timepoints <= project_days]
ranges = []
labels = []
positions = []
for t in timepoints:
idx = t - 1 # Convert to 0-based index
ranges.extend(
[
projections["Lower_95"].iloc[idx],
projections["Lower_68"].iloc[idx],
projections["Median"].iloc[idx],
projections["Upper_68"].iloc[idx],
projections["Upper_95"].iloc[idx],
]
)
labels.extend(["95% Lower", "68% Lower", "Median", "68% Upper", "95% Upper"])
positions.extend([t] * 5)
# Plot ranges (removed violin plot)
ax4.scatter(positions, ranges, alpha=0.6)
# Add lines connecting the ranges
for t in timepoints:
idx = positions.index(t)
ax4.plot([t] * 5, ranges[idx : idx + 5], "k-", alpha=0.3)
# Set log scale first
ax4.set_yscale("log")
# Get the current order of magnitude for setting appropriate ticks
min_price = min(ranges)
max_price = max(ranges)
# Create price points at regular intervals on log scale
log_min = np.floor(np.log10(min_price))
log_max = np.ceil(np.log10(max_price))
price_points = []
for exp in range(int(log_min), int(log_max + 1)):
for mult in [1, 2, 5]:
point = mult * 10**exp
if min_price <= point <= max_price:
price_points.append(point)
ax4.set_yticks(price_points)
def price_formatter(x, p):
if x >= 1e6:
return f"${x/1e6:.1f}M"
if x >= 1e3:
return f"${x/1e3:.0f}K"
return f"${x:.0f}"
# Apply formatter to major ticks
ax4.yaxis.set_major_formatter(plt.FuncFormatter(price_formatter))
# Customize the plot
ax4.set_title("Projected Price Ranges at Future Timepoints")
ax4.set_xlabel("Days Forward")
ax4.set_ylabel("Price (USD)")
ax4.grid(True, alpha=0.3)
# Set x-axis to show only our timepoints
ax4.set_xticks(timepoints)
# Adjust layout
plt.tight_layout()
# Save the plot
start_str = start if start else plot_df["Date"].min().strftime("%Y-%m-%d")
end_str = end if end else plot_df["Date"].max().strftime("%Y-%m-%d")
filename = f"bitcoin_analysis_{start_str}_to_{end_str}_with_projections.png"
plt.savefig(filename, dpi=300, bbox_inches="tight")
plt.close()
return projections
def visualize_cycle_patterns(df, cycle_returns, cycle_volatility):
"""
Create enhanced visualization of Bitcoin's behavior across halving cycles.
"""
plt.style.use("seaborn-v0_8")
fig = plt.figure(figsize=(15, 15))
# Create a 3x1 subplot grid with different heights
gs = plt.GridSpec(3, 1, height_ratios=[2, 1, 2], hspace=0.3)
# Plot 1: Returns across cycle with confidence bands
ax1 = plt.subplot(gs[0])
# Convert days to percentage through cycle
x_points = np.array(cycle_returns.index) / (4 * 365) * 100
# Calculate rolling mean and standard deviation for confidence bands
window = 30 # 30-day window
rolling_mean = pd.Series(cycle_returns.values).rolling(window=window).mean()
rolling_std = pd.Series(cycle_returns.values).rolling(window=window).std()
# Plot confidence bands
ax1.fill_between(
x_points,
(rolling_mean - 2 * rolling_std) * 100,
(rolling_mean + 2 * rolling_std) * 100,
alpha=0.2,
color="blue",
label="95% Confidence",
)
ax1.fill_between(
x_points,
(rolling_mean - rolling_std) * 100,
(rolling_mean + rolling_std) * 100,
alpha=0.3,
color="blue",
label="68% Confidence",
)
# Plot average returns
ax1.plot(
x_points,
cycle_returns.values * 100,
"b-",
label="Average Daily Return",
linewidth=2,
)
ax1.axhline(y=0, color="gray", linestyle="--", alpha=0.5)
# Add vertical lines for each year in cycle
for year in range(1, 4):
ax1.axvline(x=year * 25, color="gray", linestyle=":", alpha=0.3)
ax1.text(
year * 25,
ax1.get_ylim()[1],
f"Year {year}",
rotation=90,
va="top",
ha="right",
alpha=0.7,
)
# Highlight halving points
ax1.axvline(x=0, color="red", linestyle="--", alpha=0.5, label="Halving Event")
ax1.axvline(x=100, color="red", linestyle="--", alpha=0.5)
ax1.set_title("Bitcoin Return Patterns Across Halving Cycle", pad=20)
ax1.set_xlabel("Position in Cycle (%)")
ax1.set_ylabel("Average Daily Return (%)")
ax1.grid(True, alpha=0.3)
ax1.legend(loc="upper right")
# Plot 2: Volatility across cycle
ax2 = plt.subplot(gs[1])
# Calculate rolling volatility confidence bands
vol_mean = pd.Series(cycle_volatility.values).rolling(window=window).mean()
vol_std = pd.Series(cycle_volatility.values).rolling(window=window).std()
# Plot volatility with confidence bands
annualized_factor = np.sqrt(365) * 100
ax2.fill_between(
x_points,
(vol_mean - 2 * vol_std) * annualized_factor,
(vol_mean + 2 * vol_std) * annualized_factor,
alpha=0.2,
color="red",
label="95% Confidence",
)
ax2.plot(
x_points,
cycle_volatility.values * annualized_factor,
"r-",
label="Annualized Volatility",
linewidth=2,
)
# Add year markers
for year in range(1, 4):
ax2.axvline(x=year * 25, color="gray", linestyle=":", alpha=0.3)
ax2.axvline(x=0, color="red", linestyle="--", alpha=0.5)
ax2.axvline(x=100, color="red", linestyle="--", alpha=0.5)
ax2.set_xlabel("Position in Cycle (%)")
ax2.set_ylabel("Volatility (%)")
ax2.grid(True, alpha=0.3)
ax2.legend(loc="upper right")
# Plot 3: Average price trajectory within cycles
ax3 = plt.subplot(gs[2])
# Define a color scheme for cycles
cycle_colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd"]
# Calculate average price path for each cycle
halving_dates = get_halving_dates()
cycles = []
for i in range(len(halving_dates) - 1):
cycle_start = halving_dates[i]
cycle_end = halving_dates[i + 1]
cycle_data = df[(df["Date"] >= cycle_start) & (df["Date"] < cycle_end)].copy()
if len(cycle_data) > 0:
cycle_data["Cycle_Pct"] = (
(cycle_data["Date"] - cycle_start).dt.total_seconds()
/ (cycle_end - cycle_start).total_seconds()
* 100
)
cycle_data["Normalized_Price"] = (
cycle_data["Close"] / cycle_data["Close"].iloc[0]
)
cycles.append(cycle_data)
# Plot each historical cycle with distinct colors
for i, cycle in enumerate(cycles):
ax3.semilogy(
cycle["Cycle_Pct"],
cycle["Normalized_Price"],
color=cycle_colors[i],
alpha=0.7,
label=f'Cycle {i+1} ({cycle["Date"].iloc[0].strftime("%Y")}-{cycle["Date"].iloc[-1].strftime("%Y")})',
)
# Calculate and plot average cycle
if cycles:
avg_cycle = pd.concat(
[c.set_index("Cycle_Pct")["Normalized_Price"] for c in cycles], axis=1
)
avg_cycle_mean = avg_cycle.mean(axis=1)
avg_cycle_std = avg_cycle.std(axis=1)
ax3.semilogy(
avg_cycle_mean.index,
avg_cycle_mean.values,
"k-",
linewidth=2,
label="Average Cycle",
)
ax3.fill_between(
avg_cycle_mean.index,
avg_cycle_mean * np.exp(-2 * avg_cycle_std),
avg_cycle_mean * np.exp(2 * avg_cycle_std),
alpha=0.2,
color="gray",
)
# Add year markers
for year in range(1, 4):
ax3.axvline(x=year * 25, color="gray", linestyle=":", alpha=0.3)
ax3.axvline(x=0, color="red", linestyle="--", alpha=0.5)
ax3.axvline(x=100, color="red", linestyle="--", alpha=0.5)
ax3.set_title("Price Performance Across Cycles (Normalized)", pad=20)
ax3.set_xlabel("Position in Cycle (%)")
ax3.set_ylabel("Price (Relative to Cycle Start)")
ax3.grid(True, alpha=0.3)
ax3.legend(loc="center left", bbox_to_anchor=(1.02, 0.5))
# Add current cycle position marker on all plots
current_position = get_cycle_position(df["Date"].max(), halving_dates) * 100
for ax in [ax1, ax2, ax3]:
ax.axvline(
x=current_position,
color="green",
linestyle="-",
alpha=0.5,
label="Current Position",
)
# Main title for the figure
fig.suptitle("Bitcoin Halving Cycle Analysis", fontsize=16, y=0.95)
# Adjust layout to prevent legend cutoff
plt.tight_layout()
# Save the plot
plt.savefig("bitcoin_cycle_patterns.png", dpi=300, bbox_inches="tight")
plt.close()
if __name__ == "__main__":
analysis, df = analyze_bitcoin_prices("prices.csv")
# create_plots(df) # Full history
# create_plots(df, start='2022-01-01') # From 2022 onwards
# create_plots(df, start='2023-01-01', end='2023-12-31') # Just 2023
# Create plots with different time ranges and projections
projections = create_plots(df, start="2011-01-01", project_days=365 * 4)
print("\nProjected Prices at Key Points:")
print(projections.iloc[[29, 89, 179, 364]].round(2)) # 30, 90, 180, 365 days