import argparse
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns
import shutil
import warnings
from datetime import timedelta
from multiprocessing import Pool
from typing import TextIO
# Output
class Output:
    """
    Output ensures result files get written to the right directory.

    Example:
        output = Output("output", "test-1")
        with output.create("summary.txt") as f:
            # path to f is "output/test-1/summary.txt"
    """

    out_dir: str

    def __init__(self, base_dir: str, name: str):
        """
        Initialize the output manager. This fully deletes any existing output
        directory, then creates an empty directory for the output.

        Args:
            base_dir: The root directory for outputs
            name: The subdir of the root, where the results files go
        """
        self.out_dir = os.path.join(base_dir, name)
        shutil.rmtree(self.out_dir, ignore_errors=True)
        os.makedirs(self.out_dir)

    def create(self, filename) -> TextIO:
        """
        Create a new file for writing in the output directory.
        """
        full_path = self.named(filename)
        return open(full_path, "w")

    def named(self, filename) -> str:
        """
        Get the full path within the output directory for a named results file.
        """
        return os.path.join(self.out_dir, filename)

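# Illustrative sketch (never called by the pipeline): the intended Output
# workflow, writing one results file directly and resolving a path to hand to
# a plotting call. The directory and file names here are hypothetical.
def _demo_output_usage():
    out = Output("output", "demo-run")
    with out.create("summary.txt") as f:
        f.write("demo summary\n")
    return out.named("chart.png")  # "output/demo-run/chart.png"
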
# Utility functions
def get_halving_dates():
    """Return known and projected Bitcoin halving dates"""
    return pd.to_datetime(
        [
            "2009-01-03",  # Bitcoin genesis block (treat as cycle start)
            "2012-11-28",  # First halving
            "2016-07-09",  # Second halving
            "2020-05-11",  # Third halving
            "2024-04-19",  # Fourth halving
            "2028-04-20",  # Fifth halving (projected)
        ]
    )

def get_cycle_position(date, halving_dates):
    """
    Calculate position in halving cycle (0 to 1) for a given date.
    0 represents a halving event, 1 represents just before the next halving.
    """
    if len(halving_dates) == 0:
        raise ValueError("halving_dates cannot be empty")

    # Convert date to datetime if it's not already
    date = pd.to_datetime(date)

    # Find the most recent halving before this date
    prev_halving = halving_dates[halving_dates <= date].max()
    if pd.isna(prev_halving):
        return 0.0  # For dates before first halving

    # Find next halving
    future_halvings = halving_dates[halving_dates > date]
    if len(future_halvings) == 0:
        # For dates after last known halving, use same cycle length as last known cycle
        last_cycle_length = (halving_dates[-1] - halving_dates[-2]).days
        days_since_halving = (date - halving_dates[-1]).days
        return min(days_since_halving / last_cycle_length, 1.0)

    next_halving = future_halvings.min()

    # Calculate position as fraction between halvings
    days_since_halving = (date - prev_halving).days
    cycle_length = (next_halving - prev_halving).days
    return min(days_since_halving / cycle_length, 1.0)

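# Minimal sketch of the cycle arithmetic above: one year into the cycle that
# began at the 2020-05-11 halving is roughly a quarter of the ~4-year cycle,
# so the position comes out near 0.25.
def _demo_cycle_position():
    halvings = get_halving_dates()
    return get_cycle_position("2021-05-11", halvings)  # ~365 / 1439 ≈ 0.25
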
def format_price(x, p):
    """Format large numbers in K, M, B format with appropriate precision"""
    if abs(x) >= 1e9:
        return f"${x / 1e9:.1f}B"
    if abs(x) >= 1e6:
        return f"${x / 1e6:.1f}M"
    if abs(x) >= 1e3:
        return f"${x / 1e3:.1f}K"
    if abs(x) >= 1:
        return f"${x:.0f}"
    return f"${x:.2f}"  # For values less than $1, show cents

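# Quick sanity check for the tick formatter. The second argument exists only
# to satisfy matplotlib's FuncFormatter(value, position) signature; it is
# unused by design.
def _demo_format_price():
    assert format_price(1_500_000_000, None) == "$1.5B"
    assert format_price(2_500, None) == "$2.5K"
    assert format_price(0.42, None) == "$0.42"
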
def get_nice_price_points(min_price, max_price):
    """
    Generate a reasonable set of price points for the y-axis that look clean
    and cover the range without cluttering the chart.
    """
    # Handle zero or negative prices
    min_price = max(min_price, 0.0001)  # Set minimum price to $0.0001

    log_min = np.floor(np.log10(min_price))
    log_max = np.ceil(np.log10(max_price))
    price_points = []

    # For very large ranges (spanning more than 4 orders of magnitude),
    # only use powers of 10 and mid-points
    if log_max - log_min > 4:
        for exp in range(int(log_min), int(log_max + 1)):
            base = 10**exp
            # Add main power of 10
            if min_price <= base <= max_price:
                price_points.append(base)
            # Add mid-point if range is large enough
            if min_price <= base * 5 <= max_price and exp > log_min:
                price_points.append(base * 5)
    else:
        # For smaller ranges, use 1, 2, 5 sequence
        for exp in range(int(log_min), int(log_max + 1)):
            for mult in [1, 2, 5]:
                point = mult * 10**exp
                if min_price <= point <= max_price:
                    price_points.append(point)

    return np.array(price_points)

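# Worked example of the 1-2-5 branch: a range spanning two orders of magnitude
# keeps only the 1/2/5 multiples that fall inside [min_price, max_price].
def _demo_nice_price_points():
    pts = get_nice_price_points(15, 900)
    assert list(pts) == [20, 50, 100, 200, 500]
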
# Market metrics
class MarketFundamentals:
    """
    Calculate and track fundamental market metrics for Bitcoin.
    Designed to be extensible for additional metrics.
    """

    def __init__(self):
        # Constants
        self.GENESIS_DATE = pd.Timestamp("2009-01-03")
        self.BLOCKS_PER_DAY = 144
        self.HALVING_INTERVAL = 210000  # blocks

        # Volatility adjustment parameters (from our tuning)
        self.VOLUME_SCALE = 70
        self.DEPTH_SCALE = 7
        self.BASE_ADJUSTMENT = 0.68

    def calculate_total_supply(self, date):
        """Calculate total Bitcoin supply at a given date."""
        days_since_genesis = (date - self.GENESIS_DATE).days
        if days_since_genesis < 0:
            return 0

        total_supply = 0
        remaining_blocks = days_since_genesis * self.BLOCKS_PER_DAY
        current_reward = 50

        while remaining_blocks > 0 and current_reward >= 0.01:
            blocks_at_this_reward = min(remaining_blocks, self.HALVING_INTERVAL)
            total_supply += blocks_at_this_reward * current_reward
            remaining_blocks -= blocks_at_this_reward
            current_reward /= 2

        # Adjust for missed blocks and lost coins
        total_supply *= 0.95  # Account for varying block times
        total_supply *= 0.93  # Estimate for lost/inaccessible coins

        return total_supply

    def get_block_reward(self, date):
        """Get Bitcoin block reward at a given date."""
        days_since_genesis = (date - self.GENESIS_DATE).days
        if days_since_genesis < 0:
            return 0

        halvings = days_since_genesis // (4 * 365)  # Approximate halving periods
        return 50 / (2**halvings)

    def calculate_supply_metrics(self, date):
        """Calculate supply-related metrics."""
        total_supply = self.calculate_total_supply(date)
        block_reward = self.get_block_reward(date)
        daily_new_supply = block_reward * self.BLOCKS_PER_DAY

        return {
            "total_supply": total_supply,
            "daily_new_supply": daily_new_supply,
            "supply_growth_rate": daily_new_supply / total_supply,
            "stock_to_flow": total_supply / (daily_new_supply * 365),  # Annualized
        }

    def calculate_market_metrics(self, df, date, window=30):
        """Calculate market activity metrics."""
        recent_data = df[df["Date"] <= date].tail(window)
        if len(recent_data) < window:
            return {"avg_volume": 0, "price_volatility": 0, "price_impact": 0}

        avg_volume = recent_data["Volume"].mean()
        price_volatility = recent_data["Close"].pct_change().std()
        price_impact = recent_data["Close"].std() / recent_data["Close"].mean()

        return {
            "avg_volume": avg_volume,
            "price_volatility": price_volatility,
            "price_impact": price_impact,
        }

    def get_market_maturity_metrics(self, df, date, window=30):
        """
        Combine supply and market metrics to assess market maturity.
        """
        supply_metrics = self.calculate_supply_metrics(date)
        market_metrics = self.calculate_market_metrics(df, date, window)

        # Calculate combined metrics
        volume_to_supply = market_metrics["avg_volume"] / supply_metrics["total_supply"]
        market_depth = volume_to_supply / (market_metrics["price_impact"] + 0.001)

        return {
            "volume_to_supply": volume_to_supply,
            "supply_growth_rate": supply_metrics["supply_growth_rate"],
            "market_depth": market_depth,
            "stock_to_flow": supply_metrics["stock_to_flow"],
            "price_impact": market_metrics["price_impact"],
        }

    def calculate_volatility_adjustment(self, metrics):
        """
        Calculate volatility adjustment based on market metrics.
        """
        # Supply-based component
        supply_based_vol = np.sqrt(metrics["supply_growth_rate"] * 365 * 100)

        # Market maturity component
        maturity_factor = 1 - np.clip(
            metrics["volume_to_supply"] * self.VOLUME_SCALE, 0, 0.6
        )

        # Market depth component
        depth_factor = np.clip(
            1 / np.sqrt(1 + metrics["market_depth"] * self.DEPTH_SCALE), 0.7, 1.3
        )

        # Combine factors
        adjustment = (
            self.BASE_ADJUSTMENT
            * (1 + supply_based_vol)
            * maturity_factor
            * depth_factor
        )

        # Ensure reasonable bounds
        return np.clip(adjustment, 0.65, 0.75)

    def calculate_confidence_adjustment(self, metrics, level):
        """Calculate confidence interval adjustments with more sensitive market metrics."""
        # More granular depth impact
        if metrics["market_depth"] > 0.1:
            depth_factor = 0.5
        elif metrics["market_depth"] > 0.05:
            depth_factor = 0.7
        else:
            depth_factor = 1.0

        # More granular volume impact
        if metrics["volume_to_supply"] > 0.015:
            volume_factor = 0.5
        elif metrics["volume_to_supply"] > 0.008:
            volume_factor = 0.7
        else:
            volume_factor = 1.0

        depth_impact = np.clip(metrics["market_depth"] * 0.15 * depth_factor, 0, 0.15)
        vol_impact = np.clip(metrics["volume_to_supply"] * 20 * volume_factor, 0, 0.15)

        total_adjustment = (depth_impact + vol_impact) * 0.4
        if level >= 0.95:
            total_adjustment *= 0.4

        return level + (1 - level) * total_adjustment

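# Hedged sketch of how the pieces above compose, on hypothetical metric
# values. calculate_volatility_adjustment is clipped to [0.65, 0.75], so even
# extreme inputs land inside that band; note the supply model is deliberately
# approximate (rewards step by whole 4*365-day periods, so it lags real
# halving dates).
def _demo_market_fundamentals():
    fund = MarketFundamentals()
    supply = fund.calculate_supply_metrics(pd.Timestamp("2024-06-01"))
    assert supply["stock_to_flow"] > 0  # annualized stock-to-flow ratio
    metrics = {
        "supply_growth_rate": 0.0001,  # hypothetical daily issuance / supply
        "volume_to_supply": 0.02,  # hypothetical turnover
        "market_depth": 0.5,  # hypothetical depth proxy
    }
    adj = fund.calculate_volatility_adjustment(metrics)
    assert 0.65 <= adj <= 0.75
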
def compare_adjustments(df, fundamentals):
    """
    Compare fundamental-based adjustments with original era-based ones.
    """
    # Sample dates for comparison
    date_range = pd.date_range(start=df["Date"].min(), end=df["Date"].max(), freq="30D")

    results = []
    for date in date_range:
        # Calculate era-based adjustment
        if date < pd.Timestamp("2017-12-10"):
            era_adj = 0.71  # early era
        elif date < pd.Timestamp("2020-01-01"):
            era_adj = 0.69  # transition era
        else:
            era_adj = 0.67  # mature era

        # Calculate fundamental-based adjustment
        metrics = fundamentals.get_market_maturity_metrics(df, date)
        fund_adj = fundamentals.calculate_volatility_adjustment(metrics)

        results.append(
            {
                "date": date,
                "era_adjustment": era_adj,
                "fundamental_adjustment": fund_adj,
                "metrics": metrics,
            }
        )

    return pd.DataFrame(results)

# Analysis functions
def analyze_trends(df):
    """
    Analyze Bitcoin price trends using log returns with S2F awareness.
    """
    df = df.copy()
    fundamentals = MarketFundamentals()

    # Get halving dates and calculate cycle position
    halving_dates = get_halving_dates()
    df["Cycle_Position"] = df["Date"].apply(
        lambda x: get_cycle_position(x, halving_dates)
    )
    df["Cycle_Days"] = (df["Cycle_Position"] * 4 * 365).round().astype(int)

    # Calculate S2F metrics for each date
    supply_metrics = [
        fundamentals.calculate_supply_metrics(date) for date in df["Date"]
    ]
    df["S2F_Ratio"] = [m["stock_to_flow"] for m in supply_metrics]
    df["S2F_Change"] = df["S2F_Ratio"].pct_change()

    # Calculate log returns and basic cycle returns
    df["Log_Price"] = np.log(df["Close"])
    df["Log_Return"] = df["Log_Price"].diff()

    # Calculate cycle-based returns
    position_returns = df.groupby("Cycle_Days")["Log_Return"].mean()

    # Calculate S2F impact on returns
    s2f_impact = df.groupby("Cycle_Days")["S2F_Change"].mean()

    # Smooth both components
    window = 60
    smoothed_cycle_returns = position_returns.rolling(
        window=window,
        center=True,
        min_periods=window // 2,
    ).mean()
    smoothed_s2f_impact = s2f_impact.rolling(
        window=window,
        center=True,
        min_periods=window // 2,
    ).mean()

    # Fill NaN values
    smoothed_cycle_returns = smoothed_cycle_returns.bfill().ffill()
    smoothed_s2f_impact = smoothed_s2f_impact.bfill().ffill()

    # Combine cycle returns with S2F impact
    s2f_weight = 0.3  # Adjustable parameter
    combined_returns = (
        smoothed_cycle_returns * (1 - s2f_weight) + smoothed_s2f_impact * s2f_weight
    )

    # Apply dampening using current market metrics
    latest_metrics = fundamentals.get_market_maturity_metrics(df, df["Date"].max())
    market_adjustment = fundamentals.calculate_volatility_adjustment(latest_metrics)

    def adaptive_dampen(x):
        if x > 2 * combined_returns.std():
            return x * (0.6 * market_adjustment)
        elif x < -2 * combined_returns.std():
            return x * (0.7 * market_adjustment)
        return x * market_adjustment

    return combined_returns.map(adaptive_dampen)

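# Sketch of the expected call shape, on synthetic data (a seeded random walk,
# not real prices): analyze_trends needs Date, Close and Volume columns and
# returns dampened mean log returns indexed by day-in-cycle (Cycle_Days).
def _demo_analyze_trends():
    dates = pd.date_range("2020-01-01", periods=730, freq="D")
    rng = np.random.default_rng(0)
    close = 10_000 * np.exp(np.cumsum(rng.normal(0.001, 0.03, len(dates))))
    demo = pd.DataFrame({"Date": dates, "Close": close, "Volume": 1e9})
    return analyze_trends(demo)  # pd.Series indexed by Cycle_Days
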
def calculate_adaptive_volatility(
    df,
    short_window=30,
    medium_window=90,
    long_window=180,
    vol_clip_min=0.5,
    vol_clip_max=2.0,
):
    """
    Calculate volatility with adaptive window sizes based on market conditions.
    Returns a single volatility value for the most recent period.

    Incorporates long-term volatility as a stability baseline and additional
    reference point for regime detection.
    """
    df = df.copy()
    df["Log_Return"] = np.log(df["Close"]).diff()

    # Remove any NaN values that could cause issues
    df = df.dropna()

    if len(df) < long_window:
        # Not enough data, fall back to simple volatility
        return df["Log_Return"].std()

    # Get recent data for efficiency
    lookback = max(long_window * 2, 360)  # Use enough data for stable estimates
    recent_df = df.iloc[-lookback:].copy() if len(df) > lookback else df.copy()

    try:
        # Initial volatility estimate using base windows
        short_vol = recent_df["Log_Return"].ewm(span=short_window, adjust=False).std()
        medium_vol = recent_df["Log_Return"].ewm(span=medium_window, adjust=False).std()
        long_vol = recent_df["Log_Return"].ewm(span=long_window, adjust=False).std()

        # Ensure we have valid volatility values
        if short_vol.iloc[-1] == 0 or np.isnan(short_vol.iloc[-1]):
            return df["Log_Return"].std()  # Fallback to simple volatility

        # Calculate regime indicators for recent period
        medium_vol_mean = medium_vol.rolling(min(90, len(recent_df))).mean()
        long_vol_mean = long_vol.rolling(min(180, len(recent_df))).mean()

        if medium_vol_mean.iloc[-1] == 0:
            vol_regime = pd.Series([1.0] * len(recent_df))
        else:
            # Compare short-term to both medium and long-term volatility
            medium_regime = short_vol / medium_vol_mean
            long_regime = short_vol / long_vol_mean

            # Use the more conservative (higher) regime indicator
            vol_regime = pd.concat([medium_regime, long_regime], axis=1).max(axis=1)
            vol_regime = vol_regime.clip(vol_clip_min, vol_clip_max)

        # Get most recent regime reading
        latest_regime = vol_regime.iloc[-1]

        # Adjust window sizes based on current regime
        adj_factor = 1 / latest_regime
        adj_short = max(10, int(short_window * adj_factor))  # Minimum window of 10
        adj_medium = max(30, int(medium_window * adj_factor))
        adj_long = max(60, int(long_window * adj_factor))

        # Calculate final volatilities using adjusted windows
        final_short = recent_df["Log_Return"].iloc[-adj_short:].std()
        final_medium = recent_df["Log_Return"].iloc[-adj_medium:].std()
        final_long = recent_df["Log_Return"].iloc[-adj_long:].std()

        # If any volatility measure is NaN or 0, fall back to simple volatility
        if np.isnan([final_short, final_medium, final_long]).any() or 0 in [
            final_short,
            final_medium,
            final_long,
        ]:
            return df["Log_Return"].std()

        # Calculate regime-based weights, now incorporating long-term volatility
        high_vol_weight = (latest_regime - vol_clip_min) / (vol_clip_max - vol_clip_min)
        base_weights = np.array([0.2, 0.5, 0.3])  # Short, medium, long weights
        stress_weights = np.array(
            [0.4, 0.4, 0.2]
        )  # More weight on short-term during stress

        # Interpolate between base and stress weights
        weights = (
            base_weights * (1 - high_vol_weight) + stress_weights * high_vol_weight
        )

        # Calculate final volatility using all three timeframes
        final_vol = (
            final_short * weights[0]
            + final_medium * weights[1]
            + final_long * weights[2]
        )

        # Add uncertainty adjustment based on regime changes
        regime_change = abs(vol_regime.diff()).fillna(0)
        regime_change_mean = regime_change.rolling(5, min_periods=1).mean().iloc[-1]
        if regime_change_mean == 0:
            uncertainty_adjustment = 1.0
        else:
            regime_change_zscore = regime_change.iloc[-1] / regime_change_mean
            uncertainty_adjustment = 1 + np.clip(regime_change_zscore / 2, 0, 0.5)

        return max(final_vol * uncertainty_adjustment, df["Log_Return"].std() * 0.5)

    except Exception as e:
        print(f"Error in adaptive volatility calculation: {e}")
        # Fall back to simple volatility calculation
        return df["Log_Return"].std()

def calculate_volatility(df, short_window=30, medium_window=90, long_window=180):
    """Calculate volatility using fundamental metrics and adaptive windows."""
    df = df.copy()
    df["Log_Return"] = np.log(df["Close"]).diff()

    if len(df) < 30:
        return 0.02  # Reasonable default for very short periods

    try:
        # Initialize fundamentals calculator
        fundamentals = MarketFundamentals()
        current_date = df["Date"].max()

        # Get recent data for efficiency
        lookback = max(long_window * 2, 360)
        recent_df = df.iloc[-lookback:].copy() if len(df) > lookback else df.copy()

        # Calculate base volatilities
        short_vol = recent_df["Log_Return"].ewm(span=short_window, adjust=False).std()
        medium_vol = recent_df["Log_Return"].ewm(span=medium_window, adjust=False).std()
        long_vol = recent_df["Log_Return"].ewm(span=long_window, adjust=False).std()

        if short_vol.iloc[-1] == 0 or np.isnan(short_vol.iloc[-1]):
            return df["Log_Return"].std()

        # Calculate volatility regime indicators
        medium_vol_mean = medium_vol.rolling(min(90, len(recent_df))).mean()
        long_vol_mean = long_vol.rolling(min(180, len(recent_df))).mean()

        # Compare short-term to both medium and long-term volatility
        if medium_vol_mean.iloc[-1] == 0:
            vol_regime = pd.Series([1.0] * len(recent_df))
        else:
            medium_regime = short_vol / medium_vol_mean
            long_regime = short_vol / long_vol_mean
            vol_regime = pd.concat([medium_regime, long_regime], axis=1).max(axis=1)
            vol_regime = vol_regime.clip(0.5, 2.0)

        latest_regime = vol_regime.iloc[-1]

        # Get market metrics
        metrics = fundamentals.get_market_maturity_metrics(df, current_date)

        # Calculate adaptive weights
        high_vol_weight = (latest_regime - 0.5) / 1.5  # 1.5 = 2.0 - 0.5
        base_weights = np.array([0.2, 0.5, 0.3])
        stress_weights = np.array([0.4, 0.4, 0.2])
        weights = (
            base_weights * (1 - high_vol_weight) + stress_weights * high_vol_weight
        )

        # Calculate final volatilities
        final_short = recent_df["Log_Return"].iloc[-short_window:].std()
        final_medium = recent_df["Log_Return"].iloc[-medium_window:].std()
        final_long = recent_df["Log_Return"].iloc[-long_window:].std()

        if np.isnan([final_short, final_medium, final_long]).any() or 0 in [
            final_short,
            final_medium,
            final_long,
        ]:
            return df["Log_Return"].std()

        # Apply market-based adjustment
        market_adjustment = fundamentals.calculate_volatility_adjustment(metrics)

        # Calculate final volatility
        final_vol = (
            final_short * weights[0]
            + final_medium * weights[1]
            + final_long * weights[2]
        ) * market_adjustment

        return max(final_vol, df["Log_Return"].std() * 0.5)

    except Exception as e:
        print(f"Error in volatility calculation: {e}")
        return df["Log_Return"].std()

def adjust_trend_expectations(expected_returns, cycle_position):
    """
    Simple trend adjustment.
    """
    if cycle_position > 0.75:
        damping_factor = 0.70
    else:
        damping_factor = 0.85
    return expected_returns * damping_factor

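# Worked one-liner for the damping above: late in the cycle (position > 0.75)
# expected returns are scaled by 0.70, otherwise by 0.85.
def _demo_adjust_trend():
    assert np.isclose(adjust_trend_expectations(0.01, 0.8), 0.007)
    assert np.isclose(adjust_trend_expectations(0.01, 0.3), 0.0085)
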
def calculate_market_conditions(df, lookback_window=180):
    """
    Calculate market condition metrics to inform uncertainty scaling.
    """
    df = df.copy()  # Avoid modifying original dataframe
    metrics = {}

    # Use log returns for stability
    df["Log_Return"] = np.log(df["Close"]).diff()
    # Handle initial NaN values
    df["Log_Return"] = df["Log_Return"].bfill()

    # Recent vs historical volatility ratio
    recent_vol = max(df["Log_Return"].tail(30).std(), 1e-6)  # Prevent division by zero
    historical_vol = max(df["Log_Return"].tail(lookback_window).std(), 1e-6)
    metrics["vol_ratio"] = recent_vol / historical_vol

    # Trend strength using log prices
    log_prices = np.log(df["Close"])
    ma50 = log_prices.rolling(50, min_periods=1).mean()
    ma200 = log_prices.rolling(200, min_periods=1).mean()
    metrics["trend_strength"] = (ma50.iloc[-1] - ma200.iloc[-1]) / historical_vol

    # Drawdown intensity
    rolling_max = df["Close"].rolling(lookback_window, min_periods=1).max()
    current_drawdown = df["Close"].iloc[-1] / rolling_max.iloc[-1] - 1
    metrics["drawdown"] = abs(min(current_drawdown, 0))

    return metrics

def get_projection_adjustments(days_forward, current_cycle_position, df):
    """Enhanced projection adjustments using market fundamentals."""
    adjustments = np.ones(days_forward)
    fundamentals = MarketFundamentals()

    # Pre-calculate metrics
    lookback = min(365, len(df))
    historical_metrics = [
        fundamentals.get_market_maturity_metrics(df, date)
        for date in df["Date"].tail(lookback)
    ]
    historical_avg_volume_to_supply = np.mean(
        [m["volume_to_supply"] for m in historical_metrics]
    )

    current_metrics = fundamentals.get_market_maturity_metrics(df, df["Date"].max())
    base_uncertainty = 0.016 * (1 + current_metrics["supply_growth_rate"] * 365)
    vol_factor = 1 + 0.2 * abs(1 - current_metrics["volume_to_supply"] * 50)
    market_depth_factor = 1 / np.sqrt(1 + current_metrics["market_depth"] * 5)
    s2f_factor = 1 / np.log1p(current_metrics["stock_to_flow"])
    regime_scale = np.clip(
        current_metrics["volume_to_supply"] / historical_avg_volume_to_supply, 0.88, 1.2
    )

    for i in range(days_forward):
        time_factor = min(
            1
            + (i / 365)
            * base_uncertainty
            * vol_factor
            * market_depth_factor
            * s2f_factor,
            1.20,
        )
        cycle_position = (current_cycle_position + i / 1460) % 1
        local_cycle_factor = 1.15 if cycle_position > 0.75 else 1.0
        adjustments[i] = time_factor * local_cycle_factor * regime_scale
        adjustments[i] = max(adjustments[i], 1.02 + (i / 365) * 0.01)

    return adjustments

def calculate_confidence_intervals(simulated_paths, confidence_levels=[0.95, 0.68]):
    """
    Calculate confidence intervals with dynamic quantile selection based on market conditions.
    """
    results = {}
    for level in confidence_levels:
        # Calculate standard error of the median
        median_std = np.std(
            [np.median(simulated_paths[:, i]) for i in range(simulated_paths.shape[1])]
        )

        # Adjust quantiles based on estimation uncertainty
        adjustment = min(0.1, median_std / np.median(simulated_paths))  # Cap adjustment

        # Widen intervals slightly when uncertainty is high
        effective_level = level + (1 - level) * adjustment

        lower_percentile = (1 - effective_level) * 100 / 2
        upper_percentile = 100 - lower_percentile

        results[f"Lower_{int(level * 100)}"] = np.percentile(
            simulated_paths, lower_percentile, axis=1
        )
        results[f"Upper_{int(level * 100)}"] = np.percentile(
            simulated_paths, upper_percentile, axis=1
        )

    return results

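# Shape sketch on synthetic paths (seeded toy walk, not model output): rows
# are days, columns are simulations, so each returned bound is one value per
# day.
def _demo_confidence_intervals():
    rng = np.random.default_rng(1)
    paths = 100 * np.exp(np.cumsum(rng.normal(0, 0.02, size=(60, 500)), axis=0))
    ci = calculate_confidence_intervals(paths)
    assert ci["Lower_95"].shape == (60,)
    assert (ci["Lower_68"] <= ci["Upper_68"]).all()
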
def project_prices(
    df, days_forward=365, simulations=1000, confidence_levels=[0.95, 0.68]
):
    """Generate price projections with fundamental-based adjustments."""
    df = df.copy()
    fundamentals = MarketFundamentals()
    df["Log_Price"] = np.log(df["Close"])
    df["Log_Return"] = df["Log_Price"].diff()

    # Get halving dates and current cycle position
    halving_dates = get_halving_dates()
    current_date = df["Date"].max()
    cycle_position = get_cycle_position(current_date, halving_dates)
    current_cycle_days = int(cycle_position * 4 * 365)

    # Current price and date
    last_price = df["Close"].iloc[-1]
    last_date = df["Date"].iloc[-1]

    # Generate projection dates
    future_dates = pd.date_range(
        start=last_date + timedelta(days=1), periods=days_forward, freq="D"
    )

    # Calculate expected returns
    future_cycle_days = [
        (current_cycle_days + i) % (4 * 365) for i in range(days_forward)
    ]
    cycle_trends = analyze_trends(df)
    expected_returns = np.array(
        [cycle_trends.get(day, cycle_trends.mean()) for day in future_cycle_days]
    )

    # Calculate base volatility
    base_volatility = calculate_volatility(df)

    # Get projection adjustments
    projection_adjustments = get_projection_adjustments(
        days_forward, cycle_position, df
    )

    # Run Monte Carlo simulation
    np.random.seed(42)  # Restored for reproducibility
    simulated_paths = np.zeros((days_forward, simulations))

    for sim in range(simulations):
        drift = expected_returns
        vol = base_volatility
        time_scaled_vol = vol * projection_adjustments
        returns = np.random.normal(loc=drift, scale=time_scaled_vol, size=days_forward)

        cumulative_returns = np.cumsum(returns)
        price_path = last_price * np.exp(cumulative_returns)
        simulated_paths[:, sim] = price_path

    # Calculate results
    results = pd.DataFrame(index=future_dates)
    results["Median"] = np.percentile(simulated_paths, 50, axis=1)
    results["Expected_Trend"] = last_price * np.exp(np.cumsum(expected_returns))

    # Calculate confidence intervals
    for level in confidence_levels:
        metrics = fundamentals.get_market_maturity_metrics(df, current_date)

        # Calculate intervals by projection horizon
        lower_bounds = []
        upper_bounds = []

        for day in range(days_forward):
            time_factor = (
                0.85 if day > 365 else 0.9 if day > 180 else 0.95 if day > 90 else 1.0
            )
            effective_level = (
                fundamentals.calculate_confidence_adjustment(metrics, level)
                * time_factor
            )
            lower_percentile = (1 - effective_level) * 100 / 2
            upper_percentile = 100 - lower_percentile
            lower_bounds.append(
                np.percentile(simulated_paths[day, :], lower_percentile)
            )
            upper_bounds.append(
                np.percentile(simulated_paths[day, :], upper_percentile)
            )

        results[f"Lower_{int(level * 100)}"] = lower_bounds
        results[f"Upper_{int(level * 100)}"] = upper_bounds

    return results

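# End-to-end sketch on synthetic data (seeded random walk, not real prices):
# project_prices returns a DataFrame indexed by future dates with Median,
# Expected_Trend and Lower/Upper 68/95 columns.
def _demo_project_prices():
    dates = pd.date_range("2019-01-01", periods=1095, freq="D")
    rng = np.random.default_rng(2)
    close = 5_000 * np.exp(np.cumsum(rng.normal(0.001, 0.03, len(dates))))
    demo = pd.DataFrame({"Date": dates, "Close": close, "Volume": 1e9})
    proj = project_prices(demo, days_forward=90, simulations=200)
    assert {"Median", "Expected_Trend", "Lower_95", "Upper_95"} <= set(proj.columns)
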
def analyze_bitcoin_prices(csv_path):
    """
    Analyze Bitcoin price data to calculate volatility and growth rates.
    """
    # Read CSV with proper data types
    df = pd.read_csv(csv_path, parse_dates=[0])

    # Print first few rows of raw data to inspect
    print("\nFirst few rows of raw data:")
    print(df.head())

    # Print data info to see types and non-null counts
    print("\nDataset Info:")
    print(df.info())

    # Convert price columns to float and handle any potential formatting issues
    numeric_columns = ["Price", "Open", "High", "Low", "Vol."]  # Added Volume
    for col in numeric_columns:
        # Remove any commas and 'K'/'M' suffixes
        df[col] = df[col].astype(str).str.replace(",", "")
        # Convert K to thousands
        df[col] = df[col].str.replace("K", "e3")
        # Convert M to millions
        df[col] = df[col].str.replace("M", "e6")
        # Convert B to billions
        df[col] = df[col].str.replace("B", "e9")
        # Convert to numeric
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Rename columns for clarity
    df.columns = ["Date", "Close", "Open", "High", "Low", "Volume", "Change"]

    # Sort by date in ascending order
    df = df.sort_values("Date")

    # Print summary statistics after conversion
    print("\nPrice Summary After Conversion:")
    print(df[["Close", "Open", "High", "Low", "Volume"]].describe())

    # Calculate daily returns
    df["Daily_Return"] = df["Close"].pct_change()

    # Print first few daily returns to verify calculation
    print("\nFirst few daily returns:")
    print(df[["Date", "Close", "Daily_Return"]].head())

    # Check for any infinite or NaN values
    print("\nInfinite or NaN value counts:")
    print(df.isna().sum())

    # Calculate metrics using 365 days for annualization
    analysis = {
        "period_start": df["Date"].min().strftime("%Y-%m-%d"),
        "period_end": df["Date"].max().strftime("%Y-%m-%d"),
        "total_days": len(df),
        "daily_volatility": df["Daily_Return"].std(),
        "annualized_volatility": df["Daily_Return"].std() * np.sqrt(365),
        "total_return": (df["Close"].iloc[-1] / df["Close"].iloc[0] - 1) * 100,
        "average_daily_return": df["Daily_Return"].mean() * 100,
        "average_annual_return": ((1 + df["Daily_Return"].mean()) ** 365 - 1) * 100,
        "min_price": df["Low"].min(),
        "max_price": df["High"].max(),
        "avg_price": df["Close"].mean(),
        "start_price": df["Close"].iloc[0],
        "end_price": df["Close"].iloc[-1],
    }

    # Calculate rolling metrics
    df["Rolling_Volatility_30d"] = df["Daily_Return"].rolling(
        window=30
    ).std() * np.sqrt(365)
    df["Rolling_Return_30d"] = df["Close"].pct_change(periods=30) * 100

    return analysis, df

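# The loader above expects a seven-column CSV in the style of an
# investing.com export: Date, Price, Open, High, Low, Vol., Change %. This
# sketch writes a tiny invented file in that layout and runs the loader on it
# (path and values are hypothetical).
def _demo_analyze_bitcoin_prices(path="demo_prices.csv"):
    pd.DataFrame(
        {
            "Date": ["01/01/2024", "01/02/2024", "01/03/2024"],
            "Price": ["44,500", "45,000", "46,500"],
            "Open": ["44,200", "44,800", "45,000"],
            "High": ["44,900", "45,900", "46,900"],
            "Low": ["44,000", "44,100", "44,900"],
            "Vol.": ["25.2K", "30.5K", "28.1K"],
            "Change %": ["0.5%", "1.2%", "3.3%"],
        }
    ).to_csv(path, index=False)
    analysis, clean_df = analyze_bitcoin_prices(path)
    return analysis["period_start"], clean_df.columns.tolist()
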
# Main plotting functions
def add_cdpr_plot(df, output: Output):
    """
    Add a plot showing the Compounding Daily Periodic Rate (CDPR) over different time periods.
    Also includes a logarithmic price axis on the right side.
    """
    plt.style.use("seaborn-v0_8")
    fig, ax1 = plt.subplots(figsize=(15, 6))

    # Calculate CDPR for different time periods
    periods = [180, 360, 720]
    cdpr = {}

    # Find the longest CDPR series length
    max_period = max(periods)

    daily_returns = df["Close"].pct_change().fillna(0)
    for period in periods:
        cdpr[f"{period}d CDPR"] = (
            daily_returns.rolling(period).apply(
                lambda x: (1 + x).prod() ** (1 / period) - 1, raw=True
            )
        ) * 100
        # Clip all CDPR series to the length of the longest one
        cdpr[f"{period}d CDPR"] = cdpr[f"{period}d CDPR"][max_period:]

    # Find the non-NaN min and max CDPR values
    cdpr_values = list(cdpr.values())
    min_cdpr = np.nanmin([np.nanmin(values) for values in cdpr_values])
    max_cdpr = np.nanmax([np.nanmax(values) for values in cdpr_values])

    # Ensure x-axis (dates) and y-axis (CDPR) have the same length
    start_date = df["Date"].iloc[max_period:].min()
    end_date = df["Date"].max()
    plot_dates = pd.date_range(start=start_date, end=end_date, freq="D")

    # Plot CDPR lines on the left axis
    for label, values in cdpr.items():
        ax1.plot(plot_dates, values, label=label)

    # Customize the left axis (CDPR)
    ax1.set_xlabel("Date")
    ax1.set_ylabel("CDPR (%)")
    ax1.grid(True, alpha=0.3)

    # Adjust y-axis tick marks and add shaded lines between ticks
    yticks = list(np.arange(int(min_cdpr), int(max_cdpr) + 1, 0.5))
    ax1.set_yticks(yticks)
    ax1.tick_params(axis="y", which="major", labelsize=8)
    ax1.set_yticklabels(["{:.1f}%".format(y) for y in yticks])

    # Add shaded lines between tick marks
    for i in range(1, len(yticks)):
        ax1.axhline(
            y=yticks[i], color="lightgray", linestyle="--", linewidth=1, alpha=0.5
        )

    # Create the right axis for price
    ax2 = ax1.twinx()

    # Plot price on the right axis
    price_data = df["Close"].iloc[max_period:]
    ax2.semilogy(
        plot_dates, price_data, color="#FF6B6B", linewidth=1.5, label="Price (USD)"
    )
    ax2.set_ylabel("Price (USD)", color="#FF6B6B")
    ax2.tick_params(axis="y", labelcolor="#FF6B6B")

    # Set human-readable price ticks
    price_ticks = [1, 10, 100, 1000, 10000, 100000]
    price_labels = ["$1", "$10", "$100", "$1k", "$10k", "$100k"]
    ax2.set_yticks(price_ticks)
    ax2.set_yticklabels(price_labels)

    # Add legends for both axes
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax1.legend(lines1 + lines2, labels1 + labels2, loc="upper left")

    plt.title("Compounding Daily Periodic Rate (CDPR) and Price")

    # Save the plot
    filename = output.named("bitcoin_cdpr_plot.png")
    plt.tight_layout()
    plt.savefig(filename, dpi=150, bbox_inches="tight")
    plt.close()

def create_plots(df, output: Output, start=None, end=None, project_days=365):
    """
    Create enhanced plots including market maturity visualization.
    """
    # Add the new CDPR plot
    add_cdpr_plot(df, output)

    # Filter data based on date range
    mask = pd.Series(True, index=df.index)
    if start:
        mask &= df["Date"] >= pd.to_datetime(start)
    if end:
        mask &= df["Date"] <= pd.to_datetime(end)
    plot_df = df[mask].copy()

    if len(plot_df) == 0:
        raise ValueError("No data found for the specified date range")

    # Generate projections
    projections = project_prices(plot_df, days_forward=project_days)

    # Set up the style
    plt.style.use("seaborn-v0_8")

    # Create figure with adjusted size and spacing
    fig = plt.figure(figsize=(15, 15))

    # Use GridSpec for better control over subplot spacing
    gs = plt.GridSpec(5, 1, height_ratios=[3, 1.5, 1.5, 1.5, 2], hspace=0.4)

    # Date range for titles
    hist_date_range = f"({plot_df['Date'].min().strftime('%Y-%m-%d')} to {plot_df['Date'].max().strftime('%Y-%m-%d')})"

    # Calculate full date range including projections
    full_date_range = pd.date_range(plot_df["Date"].min(), projections.index.max())

    # 1. Price history and projections (log scale)
    ax1 = fig.add_subplot(gs[0])

    # Plot historical prices
    ax1.semilogy(plot_df["Date"], plot_df["Close"], "b-", label="Historical Price")

    # Plot projections
    ax1.semilogy(
        projections.index,
        projections["Expected_Trend"],
        "--",
        color="purple",
        label="Expected Trend",
    )
    ax1.semilogy(
        projections.index,
        projections["Median"],
        ":",
        color="green",
        label="Simulated Median",
    )
    ax1.fill_between(
        projections.index,
        projections["Lower_95"],
        projections["Upper_95"],
        alpha=0.2,
        color="orange",
        label="95% Confidence Interval",
    )
    ax1.fill_between(
        projections.index,
        projections["Lower_68"],
        projections["Upper_68"],
        alpha=0.3,
        color="green",
        label="68% Confidence Interval",
    )

    # Customize y-axis
    ax1.yaxis.set_major_formatter(plt.FuncFormatter(format_price))
    min_price = min(plot_df["Low"].min(), projections["Lower_95"].min())
    max_price = max(plot_df["High"].max(), projections["Upper_95"].max())
    price_points = get_nice_price_points(min_price, max_price)
    ax1.set_yticks(price_points)
    ax1.tick_params(axis="y", labelsize=8)
    ax1.margins(y=0.02)
    ax1.grid(True, which="major", linestyle="-", alpha=0.5)
    ax1.grid(True, which="minor", linestyle=":", alpha=0.2)
    ax1.set_title("Bitcoin Price History and Projections (Log Scale) " + hist_date_range)
    ax1.legend(fontsize=8)

    # Set x-axis limits to full range
    ax1.set_xlim(full_date_range[0], full_date_range[-1])
    ax1.tick_params(axis="x", rotation=45)

    # 3. Rolling volatility
    ax3 = fig.add_subplot(gs[1])
    ax3.plot(
        plot_df["Date"],
        plot_df["Rolling_Volatility_30d"],
        "r-",
        label="30-Day Rolling Volatility",
    )

    # Add empty space to match price plot x-axis
    ax3.set_xlim(full_date_range[0], full_date_range[-1])

    # Add vertical line to mark start of projections
    ax3.axvline(plot_df["Date"].max(), color="gray", linestyle="--", alpha=0.5)
    ax3.text(
        plot_df["Date"].max(),
        ax3.get_ylim()[1],
        "Projection Start",
        rotation=90,
        va="top",
        ha="right",
        alpha=0.7,
    )

    ax3.set_title("30-Day Rolling Volatility (Annualized) " + hist_date_range)
    ax3.set_ylabel("Volatility")
    ax3.grid(True)
    ax3.yaxis.set_major_formatter(plt.FuncFormatter(lambda y, _: "{:.0%}".format(y)))
    ax3.legend()
    ax3.tick_params(axis="x", rotation=45)

    # 4. Returns distribution
    ax4 = fig.add_subplot(gs[2])
    returns_mean = plot_df["Daily_Return"].mean()
    returns_std = plot_df["Daily_Return"].std()
    filtered_returns = plot_df["Daily_Return"][
        (plot_df["Daily_Return"] > returns_mean - 5 * returns_std)
        & (plot_df["Daily_Return"] < returns_mean + 5 * returns_std)
    ]

    sns.histplot(filtered_returns, bins=100, ax=ax4)
    ax4.set_title(
        "Distribution of Daily Returns (Excluding Extreme Outliers) " + hist_date_range
    )
    ax4.set_xlabel("Daily Return")
    ax4.set_ylabel("Count")
    ax4.xaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: "{:.0%}".format(x)))

    # Add mean line
    ax4.axvline(filtered_returns.mean(), color="r", linestyle="dashed", linewidth=1)
    ax4.text(
        filtered_returns.mean(),
        ax4.get_ylim()[1],
        "Mean",
        rotation=90,
        va="top",
        ha="right",
    )

    # 5. Projection ranges
    ax5 = fig.add_subplot(gs[3:])  # Use last two grid spaces for larger plot
    timepoints = np.array(range(30, project_days, 30))
    timepoints = timepoints[timepoints <= project_days]

    ranges = []
    labels = []
    positions = []

    for t in timepoints:
        idx = t - 1
        ranges.extend(
            [
                projections["Lower_95"].iloc[idx],
                projections["Lower_68"].iloc[idx],
                projections["Median"].iloc[idx],
                projections["Upper_68"].iloc[idx],
                projections["Upper_95"].iloc[idx],
            ]
        )
        labels.extend(["95% Lower", "68% Lower", "Median", "68% Upper", "95% Upper"])
        positions.extend([t] * 5)

    ax5.scatter(positions, ranges, alpha=0.6)

    for t in timepoints:
        idx = positions.index(t)
        ax5.plot([t] * 5, ranges[idx : idx + 5], "k-", alpha=0.3)

    ax5.set_yscale("log")
    min_price = min(ranges)
    max_price = max(ranges)
    price_points = get_nice_price_points(min_price, max_price)
    ax5.set_yticks(price_points)
    ax5.yaxis.set_major_formatter(plt.FuncFormatter(format_price))
    ax5.set_title("Projected Price Ranges at Future Timepoints")
    ax5.set_xlabel("Days Forward")
    ax5.set_ylabel("Price (USD)")
    ax5.grid(True, alpha=0.3)
    ax5.set_xticks(timepoints)

    # Save the plot
    start_str = start if start else plot_df["Date"].min().strftime("%Y-%m-%d")
    end_str = end if end else plot_df["Date"].max().strftime("%Y-%m-%d")
    filename = output.named(
        f"bitcoin_analysis_{start_str}_to_{end_str}_with_projections.png"
    )

    # Use tight_layout with adjusted parameters
    plt.tight_layout(pad=2.0)
    plt.savefig(filename, dpi=300, bbox_inches="tight")
    plt.close()

    return projections

def visualize_cycle_patterns(df, output: Output, cycle_returns, cycle_volatility):
    """
    Create enhanced visualization of Bitcoin's behavior across halving cycles.
    """
    plt.style.use("seaborn-v0_8")
    fig = plt.figure(figsize=(15, 15))

    # Create a 3x1 subplot grid with different heights
    gs = plt.GridSpec(3, 1, height_ratios=[2, 1, 2], hspace=0.3)

    # Plot 1: Returns across cycle with confidence bands
    ax1 = plt.subplot(gs[0])

    # Convert days to percentage through cycle
    x_points = np.array(cycle_returns.index) / (4 * 365) * 100

    # Calculate rolling mean and standard deviation for confidence bands
    window = 30  # 30-day window
    rolling_mean = pd.Series(cycle_returns.values).rolling(window=window).mean()
    rolling_std = pd.Series(cycle_returns.values).rolling(window=window).std()

    # Plot confidence bands
    ax1.fill_between(
        x_points,
        (rolling_mean - 2 * rolling_std) * 100,
        (rolling_mean + 2 * rolling_std) * 100,
        alpha=0.2,
        color="blue",
        label="95% Confidence",
    )
    ax1.fill_between(
        x_points,
        (rolling_mean - rolling_std) * 100,
        (rolling_mean + rolling_std) * 100,
        alpha=0.3,
        color="blue",
        label="68% Confidence",
    )

    # Plot average returns
    ax1.plot(
        x_points,
        cycle_returns.values * 100,
        "b-",
        label="Average Daily Return",
        linewidth=2,
    )
    ax1.axhline(y=0, color="gray", linestyle="--", alpha=0.5)

    # Add vertical lines for each year in cycle
    for year in range(1, 4):
        ax1.axvline(x=year * 25, color="gray", linestyle=":", alpha=0.3)
        ax1.text(
            year * 25,
            ax1.get_ylim()[1],
            f"Year {year}",
            rotation=90,
            va="top",
            ha="right",
            alpha=0.7,
        )

    # Highlight halving points
    ax1.axvline(x=0, color="red", linestyle="--", alpha=0.5, label="Halving Event")
    ax1.axvline(x=100, color="red", linestyle="--", alpha=0.5)

    ax1.set_title("Bitcoin Return Patterns Across Halving Cycle", pad=20)
    ax1.set_xlabel("Position in Cycle (%)")
    ax1.set_ylabel("Average Daily Return (%)")
    ax1.grid(True, alpha=0.3)
    ax1.legend(loc="upper right")

    # Plot 2: Volatility across cycle
    ax2 = plt.subplot(gs[1])

    # Calculate rolling volatility confidence bands
    vol_mean = pd.Series(cycle_volatility.values).rolling(window=window).mean()
    vol_std = pd.Series(cycle_volatility.values).rolling(window=window).std()

    # Plot volatility with confidence bands
    annualized_factor = np.sqrt(365) * 100
    ax2.fill_between(
        x_points,
        (vol_mean - 2 * vol_std) * annualized_factor,
        (vol_mean + 2 * vol_std) * annualized_factor,
        alpha=0.2,
        color="red",
        label="95% Confidence",
    )
    ax2.plot(
        x_points,
        cycle_volatility.values * annualized_factor,
        "r-",
        label="Annualized Volatility",
        linewidth=2,
    )

    # Add year markers
    for year in range(1, 4):
        ax2.axvline(x=year * 25, color="gray", linestyle=":", alpha=0.3)

    ax2.axvline(x=0, color="red", linestyle="--", alpha=0.5)
    ax2.axvline(x=100, color="red", linestyle="--", alpha=0.5)

    ax2.set_xlabel("Position in Cycle (%)")
    ax2.set_ylabel("Volatility (%)")
    ax2.grid(True, alpha=0.3)
    ax2.legend(loc="upper right")

    # Plot 3: Average price trajectory within cycles
    ax3 = plt.subplot(gs[2])

    # Define a color scheme for cycles
    cycle_colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd"]

    # Calculate average price path for each cycle
    halving_dates = get_halving_dates()
    cycles = []

    for i in range(len(halving_dates) - 1):
        cycle_start = halving_dates[i]
        cycle_end = halving_dates[i + 1]
        cycle_data = df[(df["Date"] >= cycle_start) & (df["Date"] < cycle_end)].copy()

        if len(cycle_data) > 0:
            cycle_data["Cycle_Pct"] = (
                (cycle_data["Date"] - cycle_start).dt.total_seconds()
                / (cycle_end - cycle_start).total_seconds()
                * 100
            )
            cycle_data["Normalized_Price"] = (
                cycle_data["Close"] / cycle_data["Close"].iloc[0]
            )
            cycles.append(cycle_data)

    # Plot each historical cycle with distinct colors
    for i, cycle in enumerate(cycles):
        ax3.semilogy(
            cycle["Cycle_Pct"],
            cycle["Normalized_Price"],
            color=cycle_colors[i],
            alpha=0.7,
            label=f'Cycle {i + 1} ({cycle["Date"].iloc[0].strftime("%Y")}-{cycle["Date"].iloc[-1].strftime("%Y")})',
        )

    # Calculate and plot average cycle
    if cycles:
        avg_cycle = pd.concat(
            [c.set_index("Cycle_Pct")["Normalized_Price"] for c in cycles], axis=1
        )
        avg_cycle_mean = avg_cycle.mean(axis=1)
        avg_cycle_std = avg_cycle.std(axis=1)

        ax3.semilogy(
            avg_cycle_mean.index,
            avg_cycle_mean.values,
            "k-",
            linewidth=2,
            label="Average Cycle",
        )
        ax3.fill_between(
            avg_cycle_mean.index,
            avg_cycle_mean * np.exp(-2 * avg_cycle_std),
            avg_cycle_mean * np.exp(2 * avg_cycle_std),
            alpha=0.2,
            color="gray",
        )

    # Add year markers
    for year in range(1, 4):
        ax3.axvline(x=year * 25, color="gray", linestyle=":", alpha=0.3)

    ax3.axvline(x=0, color="red", linestyle="--", alpha=0.5)
    ax3.axvline(x=100, color="red", linestyle="--", alpha=0.5)

    ax3.set_title("Price Performance Across Cycles (Normalized)", pad=20)
    ax3.set_xlabel("Position in Cycle (%)")
    ax3.set_ylabel("Price (Relative to Cycle Start)")
    ax3.grid(True, alpha=0.3)
    ax3.legend(loc="center left", bbox_to_anchor=(1.02, 0.5))

    # Add current cycle position marker on all plots
    current_position = get_cycle_position(df["Date"].max(), halving_dates) * 100
    for ax in [ax1, ax2, ax3]:
        ax.axvline(
            x=current_position,
            color="green",
            linestyle="-",
            alpha=0.5,
            label="Current Position",
        )

    # Main title for the figure
    fig.suptitle("Bitcoin Halving Cycle Analysis", fontsize=16, y=0.95)

    # Adjust layout to prevent legend cutoff
    plt.tight_layout()

    # Save the plot
    filename = output.named("bitcoin_cycle_patterns.png")
    plt.savefig(filename, dpi=300, bbox_inches="tight")
    plt.close()

def create_backtest_plot (
df ,
output : Output ,
backtest_date = " 2020-05-11 " ,
start_date = " 2012-11-28 " ,
project_days = 1650 ,
) :
"""
Create a plot comparing actual price history against model projections from a historical date .
Returns both the projections and performance metrics .
Args :
df : DataFrame with historical price data
backtest_date : Date to start the backtest from
start_date : Date to start considering historical data
project_days : Number of days to project forward from backtest date
Returns :
tuple : ( projections DataFrame , metrics dictionary )
"""
# Convert dates to datetime
backtest_date = pd . to_datetime ( backtest_date )
start_date = pd . to_datetime ( start_date )
# Validate dates
if start_date > = backtest_date :
raise ValueError ( " start_date must be earlier than backtest_date " )
# Clean the data: remove rows with zero or invalid prices and filter by date
df = df [ ( df [ " Close " ] > 0 ) & ( df [ " Date " ] > = start_date ) ] . copy ( )
# Split data into training (before backtest date) and validation (after backtest date)
training_df = df [ df [ " Date " ] < = backtest_date ] . copy ( )
validation_df = df [ df [ " Date " ] > backtest_date ] . copy ( )
# Check if we have enough data
if len ( training_df ) < 30 : # Require at least 30 days of training data
raise ValueError ( " Insufficient training data before backtest date " )
if len ( validation_df ) < project_days :
warnings . warn (
f " Validation period ( { len ( validation_df ) } days) shorter than projection period ( { project_days } days) "
)
# Generate historical projections using only training data
historical_projections = project_prices ( training_df , days_forward = project_days )
# Set up the plot
plt . style . use ( " seaborn-v0_8 " )
_ , ax = plt . figure ( figsize = ( 15 , 10 ) ) , plt . gca ( )
# Plot training data
heading_label = f ' Historical Price (Training: { start_date . strftime ( " % Y- % m- %d " ) } to { backtest_date . strftime ( " % Y- % m- %d " ) } ) '
ax . semilogy (
training_df [ " Date " ] ,
training_df [ " Close " ] ,
" b- " ,
label = heading_label ,
alpha = 0.7 ,
)
# Plot validation data
ax . semilogy (
validation_df [ " Date " ] ,
validation_df [ " Close " ] ,
" g- " ,
label = f ' Actual Price (Validation: { backtest_date . strftime ( " % Y- % m- %d " ) } onwards) ' ,
linewidth = 2 ,
)
# Plot projections
ax . semilogy (
historical_projections . index ,
historical_projections [ " Expected_Trend " ] ,
" -- " ,
color = " purple " ,
label = " Model Projection (Expected) " ,
)
ax . semilogy (
historical_projections . index ,
historical_projections [ " Median " ] ,
" : " ,
color = " orange " ,
label = " Model Projection (Median) " ,
)
# Add confidence intervals
ax . fill_between (
historical_projections . index ,
historical_projections [ " Lower_95 " ] ,
historical_projections [ " Upper_95 " ] ,
alpha = 0.2 ,
color = " orange " ,
label = " 95 % Confidence Interval " ,
)
ax . fill_between (
historical_projections . index ,
historical_projections [ " Lower_68 " ] ,
historical_projections [ " Upper_68 " ] ,
alpha = 0.3 ,
color = " green " ,
label = " 68 % Confidence Interval " ,
)
    # Customize y-axis
    ax.yaxis.set_major_formatter(plt.FuncFormatter(format_price))

    # Set custom y-axis ticks
    min_price = min(
        df["Low"].min(),
        historical_projections["Lower_95"].min(),
        0.0001,  # Minimum price floor
    )
    max_price = max(df["High"].max(), historical_projections["Upper_95"].max())
    price_points = get_nice_price_points(min_price, max_price)
    ax.set_yticks(price_points)

    # Add halving lines
    halving_dates = get_halving_dates()
    relevant_halvings = halving_dates[
        (halving_dates >= start_date) & (halving_dates <= validation_df["Date"].max())
    ]
    for date in relevant_halvings:
        ax.axvline(date, color="red", linestyle="--", alpha=0.3)
        ax.text(
            date,
            ax.get_ylim()[1],
            "Halving",
            rotation=90,
            va="top",
            ha="right",
            alpha=0.7,
        )
    # Calculate model performance metrics
    metrics = {}
    if len(validation_df) > 0:
        # Align actual prices and projections on their common dates
        actual_prices = validation_df.set_index("Date")["Close"]
        common_dates = actual_prices.index.intersection(historical_projections.index)

        if len(common_dates) > 0:
            actual_aligned = actual_prices[common_dates]
            projections_aligned = historical_projections.loc[common_dates]

            # Calculate metrics using aligned data
            metrics = {
                "mape": np.mean(
                    np.abs(
                        (actual_aligned - projections_aligned["Expected_Trend"])
                        / actual_aligned
                    )
                )
                * 100,
                "rmse": np.sqrt(
                    np.mean(
                        (actual_aligned - projections_aligned["Expected_Trend"]) ** 2
                    )
                ),
                "max_error": np.max(
                    np.abs(actual_aligned - projections_aligned["Expected_Trend"])
                ),
                "coverage_95": np.mean(
                    (actual_aligned >= projections_aligned["Lower_95"])
                    & (actual_aligned <= projections_aligned["Upper_95"])
                )
                * 100,
                "coverage_68": np.mean(
                    (actual_aligned >= projections_aligned["Lower_68"])
                    & (actual_aligned <= projections_aligned["Upper_68"])
                )
                * 100,
            }

            # Add metrics to plot (only when they could be computed)
            metrics_text = (
                f"Model Performance Metrics:\n"
                f"MAPE: {metrics['mape']:.1f}%\n"
                f"RMSE: ${metrics['rmse']:,.0f}\n"
                f"Max Error: ${metrics['max_error']:,.0f}\n"
                f"95% CI Coverage: {metrics['coverage_95']:.1f}%\n"
                f"68% CI Coverage: {metrics['coverage_68']:.1f}%"
            )
            ax.text(
                0.02,
                0.98,
                metrics_text,
                transform=ax.transAxes,
                verticalalignment="top",
                bbox=dict(facecolor="white", alpha=0.8),
            )
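
    # Interpretation note: coverage_95 and coverage_68 measure calibration,
    # i.e. the share of validation days whose actual close falls inside the
    # projected interval; well-calibrated intervals cover roughly 95% and 68%.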
    # Customize plot
    ax.set_title(
        f"Bitcoin Price: Model Backtest\n"
        f'Training: {start_date.strftime("%Y-%m-%d")} to {backtest_date.strftime("%Y-%m-%d")}'
    )
    ax.set_xlabel("Date")
    ax.set_ylabel("Price (USD)")
    ax.grid(True, which="major", linestyle="-", alpha=0.5)
    ax.grid(True, which="minor", linestyle=":", alpha=0.2)
    ax.legend(loc="center left", bbox_to_anchor=(1.02, 0.5))

    # Adjust layout and save
    plt.tight_layout()
    filename = output.named(
        f'bitcoin_backtest_{start_date.strftime("%Y%m%d")}_to_{backtest_date.strftime("%Y%m%d")}.png'
    )
    plt.savefig(filename, dpi=300, bbox_inches="tight")
    plt.close()

    return historical_projections, metrics
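

# Example usage (a sketch; assumes `df` has already been loaded with Date,
# Close, Low, and High columns, e.g. via analyze_bitcoin_prices, and the
# "backtest-demo" output name is illustrative):
#
#     projections, metrics = create_backtest_plot(
#         df,
#         Output("output", "backtest-demo"),
#         backtest_date="2021-01-01",
#         start_date="2013-01-01",
#         project_days=730,
#     )
#     print(f"MAPE: {metrics.get('mape', float('nan')):.1f}%")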


def run_projection(args):
    """Unpack one (df, start, output) tuple and render a single projection plot."""
    df, start, output = args
    create_plots(df, output, start=start, project_days=365 * 4)


def run_projections(df, output: Output):
    """Create projection plots for several historical start dates in parallel."""
    projection_starts = [
        "2011-01-01",
        "2012-01-01",
        "2013-01-01",
        "2014-01-01",
        "2015-01-01",
        "2016-07-09",
    ]
    args = [(df, start, output) for start in projection_starts]
    with Pool() as pool:
        pool.map(run_projection, args)


def run_single_backtest(args):
    """
    Run a single backtest with the given parameters.
    Must be defined at module level for multiprocessing.

    Args:
        args: tuple of (params dict, DataFrame, Output)
    """
    params, df, output = args
    try:
        # Create a copy of params without the description
        backtest_params = params.copy()
        backtest_params.pop("description", None)

        projections, metrics = create_backtest_plot(df, output, **backtest_params)

        # Ensure metrics has all required keys with default values
        if metrics is None:
            metrics = {}
        default_metrics = {
            "mape": 0.0,
            "rmse": 0.0,
            "max_error": 0.0,
            "coverage_95": 0.0,
            "coverage_68": 0.0,
        }
        # Fill in defaults for any missing keys
        metrics = {**default_metrics, **metrics}

        return {
            "params": params,
            "projections": projections,
            "metrics": metrics,
            "success": True,
        }
    except Exception as e:
        print(f"Error in backtest for period {params['description']}: {e}")
        return {"params": params, "error": str(e), "success": False}


def run_systematic_backtests(
    df, output: Output, validation_years=2, min_training_years=8
):
    """
    Run a comprehensive suite of backtests with consistent validation periods.
    Uses sliding windows for both start and end dates.
    """
    # Convert years to days
    validation_days = validation_years * 365
    min_training_days = min_training_years * 365

    # Define the start of reliable data and the latest usable window start
    mature_start = pd.Timestamp("2011-01-01")
    last_possible_start = df["Date"].max() - pd.Timedelta(
        days=min_training_days + validation_days
    )
    end_date = df["Date"].max() - pd.Timedelta(days=validation_days)

    if mature_start >= last_possible_start:
        raise ValueError(
            f"Insufficient data for backtesting with current parameters:\n"
            f"- Data range: {mature_start} to {df['Date'].max()}\n"
            f"- Minimum training period: {min_training_years} years\n"
            f"- Validation period: {validation_years} years"
        )

    old_backtests = [
        {
            "start_date": "2016-07-09",
            "backtest_date": "2024-04-19",
            "project_days": validation_days,
            "description": "Second until fourth halving",
        },
        {
            "start_date": "2013-01-01",  # Includes pre-futures for cycle learning
            "backtest_date": "2020-05-11",
            "project_days": validation_days,
            "description": "Post-Futures Window with two cycles of training",
        },
        {
            "start_date": "2014-01-01",
            "backtest_date": "2021-12-31",
            "project_days": validation_days,
            "description": "Cross-Regime Test with two cycles of training",
        },
        {
            "start_date": "2015-01-01",
            "backtest_date": "2022-01-01",
            "project_days": validation_days,
            "description": "Recent Window focusing on post-2022 behavior",
        },
    ]
    backtest_periods = []
    backtest_periods.extend(old_backtests)

    # Generate backtest periods with sliding windows
    window_start = mature_start
    step = pd.Timedelta(days=180)  # 6-month steps
    while window_start <= last_possible_start:
        backtest_date = window_start + pd.Timedelta(days=min_training_days)
        backtest_periods.append(
            {
                "start_date": window_start.strftime("%Y-%m-%d"),
                "backtest_date": backtest_date.strftime("%Y-%m-%d"),
                "project_days": validation_days,
                "description": f"Training {window_start.strftime('%Y-%m-%d')} to {backtest_date.strftime('%Y-%m-%d')}",
            }
        )
        window_start += step
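
    # For example, with the defaults (min_training_years=8, validation_years=2),
    # the first window trains from 2011-01-01 for eight 365-day years (a
    # backtest date in late December 2018) and validates over the following
    # two years; each later window shifts both dates forward by ~6 months.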

    # Add specific periods of interest
    special_periods = []

    # Halving-based periods
    halving_dates = get_halving_dates()
    relevant_halvings = [
        h
        for h in halving_dates
        if h < end_date and h > (mature_start + pd.Timedelta(days=min_training_days))
    ]
    for halving in relevant_halvings:
        earliest_start = halving - pd.Timedelta(days=min_training_days)
        if earliest_start >= mature_start:
            special_periods.append(
                {
                    "start_date": earliest_start.strftime("%Y-%m-%d"),
                    "backtest_date": halving.strftime("%Y-%m-%d"),
                    "project_days": validation_days,
                    "description": f"Pre-halving {halving.strftime('%Y')}",
                }
            )

    # Market structure change periods
    important_dates = [
        ("2017-12-01", "Post-futures introduction"),
        ("2020-03-01", "Post-COVID crash"),
        ("2021-11-01", "Post-2021 peak"),
    ]
    for date, description in important_dates:
        test_date = pd.Timestamp(date)
        if test_date < end_date:
            earliest_start = test_date - pd.Timedelta(days=min_training_days)
            if earliest_start >= mature_start:
                special_periods.append(
                    {
                        "start_date": earliest_start.strftime("%Y-%m-%d"),
                        "backtest_date": date,
                        "project_days": validation_days,
                        "description": description,
                    }
                )

    # Combine and remove duplicates (keyed on start and backtest dates)
    all_periods = backtest_periods + special_periods
    unique_periods = []
    seen_dates = set()
    for period in all_periods:
        key = f"{period['start_date']}_{period['backtest_date']}"
        if key not in seen_dates:
            unique_periods.append(period)
            seen_dates.add(key)

    if not unique_periods:
        raise ValueError("No valid backtest periods found with current parameters")

    # Sort periods by backtest date for clearer analysis
    unique_periods.sort(key=lambda x: pd.Timestamp(x["backtest_date"]))
print ( " \n Running backtests with: " )
print (
f " - Start dates range: { unique_periods [ 0 ] [ ' start_date ' ] } to { unique_periods [ - 1 ] [ ' start_date ' ] } "
)
print (
f " - Backtest dates range: { unique_periods [ 0 ] [ ' backtest_date ' ] } to { unique_periods [ - 1 ] [ ' backtest_date ' ] } "
)
print ( f " - Minimum training period: { min_training_years } years " )
print ( f " - Validation period: { validation_years } years " )
print ( f " - Number of test periods: { len ( unique_periods ) } " )
print ( " \n Test periods: " )
for period in unique_periods :
print ( f " - { period [ ' description ' ] } " )
# Create args tuples with params and DataFrame
args = [ ( params , df , output ) for params in unique_periods ]
# Use multiprocessing
with Pool ( ) as pool :
results = pool . map ( run_single_backtest , args )
# Analyze results
successful_tests = [ r for r in results if r [ " success " ] ]
failed_tests = [ r for r in results if not r [ " success " ] ]
    # Define stress periods
    stress_periods = {
        # COVID crash and recovery
        ("2020-03-01", "2020-09-01"): "COVID crash period",
        # 2021 peak and subsequent crash
        ("2021-11-01", "2022-06-01"): "2021 peak aftermath",
        # Add more stress periods as needed
    }

    def is_stress_period(test_date):
        """Check whether a test date falls within any defined stress period."""
        test_date = pd.Timestamp(test_date)
        for start, end in stress_periods:
            if pd.Timestamp(start) <= test_date <= pd.Timestamp(end):
                return True
        return False
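
    # For example, is_stress_period("2020-04-15") is True (inside the COVID
    # crash window), while is_stress_period("2019-06-01") is False.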
    # Categorize results
    normal_periods = []
    stress_periods_results = []
    for result in successful_tests:
        if is_stress_period(result["params"]["backtest_date"]):
            stress_periods_results.append(result)
        else:
            normal_periods.append(result)

    # Calculate aggregate metrics for each category (simple averages across runs)
    def calculate_category_metrics(results):
        if not results:
            return None
        return {
            "count": len(results),
            "mape": np.mean([r["metrics"]["mape"] for r in results]),
            "rmse": np.mean([r["metrics"]["rmse"] for r in results]),
            "max_error": np.mean([r["metrics"]["max_error"] for r in results]),
            "coverage_95": np.mean([r["metrics"]["coverage_95"] for r in results]),
            "coverage_68": np.mean([r["metrics"]["coverage_68"] for r in results]),
        }

    normal_metrics = calculate_category_metrics(normal_periods)
    stress_metrics = calculate_category_metrics(stress_periods_results)
# Write detailed results
with output . create ( " bitcoin_backtest_results_summary.txt " ) as f :
f . write ( " Systematic Backtest Results \n " )
f . write ( " ========================== \n \n " )
f . write ( " Configuration: \n " )
f . write ( f " - Minimum training period: { min_training_years } years \n " )
f . write ( f " - Validation period: { validation_years } years \n " )
f . write (
f " - Start dates range: { unique_periods [ 0 ] [ ' start_date ' ] } to { unique_periods [ - 1 ] [ ' start_date ' ] } \n "
)
f . write (
f " - Backtest dates range: { unique_periods [ 0 ] [ ' backtest_date ' ] } to { unique_periods [ - 1 ] [ ' backtest_date ' ] } \n "
)
f . write ( f " - Number of test periods: { len ( unique_periods ) } \n \n " )
# Normal Periods
f . write ( " Normal Market Periods \n " )
f . write ( " ==================== \n " )
f . write ( f " Number of periods: { len ( normal_periods ) } \n \n " )
for result in normal_periods :
f . write ( " \n " + " = " * 50 + " \n " )
f . write ( f " Period: { result [ ' params ' ] [ ' description ' ] } \n " )
f . write (
f " Training: { result [ ' params ' ] [ ' start_date ' ] } to { result [ ' params ' ] [ ' backtest_date ' ] } \n "
)
f . write (
f " Validation: { result [ ' params ' ] [ ' backtest_date ' ] } to { pd . Timestamp ( result [ ' params ' ] [ ' backtest_date ' ] ) + pd . Timedelta ( days = validation_years * 365 ) : %Y-%m-%d } \n "
)
f . write ( " \n Metrics: \n " )
for metric , value in result [ " metrics " ] . items ( ) :
if metric in [ " mape " , " coverage_95 " , " coverage_68 " ] :
f . write ( f " - { metric } : { value : .1f } % \n " )
else :
f . write ( f " - { metric } : $ { value : ,.0f } \n " )
f . write ( " \n " )
if normal_metrics :
f . write ( " \n Normal Periods Aggregate Metrics: \n " )
f . write ( f " MAPE: { normal_metrics [ ' mape ' ] : .1f } % \n " )
f . write ( f " RMSE: $ { normal_metrics [ ' rmse ' ] : ,.0f } \n " )
f . write ( f " Average Max Error: $ { normal_metrics [ ' max_error ' ] : ,.0f } \n " )
f . write ( f " 95% CI Coverage: { normal_metrics [ ' coverage_95 ' ] : .1f } % \n " )
f . write ( f " 68% CI Coverage: { normal_metrics [ ' coverage_68 ' ] : .1f } % \n " )
# Stress Periods
f . write ( " \n \n Stress Periods \n " )
f . write ( " ============= \n " )
f . write ( f " Number of periods: { len ( stress_periods_results ) } \n \n " )
for result in stress_periods_results :
f . write ( " \n " + " = " * 50 + " \n " )
f . write ( f " Period: { result [ ' params ' ] [ ' description ' ] } \n " )
f . write (
f " Training: { result [ ' params ' ] [ ' start_date ' ] } to { result [ ' params ' ] [ ' backtest_date ' ] } \n "
)
f . write (
f " Validation: { result [ ' params ' ] [ ' backtest_date ' ] } to { pd . Timestamp ( result [ ' params ' ] [ ' backtest_date ' ] ) + pd . Timedelta ( days = validation_years * 365 ) : %Y-%m-%d } \n "
)
f . write ( " \n Metrics: \n " )
for metric , value in result [ " metrics " ] . items ( ) :
if metric in [ " mape " , " coverage_95 " , " coverage_68 " ] :
f . write ( f " - { metric } : { value : .1f } % \n " )
else :
f . write ( f " - { metric } : $ { value : ,.0f } \n " )
f . write ( " \n " )
if stress_metrics :
f . write ( " \n Stress Periods Aggregate Metrics: \n " )
f . write ( f " MAPE: { stress_metrics [ ' mape ' ] : .1f } % \n " )
f . write ( f " RMSE: $ { stress_metrics [ ' rmse ' ] : ,.0f } \n " )
f . write ( f " Average Max Error: $ { stress_metrics [ ' max_error ' ] : ,.0f } \n " )
f . write ( f " 95% CI Coverage: { stress_metrics [ ' coverage_95 ' ] : .1f } % \n " )
f . write ( f " 68% CI Coverage: { stress_metrics [ ' coverage_68 ' ] : .1f } % \n " )
return (
normal_metrics ,
stress_metrics ,
normal_periods ,
stress_periods_results ,
failed_tests ,
)


# CLI
def get_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
        prog="model",
        description="Bitcoin price model",
    )
    parser.add_argument(
        "-o",
        "--output",
        help="output base directory",
        default="./output",
    )
    parser.add_argument(
        "-n",
        "--name",
        help="subdir of the output base directory",
        default="baseline",
    )
    return parser.parse_args()
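

# Command-line usage (a sketch; assumes this module is saved as model.py and
# that prices.csv is present in the working directory):
#
#     python model.py --output ./output --name baseline
#
# Results are then written under ./output/baseline/.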


def main():
    args = get_args()

    global output
    output = Output(args.output, args.name)
analysis , df = analyze_bitcoin_prices ( " prices.csv " )
    run_projections(df, output)

    normal_metrics, stress_metrics, normal_results, stress_results, failed_tests = (
        run_systematic_backtests(df, output)
    )

    print("\nAggregate Metrics:")
    print(
        f"Total backtests run: "
        f"{len(normal_results) + len(stress_results) + len(failed_tests)}"
    )
    print(f"Successful tests: {len(normal_results) + len(stress_results)}")
    print(f"Failed tests: {len(failed_tests)}")

    if normal_metrics:
        print(f"\nAverage Performance ({normal_metrics['count']} normal periods):")
        print(f"MAPE: {normal_metrics['mape']:.1f}%")
        print(f"RMSE: ${normal_metrics['rmse']:,.0f}")
        print(f"95% CI Coverage: {normal_metrics['coverage_95']:.1f}%")
        print(f"68% CI Coverage: {normal_metrics['coverage_68']:.1f}%")

    if failed_tests:
        print("\nFailed Tests:")
        for test in failed_tests:
            print(f"Period: {test['params']['description']}")
            print(f"Error: {test['error']}\n")


if __name__ == "__main__":
    main()