%matplotlib inline
%config InlineBackend.figure_format = 'svg'


import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.interpolate import interp1d
from scipy.integrate import trapz


# Read the Wrightwood earthquake chronology from a CSV expected in the working
# directory.  Columns used below: age_mean, age_5 and age_95.
wrightwood_eqs = pd.read_csv('wrightwood_eq_times.csv')

# Preview the first rows (only displayed when run as a notebook cell).
wrightwood_eqs.head()


# Plot each event's mean age against its 1-based position in the table, with
# asymmetric error bars reaching down to age_5 and up to age_95.
plt.figure(figsize=(6,6))

plt.errorbar(
    wrightwood_eqs.index + 1,
    wrightwood_eqs.age_mean,
    yerr=np.vstack([
        wrightwood_eqs.age_mean - wrightwood_eqs.age_5,
        wrightwood_eqs.age_95 - wrightwood_eqs.age_mean,
    ]),
    fmt='.',
)
plt.title('Wrightwood Earthquake Chronology')
plt.xlabel('Earthquake in sequence')
plt.ylabel('Calendar Year')

plt.show()


# Compare the recurrence intervals of the "old" and "young" earthquake series:
# density histograms on the left, cumulative histograms on the right.
plt.figure(figsize=(8,3))
plt.subplot(121)

# `density=True` replaces the `normed=True` keyword, which Matplotlib removed.
plt.hist(old_rec_ints.ravel(), bins=100, density=True,
         alpha=0.5, histtype='stepfilled', label='old')
plt.hist(young_rec_ints.ravel(), bins=100, density=True,
         alpha=0.5, histtype='stepfilled', label='young')
plt.legend()

plt.xlabel('Wrightwood Recurrence intervals')

plt.subplot(122)

# Same data as cumulative, outline-only histograms (empirical CDFs).
plt.hist(old_rec_ints.ravel(), bins=100, density=True, cumulative=True,
         alpha=0.5, histtype='step', label='old')
plt.hist(young_rec_ints.ravel(), bins=100, density=True, cumulative=True,
         alpha=0.5, histtype='step', label='young')
plt.ylim([0,1])
plt.legend()

plt.xlabel('Wrightwood Recurrence intervals')
plt.show()


# Pool every sampled recurrence interval from both series into one flat array.
wr_rec_ints = np.hstack([old_rec_ints.ravel(), 
                         young_rec_ints.ravel()])


# Density histogram of the pooled intervals.  `density=True` replaces the
# `normed=True` keyword, which Matplotlib removed.
plt.hist(wr_rec_ints, bins=100, density=True,
         histtype='stepfilled')
plt.xlabel('Southern San Andreas Recurrence Intervals')
plt.show()


# Bin the pooled recurrence intervals into a density histogram.  `density=True`
# replaces the `normed=True` keyword, which NumPy removed from np.histogram.
wr_int_probs, wr_int_vals = np.histogram(wr_rec_ints, 
                                         bins=100, density=True)
# np.histogram returns one more bin edge than bin value; pad a leading 0 so
# the two arrays have equal length and each density pairs with the upper edge
# of its bin.
wr_int_probs = np.append(0., wr_int_probs)


plt.figure(figsize=(8,3))
plt.subplot(121)
# Left: binned density against bin edge.
plt.plot(wr_int_vals, wr_int_probs)
plt.xlabel('Southern San Andreas Recurrence Intervals')

plt.subplot(122)
# Right: running sum of the densities scaled to end at 1 (empirical CDF), with
# a grey marker at 2016 - 1857 = 159 years.
plt.plot(wr_int_vals, np.cumsum(wr_int_probs)/ np.sum(wr_int_probs))
plt.gca().axvline(2016-1857, color='grey')
plt.gca().set_ylim([0,1])

plt.show()


def pdf(t):
    """Evaluate the recurrence-interval probability density at ``t``.

    A cubic interpolant is rebuilt on every call from the module-level arrays
    ``wr_int_vals`` (x values) and ``wr_int_probs`` (y values).

    Parameters
    ----------
    t : scalar or array-like
        Recurrence interval(s) at which to evaluate the density.

    Returns
    -------
    numpy.ndarray
        Interpolated density; 0 wherever ``t`` lies outside the range of
        ``wr_int_vals``.
    """
    density_interp = interp1d(
        wr_int_vals,
        wr_int_probs,
        kind='cubic',
        bounds_error=False,
        fill_value=0.,
    )
    return density_interp(t)


def cdf(t):
    """Evaluate the recurrence-interval cumulative distribution at ``t``.

    The CDF is the running sum of the module-level ``wr_int_probs`` scaled to
    end at 1, interpolated cubically over ``wr_int_vals``.  The interpolant is
    rebuilt on every call.

    Parameters
    ----------
    t : scalar or array-like
        Recurrence interval(s) at which to evaluate the CDF.

    Returns
    -------
    numpy.ndarray
        Interpolated cumulative probability.  Values of ``t`` below the range
        of ``wr_int_vals`` give 0 and values above it give 1.
    """
    cumulative = np.cumsum(wr_int_probs) / np.sum(wr_int_probs)
    # A (below, above) fill pair keeps the CDF at 0 left of the data and 1
    # right of it.  A single fill of 1 would make the survival function 0, and
    # the hazard undefined, for intervals shorter than any observed.
    cdf_ = interp1d(wr_int_vals, cumulative, kind='cubic',
                    bounds_error=False, fill_value=(0., 1.))
    return cdf_(t)


def S(t):
    """Survival function: one minus ``cdf(t)``.

    Accepts whatever ``cdf`` accepts and returns an array of the same shape.
    """
    survival = 1 - cdf(t)
    return survival


def λ(t):
    """Hazard rate at ``t``: the density divided by the survival function.

    No guard is applied where ``S(t)`` is 0, so the result there is whatever
    NumPy division yields (inf or NaN).
    """
    density = pdf(t)
    surviving = S(t)
    return density / surviving


def mean_rec_interval(t):
    """Integrate the survival function over ``t`` with the trapezoid rule.

    Parameters
    ----------
    t : array-like
        Sample points used both to evaluate ``S`` and as the integration grid.

    Returns
    -------
    float
        Trapezoid-rule integral of ``S(t)`` with respect to ``t``.
    """
    survival = S(t)
    return trapz(survival, t)


# Hazard rate evaluated at 2016 - 1857 = 159 years of elapsed time.
λ(2016-1857)

# Presumably the recorded notebook output of the expression above.
0.020610501157809748


ts = np.arange(1,370) # vector of years since last event (1 through 369)


# Hazard rate for each elapsed time, plotted against calendar year
# (elapsed years + 1857) on a logarithmic y-axis.
plt.plot(ts + 1857, λ(ts))
plt.gca().set_yscale('log')
# Grey guides: vertical line at 2016, dashed horizontal line at that year's
# hazard value.
plt.gca().axvline(2016, color='grey')
plt.gca().axhline(λ(2016-1857), color='grey', linestyle='--')

plt.title('Earthquake hazard on the southern San Andreas,\n'+
          'given no earthquake since 1857')
plt.xlabel('Year')
plt.ylabel('annual earthquake probability')

plt.ylim([1e-3, 1])

plt.show()


def mas(t,t_e):
    """Return the elements of array ``t`` that are strictly greater than ``t_e``."""
    is_later = t > t_e
    return t[is_later]


def menos(t,t_e):
    """Return the elements of array ``t`` that are less than or equal to ``t_e``."""
    not_later = t <= t_e
    return t[not_later]


def S_cond(t, t_e, return_past=True):
    """Survival function conditioned on no event up to elapsed time ``t_e``.

    For every element of ``t`` greater than ``t_e`` the value is
    ``S(t) / S(t_e)``.

    Parameters
    ----------
    t : numpy.ndarray
        Times at which to evaluate.  NOTE(review): the padded output lines up
        element-for-element with ``t`` only if ``t`` is sorted ascending --
        confirm against callers.
    t_e : scalar
        Elapsed time that is known to have been survived.
    return_past : bool, optional
        If true (default), prepend one ``1.0`` for each element of ``t`` that
        is ``<= t_e``.  If false, return only the values for ``t > t_e``.

    Returns
    -------
    numpy.ndarray
        Conditional survival values, optionally padded with leading ones.
    """
    S_conds = S(mas(t, t_e)) / S(t_e)

    # Test truthiness rather than `== True`, so truthy non-bool flags are not
    # silently treated as false.
    if not return_past:
        return S_conds

    # The past values are only needed for their count.
    n_past = len(menos(t, t_e))
    return np.hstack([np.ones(n_past), S_conds])
    
    
def pdf_cond(t, t_e):
    """Per-step probability of an event, conditioned on survival to ``t_e``.

    Computed as successive differences of the conditional CDF
    ``1 - S_cond(t, t_e)``, with a leading 0 so the output has the same length
    as the conditional survival array.
    """
    cond_cdf = 1 - S_cond(t, t_e)
    increments = np.diff(cond_cdf)
    return np.hstack([0., increments])

    
def mean_time_remaining(t,t_e):
    """Trapezoid-rule integral of the conditional survival function.

    Only the elements of ``t`` greater than ``t_e`` are used, both as
    evaluation points for ``S_cond`` and as the integration grid.
    """
    future = mas(t, t_e)
    cond_survival = S_cond(future, t_e, return_past=False)
    return trapz(cond_survival, future)


# Conditional per-step event probability given 159 elapsed years
# (2016 - 1857 = 159), plotted against calendar year (elapsed years + 1857).
plt.plot(ts + 1857, pdf_cond(ts, 159))
plt.xlim([2015, 2250])
plt.ylim([0, 0.02])

plt.xlabel('Year')
plt.ylabel('probability')
plt.title('Probability of next southern San Andreas earthquake by year')
plt.show()


# Integral of the conditional survival function over the part of `ts`
# beyond 159.
mean_time_remaining(ts, 159)

# Presumably the recorded notebook output of the expression above.
38.207254921781896


# Split the chronology by the sign of the mean age: positive values go to the
# "young" set and negative values to the "old" set (a mean age of exactly 0
# would land in neither).
w_young = wrightwood_eqs.loc[wrightwood_eqs['age_mean'] > 0]
w_old = wrightwood_eqs.loc[wrightwood_eqs['age_mean'] < 0]


# Mean spacing between consecutive mean ages, as a (young, old) pair.
(np.diff(w_young['age_mean']).mean(), np.diff(w_old['age_mean']).mean())

(101.76923076923077, 108.61538461538461)


def make_eq_pdf(age_min, age_mean, age_max):
    """Build the three vertices of a triangular age distribution.

    The break point (peak) is obtained from ``get_age_break`` and the density
    at each vertex from ``triangular_pdf``.

    Parameters
    ----------
    age_min, age_mean, age_max : scalar
        Lower bound, mean and upper bound of the age.

    Returns
    -------
    tuple of numpy.ndarray
        ``(ages, densities)`` evaluated at ``[age_min, break, age_max]``.
    """
    peak_age = get_age_break(age_min, age_max, age_mean)
    vertices = np.array([age_min, peak_age, age_max])
    densities = triangular_pdf(vertices, *vertices)
    return vertices, densities
    
    
def triangular_pdf(x, age_min, age_break, age_max):
    """Evaluate a triangular probability density at ``x``.

    The density is 0 at and below ``age_min``, rises linearly to
    ``2 / (age_max - age_min)`` at ``age_break``, falls linearly back to 0 at
    ``age_max`` and is 0 above it.

    Parameters
    ----------
    x : scalar or array-like
        Points at which to evaluate; converted to a float array.
    age_min, age_break, age_max : scalar
        Lower bound, peak location and upper bound of the triangle.

    Returns
    -------
    numpy.ndarray
        Density values, same shape as ``x``.
    """
    x = np.asarray(x, dtype=float)

    def rising_limb(v):
        return 2 * (v - age_min) / ((age_max - age_min) * (age_break - age_min))

    def falling_limb(v):
        return 2 * (age_max - v) / ((age_max - age_min) * (age_max - age_break))

    regions = [
        x <= age_min,
        (age_min < x) & (x <= age_break),
        (age_break < x) & (x <= age_max),
        x > age_max,
    ]
    return np.piecewise(x, regions, [0, rising_limb, falling_limb, 0])


def get_age_break(age_min, age_max, age_mean):
    """Return ``3 * age_mean - age_min - age_max``.

    This is the peak location of a triangular distribution whose mean is
    ``age_mean`` and whose bounds are ``age_min`` and ``age_max``.  The result
    is not checked to lie between the bounds.
    """
    tripled_mean = 3 * age_mean
    return tripled_mean - age_min - age_max


# Plot the triangular age distribution of the event in row 16.  `.loc` replaces
# the `.ix` indexer, which pandas removed.
plt.plot(*make_eq_pdf(*wrightwood_eqs.loc[16, ['age_5', 'age_mean', 'age_95']]))
plt.xlabel('Calendar Year')
plt.ylabel('Probability')

plt.title('Timing of Earthquake {}'.format(wrightwood_eqs.loc[16, 'eq']))

plt.show()


def make_pdf(vals, probs, n_interp=1000):
    """Resample a piecewise-linear distribution onto an even grid.

    Parameters
    ----------
    vals : array-like
        Values at which ``probs`` is given.
    probs : array-like
        Relative probability at each element of ``vals``.
    n_interp : int, optional
        Number of evenly spaced grid points between the smallest and largest
        element of ``vals``.

    Returns
    -------
    tuple
        ``(grid, weights)`` where ``weights`` are the linearly interpolated
        probabilities scaled to sum to 1.  When all of ``vals`` are equal the
        result is ``(that value, 1.)`` instead.
    """
    lowest, highest = np.min(vals), np.max(vals)

    # A single-valued distribution carries no uncertainty.
    if lowest == highest:
        return lowest, 1.

    grid = np.linspace(lowest, highest, n_interp)
    sampled = interp1d(vals, probs)(grid)
    return grid, sampled / np.sum(sampled)


def make_cdf(pdf_range, pdf_probs):
    """Pair ``pdf_range`` (returned unchanged) with the running sum of ``pdf_probs``."""
    cumulative = np.cumsum(pdf_probs)
    return pdf_range, cumulative


def inverse_transform_sample(vals, probs, n_samps, n_interp=1000):
    """Draw random samples from a tabulated distribution.

    The distribution is resampled with ``make_pdf``, accumulated with
    ``make_cdf``, and uniform random numbers from ``np.random.rand`` are
    mapped through the inverted CDF by linear interpolation.

    Parameters
    ----------
    vals : array-like
        Values at which ``probs`` is given.
    probs : array-like
        Relative probability at each element of ``vals``.
    n_samps : int
        Number of samples to draw.
    n_interp : int, optional
        Grid size passed to ``make_pdf``.

    Returns
    -------
    numpy.ndarray
        ``n_samps`` sampled values.  If the distribution is a single point,
        every sample equals that point.
    """
    pdf_range, pdf_probs = make_pdf(vals, probs, n_interp)
    cdf_range, cdf_probs = make_cdf(pdf_range, pdf_probs)

    # Point distribution: nothing to invert.
    if len(cdf_probs) == 1:
        return np.ones(n_samps) * pdf_range

    # Uniform draws that fall outside the tabulated CDF (below its first value
    # or, through round-off, above its last) map to the nearest end of the
    # value range.  A fill of 0. would inject spurious samples at 0.
    cdf_interp = interp1d(cdf_probs, cdf_range, bounds_error=False,
                          fill_value=(cdf_range[0], cdf_range[-1]))
    samps = np.random.rand(n_samps)

    return cdf_interp(samps)
    

def sample_eq_pdf(row, n_samps):
    """Draw ``n_samps`` random ages for one earthquake.

    Parameters
    ----------
    row : mapping
        Record providing ``'age_5'``, ``'age_mean'`` and ``'age_95'``, used as
        the lower bound, mean and upper bound passed to ``make_eq_pdf``.
    n_samps : int
        Number of samples to draw.

    Returns
    -------
    numpy.ndarray
        Output of ``inverse_transform_sample`` for that distribution.
    """
    lower, mean, upper = row['age_5'], row['age_mean'], row['age_95']
    vertex_ages, vertex_probs = make_eq_pdf(lower, mean, upper)
    return inverse_transform_sample(vertex_ages, vertex_probs, n_samps)


# Side-by-side check for the event in row 16: its triangular age distribution
# (left) against a histogram of 50000 samples drawn from it (right).
# `.loc` replaces the `.ix` indexer, which pandas removed, and `density=True`
# replaces the `normed=True` keyword, which Matplotlib removed.
plt.figure(figsize=(8,3))
plt.subplot(121)

plt.plot(*make_eq_pdf(*wrightwood_eqs.loc[16, ['age_5', 'age_mean', 'age_95']]))
plt.xlabel('Calendar Year')
plt.ylabel('Probability')
plt.title('Timing of Earthquake {}, PDF'.format(wrightwood_eqs.loc[16, 'eq']))

plt.subplot(122)
plt.hist(sample_eq_pdf(wrightwood_eqs.loc[16], 50000), bins=20, density=True)
plt.xlabel('Calendar Year')
plt.title('Timing of Earthquake {}, Samples'.format(wrightwood_eqs.loc[16, 'eq']))

plt.show()


# Make EQ time series

n_samples = 10 ** 6

# Draw n_samples ages for every earthquake, one earthquake per row, then
# transpose so that each row is one sampled time series and each column is
# one earthquake.
old_times = np.transpose(np.array(
    [sample_eq_pdf(event, n_samples) for _, event in w_old.iterrows()]
))

young_times = np.transpose(np.array(
    [sample_eq_pdf(event, n_samples) for _, event in w_young.iterrows()]
))


def recurrence_intervals(times):
    """Return the gaps between consecutive event times in each row.

    Parameters
    ----------
    times : numpy.ndarray
        2-D array; each row is sorted independently before differencing.

    Returns
    -------
    numpy.ndarray
        Array with one fewer column than ``times`` holding the differences
        between neighbouring sorted values of each row.
    """
    ordered = np.sort(times, axis=1)
    return np.diff(ordered, axis=1)


# Recurrence intervals of every sampled time series, for each set in turn.
old_rec_ints, young_rec_ints = (
    recurrence_intervals(sampled) for sampled in (old_times, young_times)
)

	eq	age_mean	age_5	age_95
0	W350	-2915	-2974	-2883
1	W380	-2746	-2807	-2695
2	W390	-2657	-2742	-2601
3	W402	-2610	-2670	-2569
4	W410	-2503	-2561	-2450

Earthquake recurrence and survival analysis: How long should we wait for an overdue earthquake?

Richard Styron

Wrightwood earthquake data

Time-dependent earthquake forecasting on the southern San Andreas

Earthquake hazards each year

Expected time until the next earthquake

Appendix: Calculating earthquake recurrence probabilities

Making earthquake timing probabilities

Earthquake recurrence from synthetic earthquake time series