Exploring strength and stress relationships in critical wedges with raw topography data through Bayesian inversions
Richard Styron
I received an email a few weeks ago from my friend Sean Gallen asking about Bayesian modeling of critical wedges, in order to really explore the relationship between fault strength and tectonic stress. As it was a far more interesting use of my afternoon than my actual responsibilities (slogging through the paper I was writing), I of course dropped everything and went to work.
Sean was interested in a form of the critical wedge model from Suppe (2007). The full critical wedge equation, from Dahlen (1990), is:
$$ \alpha + \beta = \frac{\beta[1-(\rho_f / \rho)] + \mu_b(1-\lambda_b) + S_b / \rho g H} {[1-(\rho_f / \rho)] + 2(1-\lambda) \left[\frac{\sin \phi}{1 - \sin \phi }\right] + C / \rho g H} $$where $\alpha$ is the wedge surface slope, $\beta$ is the dip of the basal thrust (decollement), $\rho$ is rock density, $\rho_f$ is the density of the fluid surrounding the wedge (air or water), $\mu_b$ is the coefficient of friction of the basal thrust, $\lambda$ is the pore fluid pressure in the wedge normalized by the lithostatic pressure ($\rho g H$), $\lambda_b$ is the normalized pore fluid pressure on the decollement, $\phi$ is the angle of internal friction, $S_b$ is the basal cohesion/plasticity, and $C$ is the uniaxial compressive strength of the wedge.
Suppe consolidates the equation by gathering several terms into $F = \mu_b(1-\lambda_b) + S_b / \rho g H$ and $W = 2 ( 1 - \lambda) [\sin \phi / (1 - \sin \phi)] + C / \rho g H$. $F$ represents fault strength and $W$ represents the wedge strength. Since the model is at critical (Coulomb) failure, these terms also represent stresses: $F = \sigma_\tau / \rho g H$ and $W = (\sigma_1 - \sigma_3) / \rho g H$.
This consolidation allows us to simplify the big equation above into a linear relationship between the surface slope $\alpha$ and the decollement slope $\beta$,
$$ \alpha = \frac{F}{\left[1 - (\rho_f / \rho) \right] + W} - \frac{W}{\left[1-(\rho_f / \rho)\right]+W}\beta $$which is of the form $y = m x + b$, i.e. a line.
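As a quick sanity check on the algebra (a standalone sketch with made-up parameter values, not part of the analysis below), the full Dahlen equation and the consolidated linear form give the same surface slope:

import numpy as np

# made-up, but physically plausible, parameter values
rho, rho_f = 2700., 0.                    # rock and fluid densities (kg/m^3); subaerial wedge
g, H = 9.81, 10e3                         # gravity (m/s^2) and a reference wedge thickness (m)
mu_b, lam_b, S_b = 0.2, 0.4, 5e6          # basal friction, basal pore-pressure ratio, basal cohesion (Pa)
lam, phi, C = 0.4, np.radians(30), 10e6   # wedge pore-pressure ratio, internal friction, compressive strength (Pa)
beta = np.radians(6)                      # decollement dip

# Suppe's consolidated fault-strength and wedge-strength terms
F = mu_b * (1 - lam_b) + S_b / (rho * g * H)
W = 2 * (1 - lam) * np.sin(phi) / (1 - np.sin(phi)) + C / (rho * g * H)

buoy = 1 - rho_f / rho

# full form: alpha + beta = (beta * buoy + F) / (buoy + W)
alpha_full = (beta * buoy + F) / (buoy + W) - beta

# linear form: alpha = F / (buoy + W) - W / (buoy + W) * beta
alpha_linear = F / (buoy + W) - W / (buoy + W) * beta

print(np.degrees(alpha_full), np.degrees(alpha_linear))  # the two agree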
Suppe uses this linear relationship with sets of ($\alpha$, $\beta$) measurements from a few active wedges to estimate $F$ and $W$ through linear regression. This is a good approach if you have multiple sets of $\alpha$ and $\beta$ measurements for a single wedge that have a bit of variation and a linear relationship.
However, this isn't exactly our problem. We are more interested in a single transect (with one value each for $\alpha$ and $\beta$) and finding out how $F$ and $W$ covary. Furthermore, Sean has insightfully noted that calculating $\alpha$ directly from DEM-derived slope data is problematic: you either have to filter/smooth the topography data, or you get really noisy $\alpha$ measurements. Instead, you can take a value for $\alpha$, predict the elevation at a point in the wedge, and compare that to the real elevation, which sidesteps a lot of the messiness.
So we're going to work on that. We could definitely do this for multiple transects in a wedge, especially if we have good data from somewhere on the basal detachment geometry, and really try to pin down $F$ and $W$ as Suppe did. But that's a blog post for a different day.
If I had any (regular?) readers of this blog, these fictional persons would surely realize by now that we're going to do all of this with Python in a Jupyter notebook. The notebook is available here and the underlying data are here and here. You will need to change the paths specified below to wherever you download the files to run the notebook.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import json
import time
import numpy as np
from scipy.stats.mstats import gmean
import matplotlib.pyplot as plt
import pandas as pd
import pyproj as pj
from multiprocessing import Pool
import matplotlib
matplotlib.rcParams.update({'font.size': 16})
Load topographic data
First we're going to get the data.
Load transect points
The transect is a line normal to the Himalayan wedge, running through some locally high topography. I then used a 0.3° buffer around it to extract points from the 250 m SRTM dataset. We will also use the transect end points to define the x-axis for the analysis.
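That buffered extraction isn't part of the notebook, but for reference it could be done along these lines with shapely (a sketch only; srtm_pts and 'raw_srtm_points.csv' are hypothetical names for the raw SRTM point set, with lon/lat/elev columns):

from shapely.geometry import LineString, Point

# hypothetical file of raw SRTM points covering the whole region
srtm_pts = pd.read_csv('raw_srtm_points.csv', names=['lon', 'lat', 'elev'])

line_coords = [(86.726076, 28.316242), (86.227448, 26.592723)]  # transect end points
swath = LineString(line_coords).buffer(0.3)  # 0.3 degree buffer polygon around the transect

# keep only the points that fall inside the buffer polygon
inside = srtm_pts.apply(lambda row: swath.contains(Point(row.lon, row.lat)), axis=1)
topo_pts = srtm_pts[inside]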
with open('../data/transect_line.geojson', 'r') as f:
transect_pts_gj = json.load(f)
transect_coords = transect_pts_gj['features'][0]['geometry']['coordinates']
transect_pts = pd.DataFrame(data=transect_coords,
columns=['lon', 'lat'],
index=['p1', 'p2'])
transect_pts # these are the end points of the line.
| | lon | lat |
|---|---|---|
| p1 | 86.726076 | 28.316242 |
| p2 | 86.227448 | 26.592723 |
# convert to utm45
utm45 = pj.Proj(init='epsg:32645')
wgs84 = pj.Proj(init='epsg:4326')
transect_pts['easting'], transect_pts['northing'] = pj.transform(wgs84, utm45,
transect_pts['lon'].values,
transect_pts['lat'].values)
transect_pts
| | lon | lat | easting | northing |
|---|---|---|---|---|
| p1 | 86.726076 | 28.316242 | 473146.100052 | 3.132265e+06 |
| p2 | 86.227448 | 26.592723 | 423075.501911 | 2.941559e+06 |
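An aside: the Proj(init=...) and pj.transform calls above use the pyproj 1.x API, which is deprecated in newer versions. If you're running pyproj 2 or later, the equivalent projection looks like this:

from pyproj import Transformer

# lon/lat (EPSG:4326) to UTM zone 45N (EPSG:32645); always_xy keeps (lon, lat) ordering
to_utm45 = Transformer.from_crs('EPSG:4326', 'EPSG:32645', always_xy=True)
easting, northing = to_utm45.transform(transect_pts['lon'].values,
                                       transect_pts['lat'].values)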
Now we're going to read in a topography dataset, in .csv format, with Pandas
topo_pts = pd.read_csv('../data/himalaya_topo_data_wgs84.csv',
names=['lon', 'lat', 'elev'])
Look at the end of the data file:
topo_pts.tail()
| | lon | lat | elev |
|---|---|---|---|
| 312103 | 86.239065 | 26.294209 | 57 |
| 312104 | 86.241148 | 26.294209 | 55 |
| 312105 | 86.243232 | 26.294209 | 54 |
| 312106 | 86.245315 | 26.294209 | 56 |
| 312107 | 86.247398 | 26.294209 | 53 |
Now let's make a little plot:
plt.figure(figsize=(6,10))
plt.scatter(topo_pts.lon, topo_pts.lat,
c=topo_pts.elev,
lw=0, s=20)
plt.plot(transect_pts['lon'], transect_pts['lat'], 'ro--')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.colorbar(label='Elevation (m)')
plt.axis('equal')
plt.show()
# Project these points into UTM 45N
topo_pts['easting'], topo_pts['northing'] = pj.transform(wgs84, utm45,
topo_pts.lon.values,
topo_pts.lat.values)
plt.figure(figsize=(6,10))
plt.scatter(topo_pts.easting, topo_pts.northing,
c=topo_pts.elev,
lw=0, s=20)
plt.plot(transect_pts['easting'], transect_pts['northing'], 'ro--')
plt.colorbar(label='Elevation (m)')
plt.xlabel('Easting (m)')
plt.ylabel('Northing (m)')
plt.axis('equal')
plt.show()
Rotating points to transect coordinates
The critical wedge model that we're using is two-dimensional, with the x-axis along the transect (i.e. the red line in the plots above) and the y-axis as elevation. So we need to rotate and translate all of the points into this coordinate system, which collapses all of the lateral (across-transect) variation; that variation then shows up as scatter in the elevation data.
First we find the east and north coordinates in meters relative to the southern transect point, p2, which will be the new origin.
topo_pts['rel_east'] = topo_pts['easting'] - transect_pts.loc['p2','easting']
topo_pts['rel_north'] = topo_pts['northing'] - transect_pts.loc['p2','northing']
plt.figure(figsize=(6,10))
plt.scatter(topo_pts.rel_east, topo_pts.rel_north,
c=topo_pts.elev,
lw=0, s=20)
plt.colorbar(label='Elevation (m)')
plt.xlabel('east (m)')
plt.ylabel('north (m)')
plt.axis('equal')
plt.show()
Then we find the rotation angle (which rotates the transect to the x-axis), and do the rotation as a matrix multiplication.
tran_angle = np.arctan2((transect_pts.loc['p1','northing']
-transect_pts.loc['p2','northing']),
(transect_pts.loc['p1','easting']
-transect_pts.loc['p2','easting']))
rot_angle = -tran_angle
rot_angle
-1.3140380682025581
# rotation matrix
R = np.array([[np.cos(rot_angle), -np.sin(rot_angle)],
[np.sin(rot_angle), np.cos(rot_angle)]])
# do the rotation
rot_pts = np.dot(R, topo_pts[['rel_east', 'rel_north']].values.T)
# add pts back to dataframe
topo_pts['x'] = rot_pts[0,:]
topo_pts['y'] = rot_pts[1,:]
plt.figure(figsize=(10,6))
plt.scatter(topo_pts.x, topo_pts.y,
c=topo_pts.elev,
lw=0, s=20)
plt.colorbar(orientation='horizontal', label='Elevation (m)')
plt.xlabel('transect distance (m)')
plt.ylabel('off-transect distance (m)')
plt.axis('equal')
plt.show()
Sweet! Now let's look at the data in a vertical cross-section:
plt.figure(figsize=(10,4))
plt.plot(topo_pts.x, topo_pts.elev, 'k,', alpha=0.3)
plt.xlabel('transect distance (m)')
plt.ylabel('elevation (m)')
plt.show()
Looks like a swath profile through a wedge to me.
Finding Critical Wedge parameters
Ok, now that we've gotten the data loaded, let's fit the model to it.
We're going to do a Bayesian inversion for $F$ and $W$ given topography data $d$. In this inversion, we'll take values for $F$ and $W$, and then ascribe a likelihood to each $(F,W)$ combination based on the match between predicted and observed topography.
I will get into the nuts and bolts of the inversion later, but the fundamental equation here is Bayes' rule:
$$ p(F,W \, | \, d) \propto p(F,W) \cdot p(d\, | \, F,W)$$where $p(F,W \, | \, d)$ is the posterior probability (or just posterior(s)) of $F$ and $W$ given the data $d$, $p(F,W)$ is the prior probability (or prior(s)) of $F$ and $W$, and $p(d\, | \, F,W)$ is the likelihood of observing the data $d$ given values of $F$ and $W$.
The likelihood compares model predictions vs. observations. We got the observations ready in the previous section, so now we need to make predictions.
Slope <-> topography
As noted above, the equations relate $F$ and $W$ to $\alpha$ (and $\beta$), not to the elevation data.
Therefore, we'll predict the elevation at any transect coordinate $x$ by calculating $\alpha$ from $F$ and $W$ and projecting that slope up from a point at the toe of the wedge.
Note that I'm also fixing the decollement dip at 10°, and setting the fluid density to zero as it's a subaerial wedge. I will stick with these values for the remainder of the work, but you could change them to your preferred fluid density and detachment dip.
def get_wedge_slope(F, W, detach_dip=np.radians(10),
                    fluid_density=0., rock_density=2.7):
    # surface slope from the linear critical-wedge relationship:
    # alpha = (F - W * beta) / [(1 - rho_f / rho) + W]
    denominator = (1 - fluid_density / rock_density) + W
    alpha = (F - W * detach_dip) / denominator
    return alpha
def pred_elev(x, x_toe, wedge_slope):
    # elevation predicted by a planar wedge surface starting at the toe
    return np.tan(wedge_slope) * (x - x_toe)
I'm putting the toe of the wedge at 50,000 m from the transect start point, and setting the 'backstop' of the wedge at 175,000 m, based on eyeballing and rounding.
Let's see how the predicted wedge (in red) matches the real elevation points:
plt.figure(figsize=(10,4))
plt.plot(topo_pts.x, topo_pts.elev, 'k,', alpha=0.2)
plt.plot(np.linspace(50000,175000),
pred_elev(np.linspace(50000,175000),
50000,
get_wedge_slope(0.1, 0.25)),
'r')
plt.xlabel('distance (m)')
plt.ylabel('elevation (m)')
plt.show()
Pretty nice! We can see that but for some deviation in the Subhimalaya/Siwalik ranges and the High Himalaya, the wedge actually looks pretty linear. This is good, and it wasn't something I was counting on beforehand.
Estimating model error
Though the priors get all the hate ("Bias! Bias!"), they're often pretty intuitive. The likelihood function is a bit less so. It may be thought of as calculating how likely it is that the data could be generated by the model (i.e. the parameters we're inverting for). This fits the conditional probability form, $p(d\,|\,m)$, the probability of the data $d$ given the model $m$. This is still kind of abstract at first, though with more exposure to Bayes it starts to make sense in a stimulating yet deep and satisfying way, warming your belly like a nice spicy doner kebap.
In practice though, the likelihood function is specified as a function of the misfit between the data and the model predictions; the smaller the misfit, the higher the likelihood. This tends to be crystal clear to people who have fit models to data before but lack a solid theoretical background in the process.
We'll define the actual likelihood function in a moment (spoiler alert: it's a Gaussian), but first we will make a function to calculate the observation error (i.e. observations minus predictions).
def obs_error(x, y, x_toe, x_top, F, W):
    # only predict elevations between the toe and the backstop;
    # pandas index alignment leaves NaNs outside that range
    x_ = x[(x >= x_toe) & (x <= x_top)]
    wedge_slope = get_wedge_slope(F, W)
    pred_y = pred_elev(x_, x_toe, wedge_slope)
    error = y - pred_y  # observed minus predicted elevation
    return error
# testing with pretty-good fitting parameters
oe = obs_error(topo_pts.x, topo_pts.elev, 50000, 175000, 0.1, 0.25)
plt.figure(figsize=(10,3))
plt.plot(topo_pts.x, oe,
'k,', alpha=0.3)
plt.xlabel('distance (m)')
plt.ylabel('misfit (m)')
plt.show()
oe.mean()
240.80273019738905
(oe-oe.mean()).hist(bins=50)
plt.xlabel('data misfit (m)')
plt.show()
We're going to define the model likelihood based on a Gaussian (normal) distribution. I don't always like normal distributions, because very frequently data are not at all normally distributed, and by treating them as such you can miss a lot or be unduly influenced by the tails of the real data.
But look at that above! This is pretty close, and the normal distribution is implied with a least-squares fit anyways, so I feel pretty good about it today.
First we need to estimate the standard deviation of the data variability:
# find approximate data variability
err = oe.std()
We have >300,000 data points. Though we can go ahead with the inversion with this much data, and I would if this were a publication, we can also speed things up tremendously by downsampling the data. We'll just gather random samples from the data so as not to alias it.
# Downsample data (speeds up calculations)
topo_pts_d = topo_pts.sample(frac=0.01)
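One small note: the np.random.seed call further down only covers the prior draws, so if you want this downsampling step itself to be repeatable you could pass a seed to sample (an optional tweak, not what the notebook does; the seed value is arbitrary):

# same 1% random downsample, but repeatable across runs
topo_pts_d = topo_pts.sample(frac=0.01, random_state=70)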
Calculating the model likelihood
Single data point
The likelihood function we'll use is based on the normal distribution. For a given data point $d$ and model parameters $m$, the likelihood is:
$$p(d\,|\,m) = \frac{1}{\sqrt{2 \pi \sigma^2}} \exp \left[- \frac{(d^{obs} - d^{mod})^2}{2 \sigma^2} \right] $$where $ d^{obs}$ is the observed data point and $d^{mod}$ is the predicted data point, and $\sigma$ is the standard deviation of the data.
This evaluates a normal distribution with mean zero and standard deviation $\sigma$ at the misfit $d^{obs} - d^{mod}$: the smaller the misfit, the higher the likelihood.
In code:
def pt_likelihood(obs_err, err_=err):
    # Gaussian likelihood of a single misfit, with standard deviation err_
    like_term_1 = 1 / np.sqrt(2 * np.pi * err_**2)
    like_term_2 = np.exp(-(obs_err**2) / (2 * err_**2))
    return like_term_1 * like_term_2
plt.figure(figsize=(10,6))
plt.plot(np.linspace(-2000, 2000, 100),
pt_likelihood(np.linspace(-2000, 2000, 100)),
label='likelihood function')
plt.scatter(-750, pt_likelihood(-750),
c='r', label='modeled data')
plt.xlabel('data misfit')
plt.ylabel('model likelihood')
plt.legend()
plt.show()
Likelihood for all data
Probability theory holds that the joint probability of independent events is the product of their individual (marginal) probabilities:
$$ p(a,b) = p(a) \cdot p(b) \;.$$Following this, the total likelihood of all of the data is then the product of all of the likelihoods for each data point:
$$ p(d\,|\,m) = \prod_i^n p_i(d_i \,|\,m) \;, $$because the misfit of any data point is independent of the misfit of the other data points.
Though this is the standard way of calculating model likelihoods in Bayesian inversions, in practice it penalizes you for having a lot of data. Because the individual likelihoods are small numbers (well below one even for a perfect fit, according to the plot above), the total likelihood decreases exponentially with more data. If all of the individual likelihoods were equal, the total likelihood would be $p(d_i\,|\,m)^n$.
With 300,000 data points, this becomes ridiculous. If all of the likelihoods are 0.999, the total likelihood is $0.999^{300000}$ = ~4.43e-131. That's approaching the limits of what floating-point arithmetic can comfortably handle, and with realistic per-point likelihoods (well below one) the product underflows to zero outright. It's an absurd number, especially for an essentially perfect fit to a shitload of data.
Therefore, the log-likelihood $\log p(d\,|\,m)$ is commonly used instead, which sidesteps the floating-point accuracy problems.
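To make the underflow problem (and the log-likelihood workaround) concrete, here is a tiny standalone illustration:

likes = np.full(300000, 0.999)   # pretend every point has likelihood 0.999
print(np.prod(likes))            # ~4.4e-131: tiny, even for a near-perfect fit
print(np.prod(likes * 1e-3))     # 0.0: realistic per-point likelihoods underflow completely
print(np.sum(np.log(likes)))     # ~-300: the log-likelihood stays comfortably representable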
But this still bothers me as a scientist, even though it is theoretically sound. You shouldn't be penalized for more data, and a likelihood derived from a model with one $n$ can't be meaningfully compared to a likelihood derived from a model with a different $n$.
I've toyed around with different approaches in the past, but I've typically had a few hundred to a few thousand data in an inversion, and the problems weren't as pronounced.
But nevermore! I have instead decided to start making likelihood functions that are normalized to the amount of data. Instead of the product of the individual data likelihoods, I just use the geometric mean:
$$ p(d\,|\,m) = \left( \prod_i^n p_i(d_i \,|\,m) \right)^{1/n} = \sqrt[n]{p_1 \cdot p_2 \cdot \ldots \cdot p_n} \;. $$Unlike the arithmetic mean, this preserves the multiplicative structure arising from the independence of the individual likelihoods.
We'll code this up and run with it.
def geom_mean_likelihood(obs_errs, err_=err):
    # drop the NaNs for points outside the wedge, then take the
    # geometric mean of the individual point likelihoods
    in_bounds_errs = obs_errs.dropna()
    likelihoods = pt_likelihood(in_bounds_errs, err_)
    return gmean(likelihoods)
def calc_likelihood(x, y, x_toe, x_top, F, W):
    return geom_mean_likelihood(obs_error(x, y, x_toe, x_top, F, W))
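A quick sanity check that the geometric-mean likelihood really is insensitive to the number of data points, unlike the raw product:

# the geometric mean of n identical likelihoods is just that likelihood, for any n
print(gmean(np.full(100, 0.999)), gmean(np.full(300000, 0.999)))
# the raw products, by contrast, shrink rapidly with n
print(np.prod(np.full(100, 0.999)), np.prod(np.full(300000, 0.999)))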
Set up inversion
Make priors
As stated earlier, Bayes' rule is:
$$ p(F,W\,|\,d) \propto p(F,W) \cdot p(d\,|\,F,W) \;.$$We already went through the likelihood function. The other component (aside from the $\propto$ operator) is the prior probability distribution of our model parameters $F$ and $W$.
The priors in this case are going to be quite simple. We're going to make $p(F)$ uniform on $[0,1)$ ($F$ is essentially the effective coefficient of friction, so it should probably be between 0 and 1 for rocks), and $p(W)$ uniform on $[0,3]$ (this is the differential tectonic stress normalized by the lithostatic pressure; 3 is really high).
$p(W)$ and $p(F)$ are uniform distributions, and they are independent variables. Therefore, $p(F,W)$ for any point that is within the above bounds is equal, so we don't need to worry about the exact value of it.
Other inversion stuff
Our method is Monte Carlo based. Basically, we'll draw $N$ samples from $p(F)$ and $p(W)$, calculate $\alpha$ for each of the $N$ sample sets, then calculate $p(d\,|\,F,W)$, and then 'sample the posterior', which I will describe below.
We'll do $N$ = 100,000 iterations. Because that is a lot, we will parallelize the likelihood calculations. It will still take a minute or five to run, depending on your hardware.
n_iters = int(1e5)
x_toe = 50000 # front of the wedge
x_top = 175000 # back of the wedge
F_min = 0.
F_max = 1.
W_min = 0.
W_max = 3.
priors = pd.DataFrame(index=np.arange(n_iters),
columns=['F', 'W', 'likelihood', 'rel_likelihood'])
np.random.seed(70) # make sure the calculations are repeatable
priors['F'] = np.random.uniform(F_min, F_max, n_iters)
priors['W'] = np.random.uniform(W_min, W_max, n_iters)
priors.head()
| | F | W | likelihood | rel_likelihood |
|---|---|---|---|---|
| 0 | 0.927481 | 1.418750 | NaN | NaN |
| 1 | 0.872426 | 1.537132 | NaN | NaN |
| 2 | 0.584631 | 2.395302 | NaN | NaN |
| 3 | 0.905170 | 0.777661 | NaN | NaN |
| 4 | 0.316833 | 0.338925 | NaN | NaN |
We've created a Pandas DataFrame to hold the samples of priors and the results. Each row is one sample.
# Calculate the likelihood for each row in the dataframe
def calc_row_likelihood(row):
return calc_likelihood(topo_pts_d.x, topo_pts_d.elev, x_toe, x_top,
row['F'], row['W'])
# run the calculations for all the rows in a dataframe
def calc_frame_likelihood(df):
df['likelihood'] = df.apply(calc_row_likelihood, axis=1)
return df
# Parallelize it: Split the dataframe into chunks and
# have one core work on each chunk, then reassemble.
def calc_row_likelihood_parallel(df, n_cores=7):
t0 = time.time()
df_splits = np.array_split(df, n_cores)
t1 = time.time()
pool = Pool(n_cores)
t2 = time.time()
splits = pool.map(calc_frame_likelihood, df_splits)
t3 = time.time()
df = pd.concat(splits)
t4 = time.time()
pool.close()
pool.join()
t5 = time.time()
print('split in {:.2f} seconds \n'.format(t1-t0),
'made pool in {:.2f} seconds \n'.format(t2-t1),
'did calcs in {:.1f} seconds \n'.format(t3-t2),
'concatenated dataframe in {:.2f} seconds \n'.format(t4-t3),
'closed pool in {:.2f} seconds.'.format(t5-t4))
return df
t_start = time.time()
priors = calc_row_likelihood_parallel(priors)
t_end = time.time()
print('done with {} calcs in {:.1f} seconds'.format(n_iters, (t_end - t_start)))
/Users/itchy/src/anaconda/lib/python3.5/site-packages/scipy/stats/stats.py:306: RuntimeWarning: divide by zero encountered in log log_a = np.log(np.array(a, dtype=dtype)) (repeated once per worker process; gmean is taking the log of point likelihoods that have underflowed to zero)
split in 0.01 seconds
made pool in 0.03 seconds
did calcs in 79.2 seconds
concatenated dataframe in 0.02 seconds
closed pool in 0.10 seconds.
done with 100000 calcs in 79.3 seconds
Calculate the relative likelihoods
Remember the '$\propto$' symbol above?
This means that we know that the posteriors are proportional to the product of the priors and the likelihood, but we don't know the exact relationship.
Therefore, for computational convenience in the next step, we are going to define the constant of proportionality as $1/p_{\max}(d\,|\,F,W)$, i.e. we'll consider the most-likely model to have a 'relative likelihood' of 1, and normalize the rest of the likelihoods to it.
Note that this is every bit as correct as multiplying the likelihoods by any constant, including 1.
priors['rel_likelihood'] = priors['likelihood'] / priors['likelihood'].max()
priors.head()
| | F | W | likelihood | rel_likelihood |
|---|---|---|---|---|
| 0 | 0.927481 | 1.418750 | 0.000000e+00 | 0.000000e+00 |
| 1 | 0.872426 | 1.537132 | 6.175777e-87 | 1.780446e-83 |
| 2 | 0.584631 | 2.395302 | 3.462354e-04 | 9.981793e-01 |
| 3 | 0.905170 | 0.777661 | 0.000000e+00 | 0.000000e+00 |
| 4 | 0.316833 | 0.338925 | 3.538797e-51 | 1.020217e-47 |
Sample the posteriors
Now for the final step: Sampling the posterior.
Again, Bayes' rule states:
$$ p(F,W\,|\,d) \propto p(F,W) \cdot p(d\,|\,F,W) \;. $$We have samples of $p(F,W)$, and we have the likelihood for each. We have now normalized the likelihood to the maximum likelihood, so the relative likelihoods are between 0 and 1.
Now, we want to draw samples from $p(F,W\,|\,d)$, which is a probability distribution that we don't know. Then we can look at the distribution of the samples that we have drawn, and from there derive the shape of $p(F,W\,|\,d)$.
We will sample the posterior by comparing the relative likelihood to a random number drawn from the uniform distribution on $[0,1)$ (this is the range of our relative likelihoods). If the random number is bigger than the relative likelihood, we discard that sample. If the random number is smaller than the relative likelihood, we'll keep that sample and stick it in the posteriors.
This works because of the $\propto$ relationship in Bayes' rule. The higher the likelihood, the higher the chance of making it into the posterior.
Now, what about the prior? Well, we already drew samples proportionally to it when we initially created the sample sets of $F$ and $W$. So it's a two-stage process; the chance of any particular value of $F$ or $W$ being drawn in the first place is proportional to its prior probability, and the chance of any particular sample of $F$ or $W$ making it through to the posterior is proportional to its relative likelihood. And the chance of both being sampled initially and then included in the posterior is the product of the two chances (by the definition of joint probabilities of independent events given above). These two steps are just an application of Bayes' rule.
OK, easier coded than said:
# Create an index of priors with relative likelihoods
# greater than a random number between 0 and 1
# (note that it's a different number for each sample!)
posterior_inds = (priors.rel_likelihood > np.random.rand(n_iters))
# Create the set of posteriors based on the index above
posteriors = priors.loc[posterior_inds]
len(posteriors) # number of samples that made the cut
6176
Wow, only 6% of the samples were good enough to retain. This is why we did 100,000 samples to begin with. If you don't have really good ideas on what your prior should be, you have to do a lot to ensure that you will end up with enough samples in the posterior to adequately represent the distribution.
Let's check out the results visually.
The grey dots in the background represent the prior samples. The posterior samples are colored by likelihood.
plt.figure(figsize=(10,8))
plt.scatter(priors.F, priors.W,
c='grey', lw=0, alpha=0.02)
plt.scatter(posteriors.F, posteriors.W,
c=posteriors.rel_likelihood,
lw=0, cmap='plasma_r')
plt.colorbar(label='relative likelihood')
plt.xlabel('F')
plt.ylabel('W')
plt.show()
Now we see the dependence between $F$ and $W$, and it's pretty damn linear. We can also see that at high values, the range of acceptability is wider.
But this isn't the complete solution. $p(F,W\,|\,d)$ is a probability density function, and that means that (if we did everything correctly) the density of samples in the posterior represents the distribution. And the density of samples is just like population density: It's the number of samples per unit area, in this case in the $F-W$ plane shown above.
We can easily calculate and display this with a kernel density estimate and plot with seaborn in one line of code; we will also show the 'marginal' distributions on the sides:
import seaborn as sns #didn't do it earlier 'cause it changes the plots
kde_plot = sns.jointplot(posteriors.F, posteriors.W, kind='kde',
size=9, space=0)
plt.show()
The marginal distributions are the 1-D probability distributions of a joint probability distribution; the other dimensions are all collapsed. Therefore, if we want the posterior distribution of $F$, $p(F\,|\,d)$, we ignore $W$ entirely and just look at the density of the $F$ samples (i.e. a normalized histogram or KDE).
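For example, a minimal way to look at the marginal posterior of $F$ by itself (not in the original notebook) is just a normalized histogram of the posterior samples, plus whatever summary statistics you like:

plt.figure(figsize=(8, 5))
# normalized so the bars approximate p(F | d); on older matplotlib use normed=True
plt.hist(posteriors.F, bins=30, density=True)
plt.xlabel('F')
plt.ylabel('p(F | d)')
plt.show()

# quick numerical summary of both marginals
print(posteriors[['F', 'W']].describe())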
So that's it!
This gives us a good base to build on if we have additional information, either in the form of narrower priors for $F$ or $W$, or multiple sets of $\alpha$ and $\beta$ measurements. In the latter case, we can run the same calculations independently for each set of $\alpha$ and $\beta$ (i.e. each transect) and then calculate the joint distribution of the posteriors. This is really easy if you use a seeded random number generator to sample $F$ and $W$, or otherwise ensure that you're testing the same values in each inversion: to get the joint probability, you just find the samples that end up in every posterior. (I did this in a JGR paper on topographic and tectonic stress in the Longmen Shan.)
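As a rough sketch of that last idea (hypothetical variable names; it assumes each transect's inversion used the exact same seeded prior samples, so row $i$ of each posterior refers to the same $(F, W)$ pair):

import functools

# posteriors from several transects, each a subset of the same `priors` dataframe
transect_posteriors = [posteriors_transect_1, posteriors_transect_2]  # hypothetical results

# keep the samples that survived the acceptance step in *every* inversion
joint_index = functools.reduce(lambda a, b: a.intersection(b),
                               [p.index for p in transect_posteriors])
joint_posterior = priors.loc[joint_index, ['F', 'W']]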