'''
Module for the Common Conversion Stack Computation and handling of the
objects resulting from such.
:copyright:
The PyGLImER development team (makus@gfz-potsdam.de).
:license:
GNU Lesser General Public License, Version 3
(https://www.gnu.org/copyleft/lesser.html)
:author:
Peter Makus (makus@gfz-potsdam.de)
Created: Friday, 10th April 2020 05:30:18 pm
Last Modified: Thursday, 27th October 2022 07:01:06 pm
'''
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import fnmatch
from glob import glob
import os
import sys
import pickle
import logging
import time
from functools import partial
from copy import deepcopy
from typing import List, Tuple
from joblib import Parallel, delayed, cpu_count
import numpy as np
from scipy.interpolate import RegularGridInterpolator
import scipy.io as sio
from global_land_mask import globe
from psutil import virtual_memory
from tqdm import tqdm
import pyvista as pv
import vtk
from pyglimer.ccp.compute.bin import BinGrid
from pyglimer.ccp.plot_utils.plot_bins import plot_bins
from pyglimer.ccp.plot_utils.plot_cross_section import plot_cross_section
from pyglimer.constants import R_EARTH, DEG2KM, KM2DEG
from pyglimer.database.rfh5 import RFDataBase
from pyglimer.database.stations import StationDB
from pyglimer.plot.plot_map import plot_map_ccp, plot_vel_grad
from pyglimer.plot.plot_volume import VolumePlot, VolumeExploration
from pyglimer.rf.create import RFStream, read_rf
from pyglimer.rf.moveout import res, maxz, maxzm
from pyglimer.utils.createvmodel import ComplexModel
from pyglimer.utils.geo_utils import epi2euc, geo2cart
from pyglimer.utils.geo_utils import gctrack
from pyglimer.utils.geo_utils import fix_map_extent
from pyglimer.utils.SphericalNN import SphericalNN
from pyglimer.utils.log import create_mpi_logger
from pyglimer.utils.utils import dt_string, chunks
class PhasePick(object):
"""
A phasepick object, just created for more convenient plotting.
"""
def __init__(
self, coords: np.ndarray, amplitudes: np.ndarray, polarity: str,
z: np.ndarray, depthrange: list = [None, None]):
"""
Initialise object
Parameters
----------
coords : np.ndarray
2-d array containing latitudes and longitudes.
amplitudes : np.ndarray
            1-D array containing amplitude values of each bin
polarity : str
Either '+' for positive velocity gradients or '-' for negative
z : np.ndarray
1D array containing depth of maxima/minima per bin
depthrange : list, optional
list containing depth restrictions in form of [zmin, zmax],
by default [None, None]
"""
# assign vars
self.coords = coords
self.a = amplitudes
self.z = z
self.pol = polarity
self.depthr = depthrange
    def plot(
self, plot_amplitude: bool = False, outputfile: str or None = None,
format: str = 'pdf', dpi: int = 300, cmap: str = 'gist_rainbow',
geology: bool = False, title: str or None = None):
"""
Plot heatmap containing depth or amplitude of picked phase.
Parameters
----------
plot_amplitude : bool, optional
If True amplitude instead of depths is plotted, by default False
outputfile : str or None, optional
Write Figure to file, by default None
format : str, optional
File format, by default 'pdf'
dpi : int, optional
Resolution for non-vector graphics, by default 300
        cmap : str, optional
            Colormap, by default 'gist_rainbow'
        geology : bool, optional
            Plot geological map.
        title : str or None, optional
            Title for the figure, by default None
"""
lat = (
np.floor(min(self.coords[0][0]))-1,
np.ceil(max(self.coords[0][0])+1))
lon = (np.floor(min(self.coords[1][0]))-1,
np.ceil(max(self.coords[1][0])+1))
plot_vel_grad(
self.coords, self.a, self.z, plot_amplitude, lat, lon, outputfile,
dpi=dpi, format=format, cmap=cmap, geology=geology, title=title)
class CCPStack(object):
"""Is a CCP Stack matrix. Its size depends upon stations that are used as
input."""
def __init__(
self, latitude: List[float], longitude: List[float], edist: float,
phase: str, verbose: bool = True, logdir: str = None,
_load: str = None):
"""
        Creates an empty object template for a CCP stack.
:param latitude: Latitudes of all seismic stations.
:type latitude: 1-D numpy.ndarray
:param longitude: Longitudes of all seismic stations.
:type longitude: 1-D numpy.ndarray
:param edist: Inter bin distance in angular distance.
:type edist: float
:param phase: Seismic phase either "S" or "P". Phase "S" will lead to
more grid points being created due to flatter incidence angle.
Hence, phase can be "S" for PRFs but not the other way around.
However, "P" is computationally more efficient.
:type phase: str
:param verbose: If true -> console output. The default is True.
:type verbose: bool, optional
:param logdir: Directory for log file
        :type logdir: str, optional
:param _load: Parameter is only used internally when loading from a
binary numpy file. Then, this passes a filename.
:type _load: str
"""
        # Loading file from npz
        if _load:
            # _load_ccp_npz populates self in-place
            _load_ccp_npz(self, _load)
            return
# Else just initialise an empty object
# Loggers for the CCP script
self.logger = logging.getLogger('pyglimer.ccp.ccp')
self.logger.setLevel(logging.DEBUG)
# Create handler to the log
if not logdir:
os.makedirs('logs', exist_ok=True)
fh = logging.FileHandler(os.path.join('logs', 'ccp.log'))
else:
fh = logging.FileHandler(os.path.join(logdir, 'ccp.log'))
fh.setLevel(logging.INFO)
self.logger.addHandler(fh)
# Create Formatter
fmt = logging.Formatter(
fmt='%(asctime)s - %(levelname)s - %(message)s')
fh.setFormatter(fmt)
# Create bingrid
self.bingrid = BinGrid(
latitude, longitude, edist, phase=phase, verbose=verbose)
# Compute bins
self.bingrid.compute_bins()
# Initialize kdTree for Bins
self.bingrid.bin_tree()
# How many RFs are inside of the CCP-Stack?
self.N = 0
# Softlink useful variables and functions
self.coords = self.bingrid.bins
self.z = np.hstack(
(np.arange(-10, 0, .1), np.arange(0, maxz+res, res)))
self.bins = np.zeros([self.coords[0].size, len(self.z)])
self.illum = np.zeros(np.shape(self.bins), dtype=int)
self.pplat = []
self.pplon = []
self.ppz = self.z
self.binrad = None
def __str__(self) -> str:
out = f"Teleseismic Phase: \t\t{self.bingrid.phase}\n" +\
f"Bin distance: \t\t\t{round(self.bingrid.edist, 3)}\n" +\
f"Bin radius: \t\t\t{round(self.binrad, 3)}\n" +\
f"Bounding Box: \tLatitude: \t{round(self.coords[0].min(), 1)}\
{round(self.coords[0].max(), 1)}\n" +\
f"\t\tLongitude: \t{round(self.coords[1].min(), 1)}\
{round(self.coords[1].max(),1)}\n" +\
f"Number of Receiver Functions: \t{self.N}"
return out
    def query_bin_tree(
self, latitude: np.ndarray, longitude: np.ndarray,
n_closest_points: int):
"""
Find closest bins for provided latitude and longitude.
:param latitude: Latitudes of piercing points.
:type latitude: 1-D numpy.ndarray
:param longitude: Longitudes of piercing points.
:type longitude: 1-D numpy.ndarray
:param n_closest_points: Number of closest points to be found.
:type n_closest_points: int
        :return: bin indices k and depth indices j
        :rtype: Tuple[numpy.ndarray, numpy.ndarray]
"""
i = self.bingrid.query_bin_tree(
latitude, longitude, self.binrad_eucl, n_closest_points)
# Kd tree returns locations that are too far with maxindex+1
pos = np.where(i < self.bingrid.Nb)
# Depth index
j = pos[0]
# Bin index
k = i[pos]
return k, j
def get_depth_slice(self, z0: float = 410):
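        """
        Interpolate the concluded CCP stack onto a regular horizontal grid
        at a given depth.

        :param z0: Depth of the slice in km, by default 410.
        :type z0: float, optional
        :return: (qlat, qlon, qill, qccp, extent, z0) - the query grid
            coordinates, the interpolated illumination and CCP values, the
            map extent, and the depth actually used.
        """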
# Area considered around profile points
area = 2 * self.binrad
        # Resolution (a distinct name avoids shadowing the imported res)
        qres = self.binrad/4
# Depth
z = self.z.flatten()
zpos = np.argmin(np.abs(z-z0))
z0 = z[zpos]
# Coordinates
lat = self.coords_new[0].flatten()
lon = self.coords_new[1].flatten()
# Map extent with buffer
extent = fix_map_extent(
[np.min(lon), np.max(lon), np.min(lat), np.max(lat)],
fraction=0.05)
# Create query points
        qlon, qlat = np.meshgrid(
            np.arange(extent[0], extent[1], qres),
            np.arange(extent[2], extent[3], qres)
        )
# Create interpolators
snn = SphericalNN(lat, lon)
ccp_interpolator = snn.interpolator(
qlat, qlon, maximum_distance=area, k=10, p=2.0, no_weighting=False)
ill_interpolator = snn.interpolator(
qlat, qlon, maximum_distance=area, no_weighting=True)
# Interpolate
qccp = ccp_interpolator(self.ccp[:, zpos])
qill = ill_interpolator(self.hits[:, zpos])
return qlat, qlon, qill, qccp, extent, z0
def get_profile(self, slat, slon, ddeg=0.1):
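        """
        Interpolate the concluded CCP stack along a great-circle track
        through the waypoints slat/slon.

        :param slat: Latitudes of the profile waypoints.
        :param slon: Longitudes of the profile waypoints.
        :param ddeg: Sampling interval along the track in degrees,
            by default 0.1.
        :return: (slat, slon, sdists, qlat, qlon, qdists, qz, qill, qccp,
            area) - the waypoints and their along-track distances, the
            query coordinates and distances, the depth vector, the
            interpolated illumination and CCP sections, and the search
            radius around each point.
        """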
# Area considered around profile points
area = 2 * self.binrad
# Get evenly distributed points
qlat, qlon, qdists, sdists = gctrack(slat, slon, dist=ddeg)
# Get interpolation weights and rows.
# Create SphericalNN kdtree
snn = SphericalNN(self.coords_new[0], self.coords_new[1])
ccp_interpolator = snn.interpolator(
qlat, qlon, maximum_distance=area, k=10, p=2.0, no_weighting=False)
ill_interpolator = snn.interpolator(
qlat, qlon, maximum_distance=area, no_weighting=True)
# Get coordinates array from CCPStack
qz = deepcopy(self.z)
# Get data arrays
Np, Nz = self.ccp.shape
Nq = len(qlat)
# Interpolate
qccp = np.zeros((Nq, Nz))
qill = np.zeros((Nq, Nz))
# Interpolate each depth
for _i, (_ccpcol, _illumcol) in enumerate(
zip(self.ccp.T, self.hits.T)):
qccp[:, _i] = ccp_interpolator(_ccpcol)
qill[:, _i] = ill_interpolator(_illumcol)
# Interpolate onto regular depth grid for easy representation with
# imshow
ccp2D_interpolator = RegularGridInterpolator(
(qdists, qz), np.where(np.isnan(qccp), 0, qccp))
ill2D_interpolator = RegularGridInterpolator(
(qdists, qz), qill)
# Where to sample
qqz = np.arange(np.min(qz), np.max(qz), 1)
xqdists, xqz = np.meshgrid(qdists, qqz)
# Interpolate
qccp = ccp2D_interpolator((xqdists, xqz))
qill = ill2D_interpolator((xqdists, xqz))
return slat, slon, sdists, qlat, qlon, qdists, qz, qill, qccp, area
    def plot_cross_section(self, *args, **kwargs):
"""
See documentation for
:func:`~pyglimer.ccp.plot_utils.plot_cross_section.plot_cross_section`
"""
return plot_cross_section(self, *args, **kwargs)
    def compute_stack(
self, vel_model: str, rfloc: str,
in_format: str = 'hdf5', preproloc: str = None,
network: str or list or None = None,
station: str or list or None = None, geocoords: tuple or None = None,
pattern: list or None = None, save: str or bool = False,
filt: tuple or None = None,
binrad: float = 1/(2*np.cos(np.radians(30))), append_pp: bool = False,
multiple: bool = False, mc_backend: str = 'joblib'):
"""
Computes a ccp stack in self.ccp, using the data from rfloc.
The stack can be limited to some networks and stations.
:param vel_model: Velocity model located in data. Either `iasp91.dat`
for 1D raytracing or `3D` for 3D raytracing with GYPSuM model.
Using 3D Raytracing will cause the code to take about 30% longer.
:type vel_model: str
:param rfloc: Parental folder in which the receiver functions in
            time domain are. The default is 'output/waveforms/RF'.
:type rfloc: str, optional
:param in_format: **INPUT FORMAT** of receiver functions. Either
`'sac'` or `'hdf5'`. Defaults to 'hdf5'.
:type in_format: str, optional
:param preproloc: Parental folder containing the preprocessed mseeds.
Only needed if option geocoords is used. The default is
None. **Should be None if hdf5 is used!**
:type preproloc: str, optional
:param network: This parameter is ignored if the pattern is given.
Network or networks that are to be included in the ccp stack.
Standard unix wildcards are allowed. If None, the whole database
will be used. The default is None.
:type network: str or list, optional
:param station: This parameter is ignored if the pattern is given.
Station or stations that are to be included in the ccp stack.
Standard unix wildcards are allowed.
Can only be list if type(network)=str.
If None, the whole database
will be used. The default is None.
:type station: str or list, optional
:param geocoords: Include all stations in the rectangle given by
(minlat, maxlat, minlon, maxlon). Will be ignored if pattern or
network is given, by default None.
:type geocoords: Tuple, optional
:param pattern: Search pattern for files. Overwrites network,
station, and geocooords options. Usually only used by
:func:`~pyglimer.ccp.ccp.init_ccp()`, defaults to None.
:type pattern: list, optional
:param save: Either False if the ccp should not be saved or string with
filename will be saved in config.ccp. Will be saved as pickle file.
:type save: str or bool, optional
:param filt: Decides whether to filter the receiver function prior to
depth migration. Either a Tuple of form `(lowpassf, highpassf)` or
`None` / `False`.
:type filt: tuple, optional
:param binrad: Defines the bin radius with
bin radius = binrad*distance_bins.
Full Overlap = 1/(2*cosd(30)), Full coverage: 1.
The default is full overlap.
:type binrad: float, optional
:param append_pp: appends piercing point coordinates if True, so they
can later be plotted. Not recommended for big data sets.
The default is false. **Deprecated for multi-core**
:type append_pp: bool, optional
:param multiple: Append receiver functions for first order multiples.
It can be decided later, whether they should be used in the final
ccp stack. Will result in a longer computation time.
:type multiple: bool, optional
:param mc_backend: Multi-core backend to use for the computations.
            Can be either `"joblib"` (multiprocessing) or `"MPI"`. Note that
MPI compatibility is only implemented with hdf5 files.
:raises ValueError: For wrong inputs
.. warning::
This will take a long while for big data sets!
.. note::
            When using `mc_backend='MPI'`, this has to be started from a
            terminal via mpirun.
.. note::
MPI is only compatible with receiver functions saved in hdf5
format.
.. note::
Note that the grid should be big enough for the provided networks
and stations. Best to create the CCPStack object by using
            :func:`~pyglimer.ccp.ccp.init_ccp()`.
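        .. note::
            Example call (a minimal sketch; the paths and station codes
            below are illustrative)::

                >>> ccp.compute_stack(
                ...     vel_model='iasp91.dat', rfloc='output/waveforms/RF',
                ...     network='IU', station='HRV', save='ccp_IU_HRV.pkl')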
"""
if in_format.lower() == 'hdf5':
in_format = 'h5'
if binrad < 1/(2*np.cos(np.radians(30))):
raise ValueError(
"""Minimum allowed binradius is bin distance/(2*cos(30deg)).
Else the grid will not cover all the surface area."""
)
self.binrad = binrad*self.bingrid.edist
# How many closest points are queried by the bintree?
        # See Gauss circle problem: sosq[n] is the number of ways to write n
        # as a sum of two squares, i.e. the number of grid points at
        # squared distance n; valid up to the maximum binrad of 4.
        # Using the ceiling function to account for inaccuracies
sosq = [1, 4, 4, 0, 4, 8, 0, 0, 4, 4, 8, 0, 0, 8, 0, 0, 4]
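        # Example: binrad=1 -> ceil(1**2 + 1) = 2 -> sum(sosq[:2]) = 1+4 = 5,
        # i.e. the bin itself plus its four nearest grid neighbours.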
try:
n_closest_points = sum(sosq[0:int(np.ceil(binrad**2+1))])
except IndexError:
raise ValueError(
"""Maximum allowed binradius is 4 times the bin distance""")
# Compute maxdist in euclidean space
self.binrad_eucl = epi2euc(self.binrad)
folder = os.path.join(rfloc, self.bingrid.phase)
start = time.time()
try:
self.logger.info('Stacking started')
except AttributeError:
# Loggers for the CCP script
self.logger = logging.getLogger('pyglimer.ccp.ccp')
self.logger.setLevel(logging.INFO)
# Create handler to the log
fh = logging.FileHandler('logs/ccp.log')
fh.setLevel(logging.INFO)
self.logger.addHandler(fh)
self.logger.info('Stacking started')
if multiple:
# Use multiples?
endi = np.where(self.z == maxzm)[0][0] + 1
self.bins_m1 = np.zeros(self.bins[:, :endi].shape)
self.bins_m2 = np.zeros(self.bins[:, :endi].shape)
self.illumm = np.zeros(self.bins[:, :endi].shape, dtype=int)
else:
endi = None
if network and isinstance(network, str) and not pattern and\
in_format.lower() == 'sac':
# Loop over fewer files
folder = os.path.join(folder, network)
if station and type(station) == str:
folder = os.path.join(folder, station)
elif geocoords and not pattern:
# Stations by coordinates
# create empty lists for station latitude and longitude
lat = (geocoords[0], geocoords[1])
lon = (geocoords[2], geocoords[3])
db = StationDB(
preproloc or folder, phase=self.bingrid.phase, use_old=False)
net, stat = db.find_stations(lat, lon, phase=self.bingrid.phase)
pattern = ["{}.{}".format(a_, b_) for a_, b_ in zip(net, stat)]
# Clear memory
del db, net, stat
if not pattern:
pattern = [] # List of input constraints
if not (network and station):
pattern.append('*.%s' % in_format)
else:
pattern = ["*{}*%s".format(_a) % in_format for _a in pattern]
streams = [] # List of files filtered for input criteria
# List of all input files in the desired format
infiles = glob(os.path.join(
folder, '**', '*.%s' % in_format), recursive=True)
# Special rule for files imported from Matlab
if network in ('matlab', 'raysum'):
pattern.append('*.sac')
# Set filter patterns
elif network:
if type(network) == list:
if station:
if type(station) == list:
for net in network:
for stat in station:
pattern.append('*%s.%s*.%s' % (
net, stat, in_format))
else:
raise ValueError(
"Combination of network and station is invalid")
else:
for net in network:
pattern.append('*%s.*.%s' % (net, in_format))
elif type(network) == str:
if station:
if type(station) == str:
pattern.append('*%s.%s*.%s' % (
network, station, in_format))
elif type(station) == list:
for stat in station:
                        pattern.append('*%s.%s*.%s' % (
                            network, stat, in_format))
else:
pattern.append('*%s.*.%s' % (network, in_format))
elif station:
raise ValueError(
"You have to provide both network and station \
code if you want to filter by station")
# Do filtering
for pat in pattern:
streams.extend(fnmatch.filter(infiles, pat))
# clear memory
del pattern, infiles
# Actual CCP stack #
# The test data needs to be filtered
if network == 'matlab':
filt = [.03, 1.5] # bandpass frequencies
self.logger.debug('Using data from the following files: %s' % (
str(streams)))
# Define grid boundaries for 3D RT
latb = (self.coords[0].min(), self.coords[0].max())
lonb = (self.coords[1].min(), self.coords[1].max())
if in_format.lower() == 'h5':
self._create_ccp_from_hdf5_mc(
streams, multiple, append_pp, n_closest_points, vel_model,
latb, lonb, filt, endi, mc_backend)
elif in_format.lower() == 'sac':
self._create_ccp_from_sac(
streams, multiple, append_pp, n_closest_points, vel_model,
latb, lonb, filt, endi, mc_backend)
else:
raise NotImplementedError(
'Unknown input format %s.' % in_format
)
end = time.time()
self.logger.info("Stacking finished.")
self.logger.info(dt_string(end-start))
self.conclude_ccp()
# save file
if save:
self.write(filename=save)
def _create_ccp_from_hdf5_mc(
self, streams: List[str], multiple: bool, append_pp: bool,
n_closest_points: int, vel_model: str, latb: Tuple[float, float],
lonb: Tuple[float, float], filt: Tuple[float, float], endi,
mc_backend: str):
"""
Computes a CCP stack by reading a rf database in hdf5.
:param streams: List of hdf5 files to read.
:type streams: List[str]
:param multiple: Regard crustal multiples for stack?
:type multiple: bool
:param append_pp: Append ppoints to the object?
:type append_pp: bool
:param n_closest_points: number of closest points to query from kd-tree
:type n_closest_points: int
        :param vel_model: The velocity model to use for depth migration.
:type vel_model: str
:param latb: Boundary for the CCP calculation in form
`(minlat, maxlat)`.
:type latb: Tuple[float, float]
:param lonb: Boundary for the CCP calculation in form
`(minlon, maxlon)`.
:type lonb: Tuple[float, float]
:param filt: Filter to impose on rfs before migration. Either False
or tuple in form (minfreq, maxfreq).
:type filt: Tuple[float, float]
:param endi: Only relevant if `multiple=True`. Last depth index to
consider for multiple computation.
:type endi: int
        :param mc_backend: Multi-core backend to use. Either `"joblib"` or
            `"MPI"`.
        :type mc_backend: str
"""
# note that streams are actually files - confusing variable name
if mc_backend.lower() == 'joblib':
out = Parallel(n_jobs=-1, backend='multiprocessing')(
delayed(self._create_ccp_from_hdf5)(
f, multiple, append_pp, n_closest_points, vel_model,
latb, lonb, filt)
for f in tqdm(streams))
# Awful way to solve it, but the best I could find
self._unpack_joblib_output(multiple, endi, out)
elif mc_backend.upper() == 'MPI':
from mpi4py import MPI
comm = MPI.COMM_WORLD
psize = comm.Get_size()
rank = comm.Get_rank()
pmap = (np.arange(len(streams))*psize)/len(streams)
pmap = pmap.astype(np.int32)
ind = pmap == rank
ind = np.arange(len(streams), dtype=int)[ind]
# get new MPI compatible loggers
self.logger = create_mpi_logger(self.logger, rank)
for ii in ind:
try:
kk, jj, datal, datalm1, datalm2, N =\
self._create_ccp_from_hdf5(
streams[ii], multiple, append_pp, n_closest_points,
vel_model, latb, lonb, filt)
except KeyError:
self.logger.warning(
f'No Receiver Functions in file {streams[ii]}.')
continue
self.N += N
if multiple:
self._unpack_output_multiple(
kk, jj, datal, datalm1, datalm2, endi)
else:
self._unpack_output(kk, jj, datal)
# Gather results
self.N = comm.allreduce(self.N, op=MPI.SUM)
comm.Allreduce(MPI.IN_PLACE, [self.illum, MPI.DOUBLE], op=MPI.SUM)
comm.Allreduce(MPI.IN_PLACE, [self.bins, MPI.DOUBLE], op=MPI.SUM)
if multiple:
comm.Allreduce(
MPI.IN_PLACE, [self.illumm, MPI.DOUBLE], op=MPI.SUM)
comm.Allreduce(
MPI.IN_PLACE, [self.bins_m1, MPI.DOUBLE], op=MPI.SUM)
comm.Allreduce(
MPI.IN_PLACE, [self.bins_m2, MPI.DOUBLE], op=MPI.SUM)
self.logger.info('Number of receiver functions used: '+str(self.N))
def _create_ccp_from_hdf5(
self, f: str, multiple: bool, append_pp: bool,
n_closest_points: int, vel_model: str, latb: Tuple[float, float],
lonb: Tuple[float, float], filt: Tuple[float, float]) -> Tuple[
List[np.ndarray], List[np.ndarray], List[np.ndarray],
List[np.ndarray], List[np.ndarray]]:
"""
Does ppoint calculation and depth migration for rf-functions in hdf5
format. (per file based)
Subsequently, computes the closest gridpoints in the ccp object.
:param f: HDF5 file to read.
:type f: str
:param multiple: Regard crustal multiples for stack?
:type multiple: bool
:param append_pp: Append ppoints to the object?
:type append_pp: bool
:param n_closest_points: number of closest points to query from kd-tree
:type n_closest_points: int
        :param vel_model: The velocity model to use for depth migration.
:type vel_model: str
:param latb: Boundary for the CCP calculation in form
`(minlat, maxlat)`.
:type latb: Tuple[float, float]
:param lonb: Boundary for the CCP calculation in form
`(minlon, maxlon)`.
:type lonb: Tuple[float, float]
:param filt: Filter to impose on rfs before migration. Either False
or tuple in form (minfreq, maxfreq).
:type filt: Tuple[float, float]
        :return: (List of lateral indices of closest bins, List of depth
            indices of closest gridpoint, list of rf data to stack for each
            of those indices, list of multiple data (pps), list of multiple
            data (pss), number of used receiver functions)
        :rtype: Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray],
            List[np.ndarray], List[np.ndarray], int]
"""
net, stat, _ = os.path.basename(f).split('.')
kk = []
jj = []
datal = []
datalm1 = []
datalm2 = []
# number of rfs
N = 0
with RFDataBase(f, mode='r') as rfdb:
for rftr in rfdb.walk('rf', net, stat, self.bingrid.phase):
rfst = RFStream([rftr])
out = self._ccp_process_rftr(
rfst, filt, vel_model, latb, lonb, multiple, append_pp,
n_closest_points)
if not out:
continue
N += 1
kk.append(out[0])
jj.append(out[1])
datal.append(out[2])
if multiple:
datalm1.append(out[3])
datalm2.append(out[4])
return kk, jj, datal, datalm1, datalm2, N
def _create_ccp_from_sac(
self, streams: List[str], multiple: bool, append_pp: bool,
n_closest_points: int, vel_model: str, latb: Tuple[float, float],
lonb: Tuple[float, float], filt: Tuple[float, float], endi: int,
mc_backend: str):
"""
Computes a CCP stack by reading a rf database in sac.
:param streams: List of SAC files to read.
:type streams: List[str]
:param multiple: Regard crustal multiples for stack?
:type multiple: bool
:param append_pp: Append ppoints to the object?
:type append_pp: bool
:param n_closest_points: number of closest points to query from kd-tree
:type n_closest_points: int
        :param vel_model: The velocity model to use for depth migration.
:type vel_model: str
:param latb: Boundary for the CCP calculation in form
`(minlat, maxlat)`.
:type latb: Tuple[float, float]
:param lonb: Boundary for the CCP calculation in form
`(minlon, maxlon)`.
:type lonb: Tuple[float, float]
:param filt: Filter to impose on rfs before migration. Either False
or tuple in form (minfreq, maxfreq).
:type filt: Tuple[float, float]
:param endi: Only relevant if `multiple=True`. Last depth index to
consider for multiple computation.
:type endi: int
        :param mc_backend: Multi-core backend to use. **Only `joblib` is
            supported for SAC input.**
        :type mc_backend: str
        :raises NotImplementedError: For unknown mc backends.
"""
if mc_backend.upper() == 'MPI':
raise NotImplementedError(
'MPI in conjunction with sac is not implemented.'
)
self.logger.info('Number of receiver functions used: '+str(self.N))
# Split job into n chunks
num_cores = cpu_count()
self.logger.info('Number of cores used: '+str(num_cores))
mem = virtual_memory()
self.logger.info('Available system memory: '+str(mem.total*1e-6)+'MB')
# How many tasks should the main process be split in?
# If the number is too high, it will need unnecessarily
# much disk space or caching (probably also slower).
# If it's too low, the progressbar won't work anymore.
# !Memmap arrays are getting extremely big, size in byte
# is given by: nrows*ncolumns*nbands
# with 64 cores and 10 bands/core that results in about
# 90 GB for each of the arrays for a finely gridded
# CCP stack of North-America
# Check maximum information that can be saved
# using half of the RAM.
# approximately 8 byte per element in RF + 8 byte for idx
mem_needed = 3*8*850*len(streams)*100
# For stacking with multiple modes, we'll need more memory
if multiple:
mem_needed = mem_needed*1.75
# Split into several jobs if too much data
if mem_needed > mem.total:
N_splits = int(np.ceil(mem_needed/mem.total))
split_size = int(np.ceil(len(streams)/N_splits))
            print(
                'Splitting RFs into %s chunks due to insufficient memory. '
                'Each progressbar will only show the progress per chunk.'
                % str(N_splits))
else:
split_size = len(streams)
for stream_chunk in chunks(streams, split_size):
num_split_max = num_cores*100 # maximal no of jobs
len_split = int(np.ceil(len(stream_chunk)/num_cores))
if len_split > 10:
if int(np.ceil(len(stream_chunk)/len_split)) > num_split_max:
len_split = int(np.ceil(len_split/num_split_max))
else:
len_split = int(np.ceil(len_split/(len_split/10)))
num_split = int(np.ceil(len(stream_chunk)/len_split))
out = Parallel(n_jobs=num_cores, backend='multiprocessing')(
delayed(self.multicore_stack)(
st, append_pp, n_closest_points, vel_model,
latb, lonb, filt, multiple)
for _, st in zip(
tqdm(range(num_split)),
chunks(stream_chunk, len_split)))
self._unpack_joblib_output(multiple, endi, out)
def _unpack_joblib_output(self, multiple: bool, endi: int, out: Tuple):
"""
        Unpack the output that comes from joblib and stack it into the CCP
        object.
"""
# Awful way to solve it, but the best I could find
if multiple:
for kk, jj, datal, datalm1, datalm2, N in out:
self._unpack_output_multiple(
kk, jj, datal, datalm1, datalm2, endi)
self.N += N
else:
for kk, jj, datal, _, _, N in out:
self._unpack_output(kk, jj, datal)
self.N += N
def _unpack_output_multiple(
self, kk: List[int], jj: List[int], datal: List[float],
datalm1: List[float], datalm2: List[float], endi: int):
"""
Unpack the output of the kdtree and stack them into the CCP object.
Use this function if `multiple=True`
"""
for k, j, data, datam1, datam2 in zip(kk, jj, datal, datalm1, datalm2):
            # self.N is incremented by the caller (which adds the per-file
            # RF count), so no additional counting is needed here.
self.bins[k, j] = self.bins[k, j] + data[j]
# hit counter + 1
self.illum[k, j] = self.illum[k, j] + 1
# multiples
iii = np.where(j <= endi-1)[0]
jm = j[iii]
km = k[iii]
try:
self.bins_m1[
km, jm] = self.bins_m1[km, jm] + datam1[jm]
self.bins_m2[
km, jm] = self.bins_m2[km, jm] + datam2[jm]
                self.illumm[km, jm] = self.illumm[km, jm] + 1
except IndexError as e:
if not len(datam1) or not len(datam2):
continue
else:
raise IndexError(e)
def _unpack_output(self, kk: List[int], jj: List[int], datal: List[float]):
"""
Unpack the output of the kdtree and stack them into the CCP object.
Use this function if `multiple=False`.
"""
# Count RFs
for k, j, data in zip(kk, jj, datal):
self.bins[k, j] = self.bins[k, j] + data[j]
# hit counter + 1
self.illum[k, j] = self.illum[k, j] + 1
            # self.N is incremented by the caller (which adds the per-file
            # RF count), so no additional counting is needed here.
    def multicore_stack(
self, stream: List[str], append_pp: bool, n_closest_points: int,
vmodel: str, latb: Tuple[float, float], lonb: Tuple[float, float],
filt: Tuple[float, float], multiple: bool) -> Tuple[
List[int], List[int], List[np.ndarray], List[np.ndarray], List[
np.ndarray]]:
"""
Takes in chunks of data to be processed on one core.
:param stream: List of file locations.
:type stream: list
:param append_pp: Should piercing points be appended?.
:type append_pp: Bool
:param n_closest_points: Number of Closest points that the KDTree
should query.
:type n_closest_points: int
        :param vmodel: Name of the velocity model that should be used for
            the raytracing.
        :type vmodel: str
        :param latb: Tuple in Form (minlat, maxlat). To save RAM on 3D
            raytracing. Will remain unused for 1D RT.
        :type latb: Tuple
        :param lonb: Tuple in Form (minlon, maxlon). To save RAM on 3D
            raytracing. Will remain unused for 1D RT.
        :type lonb: Tuple
        :param filt: Should the RFs be filtered before the ccp stack?
            If so, provide (lowcof, highcof).
        :type filt: bool or tuple
        :return: Lists of bin indices, depth indices, and rf-data (plus
            multiple data and the number of used RFs).
        :rtype: Tuple[list, list, list, list, list, int]
"""
kk = []
jj = []
datal = []
datalm1 = []
datalm2 = []
N = 0
for st in stream:
rft = read_rf(st)
out = self._ccp_process_rftr(
rft, filt, vmodel, latb, lonb, multiple, append_pp,
n_closest_points)
if not out:
continue
N += 1
if multiple:
k, j, data, lm1, lm2 = out
datalm1.append(lm1)
datalm2.append(lm2)
else:
k, j, data = out
kk.append(k)
jj.append(j)
datal.append(data)
return kk, jj, datal, datalm1, datalm2, N
def _ccp_process_rftr(
self, rfst: RFStream, filt: Tuple[float, float], vmodel: str,
latb: Tuple[float, float], lonb: Tuple[float, float], multiple: bool,
append_pp: bool, n_closest_points: int) -> Tuple[
np.ndarray, np.ndarray, np.ndarray]:
"""
Do the depth migration and the piercing point computation for
a single receiver function. Then, find the laterally closest bin
to the ppoint.
:param rfst: Input receiver function
:type rfst: RFStream
:param filt: Filter the receiver function? If so, provide high- and
lowpass frequency as a Tuple (in Hz).
:type filt: Tuple[float, float]
:param vmodel: The velocity model to use.
:type vmodel: str
:param latb: boundaries for the moveout correction in the form
(minlat, maxlat).
:type latb: Tuple[float, float]
:param lonb: boundaries for the moveout correction in the form
(minlon, maxlon).
:type lonb: Tuple[float, float]
:param multiple: Should the moveout correction be done for crustal
multiples as well?
:type multiple: bool
:param append_pp: Should the coordinates of the ppoint be saved?
:type append_pp: bool
:param n_closest_points: Number of closest bin points to return.
:type n_closest_points: int
        :return: The indices of the closest bins k, the depth indices j, and
            the receiver function data. If multiples are used, also the
            multiple data.
        :rtype: Tuple[np.ndarray[int], np.ndarray[int], np.ndarray]
"""
if filt:
rfst.filter(
'bandpass', freqmin=filt[0], freqmax=filt[1],
zerophase=True, corners=2)
try:
z, rf, rfm1, rfm2 = rfst[0].moveout(
vmodel, latb=latb, lonb=lonb, taper=False,
multiple=multiple)
except ComplexModel.CoverageError as e:
# Wrong stations codes can raise this
self.logger.warning(e)
return
except Exception as e:
# Just so the script does not interrupt. Did not occur up
# to now
self.logger.exception(e)
return
lat = np.array(rf.stats.pp_latitude)
lon = np.array(rf.stats.pp_longitude)
if append_pp:
plat = np.pad(
lat, (0, len(self.z)-len(lat)), constant_values=np.nan)
plon = np.pad(
lon, (0, len(self.z)-len(lon)), constant_values=np.nan)
self.pplat.append(plat)
self.pplon.append(plon)
k, j = self.query_bin_tree(lat, lon, n_closest_points)
if multiple:
depthi = np.where(z == maxzm)[0][0]
try:
lm1 = (rfm1.data[:depthi+1])
lm2 = (rfm2.data[:depthi+1])
except AttributeError as e:
                # for interpolation errors
                self.logger.exception(
                    'Error in multiple computation. Mostly caused by an issue '
                    + 'during the interpolation. Original error message '
                    + 'below.')
self.logger.exception(e)
lm1 = None
lm2 = None
return k, j, rf.data, lm1, lm2
return k, j, rf.data
    def conclude_ccp(
self, keep_empty: bool = False, keep_water: bool = False, r: int = 0,
multiple: bool = False, z_multiple: int = 200):
"""
Averages the CCP-bin and populates empty cells with average of
neighbouring cells. No matter which option is
chosen for the parameters, data is never lost entirely. One can always
execute a new conclude_ccp() command. However, decisions that are
taken here will affect the .mat output and plotting outputs.
:param keep_empty: Keep entirely empty bins. The default is False.
:type keep_empty: bool, optional
:param keep_water: For False all bins that are on water-covered areas
will be discarded, defaults to False
:type keep_water: bool, optional
        :param r: Fields with less than r hits will be set equal 0.
            Has an effect only for r >= 1, by default 0.
:type r: int, optional
        :param multiple: Use multiples in stack. Either False or a weighting;
            i.e. 'linear' for a linearly weighted stack between the three
            phases, 'zk' for a Zhu & Kanamori approach, or 'pws' for a
            phase-weighted stack. Use 'm1' to use only the first multiple
            mode (no stack), 'm2' for RFs created only with the second
            multiple phase (PSS), and 'm' for an equal-weight stack of m1
            and m2. By default False.
:type multiple: bool or str
:param z_multiple: Until which depth [km] should multiples be
considered,
maximal value is 200 [km]. Will only be used if multiple=True.
By default 200 km.
:type z_multiple: int, optional
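        .. note::
            Example (a minimal sketch): average the stack with Zhu &
            Kanamori weighted multiples and zero out bins with fewer than
            3 hits::

                >>> ccp.conclude_ccp(r=3, multiple='zk', z_multiple=200)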
"""
if z_multiple > 200:
raise ValueError('Maximal depth for multiples is 200 km.')
endi = np.where(self.z == z_multiple)[0][0] + 1
if multiple == 'linear':
self.ccp = np.hstack(((
np.divide(self.bins[:, :endi], self.illum[:, :endi]+1)
+ np.divide(self.bins_m1[:, :endi], self.illumm[:, :endi]+1)
+ np.divide(
self.bins_m2[:, :endi], self.illumm[:, :endi]+1))/3,
np.zeros(self.bins[:, endi:].shape)))
elif multiple == 'zk':
self.ccp = np.hstack((
.7*np.divide(self.bins[:, :endi], self.illum[:, :endi]+1)
+ .2*np.divide(self.bins_m1[:, :endi], self.illumm[:, :endi]+1)
+ .1*np.divide(
self.bins_m2[:, :endi], self.illumm[:, :endi]+1),
np.zeros(self.bins[:, endi:].shape)))
elif multiple == 'm1':
self.ccp = np.hstack((
np.divide(self.bins_m1[:, :endi], self.illumm[:, :endi]+1),
np.zeros(self.bins[:, endi:].shape)))
elif multiple == 'm2':
self.ccp = np.hstack((
np.divide(self.bins_m2[:, :endi], self.illumm[:, :endi]+1),
np.zeros(self.bins[:, endi:].shape)))
elif multiple == 'm':
self.ccp = np.hstack((
np.divide(self.bins_m2[:, :endi]+self.bins_m1[:, :endi],
2*(self.illumm[:, :endi])+1),
np.zeros(self.bins[:, endi:].shape)))
elif not multiple:
self.ccp = np.divide(self.bins, self.illum+1)
else:
raise ValueError(
'The requested multiple stacking mode is \
misspelled or not yet implemented')
self.hits = self.illum.copy()
if r > 1:
index = np.where(np.logical_and(self.illum > 1, self.illum < r))
self.ccp[index] = 0
self.hits[index] = 0
if not keep_empty:
i = np.where(self.hits.sum(axis=1) == 0)[0]
self.ccp = np.delete(self.ccp, i, 0)
self.hits = np.delete(self.hits, i, 0)
self.coords_new = (np.delete(self.coords[0], i, 1),
np.delete(self.coords[1], i, 1))
# Check if bins are on land
if not keep_water:
if keep_empty:
self.coords_new = self.coords.copy()
lats, lons = self.coords_new
# list of indices that contain water
index = globe.is_ocean(lats, lons)[0, :]
self.ccp = np.delete(self.ccp, index, 0)
self.hits = np.delete(self.hits, index, 0)
self.coords_new = (np.delete(self.coords_new[0], index, 1),
np.delete(self.coords_new[1], index, 1))
# Else everything will always pick coords_new instead of coords
if keep_water and keep_empty:
try:
del self.coords_new
            except AttributeError:
pass
    def write(self, filename=None, folder='.', fmt="pickle"):
"""
        Saves the CCPStack object as pickle or matlab file. Only save
        as Matlab file for exporting, as not all information can be
        preserved!
:param filename: Name as which to save, file extensions aren't
necessary.
:type filename: str, optional
        :param folder: Output folder, defaults to '.'
:type folder: str, optional
:param fmt: Either `"pickle"`, `'npz'` or `"matlab"` for .mat,
defaults to "pickle".
:type fmt: str, optional
:raises ValueError: For unknown formats.
.. note::
Saving in numpy (npz) format is the most robust option. However,
loading those files will take the longest time as some attributes
have to be recomputed. **NUMPY FORMAT IS RECOMMENDED FOR
ARCHIVES!**
.. warning::
Pickling files is the fastest option, but might lead to
incompatibilities between different PyGLImER versions.
**USE PICKLE IF YOU WOULD LIKE TO SAVE FILES FOR SHORTER AMOUNTS
OF TIME.**
.. warning::
Saving as Matlab files is only meant if one wishes to plot with
the old Matlab Receiver function exploration toolset (not
recommended anymore).
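        .. note::
            Example (a minimal sketch; the filename is illustrative)::

                >>> ccp.write(filename='ccp_stack', folder='.', fmt='npz')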
"""
# delete logger (cannot be pickled)
try:
del self.logger
except AttributeError:
# For backwards compatibility
pass
# Standard filename
if not filename:
filename = '%s_%s_%s' % (
self.bingrid.phase, str(self.bingrid.edist), str(self.binrad))
# Remove filetype identifier if provided
x = filename.split('.')
if len(x) > 1:
if x[-1].lower() in ('pkl', 'mat', 'npz'):
fmt = x[-1].lower()
filename = '.'.join(x[:-1])
# output location
oloc = os.path.join(folder, filename)
if fmt in ("pickle", "pkl"):
with open(oloc + ".pkl", 'wb') as output:
pickle.dump(self, output, pickle.HIGHEST_PROTOCOL)
# Save as Matlab file (exporting to plot)
elif fmt in ("matlab", 'mat'):
# Change vectors so it can be corrected for elevation
# Only for old data standard
if min(self.z) == 0:
illum = np.pad(self.hits, ((0, 0), (round(10/.1), 0)))
depth = np.hstack((np.arange(-10, 0, .1), self.z))
ccp = np.pad(self.ccp, ((0, 0), (round(10/.1), 0)))
else:
illum = self.hits
depth = self.z
ccp = self.ccp
if hasattr(self, 'coords_new') and self.coords_new[0].size:
lat_ccp, lon_ccp = self.coords_new
else:
lat_ccp, lon_ccp = self.coords
d = {}
d.update({'RF_ccp': ccp,
'illum': illum,
'depth_ccp': depth,
'lat_ccp': lat_ccp,
'lon_ccp': lon_ccp,
'CSLAT': self.bingrid.latitude,
'CSLON': self.bingrid.longitude,
'clat': np.array(self.pplat),
'clon': np.array(self.pplon),
'cdepth': self.ppz.astype(float)})
sio.savemat(oloc + '.mat', d)
elif fmt.lower() in ('numpy', 'np', 'npz'):
_save_ccp_npz(self, oloc)
# Unknown formats
else:
raise ValueError("The format type ", fmt, "is unkown.")
    def plot_bins(self):
"""
A simple map view of the bins.
"""
if hasattr(self, 'coords_new'):
coords = self.coords_new
else:
coords = self.coords
plot_bins(self.bingrid.stations, coords)
    def compute_kdtree_volume(
self, qlon: np.ndarray or list = None,
qlat: np.ndarray or list = None, zmax: float = None,
verbose: bool = True):
"""Using the CCP kdtree, we get the closest few points and compute
the weighting using a distance metric. if points are too far away,
they aren't weighted
Parameters
----------
qlon : np.ndarray or list
grid defining array for longitude
qlat : np.ndarray or list
grid defining array for latitude
        zmax : float, optional
            maximum depth to compute, by default None
        verbose : bool, optional
            print progress information, by default True

        Returns
        -------
        tuple
            (qlat, qlon, qz, qill, qccp, area): the query coordinate
            vectors, the interpolated illumination and CCP volumes, and
            the search area around each point.
"""
        def vprint(verbose, *args, **kwargs):
            # Only print when verbose is set
            if verbose:
                print(*args, **kwargs)
        verboseprint = partial(vprint, verbose)
# Area considered around profile points
area = 2 * self.binrad
# Get global bounds
if qlat is None or qlon is None:
minlat = np.min(self.coords_new[0])
maxlat = np.max(self.coords_new[0])
minlon = np.min(self.coords_new[1])
maxlon = np.max(self.coords_new[1])
minlon, maxlon, minlat, maxlat = fix_map_extent(
[minlon, maxlon, minlat, maxlat, ])
# Create mesh vectors
qlat = np.arange(minlat, maxlat + self.binrad/2, self.binrad/2)
qlon = np.arange(minlon, maxlon + self.binrad/2, self.binrad/2)
else:
minlat, maxlat = np.min(qlat), np.max(qlat)
minlon, maxlon = np.min(qlon), np.max(qlon)
minlon, maxlon, minlat, maxlat = fix_map_extent(
[minlon, maxlon, minlat, maxlat])
# Create mesh
mlat, mlon = np.meshgrid(qlat, qlon)
# smaller/larger is fine as the extent was fixed.
cpos = np.where(
(self.coords_new[0] > minlat)
& (self.coords_new[0] < maxlat)
& (self.coords_new[1] > minlon)
& (self.coords_new[1] < maxlon)
)[1]
# Get interpolation weights and rows.
verboseprint("Creating Spherical Nearest Neighbour class ...", end="")
snn = SphericalNN(
self.coords_new[0][:, cpos], self.coords_new[1][:, cpos])
verboseprint(" done.")
verboseprint("Creating interpolators ...", end="")
ccp_interpolator = snn.interpolator(
mlat, mlon, maximum_distance=area, k=10, p=2.0, no_weighting=False)
ill_interpolator = snn.interpolator(
mlat, mlon, maximum_distance=area, no_weighting=True)
verboseprint(" done.")
# Get coordinates array from CCPStack
qz = deepcopy(self.z)
if zmax is not None:
pos = np.argmin(np.abs(qz-zmax))
qz = qz[:pos]
else:
pos = len(qz)
# Get data arrays
Nz = pos
Nlat, Nlon = len(qlat), len(qlon)
# Interpolate
qccp = np.zeros((Nlat, Nlon, Nz))
qill = np.zeros((Nlat, Nlon, Nz))
# Interpolate each depth
maintext = "KDTree interpolation ... (intensive part)"
verboseprint(maintext)
for _i, (_ccpcol, _illumcol) in enumerate(
zip(self.ccp[cpos, :pos].T, self.hits[cpos, :pos].T)):
if _i % int((Nz/10)) == 0:
sys.stdout.write("\033[F") # back to previous line
sys.stdout.write("\033[K")
verboseprint(
maintext + f"---> {_i+1:0{len(str(Nz))}d}/{Nz:d}")
qccp[:, :, _i] = ccp_interpolator(_ccpcol).T
qill[:, :, _i] = ill_interpolator(_illumcol).T
verboseprint("... done.")
# Interpolate onto regular depth grid for easy representation with
# imshow
verboseprint(
"Regular Grid interpolation ... (less intensive part)", end="")
ccp3D_interpolator = RegularGridInterpolator(
(qlat, qlon, qz), np.where(np.isnan(qccp), 0, qccp))
ill3D_interpolator = RegularGridInterpolator(
(qlat, qlon, qz), qill)
# Where to sample
qqz = np.arange(np.min(qz), np.max(qz), 1)
xqlat, xqlon, xqz = np.meshgrid(qlat, qlon, qqz)
# Interpolate
qccp = ccp3D_interpolator((xqlat, xqlon, xqz))
qill = ill3D_interpolator((xqlat, xqlon, xqz))
verboseprint(" done.")
return qlat, qlon, qqz, qill, qccp, area
    def create_vtk_mesh(
self, geo=True, bbox: list = None, filename: str = None):
"""Creates a mesh with given bounding box s
Parameters
----------
geo : bool, optional
flag whether the output mesh is in geographical coordinates or
meters, by default True
bbox : list or None, optional
bounding box [minlon, maxlon, minlat, maxlat]. If None
No boundaries are taken, by default None
filename : str or None, optional
If set, the computed grid will be output as VTK file under the
given filename. This file can then later be opened using either the
plotting tool or, e.g., Paraview. If None, no file is written.
By default, None.
Returns
-------
VTK.UnstructuredGrid
outputs a vtk mesh that can be opened in, e.g., Paraview.
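
        Examples
        --------
        A minimal sketch (the bounding box and filename are illustrative):

        >>> grid = ccp.create_vtk_mesh(
        ...     geo=True, bbox=[-125, -100, 40, 50], filename='ccp.vtu')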
"""
# Get coordinates
lat = np.squeeze(self.coords_new[0])
lon = np.squeeze(self.coords_new[1])
# Filter ccpstacks if not none
if bbox is None:
bbox = [-180, 180, -90, 90]
pos = np.where(((
bbox[0] <= lon) & (lon <= bbox[1])
& (bbox[2] <= lat) & (lat <= bbox[3])))[0]
lat = lat[pos]
lon = lon[pos]
# Create VTK point cloud at the surface to triangulate.
r = R_EARTH * np.ones_like(lat)
points = np.vstack((deepcopy(lon*DEG2KM).flatten(),
deepcopy(lat*DEG2KM).flatten(),
deepcopy(r).flatten())).T
pc = pv.PolyData(points)
# Triangulate 2D surface
mesh = pc.delaunay_2d(alpha=self.binrad*1.5*DEG2KM)
# Use triangles and their connectivity to create 3D Mesh of wedges
points = deepcopy(mesh.points)
n_points = mesh.n_points
# Get cells and create first layer of wedges at the surface
cells = mesh.faces.reshape(mesh.n_cells, 4)
cells[:, 0] = 6
cells = np.hstack((cells, n_points + cells[:, 1:]))
newcells = deepcopy(cells)
# Give second layer of points in the wedge the right depth!
zpoints = deepcopy(np.array(points))
zpoints[:, 2] = R_EARTH - self.z[1]
newpoints = np.vstack((points, zpoints))
        # Loop over remaining depths to populate the mesh.
for _z in self.z[2:]:
# Add cells
extra_cells = cells
extra_cells[:, 1:] += n_points
newcells = np.vstack((newcells, extra_cells))
# Add points
zpoints = deepcopy(np.array(points))
zpoints[:, 2] = R_EARTH - _z
newpoints = np.vstack((newpoints, zpoints))
# Define Cell types
newcelltypes = np.array(
[vtk.VTK_WEDGE] * newcells.shape[0], dtype=np.uint8)
        # Redefine the location of the points if geographic coordinates are
        # wanted instead of Cartesian(-ish) ones
if geo:
x, y, z = geo2cart(
newpoints[:, 2],
newpoints[:, 1] * KM2DEG,
newpoints[:, 0] * KM2DEG)
newpoints = np.vstack((x, y, z)).T
# Create Unstructured Grid
grid = pv.UnstructuredGrid(newcells, newcelltypes, newpoints)
# Populate with RF and illumination values
grid['RF'] = deepcopy(self.ccp[pos, :].T.ravel())
grid['illumination'] = deepcopy(self.hits[pos, :].T.ravel())
# If file name is set write unstructured grid to file!
if filename is not None:
writer = vtk.vtkXMLUnstructuredGridWriter()
writer.SetInputData(grid)
writer.SetFileName(filename)
writer.Write()
return grid
    def explore(
self, qlon: np.ndarray or list = None, qlat: np.ndarray or list = None,
zmax: float = None):
"""
Creates a volume exploration window set. One window is for all
plots, and the other window is generated with sliders such that one
can explore how future plots should be generated. It technically does
not require any inputs, as simply the binning size will be used to
create a meshgrid fitting to the bin distance distribution.
One can however fix the extent and size of the grid by defining the
grids coordinate vectors.
Parameters
----------
        qlon: np.ndarray, optional
            Longitude gridpoints for the grid. Defines longitudinal extent
            and resolution. If None, it's determined automatically from
            the CCPStack's resolution and grid extent.
            Must be monotonically increasing.
        qlat: np.ndarray, optional
            Latitude gridpoints for the grid. Defines latitudinal extent
            and resolution. If None, it's determined automatically from
            the CCPStack's resolution and grid extent.
            Must be monotonically increasing.
zmax: float, optional
Maximum depth (km) that the CCP-exploration extends to.
Returns
-------
VolumeExploration
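
        Examples
        --------
        A minimal sketch; the grid is derived automatically from the bin
        distance:

        >>> ccp.explore(zmax=250)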
"""
# Compute the volume max radius at depth is z*0.33
qlat, qlon, qz, qill, qccp, area = self.compute_kdtree_volume(
qlon, qlat, zmax=zmax)
# Launch plotting tool
return VolumeExploration(qlon, qlat, qz, qccp)
    def plot_volume_sections(
self, qlon: np.ndarray, qlat: np.ndarray, zmax: float or None = None,
lonsl: float or None = None, latsl: float or None = None,
zsl: float or None = None, r: float or None = None,
minillum: int or None = None, show: bool = True):
"""
Creates the same plot as the `explore` tool, but(!) statically and
with more options left to the user.
!!! IT IS IMPORTANT for the plotting function that the input arrays
are monotonically increasing!!!
Parameters
----------
qlon : np.ndarray
Monotonically increasing ndarray. No failsafes implemented.
qlat : np.ndarray
Monotonically increasing ndarray. No failsafes implemented.
        zmax : float or None, optional
            Maximum depth (km) to interpolate to, by default None
lonsl : float or None, optional
Slice location, if None, the center of lon is used,
by default None
latsl : float or None, optional
Slice location, if None, the center of lat is used,
by default None
zsl : float or None, optional
Slice location, if None, the center of depth is used,
by default None
r : float or None, optional
Max radius for interpolation values taken into account [km],
if None bingrid.edist * DEG2KM * 2.0, by default None
        minillum: int or None, optional
            Minimum number of illumination points used in the interpolation,
            everything below is downweighted by the square reciprocal
show : bool, optional
whether to make figure window visible, default True
        Returns
        -------
        VolumePlot
            The static volume plot object.
"""
# Change distance to delete things
if r is None:
r = self.bingrid.edist * DEG2KM * 2.0
# Compute the volume max radius at depth is z*0.33
qlat, qlon, qz, qill, qccp, area = self.compute_kdtree_volume(
qlon, qlat, zmax=zmax)
return VolumePlot(qlon, qlat, qz, qccp, xl=lonsl, yl=latsl, zl=zsl,
show=show)
    def map_plot(
self, plot_stations: bool = False, plot_bins: bool = False,
plot_illum: bool = False, profile: list or tuple or None = None,
p_direct: bool = True, outputfile: str or None = None,
format: str = 'pdf', dpi: int = 300, geology: bool = False,
title: str or None = None):
"""
Create a map plot of the CCP Stack containing user-defined information.
Parameters
----------
plot_stations : bool, optional
Plot the station locations, by default False
plot_bins : bool, optional
Plot bin location, by default False
plot_illum : bool, optional
Plot bin location with colour depending on the depth-cumulative
illumination at bin b, by default False
        profile : list or tuple or None, optional
Plot locations of cross sections into the plot. Information about
each cross section is given as a tuple (lon1, lon2, lat1, lat2),
several cross sections are given as a list of tuples in said
format, by default None.
        p_direct : bool, optional
            If True, the list in profile describes areas with coordinates of
            the lower left and upper right corner, by default True
outputfile : str or None, optional
Save plot to file, by default None
format : str, optional
Format for the file to be saved as, by default 'pdf'
dpi : int, optional
            DPI for non-vector format plots, by default 300
geology : bool, optional
Plot a geological map. Requires internet connection
title : str, optional
Set title for plot.
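
        Examples
        --------
        A minimal sketch (the output filename is illustrative):

        >>> ccp.map_plot(plot_stations=True, plot_illum=True,
        ...              outputfile='ccp_map', format='pdf')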
"""
if hasattr(self, 'coords_new') and self.coords_new[0].size:
bincoords = self.coords_new
else:
bincoords = self.coords
# Set boundaries for map:
lat = (
np.floor(min(bincoords[0][0]))-1, np.ceil(max(bincoords[0][0])+1))
lon = (np.floor(min(bincoords[1][0]))-1,
np.ceil(max(bincoords[1][0])+1))
plot_map_ccp(
lat, lon, plot_stations, self.bingrid.latitude,
self.bingrid.longitude, plot_bins, bincoords, self.bingrid.edist,
plot_illum, self.hits, profile, p_direct, outputfile=outputfile,
format=format, dpi=dpi, geology=geology, title=title)
    def pick_phase(
self, pol: str = '+', depth: list = [None, None]) -> PhasePick:
"""
Pick the strongest negative or strongest positive gradient from a
predefined depth-range.
Parameters
----------
pol : str, optional
Either '+' for positive velocity gradient or '-' for negative
gradient, by default '+'
depth : list, optional
            List with two elements [minz, maxz]; defines the depth window in
            which the phase picker will look for maxima and minima,
by default [None, None]
Returns
-------
PhasePick
A phasepick object, which subsequently can be plotted.
Raises
------
ValueError
            For erroneous inputs.
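
        Examples
        --------
        A minimal sketch (the depth window is illustrative):

        >>> pick = ccp.pick_phase(pol='+', depth=[300, 500])
        >>> pick.plot()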
"""
        self.conclude_ccp(r=3)
        # Copy so the mutable default argument is not altered in-place
        depth = list(depth)
        # Find indices
        for ii, d in enumerate(depth):
            if d is not None:
                depth[ii] = np.abs(self.z-d).argmin()
# Find minimum in vector
ccp_short = self.ccp[:, depth[0]:depth[1]]
        # Exclude insufficiently illuminated bins
        illum_flat = np.sum(self.hits[:, depth[0]:depth[1]], axis=1)
        # indices of bins to delete (avoid shadowing the builtin id)
        idel = np.where(illum_flat < ccp_short.shape[1]*10)
        ccp_short = np.delete(ccp_short, idel, 0)
        coords = (
            np.delete(self.coords_new[0], idel, 1),
            np.delete(self.coords_new[1], idel, 1))
if pol == '+':
# Amplitudes
a = ccp_short.max(axis=1)
# Depths
z = self.z[depth[0]:depth[1]][ccp_short.argmax(axis=1)]
elif pol == '-':
a = ccp_short.min(axis=1)
z = self.z[depth[0]:depth[1]][ccp_short.argmin(axis=1)]
else:
raise ValueError(
'Choose either \'+\' to return the highest '
+ 'positive velocity gradient or \'-\' to return the '
+ 'highest negative velocity gradient.')
p_phase = PhasePick(coords, a, pol, z, depth)
return p_phase
def read_ccp(filename: str, fmt: str = None) -> CCPStack:
"""
Read CCP-Stack class file from input folder.
    :param filename: Filename of the input file including the file ending.
    :type filename: str
:param fmt: File format, can be none if the filename has an ending,
possible options are `"pickle"` or `"npz"`. The default is None.
:type fmt: str, optional
:raises ValueError: For unknown formats
:return: CCPStack object
:rtype: :class:`~pyglimer.ccp.ccp.CCPStack`
.. note::
Loading Files from numpy format will take a while as some
recomputations are necessary.
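    .. note::
        Example (a minimal sketch; the filename is illustrative)::

            >>> from pyglimer.ccp.ccp import read_ccp
            >>> ccp = read_ccp('ccp_IU.pkl')  # format inferred from ending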
"""
# Trying to identify filetype from ending:
if not fmt:
x = filename.split('.')
if len(x) == 1:
raise ValueError(
"Could not determine format, please provide a valid format")
if x[-1] == 'pkl':
fmt = 'pickle'
elif x[-1] == 'npz':
fmt = 'npz'
else:
raise ValueError(
"Could not determine format, please provide a valid format")
else:
fmt = fmt.lower()
# Open provided file
if fmt == "pickle":
with open(filename, 'rb') as infile:
ccp = pickle.load(infile)
elif fmt == 'npz':
ccp = CCPStack(None, None, None, None, _load=filename)
else:
raise ValueError("Unknown format ", fmt)
return ccp
def _load_ccp_npz(ccp: CCPStack, filename: str) -> CCPStack:
"""
    Load a file from numpy format. For that, we do some recomputations
    and also have to unpack the used arrays.
:param ccp: The initialised :class:`~pyglimer.ccp.ccp.CCPStack` object.
:type ccp: CCPStack
:param filename: path to load from.
:type filename: str
:return: The loaded :class:`~pyglimer.ccp.ccp.CCPStack` object.
:rtype: CCPStack
"""
loaded = dict(np.load(filename))
for k in loaded.pop('attrs_pkl'):
loaded[k] = loaded[k][0]
ccp.bingrid = BinGrid(
loaded.pop('statlat'), loaded.pop('statlon'), loaded.pop('edist'),
loaded.pop('phase'))
# Compute bins
ccp.bingrid.compute_bins()
# Initialize kdTree for Bins
ccp.bingrid.bin_tree()
ccp.coords = ccp.bingrid.bins
for k in loaded:
ccp.__dict__[k] = loaded[k]
return ccp
def init_ccp(
rfloc: str, spacing: float, vel_model: str, phase: str,
preproloc: str = None, network: str or List[str] = None,
station: str or List[str] = None,
geocoords: Tuple[float, float, float, float] = None,
compute_stack: bool = False, filt: Tuple[float, float] = None,
binrad: float = np.cos(np.radians(30)), append_pp: bool = False,
multiple: bool = False, save: str or bool = False, verbose: bool = True,
format: str = 'hdf5', mc_backend: str = 'joblib'
) -> CCPStack:
"""
Computes a ccp stack in self.ccp using data from statloc and rfloc.
The stack can be limited to some networks and
stations.
:param rfloc: Parental directory in that the RFs are saved.
:type rfloc: str
:param spacing: Angular distance between each bingrid point.
:type spacing: float
:param vel_model: Velocity model located in data. Either iasp91 (1D
raytracing) or 3D for 3D raytracing using a model compiled from GyPSuM.
:type vel_model: str
:param phase: The teleseismic phase to be used.
:type phase: str
    :param preproloc: Parental directory in which the preprocessed miniseeds
        are saved, defaults to None. **Should be None when working with h5
        files!**
:type preproloc: str, optional
    :param network: Network or networks that are to be included in the ccp
        stack.
        Standard unix wildcards are allowed. If None, the whole database
        will be used. The default is None.
    :type network: str or list, optional
:param station: Station or stations that are to be included in the ccp
stack. Standard unix wildcards are allowed. Can only be list if
type(network)==str. If None, the whole database will be used.
The default is None.
:type station: str or list, optional
:param geocoords: An alternative way of selecting networks and stations.
Given in the form (minlat, maxlat, minlon, maxlon), defaults to None
:type geocoords: Tuple, optional
:param compute_stack: If true it will compute the stack by calling
:meth:`~pyglimer.ccp.ccp.CCPStack.compute_stack()`.
That can take a long time! The default is False.
:type compute_stack: bool, optional
:param filt: Decides whether to filter the receiver function prior to
depth migration. Either a Tuple of form `(lowpassf, highpassf)` or
`None` / `False`.
:type filt: Tuple[float, float], optional
:param binrad: *Only used if compute_stack=True*
Defines the bin radius with bin radius = binrad*distance_bins.
Full Overlap = cosd(30), Full coverage: 1.
The default is full overlap.
:type binrad: float, optional
    :param append_pp: *Only used if compute_stack=True.*
        Appends piercing point locations to object. Can be used to plot
        pps on map. Not recommended for large datasets as it takes A LOT
        longer and makes the file a lot larger. The default is False.
    :type append_pp: bool, optional
:param multiple: Should the CCP Stack be prepared to work with multiples?
It can be chosen later, whether the RFs from multiples are to be
incorporated into the CCP Stack. By default False.
:type multiple: bool, optional
:param save: Either False if the ccp should not be saved or string with
filename will be saved. Will be saved as pickle file.
The default is False.
:type save: bool or str, optional
:param verbose: Display info in terminal., defaults to True
:type verbose: bool, optional
:param format: The **INPUT FORMAT** from which receiver functions are read.
        If you saved your receiver functions as hdf5 files, set
        `format='hdf5'`. Defaults to hdf5.
:type format: str
:param mc_backend: Multi-core backend to use for the computations.
Can be either `"joblib"` (multiprocessing) or `"MPI"`. Not that MPI
compatibility is only implemented with hdf5 files.
:raises TypeError: For wrong inputs.
:return: CCPStack object.
    :rtype: :class:`~pyglimer.ccp.ccp.CCPStack`
    .. note::
        When using `mc_backend='MPI'`, this has to be started from a terminal
        via mpirun.
.. note::
MPI is only compatible with receiver functions saved in hdf5 format.
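    .. note::
        Example (a minimal sketch; the paths and coordinates below are
        illustrative)::

            >>> ccp = init_ccp(
            ...     rfloc='output/waveforms/RF', spacing=1.,
            ...     vel_model='iasp91.dat', phase='P',
            ...     geocoords=(40., 50., -125., -100.),
            ...     compute_stack=True, save='ccp_us.pkl')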
"""
if phase[-1].upper() == 'S' and multiple:
raise NotImplementedError(
'Multiple mode is not supported for phase S.')
if format.lower() in ('hdf5', 'h5', 'hdf'):
preproloc = None
format = 'hdf5'
db = StationDB(
preproloc or rfloc, phase=phase, use_old=False, hdf5=format == 'hdf5')
# Were network and stations provided?
# Possibility 1 as geo boundaries
if geocoords:
lat = (geocoords[0], geocoords[1])
lon = (geocoords[2], geocoords[3])
subset = db.geo_boundary(lat, lon, phase=phase)
else:
subset = db.db
net = list(subset['network'])
stat = list(subset['station'])
codes = list(subset['code'])
lats = list(subset['lat'])
lons = list(subset['lon'])
# Filter
if isinstance(network, str) and not isinstance(station, list):
        codef = fnmatch.filter(codes, '%s.%s' % (network, station or '*'))
elif isinstance(network, str) and isinstance(station, list):
codef = list(set(codes).intersection([
'%s.%s' % (network, stat) for stat in station]))
elif isinstance(network, list):
if station is not None and not isinstance(station, list):
raise TypeError(
"""Provide a list of stations, when using a list of
networks as parameter.""")
elif not station:
codef = list(set(codes).intersection([
'%s.*' % net for net in network]))
elif isinstance(station, list):
if len(station) != len(network):
raise ValueError(
'Length of network and station list have to be equal.')
codef = list(set(codes).intersection([
'%s.%s' % (net, stat) for net, stat in zip(network, station)]))
    if network is None:
        codef = codes
else:
# Find indices
ind = [codes.index(cf) for cf in codef]
net = np.array(net)[ind]
stat = np.array(stat)[ind]
lats = np.array(lats)[ind]
lons = np.array(lons)[ind]
logdir = os.path.join(
os.path.dirname(os.path.dirname(
os.path.abspath(rfloc))), 'logs')
ccp = CCPStack(
lats, lons, spacing, phase=phase, verbose=verbose, logdir=logdir)
# Clear Memory
del stat, lats, lons
    if not geocoords:
        pattern = None
    else:
        # Build the file pattern from the station codes selected above
        pattern = codef
if compute_stack:
ccp.compute_stack(
vel_model=vel_model, network=network, station=station, save=save,
filt=filt, multiple=multiple,
pattern=pattern, append_pp=append_pp, binrad=binrad, rfloc=rfloc,
in_format=format, mc_backend=mc_backend)
return ccp
def _save_ccp_npz(ccp: CCPStack, oloc: str):
"""
The point of this function is that, in contrast to pickling,
    the data is still accessible after PyGLImER has been upgraded.
For this, we will have to recompute some objects when loading.
(in particular, the bingrid object)
"""
# Save the important things from the bingrid object
d = deepcopy(ccp.__dict__)
d['phase'] = deepcopy(ccp.bingrid.phase)
d['statlat'] = deepcopy(ccp.bingrid.latitude)
d['statlon'] = deepcopy(ccp.bingrid.longitude)
d['edist'] = deepcopy(ccp.bingrid.edist)
d.pop('coords')
d.pop('bingrid')
# Attributes that need to be changed to arrays
apkl = []
for k in d.keys():
if isinstance(d[k], np.ndarray):
continue
elif isinstance(d[k], list):
d[k] = np.array(d[k])
continue
d[k] = np.array([d[k]])
apkl.append(k)
d['attrs_pkl'] = np.array(apkl)
np.savez_compressed(oloc, **d)