'''
Module for the Common Conversion Stack Computation and handling of the
objects resulting from such.
:copyright:
The PyGLImER development team (makus@gfz-potsdam.de).
:license:
GNU Lesser General Public License, Version 3
(https://www.gnu.org/copyleft/lesser.html)
:author:
Peter Makus (makus@gfz-potsdam.de)
Created: Friday, 10th April 2020 05:30:18 pm
Last Modified: Thursday, 27th October 2022 07:01:06 pm
'''
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import fnmatch
from glob import glob
import os
import sys
import pickle
import logging
import time
from functools import partial
from copy import deepcopy
from typing import List, Tuple
from joblib import Parallel, delayed, cpu_count
import numpy as np
from scipy.interpolate import RegularGridInterpolator
import scipy.io as sio
from global_land_mask import globe
from psutil import virtual_memory
from tqdm import tqdm
import pyvista as pv
import vtk
from pyglimer.ccp.compute.bin import BinGrid
from pyglimer.ccp.plot_utils.plot_bins import plot_bins
from pyglimer.ccp.plot_utils.plot_cross_section import plot_cross_section
from pyglimer.constants import R_EARTH, DEG2KM, KM2DEG
from pyglimer.database.rfh5 import RFDataBase
from pyglimer.database.stations import StationDB
from pyglimer.plot.plot_map import plot_map_ccp, plot_vel_grad
from pyglimer.plot.plot_volume import VolumePlot, VolumeExploration
from pyglimer.rf.create import RFStream, read_rf
from pyglimer.rf.moveout import res, maxz, maxzm
from pyglimer.utils.createvmodel import ComplexModel
from pyglimer.utils.geo_utils import epi2euc, geo2cart
from pyglimer.utils.geo_utils import gctrack
from pyglimer.utils.geo_utils import fix_map_extent
from pyglimer.utils.SphericalNN import SphericalNN
from pyglimer.utils.log import create_mpi_logger
from pyglimer.utils.utils import dt_string, chunks
class PhasePick(object):
"""
A phasepick object, just created for more convenient plotting.
"""
def __init__(
self, coords: np.ndarray, amplitudes: np.ndarray, polarity: str,
z: np.ndarray, depthrange: list = [None, None]):
"""
Initialise object
Parameters
----------
coords : np.ndarray
2-d array containing latitudes and longitudes.
amplitudes : np.ndarray
            1-D array containing amplitude values of each bin
polarity : str
Either '+' for positive velocity gradients or '-' for negative
z : np.ndarray
1D array containing depth of maxima/minima per bin
depthrange : list, optional
list containing depth restrictions in form of [zmin, zmax],
by default [None, None]
"""
# assign vars
self.coords = coords
self.a = amplitudes
self.z = z
self.pol = polarity
self.depthr = depthrange
    def plot(
self, plot_amplitude: bool = False, outputfile: str or None = None,
format: str = 'pdf', dpi: int = 300, cmap: str = 'gist_rainbow',
geology: bool = False, title: str or None = None):
"""
Plot heatmap containing depth or amplitude of picked phase.
Parameters
----------
plot_amplitude : bool, optional
If True amplitude instead of depths is plotted, by default False
outputfile : str or None, optional
Write Figure to file, by default None
format : str, optional
File format, by default 'pdf'
dpi : int, optional
Resolution for non-vector graphics, by default 300
        cmap : str, optional
            Colormap, by default 'gist_rainbow'
        geology : bool, optional
            Plot geological map.
        title : str or None, optional
            Title for the figure, by default None
"""
lat = (
np.floor(min(self.coords[0][0]))-1,
np.ceil(max(self.coords[0][0])+1))
lon = (np.floor(min(self.coords[1][0]))-1,
np.ceil(max(self.coords[1][0])+1))
plot_vel_grad(
self.coords, self.a, self.z, plot_amplitude, lat, lon, outputfile,
dpi=dpi, format=format, cmap=cmap, geology=geology, title=title)
class CCPStack(object):
"""Is a CCP Stack matrix. Its size depends upon stations that are used as
input."""
def __init__(
self, latitude: List[float], longitude: List[float], edist: float,
phase: str, verbose: bool = True, logdir: str = None,
_load: str = None):
"""
        Creates an empty object template for a CCP stack.
:param latitude: Latitudes of all seismic stations.
:type latitude: 1-D numpy.ndarray
:param longitude: Longitudes of all seismic stations.
:type longitude: 1-D numpy.ndarray
:param edist: Inter bin distance in angular distance.
:type edist: float
:param phase: Seismic phase either "S" or "P". Phase "S" will lead to
more grid points being created due to flatter incidence angle.
Hence, phase can be "S" for PRFs but not the other way around.
However, "P" is computationally more efficient.
:type phase: str
:param verbose: If true -> console output. The default is True.
:type verbose: bool, optional
:param logdir: Directory for log file
        :type logdir: str, optional
:param _load: Parameter is only used internally when loading from a
binary numpy file. Then, this passes a filename.
:type _load: str
"""
        # Loading file from npz
        if _load:
            # _load_ccp_npz populates self in-place
            _load_ccp_npz(self, _load)
            return
# Else just initialise an empty object
# Loggers for the CCP script
self.logger = logging.getLogger('pyglimer.ccp.ccp')
self.logger.setLevel(logging.DEBUG)
# Create handler to the log
if not logdir:
os.makedirs('logs', exist_ok=True)
fh = logging.FileHandler(os.path.join('logs', 'ccp.log'))
else:
fh = logging.FileHandler(os.path.join(logdir, 'ccp.log'))
fh.setLevel(logging.INFO)
self.logger.addHandler(fh)
# Create Formatter
fmt = logging.Formatter(
fmt='%(asctime)s - %(levelname)s - %(message)s')
fh.setFormatter(fmt)
# Create bingrid
self.bingrid = BinGrid(
latitude, longitude, edist, phase=phase, verbose=verbose)
# Compute bins
self.bingrid.compute_bins()
# Initialize kdTree for Bins
self.bingrid.bin_tree()
# How many RFs are inside of the CCP-Stack?
self.N = 0
# Softlink useful variables and functions
self.coords = self.bingrid.bins
self.z = np.hstack(
(np.arange(-10, 0, .1), np.arange(0, maxz+res, res)))
self.bins = np.zeros([self.coords[0].size, len(self.z)])
self.illum = np.zeros(np.shape(self.bins), dtype=int)
self.pplat = []
self.pplon = []
self.ppz = self.z
self.binrad = None
def __str__(self) -> str:
out = f"Teleseismic Phase: \t\t{self.bingrid.phase}\n" +\
f"Bin distance: \t\t\t{round(self.bingrid.edist, 3)}\n" +\
f"Bin radius: \t\t\t{round(self.binrad, 3)}\n" +\
f"Bounding Box: \tLatitude: \t{round(self.coords[0].min(), 1)}\
{round(self.coords[0].max(), 1)}\n" +\
f"\t\tLongitude: \t{round(self.coords[1].min(), 1)}\
{round(self.coords[1].max(),1)}\n" +\
f"Number of Receiver Functions: \t{self.N}"
return out
    def query_bin_tree(
self, latitude: np.ndarray, longitude: np.ndarray,
n_closest_points: int):
"""
Find closest bins for provided latitude and longitude.
:param latitude: Latitudes of piercing points.
:type latitude: 1-D numpy.ndarray
:param longitude: Longitudes of piercing points.
:type longitude: 1-D numpy.ndarray
:param n_closest_points: Number of closest points to be found.
:type n_closest_points: int
        :return: bin indices k and depth indices j
        :rtype: Tuple[numpy.ndarray, numpy.ndarray]
"""
i = self.bingrid.query_bin_tree(
latitude, longitude, self.binrad_eucl, n_closest_points)
# Kd tree returns locations that are too far with maxindex+1
pos = np.where(i < self.bingrid.Nb)
# Depth index
j = pos[0]
# Bin index
k = i[pos]
return k, j
def get_depth_slice(self, z0: float = 410):
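        """
        Interpolate the concluded CCP stack onto a regular horizontal grid
        at a given depth.

        :param z0: Depth of the slice in km, by default 410.
        :type z0: float, optional
        :return: (qlat, qlon, qill, qccp, extent, z0) - the query grid
            coordinates, the interpolated illumination and CCP values, the
            map extent, and the depth actually used.
        """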
# Area considered around profile points
area = 2 * self.binrad
        # Resolution (a distinct name avoids shadowing the imported res)
        qres = self.binrad/4
# Depth
z = self.z.flatten()
zpos = np.argmin(np.abs(z-z0))
z0 = z[zpos]
# Coordinates
lat = self.coords_new[0].flatten()
lon = self.coords_new[1].flatten()
# Map extent with buffer
extent = fix_map_extent(
[np.min(lon), np.max(lon), np.min(lat), np.max(lat)],
fraction=0.05)
# Create query points
        qlon, qlat = np.meshgrid(
            np.arange(extent[0], extent[1], qres),
            np.arange(extent[2], extent[3], qres)
        )
# Create interpolators
snn = SphericalNN(lat, lon)
ccp_interpolator = snn.interpolator(
qlat, qlon, maximum_distance=area, k=10, p=2.0, no_weighting=False)
ill_interpolator = snn.interpolator(
qlat, qlon, maximum_distance=area, no_weighting=True)
# Interpolate
qccp = ccp_interpolator(self.ccp[:, zpos])
qill = ill_interpolator(self.hits[:, zpos])
return qlat, qlon, qill, qccp, extent, z0
def get_profile(self, slat, slon, ddeg=0.1):
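        """
        Interpolate the concluded CCP stack along a great-circle track
        through the waypoints slat/slon.

        :param slat: Latitudes of the profile waypoints.
        :param slon: Longitudes of the profile waypoints.
        :param ddeg: Sampling interval along the track in degrees,
            by default 0.1.
        :return: (slat, slon, sdists, qlat, qlon, qdists, qz, qill, qccp,
            area) - the waypoints and their along-track distances, the
            query coordinates and distances, the depth vector, the
            interpolated illumination and CCP sections, and the search
            radius around each point.
        """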
# Area considered around profile points
area = 2 * self.binrad
# Get evenly distributed points
qlat, qlon, qdists, sdists = gctrack(slat, slon, dist=ddeg)
# Get interpolation weights and rows.
# Create SphericalNN kdtree
snn = SphericalNN(self.coords_new[0], self.coords_new[1])
ccp_interpolator = snn.interpolator(
qlat, qlon, maximum_distance=area, k=10, p=2.0, no_weighting=False)
ill_interpolator = snn.interpolator(
qlat, qlon, maximum_distance=area, no_weighting=True)
# Get coordinates array from CCPStack
qz = deepcopy(self.z)
# Get data arrays
Np, Nz = self.ccp.shape
Nq = len(qlat)
# Interpolate
qccp = np.zeros((Nq, Nz))
qill = np.zeros((Nq, Nz))
# Interpolate each depth
for _i, (_ccpcol, _illumcol) in enumerate(
zip(self.ccp.T, self.hits.T)):
qccp[:, _i] = ccp_interpolator(_ccpcol)
qill[:, _i] = ill_interpolator(_illumcol)
# Interpolate onto regular depth grid for easy representation with
# imshow
ccp2D_interpolator = RegularGridInterpolator(
(qdists, qz), np.where(np.isnan(qccp), 0, qccp))
ill2D_interpolator = RegularGridInterpolator(
(qdists, qz), qill)
# Where to sample
qqz = np.arange(np.min(qz), np.max(qz), 1)
xqdists, xqz = np.meshgrid(qdists, qqz)
# Interpolate
qccp = ccp2D_interpolator((xqdists, xqz))
qill = ill2D_interpolator((xqdists, xqz))
return slat, slon, sdists, qlat, qlon, qdists, qz, qill, qccp, area
    def plot_cross_section(self, *args, **kwargs):
"""
See documentation for
:func:`~pyglimer.ccp.plot_utils.plot_cross_section.plot_cross_section`
"""
return plot_cross_section(self, *args, **kwargs)
    def compute_stack(
self, vel_model: str, rfloc: str,
in_format: str = 'hdf5', preproloc: str = None,
network: str or list or None = None,
station: str or list or None = None, geocoords: tuple or None = None,
pattern: list or None = None, save: str or bool = False,
filt: tuple or None = None,
binrad: float = 1/(2*np.cos(np.radians(30))), append_pp: bool = False,
multiple: bool = False, mc_backend: str = 'joblib'):
"""
Computes a ccp stack in self.ccp, using the data from rfloc.
The stack can be limited to some networks and stations.
:param vel_model: Velocity model located in data. Either `iasp91.dat`
for 1D raytracing or `3D` for 3D raytracing with GYPSuM model.
Using 3D Raytracing will cause the code to take about 30% longer.
:type vel_model: str
:param rfloc: Parental folder in which the receiver functions in
            time domain are. The default is 'output/waveforms/RF'.
:type rfloc: str, optional
:param in_format: **INPUT FORMAT** of receiver functions. Either
`'sac'` or `'hdf5'`. Defaults to 'hdf5'.
:type in_format: str, optional
:param preproloc: Parental folder containing the preprocessed mseeds.
Only needed if option geocoords is used. The default is
None. **Should be None if hdf5 is used!**
:type preproloc: str, optional
:param network: This parameter is ignored if the pattern is given.
Network or networks that are to be included in the ccp stack.
Standard unix wildcards are allowed. If None, the whole database
will be used. The default is None.
:type network: str or list, optional
:param station: This parameter is ignored if the pattern is given.
Station or stations that are to be included in the ccp stack.
Standard unix wildcards are allowed.
Can only be list if type(network)=str.
If None, the whole database
will be used. The default is None.
:type station: str or list, optional
:param geocoords: Include all stations in the rectangle given by
(minlat, maxlat, minlon, maxlon). Will be ignored if pattern or
network is given, by default None.
:type geocoords: Tuple, optional
:param pattern: Search pattern for files. Overwrites network,
station, and geocooords options. Usually only used by
:func:`~pyglimer.ccp.ccp.init_ccp()`, defaults to None.
:type pattern: list, optional
:param save: Either False if the ccp should not be saved or string with
filename will be saved in config.ccp. Will be saved as pickle file.
:type save: str or bool, optional
:param filt: Decides whether to filter the receiver function prior to
depth migration. Either a Tuple of form `(lowpassf, highpassf)` or
`None` / `False`.
:type filt: tuple, optional
:param binrad: Defines the bin radius with
bin radius = binrad*distance_bins.
Full Overlap = 1/(2*cosd(30)), Full coverage: 1.
The default is full overlap.
:type binrad: float, optional
:param append_pp: appends piercing point coordinates if True, so they
can later be plotted. Not recommended for big data sets.
The default is false. **Deprecated for multi-core**
:type append_pp: bool, optional
:param multiple: Append receiver functions for first order multiples.
It can be decided later, whether they should be used in the final
ccp stack. Will result in a longer computation time.
:type multiple: bool, optional
:param mc_backend: Multi-core backend to use for the computations.
            Can be either `"joblib"` (multiprocessing) or `"MPI"`. Note that
MPI compatibility is only implemented with hdf5 files.
:raises ValueError: For wrong inputs
.. warning::
This will take a long while for big data sets!
.. note::
            When using `mc_backend='MPI'`, this has to be started from a
            terminal via mpirun.
.. note::
MPI is only compatible with receiver functions saved in hdf5
format.
.. note::
Note that the grid should be big enough for the provided networks
and stations. Best to create the CCPStack object by using
            :func:`~pyglimer.ccp.ccp.init_ccp()`.
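        .. note::
            Example call (a minimal sketch; the paths and station codes
            below are illustrative)::

                >>> ccp.compute_stack(
                ...     vel_model='iasp91.dat', rfloc='output/waveforms/RF',
                ...     network='IU', station='HRV', save='ccp_IU_HRV.pkl')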
"""
if in_format.lower() == 'hdf5':
in_format = 'h5'
if binrad < 1/(2*np.cos(np.radians(30))):
raise ValueError(
"""Minimum allowed binradius is bin distance/(2*cos(30deg)).
Else the grid will not cover all the surface area."""
)
self.binrad = binrad*self.bingrid.edist
# How many closest points are queried by the bintree?
        # See Gauss circle problem: sosq[n] is the number of ways to write n
        # as a sum of two squares, i.e. the number of grid points at
        # squared distance n; valid up to the maximum binrad of 4.
        # Using the ceiling function to account for inaccuracies
sosq = [1, 4, 4, 0, 4, 8, 0, 0, 4, 4, 8, 0, 0, 8, 0, 0, 4]
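        # Example: binrad=1 -> ceil(1**2 + 1) = 2 -> sum(sosq[:2]) = 1+4 = 5,
        # i.e. the bin itself plus its four nearest grid neighbours.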
try:
n_closest_points = sum(sosq[0:int(np.ceil(binrad**2+1))])
except IndexError:
raise ValueError(
"""Maximum allowed binradius is 4 times the bin distance""")
# Compute maxdist in euclidean space
self.binrad_eucl = epi2euc(self.binrad)
folder = os.path.join(rfloc, self.bingrid.phase)
start = time.time()
try:
self.logger.info('Stacking started')
except AttributeError:
# Loggers for the CCP script
self.logger = logging.getLogger('pyglimer.ccp.ccp')
self.logger.setLevel(logging.INFO)
# Create handler to the log
fh = logging.FileHandler('logs/ccp.log')
fh.setLevel(logging.INFO)
self.logger.addHandler(fh)
self.logger.info('Stacking started')
if multiple:
# Use multiples?
endi = np.where(self.z == maxzm)[0][0] + 1
self.bins_m1 = np.zeros(self.bins[:, :endi].shape)
self.bins_m2 = np.zeros(self.bins[:, :endi].shape)
self.illumm = np.zeros(self.bins[:, :endi].shape, dtype=int)
else:
endi = None
if network and isinstance(network, str) and not pattern and\
in_format.lower() == 'sac':
# Loop over fewer files
folder = os.path.join(folder, network)
if station and type(station) == str:
folder = os.path.join(folder, station)
elif geocoords and not pattern:
# Stations by coordinates
# create empty lists for station latitude and longitude
lat = (geocoords[0], geocoords[1])
lon = (geocoords[2], geocoords[3])
db = StationDB(
preproloc or folder, phase=self.bingrid.phase, use_old=False)
net, stat = db.find_stations(lat, lon, phase=self.bingrid.phase)
pattern = ["{}.{}".format(a_, b_) for a_, b_ in zip(net, stat)]
# Clear memory
del db, net, stat
if not pattern:
pattern = [] # List of input constraints
if not (network and station):
pattern.append('*.%s' % in_format)
else:
pattern = ["*{}*%s".format(_a) % in_format for _a in pattern]
streams = [] # List of files filtered for input criteria
# List of all input files in the desired format
infiles = glob(os.path.join(
folder, '**', '*.%s' % in_format), recursive=True)
# Special rule for files imported from Matlab
if network in ('matlab', 'raysum'):
pattern.append('*.sac')
# Set filter patterns
elif network:
if type(network) == list:
if station:
if type(station) == list:
for net in network:
for stat in station:
pattern.append('*%s.%s*.%s' % (
net, stat, in_format))
else:
raise ValueError(
"Combination of network and station is invalid")
else:
for net in network:
pattern.append('*%s.*.%s' % (net, in_format))
elif type(network) == str:
if station:
if type(station) == str:
pattern.append('*%s.%s*.%s' % (
network, station, in_format))
elif type(station) == list:
for stat in station:
                        pattern.append('*%s.%s*.%s' % (
                            network, stat, in_format))
else:
pattern.append('*%s.*.%s' % (network, in_format))
elif station:
raise ValueError(
"You have to provide both network and station \
code if you want to filter by station")
# Do filtering
for pat in pattern:
streams.extend(fnmatch.filter(infiles, pat))
# clear memory
del pattern, infiles
# Actual CCP stack #
# The test data needs to be filtered
if network == 'matlab':
filt = [.03, 1.5] # bandpass frequencies
self.logger.debug('Using data from the following files: %s' % (
str(streams)))
# Define grid boundaries for 3D RT
latb = (self.coords[0].min(), self.coords[0].max())
lonb = (self.coords[1].min(), self.coords[1].max())
if in_format.lower() == 'h5':
self._create_ccp_from_hdf5_mc(
streams, multiple, append_pp, n_closest_points, vel_model,
latb, lonb, filt, endi, mc_backend)
elif in_format.lower() == 'sac':
self._create_ccp_from_sac(
streams, multiple, append_pp, n_closest_points, vel_model,
latb, lonb, filt, endi, mc_backend)
else:
raise NotImplementedError(
'Unknown input format %s.' % in_format
)
end = time.time()
self.logger.info("Stacking finished.")
self.logger.info(dt_string(end-start))
self.conclude_ccp()
# save file
if save:
self.write(filename=save)
def _create_ccp_from_hdf5_mc(
self, streams: List[str], multiple: bool, append_pp: bool,
n_closest_points: int, vel_model: str, latb: Tuple[float, float],
lonb: Tuple[float, float], filt: Tuple[float, float], endi,
mc_backend: str):
"""
Computes a CCP stack by reading a rf database in hdf5.
:param streams: List of hdf5 files to read.
:type streams: List[str]
:param multiple: Regard crustal multiples for stack?
:type multiple: bool
:param append_pp: Append ppoints to the object?
:type append_pp: bool
:param n_closest_points: number of closest points to query from kd-tree
:type n_closest_points: int
        :param vel_model: The velocity model to use for depth migration.
:type vel_model: str
:param latb: Boundary for the CCP calculation in form
`(minlat, maxlat)`.
:type latb: Tuple[float, float]
:param lonb: Boundary for the CCP calculation in form
`(minlon, maxlon)`.
:type lonb: Tuple[float, float]
:param filt: Filter to impose on rfs before migration. Either False
or tuple in form (minfreq, maxfreq).
:type filt: Tuple[float, float]
:param endi: Only relevant if `multiple=True`. Last depth index to
consider for multiple computation.
:type endi: int
        :param mc_backend: Multi-core backend to use. Either `"joblib"` or
            `"MPI"`.
        :type mc_backend: str
"""
# note that streams are actually files - confusing variable name
if mc_backend.lower() == 'joblib':
out = Parallel(n_jobs=-1, backend='multiprocessing')(
delayed(self._create_ccp_from_hdf5)(
f, multiple, append_pp, n_closest_points, vel_model,
latb, lonb, filt)
for f in tqdm(streams))
# Awful way to solve it, but the best I could find
self._unpack_joblib_output(multiple, endi, out)
elif mc_backend.upper() == 'MPI':
from mpi4py import MPI
comm = MPI.COMM_WORLD
psize = comm.Get_size()
rank = comm.Get_rank()
pmap = (np.arange(len(streams))*psize)/len(streams)
pmap = pmap.astype(np.int32)
ind = pmap == rank
ind = np.arange(len(streams), dtype=int)[ind]
# get new MPI compatible loggers
self.logger = create_mpi_logger(self.logger, rank)
for ii in ind:
try:
kk, jj, datal, datalm1, datalm2, N =\
self._create_ccp_from_hdf5(
streams[ii], multiple, append_pp, n_closest_points,
vel_model, latb, lonb, filt)
except KeyError:
self.logger.warning(
f'No Receiver Functions in file {streams[ii]}.')
continue
self.N += N
if multiple:
self._unpack_output_multiple(
kk, jj, datal, datalm1, datalm2, endi)
else:
self._unpack_output(kk, jj, datal)
# Gather results
self.N = comm.allreduce(self.N, op=MPI.SUM)
comm.Allreduce(MPI.IN_PLACE, [self.illum, MPI.DOUBLE], op=MPI.SUM)
comm.Allreduce(MPI.IN_PLACE, [self.bins, MPI.DOUBLE], op=MPI.SUM)
if multiple:
comm.Allreduce(
MPI.IN_PLACE, [self.illumm, MPI.DOUBLE], op=MPI.SUM)
comm.Allreduce(
MPI.IN_PLACE, [self.bins_m1, MPI.DOUBLE], op=MPI.SUM)
comm.Allreduce(
MPI.IN_PLACE, [self.bins_m2, MPI.DOUBLE], op=MPI.SUM)
self.logger.info('Number of receiver functions used: '+str(self.N))
def _create_ccp_from_hdf5(
self, f: str, multiple: bool, append_pp: bool,
n_closest_points: int, vel_model: str, latb: Tuple[float, float],
lonb: Tuple[float, float], filt: Tuple[float, float]) -> Tuple[
List[np.ndarray], List[np.ndarray], List[np.ndarray],
List[np.ndarray], List[np.ndarray]]:
"""
Does ppoint calculation and depth migration for rf-functions in hdf5
format. (per file based)
Subsequently, computes the closest gridpoints in the ccp object.
:param f: HDF5 file to read.
:type f: str
:param multiple: Regard crustal multiples for stack?
:type multiple: bool
:param append_pp: Append ppoints to the object?
:type append_pp: bool
:param n_closest_points: number of closest points to query from kd-tree
:type n_closest_points: int
        :param vel_model: The velocity model to use for depth migration.
:type vel_model: str
:param latb: Boundary for the CCP calculation in form
`(minlat, maxlat)`.
:type latb: Tuple[float, float]
:param lonb: Boundary for the CCP calculation in form
`(minlon, maxlon)`.
:type lonb: Tuple[float, float]
:param filt: Filter to impose on rfs before migration. Either False
or tuple in form (minfreq, maxfreq).
:type filt: Tuple[float, float]
        :return: (List of lateral indices of closest bins, List of depth
            indices of closest gridpoint, list of rf data to stack for each
            of those indices, list of multiple data (pps), list of multiple
            data (pss), number of used receiver functions)
        :rtype: Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray],
            List[np.ndarray], List[np.ndarray], int]
"""
net, stat, _ = os.path.basename(f).split('.')
kk = []
jj = []
datal = []
datalm1 = []
datalm2 = []
# number of rfs
N = 0
with RFDataBase(f, mode='r') as rfdb:
for rftr in rfdb.walk('rf', net, stat, self.bingrid.phase):
rfst = RFStream([rftr])
out = self._ccp_process_rftr(
rfst, filt, vel_model, latb, lonb, multiple, append_pp,
n_closest_points)
if not out:
continue
N += 1
kk.append(out[0])
jj.append(out[1])
datal.append(out[2])
if multiple:
datalm1.append(out[3])
datalm2.append(out[4])
return kk, jj, datal, datalm1, datalm2, N
def _create_ccp_from_sac(
self, streams: List[str], multiple: bool, append_pp: bool,
n_closest_points: int, vel_model: str, latb: Tuple[float, float],
lonb: Tuple[float, float], filt: Tuple[float, float], endi: int,
mc_backend: str):
"""
Computes a CCP stack by reading a rf database in sac.
:param streams: List of SAC files to read.
:type streams: List[str]
:param multiple: Regard crustal multiples for stack?
:type multiple: bool
:param append_pp: Append ppoints to the object?
:type append_pp: bool
:param n_closest_points: number of closest points to query from kd-tree
:type n_closest_points: int
        :param vel_model: The velocity model to use for depth migration.
:type vel_model: str
:param latb: Boundary for the CCP calculation in form
`(minlat, maxlat)`.
:type latb: Tuple[float, float]
:param lonb: Boundary for the CCP calculation in form
`(minlon, maxlon)`.
:type lonb: Tuple[float, float]
:param filt: Filter to impose on rfs before migration. Either False
or tuple in form (minfreq, maxfreq).
:type filt: Tuple[float, float]
:param endi: Only relevant if `multiple=True`. Last depth index to
consider for multiple computation.
:type endi: int
        :param mc_backend: Multi-core backend to use. **Only `joblib` is
            supported for SAC input.**
        :type mc_backend: str
        :raises NotImplementedError: For unknown mc backends.
"""
if mc_backend.upper() == 'MPI':
raise NotImplementedError(
'MPI in conjunction with sac is not implemented.'
)
self.logger.info('Number of receiver functions used: '+str(self.N))
# Split job into n chunks
num_cores = cpu_count()
self.logger.info('Number of cores used: '+str(num_cores))
mem = virtual_memory()
self.logger.info('Available system memory: '+str(mem.total*1e-6)+'MB')
# How many tasks should the main process be split in?
# If the number is too high, it will need unnecessarily
# much disk space or caching (probably also slower).
# If it's too low, the progressbar won't work anymore.
# !Memmap arrays are getting extremely big, size in byte
# is given by: nrows*ncolumns*nbands
# with 64 cores and 10 bands/core that results in about
# 90 GB for each of the arrays for a finely gridded
# CCP stack of North-America
# Check maximum information that can be saved
# using half of the RAM.
# approximately 8 byte per element in RF + 8 byte for idx
mem_needed = 3*8*850*len(streams)*100
# For stacking with multiple modes, we'll need more memory
if multiple:
mem_needed = mem_needed*1.75
# Split into several jobs if too much data
if mem_needed > mem.total:
N_splits = int(np.ceil(mem_needed/mem.total))
split_size = int(np.ceil(len(streams)/N_splits))
            print(
                'Splitting RFs into %s chunks due to insufficient memory. '
                'Each progressbar will only show the progress per chunk.'
                % str(N_splits))
else:
split_size = len(streams)
for stream_chunk in chunks(streams, split_size):
num_split_max = num_cores*100 # maximal no of jobs
len_split = int(np.ceil(len(stream_chunk)/num_cores))
if len_split > 10:
if int(np.ceil(len(stream_chunk)/len_split)) > num_split_max:
len_split = int(np.ceil(len_split/num_split_max))
else:
len_split = int(np.ceil(len_split/(len_split/10)))
num_split = int(np.ceil(len(stream_chunk)/len_split))
out = Parallel(n_jobs=num_cores, backend='multiprocessing')(
delayed(self.multicore_stack)(
st, append_pp, n_closest_points, vel_model,
latb, lonb, filt, multiple)
for _, st in zip(
tqdm(range(num_split)),
chunks(stream_chunk, len_split)))
self._unpack_joblib_output(multiple, endi, out)
def _unpack_joblib_output(self, multiple: bool, endi: int, out: Tuple):
"""
        Unpack the output that comes from joblib and stack it into the CCP
        object.
"""
# Awful way to solve it, but the best I could find
if multiple:
for kk, jj, datal, datalm1, datalm2, N in out:
self._unpack_output_multiple(
kk, jj, datal, datalm1, datalm2, endi)
self.N += N
else:
for kk, jj, datal, _, _, N in out:
self._unpack_output(kk, jj, datal)
self.N += N
def _unpack_output_multiple(
self, kk: List[int], jj: List[int], datal: List[float],
datalm1: List[float], datalm2: List[float], endi: int):
"""
Unpack the output of the kdtree and stack them into the CCP object.
Use this function if `multiple=True`
"""
for k, j, data, datam1, datam2 in zip(kk, jj, datal, datalm1, datalm2):
            # self.N is incremented by the caller (which adds the per-file
            # RF count), so no additional counting is needed here.
self.bins[k, j] = self.bins[k, j] + data[j]
# hit counter + 1
self.illum[k, j] = self.illum[k, j] + 1
# multiples
iii = np.where(j <= endi-1)[0]
jm = j[iii]
km = k[iii]
try:
self.bins_m1[
km, jm] = self.bins_m1[km, jm] + datam1[jm]
self.bins_m2[
km, jm] = self.bins_m2[km, jm] + datam2[jm]
                self.illumm[km, jm] = self.illumm[km, jm] + 1
except IndexError as e:
if not len(datam1) or not len(datam2):
continue
else:
raise IndexError(e)
def _unpack_output(self, kk: List[int], jj: List[int], datal: List[float]):
"""
Unpack the output of the kdtree and stack them into the CCP object.
Use this function if `multiple=False`.
"""
# Count RFs
for k, j, data in zip(kk, jj, datal):
self.bins[k, j] = self.bins[k, j] + data[j]
# hit counter + 1
self.illum[k, j] = self.illum[k, j] + 1
            # self.N is incremented by the caller (which adds the per-file
            # RF count), so no additional counting is needed here.
    def multicore_stack(
self, stream: List[str], append_pp: bool, n_closest_points: int,
vmodel: str, latb: Tuple[float, float], lonb: Tuple[float, float],
filt: Tuple[float, float], multiple: bool) -> Tuple[
List[int], List[int], List[np.ndarray], List[np.ndarray], List[
np.ndarray]]:
"""
Takes in chunks of data to be processed on one core.
:param stream: List of file locations.
:type stream: list
:param append_pp: Should piercing points be appended?.
:type append_pp: Bool
:param n_closest_points: Number of Closest points that the KDTree
should query.
:type n_closest_points: int
        :param vmodel: Name of the velocity model that should be used for
            the raytracing.
        :type vmodel: str
        :param latb: Tuple in Form (minlat, maxlat). To save RAM on 3D
            raytracing. Will remain unused for 1D RT.
        :type latb: Tuple
        :param lonb: Tuple in Form (minlon, maxlon). To save RAM on 3D
            raytracing. Will remain unused for 1D RT.
        :type lonb: Tuple
        :param filt: Should the RFs be filtered before the ccp stack?
            If so, provide (lowcof, highcof).
        :type filt: bool or tuple
        :return: Lists of bin indices, depth indices, and rf-data (plus
            multiple data and the number of used RFs).
        :rtype: Tuple[list, list, list, list, list, int]
"""
kk = []
jj = []
datal = []
datalm1 = []
datalm2 = []
N = 0
for st in stream:
rft = read_rf(st)
out = self._ccp_process_rftr(
rft, filt, vmodel, latb, lonb, multiple, append_pp,
n_closest_points)
if not out:
continue
N += 1
if multiple:
k, j, data, lm1, lm2 = out
datalm1.append(lm1)
datalm2.append(lm2)
else:
k, j, data = out
kk.append(k)
jj.append(j)
datal.append(data)
return kk, jj, datal, datalm1, datalm2, N
def _ccp_process_rftr(
self, rfst: RFStream, filt: Tuple[float, float], vmodel: str,
latb: Tuple[float, float], lonb: Tuple[float, float], multiple: bool,
append_pp: bool, n_closest_points: int) -> Tuple[
np.ndarray, np.ndarray, np.ndarray]:
"""
Do the depth migration and the piercing point computation for
a single receiver function. Then, find the laterally closest bin
to the ppoint.
:param rfst: Input receiver function
:type rfst: RFStream
:param filt: Filter the receiver function? If so, provide high- and
lowpass frequency as a Tuple (in Hz).
:type filt: Tuple[float, float]
:param vmodel: The velocity model to use.
:type vmodel: str
:param latb: boundaries for the moveout correction in the form
(minlat, maxlat).
:type latb: Tuple[float, float]
:param lonb: boundaries for the moveout correction in the form
(minlon, maxlon).
:type lonb: Tuple[float, float]
:param multiple: Should the moveout correction be done for crustal
multiples as well?
:type multiple: bool
:param append_pp: Should the coordinates of the ppoint be saved?
:type append_pp: bool
:param n_closest_points: Number of closest bin points to return.
:type n_closest_points: int
        :return: The indices of the closest bins k, the depth indices j, and
            the receiver function data. If multiples are used, also the
            multiple data.
        :rtype: Tuple[np.ndarray[int], np.ndarray[int], np.ndarray]
"""
if filt:
rfst.filter(
'bandpass', freqmin=filt[0], freqmax=filt[1],
zerophase=True, corners=2)
try:
z, rf, rfm1, rfm2 = rfst[0].moveout(
vmodel, latb=latb, lonb=lonb, taper=False,
multiple=multiple)
except ComplexModel.CoverageError as e:
# Wrong stations codes can raise this
self.logger.warning(e)
return
except Exception as e:
# Just so the script does not interrupt. Did not occur up
# to now
self.logger.exception(e)
return
lat = np.array(rf.stats.pp_latitude)
lon = np.array(rf.stats.pp_longitude)
if append_pp:
plat = np.pad(
lat, (0, len(self.z)-len(lat)), constant_values=np.nan)
plon = np.pad(
lon, (0, len(self.z)-len(lon)), constant_values=np.nan)
self.pplat.append(plat)
self.pplon.append(plon)
k, j = self.query_bin_tree(lat, lon, n_closest_points)
if multiple:
depthi = np.where(z == maxzm)[0][0]
try:
lm1 = (rfm1.data[:depthi+1])
lm2 = (rfm2.data[:depthi+1])
except AttributeError as e:
                # for interpolation errors
                self.logger.exception(
                    'Error in multiple computation. Mostly caused by an issue '
                    + 'during the interpolation. Original error message '
                    + 'below.')
self.logger.exception(e)
lm1 = None
lm2 = None
return k, j, rf.data, lm1, lm2
return k, j, rf.data
    def conclude_ccp(
self, keep_empty: bool = False, keep_water: bool = False, r: int = 0,
multiple: bool = False, z_multiple: int = 200):
"""
Averages the CCP-bin and populates empty cells with average of
neighbouring cells. No matter which option is
chosen for the parameters, data is never lost entirely. One can always
execute a new conclude_ccp() command. However, decisions that are
taken here will affect the .mat output and plotting outputs.
:param keep_empty: Keep entirely empty bins. The default is False.
:type keep_empty: bool, optional
:param keep_water: For False all bins that are on water-covered areas
will be discarded, defaults to False
:type keep_water: bool, optional
        :param r: Fields with less than r hits will be set equal 0.
            Has an effect only for r >= 1, by default 0.
:type r: int, optional
        :param multiple: Use multiples in stack. Either False or a weighting;
            i.e. 'linear' for a linearly weighted stack between the three
            phases, 'zk' for a Zhu & Kanamori approach, or 'pws' for a
            phase-weighted stack. Use 'm1' to use only the first multiple
            mode (no stack), 'm2' for RFs created only with the second
            multiple phase (PSS), and 'm' for an equal-weight stack of m1
            and m2. By default False.
:type multiple: bool or str
:param z_multiple: Until which depth [km] should multiples be
considered,
maximal value is 200 [km]. Will only be used if multiple=True.
By default 200 km.
:type z_multiple: int, optional
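        .. note::
            Example (a minimal sketch): average the stack with Zhu &
            Kanamori weighted multiples and zero out bins with fewer than
            3 hits::

                >>> ccp.conclude_ccp(r=3, multiple='zk', z_multiple=200)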
"""
if z_multiple > 200:
raise ValueError('Maximal depth for multiples is 200 km.')
endi = np.where(self.z == z_multiple)[0][0] + 1
if multiple == 'linear':
self.ccp = np.hstack(((
np.divide(self.bins[:, :endi], self.illum[:, :endi]+1)
+ np.divide(self.bins_m1[:, :endi], self.illumm[:, :endi]+1)
+ np.divide(
self.bins_m2[:, :endi], self.illumm[:, :endi]+1))/3,
np.zeros(self.bins[:, endi:].shape)))
elif multiple == 'zk':
self.ccp = np.hstack((
.7*np.divide(self.bins[:, :endi], self.illum[:, :endi]+1)
+ .2*np.divide(self.bins_m1[:, :endi], self.illumm[:, :endi]+1)
+ .1*np.divide(
self.bins_m2[:, :endi], self.illumm[:, :endi]+1),
np.zeros(self.bins[:, endi:].shape)))
elif multiple == 'm1':
self.ccp = np.hstack((
np.divide(self.bins_m1[:, :endi], self.illumm[:, :endi]+1),
np.zeros(self.bins[:, endi:].shape)))
elif multiple == 'm2':
self.ccp = np.hstack((
np.divide(self.bins_m2[:, :endi], self.illumm[:, :endi]+1),
np.zeros(self.bins[:, endi:].shape)))
elif multiple == 'm':
self.ccp = np.hstack((
np.divide(self.bins_m2[:, :endi]+self.bins_m1[:, :endi],
2*(self.illumm[:, :endi])+1),
np.zeros(self.bins[:, endi:].shape)))
elif not multiple:
self.ccp = np.divide(self.bins, self.illum+1)
else:
raise ValueError(
'The requested multiple stacking mode is \
misspelled or not yet implemented')
self.hits = self.illum.copy()
if r > 1:
index = np.where(np.logical_and(self.illum > 1, self.illum < r))
self.ccp[index] = 0
self.hits[index] = 0
if not keep_empty:
i = np.where(self.hits.sum(axis=1) == 0)[0]
self.ccp = np.delete(self.ccp, i, 0)
self.hits = np.delete(self.hits, i, 0)
self.coords_new = (np.delete(self.coords[0], i, 1),
np.delete(self.coords[1], i, 1))
# Check if bins are on land
if not keep_water:
if keep_empty:
self.coords_new = self.coords.copy()
lats, lons = self.coords_new
# list of indices that contain water
index = globe.is_ocean(lats, lons)[0, :]
self.ccp = np.delete(self.ccp, index, 0)
self.hits = np.delete(self.hits, index, 0)
self.coords_new = (np.delete(self.coords_new[0], index, 1),
np.delete(self.coords_new[1], index, 1))
# Else everything will always pick coords_new instead of coords
if keep_water and keep_empty:
try:
del self.coords_new
            except AttributeError:
pass
    def write(self, filename=None, folder='.', fmt="pickle"):
"""
        Saves the CCPStack object as pickle or matlab file. Only save
        as Matlab file for exporting, as not all information can be
        preserved!
:param filename: Name as which to save, file extensions aren't
necessary.
:type filename: str, optional
        :param folder: Output folder, defaults to '.'
:type folder: str, optional
:param fmt: Either `"pickle"`, `'npz'` or `"matlab"` for .mat,
defaults to "pickle".
:type fmt: str, optional
:raises ValueError: For unknown formats.
.. note::
Saving in numpy (npz) format is the most robust option. However,
loading those files will take the longest time as some attributes
have to be recomputed. **NUMPY FORMAT IS RECOMMENDED FOR
ARCHIVES!**
.. warning::
Pickling files is the fastest option, but might lead to
incompatibilities between different PyGLImER versions.
**USE PICKLE IF YOU WOULD LIKE TO SAVE FILES FOR SHORTER AMOUNTS
OF TIME.**
.. warning::
Saving as Matlab files is only meant if one wishes to plot with
the old Matlab Receiver function exploration toolset (not
recommended anymore).
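        .. note::
            Example (a minimal sketch; the filename is illustrative)::

                >>> ccp.write(filename='ccp_stack', folder='.', fmt='npz')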
"""
# delete logger (cannot be pickled)
try:
del self.logger
except AttributeError:
# For backwards compatibility
pass
# Standard filename
if not filename:
filename = '%s_%s_%s' % (
self.bingrid.phase, str(self.bingrid.edist), str(self.binrad))
# Remove filetype identifier if provided
x = filename.split('.')
if len(x) > 1:
if x[-1].lower() in ('pkl', 'mat', 'npz'):
fmt = x[-1].lower()
filename = '.'.join(x[:-1])
# output location
oloc = os.path.join(folder, filename)
if fmt in ("pickle", "pkl"):
with open(oloc + ".pkl", 'wb') as output:
pickle.dump(self, output, pickle.HIGHEST_PROTOCOL)
# Save as Matlab file (exporting to plot)
elif fmt in ("matlab", 'mat'):
# Change vectors so it can be corrected for elevation
# Only for old data standard
if min(self.z) == 0:
illum = np.pad(self.hits, ((0, 0), (round(10/.1), 0)))
depth = np.hstack((np.arange(-10, 0, .1), self.z))
ccp = np.pad(self.ccp, ((0, 0), (round(10/.1), 0)))
else:
illum = self.hits
depth = self.z
ccp = self.ccp
if hasattr(self, 'coords_new') and self.coords_new[0].size:
lat_ccp, lon_ccp = self.coords_new
else:
lat_ccp, lon_ccp = self.coords
d = {}
d.update({'RF_ccp': ccp,
'illum': illum,
'depth_ccp': depth,
'lat_ccp': lat_ccp,
'lon_ccp': lon_ccp,
'CSLAT': self.bingrid.latitude,
'CSLON': self.bingrid.longitude,
'clat': np.array(self.pplat),
'clon': np.array(self.pplon),
'cdepth': self.ppz.astype(float)})
sio.savemat(oloc + '.mat', d)
elif fmt.lower() in ('numpy', 'np', 'npz'):
_save_ccp_npz(self, oloc)
# Unknown formats
else:
raise ValueError("The format type ", fmt, "is unkown.")
    def plot_bins(self):
"""
A simple map view of the bins.
"""
if hasattr(self, 'coords_new'):
coords = self.coords_new
else:
coords = self.coords
plot_bins(self.bingrid.stations, coords)
    def compute_kdtree_volume(
self, qlon: np.ndarray or list = None,
qlat: np.ndarray or list = None, zmax: float = None,
verbose: bool = True):
"""Using the CCP kdtree, we get the closest few points and compute
the weighting using a distance metric. if points are too far away,
they aren't weighted
Parameters
----------
qlon : np.ndarray or list
grid defining array for longitude
qlat : np.ndarray or list
grid defining array for latitude
        zmax : float, optional
            maximum depth to compute, by default None
        verbose : bool, optional
            print progress information, by default True

        Returns
        -------
        tuple
            (qlat, qlon, qz, qill, qccp, area): the query coordinate
            vectors, the interpolated illumination and CCP volumes, and
            the search area around each point.
"""
        def vprint(verbose, *args, **kwargs):
            # Only print when verbose is set
            if verbose:
                print(*args, **kwargs)
        verboseprint = partial(vprint, verbose)
# Area considered around profile points
area = 2 * self.binrad
# Get global bounds
if qlat is None or qlon is None:
minlat = np.min(self.coords_new[0])
maxlat = np.max(self.coords_new[0])
minlon = np.min(self.coords_new[1])
maxlon = np.max(self.coords_new[1])
minlon, maxlon, minlat, maxlat = fix_map_extent(
[minlon, maxlon, minlat, maxlat, ])
# Create mesh vectors
qlat = np.arange(minlat, maxlat + self.binrad/2, self.binrad/2)
qlon = np.arange(minlon, maxlon + self.binrad/2, self.binrad/2)
else:
minlat, maxlat = np.min(qlat), np.max(qlat)
minlon, maxlon = np.min(qlon), np.max(qlon)
minlon, maxlon, minlat, maxlat = fix_map_extent(
[minlon, maxlon, minlat, maxlat])
# Create mesh
mlat, mlon = np.meshgrid(qlat, qlon)
# smaller/larger is fine as the extent was fixed.
cpos = np.where(
(self.coords_new[0] > minlat)
& (self.coords_new[0] < maxlat)
& (self.coords_new[1] > minlon)
& (self.coords_new[1] < maxlon)
)[1]
# Get interpolation weights and rows.
verboseprint("Creating Spherical Nearest Neighbour class ...", end="")
snn = SphericalNN(
self.coords_new[0][:, cpos], self.coords_new[1][:, cpos])
verboseprint(" done.")
verboseprint("Creating interpolators ...", end="")
ccp_interpolator = snn.interpolator(
mlat, mlon, maximum_distance=area, k=10, p=2.0, no_weighting=False)
ill_interpolator = snn.interpolator(
mlat, mlon, maximum_distance=area, no_weighting=True)
verboseprint(" done.")
# Get coordinates array from CCPStack
qz = deepcopy(self.z)
if zmax is not None:
pos = np.argmin(np.abs(qz-zmax))
qz = qz[:pos]
else:
pos = len(qz)
# Get data arrays
Nz = pos
Nlat, Nlon = len(qlat), len(qlon)
# Interpolate
qccp = np.zeros((Nlat, Nlon, Nz))
qill = np.zeros((Nlat, Nlon, Nz))
# Interpolate each depth
maintext = "KDTree interpolation ... (intensive part)"
verboseprint(maintext)
for _i, (_ccpcol, _illumcol) in enumerate(
zip(self.ccp[cpos, :pos].T, self.hits[cpos, :pos].T)):
if _i % int((Nz/10)) == 0:
sys.stdout.write("\033[F") # back to previous line
sys.stdout.write("\033[K")
verboseprint(
maintext + f"---> {_i+1:0{len(str(Nz))}d}/{Nz:d}")
qccp[:, :, _i] = ccp_interpolator(_ccpcol).T
qill[:, :, _i] = ill_interpolator(_illumcol).T
verboseprint("... done.")
# Interpolate onto regular depth grid for easy representation with
# imshow
verboseprint(
"Regular Grid interpolation ... (less intensive part)", end="")
ccp3D_interpolator = RegularGridInterpolator(
(qlat, qlon, qz), np.where(np.isnan(qccp), 0, qccp))
ill3D_interpolator = RegularGridInterpolator(
(qlat, qlon, qz), qill)
# Where to sample
qqz = np.arange(np.min(qz), np.max(qz), 1)
xqlat, xqlon, xqz = np.meshgrid(qlat, qlon, qqz)
# Interpolate
qccp = ccp3D_interpolator((xqlat, xqlon, xqz))
qill = ill3D_interpolator((xqlat, xqlon, xqz))
verboseprint(" done.")
return qlat, qlon, qqz, qill, qccp, area
    def create_vtk_mesh(
self, geo=True, bbox: list = None, filename: str = None):
"""Creates a mesh with given bounding box s
Parameters
----------
geo : bool, optional
flag whether the output mesh is in geographical coordinates or
meters, by default True
bbox : list or None, optional
bounding box [minlon, maxlon, minlat, maxlat]. If None
No boundaries are taken, by default None
filename : str or None, optional
If set, the computed grid will be output as VTK file under the
given filename. This file can then later be opened using either the
plotting tool or, e.g., Paraview. If None, no file is written.
By default, None.
Returns
-------
VTK.UnstructuredGrid
outputs a vtk mesh that can be opened in, e.g., Paraview.
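
        Examples
        --------
        A minimal sketch (the bounding box and filename are illustrative):

        >>> grid = ccp.create_vtk_mesh(
        ...     geo=True, bbox=[-125, -100, 40, 50], filename='ccp.vtu')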
"""
# Get coordinates
lat = np.squeeze(self.coords_new[0])
lon = np.squeeze(self.coords_new[1])
# Filter ccpstacks if not none
if bbox is None:
bbox = [-180, 180, -90, 90]
pos = np.where(((
bbox[0] <= lon) & (lon <= bbox[1])
& (bbox[2] <= lat) & (lat <= bbox[3])))[0]
lat = lat[pos]
lon = lon[pos]
# Create VTK point cloud at the surface to triangulate.
r = R_EARTH * np.ones_like(lat)
points = np.vstack((deepcopy(lon*DEG2KM).flatten(),
deepcopy(lat*DEG2KM).flatten(),
deepcopy(r).flatten())).T
pc = pv.PolyData(points)
# Triangulate 2D surface
mesh = pc.delaunay_2d(alpha=self.binrad*1.5*DEG2KM)
# Use triangles and their connectivity to create 3D Mesh of wedges
points = deepcopy(mesh.points)
n_points = mesh.n_points
# Get cells and create first layer of wedges at the surface
cells = mesh.faces.reshape(mesh.n_cells, 4)
cells[:, 0] = 6
cells = np.hstack((cells, n_points + cells[:, 1:]))
newcells = deepcopy(cells)
# Give second layer of points in the wedge the right depth!
zpoints = deepcopy(np.array(points))
zpoints[:, 2] = R_EARTH - self.z[1]
newpoints = np.vstack((points, zpoints))
        # Loop over remaining depths to populate the mesh.
for _z in self.z[2:]:
# Add cells
extra_cells = cells
extra_cells[:, 1:] += n_points
newcells = np.vstack((newcells, extra_cells))
# Add points
zpoints = deepcopy(np.array(points))
zpoints[:, 2] = R_EARTH - _z
newpoints = np.vstack((newpoints, zpoints))
# Define Cell types
newcelltypes = np.array(
[vtk.VTK_WEDGE] * newcells.shape[0], dtype=np.uint8)
        # Redefine the location of the points if geographic coordinates are
        # wanted instead of Cartesian(-ish) ones
if geo:
x, y, z = geo2cart(
newpoints[:, 2],
newpoints[:, 1] * KM2DEG,
newpoints[:, 0] * KM2DEG)
newpoints = np.vstack((x, y, z)).T
# Create Unstructured Grid
grid = pv.UnstructuredGrid(newcells, newcelltypes, newpoints)
# Populate with RF and illumination values
grid['RF'] = deepcopy(self.ccp[pos, :].T.ravel())
grid['illumination'] = deepcopy(self.hits[pos, :].T.ravel())
# If file name is set write unstructured grid to file!
if filename is not None:
writer = vtk.vtkXMLUnstructuredGridWriter()
writer.SetInputData(grid)
writer.SetFileName(filename)
writer.Write()
return grid
    def explore(
self, qlon: np.ndarray or list = None, qlat: np.ndarray or list = None,
zmax: float = None):
"""
Creates a volume exploration window set. One window is for all
plots, and the other window is generated with sliders such that one
can explore how future plots should be generated. It technically does
not require any inputs, as simply the binning size will be used to
create a meshgrid fitting to the bin distance distribution.
One can however fix the extent and size of the grid by defining the
grids coordinate vectors.
Parameters
----------
        qlon: np.ndarray, optional
            Longitude gridpoints for the grid. Defines longitudinal extent
            and resolution. If None, it's determined automatically from
            the CCPStack's resolution and grid extent.
            Must be monotonically increasing.
        qlat: np.ndarray, optional
            Latitude gridpoints for the grid. Defines latitudinal extent
            and resolution. If None, it's determined automatically from
            the CCPStack's resolution and grid extent.
            Must be monotonically increasing.
zmax: float, optional
Maximum depth (km) that the CCP-exploration extends to.
Returns
-------
VolumeExploration
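
        Examples
        --------
        A minimal sketch; the grid is derived automatically from the bin
        distance:

        >>> ccp.explore(zmax=250)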
"""
# Compute the volume max radius at depth is z*0.33
qlat, qlon, qz, qill, qccp, area = self.compute_kdtree_volume(
qlon, qlat, zmax=zmax)
# Launch plotting tool
return VolumeExploration(qlon, qlat, qz, qccp)
    def plot_volume_sections(
self, qlon: np.ndarray, qlat: np.ndarray, zmax: float or None = None,
lonsl: float or None = None, latsl: float or None = None,
zsl: float or None = None, r: float or None = None,
minillum: int or None = None, show: bool = True):
"""
Creates the same plot as the `explore` tool, but(!) statically and
with more options left to the user.
!!! IT IS IMPORTANT for the plotting function that the input arrays
are monotonically increasing!!!
Parameters
----------
qlon : np.ndarray
Monotonically increasing ndarray. No failsafes implemented.
qlat : np.ndarray
Monotonically increasing ndarray. No failsafes implemented.
        zmax : float or None, optional
            Maximum depth (km) to interpolate to, by default None
lonsl : float or None, optional
Slice location, if None, the center of lon is used,
by default None
latsl : float or None, optional
Slice location, if None, the center of lat is used,
by default None
zsl : float or None, optional
Slice location, if None, the center of depth is used,
by default None
r : float or None, optional
Max radius for interpolation values taken into account [km],
if None bingrid.edist * DEG2KM * 2.0, by default None
        minillum: int or None, optional
            Minimum number of illumination points used in the interpolation,
            everything below is downweighted by the square reciprocal
show : bool, optional
whether to make figure window visible, default True
        Returns
        -------
        VolumePlot
            The static volume plot object.
"""
# Change distance to delete things
if r is None:
r = self.bingrid.edist * DEG2KM * 2.0
# Compute the volume max radius at depth is z*0.33
qlat, qlon, qz, qill, qccp, area = self.compute_kdtree_volume(
qlon, qlat, zmax=zmax)
return VolumePlot(qlon, qlat, qz, qccp, xl=lonsl, yl=latsl, zl=zsl,
show=show)
    def map_plot(
self, plot_stations: bool = False, plot_bins: bool = False,
plot_illum: bool = False, profile: list or tuple or None = None,
p_direct: bool = True, outputfile: str or None = None,
format: str = 'pdf', dpi: int = 300, geology: bool = False,
title: str or None = None):
"""
Create a map plot of the CCP Stack containing user-defined information.
Parameters
----------
plot_stations : bool, optional
Plot the station locations, by default False
plot_bins : bool, optional
Plot bin location, by default False
plot_illum : bool, optional
Plot bin location with colour depending on the depth-cumulative
illumination at bin b, by default False
        profile : list or tuple or None, optional
Plot locations of cross sections into the plot. Information about
each cross section is given as a tuple (lon1, lon2, lat1, lat2),
several cross sections are given as a list of tuples in said
format, by default None.
        p_direct : bool, optional
            If True, the list in profile describes areas with coordinates of
            the lower left and upper right corner, by default True
outputfile : str or None, optional
Save plot to file, by default None
format : str, optional
Format for the file to be saved as, by default 'pdf'
dpi : int, optional
            DPI for non-vector format plots, by default 300
geology : bool, optional
Plot a geological map. Requires internet connection
title : str, optional
Set title for plot.
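
        Examples
        --------
        A minimal sketch (the output filename is illustrative):

        >>> ccp.map_plot(plot_stations=True, plot_illum=True,
        ...              outputfile='ccp_map', format='pdf')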
"""
if hasattr(self, 'coords_new') and self.coords_new[0].size:
bincoords = self.coords_new
else:
bincoords = self.coords
# Set boundaries for map:
lat = (
np.floor(min(bincoords[0][0]))-1, np.ceil(max(bincoords[0][0])+1))
lon = (np.floor(min(bincoords[1][0]))-1,
np.ceil(max(bincoords[1][0])+1))
plot_map_ccp(
lat, lon, plot_stations, self.bingrid.latitude,
self.bingrid.longitude, plot_bins, bincoords, self.bingrid.edist,
plot_illum, self.hits, profile, p_direct, outputfile=outputfile,
format=format, dpi=dpi, geology=geology, title=title)
    def pick_phase(
self, pol: str = '+', depth: list = [None, None]) -> PhasePick:
"""
Pick the strongest negative or strongest positive gradient from a
predefined depth-range.
Parameters
----------
pol : str, optional
Either '+' for positive velocity gradient or '-' for negative
gradient, by default '+'
depth : list, optional
            List with two elements [minz, maxz]; defines the depth window in
            which the phase picker will look for maxima and minima,
by default [None, None]
Returns
-------
PhasePick
A phasepick object, which subsequently can be plotted.
Raises
------
ValueError
            For erroneous inputs.
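
        Examples
        --------
        A minimal sketch (the depth window is illustrative):

        >>> pick = ccp.pick_phase(pol='+', depth=[300, 500])
        >>> pick.plot()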
"""
        self.conclude_ccp(r=3)
        # Copy so the mutable default argument is not altered in-place
        depth = list(depth)
        # Find indices
        for ii, d in enumerate(depth):
            if d is not None:
                depth[ii] = np.abs(self.z-d).argmin()
# Find minimum in vector
ccp_short = self.ccp[:, depth[0]:depth[1]]
        # Exclude insufficiently illuminated bins
        illum_flat = np.sum(self.hits[:, depth[0]:depth[1]], axis=1)
        # indices of bins to delete (avoid shadowing the builtin id)
        idel = np.where(illum_flat < ccp_short.shape[1]*10)
        ccp_short = np.delete(ccp_short, idel, 0)
        coords = (
            np.delete(self.coords_new[0], idel, 1),
            np.delete(self.coords_new[1], idel, 1))
if pol == '+':
# Amplitudes
a = ccp_short.max(axis=1)
# Depths
z = self.z[depth[0]:depth[1]][ccp_short.argmax(axis=1)]
elif pol == '-':
a = ccp_short.min(axis=1)
z = self.z[depth[0]:depth[1]][ccp_short.argmin(axis=1)]
else:
raise ValueError(
'Choose either \'+\' to return the highest '
+ 'positive velocity gradient or \'-\' to return the '
+ 'highest negative velocity gradient.')
p_phase = PhasePick(coords, a, pol, z, depth)
return p_phase
def read_ccp(filename: str, fmt: str = None) -> CCPStack:
"""
Read CCP-Stack class file from input folder.
    :param filename: Filename of the input file including the file ending.
    :type filename: str
:param fmt: File format, can be none if the filename has an ending,
possible options are `"pickle"` or `"npz"`. The default is None.
:type fmt: str, optional
:raises ValueError: For unknown formats
:return: CCPStack object
:rtype: :class:`~pyglimer.ccp.ccp.CCPStack`
.. note::
Loading Files from numpy format will take a while as some
recomputations are necessary.
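    .. note::
        Example (a minimal sketch; the filename is illustrative)::

            >>> from pyglimer.ccp.ccp import read_ccp
            >>> ccp = read_ccp('ccp_IU.pkl')  # format inferred from ending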
"""
# Trying to identify filetype from ending:
if not fmt:
x = filename.split('.')
if len(x) == 1:
raise ValueError(
"Could not determine format, please provide a valid format")
if x[-1] == 'pkl':
fmt = 'pickle'
elif x[-1] == 'npz':
fmt = 'npz'
else:
raise ValueError(
"Could not determine format, please provide a valid format")
else:
fmt = fmt.lower()
# Open provided file
if fmt == "pickle":
with open(filename, 'rb') as infile:
ccp = pickle.load(infile)
elif fmt == 'npz':
ccp = CCPStack(None, None, None, None, _load=filename)
else:
raise ValueError("Unknown format ", fmt)
return ccp
def _load_ccp_npz(ccp: CCPStack, filename: str) -> CCPStack:
"""
    Load a file from numpy format. For that, we do some recomputations
    and also have to unpack the used arrays.
:param ccp: The initialised :class:`~pyglimer.ccp.ccp.CCPStack` object.
:type ccp: CCPStack
:param filename: path to load from.
:type filename: str
:return: The loaded :class:`~pyglimer.ccp.ccp.CCPStack` object.
:rtype: CCPStack
"""
loaded = dict(np.load(filename))
for k in loaded.pop('attrs_pkl'):
loaded[k] = loaded[k][0]
ccp.bingrid = BinGrid(
loaded.pop('statlat'), loaded.pop('statlon'), loaded.pop('edist'),
loaded.pop('phase'))
# Compute bins
ccp.bingrid.compute_bins()
# Initialize kdTree for Bins
ccp.bingrid.bin_tree()
ccp.coords = ccp.bingrid.bins
for k in loaded:
ccp.__dict__[k] = loaded[k]
return ccp
def init_ccp(
rfloc: str, spacing: float, vel_model: str, phase: str,
preproloc: str = None, network: str or List[str] = None,
station: str or List[str] = None,
geocoords: Tuple[float, float, float, float] = None,
compute_stack: bool = False, filt: Tuple[float, float] = None,
binrad: float = np.cos(np.radians(30)), append_pp: bool = False,
multiple: bool = False, save: str or bool = False, verbose: bool = True,
format: str = 'hdf5', mc_backend: str = 'joblib'
) -> CCPStack:
"""
Computes a ccp stack in self.ccp using data from statloc and rfloc.
The stack can be limited to some networks and
stations.
:param rfloc: Parental directory in that the RFs are saved.
:type rfloc: str
:param spacing: Angular distance between each bingrid point.
:type spacing: float
:param vel_model: Velocity model located in data. Either iasp91 (1D
raytracing) or 3D for 3D raytracing using a model compiled from GyPSuM.
:type vel_model: str
:param phase: The teleseismic phase to be used.
:type phase: str
    :param preproloc: Parental directory in which the preprocessed miniseeds
        are saved, defaults to None. **Should be None when working with h5
        files!**
:type preproloc: str, optional
    :param network: Network or networks that are to be included in the ccp
        stack.
        Standard unix wildcards are allowed. If None, the whole database
        will be used. The default is None.
    :type network: str or list, optional
:param station: Station or stations that are to be included in the ccp
stack. Standard unix wildcards are allowed. Can only be list if
type(network)==str. If None, the whole database will be used.
The default is None.
:type station: str or list, optional
:param geocoords: An alternative way of selecting networks and stations.
Given in the form (minlat, maxlat, minlon, maxlon), defaults to None
:type geocoords: Tuple, optional
:param compute_stack: If true it will compute the stack by calling
:meth:`~pyglimer.ccp.ccp.CCPStack.compute_stack()`.
That can take a long time! The default is False.
:type compute_stack: bool, optional
:param filt: Decides whether to filter the receiver function prior to
depth migration. Either a Tuple of form `(lowpassf, highpassf)` or
`None` / `False`.
:type filt: Tuple[float, float], optional
:param binrad: *Only used if compute_stack=True*
Defines the bin radius with bin radius = binrad*distance_bins.
Full Overlap = cosd(30), Full coverage: 1.
The default is full overlap.
:type binrad: float, optional
    :param append_pp: *Only used if compute_stack=True.*
        Appends piercing point locations to object. Can be used to plot
        pps on map. Not recommended for large datasets as it takes A LOT
        longer and makes the file a lot larger. The default is False.
    :type append_pp: bool, optional
:param multiple: Should the CCP Stack be prepared to work with multiples?
It can be chosen later, whether the RFs from multiples are to be
incorporated into the CCP Stack. By default False.
:type multiple: bool, optional
:param save: Either False if the ccp should not be saved or string with
filename will be saved. Will be saved as pickle file.
The default is False.
:type save: bool or str, optional
:param verbose: Display info in terminal., defaults to True
:type verbose: bool, optional
:param format: The **INPUT FORMAT** from which receiver functions are read.
        If you saved your receiver functions as hdf5 files, set
        `format='hdf5'`. Defaults to hdf5.
:type format: str
:param mc_backend: Multi-core backend to use for the computations.
Can be either `"joblib"` (multiprocessing) or `"MPI"`. Not that MPI
compatibility is only implemented with hdf5 files.
:raises TypeError: For wrong inputs.
:return: CCPStack object.
    :rtype: :class:`~pyglimer.ccp.ccp.CCPStack`
    .. note::
        When using `mc_backend='MPI'`, this has to be started from a terminal
        via mpirun.
.. note::
MPI is only compatible with receiver functions saved in hdf5 format.
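    .. note::
        Example (a minimal sketch; the paths and coordinates below are
        illustrative)::

            >>> ccp = init_ccp(
            ...     rfloc='output/waveforms/RF', spacing=1.,
            ...     vel_model='iasp91.dat', phase='P',
            ...     geocoords=(40., 50., -125., -100.),
            ...     compute_stack=True, save='ccp_us.pkl')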
"""
if phase[-1].upper() == 'S' and multiple:
raise NotImplementedError(
'Multiple mode is not supported for phase S.')
if format.lower() in ('hdf5', 'h5', 'hdf'):
preproloc = None
format = 'hdf5'
db = StationDB(
preproloc or rfloc, phase=phase, use_old=False, hdf5=format == 'hdf5')
# Were network and stations provided?
# Possibility 1 as geo boundaries
if geocoords:
lat = (geocoords[0], geocoords[1])
lon = (geocoords[2], geocoords[3])
subset = db.geo_boundary(lat, lon, phase=phase)
else:
subset = db.db
net = list(subset['network'])
stat = list(subset['station'])
codes = list(subset['code'])
lats = list(subset['lat'])
lons = list(subset['lon'])
# Filter
if isinstance(network, str) and not isinstance(station, list):
        codef = fnmatch.filter(codes, '%s.%s' % (network, station or '*'))
elif isinstance(network, str) and isinstance(station, list):
codef = list(set(codes).intersection([
'%s.%s' % (network, stat) for stat in station]))
elif isinstance(network, list):
if station is not None and not isinstance(station, list):
raise TypeError(
"""Provide a list of stations, when using a list of
networks as parameter.""")
elif not station:
codef = list(set(codes).intersection([
'%s.*' % net for net in network]))
elif isinstance(station, list):
if len(station) != len(network):
raise ValueError(
'Length of network and station list have to be equal.')
codef = list(set(codes).intersection([
'%s.%s' % (net, stat) for net, stat in zip(network, station)]))
    if network is None:
        codef = codes
else:
# Find indices
ind = [codes.index(cf) for cf in codef]
net = np.array(net)[ind]
stat = np.array(stat)[ind]
lats = np.array(lats)[ind]
lons = np.array(lons)[ind]
logdir = os.path.join(
os.path.dirname(os.path.dirname(
os.path.abspath(rfloc))), 'logs')
ccp = CCPStack(
lats, lons, spacing, phase=phase, verbose=verbose, logdir=logdir)
# Clear Memory
del stat, lats, lons
    if not geocoords:
        pattern = None
    else:
        # Build the file pattern from the station codes selected above
        pattern = codef
if compute_stack:
ccp.compute_stack(
vel_model=vel_model, network=network, station=station, save=save,
filt=filt, multiple=multiple,
pattern=pattern, append_pp=append_pp, binrad=binrad, rfloc=rfloc,
in_format=format, mc_backend=mc_backend)
return ccp
def _save_ccp_npz(ccp: CCPStack, oloc: str):
"""
The point of this function is that, in contrast to pickling,
    the data is still accessible after PyGLImER has been upgraded.
For this, we will have to recompute some objects when loading.
(in particular, the bingrid object)
"""
# Save the important things from the bingrid object
d = deepcopy(ccp.__dict__)
d['phase'] = deepcopy(ccp.bingrid.phase)
d['statlat'] = deepcopy(ccp.bingrid.latitude)
d['statlon'] = deepcopy(ccp.bingrid.longitude)
d['edist'] = deepcopy(ccp.bingrid.edist)
d.pop('coords')
d.pop('bingrid')
# Attributes that need to be changed to arrays
apkl = []
for k in d.keys():
if isinstance(d[k], np.ndarray):
continue
elif isinstance(d[k], list):
d[k] = np.array(d[k])
continue
d[k] = np.array([d[k]])
apkl.append(k)
d['attrs_pkl'] = np.array(apkl)
np.savez_compressed(oloc, **d)