#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Database management and overview for the PyGLImER database.

:copyright:
    The PyGLImER development team (makus@gfz-potsdam.de).
:license:
    GNU Lesser General Public License, Version 3
    (https://www.gnu.org/copyleft/lesser.html)
:author:
    Peter Makus (makus@gfz-potsdam.de)

Created: Friday, 12th February 2020 03:24:30 pm
Last Modified: Tuesday, 25th October 2022 03:50:51 pm
'''
import fnmatch
import logging
import os
import shelve
from copy import deepcopy
from typing import Tuple
import glob

from joblib import Parallel, delayed
from obspy.clients.fdsn import Client, header
import pandas as pd

from pyglimer.data import finddir
from pyglimer.database.rfh5 import RFDataBase
from pyglimer.plot.plot_map import plot_station_db
def redownload_missing_statxmls(clients, phase, statloc, rawdir, verbose=True):
    """
    Fairly primitive program that walks through existing raw waveforms in
    rawdir, looks for missing station xmls in statloc and, afterwards,
    attempts a redownload.

    :param clients: List of clients (see obspy documentation for
        `~obspy.Client`).
    :type clients: list
    :param phase: Either "P" or "S", defines in which folder to look for
        mseeds.
    :type phase: str
    :param statloc: Folder, in which the station xmls are saved.
    :type statloc: str
    :param rawdir: Uppermost directory, in which the raw mseeds are saved
        (i.e. the directory above the phase division).
    :type rawdir: str
    :param verbose: Show some extra information, by default True
    :type verbose: bool, optional
    """
    ex = os.listdir(statloc)
    # remove file identifier
    for i, xml in enumerate(ex):
        x = xml.split('.')
        ex[i] = x[0] + '.' + x[1]
    wavdir = os.path.join(rawdir, phase)
    Parallel(n_jobs=-1)(
        delayed(__client__loop__)(client, ex, wavdir, statloc)
        for client in clients)
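
# A minimal usage sketch (not part of the original module): the provider names
# and directory layout below are assumptions and would have to match an
# existing PyGLImER download for the call to do anything useful.
def _example_redownload_statxmls():
    clients = ['IRIS', 'GFZ']          # FDSN providers understood by obspy
    statloc = 'output/stations'        # folder holding the station xmls
    rawdir = 'output/waveforms/raw'    # directory above the phase division
    # Walk <rawdir>/P and fetch any station xml missing from statloc
    redownload_missing_statxmls(clients, 'P', statloc, rawdir)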
def __client__loop__(client, existing, wavdir, statloc):
    client = Client(client)
    for _, _, files in os.walk(wavdir):
        for fi in files:
            f = fi.split('.')
            if f[-1] != 'mseed':
                continue
            code = f[0] + '.' + f[1]
            if code and code not in existing:
                out = os.path.join(statloc, code + '.xml')
                try:
                    # Parse the inventory and write it explicitly; passing
                    # filename= to get_stations would make it return None.
                    stat_inv = client.get_stations(
                        network=f[0], station=f[1], level='response')
                    stat_inv.write(out, format="STATIONXML")
                    existing.append(code)
                except (header.FDSNNoDataException, header.FDSNException):
                    pass  # wrong client


class StationDB(object):
    def __init__(
        self, dir: str, phase: str = None, use_old: bool = False,
            hdf5: bool = True, logdir: str = None):
        """
        Creates a pandas database of all available receiver functions.
        This database is entirely based on the info files in the
        "preprocessed" folder. Make sure that the output folder is not
        corrupted before running this. Creating this database does not take
        much time, so there will be no option to save it, as it should be up
        to date. However, there is an export function.

        :param dir: Parental folder, in which the preprocessed mseeds are
            saved (i.e. the folder above the phase division).
        :type dir: str
        :param phase: If just one of the primary phases should be checked -
            useful for computational efficiency when creating ccp.
            Default is None.
        :type phase: str, optional
        :param use_old: When turned on, it will read in the saved csv file.
            That is a lot faster, but it will obviously not update,
            defaults to False.
        :type use_old: bool, optional
        :param hdf5: Use HDF5 database instead of station xmls? Defaults to
            True.
        :type hdf5: bool, optional
        :param logdir: Directory for log file.
        :type logdir: str, optional
        """
        self.dir = dir
        if phase:
            self.phase = phase.upper()

        # 1. Initiate logger
        self.logger = logging.Logger(
            "pyglimer.database.stations.StationDBaseLogger")
        self.logger.setLevel(logging.WARNING)

        # FileHandler
        if not logdir:
            try:
                fh = logging.FileHandler(os.path.join(
                    dir, os.pardir, os.pardir, 'logs', 'StationDBase.log'))
            except FileNotFoundError:
                os.makedirs(os.path.join(
                    dir, os.pardir, os.pardir, 'logs'), exist_ok=True)
                fh = logging.FileHandler(os.path.join(
                    dir, os.pardir, os.pardir, 'logs', 'StationDBase.log'))
        else:
            fh = logging.FileHandler(os.path.join(logdir, 'StationDBase.log'))
        fh.setLevel(logging.WARNING)
        self.logger.addHandler(fh)

        # Formatter
        fmt = logging.Formatter(
            fmt='%(asctime)s - %(levelname)s - %(message)s')
        fh.setFormatter(fmt)

        # Check if there is already a saved database
        oloc = os.path.join(finddir(), 'database.csv')
        if use_old and os.path.isfile(oloc):
            self.db = pd.read_csv(oloc)
        elif hdf5:
            self.db = self._create_from_hdf5()
        else:
            self.db = self._create_from_info()

        # Save database; don't save if only one phase is requested newly
        if not phase:
            self.db.to_csv(oloc)

    def _create_from_hdf5(self) -> pd.DataFrame:
        """
        Creates a pandas database from information read from hdf5 files.

        :return: Simple pandas DataFrame with station info
        :rtype: pd.DataFrame
        """
        data = {
            'code': [], 'network': [], 'station': [], 'lat': [], 'lon': [],
            'elevation': []}
        for f in glob.glob(
                os.path.join(self.dir, self.phase, '*.*.h5'), recursive=True):
            net, stat, _ = os.path.basename(f).split('.')
            # For now, we only do coordinates. Never used anything else
            # anyways
            with RFDataBase(f, mode='r') as rfdb:
                lat, lon, el = rfdb.get_coords(net, stat, self.phase)
            # Skip files for which no coordinates could be found
            if lat is None:
                continue
            data['network'].append(net)
            data['station'].append(stat)
            data['code'].append('%s.%s' % (net, stat))
            data['lat'].append(lat)
            data['lon'].append(lon)
            data['elevation'].append(el)
        return pd.DataFrame.from_dict(data)

    def _create_from_info(self) -> pd.DataFrame:
        """
        Create pandas database.
"""ifself.phase:# Create data dictionarydata={'code':[],'network':[],'station':[],'lat':[],'lon':[],'elevation':[],'NP':[],'NPret':[],'NS':[],'NSret':[]}# oposing phaseifself.phase=='P':o='S'elifself.phase=='S':o='P'elifself.phase=='SKS':o='SKS'else:raiseValueError('Phase %s not supported.'%self.phase)folder=os.path.join(self.dir,self.phase,'by_station')# Check data availabilityforroot,_,filesinos.walk(folder):iflen(fnmatch.filter(files,'info.*'))==0:continue# Skip parent foldersinfof=(os.path.join(root,'info'))withshelve.open(infof,flag='r')asinfo:data['code'].append(info['network']+'.'+info['station'])data['network'].append(info['network'])data['station'].append(info['station'])data['lat'].append(info['statlat'])data['lon'].append(info['statlon'])data['elevation'].append(info['statel'])data['N'+self.phase].append(info['num'])data['N'+o].append(0)data['N'+o+'ret'].append(0)try:data['N'+self.phase+'ret'].append(info['numret'])exceptKeyError:data['N'+self.phase+'ret'].append(0)self.logger.debug(['No P-waveforms retained for station',info['network']+'.'+info['station']])# Return dataframereturnpd.DataFrame.from_dict(data)# Create data dictionarydataP={'code':[],'network':[],'station':[],'lat':[],'lon':[],'elevation':[],'NP':[],'NPret':[],'NS':[],'NSret':[]}dataS=deepcopy(dataP)# Read in info filesfolderP=os.path.join(self.dir,'P','by_station')folderS=os.path.join(self.dir,'S','by_station')# Check data availability of PRFsforroot,_,filesinos.walk(folderP):if'info.dat'notinfiles:continue# Skip parent foldersinfof=(os.path.join(root,'info'))withshelve.open(infof,flag='r')asinfo:dataP['code'].append(info['network']+'.'+info['station'])dataP['network'].append(info['network'])dataP['station'].append(info['station'])dataP['lat'].append(info['statlat'])dataP['lon'].append(info['statlon'])dataP['elevation'].append(info['statel'])dataP['NP'].append(info['num'])dataP['NS'].append(0)dataP['NSret'].append(0)try:dataP['NPret'].append(info['numret'])exceptKeyError:dataP['NPret'].append(0)self.logger.debug('No P-waveforms obtained for station %s.%s'%(info['network'],info['station']))# Check data availability of SRFsforroot,_,filesinos.walk(folderS):if'info.dat'notinfiles:continue# Skip parent folders or empty foldersinfof=(os.path.join(root,'info'))withshelve.open(infof,flag='r')asinfo:dataS['code'].append(info['network']+'.'+info['station'])dataS['network'].append(info['network'])dataS['station'].append(info['station'])dataS['lat'].append(info['statlat'])dataS['lon'].append(info['statlon'])dataS['elevation'].append(info['statel'])dataS['NS'].append(info['num'])try:dataS['NSret'].append(info['numret'])exceptKeyError:dataS['NSret'].append(0)self.logger.debug('No S-waveforms obtaimed for station %s.%s'%(info['network'],info['station']))# Merge the two dictionariesfori,cinenumerate(dataS['code']):ifcindataP['code']:j=dataP['code'].index(c)dataP['NS'][j]=dataS['NS'][i]dataP['NSret'][j]=dataS['NSret'][i]else:dataP['code'].append(c)dataP['network'].append(dataS['network'][i])dataP['station'].append(dataS['station'])dataP['lat'].append(dataS['lat'][i])dataP['lon'].append(dataS['lon'][i])dataP['elevation'].append(dataS['elevation'][i])dataP['NS'].append(dataS['NS'][i])dataP['NSret'].append(dataS['NSret'][i])dataP['NP'].append(0)dataP['NPret'].append(0)deldataSreturnpd.DataFrame.from_dict(dataP)defgeo_boundary(self,lat:Tuple[float,float],lon:Tuple[float,float],phase:str=None)->pd.DataFrame:""" Return a subset of the database filtered by location. 
        :param lat: Latitude boundaries in the form
            (minimum latitude, maximum latitude).
        :type lat: Tuple
        :param lon: Longitude boundaries in the form
            (minimum longitude, maximum longitude).
        :type lon: Tuple
        :param phase: 'P' or 'S'. If option is given, it will only return
            stations with accepted receiver functions for this phase.
            The default is None.
        :type phase: str, optional
        :return: Subset of the original DataFrame filtered by location.
        :rtype: pandas.DataFrame
        """
        a = self.db['lat'] >= lat[0]
        b = self.db['lat'] <= lat[1]
        c = self.db['lon'] >= lon[0]
        d = self.db['lon'] <= lon[1]

        if phase:
            phase = phase.upper()
            e = self.db['N' + phase + 'ret'] > 0
            subset = self.db[a & b & c & d & e]
        else:
            subset = self.db[a & b & c & d]

        return subset

    def find_stations(self, lat, lon, phase=None):
        """
        Returns list of networks and stations inside a geoboundary.
        """
        subset = self.geo_boundary(lat, lon, phase)
        return list(subset['network']), list(subset['station'])

    def plot(
        self, lat: Tuple[float, float] = None,
        lon: Tuple[float, float] = None,
        profile: Tuple[float, float] = None, p_direct: bool = True,
            outputfile: str = None, format: str = 'pdf', dpi: int = 300):
        """
        Plot the station coverage for a given area. Also allows plotting
        ccp-profiles.

        Parameters
        ----------
        lat : tuple or None, optional
            Latitude boundaries for the stations that should be included in
            the form (latmin, latmax), by default None.
        lon : tuple or None, optional
            Longitude boundaries for the stations that should be included in
            the form (lonmin, lonmax), by default None.
        profile : list or tuple or None, optional
            Profile or profiles that will be drawn as lines on the
            projection. Each profile is defined as a tuple:
            (lon1, lon2, lat1, lat2). Can be a list of such tuples.
            If p_direct is set to False, define the left and right corner
            the same way (then the plot is a rectangle), by default None.
        p_direct : bool, optional
            Plot the profile as a rectangle, by default True.
        outputfile : str or None, optional
            Write the plot to the given file, by default None.
        format : str, optional
            Output format if outputfile is given, by default 'pdf'.
        dpi : int, optional
            Pixel density, by default 300.

        Raises
        ------
        ValueError
            For wrong inputs.
        """
        if lat and lon:
            subset = self.geo_boundary(lat=lat, lon=lon)
            lat = (lat[0] - 10, lat[1] + 10)
            lon = (lon[0] - 10, lon[1] + 10)
            plot_station_db(
                list(subset['lat']), list(subset['lon']), lat=lat, lon=lon,
                profile=profile, p_direct=p_direct, outputfile=outputfile,
                format=format, dpi=dpi)
        elif not lat and not lon:
            plot_station_db(
                list(self.db['lat']), list(self.db['lon']), profile=profile,
                p_direct=p_direct, outputfile=outputfile, format=format,
                dpi=dpi)
        else:
            raise ValueError(
                'You have to provide both lat and lon as tuple or None.')
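

# A minimal end-to-end sketch (not part of the original module): the
# directory path and bounding box below are placeholders and would have to
# point at an existing PyGLImER "preprocessed" folder for this to run.
if __name__ == '__main__':
    # Build the database from the HDF5 receiver-function files for P.
    sdb = StationDB('output/waveforms/preprocessed', phase='P', hdf5=True)
    # Restrict to a rough bounding box and list the matching stations.
    nets, stats = sdb.find_stations(
        lat=(40., 50.), lon=(-10., 10.), phase='P')
    print(list(zip(nets, stats)))
    # Plot the station coverage for the same region to a pdf.
    sdb.plot(lat=(40., 50.), lon=(-10., 10.), outputfile='station_map.pdf')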