Source code for epsproc.util

# -*- coding: utf-8 -*-
"""
ePSproc utility functions

Collection of small functions for sorting etc.

14/10/19    Added string replacement function (generic)
11/08/19    Added matEleSelector

"""

import numpy as np
import re
import scipy.constants

# Package fns.
from epsproc.basicPlotters import molPlot

#*************** Summary & display functions

# Print some jobInfo stuff & plot molecular structure
def jobSummary(jobInfo = None, molInfo = None, tolConv = 1e-2):
    """
    Print some jobInfo stuff & plot molecular structure. (Currently very basic.)

    Parameters
    ------------
    jobInfo : dict, default = None
        Dictionary of job data, as generated by :py:func:`epsproc.IO.headerFileParse()` from source ePS output file.

    molInfo : dict, default = None
        Dictionary of molecule data, as generated by :py:func:`epsproc.IO.molInfoParse()` from source ePS output file.

    tolConv : float, default = 1e-2
        Used to check for convergence in ExpOrb outputs, which defines single-center expansion of orbitals.

    Returns
    -------
    jobInfo : dict
        Input job info, returned for convenience.

    """

    # Pull job summary info
    if jobInfo is not None:
        print('\n*** Job summary data')
        [print(line.strip('#')) for line in jobInfo['comments'][0:4]]
        print(f"\nElectronic structure input: {jobInfo['Convert'][0].split()[1].strip()}")
        print(f"Initial state occ:\t\t {jobInfo['OrbOccInit']}")
        print(f"Final state occ:\t\t {jobInfo['OrbOcc']}")
        print(f"IPot (input vertical IP, eV):\t\t {jobInfo['IPot']}")

        # Additional orb info
        print("\n*** Additional orbital info (SymProd)")
        iList = jobInfo['OrbOccInit'] - jobInfo['OrbOcc']
        print(f"Ionizing orb:\t\t\t {iList}")

        if molInfo is not None:
            # Get orbGrp for event
            iOrbGrp = np.flatnonzero(iList) + 1

            # Find entries in orbTableX
            orbSym = molInfo['orbTable'].where(molInfo['orbTable'].coords['OrbGrp'] == iOrbGrp, drop = True).coords['Sym']
            print(f"Ionizing orb sym:\t\t {np.unique(orbSym)}")

            orbIP = molInfo['orbTable'].where(molInfo['orbTable'].coords['OrbGrp'] == iOrbGrp, drop = True).sel(props = 'E')
            orbIPH = molInfo['orbTable'].where(molInfo['orbTable'].coords['OrbGrp'] == iOrbGrp, drop = True).sel(props = 'EH')
            print(f"Orb energy (eV):\t\t {np.unique(orbIP)}")
            print(f"Orb energy (H):\t\t\t {np.unique(orbIPH)}")

            orbIPnm = conv_ev_nm(np.unique(orbIP))
            print(f"Orb energy (cm^-1):\t\t {1/orbIPnm*1e7}")
            print(f"Threshold wavelength (nm):\t {np.abs(orbIPnm)[0]}")

            # Check ExpOrb outputs...
            ind = (molInfo['orbTable'][:,8].values < 1-tolConv) + (molInfo['orbTable'][:,8].values > 1+tolConv)
            if ind.any():
                print(f"\n*** Warning: some orbital convergences outside single-center expansion convergence tolerance ({tolConv}):")
                print(molInfo['orbTable'][ind, [0, 8]].values)

    # Display structure
    if molInfo is not None:
        print('\n*** Molecular structure\n')
        molPlot(molInfo)

    return jobInfo
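
# Example (sketch, hedged): typical use after parsing an ePS output file with the IO
# routines referenced in the docstring. The file name 'n2_demo.out' is hypothetical,
# and the parser call signatures are assumed from the docstrings above.
#
#   from epsproc.IO import headerFileParse, molInfoParse
#   jobInfo = headerFileParse('n2_demo.out')
#   molInfo = molInfoParse('n2_demo.out')
#   jobSummary(jobInfo = jobInfo, molInfo = molInfo)
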
# Print (LM) and symmetry sets with Pandas tables
def lmSymSummary(data):
    """Display summary info data tables.

    Works nicely in a notebook cell, with Pandas formatted table... but not from function?

    """

    print('\n*** Index summary\n')

    test = data.Sym.to_pandas()
    print('Symmetry sets')
    print(test.unstack())

    test = data.LM.to_pandas()
    print('\n(L,M) sets')
    print(test.unstack().T)
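
# Example (sketch, hedged): summarise (L,M) and symmetry index sets for a set of matrix
# elements, assumed here to be an Xarray with 'LM' and 'Sym' coords (e.g. the first item
# returned by epsproc.IO.readMatEle(); dataSet is illustrative only).
#
#   lmSymSummary(dataSet[0])
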
#*************** Selection functions

# Selector function for matrix elements in Xarray
def matEleSelector(da, thres = None, inds = None, dims = None, sq = False, drop = True):
    """
    Select & threshold raw matrix elements in an Xarray.

    Parameters
    ----------
    da : Xarray
        Set of matrix elements to sub-select

    thres : float, optional, default None
        Threshold value for abs(matElement), keep only elements > thres.
        This is *element-wise*.

    inds : dict, optional, default None
        Dictionary of additional selection criteria, in name:value format.
        These correspond to parameter dimensions in the Xarray structure.
        E.g. inds = {'Type':'L','Cont':'A2'}

    dims : str or list of strs, dimensions to look for max & threshold, default None
        Set for *dimension-wise* thresholding. If set, this is used *instead* of element-wise thresholding.
        List of dimensions, which will be checked vs. threshold for max value, according to abs(dim.max) > threshold
        This allows for consistent selection of continuous parameters over a dimension, by a threshold.

    sq : bool, optional, default False
        Squeeze output singleton dimensions.

    drop : bool, optional, default True
        Passed to da.where() for thresholding, drop coord labels for values below threshold.

    Returns
    -------
    daOut
        Xarray structure of selected matrix elements.
        Note that Nans are dropped if possible.

    Example
    -------
    >>> daOut = matEleSelector(da, inds = {'Type':'L','Cont':'A2'})

    """

    # Iterate over other selection criteria
    # This may return view or copy - TBC - but seems to work as expected.
    # http://xarray.pydata.org/en/v0.12.3/indexing.html#copies-vs-views
    if inds is not None:
        # For inds as dict, e.g. {'Type':'L','it':1,'Cont':'A2'}
        # May want to send as list, or automate vs. dim names?
        # NOTE - in current dev code this is used to reindex, so .squeeze() causes issues!
        da = da.sel(inds)

    # Reduce dims by thresholding on abs values
    # Do this after selection to ensure Nans removed.
    if thres is not None and dims is None:
        daOut = da.where(np.abs(da) > thres, drop = drop)
    else:
        daOut = da

    # If dims is set, check over dims for consistency.
    # WILL this just produce same results as thres then squeeze...?
    if dims is not None:
        daOut = daOut.where(np.abs(da).max(dim = dims) > thres, drop = drop)

    if sq:
        daOut = daOut.squeeze()  # Squeeze dims.

    return daOut
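
# Example (sketch, hedged): threshold and sub-select matrix elements read with
# epsproc.IO.readMatEle(). The file name 'n2_demo.out' and the keyword arguments
# to readMatEle() are assumptions for illustration only.
#
#   from epsproc.IO import readMatEle
#   dataSet = readMatEle(fileIn = 'n2_demo.out')
#   # Keep only 'L' (length gauge) elements with abs value > 1e-2, squeeze singletons.
#   daSel = matEleSelector(dataSet[0], thres = 1e-2, inds = {'Type':'L'}, sq = True)
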
# Select over vals from data structure (list)
# Currently only used in IO.matEleGroupDim
def dataGroupSel(data, dInd):
    """Subselect from a [2D array, 1D array] data list, grouped by unique values along row dInd of the 2D array."""

    a = data[0]

    dataSub = []

    uVals = np.unique(a[dInd,:])

    for val in uVals:
        # Get matching terms and subset data
        # iSel = np.nonzero(a[dInd,:]==val)
        iSel = (a[dInd,:]==val)
        dataSub.append([data[0][:,iSel], data[1][iSel]])

    return dataSub
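
# Example: group by unique values in row 0 of the 2D array (arrays are illustrative only).
#
#   a = np.array([[1, 1, 2],
#                 [10, 20, 30]])
#   vals = np.array([0.1, 0.2, 0.3])
#   dataGroupSel([a, vals], 0)
#   # -> [[array([[1, 1], [10, 20]]), array([0.1, 0.2])],
#   #     [array([[2], [30]]), array([0.3])]]
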
# Xarray groupby + compare values
# STARTED... but not finished.  For basic diff along a dimension, just use da.diff(dim),
# see http://xarray.pydata.org/en/stable/generated/xarray.DataArray.diff.html#xarray.DataArray.diff
# def groupCmp(data, dim):
#     """
#     Basic routine to compare sets of values by dimension, using Xarray groupby functionality.
#
#     Parameters
#     ----------
#     data : Xarray
#         Data for comparison
#
#     dim : str
#         Dimension label for grouping
#
#     Returns
#     -------
#
#     """
#
#     dGroup = data.groupby(dim)
#
#     # Check differences between groups
#     for gTest in dGroup:


#************************** LIST functions

# Return list of standard dataArray dims for matrix elements
def matEdimList(sType = 'stacked'):
    """
    Return standard list of dimensions for matrix elements.

    Parameters
    ----------
    sType : string, optional, default = 'stacked'
        Selected 'stacked' or 'unstacked' dimensions.
        Set 'sDict' to return a dictionary of unstacked <> stacked dims mappings for use with xr.stack({dim mapping}).

    Returns
    -------
    list : set of dimension labels.

    """
    if sType == 'stacked':
        # stackedDims
        return ['LM', 'Eke', 'Sym', 'mu', 'it', 'Type']

    elif sType == 'sDict':
        return {'LM':['l','m'],'Sym':['Cont', 'Targ', 'Total']}

    else:
        # unstackedDims
        return ['l','m', 'Eke', 'Cont', 'Targ', 'Total', 'mu', 'it', 'Type']
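
# Example (sketch, hedged): the 'sDict' form is intended for use with xr.stack();
# daUnstacked below is a hypothetical Xarray carrying the unstacked dims.
#
#   matEdimList()                      # ['LM', 'Eke', 'Sym', 'mu', 'it', 'Type']
#   matEdimList(sType = 'unstacked')   # ['l', 'm', 'Eke', 'Cont', 'Targ', 'Total', 'mu', 'it', 'Type']
#   # daStacked = daUnstacked.stack(matEdimList(sType = 'sDict'))
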
# Return list of standard dataArray dims for BLM values
def BLMdimList(sType = 'stacked'):
    """
    Return standard list of dimensions for calculated BLM.

    Parameters
    ----------
    sType : string, optional, default = 'stacked'
        Selected 'stacked' or 'unstacked' dimensions.
        Set 'sDict' to return a dictionary of unstacked <> stacked dims mappings for use with xr.stack({dim mapping}).

    Returns
    -------
    list : set of dimension labels.

    """
    if sType == 'stacked':
        # stackedDims
        return ['Euler', 'Eke', 'BLM']

    elif sType == 'sDict':
        return {'BLM':['l','m'],'Euler':['P','T','C']}

    else:
        # unstackedDims
        return ['Eke', 'l', 'm', 'P', 'T', 'C']
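
# Example: the three return forms.
#
#   BLMdimList()                      # ['Euler', 'Eke', 'BLM']
#   BLMdimList(sType = 'unstacked')   # ['Eke', 'l', 'm', 'P', 'T', 'C']
#   BLMdimList(sType = 'sDict')       # {'BLM': ['l', 'm'], 'Euler': ['P', 'T', 'C']}
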
# Return list of standard dataArray dims for Euler angles
def eulerDimList(sType = 'stacked'):
    """
    Return standard list of dimensions for frame definitions, from :py:func:`epsproc.sphCalc.setPolGeoms()`.

    Parameters
    ----------
    sType : string, optional, default = 'stacked'
        Selected 'stacked' or 'unstacked' dimensions.
        Set 'sDict' to return a dictionary of unstacked <> stacked dims mappings for use with xr.stack({dim mapping}).

    Returns
    -------
    list : set of dimension labels.

    """
    if sType == 'stacked':
        # stackedDims
        return ['Euler']

    elif sType == 'sDict':
        return {'Euler':eulerDimList(sType = 'unstacked')}

    else:
        # unstackedDims
        return ['Label','P','T','C']

# Return list of standard dataArray dims for ADMs
def ADMdimList(sType = 'stacked'):
    """
    Return standard list of dimensions for ADMs, from :py:func:`epsproc.sphCalc.setADMs()`.

    Parameters
    ----------
    sType : string, optional, default = 'stacked'
        Selected 'stacked' or 'unstacked' dimensions.
        Set 'sDict' to return a dictionary of unstacked <> stacked dims mappings for use with xr.stack({dim mapping}).

    Returns
    -------
    list : set of dimension labels.

    """
    if sType == 'stacked':
        # stackedDims
        return ['ADM','t']

    elif sType == 'sDict':
        return {'ADM':['K','Q','S'],'t':'t'}

    else:
        # unstackedDims
        return ['K','Q','S','t']

# Return a dict of allowed dataTypes, corresponding to ePS commands/file segments, or epsproc processed data.
def dataTypesList():
    """
    Return a dict of allowed dataTypes, corresponding to epsproc processed data.

    Each dataType lists 'source', 'desc' and 'recordType' fields.

    - 'source' fields correspond to ePS functions which get or generate the data.
    - 'desc' brief description of the dataType.
    - 'recordType' gives the required segment in ePS files (and associated parser).
      If the segment is not present in the source file, then the dataType will not be available.

    TODO: best choice of data structure here?  Currently nested dictionary.

    """

    dataDict = {'BLM' :
                    {'source':'epsproc.MFBLM',
                     'desc':'Calculated MF beta parameters from epsproc.MFBLM(), based on dipole matrix elements from ePS.',
                     'recordType':'DumpIdy',
                     'dims': BLMdimList(sType = 'sDict')
                     },
                'matE' :
                    {'source':'epsproc.IO.readMatEle',
                     'desc':'Raw photoionization matrix elements from ePS, DumpIdy command and file segments.',
                     'recordType':'DumpIdy',
                     'dims': matEdimList(sType = 'sDict')
                     },
                'EDCS' :
                    {'source':'epsproc.IO.readMatEle',
                     'desc':'Electron scattering DCS results from ePS, EDCS command and file segments.',
                     'recordType':'EDCS'
                     },
                'XSect' :
                    {'source':'CrossSection',
                     'desc':'Photoionization cross section results from ePS, GetCro command and CrossSection file segments.',
                     'recordType':'CrossSection'
                     },
                'Euler' :
                    {'source':'epsproc.sphCalc.setPolGeoms()',
                     'desc':'Frame rotation definitions in terms of Euler angles and corresponding Quaternions.',
                     'dims':eulerDimList(sType = 'sDict')
                     },
                'ADM' :
                    {'source':'epsproc.sphCalc.setADMs()',
                     'desc':'Defines ADMs, A(K,Q,S), for aligned frame distributions.',
                     'dims':ADMdimList(sType = 'sDict')
                     }
                }

    return dataDict
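
# Example: look up the file segment and dimensions required for a given dataType.
#
#   dt = dataTypesList()
#   dt['matE']['recordType']   # 'DumpIdy'
#   dt['matE']['dims']         # {'LM': ['l', 'm'], 'Sym': ['Cont', 'Targ', 'Total']}
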
#***************** Convenience functions...

# Multistring replace
# See https://gist.github.com/bgusach/a967e0587d6e01e889fd1d776c5f3729
# https://stackoverflow.com/questions/6116978/how-to-replace-multiple-substrings-of-a-string
def stringRepMap(string, replacements, ignore_case=False):
    """
    Given a string and a replacement map, it returns the replaced string.

    :param str string: string to execute replacements on
    :param dict replacements: replacement dictionary {value to find: value to replace}
    :param bool ignore_case: whether the match should be case insensitive
    :rtype: str

    CODE from:
    https://gist.github.com/bgusach/a967e0587d6e01e889fd1d776c5f3729
    https://stackoverflow.com/questions/6116978/how-to-replace-multiple-substrings-of-a-string
    ... more or less verbatim.

    Thanks to *bgusach* for the Gist.

    """
    # If case insensitive, we need to normalize the old string so that later a replacement
    # can be found. For instance with {"HEY": "lol"} we should match and find a replacement for "hey",
    # "HEY", "hEy", etc.
    if ignore_case:
        def normalize_old(s):
            return s.lower()

        re_mode = re.IGNORECASE

    else:
        def normalize_old(s):
            return s

        re_mode = 0

    replacements = {normalize_old(key): val for key, val in replacements.items()}

    # Place longer ones first to keep shorter substrings from matching where the longer ones should take place
    # For instance given the replacements {'ab': 'AB', 'abc': 'ABC'} against the string 'hey abc', it should produce
    # 'hey ABC' and not 'hey ABc'
    rep_sorted = sorted(replacements, key=len, reverse=True)
    rep_escaped = map(re.escape, rep_sorted)

    # Create a big OR regex that matches any of the substrings to replace
    pattern = re.compile("|".join(rep_escaped), re_mode)

    # For each match, look up the new string in the replacements, being the key the normalized old string
    return pattern.sub(lambda match: replacements[normalize_old(match.group(0))], string)
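
# Example: multi-substring replacement in a single pass (strings are illustrative only).
#
#   stringRepMap('x_1.0eV.inp', {'.inp':'.out', 'x_':'run_'})
#   # -> 'run_1.0eV.out'
#   stringRepMap('HEY ho', {'hey':'lol'}, ignore_case = True)
#   # -> 'lol ho'
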
#********************** Calculations

# Convert eV <> Hartrees (atomic units)
def conv_ev_atm(data, to = 'ev'):
    """
    Convert eV <> Hartree (atomic units)

    Parameters
    ----------
    data : int, float, np.array
        Values to convert.

    to : str, default = 'ev'
        - 'ev' to convert H > eV
        - 'H' to convert eV > H

    Returns
    -------
    data converted in converted units.

    """
    # Define H in eV, value from https://en.wikipedia.org/wiki/Hartree_atomic_units#Units
    # H = 27.211386245
    H = scipy.constants.physical_constants['Hartree energy in eV'][0]  # Use scipy value

    if to == 'ev':
        dataOut = data*H
    else:
        dataOut = data/H

    return dataOut
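
# Example: 1 H ≈ 27.211 eV.
#
#   conv_ev_atm(1)                 # H > eV, ~27.211
#   conv_ev_atm(27.211, to = 'H')  # eV > H, ~1.0
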
# Convert energy in eV to wavelength in nm
def conv_ev_nm(data): #, to = 'nm'):
    """Convert E(eV) <> wavelength(nm)."""

    # Define constants from scipy.constants
    h = scipy.constants.h
    c = scipy.constants.c
    evJ = scipy.constants.physical_constants['electron volt-joule relationship'][0]

    # Define output units - wavelength in m
    waveConv = 1e-9

    dataOut = (h * c)/(data * evJ)/waveConv

    # if to is 'nm':
    #     dataOut = (h * c)/(data * evJ)/waveConv
    # else:
    #     dataOut = (data/waveConv)*evJ/(h * c)

    return dataOut
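
# Example: lambda(nm) = hc/E ~ 1239.84/E(eV), so the same call also inverts nm > eV.
#
#   conv_ev_nm(10.0)    # -> ~123.98 (nm)
#   conv_ev_nm(123.98)  # -> ~10.0 (eV)
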