Source code for epsproc.util.xrIO

"""
ePSproc Xarray IO util functions

Various tools for use in Xarray file IO.

27/06/22    Split out from core IO.py, to extend backend options and support.
            Now additionally wrapped therein for flexible handling of multiple backends.

"""

import numpy as np
import xarray as xr

#*********** Complex data handling

# Split complex to R + I
def splitComplex(data):
    """
    Split complex data into R+I floats.

    Parameters
    ----------
    data : scalar, np.ndarray or Xarray object
        Values to split. Real-valued input is accepted too, in which case
        the imaginary part is zero.

    Returns
    -------
    dataR, dataI
        Real and imaginary parts, as given by ``np.real`` and ``np.imag``.

    """

    return np.real(data), np.imag(data)

# Combine R + I to complex
def combineComplex(dataR, dataI):
    """
    Combine R+I floats into complex form.

    Parameters
    ----------
    dataR, dataI : scalar, np.ndarray or Xarray object
        Real and imaginary parts. Must support ``dataR + 1j*dataI``
        (plain Python lists do not).

    Returns
    -------
    data
        Complex values, ``dataR + 1j*dataI``.

    """

    return dataR + 1j*dataI


def splitComplexXR(dataIn):
    """
    Split complex-valued Xarray data & coords to Re + Im components

    Splits input Xarray into Xarray Dataset with 'Re' and 'Im' components.
    Any complex-valued coordinate ``name`` is replaced by a pair of real
    coordinates ``name + 'r'`` and ``name + 'i'``.

    Parameters
    ----------
    dataIn : Xarray DataArray
        Data to split; must provide ``.real`` and ``.imag``.

    Returns
    -------
    dataOut : Xarray Dataset
        Dataset with data variables 'Re' and 'Im'. Top-level attrs are copied
        from the first data variable, plus ``attrs['complex'] = 'split'``.

    """

    # Safe version with re/im split save type only.
    # Works for scipy and h5netcdf OK, latter will save complex type too, but is strictly not valid.
    dataOut = xr.Dataset({'Re': dataIn.real, 'Im': dataIn.imag})

    # Allow for arb complex coords (e.g. SF & XS, which may also be complex).
    # May also want to add attr checker here? Or set in 'sanitizeAttrsNetCDF'
    # Iterate over a snapshot of the names, since coords are added and dropped inside the loop.
    for item in list(dataOut.coords.keys()):
        # dtype kind 'c' covers all complex precisions, not only complex128.
        if dataOut.coords[item].dtype.kind == 'c':
            coordR, coordI = splitComplex(dataOut.coords[item])
            dataOut.coords[item + 'r'] = coordR
            dataOut.coords[item + 'i'] = coordI
            dataOut = dataOut.drop(item)

    # Force top-level attrs, otherwise they're only nested on the data variables.
    dataOut.attrs = dataOut[list(dataOut.data_vars)[0]].attrs
    dataOut.attrs['complex'] = 'split'

    return dataOut


def combineComplexXR(dataIn):
    """
    Combine Re + Im Xarray Dataset and coordinates to complex values

    Note: not general, assumes formatting as defined by splitComplexXR(),
    i.e. data variables 'Re' and 'Im', and split coordinates labelled with
    trailing 'r' and 'i'.

    Parameters
    ----------
    dataIn : Xarray Dataset
        Dataset providing ``Re`` and ``Im`` components.

    Returns
    -------
    dataOut : Xarray DataArray
        Complex-valued data, ``Re + 1j*Im``, with any r/i coordinate pairs
        recombined to a single complex coordinate.

    """

    # Reconstruct complex variables, NOTE this drops attrs... there's likely a better way to do this!
    # UPDATE 07/06/22: additional attrs handling below. Note in this case dataOut is a DataArray here.
    dataOut = dataIn.Re + dataIn.Im*1j

    # Reset coords which may also be complex (e.g. SF & XS), general version.
    # Iterate over a snapshot of the names, since coords are added and dropped inside the loop.
    for item in list(dataOut.coords.keys()):
        # Check for r+i pairs - note labelling assumed to match writeXarray conventions here.
        # Skip non-string names, and a bare 'r' which would give an empty base name.
        if not (isinstance(item, str) and item.endswith('r') and len(item) > 1):
            continue

        base = item[:-1]
        itemi = base + 'i'

        # If imag partner found, restack and remove split components.
        if (item in dataOut.coords) and (itemi in dataOut.coords):
            dataOut.coords[base] = combineComplex(dataOut.coords[item], dataOut.coords[itemi])
            dataOut = dataOut.drop([item, itemi])

    # For dataset case, try some generic handling. May need more sophisticated methods here, maybe just assume DataArray and convert?
    # Only applied if the arithmetic above left no attrs on the output.
    if (not dataOut.attrs) and isinstance(dataIn, xr.Dataset):
        dataOut.attrs = dataIn[list(dataIn.data_vars)[0]].attrs

    return dataOut


#*********** Attribs handling

# Sanitize attributes & dicts for Xarray NetCDF IO
def sanitizeAttrsNetCDF(data, dictHandling = 'wrap'):
    """
    Sanitize Xarray DataArray attributes for file IO.

    Note this may be lossy:

    - Empty data > string. For np.ndarray items "empty" means zero size;
      for everything else it means falsy (None, '', 0, [], {} ...).
    - Dictionaries removed, wrapped to string, or left alone (nested dicts not supported in attrs for most (all?) file writers).
      Set dictHandling = 'del', 'wrap' or anything else to leave as is.
    - Remove all items not of types [str, np.ndarray, int, float, list, tuple]
      (exact type match; removed items are replaced by the string 'NA').

    Parameters
    ----------
    data : Xarray DataArray
        Object to sanitize; only ``data.copy()`` and ``.attrs`` are used.

    dictHandling : str, default = 'wrap'
        Dictionary handling, see above.

    Returns
    -------
    dataOut
        Copy of data with sanitized attrs.

    attrs : dict
        Original ``data.attrs``, unmodified.

    log : dict
        Note of the change made per modified attribute key.

    Todo:

    - try conversion to string for all attrs?
    - try dict conversions & JSON side-car file IO to avoid lossy saves.
      E.g. full str conversion via json.dumps, then reconstruction with
      ast.literal_eval, see https://stackoverflow.com/a/42676094.
      Works for all cases except non-executable strings, may need type checking too.

    """

    dataOut = data.copy()

    allowedTypes = (str, np.ndarray, int, float, list, tuple)
    log = {}

    # Loop over a snapshot of the items, since attrs values are reassigned in the loop.
    for k, v in list(dataOut.attrs.items()):

        # Remove None and other empty types.
        # Arrays are tested by size, since bool(array) is ambiguous (raises) for multi-element arrays.
        if isinstance(v, np.ndarray):
            isEmpty = (v.size == 0)
        else:
            try:
                isEmpty = not v
            except (ValueError, TypeError):
                isEmpty = False   # No well-defined truth value, treat as non-empty.

        if isEmpty:
            dataOut.attrs[k] = str(v)
            log[k] = 'str'

        if isinstance(dataOut.attrs[k], dict):
            # Nested dict items are not supported by most file writers.
            if dictHandling == 'del':
                dataOut.attrs[k] = 'Removed dict'
                log[k] = 'Removed dict'

            elif dictHandling == 'wrap':
                dataOut.attrs[k] = str(v)
                log[k] = 'Wrapped dict to string'

            else:
                # Leave as is: skip the type filter below, which would otherwise replace the dict.
                continue

        if type(dataOut.attrs[k]) not in allowedTypes:
            typeIn = type(dataOut.attrs[k])
            dataOut.attrs[k] = 'NA'
            log[k] = f'Removed item type {typeIn}'

    return dataOut, data.attrs, log