Source code for resistics.time.reader_lemib423e

import os
import glob
import struct
from datetime import datetime, timedelta
import numpy as np
from typing import List, Tuple, Dict, Any

from resistics.time.reader_lemib423 import (
    TimeReaderLemiB423,
    readB423Params,
    readB423Header,
)
from resistics.time.reader_internal import TimeReaderInternal
from resistics.time.data import TimeData
from resistics.common.checks import isMagnetic, isElectric, consistentChans
from resistics.common.print import blockPrint
from resistics.time.clean import removeZeros, removeZerosSingle, removeNansSingle


[docs]def folderB423EHeaders(
    folderpath: str,
    sampleFreq: float,
    ex: str = "E1",
    ey: str = "E2",
    dx: float = 1,
    dy: float = 1,
    folders: List = [],
) -> None:
    """Construct B423E headers for subfolders of a folder

    Parameters
    ----------
    folderpath : str
        The path to the folder
    sampleFreq : float
        The sampling frequency of the data
    ex : str, optional
        The channel E1, E2, E3 or E4 in the data that represents Ex. Default E1.
    ey : str, optional
        The channel E1, E2, E3 or E4 in the data that represents Ey. Default E2.
    dx : float, optional
        Distance between x electrodes
    dy : float, optional
        Distance between y electrodes
    folder : List, optional
        An optional list of subfolders
    """
    if len(folders) == 0:
        folders = [f.path for f in os.scandir(folderpath) if f.is_dir()]
    # now construct headers for each folder
    for folder in folders:
        measB423EHeaders(folder, sampleFreq, ex=ex, ey=ey, dx=dx, dy=dy)


[docs]def measB423EHeaders(
    datapath: str,
    sampleFreq: float,
    ex: str = "E1",
    ey: str = "E2",
    dx: float = 1,
    dy: float = 1,
) -> None:
    """Read a single B423 measurement directory and construct headers
    
    Parameters
    ----------
    site : str
        The path to the site
    sampleFreq : float
        The sampling frequency of the data
    ex : str, optional
        The channel E1, E2, E3 or E4 in the data that represents Ex. Default E1.
    ey : str, optional
        The channel E1, E2, E3 or E4 in the data that represents Ey. Default E2.        
    dx : float, optional
        Distance between x electrodes
    dy : float, optional
        Distance between y electrodes
    """
    from resistics.common.print import generalPrint, warningPrint, errorPrint
    from resistics.time.writer import TimeWriter

    dataFiles = glob.glob(os.path.join(datapath, "*.B423"))
    dataFilenames = [os.path.basename(dFile) for dFile in dataFiles]
    starts = []
    stops = []
    cumSamples = 0
    for idx, dFile in enumerate(dataFiles):
        generalPrint("constructB423EHeaders", "Reading data file {}".format(dFile))
        dataHeaders, firstDatetime, lastDatetime, numSamples = readB423Params(
            dFile, sampleFreq, 1024, 26
        )
        print(dataHeaders)
        generalPrint(
            "constructB423EHeaders",
            "start time = {}, end time = {}".format(firstDatetime, lastDatetime),
        )
        generalPrint(
            "constructB423EHeaders", "number of samples = {}".format(numSamples)
        )
        cumSamples += numSamples
        starts.append(firstDatetime)
        stops.append(lastDatetime)
    # now need to search for any missing data
    sampleTime = timedelta(seconds=1.0 / sampleFreq)
    # sort by start times
    sortIndices = sorted(list(range(len(starts))), key=lambda k: starts[k])
    check = True
    for i in range(1, len(dataFiles)):
        # get the stop time of the previous dataset
        stopTimePrev = stops[sortIndices[i - 1]]
        startTimeNow = starts[sortIndices[i]]
        if startTimeNow != stopTimePrev + sampleTime:
            warningPrint(
                "constructB423EHeaders", "There is a gap between the datafiles"
            )
            warningPrint(
                "constructB423EHeaders",
                "Please separate out datasets with gaps into separate folders",
            )
            warningPrint("constructB423EHeaders", "Gap found between datafiles:")
            warningPrint(
                "constructB423EHeaders", "1. {}".format(dataFiles[sortIndices[i - 1]])
            )
            warningPrint(
                "constructB423EHeaders", "2. {}".format(dataFiles[sortIndices[i]])
            )
            check = False
    # if did not pass check, then exit
    if not check:
        errorPrint(
            "constructB423EHeaders",
            "All data for a single recording must be continuous.",
            quitRun=True,
        )

    # time of first and last sample
    datetimeStart = starts[sortIndices[0]]
    datetimeStop = stops[sortIndices[-1]]

    # global headers
    globalHeaders = {
        "sample_freq": sampleFreq,
        "num_samples": cumSamples,
        "start_time": datetimeStart.strftime("%H:%M:%S.%f"),
        "start_date": datetimeStart.strftime("%Y-%m-%d"),
        "stop_time": datetimeStop.strftime("%H:%M:%S.%f"),
        "stop_date": datetimeStop.strftime("%Y-%m-%d"),
        "meas_channels": 4,
    }
    writer = TimeWriter()
    globalHeaders = writer.setGlobalHeadersFromKeywords({}, globalHeaders)

    # channel headers
    channels = ["E1", "E2", "E3", "E4"]
    chanMap = {"E1": 0, "E2": 1, "E3": 2, "E4": 3}
    sensors = {"E1": "0", "E2": "0", "E3": "0", "E4": "0"}
    posX2 = {"E1": 1, "E2": 1, "E3": 1, "E4": 1}
    posY2 = {"E1": 1, "E2": 1, "E3": 1, "E4": 1}
    posX2[ex] = dx
    posY2[ey] = dy

    chanHeaders = []
    for chan in channels:
        # sensor serial number
        cHeader = dict(globalHeaders)
        cHeader["ats_data_file"] = ", ".join(dataFilenames)
        if ex == chan:
            cHeader["channel_type"] = "Ex"
        elif ey == chan:
            cHeader["channel_type"] = "Ey"
        else:
            cHeader["channel_type"] = chan
        cHeader["scaling_applied"] = False
        cHeader["ts_lsb"] = 1
        cHeader["gain_stage1"] = 1
        cHeader["gain_stage2"] = 1
        cHeader["hchopper"] = 0
        cHeader["echopper"] = 0
        cHeader["pos_x1"] = 0
        cHeader["pos_x2"] = posX2[chan]
        cHeader["pos_y1"] = 0
        cHeader["pos_y2"] = posY2[chan]
        cHeader["pos_z1"] = 0
        cHeader["pos_z2"] = 1
        cHeader["sensor_sernum"] = sensors[chan]
        chanHeaders.append(cHeader)
    chanHeaders = writer.setChanHeadersFromKeywords(chanHeaders, {})
    writer.setOutPath(datapath)
    writer.writeHeaders(
        globalHeaders, channels, chanMap, chanHeaders, rename=False, ext="h423E"
    )


[docs]class TimeReaderLemiB423E(TimeReaderLemiB423):
    """Data reader for Lemi B423E data

    Lemi B423E data has the following characteristics:

    - 1024 bytes of ASCII headers in the data file with basic scaling information
    - There is no separate header file for Lemi B423E data. No information for number of samples, sampling rate etc
    - Header files need to be constructed before Lemi B423E data can be read in by resistics. There are helper methods to do this.
    - Lemi B423 raw measurement data is signed long integer format    
    - Getting unscaled samples returns data with unit count for electric fields. 
    - Optionally, a scaling can be applied for unscaled samples which returns electric fields in microvolt.

    In situations where a Lemi B423E dataset is recorded in multiple files, it is required that the recording is continuous. 

    Attributes
    ----------
    recChannels : Dict
        Channels in each data file
    dtype : np.float32
        The data type
    numHeaderFiles : int
        The number of header files
    numDataFiles : int
        The number of data files

    Methods
    -------
    setParameters()
        Set parameters specific to a data format
    getPhysicalSamples(**kwargs)
        Get data in physical units
    getScalars(paramsDict)
        Get the scalars for each channel as given in the data files 
    """

[docs]    def setParameters(self) -> None:
        """Set some data reader parameters for reading Lemi B423E data"""
        # get a list of the header and data files in the folder
        self.headerExt = "h423E"
        self.headerF = glob.glob(
            os.path.join(self.dataPath, "*.{}".format(self.headerExt))
        )
        self.dataF = glob.glob(os.path.join(self.dataPath, "*.B423"))
        # data byte information
        self.dataByteSize = 4
        self.recordByteSize = 26
        self.dataByteOffset = 1024
        # data type
        self.dtype = np.int_
        # get the number of data files and header files - this should be equal
        self.numHeaderFiles: int = len(self.headerF)
        self.numDataFiles: int = len(self.dataF)

[docs]    def getPhysicalSamples(self, **kwargs):
        """Get data scaled to physical values
        
        resistics uses field units, meaning physical samples will return the following:

        - Electrical channels in mV/km
        - Magnetic channels in mV
        - To get magnetic fields in nT, calibration needs to be performed

        Notes
        -----
        Once Lemi B423E data is scaled (which optionally happens in getUnscaledSamples), the electric channels are in uV (micro volts). Therefore:
        
        - Electric channels need to divided by 1000 along with dipole length division in km (east-west spacing and north-south spacing) to return mV/km.

        Parameters
        ----------
        chans : List[str]
            List of channels to return if not all are required
        startSample : int
            First sample to return
        endSample : int
            Last sample to return
        remaverage : bool
            Remove average from the data
        remzeros : bool
            Remove zeroes from the data
        remnans: bool
            Remove NanNs from the data

        Returns
        -------
        TimeData
            Time data object
        """
        # initialise chans, startSample and endSample with the whole dataset
        options = self.parseGetDataKeywords(kwargs)
        # get unscaled data but with gain scalings applied
        timeData = self.getUnscaledSamples(
            chans=options["chans"],
            startSample=options["startSample"],
            endSample=options["endSample"],
            scale=True,
        )
        # convert to field units and divide by dipole lengths
        for chan in options["chans"]:
            # divide by the 1000 to convert electric channels from microvolt to millivolt
            timeData.data[chan] = timeData.data[chan] / 1000.0
            timeData.addComment(
                "Dividing channel {} by 1000 to convert microvolt to millivolt".format(
                    chan
                )
            )
            if chan == "Ex":
                # multiply by 1000/self.getChanDx same as dividing by dist in km
                timeData.data[chan] = (
                    1000.0 * timeData.data[chan] / self.getChanDx(chan)
                )
                timeData.addComment(
                    "Dividing channel {} by electrode distance {} km to give mV/km".format(
                        chan, self.getChanDx(chan) / 1000.0
                    )
                )
            if chan == "Ey":
                # multiply by 1000/self.getChanDy same as dividing by dist in km
                timeData.data[chan] = 1000 * timeData.data[chan] / self.getChanDy(chan)
                timeData.addComment(
                    "Dividing channel {} by electrode distance {} km to give mV/km".format(
                        chan, self.getChanDy(chan) / 1000.0
                    )
                )

            # if remove zeros - False by default
            if options["remzeros"]:
                timeData.data[chan] = removeZerosSingle(timeData.data[chan])
            # if remove nans - False by default
            if options["remnans"]:
                timeData.data[chan] = removeNansSingle(timeData.data[chan])
            # remove the average from the data - True by default
            if options["remaverage"]:
                timeData.data[chan] = timeData.data[chan] - np.average(
                    timeData.data[chan]
                )

        # add comments
        timeData.addComment(
            "Remove zeros: {}, remove nans: {}, remove average: {}".format(
                options["remzeros"], options["remnans"], options["remaverage"]
            )
        )
        return timeData

[docs]    def getScalars(self, paramsDict: Dict) -> Dict:
        """Returns the scalars from a parameter dictionary

        Parameters
        ----------
        paramsDict : Dict
            The parameter dictionary for a data file usually read from the headers in the file

        Returns
        -------
        Dict 
            Dictionary with channels as keys and scalings as values
        """
        # need to get the channel orders here
        chans = []
        for cH in self.chanHeaders:
            chans.append(cH["channel_type"])
        return {
            chans[0]: [paramsDict["Ke1"], paramsDict["Ae1"]],
            chans[1]: [paramsDict["Ke2"], paramsDict["Ae2"]],
            chans[2]: [paramsDict["Ke3"], paramsDict["Ae3"]],
            chans[3]: [paramsDict["Ke4"], paramsDict["Ae4"]],
        }