Source code for sonicdb.utilities

# type: ignore
import os
import re
from datetime import datetime
from datetime import timedelta

import librosa
import pandas as pd
from dateutil.parser import parse


# File extensions (lowercase, without the leading dot) that are treated as
# audio files when deciding whether extended metadata should be collected.
audiofiles = [
    "wav", "mp3", "aiff", "flac", "ogg", "wma", "m4a", "aac", "alac",
    "aif", "aifc", "aiffc", "au", "snd", "cdda", "raw",
    "mpc", "vqf", "tta", "wv", "ape",
    "ac3", "dts", "dtsma", "dtshr", "dtshd", "eac3",
    "thd", "thd+ac3", "thd+dts", "thd+dd",
    "thd+dd+ac3", "thd+dd+dts", "thd+dd+dtsma", "thd+dd+dtshr", "thd+dd+dtshd",
]



[docs]
def lower_keys(tree: dict) -> dict:  # pragma: no cover
    """
    Normalize a dictionary to have lowercase and snake_case keys.

    Every key is lowercased and its spaces are replaced by underscores.
    Values that are dictionaries are normalized recursively; every other
    value (including lists that contain dictionaries) is carried over as is.
    A new dictionary is built; the input dictionary itself is not modified.

    If two keys normalize to the same name, the one that comes later in
    iteration order wins.

    Args:
        tree (dict): Dictionary to normalize. Keys are expected to be strings,
            since ``str.lower`` is called on each of them.

    Returns:
        dict: New dictionary with normalized keys.
    """

    data = {}
    for key, value in tree.items():
        normalized = key.lower().replace(" ", "_")
        data[normalized] = lower_keys(value) if isinstance(value, dict) else value
    return data




[docs]
def read_datetime(string: str) -> datetime:  # pragma: no cover
    """
    Convert a datetime string into a datetime object.

    The underscore-separated layouts below are tried in order; the first one
    that matches wins. If none of them matches, the string is handed to
    ``dateutil``'s fuzzy parser as a last resort.

    Args:
        string (str): Datetime string to convert.

    Returns:
        datetime: Converted datetime object.
    """

    known_layouts = (
        "%Y_%m_%d",
        "%Y_%m_%d_%H_%M_%S.%f",
        "%Y_%m_%d_%H_%M_%S",
    )

    for layout in known_layouts:
        try:
            return datetime.strptime(string, layout)
        except Exception:
            # This layout did not fit; fall through to the next candidate.
            continue

    return parse(timestr=string, fuzzy=True)




[docs]
def metadata(filepath: str, extended: bool = False) -> dict:  # pragma: no cover
    """
    Generate metadata for a file.

    The basic metadata is always returned: ``filepath``, ``filename``,
    ``extension`` (without the dot, original case), ``directory``, ``size``
    (as reported by ``os.path.getsize``) and the ``modified`` / ``created`` /
    ``accessed`` timestamps as naive local ``datetime`` objects.

    When ``extended`` is true and the extension is listed in ``audiofiles``
    (compared case-insensitively, so ``.WAV`` is handled like ``.wav``), the
    following keys are added on a best-effort basis; each one is ``None``
    when it cannot be determined:

    * ``channel``: first run of digits in the second-to-last
      underscore-separated part of the filename.
    * ``sample_rate``: result of ``librosa.get_samplerate``.
    * ``duration``: result of ``librosa.get_duration`` (used as seconds).
    * ``record_number``: integer before the first dot in the last
      underscore-separated part of the filename.
    * ``start``: the first 23 characters of the filename parsed by
      ``read_datetime``.
    * ``end``: ``start`` plus ``duration`` seconds.

    Args:
        filepath (str): Filepath of the file.
        extended (bool, optional): Whether to include extended metadata. Defaults to False.

    Returns:
        dict: Metadata of the file.

    Raises:
        OSError: If the file cannot be stat'ed (e.g. it does not exist).
    """

    filename = os.path.basename(filepath)
    extension = os.path.splitext(filepath)[1].replace(".", "")

    # Key order is kept stable because callers turn these dicts into
    # DataFrame columns.
    info = {
        "filepath": filepath,
        "filename": filename,
        "extension": extension,
        "directory": os.path.dirname(filepath),
        "size": os.path.getsize(filepath),
        "modified": datetime.fromtimestamp(os.path.getmtime(filepath)),
        "created": datetime.fromtimestamp(os.path.getctime(filepath)),
        "accessed": datetime.fromtimestamp(os.path.getatime(filepath)),
    }

    # ``audiofiles`` holds lowercase extensions, so compare in lowercase;
    # otherwise files such as "REC.WAV" would silently be skipped.
    if not extended or extension.lower() not in audiofiles:
        return info

    name_parts = filename.split("_")

    try:
        info["channel"] = int(re.findall(r"\d+", name_parts[-2])[0])
    except (IndexError, ValueError):
        # Fewer than two underscore-separated parts, or no digits in the part.
        info["channel"] = None

    # The audio backend can fail in many ways (unsupported codec, truncated
    # file, missing backend), so any failure just leaves the value unknown.
    try:
        info["sample_rate"] = librosa.get_samplerate(filepath)
    except Exception:
        info["sample_rate"] = None

    try:
        info["duration"] = librosa.get_duration(path=filepath)
    except Exception:
        info["duration"] = None

    try:
        info["record_number"] = int(name_parts[-1].split(".")[0])
    except ValueError:
        info["record_number"] = None

    try:
        info["start"] = read_datetime(filename[:23])
    except Exception:
        # The fuzzy fallback parser raises several exception types.
        info["start"] = None

    try:
        info["end"] = info["start"] + timedelta(seconds=info["duration"])
    except (TypeError, ValueError, OverflowError):
        # Missing start/duration (None), NaN duration or out-of-range result.
        info["end"] = None

    return info




[docs]
def metadatas(
    filepaths: list, extended: bool = False, stevens: bool = False
) -> pd.DataFrame:  # pragma: no cover
    """
    Generate metadata for multiple files.

    One row is produced per filepath, in the order given, using ``metadata``.

    When ``stevens`` is true, rows that share the same ``start`` and
    ``channel`` are treated as consecutive records of one recording
    (presumably the recorder stamps every record of a session with the same
    time in its filename; confirm against the Stevens data). For each such
    row whose ``record_number`` is greater than 1, ``start`` is replaced by
    the ``end`` of the row directly before it in the frame, and ``end`` is
    recomputed as the new ``start`` plus ``duration`` seconds. ``filepaths``
    therefore has to be ordered so that consecutive records are adjacent.

    The adjustment is skipped when the frame lacks the extended columns
    (``extended`` is false, or none of the files is an audio file). A row at
    position 0 has no predecessor and is left untouched. If the predecessor's
    ``end`` or the row's ``duration`` is missing, the new ``end`` is ``NaT``.

    Args:
        filepaths (list): List of filepaths.
        extended (bool, optional): Whether to include extended metadata. Defaults to False.
        stevens (bool, optional): Whether to adjust metadata for Stevens' format. Defaults to False.

    Returns:
        pd.DataFrame: Metadata of the files.
    """

    frame = pd.DataFrame([metadata(filepath, extended) for filepath in filepaths])

    if frame.empty or not stevens:
        return frame

    # Without the extended columns there is nothing to adjust.
    required = {"start", "end", "channel", "record_number", "duration"}
    if not required.issubset(frame.columns):
        return frame

    # Collect the rows to fix before mutating the frame, keeping the original
    # processing order: groups in sorted key order, rows in frame order.
    # Rows with a missing start or channel are dropped by groupby.
    continuation_rows = [
        label
        for _, group in frame.groupby(["start", "channel"])
        if len(group) > 1
        for label, record_number in group["record_number"].items()
        if pd.notna(record_number) and record_number > 1
    ]

    # The frame carries the default 0..n-1 index, so ``label - 1`` is the row
    # directly before ``label``.
    for label in continuation_rows:
        if label == 0:
            # No predecessor; do not wrap around to the last row.
            continue

        new_start = frame.at[label - 1, "end"]
        duration = frame.at[label, "duration"]

        frame.at[label, "start"] = new_start
        if pd.isna(new_start) or pd.isna(duration):
            frame.at[label, "end"] = pd.NaT
        else:
            frame.at[label, "end"] = new_start + timedelta(seconds=duration)

    return frame