# Source code for sonicdb.utilities
# type: ignore
import os
import re
from datetime import datetime
from datetime import timedelta
import librosa
import pandas as pd
from dateutil.parser import parse
# File extensions (lowercase, without the leading dot) that `metadata`
# treats as audio when extended metadata is requested.
audiofiles = [
    "wav", "mp3", "aiff", "flac", "ogg", "wma",
    "m4a", "aac", "alac", "aif", "aifc", "aiffc",
    "au", "snd", "cdda", "raw", "mpc", "vqf",
    "tta", "wv", "ape", "ac3", "dts", "dtsma",
    "dtshr", "dtshd", "eac3", "thd",
    "thd+ac3", "thd+dts", "thd+dd",
    "thd+dd+ac3", "thd+dd+dts",
    "thd+dd+dtsma", "thd+dd+dtshr", "thd+dd+dtshd",
]
[docs]
def lower_keys(tree: dict) -> dict[str, int]:  # pragma: no cover
    """
    Return a copy of a dictionary with lowercase, snake_case keys.

    Every key is lowercased and its spaces are replaced by underscores.
    Values that are themselves dictionaries are normalized recursively;
    all other values are carried over unchanged.

    Args:
        tree (dict): Dictionary to normalize.

    Returns:
        dict[str, int]: Normalized dictionary.
    """
    normalized = {}
    for key, value in tree.items():
        if isinstance(value, dict):
            # Only nested dicts are descended into; lists etc. are kept as-is.
            value = lower_keys(value)
        normalized[key.lower().replace(" ", "_")] = value
    return normalized
[docs]
def read_datetime(string: str) -> datetime:  # pragma: no cover
    """
    Convert a datetime string to a datetime object.

    The underscore-separated layouts below are tried in order; the first
    one that matches wins. If none matches, the string is handed to
    dateutil's fuzzy parser.

    Args:
        string (str): Datetime string to convert.

    Returns:
        datetime: Converted datetime object.
    """
    known_formats = (
        "%Y_%m_%d",
        "%Y_%m_%d_%H_%M_%S.%f",
        "%Y_%m_%d_%H_%M_%S",
    )
    for fmt in known_formats:
        try:
            return datetime.strptime(string, fmt)
        except Exception:
            # This layout did not fit; move on to the next candidate.
            continue
    # Last resort: let dateutil extract a date from free-form text.
    return parse(timestr=string, fuzzy=True)
[docs]
def metadata(filepath: str, extended=False) -> dict:  # pragma: no cover
    """
    Generate metadata for a file.

    Filesystem facts (path parts, size, timestamps) are always collected.
    When ``extended`` is true and the file's extension is listed in
    ``audiofiles`` (compared case-insensitively), six audio fields are
    added: ``channel``, ``sample_rate``, ``duration``, ``record_number``,
    ``start`` and ``end``. Each of those is computed best-effort and is
    ``None`` when it cannot be derived.

    Args:
        filepath (str): Filepath of the file.
        extended (bool, optional): Whether to include extended metadata. Defaults to False.

    Returns:
        dict: Metadata of the file.

    Raises:
        OSError: If the file's size or timestamps cannot be read
            (for example, the file does not exist).
    """
    info = {}
    info["filepath"] = filepath
    info["filename"] = os.path.basename(filepath)
    info["extension"] = os.path.splitext(filepath)[1].replace(".", "")
    info["directory"] = os.path.dirname(filepath)
    info["size"] = os.path.getsize(filepath)
    info["modified"] = datetime.fromtimestamp(os.path.getmtime(filepath))
    # NOTE: getctime is creation time on Windows but the last metadata
    # change on Unix, so "created" is platform dependent.
    info["created"] = datetime.fromtimestamp(os.path.getctime(filepath))
    info["accessed"] = datetime.fromtimestamp(os.path.getatime(filepath))
    # `audiofiles` holds lowercase extensions only, so lowercase the
    # candidate for the comparison; otherwise "REC.WAV" would be skipped.
    # The stored "extension" value keeps its original case.
    if extended and info["extension"].lower() in audiofiles:
        info.update(_audio_metadata(filepath, info["filename"]))
    return info


def _best_effort(compute):  # pragma: no cover
    """Return ``compute()``, or ``None`` if it raises any ``Exception``."""
    try:
        return compute()
    except Exception:
        return None


def _audio_metadata(filepath: str, filename: str) -> dict:  # pragma: no cover
    """Derive the audio-only fields from the file name and via librosa."""
    audio = {}
    # Channel: first run of digits in the second-to-last "_"-separated token.
    audio["channel"] = _best_effort(
        lambda: int(re.findall(r"\d+", filename.split("_")[-2])[0])
    )
    audio["sample_rate"] = _best_effort(lambda: librosa.get_samplerate(filepath))
    audio["duration"] = _best_effort(lambda: librosa.get_duration(path=filepath))
    # Record number: last "_"-separated token, up to its first ".".
    audio["record_number"] = _best_effort(
        lambda: int(filename.split("_")[-1].split(".")[0])
    )
    # The first 23 characters are parsed as the recording start; presumably
    # a "YYYY_MM_DD_HH_MM_SS.fff" prefix -- confirm against the naming scheme.
    audio["start"] = _best_effort(lambda: read_datetime(filename[:23]))
    # None whenever start or duration is unavailable (the addition raises).
    audio["end"] = _best_effort(
        lambda: audio["start"] + timedelta(seconds=audio["duration"])
    )
    return audio
[docs]
def metadatas(
    filepaths: list, extended=False, stevens=False
) -> pd.DataFrame:  # pragma: no cover
    """
    Generate metadata for multiple files.

    One row is built per filepath by calling ``metadata``. When ``stevens``
    is true, rows are grouped by ("start", "channel"); within each group of
    more than one row, every row whose ``record_number`` is greater than 1
    has its ``start`` replaced by the ``end`` of the row positioned
    immediately before it in the frame, and its ``end`` recomputed as that
    new start plus the row's ``duration``.

    NOTE(review): the adjustment reads the "start", "channel",
    "record_number", "duration" and "end" columns, which ``metadata`` only
    produces for audio files when ``extended`` is true.

    Args:
        filepaths (list): List of filepaths.
        extended (bool, optional): Whether to include extended metadata. Defaults to False.
        stevens (bool, optional): Whether to adjust metadata for Stevens' format. Defaults to False.

    Returns:
        pd.DataFrame: Metadata of the files.
    """
    frame = pd.DataFrame([metadata(filepath, extended) for filepath in filepaths])
    if len(frame) == 0 or not stevens:
        return frame

    for _, group in frame.groupby(["start", "channel"]):
        if len(group) <= 1:
            continue
        for index, record in group.iterrows():
            if record.record_number > 1:
                # Chain onto the preceding row: its end becomes this start.
                frame.at[index, "start"] = frame.iloc[index - 1].end
                # Re-read the just-updated start to derive the new end.
                frame.at[index, "end"] = frame.iloc[index].start + timedelta(
                    seconds=record.duration
                )
    return frame