# Source code for datalogs._logger

"""Data logging class."""

from __future__ import annotations
from typing import TypeVar, Annotated, Any, overload, get_type_hints, get_origin
from collections.abc import Callable, Sequence, Collection, Mapping
import os
import sys
from datetime import datetime, timezone
import numpy as np
import pandas as pd  # type: ignore
from datalogs._variables import Coord, DataVar
from datalogs._logs import LogMetadata, DataLog, DictLog
from datalogs._get_filename import get_filename

try:
    from paramdb import ParamDB

    PARAMDB_INSTALLED = True
except ImportError:
    PARAMDB_INSTALLED = False

# Unconstrained type variable; it parameterizes the LoggedProp alias below.
_T = TypeVar("_T")  # Any type variable
# Type variable constrained to DataLog or DictLog; Logger._log() uses it so that the
# type it returns matches the type produced by the make_log callback it is given.
_LT = TypeVar("_LT", DataLog, DictLog)  # Log type variable

# Generic alias that attaches the marker string "LoggedProp" as Annotated metadata.
# It can be used bare (LoggedProp) or subscripted (LoggedProp[float]).
# Logger.log_props inspects class type hints for this marker.
LoggedProp = Annotated[_T, "LoggedProp"]
"""
Used as a type hint to indicate that properties of a class should be logged by
:py:meth:`Logger.log_props`.
"""


def _now() -> datetime:
    """
    Get the present moment as a timezone-aware ``datetime`` expressed in the current
    (local) timezone.
    """
    utc_moment = datetime.now(tz=timezone.utc)
    # Calling astimezone() with no argument converts to the system local timezone.
    return utc_moment.astimezone()


class Logger:
    """
    Logger corresponding to a directory that generates log files and
    sub-:py:class:`Logger` objects corresponding to subdirectories.

    If ``root_directory`` is given, that will be used as the directory, and this
    :py:class:`Logger` will function as a root. Optionally, ``param_db`` can be given
    to enable commit tagging.

    Otherwise, ``parent`` and ``description`` must be given, and this will be a
    sub-:py:class:`Logger` object that corresponds to a subdirectory within its
    parent's directory (and uses its parent's ParamDB, if given). See
    :py:meth:`Logger.sub_logger` for an explanation of the ``timestamp`` option.

    A ``TypeError`` is raised if the arguments mix the two modes (for example, a
    ``root_directory`` together with a ``parent``) or if neither mode is fully
    specified.
    """

    @overload
    def __init__(
        self, root_directory: str, param_db: ParamDB[Any] | None = None
    ) -> None:  # pragma: no cover
        ...

    @overload
    def __init__(
        self, *, parent: Logger, description: str, timestamp: bool = True
    ) -> None:  # pragma: no cover
        ...

    # pylint: disable-next=too-many-arguments
    def __init__(
        self,
        root_directory: str | None = None,
        param_db: ParamDB[Any] | None = None,
        *,
        parent: Logger | None = None,
        description: str | None = None,
        timestamp: bool = True,
    ) -> None:
        # Exactly one construction mode is allowed: root (root_directory) or
        # sub-Logger (parent and description).
        if root_directory is None:
            if parent is None:
                raise TypeError("Logger with no root_directory must have a parent")
            if description is None:
                raise TypeError(
                    "Logger with no root_directory must have a description"
                )
        else:
            if parent is not None:
                raise TypeError("Logger with a root_directory cannot have a parent")
            if description is not None:
                raise TypeError(
                    "Logger with a root_directory cannot have a description"
                )
        # For a root Logger the name is the root directory itself; for a sub-Logger
        # it stays None until the directory property resolves it.
        self._name = root_directory
        self._parent = parent
        self._description = description
        self._timestamp = timestamp
        # A sub-Logger always uses its parent's ParamDB.
        self._param_db: ParamDB[Any] | None = (
            parent._param_db if parent is not None else param_db
        )
        if root_directory is not None or not timestamp:
            # Generate this logger's directory, if it is a root Logger or a sub-Logger
            # with no timestamp.
            self.directory  # pylint: disable=pointless-statement

    def sub_logger(self, description: str, timestamp: bool = True) -> Logger:
        """
        Create a new sub-:py:class:`Logger` with the given description corresponding
        to a subdirectory within the parent :py:class:`Logger`.

        By default, ``timestamp`` is True, meaning that the directory name will
        include a timestamp corresponding to when it was created. (Note that the
        directory will be created when first needed so that the timestamp more
        accurately reflects when its content was created.)

        If ``timestamp`` is False, the directory name will not include a timestamp.
        If there is an existing directory, it will be used. If not, a new directory
        will be created immediately.
        """
        return Logger(parent=self, description=description, timestamp=timestamp)

    @property
    def directory(self) -> str:
        """
        Directory where this logger saves subdirectories or files. If the directory
        does not yet exist (i.e. if this is a sub-:py:class:`Logger` with a
        timestamp), it is created.
        """
        if self._name is None:
            # If self._name is None, both self._parent and self._description should
            # have been defined in self.__init__().
            assert self._parent is not None, "sub-Logger must have a parent"
            assert self._description is not None, "sub-Logger must have a description"
            self._name = (
                get_filename(
                    self._parent.directory,
                    self._description,
                    timestamp=_now(),
                )
                if self._timestamp
                else self._description
            )
        directory = (
            self._name
            if self._parent is None
            else os.path.join(self._parent.directory, self._name)
        )
        if not os.path.exists(directory):
            try:
                os.mkdir(directory)
            except FileExistsError:
                # Another thread or process created the directory between the
                # existence check and mkdir(); it exists now, which is all we need.
                pass
        return directory

    def file_path(self, filename: str) -> str:
        """
        Generate a path to a file or directory with the given name within the
        directory of this :py:class:`Logger`.

        Note that this simply generates the path, with no checks for whether a file
        or directory with that path exists.
        """
        return os.path.join(self.directory, filename)

    def _log(
        self,
        make_log: Callable[[LogMetadata], _LT],
        description: str,
        commit_id: int | None = None,
    ) -> _LT:
        """
        Create a log object using the given log creation function, description, and
        commit ID, save it, and return it. If no commit ID is given and this Logger
        has a ParamDB, the latest commit ID will be used.

        Raises ``IndexError`` if the latest commit is needed but the ParamDB is
        empty.
        """
        if self._param_db is not None and commit_id is None:
            try:
                latest_commit = self._param_db.load_commit_entry()
            except IndexError as exc:
                raise IndexError(
                    f"cannot tag log '{description}' with most recent commit because"
                    f" ParamDB '{self._param_db.path}' is empty"
                ) from exc
            commit_id = latest_commit.id
        log = make_log(
            LogMetadata(
                directory=self.directory,
                timestamp=_now(),
                description=description,
                commit_id=commit_id,
                param_db_path=(
                    self._param_db.path if self._param_db is not None else None
                ),
            )
        )
        log.save()
        return log

    def log_data(
        self,
        description: str,
        coords: Coord | Sequence[Coord],
        data_vars: DataVar | Sequence[DataVar],
        *,
        commit_id: int | None = None,
    ) -> DataLog:
        """
        Construct an Xarray from the given data and corresponding metadata, save it
        in a NetCDF file, and return a :py:class:`DataLog` with this data and
        metadata.

        The log will be tagged with the given commit ID, or the latest commit ID if
        none is given (and if this Logger has a corresponding ParamDB).
        """

        def make_log(log_metadata: LogMetadata) -> DataLog:
            return DataLog.from_variables(log_metadata, coords, data_vars)

        return self._log(make_log, description, commit_id)

    @classmethod
    def convert_to_json(
        cls, obj: Any, convert: Callable[[Any], Any] | None = None
    ) -> Any:
        """
        Return a JSON-serializable version of the given object. This function is
        used to convert objects to JSON for :py:meth:`Logger.log_dict` and
        :py:meth:`Logger.log_props`.

        1. If provided, ``convert()`` will be used to convert the object.
        2. Numpy scalars will be unpacked and Pandas DataFrames will be converted to
           dictionaries.
        3. ``Mapping`` and ``Collection`` objects will be converted to dictionaries
           and lists, with keys converted to strings and values converted according
           to these rules.
        4. Other non-JSON-serializable values will be converted to ``repr()``
           strings.
        """
        if convert is not None:
            obj = convert(obj)
        if isinstance(obj, (np.generic, np.ndarray)) and obj.ndim == 0:
            obj = obj.item()  # Unpack NumPy scalars to simple Python values
        if isinstance(obj, pd.DataFrame):
            obj = obj.to_dict()  # Convert DataFrames to dictionaries
        # Strings are Collections too, so primitives must be returned before the
        # container checks below.
        if isinstance(obj, (str, int, float, bool)) or obj is None:
            return obj
        if isinstance(obj, Mapping):
            return {str(k): cls.convert_to_json(v, convert) for k, v in obj.items()}
        if isinstance(obj, Collection):
            return [cls.convert_to_json(v, convert) for v in obj]
        return repr(obj)

    def log_dict(
        self,
        description: str,
        dict_data: dict[str, Any],
        *,
        commit_id: int | None = None,
        convert: Callable[[Any], Any] | None = None,
    ) -> DictLog:
        """
        Save the given dictionary data and corresponding metadata in a JSON file,
        and return a :py:class:`DictLog` with this data and metadata.

        Objects will be converted according to :py:meth:`Logger.convert_to_json`,
        with ``convert()`` passed to that function.

        The log will be tagged with the given commit ID, or the latest commit ID if
        none is given (and if this Logger has a corresponding ParamDB).
        """

        def make_log(log_metadata: LogMetadata) -> DictLog:
            return DictLog(log_metadata, self.convert_to_json(dict_data, convert))

        return self._log(make_log, description, commit_id)

    @staticmethod
    def _logged_props(obj: Any) -> dict[str, Any]:
        """
        Collect the values of the attributes of ``obj`` whose class type hints carry
        the ``"LoggedProp"`` marker in their ``Annotated`` metadata.

        Hinted attributes that are not set on ``obj`` are skipped. Raises
        ``RuntimeError`` if the class type hints cannot be resolved.
        """
        obj_class = type(obj)
        try:
            type_hints = get_type_hints(obj_class, include_extras=True)
        except Exception as exc:
            python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
            raise RuntimeError(
                f"cannot log properties of '{obj_class.__name__}' object because its"
                f" class type hints are invalid in Python {python_version}"
            ) from exc
        missing = object()  # Sentinel distinguishing "not set" from a None value
        logged_props: dict[str, Any] = {}
        for name, type_hint in type_hints.items():
            if get_origin(type_hint) is not Annotated:
                continue
            # Nested Annotated types are flattened with the innermost metadata
            # first, so the marker is not necessarily the first metadata item
            # (e.g. LoggedProp[Annotated[int, "unit"]]). Search all of it.
            if not any(
                isinstance(item, str) and item == "LoggedProp"
                for item in type_hint.__metadata__
            ):
                continue
            value = getattr(obj, name, missing)
            if value is not missing:
                logged_props[name] = value
        return logged_props

    def log_props(
        self,
        description: str,
        obj: Any,
        *,
        commit_id: int | None = None,
        convert: Callable[[Any], Any] | None = None,
    ) -> DictLog:
        """
        Save a dictionary of the given object's properties and corresponding
        metadata in a JSON file, and return a :py:class:`DictLog` with this data and
        metadata.

        Only properties that have been marked with a
        :py:const:`~datalogs._logger.LoggedProp` type hint at the top of the class
        definition will be saved. For example::

            class Example:
                value: LoggedProp
                number: LoggedProp[float]

        The marker is recognized anywhere in the hint's ``Annotated`` metadata, so
        hints that wrap another ``Annotated`` type are logged as well.

        Objects will be converted according to :py:meth:`Logger.convert_to_json`,
        with ``convert()`` passed to that function.

        The log will be tagged with the given commit ID, or the latest commit ID if
        none is given (and if this Logger has a corresponding ParamDB).

        Raises ``RuntimeError`` if the class type hints of ``obj`` cannot be
        resolved.
        """
        return self.log_dict(
            description,
            self._logged_props(obj),
            commit_id=commit_id,
            convert=convert,
        )