Source code for run.moore.mooreconfig

"""Module that contains helper functions and variables to configure algorithms
that are run from Moore.
"""
from __future__ import annotations
import os
import typing
from tools import tconversion, envvar


def check_no_conversion(format: str, compression: typing.Optional[str] = None) -> bool:
    """Tell whether the requested output is already plain, uncompressed CSV.

    When this holds, the files do not need to be converted.

    Args:
        format: file format (``csv``, ``arrow`` or ``parquet``)
        compression: compression algorithm to use (``None`` if no compression)

    Returns:
        ``True`` if **no** conversion is needed, that is, ``format`` is
        ``"csv"`` and ``compression`` is ``None``; ``False`` otherwise.
    """
    # Any compression means the files have to be rewritten.
    if compression is not None:
        return False
    return format == "csv"
def xdigi2csv_conversion(
    intermediate_outdir: str,
    outdir: str,
    format: str,
    compression: typing.Optional[str] = None,
    keep_original: bool = False,
):
    """Implements the conversion from uncompressed CSV to the desired format
    for the given configuration.

    Args:
        intermediate_outdir: Output directory of the Moore algorithm
        outdir: where to save the converted files
        format: file format (``csv``, ``arrow`` or ``parquet``)
        compression: compression algorithm to use (``None`` if no compression)
        keep_original: whether to keep the original files. If set to ``False``,
            the files are removed

    Raises:
        AssertionError: if the requested output is already uncompressed CSV,
            in which case there is nothing to convert.
    """
    # The helper has to be *called*: asserting the bare function object is
    # always true and would never catch a no-op conversion request.
    assert not check_no_conversion(format, compression), "Nothing to convert"

    # The input side is always the uncompressed CSV written by Moore.
    tconversion.convert_all_table_paths(
        indir=intermediate_outdir,
        informat="csv",
        incompression=None,
        outdir=outdir,
        outformat=format,
        outcompression=compression,
        verbose=True,
        keep_original=keep_original,
    )
def xdigi2csv_prerun(config: dict) -> dict | None:
    """Prepare the configuration in place for a conversion from uncompressed
    CSV to the chosen format.

    The conversion-only options (``format``, ``compression``,
    ``keep_original``, ``intermediate_outdir`` and
    ``_intermediate_outdir_in_node``) are read from ``config["xdigi2csv"]``
    and then removed from it. If a conversion is required,
    ``config["xdigi2csv"]["outdir"]`` is replaced by the intermediate output
    directory.

    Args:
        config: configuration that is modified

    Returns:
        The keyword arguments to pass to :py:func:`xdigi2csv_conversion`,
        or ``None`` if there is no conversion
    """
    section = config["xdigi2csv"]

    # Read every conversion-only option before removing any of them.
    table_format = section["format"]
    compression = section["compression"]
    intermediate_outdir = section["intermediate_outdir"]
    in_node = section["_intermediate_outdir_in_node"]
    keep_original = section["keep_original"]

    for option in (
        "format",
        "compression",
        "keep_original",
        "intermediate_outdir",
        "_intermediate_outdir_in_node",
    ):
        del section[option]

    if in_node is None:
        # Not specified: consider that we work on a node when the batch system
        # is HTCondor (the repo only works for HTCondor anyway).
        in_node = os.environ.get("BATCH_SYSTEM") == "HTCondor"
        if in_node:
            print("The script is run on HTCondor.")

    if in_node and keep_original:
        keep_original = False
        print(
            "The output directory was set in the temporary HTCondor home "
            "so the CSV files cannot be kept even if `keep_original` was set to "
            "`True`."
        )
        # Original note: no need to remove file if it is in the temporary
        # node home.

    if check_no_conversion(table_format, compression):
        # No conversion: don't run the post-run.
        return None

    if intermediate_outdir is None and in_node:
        intermediate_outdir = envvar.get_environment_variable("NODE_HOME")
        final_outdir = section["outdir"]
        print("The intermediate output directory is the temporary condor home.")
    elif intermediate_outdir is None:
        # No dedicated intermediate directory: convert within the output
        # directory itself.
        final_outdir = section["outdir"]
        intermediate_outdir = final_outdir
    else:
        final_outdir = section["outdir"]

    # Output directory of Moore is the intermediate output directory
    section["outdir"] = intermediate_outdir

    # Parameters to pass to :py:func:`xdigi2csv_conversion`
    return {
        "outdir": final_outdir,
        "format": table_format,
        "compression": compression,
        "intermediate_outdir": intermediate_outdir,
        "keep_original": keep_original,
    }
#: Maps each Moore program to the function to call before running the program.
#: The function takes as input the configuration, which it may alter in-place,
#: and returns the arguments to pass to the post-run function, or ``None``
#: if no post-run function shall be run.
program_preruns = {"xdigi2csv": xdigi2csv_prerun}

#: Maps each Moore program to the function to call after running the program.
#: The function takes as input the keyword arguments given by the pre-run
#: function.
program_postruns = {
    "xdigi2csv": xdigi2csv_conversion,
}