Source code for run.moore.mooreconfig
"""Module that contains helper functions and variables to configure algorithms
that are run from Moore.
"""
from __future__ import annotations
import os
import typing
from tools import tconversion, envvar
[docs]
def check_no_conversion(format: str, compression: typing.Optional[str] = None) -> bool:
    """Check whether NO conversion is needed.

    No conversion is needed when the requested end products are uncompressed
    CSV file(s), i.e. ``format`` is ``"csv"`` and ``compression`` is ``None``.

    Args:
        format: file format (``csv``, ``arrow`` or ``parquet``)
        compression: compression algorithm to use (``None`` if no compression)

    Returns:
        ``True`` if the requested output is uncompressed CSV (nothing to
        convert), ``False`` if a conversion is required
    """
    return format == "csv" and compression is None
[docs]
def xdigi2csv_conversion(
    intermediate_outdir: str,
    outdir: str,
    format: str,
    compression: typing.Optional[str] = None,
    keep_original: bool = False,
) -> None:
    """Implements the conversion from uncompressed CSV to the desired format
    for the given configuration.

    Args:
        intermediate_outdir: Output directory of the Moore algorithm
        outdir: where to save the converted files
        format: file format (``csv``, ``arrow`` or ``parquet``)
        compression: compression algorithm to use (``None`` if no compression)
        keep_original: whether to keep the original files. If set to ``False``,
            the files are removed

    Raises:
        ValueError: if the requested output is uncompressed CSV, in which case
            there is nothing to convert
    """
    # The guard must *call* the helper: a bare function object is always
    # truthy, so testing it directly can never fail. An explicit raise is used
    # rather than ``assert`` so the check survives ``python -O``.
    if check_no_conversion(format, compression):
        raise ValueError("Nothing to convert")

    # The input side is fixed to uncompressed CSV, as stated in the summary
    # above; only the output format and compression are configurable.
    tconversion.convert_all_table_paths(
        indir=intermediate_outdir,
        informat="csv",
        incompression=None,
        outdir=outdir,
        outformat=format,
        outcompression=compression,
        verbose=True,
        keep_original=keep_original,
    )
[docs]
def xdigi2csv_prerun(config: dict) -> dict | None:
    """Prepare, in place, the configuration for the conversion from uncompressed
    CSV to the chosen format.

    The conversion-only options are read from ``config["xdigi2csv"]`` and then
    removed from it. When a conversion is required, ``config["xdigi2csv"]["outdir"]``
    is redirected to the intermediate output directory.

    Args:
        config: configuration that is modified

    Returns:
        The keyword arguments to pass to :py:func:`xdigi2csv_conversion`, or ``None``
        if there is no conversion
    """
    settings = config["xdigi2csv"]

    # Read every conversion-only option first...
    table_format = settings["format"]
    compression = settings["compression"]
    intermediate_outdir = settings["intermediate_outdir"]
    on_node = settings["_intermediate_outdir_in_node"]
    keep_original = settings["keep_original"]

    # ...then strip them from the configuration
    for key in (
        "format",
        "compression",
        "keep_original",
        "intermediate_outdir",
        "_intermediate_outdir_in_node",
    ):
        del settings[key]

    if on_node is None:
        # Not set explicitly: detect whether we are working on an HTCondor node
        # (the repo only works for HTCondor anyway)
        on_node = os.environ.get("BATCH_SYSTEM") == "HTCondor"
        if on_node:
            print("The script is run on HTCondor.")

    if on_node and keep_original:
        # (no need to remove file if it is in the temporary node home)
        keep_original = False
        print(
            "The output directory was set in the temporary HTCondor home "
            "so the CSV files cannot be kept even if `keep_original` was set to "
            "`True`."
        )

    if check_no_conversion(table_format, compression):
        # No conversion: don't run the post-run
        return None

    final_outdir = settings["outdir"]
    if intermediate_outdir is None:
        if on_node:
            intermediate_outdir = envvar.get_environment_variable("NODE_HOME")
            print("The intermediate output directory is the temporary condor home.")
        else:
            # Convert in the same directory the Moore output is written to
            intermediate_outdir = final_outdir

    # Output directory of Moore is the intermediate output directory
    settings["outdir"] = intermediate_outdir

    # Parameters to pass to :py:func:`xdigi2csv_conversion`
    return {
        "outdir": final_outdir,
        "format": table_format,
        "compression": compression,
        "intermediate_outdir": intermediate_outdir,
        "keep_original": keep_original,
    }
#: Maps a Moore program name to the function to run before that program.
#: The function receives the configuration, which it may alter in place,
#: and returns the keyword arguments to pass to the post-run function, or
#: ``None`` if no post-run function shall be run.
program_preruns = {"xdigi2csv": xdigi2csv_prerun}

#: Maps a Moore program name to the function to run after that program.
#: The function receives the keyword arguments given by the pre-run function.
program_postruns = {
    "xdigi2csv": xdigi2csv_conversion,
}