#!/usr/bin/env python3
"""mpsd-software: tool for installation of software as on MPSD HPC."""

__version__ = "2023.6.14"

import argparse
import datetime
import logging
import os
import re
import subprocess
import sys
import tempfile
import time
from pathlib import Path
from typing import List, Tuple, Union

# If 'rich' is available ("pip install rich" or "apt-get install python3-rich"),
# then use coloured output, otherwise proceed as before
try:
    import rich.logging
except ModuleNotFoundError:
    rich_available = False
else:
    rich_available = True

about_intro = f"""
Build software as on MPSD HPC.

    This tool builds software package sets (including toolchains for Octopus).
    It follows recipes as used on the MPSD HPC system and the (spack-based)
    Octopus buildbot. Compiled software is organised into MPSD software release
    versions (such as `dev-23a`) and CPU microarchitecture (such as `sandybridge`).
    Compiled packages and toolchains can be activated and used via `module load` as
    on the HPC system.

    Further documentation is available in the README.rst file, online at
    https://gitlab.gwdg.de/mpsd-cs/mpsd-software/-/blob/main/README.rst

Command line usage:

   $> {sys.argv[0]}

"""

about_epilog = f"""

Examples:

    1. Query what package sets and toolchains are available for installation in
       release dev-23a

       $> {sys.argv[0]} available dev-23a

    2. Install foss2022a-serial toolchain from the dev-23a release

       $> {sys.argv[0]} install dev-23a foss2022a-serial

    3. Check what package sets and toolchains are installed from release dev-23a

       $> {sys.argv[0]} status dev-23a

       The `status` command also displays the `module use` command needed to load
       the created modules.

"""

# Time stamp of this invocation; ':' is replaced so it is safe in file names.
call_date_iso = (
    datetime.datetime.now().replace(microsecond=0).isoformat().replace(":", "-")
)

config_vars = {
    # Name of the command log file.
    # NOTE(review): the original comment said "kept inside the mpsd_release
    # folder", but record_script_execution_summary writes it into root_dir.
    "cmd_log_file": "mpsd-software.log",
    # Metadata tags
    "metadata_tag_open": "!<meta>",
    "metadata_tag_close": "</meta>!",
    "spack_environments_repo": "https://gitlab.gwdg.de/mpsd-cs/spack-environments.git",
}


def create_log_file_names(
    mpsd_release: str,
    microarch: str,
    action: str,
    date: str = call_date_iso,
    toolchain: Union[str, None] = None,
) -> Union[str, None]:
    """Create log file names.

    This function creates the log file names for either the installer or
    the build log files.

    If a toolchain is given, then the build log file name is created.
    If no toolchain is given, then the installer log file name is created.
    The installer log file hosts the logs of the installer script, while
    the build log file hosts the logs of the build process as generated by the
    spack_setup.sh script.

    Parameters
    ----------
    mpsd_release : str
        MPSD software stack version
    microarch : str
        system architecture
    action : str
        action performed (install, remove, reinstall, prepare, status).
        Only install and remove are valid for the build log file.
    date : str
        date of the call in ISO format
    toolchain : str
        toolchain name (only for build log file)

    Returns
    -------
    str or None
        Build log file name if `toolchain` is given and `action` is
        "install" or "remove"; None if `toolchain` is given with any other
        action; otherwise the installer log file name.
    """
    if not toolchain:
        # no toolchain: name of the installer log file
        return f"{mpsd_release}_{microarch}_{date}_APEX_{action}.log"
    if action in ["install", "remove"]:
        # toolchain given: name of the build log file
        return f"{mpsd_release}_{microarch}_{date}_BUILD_{toolchain}_{action}.log"
    return None


def log_metadata(key: str, value: str) -> None:
    """Log metadata to the log file.

    The metadata is enclosed in a tag (see `config_vars`), so that it can be
    found easily in the log file. The logging module is used to write the
    metadata (at level INFO).

    Parameters
    ----------
    key : str
        key of the metadata
    value : str
        value of the metadata

    Returns
    -------
    None
    """
    logging.info(
        f"{config_vars['metadata_tag_open']}{key}:{value}{config_vars['metadata_tag_close']}"
    )


def read_metadata_from_logfile(logfile: Union[str, Path]) -> dict:
    """Read metadata from the log file.

    Finds every occurrence of ``<tag_open>key:value<tag_close>`` (as written
    by `log_metadata`) where key and value consist of word characters.

    Parameters
    ----------
    logfile : str or Path
        log file name

    Returns
    -------
    dict
        dictionary mapping each metadata key to its value (both str). If a
        key occurs several times, the last occurrence wins.
    """
    # Escape the tags so they are always matched literally, and use a raw
    # string for the \w classes (a plain string triggers an invalid-escape
    # warning).
    pattern = re.compile(
        re.escape(config_vars["metadata_tag_open"])
        + r"(\w+):(\w+)"
        + re.escape(config_vars["metadata_tag_close"])
    )
    with open(logfile, "r") as f:
        log_text = f.read()
    return {match.group(1): match.group(2) for match in pattern.finditer(log_text)}


def get_installer_log_file_path(
    mpsd_release: str, cmd: str, root_dir: Union[str, Path]
) -> Union[Path, None]:
    """Get installer log file path.

    Parameters
    ----------
    mpsd_release : str
        MPSD software stack version
    cmd : str
        action requested on the command line
    root_dir : str or pathlib.Path
        directory that contains the release folders

    Returns
    -------
    pathlib.Path or None
        ``root_dir/mpsd_release/logs/<installer log name>``; the ``logs``
        folder is created if necessary. None for the info-only commands
        "status" and "available", which do not write an installer log.
    """
    # Info-only commands must not create folders. Returning early also means
    # we do not need to determine the microarchitecture (which may require
    # 'archspec') just to throw the file name away.
    if cmd in ["status", "available"]:
        return None

    microarch = get_native_microarchitecture()
    installer_log_name = create_log_file_names(
        mpsd_release=mpsd_release, microarch=microarch, action=cmd
    )
    log_folder = Path(root_dir) / mpsd_release / "logs"
    log_folder.mkdir(parents=True, exist_ok=True)
    return log_folder / installer_log_name


def set_up_logging(loglevel="warning", file_path=None):
    """Set up logging.

    This function sets up the logging configuration for the script.
    It configures the log level, log format, and log handlers
    for both file and console(=shell) output.

    Parameters
    ----------
    loglevel : str or int
        Loglevels are:
        - warning (default): only print statements if something is unexpected
        - info (show more detailed progress)
        - debug (show very detailed output)
        An unknown level name falls back to WARNING.
    file_path : str
        - filename to save logging messages into

    If loglevel is 'debug', save line numbers in log messages.

    Returns
    -------
    None.

    Logger instances are generally not passed around, but retrieved from the
    logging module as shown below (they are singletons).

    We provide two loggers:

    1. log = logging.getLogger('')

       This is the 'root' logger. It uses a RichHandler if rich is available for
       output to the shell, otherwise plain text.

       Typical use:

          log.debug("...")
          log.info("...")
          log.warn("...")

       Equivalent to

          logging.debug("...")
          logging.info("...")

    2. print_log = logging.getLogger('print')

       This uses the logging module to issue the message, but prints without
       any further markup (i.e. no date, loglevel, line number, etc). Think
       PRINT via the LOGging module.

       We use this as a replacement for the print function (i.e. for messages
       that should not be affected by logging levels, and which should always
       be printed).

       Typical and intended use:

          print_log.info("Available toolchains are ...")

       The major difference from the normal print command is that the output
       will be sent to the shell (via a StreamHandler) AND the file with name
       file_path, so that these messages appear in the log file together with
       normal log output.
    """
    # convert loglevel into a numeric level and a printable name
    if isinstance(loglevel, int):
        log_level_numeric = loglevel
        loglevel_name = str(logging.getLevelName(loglevel))
    else:
        log_level_numeric = getattr(logging, loglevel.upper(), logging.WARNING)
        loglevel_name = loglevel.upper()
    if not isinstance(log_level_numeric, int):
        raise ValueError("Invalid log level: %s" % loglevel)

    # set up the main logger ("root" logger)
    logger = logging.getLogger("")

    # - "logger" logs everything
    # - we use loglevel at handler level to write everything to file
    # - and filter using log_level_numeric (as the user provides) to
    #   send logging messages to the console
    logger.setLevel(0)

    # the handler determines where the logs go: stdout/file
    if rich_available:
        # https://rich.readthedocs.io/en/stable/logging.html
        shell_handler = rich.logging.RichHandler()
        # rich handler provides metadata automatically:
        logging_format = "%(message)s"
        # for shell output, only show time (not date and time)
        shell_formatter = logging.Formatter(logging_format, datefmt="[%X]")
    else:
        shell_handler = logging.StreamHandler()
        # include line numbers in output if level is DEBUG
        linenumbers = " %(lineno)4d" if log_level_numeric == logging.DEBUG else ""
        logging_format = "%(asctime)s %(levelname)7s" + linenumbers + " | %(message)s"
        shell_formatter = logging.Formatter(logging_format)

    # here we hook everything together
    shell_handler.setFormatter(shell_formatter)
    # use the log_level_numeric to decide how much logging is sent to shell
    shell_handler.setLevel(log_level_numeric)

    # Here we set the handlers of the RootLogger to be just the one we want.
    # The reason is that the logging module will add a <StreamHandler <stderr>
    # (NOTSET)> handler if logging.info/logging.debug/... is used before we
    # come across this line. And we do not want that additional handler.
    logger.handlers = [shell_handler]

    # if filename provided, write log messages to that file, too.
    if file_path:
        file_handler = logging.FileHandler(file_path)
        # if we have a file, we write all information in there.
        # We could change the level, for example restrict to only DEBUG and above with
        # file_handler.setLevel(logging.DEBUG)
        file_logging_format = "%(asctime)s %(levelname)7s %(lineno)4d | %(message)s"
        file_formatter = logging.Formatter(file_logging_format, datefmt="[%X]")
        file_handler.setFormatter(file_formatter)
        logger.addHandler(file_handler)

    #
    # new logger for printing
    #
    print_log = logging.getLogger("print")
    print_log.setLevel(logging.INFO)
    print_log.propagate = False

    # Drop handlers from a previous call: otherwise every call of this
    # function would add another handler and each message would be printed
    # several times.
    for old_handler in list(print_log.handlers):
        print_log.removeHandler(old_handler)
        old_handler.close()

    # create 'empty' formatter
    formatter = logging.Formatter("%(message)s")

    # create, format and add handler for shell output
    ch = logging.StreamHandler()
    ch.setFormatter(formatter)
    print_log.addHandler(ch)

    # if filename provided, write output of print_log to that file, too
    if file_path:
        # create, format and add file handler
        fh = logging.FileHandler(file_path)
        fh.setFormatter(formatter)
        print_log.addHandler(fh)

    #
    # short message
    #
    logging.debug(
        f"Logging has been setup, loglevel={loglevel_name} "
        + f"{file_path=} {rich_available=}"
    )


def get_available_toolchains(mpsd_release: str) -> List[str]:
    """Given a release, return the available toolchains.

    This is based on the spack-environment's repository [1]. For this function
    to succeed, we need to have Internet access etc.

    We use a temporary directory to clone the repository locally, which is
    deleted when the function returns or raises.

    [1] https://gitlab.gwdg.de/mpsd-cs/spack-environments.git

    Parameters
    ----------
    mpsd_release : str
        release name; used as the branch to check out.

    Returns
    -------
    toolchains : List[str]
        names of all entries of the repository's ``toolchains`` folder.

    Example
    -------
    >>> get_available_toolchains('dev-23a')
    ['foss2021a-cuda-mpi',
     'foss2021a-mpi',
     'foss2021a-serial',
     'foss2022a-cuda-mpi',
     'foss2022a-mpi',
     'foss2022a-serial',
     'global',
     'global_generic']
    """
    logging.debug(f"get_available_toolchains({mpsd_release=})")
    logging.info(f"Retrieving available toolchains for release {mpsd_release}")
    print_log = logging.getLogger("print")

    # The context manager removes the temporary directory even if cloning or
    # parsing fails.
    with tempfile.TemporaryDirectory(prefix="mpsd-software-available-") as tmp_dir:
        tmp_dir_path = Path(tmp_dir)

        # find toolchains by cloning repository and checking out right branch
        clone_repo(
            tmp_dir_path, config_vars["spack_environments_repo"], branch=mpsd_release
        )

        # look for directories defining the toolchains
        toolchains_dir = tmp_dir_path / "toolchains"
        toolchains = os.listdir(toolchains_dir)
        msg = f"Found toolchains {sorted(toolchains)}"
        logging.debug(msg)

        # the 'toolchains' split into toolchains (such as foss2022a-mpi) and
        # sets of packages. Here we split them into the two categories for a
        # more useful output:
        toolchain_list = [x.parents[0].name for x in toolchains_dir.glob("*/spack.yaml")]
        package_sets = [x.parents[0].name for x in toolchains_dir.glob("*/*.list")]
        logging.debug(f"{toolchain_list=}")
        logging.debug(f"{package_sets=}")

        # summarise toolchains found for use, and show packages provided for
        # each package_set:
        print_log.info(
            f"MPSD software release {mpsd_release}, AVAILABLE for installation are"
        )
        print_log.info("Toolchains: \n " + "\n ".join(sorted(toolchain_list)))
        print_log.info("Package sets:")
        for package_set in package_sets:
            # collect the package name of every line that starts with the
            # pattern \w+@\w+ (the part before the first '%' of the first word)
            list_file = toolchains_dir / package_set / "global_packages.list"
            with open(list_file) as f:
                packages = [
                    line.split()[0].split("%")[0]
                    for line in f
                    if re.match(r"^\w+@\w+", line)
                ]
            print_log.info(f" {package_set} ({', '.join(packages)}) ")

    return toolchains


# Helper class to change directory via context manager
class os_chdir:
    """The os_chdir class is a context manager.

    It changes the current directory to a specified directory
    and returns to the original directory after execution.
    """

    def __init__(self, new_dir):
        """Initialize, remember target directory."""
        self.new_dir = new_dir
        self.saved_dir = os.getcwd()

    def __enter__(self):
        """Go to target directory (main action for context)."""
        # Record the directory at the moment we actually leave it, so that
        # an instance created earlier still returns to the right place.
        self.saved_dir = os.getcwd()
        os.chdir(self.new_dir)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """On exit we return to original directory."""
        os.chdir(self.saved_dir)


def run(*args, counter=[0], **kwargs):
    """
    Run a subprocess and log the call.

    Convenience function to call `subprocess.run` and provide some metadata
    about the call.

    Parameters
    ----------
    args : tuple
        passed on to subprocess.run(*args). Exactly one positional argument
        is expected: a string such as ("ls -l") or a list such as
        (["ls", "-l"]).
    counter : list, optional
        list with one integer, starting from [0].
        This is (a Python hack) to count the number of
        calls of this function, so the different calls of subprocess.run
        are easier to follow in the log files. The mutable default is
        intentional: it is shared between calls.
    kwargs : dict
        keyword-value arguments to be passed to subprocess.run. For example,
        `shell=True`.

    Returns
    -------
    process : subprocess.CompletedProcess
        CompletedProcess object as returned by `subprocess.run` .

    Raises
    ------
    NotImplementedError
        if the positional argument is neither a str nor a list.

    Examples
    --------
    >>> run(['date', '+%Y-%m-%d'])
    ##-03 Starting subprocess.run(['date', '+%Y-%m-%d']) with options
    ##-03   getcwd=/Users/fangohr/git/mpsd-software-environments
    ##-03   COMMAND=date +%Y-%m-%d
    2023-05-30
    ##-03   Completed in 0.0054s.
    ##-03
    CompletedProcess(args=['date', '+%Y-%m-%d'], returncode=0)

    >>> run(['date +%Y-%m-%d'], shell=True)
    ##-04 Starting subprocess.run(['date +%Y-%m-%d']) with options shell=True
    ##-04   getcwd=/Users/fangohr/git/mpsd-software-environments
    ##-04   COMMAND=date +%Y-%m-%d
    2023-05-30
    ##-04   Completed in 0.0069s.
    ##-04
    CompletedProcess(args=['date +%Y-%m-%d'], returncode=0)
    """
    # token is printed in front of every meta-data line - useful for
    # searching the logs. Starts with "##-00", then "##-01", ...
    token = f"##-{counter[0]:02d}"
    counter[0] += 1  # increase counter

    # make command nicely readable: ["ls", "-l"] -> "ls -l"
    assert isinstance(args, tuple)
    assert len(args) == 1
    arg = args[0]

    # either args is a tuple containing a string | Example: ('ls -1',)
    if isinstance(arg, str):
        command = arg
    # or we have a tuple containing a list of strings.
    # Example: (['ls', '-1'],)
    elif isinstance(arg, list):
        command = " ".join(arg)
    else:
        # we do not expect this to happen
        raise NotImplementedError(f"{arg=}, {args=}")

    # make options (such as `shell=True`) nicely readable
    options = ", ".join(f"{key}={value}" for key, value in kwargs.items())

    # provide information about upcoming subprocess.run call
    logging.debug(
        f"{token} Starting subprocess.run('{command}') with options {options}"
    )
    logging.debug(f"""{token} getcwd={os.getcwd()}""")
    logging.debug(f"""{token} subprocess.run("{arg}")""")

    time_start = time.time()
    process = subprocess.run(*args, **kwargs)
    execution_time = time.time() - time_start

    logging.debug(f"{token} {process=}")
    logging.debug(f"{token} Completed in {execution_time:.4f}s.")
    logging.debug(f"{token}")  # near-empty line to make reading logs easier
    return process


def _git_stdout(git_args: List[str], cwd: Union[str, Path]) -> str:
    """Run ``git <git_args>`` inside `cwd` and return its stripped stdout."""
    process = run(["git", *git_args], stdout=subprocess.PIPE, check=True, cwd=cwd)
    return process.stdout.decode().strip()


def record_script_execution_summary(
    mpsd_release: str, root_dir: str, msg: str = None, **kwargs
) -> None:
    """
    Log the command used to build the toolchains.

    Appends to the command log file (``config_vars["cmd_log_file"]``) inside
    `root_dir`.

    If `msg` is given, only that message is appended. Otherwise a header is
    written which records the command line, the branch and commit hash of the
    git repository in `root_dir` (the installer), and the Spack environments
    branch and commit hash passed via `kwargs`.

    Parameters
    ----------
    - mpsd_release : str
        The name of the release to install toolchains for (currently unused).
    - root_dir : str
        The path to the directory where the scripts are located.
    - msg : str, optional
        An optional message to log in the command log file.
    - **kwargs : dict
        A dictionary with values for
        - spe_branch : str
            The name of the Spack environments branch.
        - spe_commit_hash : str
            The commit hash of the Spack environments branch.

    Returns
    -------
    - None

    Raises
    ------
    subprocess.CalledProcessError
        if `msg` is not given and the git queries in `root_dir` fail.
    """
    # Write to the log file with the following format
    # --------------------------------------------------
    # 2023-02-29T23:32:01, install-software-environment.py --release 23b --install ALL
    # Software environment installer branch: script_branch (commit hash: \
    #   script_commit_hash)
    # Spack environments branch: dev-23a (commit hash: spe_commit_hash)
    # MSGs
    root_dir = Path(root_dir)
    # Using explicit paths (instead of changing the working directory twice)
    # keeps this function working when root_dir is a relative path.
    with open(root_dir / config_vars["cmd_log_file"], "a") as f:
        if msg:
            # Write the message to the log file
            f.write(msg + "\n")
            return

        # Write the header
        f.write("-" * 50 + "\n")

        # Gather data to log
        # call statement:
        cmd_line = " ".join(sys.argv)
        # script branch and commit hash
        script_branch = _git_stdout(["rev-parse", "--abbrev-ref", "HEAD"], root_dir)
        script_commit_hash = _git_stdout(["rev-parse", "--short", "HEAD"], root_dir)
        # spack-environments branch and commit hash from kwargs
        spe_branch = kwargs.get("spe_branch", None)
        spe_commit_hash = kwargs.get("spe_commit_hash", None)

        # Write to log file
        f.write(f"{datetime.datetime.now().isoformat()}, {cmd_line}\n")
        f.write(
            f"Software environment installer branch: {script_branch} "
            f"(commit hash: {script_commit_hash})\n"
        )
        f.write(
            f"Spack environments branch: {spe_branch} "
            f"(commit hash: {spe_commit_hash})\n"
        )


def clone_repo(
    target_path: Path, repo_url: str, branch=None, capture_output=True
) -> None:
    """Clone repo locally. Optionally checkout a branch.

    Parameters
    ----------
    target_path : Path
        Where to check the repository out to
    repo_url: str
        where to clone the git repository from
    branch: str (defaults to None)
        if provided, checkout this branch after cloning
    capture_output: bool (defaults to True)
        capture output, i.e. do not send it to stdout.

    Raises
    ------
    subprocess.CalledProcessError
        if cloning, fetching or pulling fails.
    Exception
        if `branch` cannot be checked out; the message lists the branches
        that are available.
    """
    # Work with an absolute path: the original code changed into target_path
    # and then cloned into str(target_path), which for a relative path would
    # create a nested copy of that path.
    target_path = Path(target_path).resolve()
    target_path.mkdir(parents=True, exist_ok=True)

    run(
        ["git", "clone", repo_url, str(target_path)],
        check=True,
        capture_output=capture_output,
    )

    if not branch:
        return

    # Git fetch and checkout the release branch and git pull
    # to be sure that the resulting repo is up to date
    run(
        ["git", "fetch", "--all"],
        check=True,
        capture_output=capture_output,
        cwd=target_path,
    )
    checkout_result = run(
        ["git", "checkout", branch], capture_output=capture_output, cwd=target_path
    )

    if checkout_result.returncode != 0:
        msg = f"Couldn't find {branch=}\n"

        branches_result = run(
            ["git", "branch", "-a"], check=True, capture_output=True, cwd=target_path
        )
        # strip off 'remotes/origin' (needs Python 3.9):
        branches_list = [
            b.strip().removeprefix("remotes/origin/")
            for b in branches_result.stdout.decode().splitlines()
            if b.strip()
        ]
        msg += f"Available branches are {branches_list}"
        logging.error(msg)
        raise Exception(msg, branches_result)

    run(["git", "pull"], check=True, capture_output=capture_output, cwd=target_path)


def get_release_info(mpsd_release: str, root_dir: Path) -> Tuple[str, str, List[str]]:
    """
    Get information about the specified release.

    Get information about the specified release, such as the branch and commit hash
    of the Spack environments repository and the available toolchains.

    Parameters
    ----------
    mpsd_release : str
        The name of the release to get information for.
    root_dir : pathlib.Path
        The base directory where releases are stored.

    Returns
    -------
    spe_branch : str
        The name of the branch for the Spack environments repository.
    spe_commit_hash : str
        The commit hash for the Spack environments repository.
    available_toolchains : list
        A list of strings representing the available toolchains for the release
        (the entries of the repository's ``toolchains`` folder).

    Raises
    ------
    FileNotFoundError
        If the release directory does not exist.
    """
    # Get the info for release
    release_base_dir = Path(root_dir) / mpsd_release
    if not release_base_dir.exists():
        logging.debug(f"get_release_info({mpsd_release=}, {root_dir=})")
        raise FileNotFoundError(
            f"{release_base_dir} does not exist.\n"
            f"Hint: `prepare {mpsd_release}` may fix this."
        )

    repo_dir = release_base_dir / "spack-environments"
    # Get the branch and commit hash of the spack-environments repo
    spe_commit_hash = _git_stdout(["rev-parse", "HEAD"], repo_dir)
    spe_branch = _git_stdout(["rev-parse", "--abbrev-ref", "HEAD"], repo_dir)
    available_toolchains = os.listdir(repo_dir / "toolchains")
    return spe_branch, spe_commit_hash, available_toolchains


def prepare_environment(mpsd_release: str, root_dir: Path) -> List[str]:
    """
    Create the directory structure for the given MPSD release.

    It does the following steps:
    Clones the spack-environments repository (or updates it with
    ``git pull`` if it exists already).
    Determines the branch and commit hash of the spack-environments repository
    and the available toolchains.
    Logs the command usage.

    Parameters
    ----------
    mpsd_release : str
        The name of the MPSD release to prepare the environment for.
    root_dir : pathlib.Path
        The base directory to create the release folder and
        clone the spack-environments repository into.

    Returns
    -------
    available_toolchains : list
        A list of available toolchains for the given MPSD release.

    Example
    -------
    >>> prepare_environment('dev-23a', Path('.'))
    ['foss2021a-cuda-mpi',
     'foss2021a-mpi',
     'foss2021a-serial',
     'foss2022a-cuda-mpi',
     'foss2022a-mpi',
     'foss2022a-serial',
     'global',
     'global_generic']
    """
    logging.info(f"Preparing {mpsd_release=}")

    # Creates the directory structure for the specified release and clone the
    # Spack environments repository if it doesn't exist:

    # Create the directory structure for the release
    release_base_dir = root_dir / mpsd_release
    release_base_dir.mkdir(parents=True, exist_ok=True)
    repo_path = release_base_dir / "spack-environments"
    if repo_path.exists():
        logging.debug(f"directory {repo_path} exists already, will update")
        # best effort: a failing pull is not treated as an error here
        run(["git", "pull", "-v"], capture_output=True, cwd=repo_path)
    else:
        repo_url = config_vars["spack_environments_repo"]
        logging.info(f"cloning repository {repo_path} from {repo_url}")
        clone_repo(repo_path, repo_url, branch=mpsd_release)

    logging.getLogger("print").info(
        f"Release {mpsd_release} is prepared in {release_base_dir}"
    )

    spe_branch, spe_commit_hash, available_toolchains = get_release_info(
        mpsd_release, root_dir
    )
    record_script_execution_summary(
        mpsd_release, root_dir, spe_branch=spe_branch, spe_commit_hash=spe_commit_hash
    )
    return available_toolchains


def get_native_microarchitecture():
    """Return native microarchitecture.

    On MPSD machines, there should be an environment variable
    "MPSD_MICROARCH". We try to read that. If it is not defined, we use the
    'archspec cpu' command. If 'archspec' cannot be found, we ask the user to
    install it and exit.

    The result is also written to the log via `log_metadata`.

    Returns
    -------
    microarch : str

    Raises
    ------
    ValueError
        if 'archspec cpu' returns a non-zero exit code.
    SystemExit
        if the 'archspec' executable is not available.

    Example
    -------
    >>> get_native_microarchitecture()
    'haswell'
    """
    # attempt to get MICRO_ARCH from environment variable (should work on
    # MPSD_HPC and MPSD linux laptops). If not defined, return
    # "UNKNOWN_MICROARCH"
    microarch = os.environ.get("MPSD_MICROARCH", "UNKNOWN_MICROARCH")

    # if we have not found the microarchitecture environment variable,
    # try calling archspec
    if microarch == "UNKNOWN_MICROARCH":
        logging.debug(
            "Couldn't find MPSD_MICROARCH environment variable. Will try archspec."
        )
        try:
            process = run(["archspec", "cpu"], stdout=subprocess.PIPE, text=True)
        except FileNotFoundError as e:
            logging.debug(f"Call of 'archspec cpu' failed: {e=}")
            # Presumably 'archspec' is not installed.
            msg = "Please install archspec, for example via 'pipx install archspec'.\n"
            msg += "The command we need to execute is 'archspec cpu'.\n"
            msg += "Documentation of package: https://archspec.readthedocs.io/"
            logging.error(msg)
            sys.exit(1)
        else:  # we have found archspec and executed it
            if process.returncode == 0:  # sanity check
                microarch = process.stdout.strip()
                logging.debug(
                    f"Found microarchitecture from 'archspec cpu' to be '{microarch}'"
                )
                assert len(microarch) > 0  # sanity check
            else:
                raise ValueError(
                    f"Some error occurred when calling 'archspec cpu': {process=}"
                )

    # at this point, we have determined the microarchitecture
    log_metadata("microarchitecture", microarch)
    return microarch


def install_environment(
    mpsd_release: str,
    toolchains: List[str],
    root_dir: Path,
    enable_build_cache: bool = False,
) -> None:
    """
    Install the specified MPSD release and toolchains.

    The function installs the toolchain to the specified directory, using Spack.

    Parameters
    ----------
    mpsd_release : str
        A string representing the MPSD release version.
    toolchains : list of str
        A list of strings representing the toolchains to install
        (e.g., "foss2021a-mpi", "global_generic"). The special requests
        "ALL" (install everything available) and "NONE" (only prepare the
        release and list what is available) are accepted either as a plain
        string or as a one-element list (which is what the command line
        parser provides).
    root_dir : pathlib.Path
        A Path object representing the path to the directory where
        the release and toolchains will be installed.
    enable_build_cache : bool, optional
        A boolean indicating whether to build the build cache
        when installing toolchains. Defaults to False.

    Raises
    ------
    SystemExit
        If a requested toolchain is not available in the specified release
        (an error is logged first).
    subprocess.CalledProcessError
        If the installation command pipeline returns a non-zero exit code.

    Returns
    -------
    None
    """
    logging.info(
        f"Installing release {mpsd_release} with toolchains {toolchains} "
        f"to {root_dir}"
    )

    # Set required variables
    release_base_dir = root_dir / mpsd_release
    microarch = get_native_microarchitecture()
    toolchain_dir = release_base_dir / microarch
    toolchain_dir.mkdir(parents=True, exist_ok=True)
    spack_setup_script = release_base_dir / "spack-environments" / "spack_setup.sh"
    install_flags = []
    if not enable_build_cache:
        install_flags.append("-b")

    # run the prepare_environment function
    available_toolchains = prepare_environment(mpsd_release, root_dir)

    # Normalise the request to a list. The command line delivers a list
    # (nargs="+"), so comparing it with the strings "ALL"/"NONE" alone would
    # never match; a bare string would otherwise be iterated per character.
    if isinstance(toolchains, str):
        toolchains = [toolchains]
    else:
        toolchains = list(toolchains)

    # Ensure that the requested toolchains are available in the release
    if toolchains == ["ALL"]:
        toolchains = available_toolchains
    elif toolchains == ["NONE"]:
        # No toolchains requested, so we only create the env and print the
        # list of available toolchains
        logging.warning(
            "No toolchains requested. Available toolchains for release "
            f"{mpsd_release} are: \n {available_toolchains}"
        )
        print_log = logging.getLogger("print")
        print_log.info(f"{available_toolchains=}")
        return

    for toolchain in toolchains:
        if toolchain not in available_toolchains:
            msg = f"Toolchain '{toolchain}' is not available in release {mpsd_release}."
            msg += " Use 'available' command to see list of available toolchains."
            logging.error(msg)
            sys.exit(1)

    # if logs folder doesn't exist, create it
    build_log_folder = release_base_dir / "logs"
    build_log_folder.mkdir(parents=True, exist_ok=True)

    # Install the toolchains:
    # run spack_setup_script (inside toolchain_dir) once per toolchain
    for toolchain in toolchains:
        # Set the install log file name from create_log_file_names
        build_log_file_name = create_log_file_names(
            mpsd_release, microarch, "install", toolchain=toolchain
        )
        build_log_path = build_log_folder / build_log_file_name

        logging.info(f"Installing toolchain {toolchain} to {toolchain_dir}")

        # log the command
        record_script_execution_summary(
            mpsd_release,
            root_dir,
            msg=f"installing {toolchain} and logging at {build_log_path}",
        )
        record_script_execution_summary(
            mpsd_release,
            root_dir,
            msg=(
                f"CMD: bash {spack_setup_script} {' '.join(install_flags)} "
                f"{toolchain}"
            ),
        )
        # NOTE(review): because of the pipe into `tee`, the exit status seen
        # by check=True is that of `tee`, not of spack_setup.sh; a failing
        # build is therefore not detected here. Confirm whether that is
        # intended before adding `set -o pipefail`.
        run(
            f"bash {spack_setup_script} {' '.join(install_flags)} {toolchain} 2>&1 "
            f"| tee -a {build_log_path} ",
            shell=True,
            check=True,
            cwd=toolchain_dir,
        )


def remove_environment(release, toolchains, target_dir):
    """Remove release from installation (not implemented yet)."""
    msg = f"Removing release {release} with toolchains {toolchains} from {target_dir}"
    logging.info(msg)
    raise NotImplementedError(msg)


def start_new_environment(release, from_release, target_dir):
    """Start new MPSD software environment version (not implemented yet)."""
    msg = f"Starting new release {release} from {from_release} to {target_dir}"
    logging.info(msg)
    raise NotImplementedError(msg)


def environment_status(
    mpsd_release: str, root_dir: Union[str, Path]
) -> Union[dict, None]:
    """Show status of release in installation.

    Parameters
    ----------
    mpsd_release : str
        A string representing the MPSD release version.
    root_dir : str or pathlib.Path
        Path to the root directory of the installation.
        Expect a subfolder root/mpsd_release in which we search for the
        toolchains.

    Returns
    -------
    toolchain_map : dict or None
        A dictionary containing available microarchitectures as keys and
        a (sorted) list of available toolchains as values for each
        microarchitecture. None (after logging an error) if the release
        folder does not exist, or if the ``spack`` folder for the native
        microarchitecture is missing.
    """
    msg = f"Showing status of release {mpsd_release} in {root_dir}"
    logging.info(msg)
    plog = logging.getLogger("print")
    release_base_dir = Path(root_dir) / mpsd_release
    microarch = get_native_microarchitecture()
    toolchain_dir = release_base_dir / microarch
    spack_dir = toolchain_dir / "spack"

    # if the mpsd_release does not exist:
    if not release_base_dir.exists():
        logging.debug(f"Directory {str(release_base_dir)} does not exist.")
        logging.error(f"MPSD release '{mpsd_release}' is not installed.")
        return None

    # if the mpsd_release directory exists but the spack repository is not fully
    # cloned - indicates some kind of incomplete installation:
    if not spack_dir.exists():
        logging.debug(f"Looking for files in {spack_dir}")
        logging.error(
            f"MPSD release '{mpsd_release}' has not been completely installed."
        )
        return None

    # find all folders for all microarch in the release directory
    # except for the blacklisted files
    black_listed_files = [
        config_vars["cmd_log_file"],
        "spack-environments",
        "logs",
        "mpsd-spack-cache",
    ]

    list_of_microarchs_candidates = os.listdir(release_base_dir)
    list_of_microarchs = [
        x for x in list_of_microarchs_candidates if x not in black_listed_files
    ]
    logging.debug(f"{list_of_microarchs=}")

    toolchain_map = {}
    for microarch in list_of_microarchs:
        # get all the toolchains in the microarch
        possible_toolchains = (release_base_dir / microarch).glob(
            "lmod/Core/toolchains/*.lua"
        )
        # the toolchain is the name of the file without the .lua extension;
        # sorted so that the report does not depend on directory order
        toolchain_map[microarch] = sorted(
            toolchain.stem for toolchain in possible_toolchains
        )

    logging.debug(f"{toolchain_map=}")

    # pretty print the toolchain map key as the heading
    # and the value as the list of toolchains
    plog.info(f"Installed toolchains ({mpsd_release}):\n")
    for microarch, toolchains in toolchain_map.items():
        plog.info(f"- {microarch}")
        for toolchain in toolchains:
            plog.info(f" {toolchain}")
        plog.info(f" [module use {str(release_base_dir / microarch / 'lmod/Core')}]")
        plog.info("")
    return toolchain_map


def main():
    """Execute main entry point.

    Parses the command line, sets up logging (with a log file for commands
    that change files on disk) and dispatches to the function implementing
    the requested action.
    """
    parser = argparse.ArgumentParser(
        description=about_intro,
        epilog=about_epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument(
        "-l",
        dest="loglevel",
        choices=["warning", "info", "debug"],
        required=False,
        default="warning",
        help="Set the log level",
    )
    parser.add_argument("--version", action="version", version=__version__)

    subparsers = parser.add_subparsers(
        dest="action", title="actions", description="valid actions", required=True
    )
    list_of_cmds = [
        ("available", "What is available for installation?"),
        ("install", "Install a software environment"),
        # ("reinstall", "Reinstall a software environment"),
        # ("remove", "Remove a software environment or toolchains from an environment"),
        # ("start-new", "Start a new software environment version"),
        ("status", "Show status: what is installed?"),
        ("prepare", "Prepare installation of MPSD-release (dev only)"),
    ]
    for cmd, help_text in list_of_cmds:
        subp = subparsers.add_parser(cmd, help=help_text)

        if cmd == "start-new":
            subp.add_argument(
                "--from-release",
                dest="from_release",
                type=str,
                required=True,
                help="Release version to start from",
            )
            subp.add_argument(
                "--to-release",
                dest="to_release",
                type=str,
                required=True,
                help="Release version to create",
            )
        else:
            subp.add_argument(
                "release",
                type=str,
                help="Release version to prepare, install, reinstall or remove",
            )
            if cmd in ["install", "reinstall", "remove"]:
                # "install" command needs additional documentation
                package_set_help = (
                    f"One or more package sets (like toolchains) to be {cmd}ed. "
                    "Use 'ALL' to refer to all available package sets."
                )

                subp.add_argument(
                    "package_set",  # first option defines attribute
                    # name `args.package_set` in `args = parser_args()`
                    type=str,
                    nargs="+",
                    default="NONE",
                    help=package_set_help,
                )
                subp.add_argument(
                    "--enable-build-cache",
                    action="store_true",
                    help=(
                        "Enable Spack build cache. Useful for reinstallation but "
                        "consumes time and disk space."
                    ),
                )

    # Carry out the action
    args = parser.parse_args()

    # root_dir is the place where this script exists; releases are kept in
    # sub-folders of it.
    root_dir = Path(os.path.dirname(os.path.realpath(__file__)))

    set_up_logging(
        args.loglevel,
        get_installer_log_file_path(args.release, args.action, root_dir),
    )

    # sanity check for common mistakes in command line arguments
    if args.release.endswith("/"):  # happens easily with autocompletion
        logging.error(
            f"You provided mpsd-release='{args.release}'. "
            f"Did you mean '{args.release.rstrip('/')}'?"
        )
        sys.exit(1)

    # Check the command and run related function
    if args.action == "remove":
        remove_environment(args.release, args.package_set, root_dir)
    elif args.action == "start-new":
        start_new_environment(args.from_release, args.to_release, root_dir)
    elif args.action == "install":
        install_environment(
            args.release, args.package_set, root_dir, args.enable_build_cache
        )
    elif args.action == "status":
        _ = environment_status(args.release, root_dir)
    elif args.action == "prepare":
        prepare_environment(args.release, root_dir)
    elif args.action == "available":
        get_available_toolchains(args.release)
    else:
        message = (
            f"No known action found ({args.action=}). Should probably never happen."
        )
        logging.error(message)
        raise NotImplementedError(message)


if __name__ == "__main__":
    main()