Skip to content
Snippets Groups Projects
mpsd_software.py 48.1 KiB
Newer Older
#!/usr/bin/env python3
Hans Fangohr's avatar
Hans Fangohr committed
"""mpsd-software: tool for installation of software as on MPSD HPC."""
import argparse
import datetime
import os
import subprocess
Hans Fangohr's avatar
Hans Fangohr committed
import tempfile
import time
from pathlib import Path
from typing import List, Tuple, Union
import shutil
from functools import cache
import importlib.metadata

__version__ = importlib.metadata.version(__package__ or __name__)
import rich.logging
command_name = Path(sys.argv[0]).name
Hans Fangohr's avatar
Hans Fangohr committed
# Introductory text shown at the top of the command line help.
# Page residue ("... avatar" / "... committed" lines) that had leaked into the
# string literal has been removed, so it no longer appears in the help output.
about_intro = f"""
Build software as on MPSD HPC.

    This tool builds software package sets (including toolchains for Octopus).
    It follows recipes as used on the MPSD HPC system and the (spack-based)
    Octopus buildbot. Compiled software is organised into MPSD software release
    versions (such as `dev-23a`) and CPU microarchitecture (such as `sandybridge`).
    Compiled packages and toolchains can be activated and used via `module load` as
    on the HPC system.
    Further documentation is available in the README.rst file, online at
    https://gitlab.gwdg.de/mpsd-cs/mpsd-software-manager/-/blob/main/README.rst

Command line usage:


"""


about_epilog = f"""


Examples:

    1. Query what releases are available for installation

       $> {command_name} available

    2. Query what package sets and toolchains are available for installation in
       $> {command_name} available dev-23a
    3. Install foss2022a-serial toolchain from the dev-23a release
       $> {command_name} install dev-23a foss2022a-serial
    4. Check what package sets and toolchains are installed from release dev-23a
       $> {command_name} status dev-23a
Hans Fangohr's avatar
Hans Fangohr committed
       The `status` command also displays the `module use` command needed to load
       the created modules.
# Time stamp of this invocation (second resolution, local time) in ISO-like
# form with ":" replaced by "-", e.g. "2023-07-03T12-27-52".
call_date_iso = datetime.datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
    #  kept inside the mpsd_release folder
    "cmd_log_file": "mpsd-software.log",
    "metadata_tag_open": "!<meta>",
    "metadata_tag_close": "</meta>!",
    "spack_environments_repo": "https://gitlab.gwdg.de/mpsd-cs/spack-environments.git",
    "init_file": ".mpsd-software-root",
Hans Fangohr's avatar
Hans Fangohr committed

def log_metadata(key: str, value: str) -> None:
    """Write one ``key:value`` metadata record to the log (level INFO).

    The record is wrapped in the opening and closing tags taken from
    ``config_vars`` ('metadata_tag_open' / 'metadata_tag_close'), so that
    it can be located in the log file later on. The message is emitted
    through the root logger of the logging module.

    Parameters
    ----------
    key : str
        name of the metadata entry
    value : str
        value of the metadata entry

    Returns
    -------
    None
    """
    tag_open = config_vars["metadata_tag_open"]
    tag_close = config_vars["metadata_tag_close"]
    logging.info(f"{tag_open}{key}:{value}{tag_close}")


def read_metadata_from_logfile(logfile: Union[str, Path]) -> dict:
    """Read metadata from the log file.

    This function reads metadata from the log file. The metadata is
    enclosed in a tag, so that it can be easily found in the log file.

    Parameters
    ----------
    logfile : str or Path
        log file name
    returns : dict
        dictionary containing the metadata
    """
    with open(logfile, "r") as f:
        log_text = f.read()
    # check for all data that matches the regex
    # metadata_tag_open {key}:{value} metadata_tag_close
    # and return a dictionary with all the matches
    return {
        match.group(1): match.group(2)
        for match in re.finditer(
            rf"{config_vars['metadata_tag_open']}(\w+):(\w+){config_vars['metadata_tag_close']}",
    mpsd_release: str,
    action: str,
    date: str = call_date_iso,
    package_set: Union[str, None] = None,
    """Create log file names.

    This function creates the log file names for either the installer or
    the build log files.
    If a package_set is given, then the build log file name is created.
    if no package_set is given, then the installer log file name is created.
Ashwin Kumar Karnad's avatar
Ashwin Kumar Karnad committed
    The installer log file hosts the logs of the installer script, while
    the build log file hosts the logs of the build process as generated by the
    spack_setup.sh script.

    Parameters
    ----------
    mpsd_release : str
        MPSD software stack version
    date : str
        date of the call ins iso format
    action : str
        action performed (install,remove,reinstall,prepare,status)
        only install and remove are valid for build log file.
    package_set : str or None
        package_set name (only for build log file)
Ashwin Kumar Karnad's avatar
Ashwin Kumar Karnad committed

        log file name
        installer_log_file_name or build_log_file_name depending on the
        parameters given.
        If the action is not one that changes the files on disk (info only actions)

    Examples
    --------
    # installer log file name for `mpsd-software install dev-23a foss2021a-mpi`
    >>> create_log_file_name(
    ...         "dev-23a",
    ...         "install",
    ...         "2023-07-03T12-27-52",
    ...     )
    'dev-23a_sandybridge_2023-07-03T12-27-52_APEX_install.log'

    # build log file name for `mpsd-software install dev-23a foss2021a-mpi`
    >>> create_log_file_name(
    ...     "dev-23a",
    ...     "install",
    ...     "2023-07-03T12-27-52",
    ...     "foss2021a-mpi",
    ... )
    'dev-23a_sandybridge_2023-07-03T12-27-52_BUILD_foss2021a-mpi_install.log'

    # installer log file name for `mpsd-software status dev-23a`
    >>> create_log_file_name(
    ...     "dev-23a",
    ...     "status",
    ...     "2023-07-03T12-27-52",
    ... )
    'dev-23a_sandybridge_2023-07-03T12-27-52_APEX_status.log'

    # build log file name for `mpsd-software status dev-23a` (no log file is created)
    >>> create_log_file_name(
    ...     "dev-23a",
    ...     "status",
    ...     "2023-07-03T12-27-52",
    ...     "foss2021a-mpi",
    ... )
    (None)
    microarch = get_native_microarchitecture()
    if package_set:
        # if package_set is given, then  we build the build_log_file_name
        if action in ["install", "remove"]:
                f"{mpsd_release}_{microarch}_{date}_BUILD_{package_set}_{action}.log"
        else:
            return None
        # if package_set is not given, then we build the installer_log_file_name
        log_file_name = f"{mpsd_release}_{microarch}_{date}_APEX_{action}.log"
Ashwin Kumar Karnad's avatar
Ashwin Kumar Karnad committed

def get_log_file_path(
    mpsd_release: str, cmd: str, root_dir: Path, package_set: Union[str, None] = None
) -> Union[Path, None]:
    This function creates the log file paths for either the installer or
    the build log files.
    If a package_set is given, then the build log file path is returned.
    if no package_set is given, then the installer log file path is returned.
    If the logs folder does not exist, then it is created.
    mpsd_release : str
        MPSD software stack version
    cmd : str
        command to be executed
    root_dir : str
        root directory of the mpsd software stack
    package_set : str
        package_set name (only for build log file)
    Returns
    -------
    Path or None
        log file path
        installer_log_file_path or build_log_file_path depending on the
        parameters given.
    Examples
    --------
    # installer log file path for `mpsd-software install dev-23a foss2021a-mpi`
    >>> get_log_file_path(
    ...     "dev-23a",
    ...     "install",
    ...     Path(
    ...         "/tmp/root_dir"
    ...     ),
    ... )
    PosixPath('/tmp/root_dir/dev-23a/logs/dev-23a_zen3_2023-07-03T12-28-55_APEX_install.log')

    # build log file path for `mpsd-software install dev-23a foss2021a-mpi`
    >>> get_log_file_path(
    ...     "dev-23a",
    ...     "install",
    ...     Path(
    ...         "/tmp/root_dir"
    ...     ),
    ...     "foss2021a-mpi",
    ... )
    PosixPath('/tmp/root_dir/dev-23a/logs/dev-23a_zen3_2023-07-03T12-28-55_BUILD_foss2021a-mpi_install.log')

    # installer log file path for `mpsd-software status dev-23a`
    >>> get_log_file_path(
    ...     "dev-23a",
    ...     "status",
    ...     Path(
    ...         "/tmp/root_dir"
    ...     ),
    ... )
    PosixPath('/tmp/root_dir/dev-23a/logs/dev-23a_zen3_2023-07-03T12-28-55_APEX_status.log')

    # build log file path for `mpsd-software status dev-23a` (no log file is created)
    >>> get_log_file_path(
    ...     "dev-23a",
    ...     "status",
    ...     Path(
    ...         "/tmp/root_dir"
    ...     ),
    ...     "foss2021a-mpi",
    ... )
    (None)
    log_file_name = create_log_file_name(
        mpsd_release=mpsd_release,
        action=cmd,
        package_set=package_set,
    log_folder = root_dir / mpsd_release / "logs"
    if log_file_name:
        # if the log_folder dosent exist, create it
        if not log_folder.exists():
            log_folder.mkdir(parents=True)
        return log_folder / log_file_name
        return None
def set_up_logging(loglevel="warning", file_path=None):
    This function sets up the logging configuration for the script.
Ashwin Kumar Karnad's avatar
Ashwin Kumar Karnad committed
    It configures the log level, log format, and log handlers
Hans Fangohr's avatar
Hans Fangohr committed
    for both file and console(=shell) output.
    Parameters
    ----------
    loglevel : str or int
       Loglevels are:
         - warning (default): only print statements if something is unexpected
         - info (show more detailed progress)
         - debug (show very detailed output)
    file_path : str
         - filename to save logging messages into

    If loglevel is 'debug', save line numbers in log messages.
Hans Fangohr's avatar
Hans Fangohr committed
    Returns
    -------
    None.

    Logger instances are generally not passed around, but retrieved from the
    logging module as shown below (they are singletons).

    We provide two loggers:

    1. log = logging.getLogger('')

       This is the 'root' logger.

       Typical use:

       log.debug("...")
       log.info("...")
       log.warn("...")

       Equivalent to

       logging.debug("...")
       logging.info("...")

    2. print_log = logging.getlogger('print')

       This uses the logging module to issue the message, but prints without
       any further markup (i.e. no date, loglevel, line number, etc). Think
       PRINT via the LOGging module.

       We use this as a replacement for the print function (i.e. for messages
       that should not be affected by logging levels, and which should always
       be printed).

       Typical and intended use:

       print_log.info("Available package_sets are ...")

       The major difference from the normal print command is that the output
       will be send to the stdout (as for print) AND the file with name
       filename, so that these messages appear in the log file together with
       normal log output.

    # convert loglevel string into loglevel as number
    log_level_numeric = getattr(logging, loglevel.upper(), logging.WARNING)
    if not isinstance(log_level_numeric, int):
Hans Fangohr's avatar
Hans Fangohr committed
        raise ValueError("Invalid log level: %s" % loglevel)
    # set up the main logger ("root" logger)
    logger = logging.getLogger("")
    # - "logger" logs everything
    # - we use loglevel at handler level to write everything to file
    # - and filter using  log_level_numeric (as the user provides) to
    #   send logging messages to the console
    logger.setLevel(0)
    # the handler determines where the logs go: stdout/file
    # We use 'rich' to provide a Handler:
    # https://rich.readthedocs.io/en/stable/logging.html
    shell_handler = rich.logging.RichHandler()
    # rich handler provides metadata automatically:
    logging_format = "%(message)s"
    # for shell output, only show time (not date and time)
    shell_formatter = logging.Formatter(logging_format, datefmt="[%X]")
    # here we hook everything together
    shell_handler.setFormatter(shell_formatter)
    # use the log_level_numeric to decide how much logging is sent to shell
    shell_handler.setLevel(log_level_numeric)
Hans Fangohr's avatar
Hans Fangohr committed

    # Here we set the handlers of the RootLogger to be just the one we want.
    # The reason is that the logging module will add a <StreamHandler <stderr>
    # (NOTSET)> handler if logging.info/logging.debug/... is used before we
    # come across this line. And we do not want that additional handler.
    logger.handlers = [shell_handler]

    # if filename provided, write log messages to that file, too.
    if file_path:
        file_handler = logging.FileHandler(file_path)
        # if we have a file, we write all information in there.
        # We could change the level, for example restrict to only DEBUG and above with
        # file_handler.setLevel(logging.DEBUG)
        file_logging_format = "%(asctime)s %(levelname)7s %(lineno)4d  |  %(message)s"
        file_formatter = logging.Formatter(file_logging_format, datefmt="[%X]")
        file_handler.setFormatter(file_formatter)
        logger.addHandler(file_handler)

    #
    # new logger for printing
    #
    print_log = logging.getLogger("print")
    print_log.setLevel(logging.INFO)
    print_log.propagate = False
    # create formatter 'empty' formatter
    formatter = logging.Formatter("%(message)s")

    # create, format and set handler for shell output
    ch = logging.StreamHandler()
    ch.setFormatter(formatter)
    print_log.handlers = [ch]

    # if filename provided, write output of print_log to that file, too
    if file_path:
        # create, format and add file handler
        fh = logging.FileHandler(file_path)
        fh.setFormatter(formatter)
        print_log.addHandler(fh)

    #
    # short message
    #
    logging.debug(
        f"Logging has been setup, loglevel={loglevel.upper()} " + f"{file_path=}"
def get_available_package_sets(mpsd_release: str) -> List[str]:
    """Given a release, return the available package_sets.

    This is based on the spack-environment's repository [1]. For this function
    to succeed, we need to have Internet access etc.

    We use a temporary directory to clone the repository locally. The
    directory is removed when the function returns, and also if an error
    occurs while cloning or reading the repository.

    As a side effect, a summary of the toolchains and package sets found is
    sent to the 'print' logger.

    [1] https://gitlab.gwdg.de/mpsd-cs/spack-environments.git

    Parameters
    ----------
    mpsd_release : str
        name of the release; the branch ``releases/<mpsd_release>`` is
        checked out.

    Returns
    -------
    package_sets : List[str]
        names of the entries in the ``toolchains`` directory of the
        repository (in the order given by ``os.listdir``).

    Example
    -------
    >>> get_available_package_sets('dev-23a')
    ['foss2021a-cuda-mpi',
     'foss2021a-mpi',
     'foss2021a-serial',
     'foss2022a-cuda-mpi',
     'foss2022a-mpi',
     'foss2022a-serial',
     'global',
     'global_generic']
    """
    logging.debug(f"get_available_package_sets({mpsd_release=})")
    print_log = logging.getLogger("print")
    logging.info(f"Retrieving available package_sets for release {mpsd_release}")

    # The context manager guarantees that the temporary clone is removed,
    # even if cloning or parsing raises.
    with tempfile.TemporaryDirectory(prefix="mpsd-software-available-") as tmp_dir:
        tmp_dir_path = Path(tmp_dir)
        # find package_sets by cloning repository and checking out right branch
        clone_repo(
            tmp_dir_path,
            config_vars["spack_environments_repo"],
            branch=f"releases/{mpsd_release}",
        )
        toolchains_dir = tmp_dir_path / "toolchains"

        # look for directories defining the package_sets
        package_sets = os.listdir(toolchains_dir)
        msg = f"Found package_sets {sorted(package_sets)}"
        logging.debug(msg)

        # the 'package_sets' split into toolchains (such as foss2022a-mpi) and
        # sets of packages. Here we split them into the two categories for a
        # more useful output:
        toolchain_list = [x.parent.name for x in toolchains_dir.glob("*/spack.yaml")]
        package_set_list = [x.parent.name for x in toolchains_dir.glob("*/*.list")]
        logging.debug(f"{toolchain_list=}")
        logging.debug(f"{package_set_list=}")

        # summarise toolchains found for use, and show packages provided for
        # each package_set:
        print_log.info(
            f"MPSD software release {mpsd_release}, AVAILABLE for installation are"
        )
        print_log.info("Toolchains: \n    " + "\n    ".join(sorted(toolchain_list)))
        print_log.info("Package sets:")
        for package_set in package_set_list:
            # collect the package name of every line that starts with the
            # pattern \w+@\w+ (anything from '%' onwards is dropped)
            list_file = toolchains_dir / package_set / "global_packages.list"
            with open(list_file) as f:
                packages = [
                    line.split()[0].split("%")[0]
                    for line in f
                    if re.match(r"^\w+@\w+", line)
                ]
            print_log.info(f"    {package_set} ({', '.join(packages)})  ")

    return package_sets
# Helper class to change directory via context manager
class os_chdir:
    """The os_chdir class is a context manager.

    It changes the current directory to a specified directory
    and returns to the directory that was current when the context
    was entered after execution.
    """

    def __init__(self, new_dir):
        """Initialize, remember target directory."""
        self.new_dir = new_dir
        # Kept for backwards compatibility (attribute exists right after
        # construction); it is refreshed in __enter__.
        self.saved_dir = os.getcwd()

    def __enter__(self):
        """Go to target directory (main action for context)."""
        # Record the directory at *entry* time: the working directory may
        # have changed between creating the object and entering the context.
        self.saved_dir = os.getcwd()
        os.chdir(self.new_dir)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """On exit we return to the directory we came from."""
        os.chdir(self.saved_dir)
def run(*args, counter=[0], **kwargs):
    """
    Run a subprocess and log the call.

    Convenience function to call `subprocess.run` and provide some metadata
    about the call (working directory, command, execution time) via
    `logging.debug`.

    Parameters
    ----------
    args : tuple
        Exactly one positional argument, which is passed on to
        subprocess.run(*args). It must be a string or a list of strings,
        for example ("ls -l") or (["ls", "-l"]).
    counter : list, optional
        list with one integer, starting from [0].
        This is (a Python hack, relying on the mutable default argument being
        shared between calls) to count the number of calls of this function,
        so the different calls of subprocess.run are easier to follow in the
        log files. Callers should not pass this argument.
    kwargs : dict
        keyword-value arguments to be passed to subprocess.run. For example,
        `shell=True`.

    Returns
    -------
    process : subprocess.CompletedProcess
        CompletedProcess object as returned by `subprocess.run` .

    Raises
    ------
    NotImplementedError
        if the positional argument is neither a string nor a list.

    Examples
    --------
    >>> run(['date', '+%Y-%m-%d'])
    CompletedProcess(args=['date', '+%Y-%m-%d'], returncode=0)

    >>> run(['date +%Y-%m-%d'], shell=True)
    CompletedProcess(args=['date +%Y-%m-%d'], returncode=0)
    """
    # token is printed in front of every meta-data line - useful for
    # searching the logs. Starts with "##-00", then "##-01", ...
    token = f"##-{counter[0]:02d}"

    counter[0] += 1  # increase counter

    # make command nicely readable: ["ls", "-l"] -> "ls -l"
    assert len(args) == 1
    arg = args[0]
    # either args is a tuple containing a string | Example: ('ls -1',)
    if isinstance(arg, str):
        command = arg
    # or we have a tuple containing a list of strings.
    # Example: (['ls', '-1'],)
    elif isinstance(arg, list):
        command = " ".join(arg)
    else:
        # we do not expect this to happen
        raise NotImplementedError(f"{arg=}, {args=}")

    # make options (such as `shell=True`) nicely readable
    options = ", ".join(f"{key}={value}" for key, value in kwargs.items())

    # provide information about upcoming subprocess.run call
    logging.debug(
        f"{token} Starting subprocess.run('{command}') with options {options}"
    )
    logging.debug(f"""{token}   getcwd={os.getcwd()}""")
    logging.debug(f"""{token}   subprocess.run("{arg}")""")

    time_start = time.time()
    process = subprocess.run(*args, **kwargs)
    execution_time = time.time() - time_start

    logging.debug(f"{token}   {process=}")
    logging.debug(f"{token}   Completed in {execution_time:.4f}s.")
    logging.debug(f"{token}")  # near-empty line to make reading logs easier

    # callers inspect .returncode / .stdout of the completed process
    return process
def record_script_execution_summary(
    root_dir: Path, msg: Union[str, None] = None, **kwargs
) -> None:
    """Append a record of this invocation to the command log file.

    The command log file (``config_vars["cmd_log_file"]``) is opened in append
    mode relative to ``root_dir``.

    If ``msg`` is given (and non-empty), only that message is written, followed
    by a newline. This is used to log steps taken in the install process.

    Otherwise a header block of the following form is written::

        --------------------------------------------------
        2023-06-20T15:30:37.965370, mpsd_software prepare dev-23a
        MPSD Software manager version: 2023.6.16
        Spack environments branch: dev-23a (commit hash: abcdefg)

    The last line is only written if both ``spe_branch`` and
    ``spe_commit_hash`` are provided.

    Parameters
    ----------
    root_dir : pathlib.Path
        Directory in which the command log file is kept.
    msg : str, optional
        An optional message to log in the command log file.
    **kwargs : dict
        A dictionary with values for
        - spe_branch : str
            The name of the Spack environments branch.
        - spe_commit_hash : str
            The commit hash of the Spack environments branch.
    """
    # spack-environments branch and commit hash (both optional)
    spe_branch = kwargs.get("spe_branch", None)
    spe_commit_hash = kwargs.get("spe_commit_hash", None)

    with os_chdir(root_dir), open(config_vars["cmd_log_file"], "a") as log_file:
        if msg:
            # message mode: just the message, nothing else
            log_file.write(msg + "\n")
            return

        # header mode: separator, time stamp + call statement, versions
        log_file.write("-" * 50 + "\n")
        invocation = " ".join(sys.argv)
        log_file.write(f"{datetime.datetime.now().isoformat()}, {invocation}\n")
        log_file.write(f"MPSD Software manager version: {__version__}\n")
        if spe_branch and spe_commit_hash:
            log_file.write(
                f"Spack environments branch: {spe_branch} "
                f"(commit hash: {spe_commit_hash})\n"
            )
Ashwin Kumar Karnad's avatar
Ashwin Kumar Karnad committed

def clone_repo(
    target_path: Path, repo_url: str, branch=None, capture_output=True
) -> None:
    """Clone repo locally. Optionally checkout a branch.

    Parameters
    ----------
    target_path : Path
      Where to check the repository out to. The directory (including missing
      parent directories) is created if it does not exist. May be given
      relative to the current working directory.
    repo_url: str
      where to clone the git repository from
    branch: str (defaults to None)
      if provided, checkout this branch after cloning
    capture_output: bool (defaults to True)
      capture output, i.e. do not send it to stdout.

    Raises
    ------
    subprocess.CalledProcessError
      if one of the git commands run with ``check=True`` fails.
    Exception
      if `branch` cannot be checked out. The message lists the branches
      that are available.
    """
    target_path.mkdir(parents=True, exist_ok=True)

    with os_chdir(target_path):
        # We are inside target_path already, so clone into "." : passing
        # str(target_path) here would create a nested directory whenever
        # target_path is a relative path.
        run(
            ["git", "clone", repo_url, "."],
            check=True,
            capture_output=capture_output,
        )

    if not branch:
        return

    with os_chdir(target_path):
        # Git fetch and checkout the release branch and git pull
        # to be sure that the resulting repo is up to date
        run(["git", "fetch", "--all"], check=True, capture_output=capture_output)
        checkout_result = run(
            ["git", "checkout", branch], capture_output=capture_output
        )
        if checkout_result.returncode == 0:
            run(["git", "pull"], check=True, capture_output=capture_output)
            return

        # checkout failed: report which branches exist
        msg = f"Couldnt find {branch=}\n"
        branches_result = run(
            ["git", "branch", "-a"], check=True, capture_output=True
        )
        # strip off 'remotes/origin' (needs Python 3.9), skip blank lines
        branches_list = [
            b.strip().removeprefix("remotes/origin/")
            for b in branches_result.stdout.decode().splitlines()
            if b.strip()
        ]
        msg += f"Available branches are {branches_list}"
        logging.error(msg)
        raise Exception(msg, branches_result)
def get_available_releases(print_result: bool = False) -> List[str]:
    """
    Return available MPSD software release versions.

    Example
    -------
    >>> get_available_releases()
    ["dev-23a"]

    Notes
    -----
    This needs to be updated when a new version (such as 23b) is released.
    """
    releases = ["dev-23a"]
    print_log = logging.getLogger("print")
    if print_result:
        print_log.info("Available MPSD software releases:")
            print_log.info(f"    {release}")
def get_release_info(mpsd_release: str, root_dir: Path) -> Tuple[str, str, List[str]]:
Ashwin Kumar Karnad's avatar
Ashwin Kumar Karnad committed
    """
    Get information about the specified release.

Ashwin Kumar Karnad's avatar
Ashwin Kumar Karnad committed
    Get information about the specified release, such as the branch and commit hash
    of the Spack environments repository and the available package_sets.
Hans Fangohr's avatar
Hans Fangohr committed
    mpsd_release : str
        The name of the release to get information for.
    root_dir : pathlib.Path
        The base directory where releases are stored.
Hans Fangohr's avatar
Hans Fangohr committed
    spe_branch : str
        The name of the branch for the Spack environments repository.
Hans Fangohr's avatar
Hans Fangohr committed
    spe_commit_hash : str
        The commit hash for the Spack environments repository.
    available_package_sets : list
        A list of strings representing the available package_sets for the release.
        If the release directory does not exist.
Ashwin Kumar Karnad's avatar
Ashwin Kumar Karnad committed
    """
    # TODO - review this function: can we re-use get_available_package_sets?

    # Get the info for release
    release_base_dir = root_dir / mpsd_release
    if not os.path.exists(release_base_dir):
        logging.debug(f"get_release_info({mpsd_release=}, {root_dir=})")
            f"{release_base_dir} does not exist.\n"
Hans Fangohr's avatar
Hans Fangohr committed
            f"Hint: `prepare {mpsd_release}` may fix this."
Ashwin Kumar Karnad's avatar
Ashwin Kumar Karnad committed
        )
    with os_chdir(release_base_dir):
        with os_chdir("spack-environments"):
Ashwin Kumar Karnad's avatar
Ashwin Kumar Karnad committed
            # Get the branch and commit hash of the spack-environments repo
Hans Fangohr's avatar
Hans Fangohr committed
                run(["git", "rev-parse", "HEAD"], stdout=subprocess.PIPE, check=True)
Ashwin Kumar Karnad's avatar
Ashwin Kumar Karnad committed
                .stdout.decode()
                .strip()
                    ["git", "rev-parse", "--abbrev-ref", "HEAD"],
                    stdout=subprocess.PIPE,
                    check=True,
Ashwin Kumar Karnad's avatar
Ashwin Kumar Karnad committed
                )
                .stdout.decode()
                .strip()
            )
            available_package_sets = os.listdir("toolchains")
    return spe_branch, spe_commit_hash, available_package_sets
Ashwin Kumar Karnad's avatar
Ashwin Kumar Karnad committed

def prepare_environment(mpsd_release: str, root_dir: Path) -> List[str]:
    """
    Create the directory structure for the given MPSD release.

    It does the following steps:

    - Creates the release folder ``root_dir / mpsd_release``.
    - Clones the spack-environments repository into it (branch
      ``releases/<mpsd_release>``), or runs ``git pull`` if the clone
      exists already.
    - Determines the branch and commit hash of the spack-environments
      repository and the available package_sets.
    - Logs the command usage.

    Parameters
    ----------
    mpsd_release : str
        The name of the MPSD release to prepare the environment for.
    root_dir : pathlib.Path
        The base directory to create the release folder and
        clone the spack-environments repository into.

    Returns
    -------
    available_package_sets : list
        A list of available package_sets for the given MPSD release.

    Example
    -------
    >>> prepare_environment('dev-23a', Path('.'))
    ['foss2021a-cuda-mpi',
     'foss2021a-mpi',
     'foss2021a-serial',
     'foss2022a-cuda-mpi',
     'foss2022a-mpi',
     'foss2022a-serial',
     'global',
     'global_generic']
    """
    # TODO review: - does this function need to return anything? If yes:
    # TODO review: - can we re-use get_available_package sets?

    logging.info(f"Preparing {mpsd_release=}")

    # Directory structure for the release
    release_base_dir = root_dir / mpsd_release
    release_base_dir.mkdir(parents=True, exist_ok=True)

    # Spack environments repository: clone if missing, otherwise update
    repo_path = release_base_dir / "spack-environments"
    if not repo_path.exists():
        repo_url = config_vars["spack_environments_repo"]
        logging.info(f"cloning repository {repo_path} from {repo_url}")
        clone_repo(repo_path, repo_url, branch=f"releases/{mpsd_release}")
    else:
        logging.debug(f"directory {repo_path} exists already, will update")
        with os_chdir(repo_path):
            run(["git", "pull", "-v"], capture_output=True)

    print_log = logging.getLogger("print")
    print_log.info(f"Release {mpsd_release} is prepared in {release_base_dir}")

    release_info = get_release_info(mpsd_release, root_dir)
    spe_branch, spe_commit_hash, available_package_sets = release_info

    # keep a record of this call in the command log file
    record_script_execution_summary(
        root_dir, spe_branch=spe_branch, spe_commit_hash=spe_commit_hash
    )
    return available_package_sets
def get_native_microarchitecture():
    """Return native microarchitecture.

    The environment variable "MPSD_MICROARCH" is consulted first. If it is
    not defined, the command 'archspec cpu' is executed and its output is
    used. The result is also recorded via `log_metadata`.

    If the 'archspec' executable cannot be found, an error message asking
    the user to install it is logged and the program exits with status 10.

    Returns
    -------
    microarch : str

    Raises
    ------
    ValueError
        if 'archspec cpu' finishes with a non-zero return code.

    Example
    -------
    >>> get_native_microarchitecture()
    'haswell'
    """
    placeholder = "UNKNOWN_MICROARCH"
    microarch = os.environ.get("MPSD_MICROARCH", placeholder)

    if microarch == placeholder:
        # environment variable not available: ask archspec instead
        logging.debug(
            "Couldn't find MPSD_MICROARCH environment variable. Will try archspec."
        )
        try:
            process = run(["archspec", "cpu"], stdout=subprocess.PIPE, text=True)
        except FileNotFoundError as e:
            logging.debug(f"Call of 'archspec cpu' failed: {e=}")
            # Presumably 'archspec' is not installed.
            msg = (
                "Please install archspec, for example via 'pipx install archspec'.\n"
                "The command we need to execute is 'archspec cpu'.\n"
                "Documentation of package: https://archspec.readthedocs.io/"
            )
            logging.error(msg)
            sys.exit(10)

        # archspec was found and has been executed
        if process.returncode != 0:
            raise ValueError(
                f"Some error occurred when calling 'archspec cpu': {process=}"
            )
        microarch = process.stdout.strip()
        logging.debug(
            f"Found microarchitecture from 'archspec cpu' to be '{microarch}'"
        )
        assert len(microarch) > 0  # sanity check

    # at this point, we have determined the microarchitecture
    log_metadata("microarchitecture", microarch)
    return microarch


def install_environment(
    mpsd_release: str,
    package_sets: List[str],
    root_dir: Path,
    enable_build_cache: bool = False,
) -> None:
    Install the specified MPSD release and package_sets.
    The function installs the package_set to the specified directory, using Spack.
    Parameters
    ----------
    mpsd_release : str
        A string representing the MPSD release version.
    package_sets : list of str
        A list of strings representing the package_sets to install
        (e.g., "foss2021a-mpi", "global_generic", "ALL").
    root_dir : pathlib.Path
        A Path object representing the path to the directory where
        the release and package_sets will be installed.
    enable_build_cache : bool, optional
        A boolean indicating whether to build the build cache
        when installing package_sets. Defaults to False.
        If a requested package_set is not available in the specified release.