# test_mpsd_software.py

"""Tests for mpsd-software-environment.py."""

import importlib
import os
import shutil
import subprocess
from pathlib import Path
import logging
import datetime
import sys

import pytest

# Import the module under test by its dotted name and keep it available under
# the short alias `mod`, which all tests in this file use.
mod = importlib.import_module("mpsd_software_manager.mpsd_software")

# set loglevel to debug - useful for understanding problems.
# (if the tests pass, pytest doesn't show any output)
# Note: this runs at import time and directs the log to the file "tests.log".
mod.set_up_logging(loglevel="debug", file_path="tests.log")
logging.debug(f"We have set up logging from {__file__}")


def create_mock_git_repository(target_directory, create_directory=True):
    """
    Create a git repository in the directory `target_directory`.

    The repository contains one file (`readme.txt`) and one commit that adds it.
    All commands are run with `target_directory` as their working directory; the
    working directory of the calling process is not changed.

    Arguments
    ---------
    target_directory : pathlib.Path
      - path at which the root of the repository should be located (i.e. `.git` folder)

    create_directory : bool
      - create `target_directory` and parent directories if True

    Raises
    ------
    FileExistsError
      - if `create_directory` is True and `target_directory` exists already
    subprocess.CalledProcessError
      - if one of the git commands returns a non-zero exit status

    """
    target_directory = Path(target_directory)

    # create directory first
    if create_directory:
        target_directory.mkdir(parents=True)

    def git(*arguments):
        """Run `git <arguments>` inside the repository and fail on errors."""
        subprocess.run(["git", *arguments], cwd=target_directory, check=True)

    # then create git repository:
    git("init", ".")
    (target_directory / "readme.txt").write_text("fake content\n")
    git("add", "readme.txt")
    logging.debug(f"Creating mock git repository in {target_directory}")

    # if email and username are not available (such as on naked test container),
    # git may complain. We set a temporary user for this one commit to work around
    # that.
    git(
        "-c",
        "user.name=Tes Ta",
        "-c",
        "user.email=tester@some-ci.org",
        "commit",
        "-m",
        "first commit",
        "readme.txt",
    )


def test_os_chdir(tmp_path):
    """Check that os_chdir enters the given directory and restores the old one."""
    # remember where we are before entering the context manager
    starting_dir = os.getcwd()

    # a fresh directory to change into
    work_dir = tmp_path / "test_os_chdir"
    work_dir.mkdir()

    with mod.os_chdir(str(work_dir)):
        # inside the context we must be in the new directory
        cwd_inside = os.getcwd()
        assert cwd_inside == str(work_dir)

    # after leaving the context we must be back where we started
    cwd_after = os.getcwd()
    assert cwd_after == starting_dir


def test_run_method(tmp_path):
    """Run tests for run method.

    Covers commands given as list and as shell string, side effects on the file
    system, capturing of output, and error handling: a missing executable, and a
    non-zero exit status with and without ``check=True``.
    """
    run = mod.run

    # test a command with options:
    assert run(["date", "+%Y-%m-%d"]).returncode == 0
    assert run("date +%Y-%m-%d", shell=True).returncode == 0

    # tests interacting with the file system
    with mod.os_chdir(str(tmp_path)):
        # ensure single string command works
        assert run(("ls -l"), shell=True).returncode == 0
        # test spaces are handled correctly:
        assert run(["touch", "file1", "file2"]).returncode == 0
        assert os.path.exists("file1")
        assert os.path.exists("file2")
        # test output is captured:
        assert (
            b"Hello, world!\n"
            in run(["echo", "Hello, world!"], capture_output=True).stdout
        )

    # check exceptions
    with pytest.raises(FileNotFoundError):
        run(["doesnotexistcommand"])

    # check error code is checked
    # 1. expect this to pass: return code is non-zero, but we don't ask `run` to
    #    check it, so no exception is raised
    assert run(["ls", "/doesnotexist"]).returncode != 0
    # 2. expect this to fail:
    with pytest.raises(subprocess.CalledProcessError):
        run(["ls", "/doesnotexist"], check=True)


def test_prepare_environment(tmp_path):
    """Simulate running preparation of environment.

    Simulate running ./install-software-environment.py --release dev-23a \
      --target-directory /tmp/test_prepare_env
    prepare_env is run when cmd is not specified, we can test cmd='prepare'
    and cmd=None to check both cases
    """
    root_dir = tmp_path / "mpsd_opt" / "linux_debian_11"
    spack_environments = "spack-environments"
    mpsd_release_to_test = "dev-23a"
    release_base_dir = root_dir / mpsd_release_to_test
    # check that the test directory does not exist
    assert not root_dir.exists()

    # prepare_environment expects to be executed in git repository
    # (mpsd-software-environments). It queries the commit on which we are to
    # log that information. For this to work, we need to execute the command
    # within a directory tree that has a git repository at the same or higher
    # level. Let's create one:
    create_mock_git_repository(root_dir)

    # now call the function we want to test
    result = mod.prepare_environment(
        mpsd_release=mpsd_release_to_test, root_dir=root_dir
    )

    # check if the directory now is created
    assert release_base_dir.exists()
    # check for spack-environments directory
    assert spack_environments in os.listdir(release_base_dir)

    # check if the git branch is correctly checked out. We expect output such as
    # git_branch_stdout = '* releases/dev-23a\n  develop\n'
    # The entry with the '* ' prefix is the active branch.
    # Run git with `cwd=` (no shell), so that unusual characters in the path
    # cannot break the command.
    git_branch_output_raw = subprocess.run(
        ["git", "branch"],
        cwd=release_base_dir / spack_environments,
        capture_output=True,
        check=True,
    )
    git_branch_stdout = git_branch_output_raw.stdout.decode("utf-8")
    assert f"* releases/{mpsd_release_to_test}" in git_branch_stdout

    # check that result is a list and contains at least ['global','foss2021a-mpi']
    assert isinstance(result, list)
    assert "global" in result
    assert "foss2021a-mpi" in result

    # Expect an Exception when wrong mpsd_release is provided
    with pytest.raises(Exception):
        mod.prepare_environment(mpsd_release="wrong-mpsd-release", root_dir=root_dir)


def test_record_script_execution_summary(tmp_path):
    """Check that log is updated.

    Check that the command log file (``config_vars["cmd_log_file"]``, located
    relative to the root directory) grows when prepare_environment is run, and
    that its last two lines report the software manager version and the spack
    environments branch.
    """
    cmd_log_file = mod.config_vars["cmd_log_file"]

    root_dir = tmp_path / "test_prepare_env"
    log_file = root_dir / cmd_log_file
    mpsd_release_to_test = "dev-23a"
    script_version = mod.__version__

    # Size of the log before the run. Existence and size must refer to the same
    # file (the one below root_dir), not to a path relative to the current
    # working directory.
    if os.path.exists(log_file):
        initial_bytes = os.path.getsize(log_file)
    else:
        initial_bytes = 0

    # run the prepare_env functionality
    create_mock_git_repository(target_directory=root_dir, create_directory=True)
    mod.prepare_environment(mpsd_release=mpsd_release_to_test, root_dir=(root_dir))

    # check that the log file is updated
    assert os.path.exists(log_file)
    assert os.path.getsize(log_file) > initial_bytes

    # Check that the log file has "Spack environments branch: releases/dev-23a "
    # in the last line, and the software manager version in the line before
    with open(log_file, "r") as f:
        lines = f.readlines()
    assert "Spack environments branch: releases/dev-23a " in lines[-1]
    assert f"MPSD Software manager version: {script_version}" in lines[-2]


def test_install_environment_wrong_package_set(tmp_path):
    """Check that an unknown package_set makes install_environment exit."""
    # arguments for the call; only the package set name is invalid on purpose
    install_arguments = {
        "mpsd_release": "dev-23a",
        "package_sets": ["wrong-package_set"],
        "root_dir": tmp_path,
    }

    # the function is expected to terminate via sys.exit ...
    with pytest.raises(SystemExit) as exit_info:
        mod.install_environment(**install_arguments)

    # ... with exit code 1
    assert exit_info.type == SystemExit
    assert exit_info.value.code == 1


def test_install_environment_wrong_mpsd_release(tmp_path):
    """Check that an unknown mpsd release makes install_environment raise."""
    # only the release name is invalid on purpose
    install_arguments = {
        "mpsd_release": "wrong-mpsd-release",
        "package_sets": ["foss2021a-mpi"],
        "root_dir": tmp_path,
    }

    # The exception is expected to come from the prepare_environment step.
    with pytest.raises(Exception):
        mod.install_environment(**install_arguments)


def _replace_in_file(path, old, new):
    """Replace every occurrence of `old` by `new` in the text file at `path`."""
    with open(path, "r") as f:
        content = f.read().replace(old, new)
    with open(path, "w") as f:
        f.write(content)


@pytest.mark.skipif(sys.platform == "darwin", reason="install not working on OSX")
def test_install_environment_zlib():
    """Test installation of package_set.

    Prepare a test installation of the package set global_generic that contains
    only zlib, install it twice, and check the log files and the lmod module
    index that are created.

    This is a long test. It is handy to run it with print statements printed to
    stdout, use:
      pytest -s

    The test works in the fixed directory /tmp/test_global_generic (which is
    removed first if it exists) and needs a `gcc` executable.
    """
    # for this installation avoid tmp_path as
    # the length of the path becomes too long and spack complains
    root_dir = Path("/tmp/test_global_generic")
    if root_dir.exists():
        shutil.rmtree(root_dir)
    root_dir.mkdir(exist_ok=True, parents=True)
    mpsd_release_to_test = "dev-23a"
    package_set_to_test = "global_generic"
    cmd_log_file = mod.config_vars["cmd_log_file"]
    microarch = mod.get_native_microarchitecture()
    release_base_dir = root_dir / mpsd_release_to_test
    create_mock_git_repository(target_directory=root_dir, create_directory=False)
    mod.prepare_environment(mpsd_release=mpsd_release_to_test, root_dir=(root_dir))

    # Patch the spack environments to create a fake global_generic:
    # make zlib the only spec of global_generic
    package_set_src_dir = release_base_dir / "spack-environments" / "toolchains"
    with open(
        package_set_src_dir / "global_generic" / "global_packages.list", "w"
    ) as f:
        f.write("zlib@1.2.13 \n")

    # add zlib to whitelist of module creation file by replacing anaconda3%gcc@10.2.1
    # with zlib@1.2.13
    # in release_base_dir / "spack-environments/spack_overlay/etc/spack/modules.yaml"
    module_file = (
        release_base_dir / "spack-environments/spack_overlay/etc/spack/modules.yaml"
    )
    _replace_in_file(module_file, "anaconda3%gcc@10.2.1", "zlib@1.2.13")

    # Replace gcc@10.2.1 with the available system gcc (for example gcc@13.1.1),
    # so that the test can also run on a laptop.
    # The command is a single string because it is run through the shell; if gcc
    # is missing, stdout is empty and the assert below reports it.
    gcc_ver = (
        subprocess.run("gcc -dumpfullversion", shell=True, capture_output=True)
        .stdout.decode("utf-8")
        .strip()
    )
    assert len(gcc_ver) > 3, f"Couldn't find gcc {gcc_ver=}"

    setup_file = release_base_dir / "spack-environments/spack_setup.sh"
    _replace_in_file(
        setup_file, 'system_compiler="gcc@10.2.1"', f'system_compiler="gcc@{gcc_ver}"'
    )

    def find_install_logs():
        """Return all log files of install actions below release_base_dir/logs."""
        return list(
            (release_base_dir / "logs").glob(
                f"{mpsd_release_to_test}_{microarch}_*_install.log"
            )
        )

    # install global_generic package_set
    mod.set_up_logging(
        "WARNING",
        mod.get_installer_log_file_path(mpsd_release_to_test, "install", root_dir),
    )
    mod.install_environment(
        mpsd_release=mpsd_release_to_test,
        package_sets=[package_set_to_test],
        root_dir=root_dir,
        enable_build_cache=False,
    )

    # test that the log files are created correctly: we expect two files
    # matching {release}_{microarch}_*_install.log in release_base_dir/logs
    # (presumably the installer log and the build log - confirm against
    # create_log_file_names)
    build_log = find_install_logs()
    assert len(build_log) == 2
    # take the last one in sorted order as the build log
    build_log = sorted(build_log)[1]
    # check that the build log contains statement ##### Installation finished
    with open(build_log, "r") as f:
        lines = f.read()
    assert "##### Installation finished" in lines

    # assert that install log files exists
    assert os.path.exists(root_dir / cmd_log_file)

    # assert that the build log is written to the install log file
    with open(root_dir / cmd_log_file, "r") as f:
        lines = f.read()
    assert f"installing {package_set_to_test} and logging at {str(build_log)}" in lines

    # assert that the module files are created correctly
    assert os.path.exists(release_base_dir / microarch)
    assert os.path.exists(release_base_dir / microarch / "lmod")
    # assert that lmod/module-index.yaml contains zlib
    with open(release_base_dir / microarch / "lmod" / "module-index.yaml", "r") as f:
        lines = f.read()
    assert "zlib" in lines

    # install again to ensure that
    # commands that skip creation of folders when
    # they are already present works as expected
    # reload the module to ensure that date changes
    importlib.reload(mod)
    mod.set_up_logging(
        "WARNING",
        mod.get_installer_log_file_path(mpsd_release_to_test, "install", root_dir),
    )
    mod.install_environment(
        mpsd_release=mpsd_release_to_test,
        package_sets=[package_set_to_test],
        root_dir=root_dir,
        enable_build_cache=False,
    )
    # the second installation adds another two log files
    assert len(find_install_logs()) == 4

    # TODO: test that the removal now works, i.e. call
    #   mod.remove_environment(
    #       mpsd_release=mpsd_release_to_test,
    #       package_sets=[package_set_to_test],
    #       root_dir=root_dir,
    #   )
    # and ensure that the module files are removed


def test_metadata_logging(tmp_path):
    """Test that metadata is logged and read correctly.

    Metadata entries are written with log_metadata, interleaved with ordinary
    log messages. The test checks that the tagged entries show up in the log
    file, and that read_metadata_from_logfile returns exactly the key/value
    pairs that were written.
    """
    # Test that the metadata is logged correctly
    filename = tmp_path / "test-metadata.log"
    print(f"Writing to {filename}")
    mod.set_up_logging(loglevel="debug", file_path=filename)

    # our test data
    keys = ["important_key", "important_key2"]
    values = ["important_value", "important_value2"]

    # the tags that enclose a metadata entry do not depend on the entry
    open_tag = mod.config_vars["metadata_tag_open"]
    close_tag = mod.config_vars["metadata_tag_close"]

    expected_log_entries = []
    for key, value in zip(keys, values):
        mod.log_metadata(key, value)
        expected_log_entries.append(f"{open_tag}{key}:{value}{close_tag}")
        # add unrelated messages, so that the parser has to skip other lines
        logging.info(f"Add some other info (after adding {key=})")
        logging.debug("Add some other info")
        logging.warning("Add some other info")

    # Check that relevant lines show up in the log file somewhere
    with open(filename, "r") as f:
        logfile_content = f.read()
    for expected_log in expected_log_entries:
        assert expected_log in logfile_content

    # Test that the metadata is read correctly using our parser
    read_dict = mod.read_metadata_from_logfile(filename)

    # check all entries are in the file (with the value that was written)
    for key, value in zip(keys, values):
        assert read_dict[key] == value

    # check no additional entries are there
    assert len(read_dict) == len(keys)


def test_get_available_package_sets():
    """
    Check the package_sets that are reported for release dev-23a.

    Needs internet access to succeed.
    """
    expected_package_sets = [
        "foss2021a-cuda-mpi",
        "foss2021a-mpi",
        "foss2021a-serial",
        "foss2022a-cuda-mpi",
        "foss2022a-mpi",
        "foss2022a-serial",
        "global",
        "global_generic",
    ]
    reported_package_sets = mod.get_available_package_sets("dev-23a")

    # the order in which the package sets are reported does not matter
    assert sorted(reported_package_sets) == sorted(expected_package_sets)


def test_create_log_file_names():
    """Check the file names produced by create_log_file_names."""
    mpsd_release = "dev-23a"
    microarch = "sandybridge"
    package_set = "foss2021a"
    action = "install"
    date = datetime.datetime.now().replace(microsecond=0).isoformat()

    # arguments shared by all calls below
    shared = {"microarch": microarch, "mpsd_release": mpsd_release, "date": date}

    # with a package_set we expect the name of a build log file
    expected_build_name = (
        f"{mpsd_release}_{microarch}_{date}_BUILD_{package_set}_{action}.log"
    )
    build_name = mod.create_log_file_names(
        action=action, package_set=package_set, **shared
    )
    assert build_name == expected_build_name

    # without a package_set we expect the name of the installer log file
    expected_installer_name = f"{mpsd_release}_{microarch}_{date}_APEX_{action}.log"
    installer_name = mod.create_log_file_names(action=action, **shared)
    assert installer_name == expected_installer_name

    # for the action "status" no build log file name is returned
    no_build_name = mod.create_log_file_names(
        action="status", package_set=package_set, **shared
    )
    assert no_build_name is None


def create_fake_environment(tmp_path, mpsd_release, expected_toolchain_map=None):
    """Create a fake directory tree of an installed release for testing.

    For every microarchitecture in `expected_toolchain_map`, the folders
    `lmod/Core/toolchains`, `spack` and `logs` are created below
    `tmp_path / mpsd_release / microarch`, and an empty `<toolchain>.lua` file
    is created in the toolchains folder for each toolchain listed.

    If no (or an empty) map is given, the native microarchitecture with the
    toolchains foss2021a and intel2021a is used. The map that was used is
    returned.
    """
    if not expected_toolchain_map:
        native_microarch = mod.get_native_microarchitecture()
        expected_toolchain_map = {native_microarch: ["foss2021a", "intel2021a"]}

    release_dir = tmp_path / mpsd_release
    for microarch, toolchains in expected_toolchain_map.items():
        microarch_dir = release_dir / microarch
        lua_dir = microarch_dir / "lmod" / "Core" / "toolchains"

        # existing folders are fine, so the function can be called repeatedly
        for folder in (lua_dir, microarch_dir / "spack", microarch_dir / "logs"):
            folder.mkdir(parents=True, exist_ok=True)

        # one (empty) lua module file per toolchain
        for toolchain in toolchains:
            (lua_dir / f"{toolchain}.lua").touch()

    return expected_toolchain_map


def test_environment_status(tmp_path):
    """Check the toolchains reported by environment_status."""
    # for a release that is not present, None is expected
    assert mod.environment_status("fake-release", tmp_path) is None

    # create a fake installation and ask for its status
    mpsd_release = "dev-23a"
    expected_toolchain_map = create_fake_environment(tmp_path, mpsd_release)
    reported_toolchain_map = mod.environment_status(mpsd_release, tmp_path)

    # compare as sets, as the order of the toolchains doesn't matter
    for microarch, expected_toolchains in expected_toolchain_map.items():
        assert set(reported_toolchain_map[microarch]) == set(expected_toolchains)


@pytest.mark.skip(reason="not implemented yet")
def test_remove_environment(tmp_path):
    """Check removal of a (fake) environment with remove_environment."""
    mpsd_release = "dev-23a"

    # set up a fake environment and make sure it is reported as present
    create_fake_environment(tmp_path, mpsd_release)
    status_before = mod.environment_status(mpsd_release, tmp_path)
    assert status_before is not None

    # removal without package sets is expected to end in sys.exit
    create_fake_environment(tmp_path, mpsd_release)
    with pytest.raises(SystemExit):
        mod.remove_environment(mpsd_release, tmp_path, force_remove=True)

    # removal of everything: afterwards no status is reported ...
    mod.remove_environment(mpsd_release, tmp_path, ["ALL"], force_remove=True)
    status_after = mod.environment_status(mpsd_release, tmp_path)
    assert status_after is None
    # ... but the logs folder must still be there
    assert (tmp_path / mpsd_release / "logs").exists()

    # removal of a single toolchain is meant to be covered in
    # test_install_environment_zlib


def test_initialize_environment(tmp_path):
    """Check that initialize_environment creates the init file and logs it."""
    mod.initialize_environment(tmp_path)

    # the init file must be present after initialisation
    assert (tmp_path / mod.config_vars["init_file"]).exists()

    # the initialisation must be mentioned in the command log file
    expected_message = f"Initialising MPSD software instance at {tmp_path}"
    with open(tmp_path / mod.config_vars["cmd_log_file"], "r") as log:
        log_content = log.read()
    assert expected_message in log_content

    # initialising a second time must end with exit code 1
    with pytest.raises(SystemExit) as exit_info:
        mod.initialize_environment(tmp_path)
    assert exit_info.type == SystemExit
    assert exit_info.value.code == 1


def test_get_root_dir(tmp_path):
    """Check that get_root_dir finds the root directory of an instance."""
    with mod.os_chdir(tmp_path):
        # before initialisation there is no root dir: expect exit with code 1
        with pytest.raises(SystemExit) as exit_info:
            mod.get_root_dir()
        assert exit_info.type == SystemExit
        assert exit_info.value.code == 1

        # after initialisation the current directory is the root dir
        mod.initialize_environment(tmp_path)
        assert mod.get_root_dir() == tmp_path

        # the root dir must also be found from a sub directory (i.e. the
        # parent is detected as root dir)
        nested_dir = tmp_path / "sub_dir"
        nested_dir.mkdir()
        with mod.os_chdir(nested_dir):
            assert mod.get_root_dir() == tmp_path


def test_interface(tmp_path):
    """Test other things (not implemented yet)."""
    # Ideas for checks to add here:
    # - installing without package_sets only passes the available package_sets
    # - the script branch and hash are correct when running the script
    # - the help message is printed when no arguments are provided
    # - the help message is printed when -h is provided
    # - the error messages are also logged to the log file


# other tests to add (ideally)
# - get_native_microarchitecture()