From 603ced96e5fe83bb357fcb1670c98285cf26984f Mon Sep 17 00:00:00 2001 From: "l.schiebel" <l.schiebel@stud.uni-goettingen.de> Date: Mon, 9 Dec 2024 17:33:14 +0100 Subject: [PATCH] Rewrote energy parsing routine for Molpro, now parses .xml instead of .out-files. Also added a version check for python to ensure no issues with format strings occur. --- .gitignore | 1 + anchor.py | 23 ++++++++++---- evaluation.py | 10 +++++-- lib/file_ops.py | 80 ++++++++++++++++++++++++++++++++++++------------- 4 files changed, 85 insertions(+), 29 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bee8a64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/anchor.py b/anchor.py index 4a378e2..4fda859 100644 --- a/anchor.py +++ b/anchor.py @@ -1,23 +1,32 @@ +import sys + +###version check before anything else to avoid syntax parsing issues with imports +if sys.version_info < (3, 6): + sys.stderr.write("Error: This script requires Python 3.6 or higher!\n") + sys.exit(1) + + import os import argparse -import sys import generation import evaluation def valid_dir(path): - """Checks if path is points to an existing directory""" + """Checks if path points to an existing directory""" if not os.path.isdir(path): - raise argparse.ArgumentTypeError(f"'{path}' is not a valid path to a directory.") + raise argparse.ArgumentTypeError(path + " is not a valid path to a directory.") return os.path.abspath(path) + def args_parsing(): """Parses the command line arguments passed to anchor.py""" - parser = argparse.ArgumentParser(description="Set of utilities for generating and modifying electronic potential cube data.") - parser.add_argument('routine', + parser = argparse.ArgumentParser(description="Set of utilities for generating and modifying electronic potential cube data.") #descriptor for script + parser.add_argument('routine', #determines which routine is to be performed choices=['rotation', 'generation', 'evaluation', 'limits'], help='The routine to run. See README.') parser.add_argument('working_dir', type=valid_dir, help="Path to directory in which new data is to be generated/old to be evaluated. Must contain directory 'input' as specified in README.") parser.add_argument('-nosub',action='store_true', help="Disables submission of final job array generated for calculation of all cube points.") + args = parser.parse_args() return args @@ -25,8 +34,12 @@ def args_parsing(): def main(): args = args_parsing() + + #Prompts for Hessian routines -> Calls up generation.py if args.routine == "generation" or args.routine=="rotation" or args.routine=="limits": generation.main(destination_path = args.working_dir, nosub_flag = args.nosub, routine=args.routine) + + #Prompt for parsing results of 'generation' -> calls up evaluation.py if args.routine == "evaluation": evaluation.main(argpath=args.working_dir, nosub_flag=args.nosub) return diff --git a/evaluation.py b/evaluation.py index 941d920..288c0d9 100644 --- a/evaluation.py +++ b/evaluation.py @@ -2,6 +2,7 @@ import numpy as np import os import re import sys +import time import lib.file_ops as fileops from lib.config_obj import Cfg_obj from lib.config import config_setup @@ -162,8 +163,12 @@ def main(argpath:str, nosub_flag:bool): sys.exit("Generated job arrays for failed jobs. They have not been submitted because of 'nosub'-flag set by user.\nExiting routine now.") else: for i in jobarraynames: - os.system(f"sbatch {i}") - sys.exit("Submitted failed jobs again. Rerun script once finished.") + returncode = os.system(f"sbatch {i}") + if returncode != 0: + print(f"Failed to submit job array {i}.") + + sys.exit(f"Potential data is incomplete. Exiting program.") + os.chdir(argpath) @@ -172,7 +177,6 @@ def main(argpath:str, nosub_flag:bool): gen_nucvib_inp(energylist=energylist, maindir=argpath, cfg_obj=cfg_obj, cubedata=cubedata, coordlist=coordlist, delta_en=delta_en) - return diff --git a/lib/file_ops.py b/lib/file_ops.py index 7b59b1e..98bf68c 100644 --- a/lib/file_ops.py +++ b/lib/file_ops.py @@ -6,6 +6,15 @@ import re import shutil import numpy as np + +import xml.etree.ElementTree as ET +molpro_xmlnamespaces = { + 'molpro': 'http://www.molpro.net/schema/molpro-output', + 'cml': 'http://www.xml-cml.org/schema', + 'stm': 'http://www.xml-cml.org/schema' +} + + from lib.geometry_obj import Geometry_obj from lib.config_obj import Cfg_obj @@ -99,22 +108,41 @@ def get_gauss_energy(filepath): def get_mol_energy(filepath): """ - Tries to extract energy from {filename}.log in directory {filename}/ relative to calling directory. + Tries to extract energy from {filename}.xml in directory {filename}/ relative to calling directory. Returns returncode = 0 and value = str(energy) if energy could be found. - Returns returncode = 1 and value = str(error flag) if energy could not be located. + Returns returncode = 1 and value = str(error flag) if energy could not be located or error ocurred. """ - returncode, value = command_and_read(f'tail -n 2 {filepath} | grep \"Molpro calculation terminated\" ') #searches for flag for normal termination - if returncode == 0: #if search returns non-error: searches for energy and writes it to list of tuples with filename - returncode, value = command_and_read(f'tail -n 7 {filepath} | grep \"energy=\" {filepath}') - if returncode == 0: - en = value.strip() - value = str(np.round(float(en.split()[-1].replace("D", "e")), decimals=8)) - else: - returncode, value = command_and_read(f'grep \"GLOBAL ERROR\" {filepath}') - if returncode == 0: - returncode = 1 #setting to 1 because successful search for "error termination" means calculation has problem - else: - value = "Unidentified problem. Check if calculation is still running." + returncode = 0 + + try: + tree = ET.parse(filepath) + root = tree.getroot() + + #check for error tags in xml + job = root.find("molpro:job", molpro_xmlnamespaces) + for jobstep in job.findall("molpro:jobstep", molpro_xmlnamespaces): + error_tag = jobstep.find(f"molpro:error", molpro_xmlnamespaces) + if error_tag is not None: + command = jobstep.attrib.get('command', 'Unknown') + + returncode = 1 + value = f"Error in Molpro, jobstep: {command}." + + + if returncode == 0: #if no error is found + energy_property = root.find('.//molpro:property[@name="Energy"]', molpro_xmlnamespaces) + if energy_property is not None: + energy = energy_property.get('value') + + value = str(np.round(float(energy), decimals=8)) + else: + returncode = 1 + value = "Unidentified problem. Check if calculation is still running." + + except ET.ParseError: + returncode = 1 + value = "xml-file could not be processed, may be corrupted or incomplete." + return returncode, value @@ -174,14 +202,14 @@ def check_files(dirname, prog, wait = True, scan_param = ""): if wait == True: if prog == 'molpro': read_func=wait_and_read_mol - extension = ".out" + extension = ".xml" elif prog == 'gaussian': read_func=wait_and_read_gauss extension = ".log" else: if prog == 'molpro': read_func=get_mol_energy - extension = ".out" + extension = ".xml" elif prog == 'gaussian': read_func=get_gauss_energy extension = ".log" @@ -258,7 +286,7 @@ def check_files(dirname, prog, wait = True, scan_param = ""): for en in energylist: file.write(f"{en[0]:<25}\t{en[1]:<}\n") - #printing report of parsing to terminaö + #printing report of parsing to terminal report = "" if len(energies)>0: report+= f"\n{len(energies)} energies were obtained and saved to \"{label}_energies.txt\"" @@ -293,14 +321,18 @@ def cp_equilib_data(h1_diag_flag:bool, cfg_obj:Cfg_obj): equi_dirpath = cwd+f"/Cube/Cubelims/{sysname}_equilib/" if cfg_obj.prog == "gaussian": - suff = ("gjf", "log") + suff = ("gjf", "log") + else: - suff = ("inp", "out") - + suff = ("inp", "out", "xml") + equi_xmlpath = equi_dirpath+f"{sysname}_equilib.{suff[2]}" + source_xml_path = cwd+f"/{sysname}_hess{no}/{sysname}_hess{no}_0_0_0/{sysname}_hess{no}_0_0_0.{suff[2]}" + equi_inpfpath = equi_dirpath+f"{sysname}_equilib.{suff[0]}" equi_outfpath = equi_dirpath+f"{sysname}_equilib.{suff[1]}" source_inpf_path = cwd+f"/{sysname}_hess{no}/{sysname}_hess{no}_0_0_0/{sysname}_hess{no}_0_0_0.{suff[0]}" source_outf_path = cwd+f"/{sysname}_hess{no}/{sysname}_hess{no}_0_0_0/{sysname}_hess{no}_0_0_0.{suff[1]}" + if not os.path.exists(equi_dirpath): #checks if directory exists @@ -315,6 +347,11 @@ def cp_equilib_data(h1_diag_flag:bool, cfg_obj:Cfg_obj): except FileNotFoundError: raise FileNotFoundError(f"Tried to copy equilibrium data from {source_outf_path} to {equi_outfpath}, but file not found.") + try: + shutil.copyfile(source_xml_path, equi_xmlpath) + except FileNotFoundError: + raise FileNotFoundError(f"Tried to copy equilibrium data from {source_xml_path} to {equi_xmlpath}, but file not found.") + return @@ -324,7 +361,8 @@ def get_equi_energy(h1_diag_flag:bool, cfg_obj:Cfg_obj): #print(equi_dirpath) if cfg_obj.prog == "molpro": equi_outfpath = equi_dirpath+f"{cfg_obj.sysname}_equilib.out" - if not os.path.exists(equi_outfpath): + equi_xmlpath = equi_dirpath+f"{cfg_obj.sysname}_equilib.xml" + if not os.path.exists(equi_outfpath) or not os.path.exists(equi_xmlpath): cp_equilib_data(h1_diag_flag=h1_diag_flag, cfg_obj=cfg_obj) returncode, value = get_mol_energy(equi_outfpath) else: -- GitLab