Commit 9198775f authored by Igor Merkulow's avatar Igor Merkulow
Browse files

WIP: spec2schema, added more tests, disabled E203 warning as wrong

parent fea9781b
......@@ -33,6 +33,8 @@ pep8:
options:
max-line-length: 79
single-line-if-stmt: n
disable:
- E203 # https://github.com/ambv/black#slices
enable:
- E101
- E111
......@@ -51,7 +53,6 @@ pep8:
- E128
- E201
- E202
- E203
- E211
- E221
- E222
......
......@@ -34,16 +34,16 @@ Table of constraints:
|Abbreviation|Constraint|Explanation|
| --- | --- | --- |
|STR|"[a-zA-Z0-9_]{3, 45}"|string is limited to upper- and lower-case ASCII characters, numbers, and underscores. It can be between 3 and 45 characters long.|
|STR-EXT|"[a-zA-Z0-9_ \(\)@\.,\-]{3, 45}"|string can also contain whitespace and other characters (e.g. punctuation, '@' or parentheses). Length still between 3 and 45 characters.|
|INT-POS|">0"|value is strictly greater than zero.|
|INT-POS0|">=0"|value is zero or greater.|
|INT-TS|">=1000000000"|value is an UNIX timestamp, representing the number of seconds since 01.01.1970, minimum value is 1,000,000,000 (was on 09.09.2001, so we should get larger values). If this constraint is not met, there are probably other issues on that machine.|
|INT-01|"[01]"|value is either 0 or 1|
|FLOAT-POS0|">=0.0"|floating point value, greater then or equal 0.0|
|FLOAT-PERCENT|">=0.0","<=1.0"|floating point value, between 0 and 1 (both incl.)|
|MEM-PR|">=10485760", "<=10485760*1048576"|Plausibility check for memory amounts - default is [10MB, 10TB]|
|SAMPL|"[0-9]{1,5}[hms]"|sampling interval in the form "1s" or "24h" etc.|
|STR|regex='[a-zA-Z0-9_]{3,45}'|string is limited to upper- and lower-case ASCII characters, numbers, and underscores. It can be between 3 and 45 characters long.|
|STR-EXT|regex=r'[a-zA-Z0-9_ \(\)@\.,\-]{3,45}'|string can also contain whitespace and other characters (e.g. punctuation, '@' or parentheses). Length still between 3 and 45 characters.|
|INT-POS|min=1|value is strictly greater than zero.|
|INT-POS0|min=0|value is zero or greater.|
|INT-TS|min=1000000000|value is a UNIX timestamp, representing the number of seconds since 01.01.1970. The minimum value is 1,000,000,000 (reached on 09.09.2001, so real values should be larger). If this constraint is not met, there are probably other issues on that machine.|
|INT-01|min=0, max=1|value is either 0 or 1|
|FLOAT-POS0|min=0.0|floating point value, greater than or equal to 0.0|
|FLOAT-PERCENT|min=0.0, max=1.0|floating point value, between 0 and 1 (both incl.)|
|MEM-PR|min=10485760, max=10995116277760|Plausibility check for memory amounts - default is [10MB, 10TB]|
|SAMPL|regex='[0-9]{1,5}[hms]'|sampling interval in the form "1s" or "24h" etc.|
Used unit abbreviations:
......
......@@ -7,13 +7,25 @@ from spectools.spec2schema import col2list
from spectools.spec2schema import cols2dict
from spectools.spec2schema import entry2var
from spectools.spec2schema import char2bool
from spectools.spec2schema import constr2dict
from spectools.spec2schema import resolve_constraints
INFILE = "docs/spec-metrics2.md"
JSONFILE = "out/tables.json"
VERBOSE = False
def cond_print(data, verbose=VERBOSE):
    """Print ``data`` only when ``verbose`` is truthy.

    Strings are written with ``print``; every other value is handed to
    ``pp``.
    """
    if not verbose:
        return
    emit = print if isinstance(data, str) else pp
    emit(data)
if __name__ == "__main__":
# ------ parsing --------
# ------ parsing specification --------
with open(INFILE, "r") as f:
DATA = f.readlines()
SPP = SpecParser(DATA)
......@@ -21,36 +33,44 @@ if __name__ == "__main__":
with open(JSONFILE, "w") as f:
json.dump(TABLES, f)
# ------ converting to schema -------
# ------ interpreting tables -------
with open(JSONFILE, "r") as f:
DATA = json.load(f)
pp(DATA.keys())
cond_print(DATA.keys())
TNAMES = col2list(DATA["table0"], "JSON name")
pp(TNAMES)
cond_print(TNAMES)
CONSTRAINTS = cols2dict(DATA["table1"], "Abbreviation", "Constraint")
pp(CONSTRAINTS)
cond_print(CONSTRAINTS, True)
for constr in CONSTRAINTS:
CONSTRAINTS[constr] = constr2dict(constr, CONSTRAINTS[constr])
cond_print(CONSTRAINTS, True)
with open("out/constraints.json", "w") as f:
json.dump(CONSTRAINTS, f)
UNITS = cols2dict(DATA["table2"], "Abbreviation", "Unit")
pp(UNITS)
cond_print(UNITS)
METRICS = {TNAMES[pos]: DATA[key] for pos, key in enumerate([*DATA][3:])}
METRICS2 = dict()
# -------- generating schema -----------
SCHEMA = dict()
for tname in METRICS:
table = dict()
for entry in METRICS[tname]:
key, val = entry2var(METRICS[tname][entry])
current = METRICS[tname][entry]
key, val = entry2var(current)
if "Required" in val:
val["Required"] = char2bool(val["Required"])
if "Unit" in current:
val["Unit"] = UNITS.get(val["Unit"], None)
if "Constraint" in val:
val = resolve_constraints(val, mapping=CONSTRAINTS)
# TODO: remove "label/caption"
# TODO: remove "unit"
# TODO: convert "required" to lower case
# TODO: convert "data type" to "type"
if key not in table:
table[key] = val
else:
print("Duplicate key: {}".format(key))
METRICS2[tname] = table
cond_print("Duplicate key: {}".format(key))
SCHEMA[tname] = table
for tname in METRICS:
for entry in METRICS[tname]:
if "Required" in METRICS[tname][entry]:
METRICS[tname][entry]["Required"] = char2bool(
METRICS[tname][entry]["Required"]
)
if "Unit" in METRICS[tname][entry]:
METRICS[tname][entry]["Unit"] = UNITS.get(
METRICS[tname][entry]["Unit"], None
)
pp(METRICS2)
cond_print(SCHEMA, True)
......@@ -34,36 +34,76 @@ def col2list(table, col):
return None
if not isinstance(col, str):
return None
return [
table[i][col] for i in table if table[i].get(col, None) is not None
]
return [table[i][col] for i in table if col in table[i]]
def entry2var(entry, varkey="Report metric"):
    """Extract metric name as the new key.

    Arguments
    ---------
    entry -- dict describing one variable; it is modified in place
    varkey -- name of the field that holds the variable name

    Returns
    -------
    tuple (varname, entry) where entry no longer contains varkey, or
    None if entry is not a dict, varkey is not a string, or varkey is
    missing from entry

    """
    # Validate both types before touching the dict: a lookup with an
    # unhashable varkey (e.g. a dict) would raise TypeError.
    if not isinstance(entry, dict) or not isinstance(varkey, str):
        return None
    if varkey not in entry:
        return None
    return (entry.pop(varkey), entry)
# def add_constraints(entry, constraint):
def constr2dict(name, constraint):
    """Split constraint string into parts and return as a dict.

    Arguments
    ---------
    name -- constraint abbreviation; its prefix selects the value type
            (INT and MEM -> int, FLOAT -> float, STR and SAMPL -> str)
    constraint -- constraint text, either "regex=<pattern>" or a
                  comma-separated list of "key=value" pairs

    Returns
    -------
    dict mapping every constraint key to its converted value; a regex
    constraint yields {"regex": <everything after the first "=">}.
    Fragments without "=" are skipped, and an unrecognized name prints a
    notice and contributes no entries.

    """
    if constraint.strip().startswith("regex"):
        # A pattern may itself contain "," or "=", so it is never split.
        return {"regex": constraint[constraint.find("=") + 1 :]}

    # The prefix does not change between parts, so pick the converter once.
    if name.startswith(("INT", "MEM")):
        convert = int
    elif name.startswith("FLOAT"):
        convert = float
    elif name.startswith(("STR", "SAMPL")):
        convert = str
    else:
        convert = None

    res = dict()
    for part in constraint.split(","):
        if convert is None:
            print("Constraint was not recognized ...")
            continue
        pieces = part.split("=")
        if len(pieces) < 2:
            # Empty or malformed fragment (e.g. after a trailing comma).
            continue
        res[pieces[0].strip()] = convert(pieces[1].strip())
    return res
def resolve_constraints(entry, key="Constraint", mapping=None):
    """Replace the constraint reference of an entry with concrete rules.

    Arguments
    ---------
    entry -- dict containing the data for one variable; modified in place
    key -- name of the constraint field in entry
    mapping -- dict translating a constraint name into a dict of rules

    Returns
    -------
    the modified entry with key removed, or None if mapping is None or
    entry has no key field

    A constraint containing "Range" is read as "... [low, high]" and
    stored as the strings entry["min"] and entry["max"]. Any other
    constraint is looked up in mapping and its rules are merged into
    entry; a name missing from mapping fails on that lookup.

    """
    if mapping is None or key not in entry:
        return None

    constraint = entry[key]
    if "Range" not in constraint:
        entry.update(mapping[constraint])
    else:
        bounds = constraint.split("[")[1].split("]")[0].split(",")
        # Strip so that "[0, 100]" yields "100" rather than " 100".
        entry["min"] = bounds[0].strip()
        entry["max"] = bounds[1].strip()
    del entry[key]
    return entry
# def resolve_constraints(entry, constr_key="Constraint", constr_dict=None):
# """
# Convert constraint strings into rules.
#
# Arguments
# ---------
# entry -- dict containing the data for one variable
# constraint -- constraint string as defined in the specification
#
# constr_key -- name of the constraint keyword in the dict
# Returns
# -------
# modified entry
#
# """
# constraint = entry.get(constr_key, None)
# if "INT-POS0" in constraint:
# entry["min"] = 0
# elif "INT-POS" in constraint:
......@@ -97,7 +137,7 @@ def entry2var(entry, varkey="Report metric"):
# entry["max"] = tmp[1]
# else:
# print("WARNING: unknown constraint {}...".format(constraint))
#
# return entry
......
......@@ -6,11 +6,21 @@ from spectools.specparser import SpecParser
from spectools.spec2schema import char2bool
from spectools.spec2schema import cols2dict
from spectools.spec2schema import col2list
from spectools.spec2schema import entry2var
from spectools.spec2schema import constr2dict
SNAME = "docs/spec-metrics.md"
SNAME2 = "docs/spec-metrics2.md"
TBL = dict()
TBL["test"] = {"key": "val", "key2": "val2"}
TBL = {"test": {"key": "val", "key2": "val2"}}
KEY_OK = "test"
KEY_WRONG = "hello"
CONSTR_REGEX = "regex=r'[a-z]{3, 5}'"
NAME_STR = "STR"
CONSTR_INT = "min=10"
NAME_INT = "INT"
NAME_FLOAT = "FLOAT"
CONSTR_STR = "minlength=3"
NAME_MEM = "MEM"
class TestSpecParser:
......@@ -42,17 +52,21 @@ class TestSpecParser:
class TestSpec2Schema:
@staticmethod
def test_char2bool_success_true():
assert char2bool("y") is True
assert char2bool("Y") is True
assert char2bool("t") is True
assert char2bool("T") is True
assert (
char2bool("y")
and char2bool("Y")
and char2bool("t")
and char2bool("T")
) is True
@staticmethod
def test_char2bool_success_false():
assert char2bool("n") is False
assert char2bool("N") is False
assert char2bool("f") is False
assert char2bool("F") is False
assert (
not char2bool("n")
and not char2bool("N")
and not char2bool("f")
and not char2bool("F")
) is True
@staticmethod
def test_char2bool_not_string():
......@@ -97,3 +111,59 @@ class TestSpec2Schema:
@staticmethod
def test_col2list_no_str():
assert col2list(TBL, None) is None
class TestSpec2Schema2:
    """Tests for ``entry2var`` and ``constr2dict``."""

    @staticmethod
    def test_entry2var_success():
        # entry2var deletes ``varkey`` from the dict it receives, so pass a
        # copy to keep the shared TBL fixture intact for the other tests.
        assert entry2var(dict(TBL), varkey=KEY_OK) is not None

    @staticmethod
    def test_entry2var_not_dict():
        assert entry2var(SNAME, varkey=KEY_OK) is None

    @staticmethod
    def test_entry2var_varkey_not_str():
        assert entry2var(TBL, varkey=TBL) is None

    @staticmethod
    def test_entry2var_varkey_missing():
        assert entry2var(TBL, varkey=KEY_WRONG) is None

    @staticmethod
    def test_entry2var_dict_none():
        assert entry2var(None, varkey=KEY_OK) is None

    @staticmethod
    def test_entry2var_varkey_none():
        assert entry2var(TBL, varkey=None) is None

    @staticmethod
    def test_constr2dict_success_regex():
        res = constr2dict(NAME_STR, CONSTR_REGEX)
        assert isinstance(res, dict)
        assert list(res) == ["regex"]

    @staticmethod
    def test_constr2dict_success_str():
        res = constr2dict(NAME_STR, CONSTR_STR)
        assert res == {"minlength": "3"}

    @staticmethod
    def test_constr2dict_success_int():
        res = constr2dict(NAME_INT, CONSTR_INT)
        assert res == {"min": 10}
        assert isinstance(res["min"], int)

    @staticmethod
    def test_constr2dict_success_float():
        res = constr2dict(NAME_FLOAT, CONSTR_INT)
        assert res == {"min": 10.0}
        # 10 == 10.0 in Python, so check the type explicitly.
        assert isinstance(res["min"], float)

    @staticmethod
    def test_constr2dict_success_mem():
        res = constr2dict(NAME_MEM, CONSTR_INT)
        assert res == {"min": 10}
        assert isinstance(res["min"], int)

    @staticmethod
    def test_constr2dict_success_wrong_name():
        # An unrecognized constraint name must still produce a dict.
        res = constr2dict(KEY_OK, CONSTR_INT)
        assert isinstance(res, dict)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment