-
Jan Maximilian Michal authoredJan Maximilian Michal authored
convert.py 5.20 KiB
#!/usr/local/bin/python3
""" a simple script that converts ilias exam output to readable json
The json output will look like this:
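{
    "max.mustermann": { <<--- local part of the email, OR all uppercase letters of the name + matrikel_no
        "name": "Mustermann, Max",
        "email": "max.mustermann@example.org",
        "matrikel_no": "12345678",
        "submissions": [
            {
                "type": "[task title 1]",
                "code": "print Hello World!",
                "tests": {}
            },
            ....,
            {
                "type": "[task title n]",
                "code": "#include <stdio.h> etc.",
                "tests": {}
            }
        ]
    },
    ... and so on
}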
usage: convert.py [-h] [-u USERNAMES] [-n NUMBER_OF_TASKS] INFILE OUTFILE
positional arguments:
INFILE Ilias exam data
OUTFILE Where to write the final file
optional arguments:
-h, --help show this help message and exit
-u USERNAMES, --usernames USERNAMES
a json dict matno -> email
-n NUMBER_OF_TASKS, --NUMBER_OF_TASKS NUMBER_OF_TASKS
expected number of tasks per user (0 = do not check)
Author: Jan Maximilian Michal
Date: 30 March 2017
"""
import argparse
import json
import os
import re
import urllib.parse
from collections import defaultdict, namedtuple
from xlrd import open_workbook
# Command line interface of the script; the arguments are parsed in main().
parser = argparse.ArgumentParser()
parser.add_argument('INFILE', help='Ilias exam data')
parser.add_argument('OUTFILE', help='Where to write the final file')
parser.add_argument('-u', '--usernames', help='a json dict matno -> email')
parser.add_argument(
    '-n', '--NUMBER_OF_TASKS',
    default=0,  # 0 disables the per-user task count check in converter()
    metavar='NUMBER_OF_TASKS',
    type=int,
    # The help text used to be a copy of the OUTFILE description.
    help='expected number of tasks per user; 0 (default) skips the check')
# Every user owns exactly one submission (code) per task.
# A user header row is parsed with user_head_re and its groups are unpacked
# positionally into this record: (kohorte, name).
user_head = namedtuple('user_head', ['kohorte', 'name'])

user_head_re = re.compile(
    r'^Ergebnisse von Testdurchlauf (?P<kohorte>\d+) für (?P<name>[\w\s\.,-]+)$'
)

# A task header row carries the task title followed by an eight digit id;
# the row(s) after it are expected to hold the submitted code.
task_head_re = re.compile(
    r'^Quellcode Frage(?P<title>.*) \d{8}$'
)

# Extracts the eight digit matrikel number from the composite
# "<8 digits>-<3 digits>-<3 digits>" value found in the export.
matno_re = re.compile(
    r'^(?P<matrikel_no>\d{8})-(\d{3})-(\d{3})$'
)
def converter(infile, usernames=None, number_of_tasks=0):
    """Convert an ILIAS exam export workbook into a JSON-serialisable dict.

    The first sheet of the workbook is treated as the meta sheet (student
    name in the first column, matrikel number in the second); all remaining
    sheets are scanned for user headers, task headers and code rows.

    Args:
        infile: Path of the xls workbook to read.
        usernames: Optional path of a JSON file mapping matrikel number to
            email address.
        number_of_tasks: When non-zero, every user must have exactly this
            many (task title, code) pairs.

    Returns:
        A dict keyed by username. The username is the local part of the
        email when the matrikel number is found in the mapping, otherwise
        the uppercase letters of the name followed by the matrikel number.
        Each value holds 'name', 'email', 'matrikel_no' and 'submissions'
        (a list of {'type', 'code', 'tests'} dicts).

    Raises:
        AssertionError: If the meta sheet does not yield one distinct name
            per data row, or if a user does not have the expected number
            of task entries.
    """

    # Modify these iterators in order to change extraction behaviour.
    def sheet_iter_meta(sheet):
        """Yield (name, matrikel_no) for every row below the header row."""
        for row in (sheet.row(i) for i in range(1, sheet.nrows)):
            match = matno_re.search(row[1].value)
            # Fall back to the raw cell value when it is not in the
            # "<8 digits>-<3 digits>-<3 digits>" form.
            yield (row[0].value,
                   match.group('matrikel_no') if match else row[1].value)

    def sheet_iter_data(sheet):
        """Yield each row with at least one non-empty cell as one string."""
        for row in (sheet.row(i) for i in range(sheet.nrows)):
            # A falsy ctype marks an empty cell.
            if any(cell.ctype for cell in row):
                yield ''.join(cell.value for cell in row)

    # xlrd log output is discarded. The sink is closed again as soon as
    # everything has been pulled out of the workbook (it used to leak).
    with open(os.devnull, 'w') as devnull:
        # The meta sheet contains names and matrikel numbers, the other
        # sheets contain the code.
        meta, *data_sheets = open_workbook(infile, devnull).sheets()
        name2mat = dict(sheet_iter_meta(meta))
        assert meta.nrows - 1 == len(name2mat), f'{meta.nrows} != {len(name2mat)}'
        rows = [row for sheet in data_sheets for row in sheet_iter_data(sheet)]

    # From flat rows to one list per user:
    # [ [user0, title0, code0, ..., titlen, coden], ..., [...] ]
    root = []
    for row in rows:
        user = user_head_re.search(row)
        task = task_head_re.search(row)
        if user:
            root.append([user_head(*user.groups())])
        elif task:
            root[-1].append(task.group('title'))
        else:  # should be code
            root[-1].append(urllib.parse.unquote(row).strip())

    by_name = sorted(root, key=lambda entry: entry[0].name)

    if number_of_tasks:
        for user, *task_list in by_name:
            # Titles and code alternate, hence two entries per task.
            assert len(task_list) == number_of_tasks * 2, (
                f'{user.name}: found {len(task_list)} entries, '
                f'expected {number_of_tasks * 2}')

    # Unknown matrikel numbers resolve to an empty email address.
    mat_to_email = defaultdict(str)
    if usernames:
        with open(usernames) as mapping_file:
            mat_to_email.update(json.load(mapping_file))

    def get_username(user):
        """Return the email local part, or initials + matrikel number."""
        matrikel_no = name2mat[user.name]
        if matrikel_no in mat_to_email:
            return mat_to_email[matrikel_no].split('@')[0]
        return ''.join(filter(str.isupper, user.name)) + matrikel_no

    # Resolved before the result is built: the defaultdict lookups below
    # insert missing keys, which must not influence the membership test.
    name2username = {user.name: get_username(user) for (user, *_) in root}

    # From list to json-like structure:
    # {username: {name:, email:, matrikel_no:, submissions: [{...}, ...]}}
    return {
        name2username[user.name]: {
            'name': user.name,
            'email': mat_to_email[name2mat[user.name]],
            'matrikel_no': name2mat[user.name],
            'submissions': [
                {
                    "type": task,
                    "code": code,
                    "tests": {},
                } for task, code in zip(task_list[::2], task_list[1::2])
            ]
        } for (user, *task_list) in by_name
    }
def write_to_file(json_dict, outfile):
    """Serialise *json_dict* as JSON into *outfile* and report on stdout.

    The default encoder settings are used, i.e. compact single-line output.
    """
    with open(outfile, "w") as sink:
        sink.write(json.dumps(json_dict))
    print(f"Wrote data to {outfile}. Done.")
def main():
    """Parse the command line, convert INFILE and write the result to OUTFILE."""
    cli = parser.parse_args()
    write_to_file(
        converter(cli.INFILE, cli.usernames, cli.NUMBER_OF_TASKS),
        cli.OUTFILE,
    )
# Run the conversion only when the file is executed as a script, not on import.
if __name__ == '__main__':
    # NOTE(review): SCRIPT is not read anywhere in the visible code —
    # confirm whether anything still depends on it before removing it.
    SCRIPT = True
    main()