#!/usr/local/bin/python3
"""Convert an ILIAS exam export (xls) into readable JSON.

The JSON output looks like this:

{
    "max.mustermann": {    <<--- local part of the e-mail address, OR all
                                 uppercase letters of the name + matrikel_no
        "name": "Mustermann, Max",
        "email": "max.mustermann@example.org",
        "matrikel_no": "12345678",
        "submissions": [
            {"type": "[task title 1]", "code": "print Hello World!",
             "tests": {}},
            ....,
            {"type": "[task title n]", "code": "#include <stdio.h> etc.",
             "tests": {}}
        ]
    },
    ... and so on
}

usage: convert.py [-h] [-u USERNAMES] [-n NUMBER_OF_TASKS] INFILE OUTFILE

positional arguments:
  INFILE                Ilias exam data
  OUTFILE               Where to write the final file

optional arguments:
  -h, --help            show this help message and exit
  -u USERNAMES, --usernames USERNAMES
                        a json dict matno -> email
  -n NUMBER_OF_TASKS, --NUMBER_OF_TASKS NUMBER_OF_TASKS
                        Expected number of tasks per student (0 = do not
                        check)

Author: Jan Maximilian Michal
Date: 30 March 2017
"""
import argparse
import json
import os
import re
import urllib.parse
from collections import defaultdict, namedtuple

from xlrd import open_workbook

parser = argparse.ArgumentParser()
parser.add_argument('INFILE', help='Ilias exam data')
parser.add_argument('OUTFILE', help='Where to write the final file')
parser.add_argument('-u', '--usernames', help='a json dict matno -> email')
parser.add_argument(
    '-n', '--NUMBER_OF_TASKS',
    default=0,  # 0 disables the check
    metavar='NUMBER_OF_TASKS',
    type=int,
    help='Expected number of tasks per student (0 = do not check)')

# One user has one submission (code) per task.  A user section starts with a
# header row that carries the cohort number and the user's name.
user_head = namedtuple('user_head', 'kohorte, name')
user_head_re = re.compile(r'^Ergebnisse von Testdurchlauf '
                          r'(?P<kohorte>\d+) für (?P<name>[\w\s\.,-]+)$')

# One task has a title followed by an eight digit number; the row after the
# task header is expected to be the submitted code.
task_head_re = re.compile(r'^Quellcode Frage(?P<title>.*) \d{8}$')

# For parsing the "<8 digits>-<3 digits>-<3 digits>" matrikel number format.
matno_re = re.compile(r'^(?P<matrikel_no>\d{8})-(\d{3})-(\d{3})$')


# Modify the two iterators below in order to change extraction behaviour.

def _iter_meta_rows(sheet):
    """Yield ``(name, matrikel_no)`` for every meta sheet row but the first.

    The name is taken from the first column.  The second column is reduced
    to its leading eight digits when it matches ``matno_re``; otherwise the
    cell value is passed through unchanged.
    """
    for index in range(1, sheet.nrows):
        row = sheet.row(index)
        match = matno_re.search(row[1].value)
        yield (row[0].value,
               match.group('matrikel_no') if match else row[1].value)


def _iter_data_rows(sheet):
    """Yield every non-empty row of *sheet* as one concatenated string.

    A row is skipped when the ``ctype`` of all of its cells is falsy, i.e.
    when every cell is of the empty type.
    """
    for index in range(sheet.nrows):
        row = sheet.row(index)
        if any(cell.ctype for cell in row):
            yield ''.join(cell.value for cell in row)


def converter(infile, usernames=None, number_of_tasks=0):
    """Read an ILIAS exam workbook and return its content as a dict.

    The first sheet of the workbook is the meta sheet (names and matrikel
    numbers); all remaining sheets contain the submissions.

    Args:
        infile: Path of the xls file to read.
        usernames: Optional path of a JSON file holding a dict that maps
            matrikel numbers to e-mail addresses.
        number_of_tasks: When non-zero, every user must have exactly this
            many (title, code) pairs.

    Returns:
        A dict keyed by username.  The username is the local part of the
        user's e-mail address when the matrikel number is found in the
        *usernames* mapping, otherwise the uppercase letters of the name
        followed by the matrikel number.  Each value holds ``name``,
        ``email`` (empty string when unknown), ``matrikel_no`` and a list of
        ``submissions``.  Entries are inserted sorted by name.

    Raises:
        AssertionError: If the meta sheet contains duplicate names or a user
            does not have ``number_of_tasks`` tasks.
        IndexError: If a data sheet has content before the first user header.
        KeyError: If a user in the data sheets is missing from the meta sheet.
    """
    # xlrd writes its diagnostics to ``logfile``.  They are discarded, and
    # the handle stays open for as long as the workbook is being accessed.
    with open(os.devnull, 'w') as devnull:
        meta, *data_sheets = open_workbook(infile, logfile=devnull).sheets()

        name2mat = dict(_iter_meta_rows(meta))
        # Raised explicitly (instead of ``assert``) so the check survives
        # ``python -O``; the exception type is kept for existing callers.
        if meta.nrows - 1 != len(name2mat):
            raise AssertionError(
                f'{meta.nrows - 1} meta rows but only {len(name2mat)} '
                'distinct names')

        # From xls to lists and namedtuples:
        # [ [user0, title0, code0, ..., titlen, coden], ..., [...] ]
        root = []
        for sheet in data_sheets:
            for row in _iter_data_rows(sheet):
                user = user_head_re.search(row)
                if user:
                    root.append([user_head(*user.groups())])
                    continue
                if not root:
                    raise IndexError(
                        f'row found before the first user header: {row!r}')
                task = task_head_re.search(row)
                if task:
                    root[-1].append(task.group('title'))
                else:
                    # should be code
                    root[-1].append(urllib.parse.unquote(row).strip())

    ordered = sorted(root, key=lambda entry: entry[0].name)

    if number_of_tasks:
        for user, *task_list in ordered:
            # every task contributes a title and a code entry
            if len(task_list) != number_of_tasks * 2:
                raise AssertionError(
                    f'{user.name}: expected {number_of_tasks * 2} task '
                    f'entries, found {len(task_list)}')

    mat_to_email = defaultdict(str)
    if usernames:
        with open(usernames, encoding='utf-8') as mapping_file:
            mat_to_email.update(json.load(mapping_file))

    def get_username(user):
        """Return the e-mail local part or initials + matrikel number."""
        matrikel_no = name2mat[user.name]
        if matrikel_no in mat_to_email:
            return mat_to_email[matrikel_no].split('@')[0]
        return ''.join(filter(str.isupper, user.name)) + matrikel_no

    # Resolve all usernames before the e-mail lookups below: indexing the
    # defaultdict inserts missing keys, which would change the membership
    # test in get_username for later users.
    login_by_name = {user.name: get_username(user) for user, *_ in root}

    # From list to json-like via comprehension.
    return {
        login_by_name[user.name]: {
            'name': user.name,
            'email': mat_to_email[name2mat[user.name]],
            'matrikel_no': name2mat[user.name],
            'submissions': [
                {
                    "type": title,
                    "code": code,
                    "tests": {},
                }
                for title, code in zip(task_list[::2], task_list[1::2])
            ],
        }
        for user, *task_list in ordered
    }


def write_to_file(json_dict, outfile):
    """Serialise *json_dict* as JSON into *outfile* and report completion."""
    with open(outfile, "w", encoding='utf-8') as out:
        out.write(json.dumps(json_dict))

    print(f"Wrote data to {outfile}. Done.")


def main():
    """Parse the command line, convert INFILE and write OUTFILE."""
    args = parser.parse_args()
    json_dict = converter(args.INFILE, args.usernames, args.NUMBER_OF_TASKS)
    write_to_file(json_dict, args.OUTFILE)


if __name__ == '__main__':
    SCRIPT = True
    main()