convert.py

#!/usr/local/bin/python3
""" a simple script that converts ilias exam output to readable json

The json output will look like this:
{
    "max.mustermann": { <<--- local part of the email OR all uppercase letters of the name + matrikel_no
        "name": "Mustermann, Max",
        "email": "max.mustermann@example.com",
        "matrikel_no": "12345678",
        "submissions": [
            {"type": "[task_title_1]", "code": "print Hello World!", "tests": {}},
            ....,
            {"type": "[task_title_n]", "code": "#include <stdio.h> etc.", "tests": {}}
        ]
    },
    ... and so on
}

usage: convert.py [-h] [-u USERNAMES] [-n NUMBER_OF_TASKS] INFILE OUTFILE

positional arguments:
  INFILE                Ilias exam data
  OUTFILE               Where to write the final file

optional arguments:
  -h, --help            show this help message and exit
  -u USERNAMES, --usernames USERNAMES
                        a json dict matno -> email
  -n NUMBER_OF_TASKS, --NUMBER_OF_TASKS NUMBER_OF_TASKS
                        Number of tasks every user must have (0: don't check)


Author: Jan Maximilian Michal
Date: 30 March 2017
"""

import argparse
import json
import os
import re
import urllib.parse
from collections import defaultdict, namedtuple

from xlrd import open_workbook

# Command line interface of the script.
parser = argparse.ArgumentParser()
parser.add_argument('INFILE', help='Ilias exam data')
parser.add_argument('OUTFILE', help='Where to write the final file')
parser.add_argument('-u', '--usernames', help='a json dict matno -> email')
parser.add_argument(
    '-n', '--NUMBER_OF_TASKS',
    default=0,  # 0 disables the check in converter()
    metavar='NUMBER_OF_TASKS',
    type=int,
    # The help text used to be a copy of the OUTFILE description.
    help="Number of tasks every user is expected to have "
         "(default 0: don't check)")


# Every user has exactly one submission (code) per task.
# The groups below are named for readability only; a user header match is
# unpacked positionally (via .groups()) into this namedtuple.
user_head = namedtuple('user_head', ['kohorte', 'name'])
user_head_re = re.compile(
    r'^Ergebnisse von Testdurchlauf (?P<kohorte>\d+) '
    r'für (?P<name>[\w\s\.,-]+)$'
)

# A task header carries a title followed by an eight digit number; the rows
# after it are hopefully code.
task_head_re = re.compile(
    r'^Quellcode Frage(?P<title>.*) \d{8}$'
)

# Now parsing the weird matrikel number format: 8 digits, then two groups of
# 3 digits, separated by dashes. Only the first part is captured by name.
matno_re = re.compile(
    r'^(?P<matrikel_no>\d{8})-(\d{3})-(\d{3})$'
)


def converter(infile, usernames=None, number_of_tasks=0,):
    """Convert an ILIAS exam workbook into a JSON-serialisable dict.

    The first sheet is treated as the meta sheet (name in the first column,
    matrikel number in the second, first row skipped). All remaining sheets
    are scanned row by row for user headers, task headers and code.

    Args:
        infile: Path of the workbook handed to ``open_workbook``.
        usernames: Optional path of a JSON file containing a dict that maps
            matrikel numbers to email addresses.
        number_of_tasks: Expected number of tasks per user; ``0`` disables
            the check.

    Returns:
        A dict, filled in name-sorted order, mapping a username to a dict
        with the keys ``name``, ``email``, ``matrikel_no`` and
        ``submissions``. The username is the local part of the email when
        the matrikel number is found in the mapping, otherwise the uppercase
        letters of the name followed by the matrikel number. Every
        submission is a dict with the keys ``type`` (task title), ``code``
        and ``tests`` (always an empty dict).

    Raises:
        AssertionError: If the names of the meta sheet are not unique, or if
            ``number_of_tasks`` is set and a user does not have
            ``2 * number_of_tasks`` entries (title + code per task).
        KeyError: If a user header names someone missing from the meta sheet.
    """

    # Modify these iterators in order to change extraction behaviour

    def sheet_iter_meta(sheet):
        """ yield first and second col entry as tuple of (name, matnr) """
        # row 0 is skipped (presumably a column header row - TODO confirm)
        for row in (sheet.row(i) for i in range(1, sheet.nrows)):
            m = matno_re.search(row[1].value)
            yield row[0].value, m.group('matrikel_no') if m else row[1].value

    def sheet_iter_data(sheet):
        """ yields all rows that are not of empty type as one string """
        for row in (sheet.row(i) for i in range(sheet.nrows)):
            if any(c.ctype for c in row):
                yield ''.join(c.value for c in row)

    # The second argument of open_workbook is xlrd's logfile; pointing it at
    # devnull silences the library. The with-block closes the handle
    # deterministically and keeps it open while the sheets are being read.
    with open(os.devnull, 'w') as devnull:
        # meta sheet contains ilias evaluation names usernames etc - data
        # sheets contain code
        meta, *data_sheets = open_workbook(infile, devnull).sheets()

        # duplicate names collapse in the dict, which the assert detects
        name2mat = dict(sheet_iter_meta(meta))
        assert meta.nrows - 1 == len(name2mat), (
            f'{meta.nrows - 1} meta rows but {len(name2mat)} unique names')

        # from xls to lists and namedtuples
        # [ [user0, task0_h, code0, ..., taskn, coden ], ..., [...] ]
        # NOTE(review): a task or code row that shows up before the first
        # user header raises IndexError on root[-1].
        root = []
        for sheet in data_sheets:
            for row in sheet_iter_data(sheet):
                user = user_head_re.search(row)
                task = task_head_re.search(row)
                if user:
                    root.append([user_head(*user.groups())])
                elif task:
                    root[-1].append(task.group('title'))
                else:  # should be code
                    root[-1].append(urllib.parse.unquote(row).strip())

    # sort once; both the sanity check and the output use this order
    users = sorted(root, key=lambda u: u[0].name)

    if number_of_tasks:
        for user, *task_list in users:
            assert len(task_list) == number_of_tasks * 2, (
                f'{user.name}: expected {number_of_tasks * 2} entries, '
                f'got {len(task_list)}')

    mat_to_email = {}
    if usernames:
        with open(usernames) as mapping_file:
            mat_to_email.update(json.load(mapping_file))

    def get_username(name, matno):
        """ email local part if known, else uppercase initials + matno """
        if matno in mat_to_email:
            return mat_to_email[matno].split('@')[0]
        return ''.join(filter(str.isupper, name)) + matno

    # from list to json_like
    # the format {username: {name:, email:, matrikel_no:, submissions: [...]}}
    result = {}
    for user, *task_list in users:
        matno = name2mat[user.name]
        result[get_username(user.name, matno)] = {
            'name': user.name,
            'email': mat_to_email.get(matno, ''),
            'matrikel_no': matno,
            'submissions': [
                {
                    "type": task,
                    "code": code,
                    "tests": {},
                } for task, code in zip(task_list[::2], task_list[1::2])
            ],
        }
    return result


def write_to_file(json_dict, outfile):
    """Encode *json_dict* with the default JSON settings and write it to *outfile*.

    A short confirmation is printed to stdout once the file has been written.
    """
    with open(outfile, "w") as handle:
        serialized = json.dumps(json_dict)
        handle.write(serialized)

    print(f"Wrote data to {outfile}. Done.")


def main():
    """Parse the command line, convert the input file and write the result."""
    cli = parser.parse_args()
    converted = converter(
        infile=cli.INFILE,
        usernames=cli.usernames,
        number_of_tasks=cli.NUMBER_OF_TASKS,
    )
    write_to_file(converted, outfile=cli.OUTFILE)


# Run the conversion only when the file is executed as a script.
if __name__ == '__main__':
    # NOTE(review): SCRIPT is not read anywhere in the visible file - confirm
    # whether anything still depends on it.
    SCRIPT = True
    main()