Skip to content
Snippets Groups Projects
hektor.py 3.79 KiB
Newer Older
  • Learn to ignore specific revisions
  • import argparse
    import json
    import re
    import sys
    import zipfile
    
    from lxml import etree
    
    # Archive member names look like ``<digits>__<digits>__<kind>_<id>.xml``.
    # ``data`` captures the kind of document (results, qti or tst) and ``id``
    # the trailing number.  The dot is escaped so that only a literal
    # ``.xml`` suffix is accepted.
    file_regex = re.compile(
        r'(\d+)__(\d+)__(?P<data>results|qti|tst)_(?P<id>\d+)\.xml')

    # Item identifiers look like ``il_<digits>_qst_<task id>``.
    task_id_regex = re.compile(r'il_\d+_qst_(?P<task_id>\d+)')

    # Locate a QUESTIONTYPE field label and climb five levels
    # (fieldlabel -> qtimetadatafield -> qtimetadata -> itemmetadata -> item
    # -> section) to reach the element that contains the items.
    tasks_path = ('./assessment/section/item/itemmetadata/qtimetadata/'
                  'qtimetadatafield/fieldlabel[text()="QUESTIONTYPE"]'
                  '/../../../../..')

    # XPath of the per-user rows in a results document.
    users = './tst_active/row'
    # XPath template for the solution rows of one question; the question id
    # is filled in with the ``%`` operator.
    solutions = './tst_solutions/row[@question_fi="%s"]'
    
    
    def eat_qti(tree):
        """Collect the tasks described by a parsed QTI document.

        The first node matched by ``tasks_path`` is used as the container of
        the ``item`` elements.  Titles, question types, task ids and question
        texts are gathered as four separate lists and combined by position,
        so the result has as many entries as the shortest of those lists.

        Returns a dict that maps each task id (the ``task_id`` group of
        ``task_id_regex`` applied to the item's ``ident`` attribute) to a
        dict with the keys ``'title'``, ``'text'`` and ``'type'``.
        """
        container = tree.xpath(tasks_path)[0]

        item_titles = container.xpath('./item/@title')
        item_types = container.xpath(
            './item/itemmetadata/qtimetadata/qtimetadatafield/'
            'fieldlabel[text()="QUESTIONTYPE"]/../fieldentry/text()')

        task_ids = []
        for ident in container.xpath('./item/@ident'):
            task_ids.append(task_id_regex.search(ident).group('task_id'))

        # One text per ``flow`` element: its material texts, one per line.
        item_texts = []
        for flow in container.xpath('./item/presentation/flow'):
            fragments = flow.xpath('./material/mattext/text()')
            item_texts.append('\n'.join(fragments))

        catalogue = {}
        rows = zip(task_ids, item_types, item_titles, item_texts)
        for task_id, kind, title, text in rows:
            catalogue[task_id] = {'title': title, 'text': text, 'type': kind}
        return catalogue
    
    
    def eat_users(results_tree):
        """Index the user rows of a results document by ``active_id``.

        Every row matched by the ``users`` XPath contributes one entry whose
        value is a plain-dict copy of the row's attributes.
        """
        by_active_id = {}
        for row in results_tree.xpath(users):
            active_id = row.attrib['active_id']
            by_active_id[active_id] = dict(row.attrib)
        return by_active_id
    
    
    def eat_solutions(results_tree, task_id):
        """Map ``active_fi`` to ``value1`` for the solution rows of one task.

        ``task_id`` is substituted into the ``solutions`` XPath template.  If
        several rows share an ``active_fi``, the last one in document order
        wins.
        """
        query = solutions % task_id
        answers = {}
        for row in results_tree.xpath(query):
            answers[row.attrib['active_fi']] = row.attrib['value1']
        return answers
    
    
    def eat_results(tree, questions=("17639",)):
        """Build the per-user records of a results document.

        Each record produced by ``eat_users`` gets a ``'submissions'`` dict
        that maps a question id to the value returned for that user by
        ``eat_solutions``.

        ``questions`` is the iterable of question ids to look up; it defaults
        to the single id ``"17639"``.  A solution whose user key is not among
        the user records raises ``KeyError``.
        """
        participants = eat_users(tree)
        for participant in participants.values():
            participant['submissions'] = {}

        for question in questions:
            for active_id, answer in eat_solutions(tree, question).items():
                participants[active_id]['submissions'][question] = answer

        return participants
    
    
    def eat_tst(tree):
        """Extract the exam title and author from a parsed tst document.

        Returns ``{'exam': <first title>, 'author': <first author entity>}``.
        Raises ``IndexError`` when either XPath query yields no result.
        """
        titles = tree.xpath('./MetaData/General/Title/text()')
        authors = tree.xpath(
            './MetaData/Lifecycle/Contribute[@Role="Author"]/Entity/text()')
        exam_title, author_name = titles[0], authors[0]
        return dict(exam=exam_title, author=author_name)
    
    
    def eat_archive(archive):
        """Parse every recognised XML member of an opened ZIP archive.

        ``archive`` is an open ``zipfile.ZipFile``.  Members whose name does
        not match ``file_regex`` are skipped.  Each matching member is parsed
        with ``etree.parse`` and handed to the module-level function named
        ``eat_<kind>``, where ``<kind>`` is the ``data`` group of the match
        (``results``, ``qti`` or ``tst``).

        Yields ``(kind, parsed_data)`` tuples, one per matching member.
        """
        # namelist() is the public way to enumerate member names.
        for name in archive.namelist():
            match = file_regex.search(name)
            if match is None:
                continue
            kind = match.group('data')
            # Dispatch by naming convention; the regex restricts ``kind`` to
            # the three values that have an eat_* handler in this module.
            handler = globals()['eat_' + kind]
            with archive.open(name) as datafile:
                tree = etree.parse(datafile)
                yield kind, handler(tree)
    
    
    def add_meta(base, data):
        """Merge the entries of ``data['tst']`` into ``base`` in place."""
        meta = data['tst']
        base.update(meta)
    
    
    def add_tasks(base, data):
        """Store ``data['qti']`` in ``base`` under the key ``'tasks'``."""
        tasks = data['qti']
        base['tasks'] = tasks
    
    
    # Attributes of a user record that are dropped before the record is
    # placed in the output structure.
    ignore_user_fields = ("user_fi",
                          "anonymous_id",
                          "test_fi",
                          "lastindex",
                          "tries",
                          "submitted",
                          "submittimestamp",
                          "tstamp",
                          "user_criteria",)


    def add_users(base, data):
        """Strip ignored fields from the user records and attach them to base.

        Every record in ``data['results']`` is modified in place: each key
        listed in ``ignore_user_fields`` is removed.  The (same) mapping of
        records is then stored in ``base`` under ``'students'``.
        """
        for userdata in data['results'].values():
            for field in ignore_user_fields:
                # A record that lacks one of the fields needs no stripping;
                # the default keeps pop() from raising KeyError for it.
                userdata.pop(field, None)
        base['students'] = data['results']
    
    
    def give_me_structure(data):
        """Assemble the output structure from the parsed archive data.

        Starts from an empty dict and lets ``add_meta``, ``add_tasks`` and
        ``add_users`` fill it, in that order.  Returns the filled dict.
        """
        structure = {}
        for populate in (add_meta, add_tasks, add_users):
            populate(structure, data)
        return structure
    
    
    def eat_zipfile(input_file, output):
        """Convert an export ZIP archive into one JSON document.

        ``input_file`` is passed to ``zipfile.ZipFile``.  ``output`` is either
        a path to write to (opened as UTF-8 text) or an already open text
        stream such as ``sys.stdout``, which the command line uses as its
        default.  The JSON is written with an indent of 2.
        """
        with zipfile.ZipFile(input_file) as archive:
            data = dict(eat_archive(archive))

        structured_data = give_me_structure(data)

        if hasattr(output, 'write'):
            # An open stream cannot be passed to open(); write to it
            # directly and leave closing it to whoever owns it.
            json.dump(structured_data, output, indent=2)
        else:
            with open(output, 'w', encoding='utf-8') as out:
                json.dump(structured_data, out, indent=2)
    
    
    def parseme():
        """Parse the command line and return the resulting namespace.

        The namespace has ``input`` (required positional argument) and
        ``output`` (``-o``/``--output``, defaulting to ``sys.stdout``).
        """
        input_options = {
            'metavar': 'FILE',
            'help': 'A ZIP file that contains a qit course',
        }
        output_options = {
            'default': sys.stdout,
            'metavar': 'FILE',
            'help': 'Where you want to put the output',
        }

        cli = argparse.ArgumentParser()
        cli.add_argument('input', **input_options)
        cli.add_argument('-o', '--output', **output_options)
        return cli.parse_args()
    
    
    if __name__ == '__main__':
        # Script entry point: read the command line, then convert the archive.
        cli_args = parseme()
        eat_zipfile(input_file=cli_args.input, output=cli_args.output)