Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • robinwilliam.hundt/hektor
  • j.michal/hektor
2 results
Show changes
Commits on Source (13)
......@@ -10,3 +10,4 @@ deploy.sh
.DS_Store
*.xls
.venv/
testall.sh
image: python:3.4
image: python:3.5
before_script:
- python -V
- pip install -e .
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/pip-cache"
cache:
paths:
- ~/.cache/pip/
- "$CI_PROJECT_DIR/pip-cache"
test:
flake8:
script:
- pip install -e .
- pip install flake8
- flake8 hektor.py bin lib
test:
script:
- hektor -h
.PHONY: dist clean upload tag help
help:
@echo "See Makefile itself for help"
clean:
rm -r hektor.egg-info dist build __pycache__
dist:
pip install -U setuptools pip wheel && \
python setup.py bdist_wheel --universal
upload: dist
twine upload dist/*
tag:
git tag `python setup.py --version`
#!/usr/bin/env python3
""" Entry point script: check the interpreter version, then run hektor. """

import sys

import hektor

if __name__ == '__main__':
    # hektor uses syntax/stdlib features that require at least Python 3.5.
    # Fix: the message previously said 3.4 while the check was against 3.5.
    if sys.version_info < (3, 5):
        sys.exit("At least Python 3.5 is required.")
    hektor.main()
import argparse
import base64
import functools
import getpass
import json
import logging
import os
from typing import Any, Callable, Dict, Sequence
from typing import Any, Callable, Dict, List, Sequence, Union
from lib import Converter
from xkcdpass import xkcd_password as xp
from lib import Converter
try:
from cryptography.fernet import Fernet
except ImportError:
Fernet = None
# ============================== =- Logging -= ============================== #
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)
def setup_logging():
''' Make the logger globally available by hide intermediate handler,
filters and formatter variables '''
global log
# create console handler and formatter
console = logging.StreamHandler()
console.setLevel(logging.DEBUG)
formatter = logging.Formatter('[%(levelname)s] %(message)s')
level = logging.DEBUG if args.verbose else logging.INFO
# add formatter to console handler
console.setFormatter(formatter)
log.addHandler(console)
log = logging.getLogger(__name__)
log.setLevel(level)
# create console handler and formatter
console = logging.StreamHandler()
console.setLevel(level)
formatter = logging.Formatter('[%(levelname)s] %(message)s')
# add formatter to console handler
console.setFormatter(formatter)
log.addHandler(console)
# ============================= =- argparse -= ============================== #
def parseme():
def setup_argparse():
global args
def file_exists(parser, filepath: str) -> str:
if not os.path.isfile(filepath):
parser.error('Not a file %s' % filepath)
return filepath
parser = argparse.ArgumentParser()
# General purpose arguments
parser.add_argument(
'-v', '--verbose',
action='store_true',
help='enable verbose logging (Level: DEBUG)')
# Input output files
parser.add_argument(
'input',
metavar='DATA',
......@@ -42,7 +64,21 @@ def parseme():
'output',
metavar='OUTFILE',
help='destination of converter output (JSON)')
parser.add_argument(
# Post-processor flags
remove_personal = parser.add_mutually_exclusive_group()
remove_personal.add_argument(
'-e', '--encrypt',
action='store_true',
help='''strip all personal information and provide decryption key
(AES 128-bit, CBC mode, PKCS7 for padding, HMAC with SHA-256
for integrity)'''
)
remove_personal.add_argument(
'-d', '--decrypt',
action='store_true',
help='Reverse previous AES encryption.')
remove_personal.add_argument(
'-a', '--anonymous',
action='store_true',
help='replace personal information and create a reversing table')
......@@ -51,71 +87,76 @@ def parseme():
help='where to store personal information (CSV)',
)
parser.add_argument(
'-m', '--meta',
'-m', '--add-meta',
action='store_true',
help='add meta information (lecturer, course title)'
)
parser.add_argument(
'--verify',
action='store_true',
default=True,
help='asserts that output data will be in a certain format'
)
parser.add_argument(
'-r', '--readable-code',
action='store_true',
help='make student code readable by inserting artificial line breaks')
args = parser.parse_args()
if args.anonymous != (args.personal_secret_table is not None):
parser.error('Need an output for anonymous mode')
if (args.decrypt or args.encrypt) and Fernet is None:
parser.error('To use AES encryption, install cryptography via pip')
return args
if args.anonymous != (args.personal_secret_table is not None):
parser.error('Please specify where to write the mapping (see -t)')
# ========================== =- General Purpose -= ========================== #
def compose(*functions: Sequence[Callable]) -> Callable:
    """ Standard function composition.

    Given functions [f, g, h, ...] return the composite i(x) = f(g(h(x))).
    No checks are performed that the domain and image of neighbouring
    functions are compatible; with no arguments the identity is returned.
    """
    def composite(value):
        # Apply right-to-left so the first argument is the outermost call.
        for fn in reversed(functions):
            value = fn(value)
        return value
    return composite
# ========================== =- Post processors -= ========================== #
def anonymise(structured_data: Dict[str, Any]) -> Dict[str, Any]:
DELIMITER = '-'
wordfile = xp.locate_wordfile()
words = xp.generate_wordlist(wordfile=wordfile,
min_length=7,
max_length=7)
def abort(message: str = 'Bye.'):
    ''' In case anything goes wrong. Basically a dumb wrapper around exit:
    logs the message at INFO level and terminates with status code 1. '''
    log.info(message)
    exit(1)
def get_identifier():
return xp.generate_xkcdpassword(words, numwords=2, delimiter=DELIMITER)
students = structured_data.pop('students')
reverser = {get_identifier(): s for s in students.values()}
students_anon = {r: {
'fullname': ' '.join(w[0].capitalize() + w[1:]
for w in r.split(DELIMITER)),
'identifier': r,
'submissions': student['submissions']
} for r, student in zip(reverser, students.values())}
# ========================== =- Post processors -= ========================== #
def student_replacer(processor):
''' A simple decorator that is used to remove students and put them back in
when the preprocessor is dome with them'''
with open(args.personal_secret_table, 'w') as out:
print('key, previous identifier, fullname', file=out)
print('\n'.join(anon + '\t' + '\t'.join(v
for v in data.values()
if type(v) is str)
for anon, data in reverser.items()), file=out)
@functools.wraps(processor)
def processor_closure(structured_data: Dict[str, Any]) -> Dict[str, Any]:
students = structured_data.pop('students')
students_replacement = processor(students)
structured_data['students'] = students_replacement
return structured_data
structured_data.update({'students': students_anon})
return structured_data
return processor_closure
def add_meta_information(structured_data: Dict[str, Any]) -> Dict[str, Any]:
if args.meta:
structured_data['author'] = input('[Q] author: ')
structured_data['exam'] = input('[Q] course title: ')
def do_add_meta(structured_data: Dict[str, Any]) -> Dict[str, Any]:
    ''' Asks the user for metadata about the exam (author, course title)
    and stores the answers in the top level of the data dict. '''
    prompts = (('author', '[Q] author: '),
               ('exam', '[Q] course title: '))
    for field, question in prompts:
        structured_data[field] = input(question)
    return structured_data
def assert_correct_format(structured_data: Dict[str, Any]) -> Dict[str, Any]:
def do_verify(structured_data: Dict[str, Any]) -> Dict[str, Any]:
''' The is the testable specification of the format that is output by
hector. Since multiple formats are compiled into this one verification is
on by default. The impact on performance is neglectable. '''
def assert_submission(submission):
assert 'code' in submission, 'A submission needs code'
assert type(submission['code']) in [str, list], 'Code is readable'
assert 'type' in submission, 'A submission has to be of some type'
assert 'tests' in submission, 'A tests dict has to be present.'
......@@ -124,21 +165,34 @@ def assert_correct_format(structured_data: Dict[str, Any]) -> Dict[str, Any]:
len(student['submissions'])))
assert 'fullname' in student, 'Student needs a name %s' % student
assert 'identifier' in student, 'Student needs a unique identifier'
assert 'username' in student, 'Student needs a unique username'
def base_assert():
assert 'students' in structured_data, 'No students found'
assert 'tasks' in structured_data, 'No tasks found'
def assert_task(task):
assert 'type' in task, 'Task has no type'
assert 'title' in task, 'Task must have a title'
try:
base_assert()
students = structured_data['students'].values()
number_of_submissions = len(structured_data['tasks'])
students = structured_data['students']
tasks = structured_data['tasks']
number_of_submissions = len(tasks)
for task in tasks:
try:
assert_task(task)
except AssertionError as err:
raise err
for student in students:
try:
assert_student(student)
assert number_of_submissions == len(student['submissions']), \
'%s does not have enough submissoins' % student['fullname']
'%s does not have enough submissions' % student['fullname']
for submission in student['submissions']:
try:
......@@ -155,11 +209,131 @@ def assert_correct_format(structured_data: Dict[str, Any]) -> Dict[str, Any]:
return structured_data
post_processors = [
anonymise,
# add_meta_information,
# assert_correct_format
]
@student_replacer
def do_readable_code(students: Dict[str, Union[str, List]]):
    ''' Make student code readable: split every submission's code string
    into a list of lines so the JSON output is diffable/human-readable. '''
    for student in students:
        submissions = student['submissions']
        for entry in submissions:
            entry['code'] = entry['code'].split('\n')
    return students
@student_replacer
def do_anonymous(students: Dict[str, Union[str, List]]):
    ''' Recreates most of the data and includes fields over a whitelist
    therefore ensuring that no personal information remains in the data '''
    DELIMITER = '-'
    # Restrict the xkcdpass wordlist to 7-letter words so the generated
    # pseudonyms look uniform.
    wordfile = xp.locate_wordfile()
    words = xp.generate_wordlist(wordfile=wordfile,
                                 min_length=7,
                                 max_length=7)

    def get_random_xkcd_identifier():
        # e.g. "example-another" -- two random words joined by DELIMITER
        return xp.generate_xkcdpassword(words, numwords=2, delimiter=DELIMITER)

    # Map each random pseudonym to the original student record.
    # NOTE(review): pseudonyms are random dict keys; a collision would
    # silently drop a student here -- confirm the word space is large enough.
    reverser = {get_random_xkcd_identifier(): s for s in students}

    # Rebuild the student list over a whitelist of fields only; the
    # pseudonym doubles as fullname (capitalised words), identifier and
    # username. Submissions are passed through untouched.
    students_anonymous = [{
        'fullname': ' '.join(w[0].capitalize() + w[1:]
                             for w in pseudo_identifier.split(DELIMITER)),
        'identifier': pseudo_identifier,
        'username': pseudo_identifier,
        'submissions': student['submissions']
    } for pseudo_identifier, student in reverser.items()]

    # Persist the semicolon-separated reversing table so the anonymisation
    # can be undone later (path comes from the -t/--personal-secret-table
    # command line argument).
    with open(args.personal_secret_table, 'w') as out:
        print('key;previous identifier;fullname', file=out)
        print('\n'.join('%s;%s;%s' % (anonymous_key,
                                      data['identifier'],
                                      data['fullname'])
                        for anonymous_key, data in reverser.items()), file=out)

    return students_anonymous
@student_replacer
def do_encrypt(students):
    ''' Encrypt every personal field of every student with a freshly
    generated Fernet key and hand that key over to the user. '''
    # Init Crypto. See the module documentation on what actually happens here,
    # then read all about those methods and then go study number theory. Never
    # roll your own custom crypto ;-)
    key = Fernet.generate_key()
    aes = Fernet(key)

    def encrypt(clear: str) -> str:
        # Fernet output is wrapped in another base64 layer so the stored
        # value is guaranteed plain ASCII text.
        return base64.b64encode(aes.encrypt(clear.encode())).decode('utf-8')

    output_the_key_to_the_user(key)
    return transform(students, encrypt)
@student_replacer
def do_decrypt(students):
    ''' Reverse a previous --encrypt run. Prompts the user for the key. '''
    def decrypt(cipher: str) -> str:
        # Undo the outer base64 layer first, then the Fernet encryption.
        return aes.decrypt(base64.b64decode(cipher.encode())).decode('utf-8')

    try:
        key = getpass.getpass('[Q] Give me the decryption key: ')
        aes = Fernet(key)
        return transform(students, decrypt)
    except Exception as err:
        # Deliberately broad: any failure (malformed key, failed MAC,
        # bad base64) means the data cannot be decrypted -- abort.
        abort('Your key is bad (%s).' % err)
# ======================= =- Post processor helper -= ======================= #
def transform(students, function):
    ''' Apply *function* to every personal field (fullname, identifier,
    username) of each student; submissions are passed through unchanged. '''
    personal_fields = ('fullname', 'identifier', 'username')
    result = []
    for student in students:
        record = {field: function(student[field]) for field in personal_fields}
        record['submissions'] = student['submissions']
        result.append(record)
    return result
def output_the_key_to_the_user(key: bytes):
    ''' Ask the user where to store a freshly generated encryption key and
    write it there: stdout by default, otherwise a file. Refuses to
    overwrite an existing file without confirmation and aborts rather
    than write to an unusable destination. '''
    def to_file(filepath: str):
        # 'wb' because the key is bytes, not text.
        with open(filepath, 'wb') as file:
            file.write(key)
        log.info('Key written to %s. Keep it safe.', filepath)

    def to_stdout():
        # Fix: message previously read "Key this key safe".
        print('Encrypted and signed. Keep this key safe or bad things happen')
        print(' --------->> %s <<--------- ' % key.decode('latin-1'))

    output = input('[Q] The data has been encrypted. ' +
                   'Where should I put the key? (stdout) ') or 'stdout'

    if output == 'stdout':
        to_stdout()
    elif not os.path.exists(output):
        to_file(output)
    elif os.path.isfile(output):
        confirm = input('[Q] File exists. Want to override? (Y/n)') or 'y'
        if confirm.lower().startswith('y'):
            to_file(output)
        else:
            abort('No data was written. Bye.')
    else:
        # Destination exists but is not a regular file (e.g. a directory).
        log.error('I cannot write to %s.', output)
        abort()
def get_active_postprocessors():
    ''' Yield the post-processors that were enabled on the command line,
    in the fixed order they must run. Each processor do_<name> is active
    when the parsed argument <name> is truthy. '''
    ordered_processors = (
        do_add_meta,
        do_verify,
        do_readable_code,
        do_anonymous,
        do_encrypt,
        do_decrypt
    )
    for processor in ordered_processors:
        flag = processor.__name__[len('do_'):]
        if getattr(args, flag):
            yield processor
# ============================== =- Hektor -= =============================== #
......@@ -168,6 +342,8 @@ def _preprocessing(filepath: str) -> str:
def _processing(filepath: str) -> Dict[str, Any]:
''' Find the first apropriate converter and run pass it the path to the
datafile. '''
try:
return next(converter().convert(filepath)
for converter in Converter.implementations()
......@@ -177,21 +353,26 @@ def _processing(filepath: str) -> Dict[str, Any]:
', '.join(f
for c in Converter.implementations()
for f in c.accepted_files))
abort('Program stopped prematurely. No data was written. Bye.')
def _postprocessing(structured_data: Dict[str, Any]) -> Dict[str, Any]:
return compose(*post_processors)(structured_data)
return compose(*get_active_postprocessors())(structured_data)
def main():
global args
args = parseme()
setup_argparse()
setup_logging()
log.debug('Active post processors %s', list(get_active_postprocessors()))
processing = compose(_postprocessing, _processing, _preprocessing)
data = processing(args.input)
destination = args.output.split('.json')[0] + '.json'
with open(destination, 'w') as output:
json.dump(data, output, indent=2, sort_keys=True)
log.info('Wrote exam data to %s', destination)
......
......@@ -3,3 +3,4 @@
from lib.generic import Converter # noqa
from lib.qti import QTIConverter # noqa
from lib.xls import XLSConverter # noqa
from lib.identity import JSONIdentityConverter # noqa
......@@ -7,7 +7,8 @@ def all_subclasses(cls):
class Converter(metaclass=abc.ABCMeta):
""" A base class if we incorporate more converters in the future """
""" A base class if we incorporate more converters in the future. New
implementations need to be registered in this modules __init__.py """
@abc.abstractmethod
def convert(self):
......
import json
import lib.generic
class JSONIdentityConverter(lib.generic.Converter):
    """ Identity converter: accepts a JSON file that hektor itself produced
    earlier, so that a post-processor can be run on it again. """

    accepted_files = ('.json',)

    def convert(self, filepath):
        # The file already has the target structure -- just parse it.
        with open(filepath) as source:
            return json.load(source)
......@@ -2,13 +2,12 @@ import base64
import re
import zipfile
from lxml import etree
import lib.generic
from lxml import etree
class QTIConverter(lib.generic.Converter):
"""docstring for XLSConverter"""
""" XLSConverter class (Currently raw xml input is not supported) """
accepted_files = ('.zip', '.xml')
......@@ -51,12 +50,11 @@ def process_qti(tree, only_of_type=('assSourceCode',), **kwargs):
def process_users(results_tree):
return {row.attrib['active_id']: dict(row.attrib)
for row in results_tree.xpath(users)}
return [dict(row.attrib) for row in results_tree.xpath(users)]
def convert_code(text):
    ''' Decode a base64 encoded submission and split it into lines.

    Fix: removed a second, unreachable return statement that followed this
    one; the reachable behaviour (decode, then split on newlines) is kept.
    '''
    return base64.b64decode(text).decode('utf-8').split('\n')
def process_solutions(results_tree, task_id):
......@@ -67,14 +65,15 @@ def process_solutions(results_tree, task_id):
def process_results(tree, qti=(), **kwargs):
questions = qti
users = process_users(tree)
for user in users.values():
id2user = {user['active_id']: user for user in users}
for user in users:
user['submissions'] = []
for question in questions:
solutions = process_solutions(tree, question)
for question_key, question in questions.items():
solutions = process_solutions(tree, question_key)
for user_id, solution in solutions.items():
users[user_id]['submissions'].append({'type': question,
'code': solution,
'tests': {}})
id2user[user_id]['submissions'].append({'type': question['title'],
'code': solution,
'tests': {}})
return users
......@@ -128,8 +127,9 @@ ignore_user_fields = ("user_fi",
def add_users(base, data):
for userdata in data['results'].values():
for userdata in data['results']:
userdata['identifier'] = userdata['user_fi']
userdata['username'] = userdata['user_fi']
for field in ignore_user_fields:
userdata.pop(field)
base['students'] = data['results']
......
#!/usr/local/bin/python3
""" a simple script that converts ilias exam output to readable json
The json output will look like this:
{
"max.mustermann": { <<--- OR all uppercase letter of the name + username/matrikel_no # noqa: E501
"matrikel_no": "12345678",
"name": "Mustermann, Max",
"task_list": {
"[task_id_1]": "print Hello World!",
....,
"[task_id_n]": "#include <stdio.h> etc."
}
},
... and so on
}
usage: convert.py [-h] [-u USERNAMES] [-n NUMBER_OF_TASKS] INFILE OUTFILE
positional arguments:
......@@ -39,9 +25,8 @@ import re
import urllib.parse
from collections import defaultdict, namedtuple
from xlrd import open_workbook
import lib.generic
from xlrd import open_workbook
class XLSConverter(lib.generic.Converter):
......@@ -62,24 +47,31 @@ user_t = namedtuple('user_head', 'name matrikel_no')
task_head_re = re.compile(r'^Quellcode Frage (?P<title>.*?) ?(\d{8})?$')
# for parsing the weird mat no
matno_re = re.compile(r'^(?P<matrikel_no>\d{8})-(\d+)-(\d+)$')
matno_re = re.compile(r'^(?P<matrikel_no>\d+)-(\d+)-(\d+)$')
COLUMNS_BEFORE_TASKS = 19
TABWIDTH = 4
def converter(infile, usernames=None, number_of_tasks=0,):
def converter(infile, usernames=None, number_of_tasks=0):
# Modify these iterators in order to change extraction behaviour
def sheet_iter_meta(sheet):
def sheet_iter_meta(sheet, silent=True):
""" yield first and second col entry as tuple of (name, matnr) """
for row in (sheet.row(i) for i in range(1, sheet.nrows)):
match = re.search(matno_re, row[1].value)
if match:
if not silent and len(match.group('matrikel_no')) != 8:
print('[WARN] %s has odd matrikelno %s' %
(row[0].value, match.group('matrikel_no')))
yield row[0].value, match.group('matrikel_no')
else:
if not silent:
print('[WARN] could not parse row %s' % row[0])
yield row[0].value, row[1].value
def sheet_iter_data(sheet):
""" yields all source code titel and code tuples """
""" yields all source code title and code tuples """
def row(i):
return sheet.row(i)
for top, low in ((row(i), row(i + 1)) for i in range(sheet.nrows - 1)):
......@@ -91,8 +83,8 @@ def converter(infile, usernames=None, number_of_tasks=0,):
meta, *data = open_workbook(infile, open(os.devnull, 'w')).sheets()
# nice!
name2mat = dict(sheet_iter_meta(meta))
assert len(name2mat) == len(data), f'{len(name2mat)} names != {len(data)} sheets' # noqa
name2mat = dict(sheet_iter_meta(meta, silent=False))
assert len(name2mat) == len(data), '{} names != {} sheets'.format(len(name2mat), len(data)) # noqa
# from xls to lists and namedtuples
# [ [user0, task0_h, code0, ..., taskn, coden ], ..., [...] ]
......@@ -108,7 +100,9 @@ def converter(infile, usernames=None, number_of_tasks=0,):
'type': 'SourceCode'
}
root[-1].append(task.group('title'))
root[-1].append(urllib.parse.unquote(code).strip())
root[-1].append(urllib.parse
.unquote(code)
.replace('\t', ' ' * TABWIDTH))
if number_of_tasks:
for (user, *task_list) in sorted(root, key=lambda u: u[0].name):
......@@ -127,20 +121,17 @@ def converter(infile, usernames=None, number_of_tasks=0,):
usernames = {user.name: get_username(user) for (user, *_) in root}
return {
'students': {
usernames[user.name]: {
'fullname': user.name,
'email': mat_to_email[name2mat[user.name]],
'identifier': name2mat[user.name],
'submissions': [
{
"type": task,
"code": code,
"tests": {},
} for task, code in zip(task_list[::2], task_list[1::2])
]
} for (user, *task_list) in sorted(root, key=lambda u: u[0].name)
},
'students': [{
'fullname': user.name,
'username': usernames[user.name],
'email': mat_to_email[name2mat[user.name]],
'identifier': name2mat[user.name],
'submissions': [{
"type": task,
"code": code,
"tests": {},
} for task, code in zip(task_list[::2], task_list[1::2])]
} for (user, *task_list) in sorted(root, key=lambda u: u[0].name)],
'tasks': list(tasks.values())
}
......@@ -150,4 +141,4 @@ def write_to_file(json_dict, outfile):
with open(outfile, "w") as out:
json.dump(json_dict, out, indent=2)
print(f"Wrote data to {outfile}. Done.")
print("Wrote data to %s. Done." % outfile)
......@@ -4,7 +4,7 @@ from setuptools import setup
setup(
name='hektor',
version='0.2',
version='0.3.5',
description='A QTI-XML/XLS to JSON converter for humans',
author='Jan Maximilian Michal',
author_email='mail@janmax.org',
......@@ -14,5 +14,6 @@ setup(
install_requires=["lxml~=4.1.1",
"xlrd~=1.1.0",
"xkcdpass~=1.16.0"],
py_modules=['hektor']
py_modules=['hektor'],
packages=['lib']
)