Skip to content
Snippets Groups Projects
Verified Commit 4aa90278 authored by Jan Maximilian Michal's avatar Jan Maximilian Michal
Browse files

Added encryption/decryption capabilities

* Also the command line options should now work as expected
* Added an identity converter that reads an existing .json file back in unchanged.
  This allows post-processing of files that have already been created.
* Breaking changes to the data format: students is now a list
* Bump python version to 3.5 to support typing
parent c5333fce
No related branches found
No related tags found
No related merge requests found
Pipeline #
...@@ -10,3 +10,4 @@ deploy.sh ...@@ -10,3 +10,4 @@ deploy.sh
.DS_Store .DS_Store
*.xls *.xls
.venv/ .venv/
testall.sh
image: python:3.4 image: python:3.5
before_script: before_script:
- python -V - python -V
- pip install -e .
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/pip-cache"
cache:
paths:
- "$CI_PROJECT_DIR/pip-cache"
variables: variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/pip-cache" PIP_CACHE_DIR: "$CI_PROJECT_DIR/pip-cache"
...@@ -17,8 +11,11 @@ cache: ...@@ -17,8 +11,11 @@ cache:
paths: paths:
- "$CI_PROJECT_DIR/pip-cache" - "$CI_PROJECT_DIR/pip-cache"
test: flake8:
script: script:
- pip install -e .
- pip install flake8 - pip install flake8
- flake8 hektor.py bin lib - flake8 hektor.py bin lib
test:
script:
- hektor -h
...@@ -14,4 +14,4 @@ upload: dist ...@@ -14,4 +14,4 @@ upload: dist
twine upload dist/* twine upload dist/*
tag: tag:
git tag $(python setup.py --version) git tag `python setup.py --version`
#!/usr/bin/env python3 #!/usr/bin/env python3
import hektor
import sys import sys
import hektor
if __name__ == '__main__': if __name__ == '__main__':
if sys.version_info < (3, 4): if sys.version_info < (3, 5):
sys.exit("At least Python 3.4 is required.") sys.exit("At least Python 3.4 is required.")
hektor.main() hektor.main()
import argparse import argparse
import base64
import functools import functools
import getpass
import json import json
import logging import logging
import os import os
from typing import Any, Callable, Dict, Sequence from typing import Any, Callable, Dict, List, Sequence, Union
from cryptography.fernet import Fernet
from xkcdpass import xkcd_password as xp from xkcdpass import xkcd_password as xp
from lib import Converter from lib import Converter
# ============================== =- Logging -= ============================== # # ============================== =- Logging -= ============================== #
log = logging.getLogger(__name__) def setup_logging():
log.setLevel(logging.DEBUG) ''' Make the logger globally available by hide intermediate handler,
filters and formatter variables '''
global log
level = logging.DEBUG if args.verbose else logging.INFO
log = logging.getLogger(__name__)
log.setLevel(level)
# create console handler and formatter # create console handler and formatter
console = logging.StreamHandler() console = logging.StreamHandler()
console.setLevel(logging.DEBUG) console.setLevel(level)
formatter = logging.Formatter('[%(levelname)s] %(message)s') formatter = logging.Formatter('[%(levelname)s] %(message)s')
# add formatter to console handler # add formatter to console handler
console.setFormatter(formatter) console.setFormatter(formatter)
log.addHandler(console) log.addHandler(console)
# ============================= =- argparse -= ============================== # # ============================= =- argparse -= ============================== #
def parseme(): def setup_argparse():
global args
def file_exists(parser, filepath: str) -> str: def file_exists(parser, filepath: str) -> str:
if not os.path.isfile(filepath): if not os.path.isfile(filepath):
parser.error('Not a file %s' % filepath) parser.error('Not a file %s' % filepath)
return filepath return filepath
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
# General purpose arguments
parser.add_argument(
'-v', '--verbose',
action='store_true',
help='enable verbose logging (Level: DEBUG)')
# Input output files
parser.add_argument( parser.add_argument(
'input', 'input',
metavar='DATA', metavar='DATA',
...@@ -42,7 +61,21 @@ def parseme(): ...@@ -42,7 +61,21 @@ def parseme():
'output', 'output',
metavar='OUTFILE', metavar='OUTFILE',
help='destination of converter output (JSON)') help='destination of converter output (JSON)')
parser.add_argument(
# Post-processor flags
remove_personal = parser.add_mutually_exclusive_group()
remove_personal.add_argument(
'-e', '--encrypt',
action='store_true',
help='''strip all personal information and provide decryption key
(AES 128-bit, CBC mode, PKCS7 for padding, HMAC with SHA-256
for integrity)'''
)
remove_personal.add_argument(
'-d', '--decrypt',
action='store_true',
help='Reverse previous AES encryption.')
remove_personal.add_argument(
'-a', '--anonymous', '-a', '--anonymous',
action='store_true', action='store_true',
help='replace personal information and create a reversing table') help='replace personal information and create a reversing table')
...@@ -51,17 +84,21 @@ def parseme(): ...@@ -51,17 +84,21 @@ def parseme():
help='where to store personal information (CSV)', help='where to store personal information (CSV)',
) )
parser.add_argument( parser.add_argument(
'-m', '--meta', '-m', '--add-meta',
action='store_true', action='store_true',
help='add meta information (lecturer, course title)' help='add meta information (lecturer, course title)'
) )
parser.add_argument(
'--verify',
action='store_true',
default=True,
help='asserts that output data will be in a certain format'
)
args = parser.parse_args() args = parser.parse_args()
if args.anonymous != (args.personal_secret_table is not None): if args.anonymous != (args.personal_secret_table is not None):
parser.error('Need an output for anonymous mode') parser.error('Please specify where to write the mapping (see -t)')
return args
# ========================== =- General Purpose -= ========================== # # ========================== =- General Purpose -= ========================== #
...@@ -75,45 +112,24 @@ def compose(*functions: Sequence[Callable]) -> Callable: ...@@ -75,45 +112,24 @@ def compose(*functions: Sequence[Callable]) -> Callable:
lambda x: x) lambda x: x)
# ========================== =- Post processors -= ========================== # def abort(message='Bye.'):
def anonymise(structured_data: Dict[str, Any]) -> Dict[str, Any]: ''' In case anything goes wrong. Basically a dump wrapper around exit '''
DELIMITER = '-' log.info(message)
wordfile = xp.locate_wordfile() exit(1)
words = xp.generate_wordlist(wordfile=wordfile,
min_length=7,
max_length=7)
def get_identifier():
return xp.generate_xkcdpassword(words, numwords=2, delimiter=DELIMITER)
students = structured_data.pop('students')
reverser = {get_identifier(): s for s in students.values()}
students_anon = {r: {
'fullname': ' '.join(w[0].capitalize() + w[1:]
for w in r.split(DELIMITER)),
'identifier': r,
'submissions': student['submissions']
} for r, student in zip(reverser, students.values())}
with open(args.personal_secret_table, 'w') as out:
print('key, previous identifier, fullname', file=out)
print('\n'.join(anon + '\t' + '\t'.join(v
for v in data.values()
if type(v) is str)
for anon, data in reverser.items()), file=out)
structured_data.update({'students': students_anon})
return structured_data
def add_meta_information(structured_data: Dict[str, Any]) -> Dict[str, Any]: # ========================== =- Post processors -= ========================== #
if args.meta: def do_add_meta(structured_data: Dict[str, Any]) -> Dict[str, Any]:
structured_data['author'] = input('[Q] author: ') ''' Asks the user for metadata about the exam '''
structured_data['exam'] = input('[Q] course title: ') structured_data['author'] = input('[Q] author: ')
structured_data['exam'] = input('[Q] course title: ')
return structured_data return structured_data
def assert_correct_format(structured_data: Dict[str, Any]) -> Dict[str, Any]: def do_verify(structured_data: Dict[str, Any]) -> Dict[str, Any]:
''' This is the testable specification of the format that is output by
hektor. Since multiple formats are compiled into this one, verification is
on by default. The impact on performance is negligible. '''
def assert_submission(submission): def assert_submission(submission):
assert 'code' in submission, 'A submission needs code' assert 'code' in submission, 'A submission needs code'
assert 'type' in submission, 'A submission has to be of some type' assert 'type' in submission, 'A submission has to be of some type'
...@@ -124,6 +140,7 @@ def assert_correct_format(structured_data: Dict[str, Any]) -> Dict[str, Any]: ...@@ -124,6 +140,7 @@ def assert_correct_format(structured_data: Dict[str, Any]) -> Dict[str, Any]:
len(student['submissions']))) len(student['submissions'])))
assert 'fullname' in student, 'Student needs a name %s' % student assert 'fullname' in student, 'Student needs a name %s' % student
assert 'identifier' in student, 'Student needs a unique identifier' assert 'identifier' in student, 'Student needs a unique identifier'
assert 'username' in student, 'Student needs a unique username'
def base_assert(): def base_assert():
assert 'students' in structured_data, 'No students found' assert 'students' in structured_data, 'No students found'
...@@ -131,14 +148,14 @@ def assert_correct_format(structured_data: Dict[str, Any]) -> Dict[str, Any]: ...@@ -131,14 +148,14 @@ def assert_correct_format(structured_data: Dict[str, Any]) -> Dict[str, Any]:
try: try:
base_assert() base_assert()
students = structured_data['students'].values() students = structured_data['students']
number_of_submissions = len(structured_data['tasks']) number_of_submissions = len(structured_data['tasks'])
for student in students: for student in students:
try: try:
assert_student(student) assert_student(student)
assert number_of_submissions == len(student['submissions']), \ assert number_of_submissions == len(student['submissions']), \
'%s does not have enough submissoins' % student['fullname'] '%s does not have enough submissions' % student['fullname']
for submission in student['submissions']: for submission in student['submissions']:
try: try:
...@@ -155,11 +172,135 @@ def assert_correct_format(structured_data: Dict[str, Any]) -> Dict[str, Any]: ...@@ -155,11 +172,135 @@ def assert_correct_format(structured_data: Dict[str, Any]) -> Dict[str, Any]:
return structured_data return structured_data
post_processors = [ def student_replacer(processor):
anonymise, ''' A simple decorator that is used to remove students and put them back in
# add_meta_information, when the processor is done with them'''
# assert_correct_format
] @functools.wraps(processor)
def processor_closure(structured_data: Dict[str, Any]) -> Dict[str, Any]:
students = structured_data.pop('students')
students_replacement = processor(students)
structured_data['students'] = students_replacement
return structured_data
return processor_closure
@student_replacer
def do_anonymous(students: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    ''' Rebuild every student record from a whitelist of fields so that no
    personal information remains in the returned data.

    Each student receives a unique, random two-word pseudonym that is used as
    fullname (capitalised), identifier and username; only the submissions are
    carried over from the original record. The mapping from pseudonym to the
    original identifier and fullname is written as CSV to the path given in
    args.personal_secret_table. '''
    import csv  # local import keeps this block independent of the file header

    DELIMITER = '-'
    wordfile = xp.locate_wordfile()
    words = xp.generate_wordlist(wordfile=wordfile,
                                 min_length=7,
                                 max_length=7)

    def get_random_xkcd_identifier():
        return xp.generate_xkcdpassword(words, numwords=2, delimiter=DELIMITER)

    def get_unused_identifier(taken):
        # Two students must never share a pseudonym: a collision would drop
        # one of them from the output and from the reversing table.
        candidate = get_random_xkcd_identifier()
        while candidate in taken:
            candidate = get_random_xkcd_identifier()
        taken.add(candidate)
        return candidate

    # Keep (pseudonym, student) pairs in a list. Pairing via dict iteration
    # order is not reliable on Python 3.5 and would mix up students.
    taken = set()
    reverser = [(get_unused_identifier(taken), student)
                for student in students]

    students_anonymous = [{
        'fullname': ' '.join(w[0].capitalize() + w[1:]
                             for w in key.split(DELIMITER)),
        'identifier': key,
        'username': key,
        'submissions': student['submissions']
    } for key, student in reverser]

    # Full names contain spaces, so the table has to be written with proper
    # CSV quoting to stay parseable. newline='' is what the csv module expects.
    with open(args.personal_secret_table, 'w', newline='') as out:
        writer = csv.writer(out)
        writer.writerow(('key', 'previous identifier', 'fullname'))
        writer.writerows((key, student['identifier'], student['fullname'])
                         for key, student in reverser)

    return students_anonymous
@student_replacer
def do_encrypt(students):
# Init Crypto. See the module documentation on what actually happens here,
# then read all about those methods and then go study number theory. Never
# roll your own custom crypto ;-)
key = Fernet.generate_key()
aes = Fernet(key)
def encrypt(clear: str) -> str:
return base64.b64encode(aes.encrypt(clear.encode())).decode('utf-8')
output_the_key_to_the_user(key)
return transform(students, encrypt)
@student_replacer
def do_decrypt(students):
def decrypt(cipher: str) -> str:
return aes.decrypt(base64.b64decode(cipher.encode())).decode('utf-8')
try:
key = getpass.getpass('[Q] Give me the decryption key: ')
aes = Fernet(key)
return transform(students, decrypt)
except Exception as err:
abort('Your key is bad (%s).' % err)
def transform(students, function):
    ''' Build a new list of student records in which fullname, identifier and
    username have been passed through function. The submissions are taken
    over as they are; every other field is dropped. '''
    converted = []
    for record in students:
        replacement = {
            'fullname': function(record['fullname']),
            'identifier': function(record['identifier']),
            'username': function(record['username']),
            'submissions': record['submissions'],
        }
        converted.append(replacement)
    return converted
def output_the_key_to_the_user(key: bytes):
    ''' Ask the user where the encryption key should go and put it there.

    An empty answer or 'stdout' prints the key. Any other answer is taken as
    a file path: a path that does not exist yet is created, an existing
    regular file is only overwritten after confirmation, and anything else
    (e.g. a directory) aborts the program. '''

    def to_file(filepath: str):
        # The key is a secret, so a newly created file is made accessible to
        # its owner only. The mode has no effect on a file that already
        # exists. O_BINARY only exists on Windows and keeps the bytes as is.
        flags = (os.O_WRONLY | os.O_CREAT | os.O_TRUNC |
                 getattr(os, 'O_BINARY', 0))
        descriptor = os.open(filepath, flags, 0o600)
        with os.fdopen(descriptor, 'wb') as file:
            file.write(key)
        log.info('Key written to %s. Keep it safe.', filepath)

    def to_stdout():
        print('Encrypted and signed. Keep this key safe or bad things happen')
        print(' --------->> %s <<--------- ' % key.decode('latin-1'))

    output = input('[Q] The data has been encrypted. ' +
                   'Where should I put the key? (stdout) ') or 'stdout'

    if output == 'stdout':
        to_stdout()
    elif not os.path.exists(output):
        to_file(output)
    elif os.path.isfile(output):
        confirm = input('[Q] File exists. Want to overwrite? (Y/n) ') or 'y'
        if confirm.lower().startswith('y'):
            to_file(output)
        else:
            abort('No data was written. Bye.')
    else:
        log.error('I cannot write to %s.', output)
        abort()
def get_active_postprocessors():
    ''' Lazily yield, in a fixed order, those post processors whose command
    line flag is set. The flag is looked up on args under the name of the
    processor function with its 'do_' prefix removed. '''
    candidates = (do_add_meta, do_verify, do_anonymous, do_encrypt, do_decrypt)

    def is_enabled(processor):
        flag = processor.__name__.split('do_')[1]
        return getattr(args, flag)

    return (processor for processor in candidates if is_enabled(processor))
# ============================== =- Hektor -= =============================== # # ============================== =- Hektor -= =============================== #
...@@ -177,21 +318,26 @@ def _processing(filepath: str) -> Dict[str, Any]: ...@@ -177,21 +318,26 @@ def _processing(filepath: str) -> Dict[str, Any]:
', '.join(f ', '.join(f
for c in Converter.implementations() for c in Converter.implementations()
for f in c.accepted_files)) for f in c.accepted_files))
abort('Program stopped prematurely. No data was written. Bye.')
def _postprocessing(structured_data: Dict[str, Any]) -> Dict[str, Any]: def _postprocessing(structured_data: Dict[str, Any]) -> Dict[str, Any]:
return compose(*post_processors)(structured_data) return compose(*get_active_postprocessors())(structured_data)
def main(): def main():
global args setup_argparse()
args = parseme() setup_logging()
log.debug('Active post processors %s', list(get_active_postprocessors()))
processing = compose(_postprocessing, _processing, _preprocessing) processing = compose(_postprocessing, _processing, _preprocessing)
data = processing(args.input) data = processing(args.input)
destination = args.output.split('.json')[0] + '.json' destination = args.output.split('.json')[0] + '.json'
with open(destination, 'w') as output: with open(destination, 'w') as output:
json.dump(data, output, indent=2, sort_keys=True) json.dump(data, output, indent=2, sort_keys=True)
log.info('Wrote exam data to %s', destination) log.info('Wrote exam data to %s', destination)
......
...@@ -3,3 +3,4 @@ ...@@ -3,3 +3,4 @@
from lib.generic import Converter # noqa from lib.generic import Converter # noqa
from lib.qti import QTIConverter # noqa from lib.qti import QTIConverter # noqa
from lib.xls import XLSConverter # noqa from lib.xls import XLSConverter # noqa
from lib.identity import JSONIdentityConverter # noqa
...@@ -7,7 +7,8 @@ def all_subclasses(cls): ...@@ -7,7 +7,8 @@ def all_subclasses(cls):
class Converter(metaclass=abc.ABCMeta): class Converter(metaclass=abc.ABCMeta):
""" A base class if we incorporate more converters in the future """ """ A base class if we incorporate more converters in the future. New
implementations need to be registered in this modules __init__.py """
@abc.abstractmethod @abc.abstractmethod
def convert(self): def convert(self):
......
import json
import lib.generic
class JSONIdentityConverter(lib.generic.Converter):
    """ Identity converter: loads a json file and returns its content
    unchanged. Use it to import a file that was generated earlier with hektor
    in order to run post processors on it. """

    accepted_files = ('.json',)

    def convert(self, filepath):
        """ Return the parsed content of the JSON file at filepath. """
        # JSON is exchanged as UTF-8; do not depend on the platform locale
        with open(filepath, encoding='utf-8') as json_input:
            return json.load(json_input)
...@@ -8,7 +8,7 @@ import lib.generic ...@@ -8,7 +8,7 @@ import lib.generic
class QTIConverter(lib.generic.Converter): class QTIConverter(lib.generic.Converter):
"""docstring for XLSConverter""" """ XLSConverter class (Currently raw xml input is not supported) """
accepted_files = ('.zip', '.xml') accepted_files = ('.zip', '.xml')
...@@ -51,8 +51,7 @@ def process_qti(tree, only_of_type=('assSourceCode',), **kwargs): ...@@ -51,8 +51,7 @@ def process_qti(tree, only_of_type=('assSourceCode',), **kwargs):
def process_users(results_tree): def process_users(results_tree):
return {row.attrib['active_id']: dict(row.attrib) return [dict(row.attrib) for row in results_tree.xpath(users)]
for row in results_tree.xpath(users)}
def convert_code(text): def convert_code(text):
...@@ -67,14 +66,15 @@ def process_solutions(results_tree, task_id): ...@@ -67,14 +66,15 @@ def process_solutions(results_tree, task_id):
def process_results(tree, qti=(), **kwargs): def process_results(tree, qti=(), **kwargs):
questions = qti questions = qti
users = process_users(tree) users = process_users(tree)
for user in users.values(): id2user = {user['active_id']: user for user in users}
for user in users:
user['submissions'] = [] user['submissions'] = []
for question in questions: for question in questions:
solutions = process_solutions(tree, question) solutions = process_solutions(tree, question)
for user_id, solution in solutions.items(): for user_id, solution in solutions.items():
users[user_id]['submissions'].append({'type': question, id2user[user_id]['submissions'].append({'type': question,
'code': solution, 'code': solution,
'tests': {}}) 'tests': {}})
return users return users
......
#!/usr/local/bin/python3 #!/usr/local/bin/python3
""" a simple script that converts ilias exam output to readable json """ a simple script that converts ilias exam output to readable json
The json output will look like this:
{
"max.mustermann": { <<--- OR all uppercase letter of the name + username/matrikel_no # noqa: E501
"matrikel_no": "12345678",
"name": "Mustermann, Max",
"task_list": {
"[task_id_1]": "print Hello World!",
....,
"[task_id_n]": "#include <stdio.h> etc."
}
},
... ans so on
}
usage: convert.py [-h] [-u USERNAMES] [-n NUMBER_OF_TASKS] INFILE OUTFILE usage: convert.py [-h] [-u USERNAMES] [-n NUMBER_OF_TASKS] INFILE OUTFILE
positional arguments: positional arguments:
...@@ -127,20 +113,17 @@ def converter(infile, usernames=None, number_of_tasks=0,): ...@@ -127,20 +113,17 @@ def converter(infile, usernames=None, number_of_tasks=0,):
usernames = {user.name: get_username(user) for (user, *_) in root} usernames = {user.name: get_username(user) for (user, *_) in root}
return { return {
'students': { 'students': [{
usernames[user.name]: { 'fullname': user.name,
'fullname': user.name, 'username': usernames[user.name],
'email': mat_to_email[name2mat[user.name]], 'email': mat_to_email[name2mat[user.name]],
'identifier': name2mat[user.name], 'identifier': name2mat[user.name],
'submissions': [ 'submissions': [{
{ "type": task,
"type": task, "code": code,
"code": code, "tests": {},
"tests": {}, } for task, code in zip(task_list[::2], task_list[1::2])]
} for task, code in zip(task_list[::2], task_list[1::2]) } for (user, *task_list) in sorted(root, key=lambda u: u[0].name)],
]
} for (user, *task_list) in sorted(root, key=lambda u: u[0].name)
},
'tasks': list(tasks.values()) 'tasks': list(tasks.values())
} }
......
...@@ -4,7 +4,7 @@ from setuptools import setup ...@@ -4,7 +4,7 @@ from setuptools import setup
setup( setup(
name='hektor', name='hektor',
version='0.2.2', version='0.3',
description='A QTI-XML/XLS to JSON converter for humans', description='A QTI-XML/XLS to JSON converter for humans',
author='Jan Maximilian Michal', author='Jan Maximilian Michal',
author_email='mail@janmax.org', author_email='mail@janmax.org',
...@@ -13,6 +13,8 @@ setup( ...@@ -13,6 +13,8 @@ setup(
scripts=['bin/hektor'], scripts=['bin/hektor'],
install_requires=["lxml~=4.1.1", install_requires=["lxml~=4.1.1",
"xlrd~=1.1.0", "xlrd~=1.1.0",
"cryptography~=2.1.4",
"xkcdpass~=1.16.0"], "xkcdpass~=1.16.0"],
py_modules=['hektor', 'lib'] py_modules=['hektor'],
packages=['lib']
) )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment