importer.py

import csv
import json
import os
import readline
from typing import Callable

from django.db import transaction

import util.convert
import util.processing
from core.models import UserAccount as User
from core.models import ExamType, Feedback, Submission, SubmissionType, Test
from util.factories import GradyUserFactory
from util.messages import info, warn

WELCOME = '''
   ______               __         ____                           __
  / ____/________ _____/ /_  __   /  _/___ ___  ____  ____  _____/ /____  _____
 / / __/ ___/ __ `/ __  / / / /   / // __ `__ \/ __ \/ __ \/ ___/ __/ _ \/ ___/
/ /_/ / /  / /_/ / /_/ / /_/ /  _/ // / / / / / /_/ / /_/ / /  / /_/  __/ /
\____/_/   \__,_/\__,_/\__, /  /___/_/ /_/ /_/ .___/\____/_/   \__/\___/_/
                      /____/                /_/
'''

HISTFILE = '.importer_history'
RECORDS = '.importer'
PASSWORDS = '.importer_passwords'

YES = 'Y/n'
NO = 'y/N'

valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}

ORIGIN_ORDER = {
    Feedback.WAS_EMPTY,
    Feedback.DID_NOT_COMPILE,
    Feedback.COULD_NOT_LINK,
    Feedback.FAILED_UNIT_TESTS,
}

TEST_ORDER = (
    util.processing.EmptyTest.__name__,
    util.processing.CompileTest.__name__,
    util.processing.LinkTest.__name__,
    util.processing.UnitTestTest.__name__,
)

FEEDBACK_MAPPER = dict(zip(TEST_ORDER, ORIGIN_ORDER))

user_factory = GradyUserFactory()


class chdir_context(object):
    """
    Step into a directory temporarily.
    """

    def __init__(self, path):
        self.old_dir = os.getcwd()
        self.new_dir = path

    def __enter__(self):
        info(f'Changing to {self.new_dir}')
        os.chdir(self.new_dir)

    def __exit__(self, *args):
        os.chdir(self.old_dir)
        info(f'Returned to {self.old_dir}')


def i(prompt: str, default: str='', is_path: bool=False, is_file: bool=False):
    if default is YES or default is NO:
        answer = valid[input(f'[Q] {prompt} ({default}): ').lower() or (
            'y' if YES == default else 'n')]
    elif default:
        answer = input(f'[Q] {prompt} ({default}): ') or default
    else:
        answer = input(f'[Q] {prompt}: ')

    if (is_path or is_file) and \
            not os.path.exists(answer) or is_file and \
            not os.path.isfile(answer):
        path_or_type = "path" if is_path else "file"
        warn(f'The {path_or_type} does not exist. Please try again.')
        return i(prompt, default, is_path, is_file)

    return answer


def add_feedback_if_test_recommends_it(test_obj):
    available_tests = util.processing.Test.available_tests()

    if test_obj.label == available_tests[test_obj.name].label_failure \
            and not hasattr(test_obj.submission, 'feedback') \
            and not test_obj.name == util.processing.UnitTestTest.__name__:
        return Feedback.objects.update_or_create(
            of_submission=test_obj.submission,
            defaults={
                'score': 0,
                'origin': FEEDBACK_MAPPER[test_obj.name],
            }
        )


def add_tests(submission_obj, tests):
    auto_correct, _ = User.objects.get_or_create(
        username='auto_correct',
        defaults={'is_active': False}
    )

    for name, test_data in ((name, tests[name]) for name in TEST_ORDER):
        test_obj, created = Test.objects.update_or_create(
            name=test_data['name'],
            submission=submission_obj,
            defaults={
                'label': test_data['label'],
                'annotation': test_data['annotation'],
            }
        )
        add_feedback_if_test_recommends_it(test_obj)


def add_submission(student_obj, code, tests, type):

    submission_type = SubmissionType.objects.get(name=type)

    submission_obj, _ = Submission.objects.update_or_create(
        type=submission_type,
        student=student_obj,
        defaults={'text': code}
    )

    if tests:
        add_tests(submission_obj, tests)


@transaction.atomic
def call_loader(func: Callable) -> None:
    """ This function handles if a function will be executed at all. Currently
    it just checks in the RECORDS file for the name of the function. If it is
    present the function will not be executed

    Args:
        func (Callable): the loader specified below
    """
    if os.path.exists(RECORDS):
        with open(RECORDS, 'r') as records_f:
            done = [line.strip() for line in records_f]

        if func.__name__ in done:
            warn(f'{func.__name__} has already been processed once.')
            if not i('Proceed anyway?', NO):
                return

    func()  # This executes the specified loader

    with open(RECORDS, 'a') as records_f:
        records_f.write(func.__name__)
        records_f.write('\n')

    info(f'{func.__name__} is done.')


def do_convert_xls():

    ans = i('''Do you want to convert the ILIAS .xls output to .json?''', YES)
    if not ans:
        return

    infile = i('Please provide the path to the .xls file', is_file=True)
    outfile = i('Where should the output go?', 'submissons.json')

    json_dict = util.convert.converter(infile)
    util.convert.write_to_file(json_dict, outfile)


def do_load_submission_types():

    print(
        '''For the following import you need three files:

    1) A .csv file where the columns are: id, name, score
    2) A path to a directory where I can find sample solutions named
        <id>-lsg.c
    3) A path to a directory where I can find HTML files with an accurate
        description of the task. File name pattern has to be: <id>.html

    Example:
        $ cat submission_types.csv
        a01, Alpha Team, 10
        a02, Beta Distribution, 10
        a03, Gamma Ray, 20

        $ tree -L 2
        .
        ├── code-lsg
        │   ├── a01-lsg.c
        │   ├── a02-lsg.c
        │   └── a03-lsg.c
        └── html
            ├── a01.html
            ├── a02.html
            └── a03.html
    ''')

    path = i('Where are your files located?', '.', is_path=True)

    with chdir_context(path):
        submission_types_csv = i('CSV file', 'submission_types.csv')
        lsg_dir = i('solution dir', 'code-lsg')
        desc_dir = i('descriptions dir', 'html')

        with open(submission_types_csv, encoding='utf-8') as tfile:
            csv_rows = [row for row in csv.reader(tfile)]

        for row in csv_rows:
            tid, name, score = (col.strip() for col in row)
            with \
                open(os.path.join(lsg_dir, tid + '-lsg.c'),
                     encoding='utf-8') as lsg, \
                open(os.path.join(desc_dir, tid + '.html'),
                     encoding='utf-8') as desc:
                data = {
                    'name': name,
                    'description': desc.read(),
                    'solution': lsg.read(),
                    'full_score': int(score),
                }
            _, created = SubmissionType.objects.update_or_create(
                name=name,
                defaults=data
            )
            info(f'{"Created" if created else "Updated"} {name}')


def do_load_module_descriptions():

    print('''
    This loader imports descriptions of modules in an exam. This step is purely
    optional -- Grady works just fine without these information. If you want to
    distinguish students within one instance or give information about the
    grading type you should provide this info.

    CSV file format: module_reference, total_score, pass_score, pass_only

    Example:
        B.Inf.1801,  90, 45, yes
        B.Mat.31415, 50, 10, no
    ''')

    module_description_csv = i(
        'Where is the file?', 'modules.csv', is_file=True)

    with open(module_description_csv, encoding='utf-8') as tfile:
        csv_rows = [row for row in csv.reader(tfile)]

    for row in csv_rows:
        data = {
            field: kind(data) for field, kind, data in zip(
                ('module_reference', 'total_score', 'pass_score', 'pass_only'),
                (str, int, int, lambda x: x == 'yes'),
                (col.strip() for col in row)
            )
        }

        _, created = ExamType.objects.update_or_create(
            module_reference=data['module_reference'],
            defaults=data,
        )

        modification = "Created" if created else "Updated"
        info(f'{modification} ExamType {data["module_reference"]}')


def do_preprocess_submissions():

    print('''
    Preprocessing might take some time depending on the amount of data
    and the complexity of the programs and the corresponding unit tests. You
    can specify what test you want to run.

    Tests do depend on each other. Therefore specifying a test will also
    result in running all its dependencies\n''')

    test_enum = dict(enumerate(util.processing.Test.available_tests()))

    print('The following test are available:\n')
    print('\t[q] Do nothing')
    for j, test in test_enum.items():
        print(f'\t[{j}] {test}')
    print()

    test_index = i('Which tests do you want to run?')

    if not test_index or test_index == 'q':
        return

    test_to_run = test_enum[int(test_index)]
    location = i('Where do you keep the specifications for the tests?',
                 'anon-export', is_path=True)

    with chdir_context(location):
        descfile = i(
            'Please provide usage for sample solution', 'descfile.txt',
            is_file=True)
        binaries = i(
            'Please provide executable binaries of solution', 'bin',
            is_path=True)
        objects = i(
            'Please provide object files of solution', 'objects',
            is_path=True)
        submissions = i(
            'Please provide the student submissions', 'binf1601-anon.json',
            is_file=True)
        headers = i(
            'Please provide header files if any', 'code-testing',
            is_path=True)

        info('Looks good. The tests mights take some time.')
        processed_submissions = util.processing.process(descfile,
                                                        binaries,
                                                        objects,
                                                        submissions,
                                                        headers,
                                                        test_to_run)
    output_f = i('And everything is done. Where should I put the results?',
                 f'{submissions.rsplit(".")[0]}.processed.json')

    with open(output_f, 'w+') as outfile:
        json.dump(processed_submissions, outfile,
                  sort_keys=True, indent=4)
    info('Wrote processed data to %s' % os.path.join(os.curdir, output_f))


def do_load_submissions():

    file = i('Get me the file with all the submissions',
             'submissions.json', is_file=True)

    exam = {}
    if ExamType.objects.all() and \
            i('Do you want to add module/exam information?', NO):
        exam_query_set = ExamType.objects.all()
        print('You have the following choices:\n')
        for j, exam_type in enumerate(exam_query_set):
            print(f'\t[{j}] {exam_type.module_reference}')
        print()

        exam = i('Choose wisely')
        exam = {'exam': exam_query_set[int(exam)]}

    with open(file) as submission_file:
        submissions = json.JSONDecoder().decode(submission_file.read())

    for username, data in submissions.items():
        student_obj = user_factory.make_student(username,
                                                **exam,
                                                **data).student

        for submission_obj in data['submissions']:
            add_submission(student_obj, **submission_obj)


def do_load_tutors():

    print('Please import tutor users by providing one name per line')
    tutors = i('List of tutors', 'tutors', is_file=True)

    with open(tutors) as tutors_f:
        for tutor in tutors_f:
            user_factory.make_tutor(tutor.strip(), store_pw=True)


def do_load_reviewer():

    print('Please import reviewer users by providing one name per line')
    reviewers = i('List of reviewers', 'reviewers', is_file=True)

    with open(reviewers) as reviewers_f:
        for reviewer in reviewers_f:
            user_factory.make_reviewer(reviewer.strip(),
                                       is_staff=True,
                                       store_pw=True)


call_order = (
    do_convert_xls,
    do_load_submission_types,
    do_load_module_descriptions,
    do_preprocess_submissions,
    do_load_submissions,
    do_load_tutors,
    do_load_reviewer
)


def start():

    if os.path.exists(HISTFILE):
        readline.read_history_file(HISTFILE)

    print(WELCOME + '''

    Welcome to the Grady import script!

    This script aims at making the setup of the database as easy as possible.
    At the same time it serves as a documentation on how data is imported into
    Grady. Let\'s dive right in.\n''')

    try:
        print('The following sub importers are available:\n')
        for fid, func in enumerate(call_order):
            print(f'\t[{fid}] {func.__name__}')
        print('\t[q] exit')
        print()

        fid = i('Choose a number or hit enter to start at the beginning')

        if not fid:
            for func in call_order:
                call_loader(func)
        elif fid in ('q', 'quit', 'exit'):
            return
        elif not 0 <= int(fid) < len(call_order):
            warn('There is no loader with this number')
        else:
            call_loader(call_order[int(fid)])

    except (EOFError, KeyboardInterrupt) as err:
        print()
        return
    except FileNotFoundError as err:
        raise
    except Exception as err:
        import traceback
        traceback.print_exc()
    finally:
        readline.write_history_file(HISTFILE)