SpikeTimit_load_input.jl 2.36 KB
Newer Older
Alessio Quaresima's avatar
Alessio Quaresima committed
1
include("../src/SpikeTimit.jl")
alessio.quaresima's avatar
alessio.quaresima committed
2

Alessio Quaresima's avatar
Alessio Quaresima committed
3
4
# path = "/home/cocconat/Documents/Research/phd_project/speech/litwin-kumar_model_thesis/Spike TIMIT"
# Root folder of the Spike TIMIT dataset.
# Set the SPIKETIMIT_PATH environment variable to point at a local copy of the
# data; when it is not set, the original hard-coded location is used.
path = get(ENV, "SPIKETIMIT_PATH", "/home/alequa/Documents/Research/phd_project/speech/Spike TIMIT/")
alessio.quaresima's avatar
alessio.quaresima committed
5
6
7
8
9
10
11
12
13
14
15

#Create the path strings leading to folders in the data set
# Locations of the two dataset splits and of the dictionary file, all under `path`.
train_path = joinpath(path, "train");
test_path = joinpath(path, "test");
dict_path = joinpath(path, "DOC/TIMITDIC.TXT");

# Build one dataset per split, then the dictionary from the dictionary file.
train = SpikeTimit.create_dataset(; dir=train_path)
test = SpikeTimit.create_dataset(; dir=test_path)
dict = SpikeTimit.create_dictionary(; file=dict_path)
##

# Parameters to compute the input_data
16
17
# Dialect codes accepted by the `in_dialect` filter.
target_dialects = Int[1, 2, 5, 8]
# Gender codes accepted by the `in_gender` filter; alternatives are "fm" and "m".
target_gender = "f"
alessio.quaresima's avatar
alessio.quaresima committed
18
19
20
21
22
# Passed as `samples` to `SpikeTimit.select_inputs`.
samples = 10
# NOTE(review): not referenced in the visible part of this script.
n_speakers = 1
# Amount of times the network is presented with each unique stimulus.
repetitions = 75
# Silence duration, in seconds.
silence_time = 0.15
# Number of features combined from input frequencies.
n_features = 10
23
# Target words used both to filter the dataset and to select the inputs.
words = String["that", "she"]
24
25
26
27
## Select a subset of the whole dataset.
# In this case I select all the female speakers from the regional accents
# listed in `target_dialects` that use at least one of the target words.
## I declared these functions because I don't know how to use the DataFrames query properly *_*...
Alessio Quaresima's avatar
Alessio Quaresima committed
28
using DataFramesMeta
29
"""
    in_words(df_words, targets=words)

Return `true` when `df_words` shares at least one element with `targets`.

`targets` defaults to the script-level `words` list, so the one-argument form
still works when broadcast over a column (`in_words.(:words)`).
"""
in_words(df_words, targets=words) = !isdisjoint(df_words, targets)
30
31
"""
    in_dialect(df_dialect, dialects=target_dialects)

Return `true` when `df_dialect` is an element of `dialects`.

`dialects` defaults to the script-level `target_dialects`, so the one-argument
form still works when broadcast over a column (`in_dialect.(:dialect)`).
"""
in_dialect(df_dialect, dialects=target_dialects) = df_dialect in dialects
"""
    in_gender(df_gender, genders=target_gender)

Return `true` when `df_gender` occurs as a substring of `genders`
(for example `"f"` and `"m"` both occur in `"fm"`).

`genders` defaults to the script-level `target_gender`, so the one-argument
form still works when broadcast over a column (`in_gender.(:gender)`).
"""
in_gender(df_gender, genders=target_gender) = occursin(df_gender, genders)
alessio.quaresima's avatar
alessio.quaresima committed
32

33
34
# `@where` is a DataFramesMeta macro; here it filters the rows of `train` using
# the three predicates above, each broadcast over its column.
# NOTE(review): newer DataFramesMeta releases spell this macro `@subset` —
# confirm which version the project pins before upgrading.
speaker = @where(train,in_dialect.(:dialect), in_gender.(:gender), in_words.(:words))
alessio.quaresima's avatar
alessio.quaresima committed
35
36

## Select the inputs
37
# Select the inputs for the target words from the filtered rows.
durations, spikes, labels = SpikeTimit.select_inputs(;
    df=speaker,
    words=words,
    samples=samples,
    n_feat=n_features,
);
alessio.quaresima's avatar
alessio.quaresima committed
38

39
##
alessio.quaresima's avatar
alessio.quaresima committed
40
41
42
43
## Mix them, if you like you can mix differently. Look at the function, it's simple!
# Mix the selected inputs. The trailing bare `silence_time` is shorthand for the
# keyword argument `silence_time=silence_time`.
all_ft, all_n, words_t, phones_t = SpikeTimit.mix_inputs(;durations=durations, spikes=spikes, labels=labels, repetitions=repetitions, silence_time)
# NOTE(review): the return values of these two calls are discarded, unlike the
# `all_ft` conversion further down, which rebinds its result. They only have an
# effect if `convert_to_dt` mutates its argument in place — confirm.
SpikeTimit.convert_to_dt(words_t, 0.1)
SpikeTimit.convert_to_dt(phones_t, 0.1)
44
# Convert `all_ft` with a step of 0.1 and rebind the name to the result.
# NOTE(review): 0.1 is presumably the time step `dt`; its unit is not visible here — confirm.
all_ft = SpikeTimit.convert_to_dt(all_ft, 0.1)
Alessio Quaresima's avatar
Alessio Quaresima committed
45
##
46

alessio.quaresima's avatar
alessio.quaresima committed
47
# Savepoints computed from `words_t`, with `n_measure` set to 10.
words_savepoints = SpikeTimit.get_savepoints(; trans=words_t, n_measure=10)
Alessio Quaresima's avatar
Alessio Quaresima committed
48
49
# Savepoints computed from `phones_t`, with `n_measure` set to 10.
ph_savepoints = SpikeTimit.get_savepoints(; trans=phones_t, n_measure=10)
# Bare expression: displays `phones_t` when the cell is run interactively.
phones_t
alessio.quaresima's avatar
alessio.quaresima committed
50
51
52
53
54


## Comparing the last firing time, the duration of all words and the
## intervals of the words and phonemes we expect that it's well done!
all_ft[end]
55
# Expected total length of the input: each repetition holds every duration plus
# one `silence_time` per entry, and a single `silence_time` is subtracted from
# the grand total.
input_length = let per_repetition = sum(durations) + silence_time * length(durations)
    repetitions * per_repetition - silence_time
end
alessio.quaresima's avatar
alessio.quaresima committed
56
words_t.intervals[end]
57
phones_t.steps[end]