Commit abd58e6b authored by Alessio Quaresima

merged

parents d066a35c f9aa43ea
include("src/SpikeTimit.jl") include("../src/SpikeTimit.jl")
path = "/home/cocconat/Documents/Research/phd_project/speech/litwin-kumar_model_thesis/Spike TIMIT" # path = "/home/cocconat/Documents/Research/phd_project/speech/litwin-kumar_model_thesis/Spike TIMIT"
path = "/home/alequa/Documents/Research/phd_project/speech/Spike TIMIT/"
#Create the path strings leading to folders in the data set
test_path = joinpath(path, "test");
@@ -24,6 +25,7 @@ words = ["that", "she"]
# In this case I select all the female speakers from
# regional accent 1 that use at least one of the words in their
## I declared these functions because I don't know how to use the DataFrames query properly *_*...
using DataFramesMeta
in_words(df_words) = !isempty(intersect(Set(df_words),Set(words)))
in_dialect(df_dialect) = df_dialect ∈ target_dialects
in_gender(df_gender) = occursin(df_gender, target_gender)
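These three predicates are presumably meant to drive a DataFrames row filter. A minimal sketch of how they could be combined, assuming the dataset has columns named :words, :dialect and :gender (the column names and the train_df variable are guesses, not shown in this diff):

using DataFrames
# Hypothetical usage; :words, :dialect and :gender are assumed column names.
speakers = filter(row -> in_words(row.words) && in_dialect(row.dialect) && in_gender(row.gender), train_df)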
@@ -40,10 +42,11 @@ all_ft, all_n, words_t, phones_t = SpikeTimit.mix_inputs(;durations=durations, s
SpikeTimit.convert_to_dt(words_t, 0.1)
SpikeTimit.convert_to_dt(phones_t, 0.1)
all_ft = SpikeTimit.convert_to_dt(all_ft, 0.1)
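For orientation, converting firing times to integer simulation steps generally amounts to dividing by the timestep and rounding. A generic sketch of that idea (not necessarily SpikeTimit's actual convert_to_dt, whose body is not shown in this excerpt):

# Generic illustration: map firing times (same unit as dt) onto discrete steps.
to_steps(ft, dt) = round.(Int, ft ./ dt)
to_steps([0.5, 12.3, 40.0], 0.1)   # => [5, 123, 400]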
##
words_savepoints = SpikeTimit.get_savepoints(trans= words_t, n_measure=10)
-ph_savepoints, ll = SpikeTimit.get_savepoints(trans= phones_t, n_measure=10)
+ph_savepoints = SpikeTimit.get_savepoints(trans= phones_t, n_measure=10)
phones_t
## Comparing the last firing time, the duration of all words and the
......
@@ -10,15 +10,15 @@ import os
sys.path.insert(0, os.getcwd())
print(sys.path)
"""
pwd()
TIMIT = pyimport("TIMIT_loader")
pyimport("importlib")."reload"(TIMIT)
-path = "C:\\Users\\leoni\\Desktop\\3rd_year_AI\\1_Thesis\\litwin-kumar_model_thesis\\Spike TIMIT"
+#path = "C:\\Users\\leoni\\Desktop\\3rd_year_AI\\1_Thesis\\litwin-kumar_model_thesis\\Spike TIMIT"
path = "/home/cocconat/Documents/Research/phd_project/speech/litwin-kumar_model_thesis/Spike TIMIT"
dataset = TIMIT.create_dataset(joinpath(path,"train"))
spkrinfo, spkrsent = TIMIT.create_spkrdata(path)
# dataset |> Pandas.DataFrame |> DataFrames.DataFrame
##
......
@@ -81,7 +81,7 @@ module SpikeTimit
# # , allowcomments=true, commentmark='%')
function get_dialect(root)
-return split(root,"\\") |> x->parse(Int,filter(startswith("dr"),x)[1][end])
+return splitpath(root) |> x->parse(Int,filter(startswith("dr"),x)[1][end])
end
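Replacing split(root, "\\") with splitpath makes the path handling platform-independent: Base.splitpath splits on the platform's path separators rather than a hard-coded backslash, so the same code works on the Windows and Linux checkouts that both appear in this repository. For example:

splitpath("train/dr1/fcjf0/sa1.wav")     # => ["train", "dr1", "fcjf0", "sa1.wav"]
split("train/dr1/fcjf0/sa1.wav", "\\")   # => a single element holding the whole string, since there is no backslash to split on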
function create_dataset(;dir)
@@ -89,7 +89,7 @@ module SpikeTimit
for (root, dirs, files) in walkdir(dir)
for file in files
if endswith(file,"wav")
-speaker = split(root, "\\")[end]
+speaker = splitpath(root)[end]
senID = split(file,".")[1]
words = get_words(root, senID)
phones = get_phones(root, senID)
@@ -185,7 +185,7 @@ module SpikeTimit
- spikes is an array with inputs in the form Vector
- durations is the duration in seconds of each encoding
"""
-function stack_spiketimes(spikes, durations, silence_time::Float64)
+function stack_spiketimes(spikes::Vector{Spiketimes}, durations::Vector{Float64}, silence_time::Float64)
# for the memory allocation
nr_unique_fts = 0
for spike_times in spikes
@@ -202,12 +202,13 @@ module SpikeTimit
#shift time for each neuron:
sorted .+= global_time
## put them together
lower_bound = filled_indices + 1
filled_indices += size(sorted,1)
all_ft[lower_bound:filled_indices] = sorted
all_neurons[lower_bound:filled_indices] = neurons
global_time += dd
global_time += silence_time
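The loop above shifts each word's sorted firing times by a running global offset and writes them into the preallocated arrays; the lower_bound/filled_indices bookkeeping just appends without reallocating. A simplified, self-contained sketch of the same idea (not the package function itself, and ignoring the neuron indices):

# Simplified illustration: concatenate per-word spike trains on one global
# timeline, offsetting each train by the accumulated duration plus a silence gap.
function stack_sketch(trains::Vector{Vector{Float64}}, durations::Vector{Float64}, silence::Float64)
    all_ft = Float64[]
    global_time = 0.0
    for (train, dd) in zip(trains, durations)
        append!(all_ft, train .+ global_time)  # shift this word's spikes onto the global timeline
        global_time += dd + silence            # advance past the word and the silence gap
    end
    return all_ft
end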
@@ -380,17 +381,17 @@ module SpikeTimit
function select_inputs(; df, words, samples=10, n_feat = 7)
-all_spikes = []
+all_spikes = Vector{Spiketimes}()
-all_durations = []
+all_durations = Vector{Float64}()
all_labels = []
for (i, word) in enumerate(words)
df_word = find_word(word=word, df=df)
n_occurences = size(df_word,1)
#@show word, n_occurences
#randomly order the number of occurences to sample
if samples <= n_occurences
inds = randperm(n_occurences)[1:samples]
else
message = string("WARNING: for word: '", word, "', samples per word (", samples, ") exceeds the number of occurences (", n_occurences, ")")
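For reference, randperm(n_occurences)[1:samples] is sampling without replacement: it draws samples distinct occurrence indices in random order, which is why the else branch warns when samples exceeds the number of occurrences. A tiny example:

using Random
randperm(7)[1:3]   # e.g. [5, 2, 7]: three distinct indices out of 1:7, in random order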
@@ -414,6 +415,7 @@ module SpikeTimit
function mix_inputs(;durations, spikes, labels, repetitions, silence_time)
ids = shuffle(repeat(1:length(durations), repetitions))
all_ft, all_n = stack_spiketimes(spikes[ids], durations[ids], silence_time)
words_t, phones_t = SpikeTimit.stack_labels(labels[ids],durations[ids],silence_time)
return all_ft, all_n, words_t, phones_t
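The ids vector simply repeats every stimulus index repetitions times and shuffles the result, so each word occurs equally often but in random order; for instance:

using Random
shuffle(repeat(1:3, 2))   # one possible result: [2, 1, 3, 1, 2, 3]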
......