Commit f9aa43ea authored by Alessio Quaresima

SpikeTimit merged version

parent ae2c0e91
include("src/SpikeTimit.jl")
include("../src/SpikeTimit.jl")
path = "/home/cocconat/Documents/Research/phd_project/speech/litwin-kumar_model_thesis/Spike TIMIT"
# path = "/home/cocconat/Documents/Research/phd_project/speech/litwin-kumar_model_thesis/Spike TIMIT"
path = "/home/alequa/Documents/Research/phd_project/speech/Spike TIMIT/"
#Create the path strings leading to folders in the data set
test_path = joinpath(path, "test");
@@ -24,6 +25,7 @@ words = ["that", "she"]
# In this case I select all the female speakers from
# regional accent 1 that use at least one of the words in their
## I declared these functions because I don't know how to use the DataFrames query properly *_*...
using DataFramesMeta
in_words(df_words) = !isempty(intersect(Set(df_words),Set(words)))
in_dialect(df_dialect) = df_dialect ∈ target_dialects
in_gender(df_gender) = occursin(df_gender, target_gender)
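For reference, a hedged sketch of how these three predicates could drive the selection with a plain filter call; the DataFrame variable (train_df) and the column names (:words, :dialect, :gender) are assumptions based on the create_dataset schema further down, and the actual query in the script is outside this hunk:

# Hypothetical usage of the predicates above; assumes the dataset DataFrame
# has :words, :dialect and :gender columns as pushed in create_dataset.
speakers = filter(row -> in_words(row.words) &&
                         in_dialect(row.dialect) &&
                         in_gender(row.gender),
                  train_df)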
@@ -40,10 +42,11 @@ all_ft, all_n, words_t, phones_t = SpikeTimit.mix_inputs(;durations=durations, s
SpikeTimit.convert_to_dt(words_t, 0.1)
SpikeTimit.convert_to_dt(phones_t, 0.1)
all_ft = SpikeTimit.convert_to_dt(all_ft, 0.1)
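As a hedged aside, convert_to_dt presumably maps continuous spike times onto integer multiples of the timestep dt; its real definition lives in src/SpikeTimit.jl and may differ:

# Hypothetical sketch of the dt conversion, not the actual implementation:
convert_to_dt_sketch(times, dt) = round.(Int, times ./ dt)
convert_to_dt_sketch([2.31, 7.9], 0.1)   # -> [23, 79] timesteps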
##
words_savepoints = SpikeTimit.get_savepoints(trans= words_t, n_measure=10)
-ph_savepoints, ll = SpikeTimit.get_savepoints(trans= phones_t, n_measure=10)
+ph_savepoints = SpikeTimit.get_savepoints(trans= phones_t, n_measure=10)
phones_t
## Comparing the last firing time, the duration of all words and the
@@ -3,7 +3,7 @@ using DataFrames
using DataFramesMeta
using Pandas
-cd(@__DIR__)
+cd(joinpath(@__DIR__,".."))
py"""
import sys
import os
@@ -13,14 +13,16 @@ print(sys.path)
TIMIT = pyimport("TIMIT_loader")
pyimport("importlib")."reload"(TIMIT)
#path = "C:\\Users\\leoni\\Desktop\\3rd_year_AI\\1_Thesis\\litwin-kumar_model_thesis\\Spike TIMIT"
path = "/home/cocconat/Documents/Research/phd_project/speech/litwin-kumar_model_thesis/Spike TIMIT"
dataset = TIMIT.create_dataset(joinpath(path,"train"))
spkrinfo, spkrsent = TIMIT.create_spkrdata(path)
# dataset |> Pandas.DataFrame |> DataFrames.DataFrame
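The commented pipe above hints at the intended round trip: the Python loader hands back a pandas object through PyCall, Pandas.jl wraps it, and DataFrames.jl materializes it. A minimal sketch, assuming Pandas.jl's Tables.jl support; the toy columns are made up:

# Toy pandas -> Julia DataFrame round trip; column names are hypothetical.
using PyCall, Pandas, DataFrames
pd = pyimport("pandas")
pydf = pd.DataFrame(Dict("speaker" => ["fcjf0"], "dr" => [1]))
jdf = DataFrames.DataFrame(Pandas.DataFrame(pydf))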
##
include("src/SpikeTimit.jl")
include("../src/SpikeTimit.jl")
#Create the path strings leading to folders in the data set
test_path = joinpath(path, "test");
@@ -81,7 +81,7 @@ module SpikeTimit
# # , allowcomments=true, commentmark='%')
function get_dialect(root)
return split(root,"/") |> x->parse(Int,filter(startswith("dr"),x)[1][end])
return splitpath(root) |> x->parse(Int,filter(startswith("dr"),x)[1][end])
end
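The move from split(root, "/") to splitpath makes the dialect lookup robust to the platform path separator (note the Windows path commented out in the companion script). A quick illustration with a made-up directory layout:

# splitpath splits on the platform separator, so the "dr" lookup also
# works on Windows; the layout below is hypothetical.
root = joinpath("Spike TIMIT", "train", "dr1", "fcjf0")
splitpath(root)   # ["Spike TIMIT", "train", "dr1", "fcjf0"]
parse(Int, filter(startswith("dr"), splitpath(root))[1][end])   # 1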
function create_dataset(;dir)
@@ -89,14 +89,14 @@ module SpikeTimit
for (root, dirs, files) in walkdir(dir)
for file in files
if endswith(file,"wav")
speaker = split(root, "/")[end]
speaker = splitpath(root)[end]
senID = split(file,".")[1]
words = get_words(root, senID)
phones = get_phones(root, senID)
-dr = get_dialect(root)
-gender = speaker[1]
+dr = get_dialect(root)
+gender = speaker[1]
sentence = String.(words[:,1])
-push!(df,(speaker,senID,dr,gender,joinpath(root,senID),words,phones, sentence))
+push!(df,(speaker,senID,dr,gender,joinpath(root,senID),words,phones,sentence))
end
end
end
@@ -201,9 +201,9 @@ module SpikeTimit
sorted, neurons = sort_spikes(inverse_dictionary(spike_times))
#shift time for each neuron:
sorted .+= global_time
-## put them together
-lower_bound = filled_indices +1
+## put them together
+lower_bound = filled_indices + 1
filled_indices += size(sorted,1)
all_ft[lower_bound:filled_indices] = sorted
all_neurons[lower_bound:filled_indices] = neurons
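For context, the hunk above is the standard preallocated-buffer pattern: filled_indices is a cursor into all_ft/all_neurons, and each word's sorted, time-shifted spikes land in the next free slice. A self-contained toy version, with names borrowed from the hunk and made-up data:

# Toy version of the cursor-based filling used above.
let chunks = [[0.1, 0.4], [0.2, 0.9, 1.3]]      # per-word sorted spike times
    all_ft = Vector{Float64}(undef, sum(length, chunks))
    filled_indices = 0
    global_time = 0.0
    for sorted in chunks
        sorted = sorted .+ global_time          # shift word onto global time axis
        lower_bound = filled_indices + 1
        filled_indices += length(sorted)
        all_ft[lower_bound:filled_indices] = sorted
        global_time = last(sorted)              # next word starts after this one
    end
    all_ft                                      # [0.1, 0.4, 0.6, 1.3, 1.7]
end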
@@ -234,21 +234,6 @@ module SpikeTimit
return words_transcripts,phones_transcripts
end
#="""
Return the input of the spike trains corrsponding to the rows, in the (spike-time -> neuron ) format
"""
function inputs_from_df(df, rows)
_df = df[rows,:]
## Get the duration of each frame: it corresponds to the (last) symbol h# in the phones array
durations = _df.phones |> phones->map(phone->phone[end], phones)
spikes = SpikeTimit.get_spiketimes(df=_df)
all_ft, all_n = stack_spiketimes(spikes, durations)
@assert(size(all_ft) == size(all_n))
return all_ft, all_n, durations
end
=#
##########################
## Get words and phonemes
##########################
@@ -314,7 +299,7 @@ module SpikeTimit
push!(word_phones, ph)
end
end
-push!(all_phones, Word(String(my_word[1]), word_phones,t1-t0, t0, t1))
+push!(all_phones, Word(String(my_word[1]), word_phones,t1-t0,t0,t1))
end
end
push!(df_phones, all_phones)
@@ -402,7 +387,7 @@ module SpikeTimit
for (i, word) in enumerate(words)
df_word = find_word(word=word, df=df)
n_occurences = size(df_word,1)
-@show n_occurences
+#@show word, n_occurences
#randomly order the number of occurrences to sample
if samples <= n_occurences
@@ -415,7 +400,7 @@ module SpikeTimit
spiketimes, durations = get_spikes_in_word(; df=df_word[inds,:], word)
spiketimes = resample_spikes(spiketimes=spiketimes, n_feat=n_feat)
labels = vcat(get_word_labels(;df=df_word[inds,:], word=word)...)
-@show length(labels), length(spiketimes)
+#@show length(labels), length(spiketimes)
@assert(length(spiketimes) == length(labels))
@@ -436,17 +421,14 @@ module SpikeTimit
end
function get_savepoints(;trans::Transcription, n_measure::Int)
-measures = []
-labels = []
-for s in eachindex(trans.steps)
-step = trans.steps[s]
-sign = trans.sign[s]
+measures = Array{Int64,2}(undef, size(trans.steps,1), n_measure)
+for (i,step) in enumerate(trans.steps)
l = step[2] - step[1]
l_single = floor(Int, l/n_measure)
-push!(measures,step[1] .+ collect(1:n_measure).* l_single)
-push!(labels, repeat([sign], n_measure))
+measures[i,1:n_measure] = (step[1] .+ collect(1:n_measure).* l_single)
+# push!(measures,step[1] .+ collect(1:n_measure).* l_single)
end
-return measures, labels
+return measures
end
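The rewritten get_savepoints now returns a single steps × n_measure matrix instead of a (measures, labels) tuple, which is why the caller near the top of this diff drops the second return value. The per-row arithmetic, isolated on a made-up step:

# One row of the savepoint matrix for a step spanning [100, 200] with
# n_measure = 10; the values come out as 110, 120, ..., 200.
step, n_measure = (100, 200), 10
l_single = floor(Int, (step[2] - step[1]) / n_measure)
step[1] .+ collect(1:n_measure) .* l_single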
"""
@@ -477,10 +459,10 @@ module SpikeTimit
function _resample_spikes(;spiketimes::Spiketimes, n_feat)
# If we don't reduce the bins
if n_feat == 1
-return spike_times
+return spiketimes
elseif n_feat > 11 || n_feat < 1
prinln("WARNING; you are crazy, returning original spike_times")
return spike_times
println("WARNING; you are crazy, returning original spike_times")
return spiketimes
end
FREQUENCIES = 20
@@ -533,13 +515,13 @@ module SpikeTimit
end
-function transform_into_bursts(all_ft, all_neurons)
+function transform_into_bursts(all_ft, all_neurons; spikes_per_burst_increase=0)
new_all_ft = []
new_all_neurons = []
expdist = Exponential(5)
for (i, time) in enumerate(all_ft)
# determine X (amount of spikes in burst) -> bias dice
-values = [2,3,4,5,6]
+values = [2,3,4,5,6] .+ spikes_per_burst_increase
weights = [0.8, 0.15, 0.075, 0.035, 0.03] # based on plot 1B 0.7 nA (Oswald, Doiron & Maler (2007))
weights = weights ./ sum(weights) # normalized weights
number_of_spikes = sample(values, Weights(weights)) - 1 # -1 because first spike is determined from data
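A self-contained sketch of the biased-die burst sampling in this hunk, assuming StatsBase for sample/Weights and Distributions for Exponential (consistent with expdist above); the t0 value is made up:

# Draw one burst: the first spike comes from the data, the extra spikes
# follow at exponentially distributed intervals (mean 5 ms here).
using StatsBase, Distributions
values  = [2, 3, 4, 5, 6]
weights = Weights([0.8, 0.15, 0.075, 0.035, 0.03])
n_extra = sample(values, weights) - 1    # -1: first spike is the data spike
t0 = 100.0                               # original spike time (made up)
burst = vcat(t0, t0 .+ cumsum(rand(Exponential(5), n_extra)))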