Commit a3aab63d authored by Alessio Quaresima

Revert "filters in SpikeTimit"

This reverts commit 91032140.
parent 91032140
@@ -73,7 +73,7 @@ def count_dataset(path, dtype="train"):
        yield 1

def get_dialect(path):
-    return int(path.split("/")[-3][-1])
+    return int(path.split("\\")[-3][-1])
    # | > x->parse(Int, filter(startswith("dr"), x)[1][end])
@@ -228,6 +228,7 @@ def get_spectra(dataframe, target_words=[], cqt_p=BAE):
    paths = dataframe.path
    if isinstance(dataframe.path, str):
        paths = [dataframe.path]
+    print(paths)
    for my_path in paths:
        oscillogram, sr = librosa.load(my_path + ".wav")
@@ -244,6 +245,7 @@ def get_spectra(dataframe, target_words=[], cqt_p=BAE):
        duration = len(oscillogram) / sr
        osc_sr = len(oscillogram) / duration
        db_sr = cqt.shape[1] / duration
+        print(final_time/duration, TRANSCRIPT_SR)
        # %%
        words, word_times = [], []
@@ -360,3 +362,4 @@ def get_spectra(dataframe, target_words=[], cqt_p=BAE):
# def first_speaker_id(sentence_number):
# return get_speakers_id(sentence_number)[1]
using PyCall
using DataFrames
using DataFramesMeta
using Pandas
cd(joinpath(@__DIR__,".."))
py"""
import sys
import os
sys.path.insert(0, os.getcwd())
print(sys.path)
"""
TIMIT = pyimport("TIMIT_loader")
pyimport("importlib")."reload"(TIMIT)
##
#path = "C:\\Users\\leoni\\Desktop\\3rd_year_AI\\1_Thesis\\litwin-kumar_model_thesis\\Spike TIMIT"
path = "/home/cocconat/Documents/Research/phd_project/speech/litwin-kumar_model_thesis/Spike TIMIT"
dataset = TIMIT.create_dataset(joinpath(path,"train"))
spkrinfo, spkrsent = TIMIT.create_spkrdata(path)
##
include("../src/SpikeTimit.jl")
#Create the path strings leading to folders in the data set
test_path = joinpath(path, "test");
train_path = joinpath(path, "train");
dict_path = joinpath(path, "DOC/TIMITDIC.TXT");
train = SpikeTimit.create_dataset(;dir= train_path)
test = SpikeTimit.create_dataset(;dir= test_path)
dict = SpikeTimit.create_dictionary(file=dict_path)
##
in_words(df_words; target) = !isempty(intersect(Set(df_words),Set(target)))
in_dialect(df_dialect; target) = df_dialect ∈ target
in_gender(df_gender; target) = occursin(df_gender, target)
const NYC = 6
const SOUTH = 5
const male = "m"
const female = "f"
##
words = ["that", "had", "she", "me", "your", "all", "like", "don't", "year", "water", "dark", "rag", "oily", "wash", "ask", "carry", "suit"]
# demographic subsets used by the *_data cells further below
nyc_male = @where(train, in_dialect.(:dialect, target=NYC), in_gender.(:gender, target=male), in_words.(:words, target=words))
nyc_female = @where(train, in_dialect.(:dialect, target=NYC), in_gender.(:gender, target=female), in_words.(:words, target=words))
south_male = @where(train, in_dialect.(:dialect, target=SOUTH), in_gender.(:gender, target=male), in_words.(:words, target=words))
south_female = @where(train, in_dialect.(:dialect, target=SOUTH), in_gender.(:gender, target=female), in_words.(:words, target=words))
##
##
##
# words[1].phones[1].db
using StatsBase
using Plots
function merge_phones(phone1, phone2)
    # join two consecutive phones into one segment, keeping the first onset and
    # the second offset; field order follows the Phone constructor used in
    # py2j_words: (ph, t0, t1, osc, db)
    return Phone(phone1.ph * phone2.ph, phone1.t0, phone2.t1,
        vcat(phone1.osc, phone2.osc),   # oscillograms are 1-D sample vectors: stack in time
        hcat(phone1.db, phone2.db))     # spectrogram frames are columns: join along time (vcat would stack frequency bands)
end
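# Usage sketch (hypothetical `jword`, a SpikeTimit.Word): fusing the first two
# phones of a word into a single segment.
# merged = merge_phones(jword.phones[1], jword.phones[2])
# merged.ph             # concatenated label
# size(merged.db, 2)    # time frames of both phones combined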
function get_all_phones(dataset, words)
    # collect every realisation of every phone occurring in the target words,
    # keyed by phone label
    all_phones = Dict()
    isa(words, String) && (words = [words])
    for word in words
        matches = SpikeTimit.find_word(; df=dataset, word=word) |> x -> TIMIT.get_spectra(x |> Pandas.DataFrame, target_words=word)
        for jword in SpikeTimit.py2j_words(matches)   # renamed from `word`: the outer loop already binds it
            for phone in jword.phones
                if haskey(all_phones, phone.ph)
                    push!(all_phones[phone.ph], phone)
                else
                    push!(all_phones, phone.ph => [phone])
                end
            end
        end
    end
    return all_phones
end
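# The exploratory cells below use a top-level `all_phones` dictionary that the
# script never builds explicitly; one plausible construction (an assumption)
# over the word list defined above:
all_phones = get_all_phones(train, words)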
collect(keys(all_phones))
function padding(phone; pad_length=400, pad_zero=-80)
    # pad (or truncate) the phone spectrogram to a fixed 20 × pad_length matrix,
    # filling the tail with pad_zero (≈ silence in dB); the original hardcoded
    # 400 and -80 instead of using the keyword arguments
    mat = ones(20, pad_length) .* pad_zero
    cols = min(size(phone.db, 2), pad_length)
    mat[:, 1:cols] .= phone.db[:, 1:cols]
    return mat
end
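# Quick contract check with a stand-in object: `padding` only reads `phone.db`,
# so a NamedTuple suffices for a shape test (a sketch, not analysis code):
@assert size(padding((db = fill(-40.0, 20, 123),))) == (20, 400)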
function get_padded_features(all_phones)
    phones = Vector{String}(undef, length(all_phones))
    cases = Vector{Int}(undef, length(all_phones))
    features = Vector{Matrix{Float64}}(undef, length(all_phones))
    phones_labels = collect(keys(all_phones))
    Threads.@threads for idx in 1:length(phones_labels)
        ph = phones_labels[idx]
        samples = length(all_phones[ph])
        phones[idx] = ph
        cases[idx] = samples
        # one column per sample: the padded 20 × 400 spectrogram, flattened
        data_phone = zeros(400 * 20, samples)
        for n in 1:samples
            data_phone[:, n] .= padding(all_phones[ph][n])[:]
        end
        features[idx] = data_phone
    end
    return phones, cases, features
end
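# get_padded_features returns three aligned vectors: phone labels, sample
# counts, and one (20*400) × n_samples matrix of flattened spectrograms per
# phone. Intended access pattern (assuming the `all_phones` built above):
# phones, cases, features = get_padded_features(all_phones)
# i = findfirst(==("ae"), phones)
# size(features[i]) == (20 * 400, cases[i])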
function compare_sounds(samples1, samples2)
    # union of the phone labels present in either group
    labels = collect(Set(vcat(samples1[1], samples2[1])))
    counts = zeros(length(labels), 2)
    for (n, ph) in enumerate(labels)
        ex1 = findfirst(==(ph), samples1[1])
        ex2 = findfirst(==(ph), samples2[1])
        # a pseudocount of 1 for phones missing from one group keeps the KL finite
        counts[n, 1] = isnothing(ex1) ? 1 : samples1[2][ex1]
        counts[n, 2] = isnothing(ex2) ? 1 : samples2[2][ex2]
    end
    # normalise counts to probabilities: kldivergence expects distributions
    p = counts[:, 1] ./ sum(counts[:, 1])
    q = counts[:, 2] ./ sum(counts[:, 2])
    # symmetrised KL divergence between the two phone-frequency distributions
    return 0.5 * (kldivergence(p, q) + kldivergence(q, p))
end
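# Sanity check of the symmetrised KL on toy distributions (StatsBase's
# kldivergence expects probability vectors, hence the normalisation above):
let p = [3, 1, 1] ./ 5, q = [1, 1, 3] ./ 5
    0.5 * (kldivergence(p, q) + kldivergence(q, p))   # ≈ 0.44; 0 iff p == q
end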
# crossentropy(dr1, dr2)
##
males = []
females = []
for dr in 1:8
    push!(males, @where(train, in_dialect.(:dialect, target=dr), in_gender.(:gender, target=male), in_words.(:words, target=words)) |> x -> get_padded_features(get_all_phones(x, words)))
    push!(females, @where(train, in_dialect.(:dialect, target=dr), in_gender.(:gender, target=female), in_words.(:words, target=words)) |> x -> get_padded_features(get_all_phones(x, words)))
end
##
nyc_male_data = get_padded_features(get_all_phones(nyc_male, words))
nyc_female_data = get_padded_features(get_all_phones(nyc_female, words))
south_male_data = get_padded_features(get_all_phones(south_male, words))
south_female_data = get_padded_features(get_all_phones(south_female, words))
##
# measures = compare_sounds(nyc_male_data, south_male_data)
# measures = compare_sounds(nyc_male_data, nyc_female_data)
data = [nyc_male_data, nyc_female_data, south_male_data, south_female_data]
compare_sounds(south_female_data, south_male_data)
compare_sounds(south_female_data, south_female_data)
compare_sounds(nyc_male_data, south_female_data)
south_male_data
south_female_data
nyc_female_data
nyc_male_data
# named kl_matrix rather than `entropy`, which would shadow StatsBase.entropy
kl_matrix = zeros(4, 4)
for x in 1:4
    for y in 1:4
        kl_matrix[x, y] = compare_sounds(data[x], data[y])
    end
end
heatmap(kl_matrix, yflip=true)
##
heatmap(map(x->reshape(x, 20,400), mean.(nyc_male_data[3], dims=2))[1])
heatmap(map(x->reshape(x, 20,400), mean.(nyc_male_data[3], dims=2))[2])
heatmap(map(x->reshape(x, 20,400), mean.(nyc_male_data[3], dims=2))[3])
heatmap(map(x->reshape(x, 20,400), mean.(nyc_male_data[3], dims=2))[4])
heatmap(map(x->reshape(x, 20,400), mean.(nyc_male_data[3], dims=2))[5])
heatmap(map(x->reshape(x, 20,400), mean.(nyc_male_data[3], dims=2))[6])
heatmap(map(x->reshape(x, 20,400), mean.(nyc_male_data[3], dims=2))[35])
##
padding(all_phones["t"][1])
p = Plots.plot()
for phone in all_phones   # phone is a (label => samples) pair
    if length(phone[2]) > 5
        p = histogram!(collect(map(r -> size(r.db)[2], phone[2])), bins=0:20:400, label=phone[1], alpha=0.5)
    end
    # plot!(title=phone[1])
    # push!(plots,p)
end
# map(r-> size(r.db)[2],all_phones["ae"])
# Plots.plot(plots...)
# all_phones
p
##
heatmap(padding(all_phones["ae"][1]))
heatmap(padding(all_phones["ae"][2]))
heatmap(padding(all_phones["ae"][3]))
##
function rate_coding_word(word::SpikeTimit.Word)
    # average each phone's spectrogram over time: one 20-band column per phone,
    # plus each phone's onset relative to the word onset
    times = Float64[]
    encoding = Matrix{Float64}(undef, 20, length(word.phones))
    for (n, ph) in enumerate(word.phones)
        encoding[:, n] = mean(ph.db, dims=2)[:, 1]
        push!(times, ph.t0 - word.t0)
    end
    return times, encoding
end
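# Usage sketch (hypothetical `jword`, a SpikeTimit.Word from py2j_words):
# times, enc = rate_coding_word(jword)
# heatmap(enc)   # 20 frequency bands × n_phones rate code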
# NB: from here on `words` must hold SpikeTimit.Word objects (e.g. the output
# of SpikeTimit.py2j_words), not the word strings defined at the top of this script
times, phs = rate_coding_word(words[1])
a = heatmap(words[1].phones[1].db)
b = heatmap(words[1].phones[2].db)
c = heatmap(words[1].phones[3].db)
words[1].word
Plots.plot(a,b,c, layout=(1,3), colorbar=false, axes=nothing, ticks=nothing)
times, phs = rate_coding_word(words[9])
heatmap(phs)
words[1].phones[1].ph
@@ -79,9 +79,6 @@ module SpikeTimit
    # using DataFrames
    # DataFrames.readtable(dir)
    # # , allowcomments=true, commentmark='%')
-    in_words(df_words; target) = !isempty(intersect(Set(df_words), Set(target)))
-    in_dialect(df_dialect; target) = df_dialect ∈ target
-    in_gender(df_gender; target) = occursin(df_gender, target)
    function get_dialect(root)
        return splitpath(root) |> x -> parse(Int, filter(startswith("dr"), x)[1][end])
@@ -608,18 +605,4 @@ module SpikeTimit
        return ax
    end
-    """
-    Convert python classes into julia structs
-    """
-    function py2j_words(words)
-        jwords = []
-        for word in words
-            phs = []
-            for ph in word.phones
-                push!(phs, SpikeTimit.Phone(ph.ph, ph.t0, ph.t1, Array{Float64}(ph.osc), Matrix{Float64}(ph.db)))
-            end
-            push!(jwords, SpikeTimit.Word(word.word, phs, word.duration, word.t0, word.t1))
-        end
-        return jwords
-    end
end