using PyCall
using DataFrames
using DataFramesMeta
using Pandas

cd(joinpath(@__DIR__,".."))
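
# Make the working directory visible to the embedded Python interpreter so the
# local TIMIT_loader module can be imported through PyCall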
py"""
import sys
import os
sys.path.insert(0, os.getcwd())
print(sys.path)
"""
TIMIT = pyimport("TIMIT_loader")
pyimport("importlib")."reload"(TIMIT)

#path = "C:\\Users\\leoni\\Desktop\\3rd_year_AI\\1_Thesis\\litwin-kumar_model_thesis\\Spike TIMIT"
path = "/home/cocconat/Documents/Research/phd_project/speech/litwin-kumar_model_thesis/Spike TIMIT"

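# Index the training partition with the Python-side loader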
dataset = TIMIT.create_dataset(joinpath(path,"train"))
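# Speaker metadata: speaker-info and speaker-sentence tables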
spkrinfo, spkrsent = TIMIT.create_spkrdata(path)

# dataset |> Pandas.DataFrame |> DataFrames.DataFrame

##
include("../src/SpikeTimit.jl")

# Create the path strings leading to the dataset folders
test_path = joinpath(path, "test");
train_path = joinpath(path, "train");
dict_path = joinpath(path, "DOC/TIMITDIC.TXT");
train = SpikeTimit.create_dataset(;dir= train_path)
test = SpikeTimit.create_dataset(;dir= test_path)
dict = SpikeTimit.create_dictionary(file=dict_path)


##

words = ["that"]
target_dialects = [1]
target_gender = "f" # "fm" "m"
in_words(df_words) = !isempty(intersect(Set(df_words),Set(words)))
in_dialect(df_dialect) = df_dialect ∈ target_dialects
in_gender(df_gender) = occursin(df_gender, target_gender)

# @where is a DataFramesMeta macro: keep only the rows matching all three predicates
speaker = @where(train,in_dialect.(:dialect), in_gender.(:gender), in_words.(:words))
speaker.words
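# Pull the spectral data (as Python objects) for every occurrence of the target words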
words = TIMIT.get_spectra(speaker |> Pandas.DataFrame, target_words=["that"])
##
words[1].phones[1].db
##
using StatsBase

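# Convert the Python word/phone objects returned by TIMIT.get_spectra into the
# Julia SpikeTimit.Word / SpikeTimit.Phone structs defined in ../src/SpikeTimit.jl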
function py2j_words(words)
    jwords = []
    for word in words
        phs = []
        for ph in word.phones
            push!(phs,SpikeTimit.Phone(ph.ph, ph.t0, ph.t1, Array{Float64}(ph.osc), Matrix{Float64}(ph.db)))
        end
        push!(jwords,SpikeTimit.Word(word.word, phs, word.duration, word.t0, word.t1))
    end
    return jwords
end
words = py2j_words(words)

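# Rate-code a word: average each phone's dB spectrogram over time into a single
# 20-row column, and record each phone's onset relative to the word onset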
function rate_coding_word(word::SpikeTimit.Word)
    times = []
    encoding = Matrix{Float64}(undef, 20, length(word.phones))
    for (n,ph) in enumerate(word.phones)
        encoding[:,n] = mean(ph.db, dims=2)[:,1]
        push!(times, ph.t0 - word.t0)
    end
    return times, encoding
end

using Plots
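# Spectrograms (dB) of the first three phones of the first matched word, side by side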
times, phs = rate_coding_word(words[1])
a = heatmap(words[1].phones[1].db)
b = heatmap(words[1].phones[2].db)
c = heatmap(words[1].phones[3].db)
words[1].word
Plots.plot(a,b,c, layout=(1,3), colorbar=false, axes=nothing, ticks=nothing)
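# Phone-resolved rate code of the ninth matched word, shown as a heatmap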
times, phs = rate_coding_word(words[9])
heatmap(phs)
words[1].phones[1].ph