Internet Explorer® recommended

Phoneme Graphical Representations

The code was written to generate several graphical representations of phonemes and words without wasting time on manual plotting. To that end, an index file (files.txt) was written by hand; each of its lines gives the name of an audio file (the word), the phoneme it represents, and the sample positions of the start and end of that phoneme within the recorded word. In addition, you can download all of the audio files here.

files.txt

azucar s 3631 3959
bebe b 5390 5803
bueno ue 4913 5275
cafe f 6168 6537
carro a 1949 2329
casa k 591 927
causa au 2342 2690
chico + 1553 1890
cine s 2291 2620
ciudad iu 4965 5415
criada ia 4554 4967
cuarto ua 2102 2541
cuota uo 1505 1874
deuda eu 4926 5268
dia d 2584 3282
excelente ks 3349 3814
fruta + 1680 2232
general x 284 621
hay ai 1934 2331
hijo x 4907 5299
hoy oi 3151 3522
jugo g 3811 4279
kilo i 2258 2594
kiosko k 9542 9931
leche l 1463 1905
llama + 2548 3060
lo_humano ou 4691 5113
mesa m 1453 1937
Mexico x 6476 6812
mucho u 4483 4857
nada n 1230 1686
niño + 8846 9246
odio io 6313 6758
papa p 626 996
perro r 4491 6297
quien k 604 1002
seis ei 5861 6267
señor o 8500 8874
taza t 502 812
tiene ie 1787 2150

MATLAB code

Input Args:

% Phoneme graphical representations.
%
% For every line of files.txt (word, phoneme, first sample, last sample)
% this script loads outputs\<word>.wav and produces two figures:
%   figure 1: whole-word waveform, narrowband and wideband spectrograms,
%             phoneme waveform and phoneme magnitude spectrum;
%   figure 2: 3-D power spectral density surface of the whole word.
% Both figures are saved as JPEG files in the bitmaps\ folder.

[filename_array, phonemename_array, ph_start_array, ph_end_array] = ...
    textread('files.txt','%s %s %u %u','delimiter',' ');

fs = 16000;          % sampling rate used for every time/frequency axis (Hz)
N = 512;             % FFT length used for the spectrograms
phoneme_nfft = 4096; % zero-padded FFT length for the phoneme spectrum

% Iterate over the entries actually read from files.txt instead of a
% hard-coded count, so the loop can never index past the end of the list.
for vueltas = 1:numel(filename_array)

    filename_0 = char(filename_array(vueltas));
    phonemename = char(phonemename_array(vueltas));
    ph_start = ph_start_array(vueltas);
    ph_end = ph_end_array(vueltas);

    filename = strcat(filename_0,'.wav');
    filename = strcat('outputs\',filename);

    x = wavread(filename);
    n = 1:length(x);

    %Plotting the entire word:---------------
    fig1 = figure(1);
    set(fig1, 'Color','w');

    subplot(4,1,1);
    plot(n / fs,x);
    axis([1/fs length(x)/fs -1 1]);
    title(['/' phonemename '/- ' filename_0]);
    ylabel('x(t)');
    xlabel('time(seconds)');

    %Plotting the phoneme (in time):-------------------
    phoneme_length = ph_end - ph_start + 1;
    phoneme = x(ph_start:ph_end);
    phoneme = phoneme(:);               % force a column vector

    subplot(4,2,7);
    m = (ph_start:ph_end)/fs;
    plot(m,phoneme');
    title(['Time waveform for phoneme /' phonemename '/']);
    ylabel('s(t)');
    xlabel('time(seconds)');
    v = axis;
    axis([ph_start/fs ph_end/fs v(3) v(4)]);

    %Plotting Spectrograms:-------------------
    subplot(4,1,2);
    specgram(x,N,fs,hamming(N),round(0.97*N));
    title({'Narrowband'; 'spectrogram'}, 'Position',[length(x)/fs * 1.04 , 4000]);

    subplot(4,1,3);
    specgram(x,N,fs,hamming(N/4),round(0.9*N/4));
    title({'Wideband'; 'spectrogram'}, 'Position',[length(x)/fs * 1.06 , 4000]);

    %Plotting phoneme magnitude spectrum:--------------------------
    subplot(4,2,8);

    % Window the phoneme segment itself. A window spanning the whole word
    % is nearly flat over a short segment, so the segment would reach the
    % FFT effectively untapered.
    w = hamming(phoneme_length);
    ph_DFT = fft(w .* phoneme, phoneme_nfft);
    M = length(ph_DFT);
    ph_DFT_Mag = abs(ph_DFT);

    % FFT bin 1 is DC, so the frequency axis starts at 0 Hz.
    freq_axis = (0:M/2 - 1) .* (fs / M);
    plot(freq_axis,20*log10(ph_DFT_Mag(1:M/2)./M));
    title(['Magnitude spectrum of phoneme /' phonemename '/']);
    ylabel('|S(f)| (dB)');
    xlabel('frequency(Hz)');

    %Plotting 3D--------------------------------------------------
    fig2 = figure(2);
    set(fig2, 'Color','w');

    [S,F,T,P] = spectrogram(x,hamming(N),round(0.97*N),N,fs);
    surf(T,F,10*log10(abs(P)),'EdgeColor','none');
    axis tight;
    %set(gca,'Xscale','log');
    %set(gca,'XDir','reverse');
    set(gca,'YDir','reverse');
    ylabel('Freq(Hz)');
    xlabel('Time(sec)');
    zlabel('Power Density (dB)');
    title(['Power Spectral Density - ' filename_0]);

    %Saving figures automatically:----------------------------------------
    saveas(fig1,strcat('bitmaps\',filename_0,'_'),'jpg');
    saveas(fig2,['bitmaps\' filename_0 '3D'],'jpg');

end

Telephone Channel

MATLAB code

% Telephone channel simulation.
%
% Band-limits a recorded word to [fl, fh] Hz by short-time Fourier
% analysis: the signal is cut into overlapping Hamming-windowed frames,
% each frame is zero-padded to N samples, the FFT bins outside the pass
% band are zeroed, and the frames are resynthesised by overlap-add.
% The original and filtered magnitude spectra are plotted and the result
% is written to outputs\<word>_filt.

filename_0 = 'excelente'; %filename of the audio file to be analyzed.
filename = strcat(filename_0,'.wav');
filename = strcat('outputs\',filename);
x = wavread(filename);

fl = 300;  %low cutoff freq.
fh = 3500; %high cutoff freq.
fs = 16000;
overlap = 75;   % overlap percentage
w_length = 512; % Lw = window length
N = 1024;       % FFT length

fl_bin = round(fl * N /fs);
fh_bin = round(fh * N /fs);
N_DFT = N - w_length;  % total zero padding per frame (split before/after)

x_length = length(x);  %Lx = length of the whole input signal
w = hamming(w_length);

shift_length = ceil(((100 - overlap)/100) * w_length);
p_times = ceil((x_length - w_length)/ shift_length);
w_times = p_times + 1;

% Zero-pad the signal so the last frame is complete.
new_x_length = p_times * shift_length + w_length;
x(x_length + 1:new_x_length) = 0;

%Framing and windowing:-------------------------------------------
mat = zeros(w_length,w_times); % preallocated: one frame per column
for i = 1:w_times
    w_start = ((i - 1) * shift_length) + 1;
    frame = x(w_start:w_start + w_length - 1);
    mat(:,i) = frame(:) .* w(:,1);
end

% Centre each windowed frame inside an N-sample buffer, with N_DFT/2
% zeros before and after. zeros() takes scalar sizes, so the buffer is
% allocated once and only the middle rows are filled.
mat_defi = zeros(N,w_times);
mat_defi((N_DFT/2)+1:(N_DFT/2)+ w_length,:) = mat;

mat_defi_DFT = fft(mat_defi,N);
mat_defi_mag = abs(mat_defi_DFT);
mat_defi_phase = angle(mat_defi_DFT);

%Filtering------------------------
mat_defi_mag(1:fl_bin,:) = 0;                %zeros at the beginning
mat_defi_mag(N - fl_bin + 2:N,:) = 0;        %zeros at the end (mirror)
mat_defi_mag(fh_bin:(N - fh_bin + 2),:) = 0; %zeros above fh (both halves)

[OUT_Re, OUT_Imag] = pol2cart(mat_defi_phase,mat_defi_mag);
OUT_DFT = OUT_Re + 1i * OUT_Imag;

%Resynthesis:-----------------------------------------------------
% The filtered spectrum is conjugate-symmetric, so the inverse FFT is
% real up to rounding error; real() discards that residue so the output
% is a real signal.
out_frames = real(ifft(OUT_DFT,N));

new_window_size = N_DFT + w_length;
out = zeros(1,(shift_length * p_times) + new_window_size);
for m = 1:w_times
    idx = (1:new_window_size) + ((m - 1) * shift_length);
    out(idx) = out(idx) + out_frames(:,m).'; % overlap-add
end

% Normalise by the peak magnitude so the signal stays within [-1, 1]
% even when the largest excursion is negative.
out = out ./ max(abs(out));

%Plots:-------------------------------------------------------------
figure('Color','w');

%Plotting channel response:
OUT_mag = abs(fft(out));
length_DFT_OUT = length(OUT_mag);
X_mag = abs(fft(x,length_DFT_OUT));

half = floor(length_DFT_OUT/2);
% FFT bin 1 is DC, so the frequency axis starts at 0 Hz.
freq_axis = (0:half - 1) .* (fs / length_DFT_OUT);

X_log = 20*log10(X_mag(1:half)/length_DFT_OUT);
plot(freq_axis,X_log,'r');
hold on;
OUT_log = 20*log10(OUT_mag(1:half)/length_DFT_OUT);
plot(freq_axis,OUT_log);
axis([1 fs/2 -120 -30]);
title('Spoken Sentence Magnitud Spectrum');
ylabel('Magnitude(dB)');
xlabel('Frequency(Hz)');
legend('original signal','filtered signal');

%Saving wavfiles:
wavwrite(out,fs,strcat('outputs\',filename_0,'_filt'));

Unvoiced Fricative Synthesis

MATLAB code

% Unvoiced fricative synthesis.
%
% Uniform white noise is passed through a cascade of four resonators
% (res_filter), one per formant. The magnitude spectrum of the result is
% plotted and the normalised signal is written to the file 'sh_filtered'.

fs = 16000;
time = 1; %time in seconds

%Input arguments:-------------------------------------------
%Formant1:
BW1 = 300;
fc1 = 2605;
G_dB1 = 1; %Gain in dB

%Formant2:
BW2 = 500;
fc2 = 3300;
G_dB2 = 6; %Gain in dB

%Formant3:
BW3 = 300;
fc3 = 4031;
G_dB3 = 3; %Gain in dB

%Formant4:
BW4 = 2000;
fc4 = 7500;
G_dB4 = 15; %Gain in dB

%Generator:------------------------------------------------------------
x = rand(time * fs,1);
x = (2*x - 1); % map uniform [0,1) noise onto [-1,1)

% Resonators in cascade: each stage filters the previous stage's output.
x1 = res_filter(x,fc1,BW1,G_dB1,fs);
x2 = res_filter(x1,fc2,BW2,G_dB2,fs);
x3 = res_filter(x2,fc3,BW3,G_dB3,fs);
x4 = res_filter(x3,fc4,BW4,G_dB4,fs);
x1 = x4;

%Magnitude spectrum:---------------------------------------
N = 2048;
X1_mag = abs(fft(x1,N));

% FFT bin 1 is DC, so the frequency axis starts at 0 Hz.
freq_axis = (0:N/2 - 1) * fs / N;

% Decibels need the base-10 logarithm; log() is the natural logarithm.
plot(freq_axis,20*log10(X1_mag(1:N/2)/N));
grid on;
xlabel('Frequency (Hz)');
ylabel('Magnitude of the synthesized phoneme (dB)');
axis([0 fs/2 -96 -24]);

x_defi = x1/max(abs(x1));
wavwrite(x_defi,fs,'sh_filtered');

Function res_filter

function x1 = res_filter(x,fc,BW,G_dB,fs)
%RES_FILTER Two-pole resonator with peak-normalised, gain-scaled output.
%   x1 = RES_FILTER(x,fc,BW,G_dB,fs) filters x through the recursion
%       y(n) = A*x(n) + B*y(n-1) + C*y(n-2)
%   whose coefficients are derived from the centre frequency fc, the
%   bandwidth BW and the sampling rate fs. The filtered signal is scaled
%   to unit peak magnitude and then multiplied by the linear equivalent
%   of the gain G_dB (in dB).

% Linear amplitude gain corresponding to G_dB.
linear_gain = 10^(G_dB/20);

%BW = 2*fc - 4*fc /((2^BW_oct)+1);

% Recursion coefficients.
pole_term = exp(-2*pi*BW/fs);
C = -pole_term;
B = 2*pole_term * cos(2*pi*fc/fs);
A = 1 - B - C;

numerator = [A 0 0];
denominator = [1 -B -C];

% Filter, normalise to unit peak, then apply the requested gain.
filtered = filter(numerator,denominator,x);
peak = max(abs(filtered));
x1 = filtered/peak;
x1 = linear_gain * x1;

Time-Varying Tube Synthesis

% Time-varying tube synthesis.
%
% White noise is fed to seven time-varying resonators (movingResFilter
% called with its first argument set to 1..7). Their outputs are summed,
% normalised to unit peak and written to 'output'; the raw noise is
% written to 'input'.

fs = 44100; %Sampling rate. Any other can be used.

% Kept just below 5 seconds: at 5 seconds it blows up.
% NOTE(review): presumably because movingResFilter divides by
% (alpha - betha*t), which reaches zero at t = 5 s - confirm.
total_time = 4.999999; %in seconds.
total_samples = floor(fs * total_time);

%Noise generator: uniform noise mapped onto [-1,1)
x = 2* rand(1,total_samples) - 1;

%Filtering:--------------------------------------------------------------
% Accumulate the resonator outputs in the order 1..7.
num_resonators = 7;
yn = movingResFilter(1,x,fs);
for resonator = 2:num_resonators
    yn = yn + movingResFilter(resonator,x,fs);
end

%Output:-------------------------------------------
peak = max(abs(yn));
yn = yn ./ peak;

wavwrite(yn,fs,'output')
wavwrite(x,fs,'input')

Function movingResFilter

function yn = movingResFilter(k,x,fs)
%MOVINGRESFILTER Two-pole resonator whose centre frequency varies in time.
%   yn = MOVINGRESFILTER(k,x,fs) filters the row vector x, sampled at fs
%   Hz, sample by sample through the recursion
%       yn(i) = A*x(i) + B*yn(i-1) + C*yn(i-2)
%   The coefficients are recomputed for every sample from the centre
%   frequency fc = k/(alpha - betha*t), t = i/fs, with bandwidth fc/Q.
%   Samples for which fc lies outside (0, fs/2) are set to zero.

Q = 20; %Changeable
alpha = 2E-3;
betha = 4E-4;

yn_2 = 0; % output two samples back
yn_1 = 0; % previous output sample
yn = zeros(1,length(x)); %Allocating to improve speed !

for i = 1:length(x)
    t = i/fs;
    %fc = (k/(alpha - betha * t)) * (1 + 0.018 * sin(2*pi*2.5*t)); %For vibrating !
    fc = (k/(alpha - betha * t));

    % Antialiasing protection. Negative fc (t beyond alpha/betha) is
    % rejected as well: it would give a negative bandwidth and an
    % exponentially growing recursion.
    if (fc > 0) && (fc < (fs/2))
        BW = fc/Q;
        C = -exp(-2*pi*BW/fs);
        B = 2*exp(-2*pi*BW/fs) * cos(2*pi*fc/fs);
        A = 1 - B - C;
        yn(i) = A * x(i) + B * yn_1 + C * yn_2;
    else
        yn(i) = 0;
    end

    yn_2 = yn_1;
    yn_1 = yn(i);
end

Spanish Phonemes

Phoneme Graphical Representations

files.txt

MATLAB code

Telephone Channel

MATLAB code

Unvoiced Fricative Synthesis

MATLAB code

Function res_filter

Time-Varying Tube Synthesis

Function movingResFilter