
Internet Explorer® recommended
Phoneme Graphical Representations
This code generates several graphical representations of phonemes and words automatically, so that the plots can be produced without wasting time. To drive it, a script file (files.txt) was written by hand; each record gives the audio file (word), the phoneme it represents, and the sample positions where that phoneme starts and ends within the recorded word. You can also download all of the audio files here.
files.txt
azucar s 3631 3959 bebe b 5390 5803 bueno ue 4913 5275 cafe f 6168 6537 carro a 1949 2329 casa k 591 927 causa au 2342 2690 chico + 1553 1890 cine s 2291 2620 ciudad iu 4965 5415 criada ia 4554 4967 cuarto ua 2102 2541 cuota uo 1505 1874 deuda eu 4926 5268 dia d 2584 3282 excelente ks 3349 3814 fruta + 1680 2232 general x 284 621 hay ai 1934 2331 hijo x 4907 5299 hoy oi 3151 3522 jugo g 3811 4279 kilo i 2258 2594 kiosko k 9542 9931 leche l 1463 1905 llama + 2548 3060 lo_humano ou 4691 5113 mesa m 1453 1937 Mexico x 6476 6812 mucho u 4483 4857 nada n 1230 1686 niño + 8846 9246 odio io 6313 6758 papa p 626 996 perro r 4491 6297 quien k 604 1002 seis ei 5861 6267 señor o 8500 8874 taza t 502 812 tiene ie 1787 2150 |
MATLAB code
%Input Args:
%files.txt holds one record per word:
%   <word> <phoneme> <first sample of phoneme> <last sample of phoneme>
%Fields are split on whitespace (spaces or newlines), which is textread's
%default, so no explicit delimiter is needed.
[filename_array, phonemename_array, ph_start_array, ph_end_array] = ...
    textread('files.txt', '%s %s %u %u');

fs    = 16000;   %sampling rate assumed for every recording (Hz)
N     = 512;     %FFT length used by the spectrograms
N_DFT = 4096;    %FFT length used by the phoneme magnitude spectrum

%Iterate over every record actually present in files.txt instead of a
%hard-coded count, which indexed past the end of the arrays.
for vueltas = 1:numel(filename_array)
    filename_0  = char(filename_array(vueltas));      %word, e.g. 'casa'
    phonemename = char(phonemename_array(vueltas));   %phoneme label, e.g. 'k'
    ph_start    = double(ph_start_array(vueltas));
    ph_end      = double(ph_end_array(vueltas));

    filename = strcat('outputs\', filename_0, '.wav');
    x = wavread(filename);
    x = x(:,1);                                       %use first channel only

    %Skip records whose sample range does not fit the recording.
    if ph_start < 1 || ph_end > length(x) || ph_start > ph_end
        warning('Skipping %s: phoneme range %d-%d is outside 1-%d.', ...
                filename_0, ph_start, ph_end, length(x));
        continue;
    end

    n = 1:length(x);

    %Plotting the entire word:---------------
    fig1 = figure(1);
    set(fig1, 'Color', 'w');
    subplot(4,1,1);
    plot(n / fs, x);
    axis([1/fs length(x)/fs -1 1]);
    title(['/' phonemename '/-' filename_0]);
    ylabel('x(t)');
    xlabel('time(seconds)');

    %Plotting the phoneme (in time):-------------------
    subplot(4,2,7);
    m = (ph_start:ph_end) / fs;
    plot(m, x(ph_start:ph_end)');
    title(['Time waveform for phoneme /' phonemename '/']);
    ylabel('s(t)');
    xlabel('time(seconds)');
    v = axis;
    axis([ph_start/fs ph_end/fs v(3) v(4)]);

    %Plotting Spectrograms:-------------------
    %Long window -> fine frequency resolution (narrowband).
    subplot(4,1,2);
    specgram(x, N, fs, hamming(N), round(0.97*N));
    title({'Narrowband'; 'spectrogram'}, ...
          'Position', [length(x)/fs * 1.04, 4000]);
    %Short window -> fine time resolution (wideband).
    subplot(4,1,3);
    specgram(x, N, fs, hamming(N/4), round(0.9*N/4));
    title({'Wideband'; 'spectrogram'}, ...
          'Position', [length(x)/fs * 1.06, 4000]);

    %Plotting phoneme magnitude spectrum:--------------------------
    subplot(4,2,8);
    %Taper the phoneme segment itself. Windowing the whole word and then
    %slicing leaves the segment edges untapered.
    segment = x(ph_start:ph_end);
    segment = segment(:) .* hamming(length(segment));
    ph_DFT = fft(segment, N_DFT);
    M = length(ph_DFT);
    ph_DFT_Mag = abs(ph_DFT);
    freq = (0:M/2 - 1) .* (fs / M);       %bin 1 of the FFT is 0 Hz
    plot(freq, 20*log10(ph_DFT_Mag(1:M/2) ./ M));
    title(['Magnitude spectrum of phoneme /' phonemename '/']);
    ylabel('|S(f)| (dB)');
    xlabel('frequency(Hz)');

    %Plotting 3D--------------------------------------------------
    fig2 = figure(2);
    set(fig2, 'Color', 'w');
    [S, F, T, P] = spectrogram(x, hamming(N), round(0.97*N), N, fs);
    surf(T, F, 10*log10(abs(P)), 'EdgeColor', 'none');
    axis tight;
    %set(gca,'Xscale','log');
    %set(gca,'XDir','reverse');
    set(gca, 'YDir', 'reverse');
    ylabel('Freq(Hz)');
    xlabel('Time(sec)');
    zlabel('Power Density (dB)');
    title(['Power Spectral Density - ' filename_0]);

    %Saving figures automatically:----------------------------------------
    saveas(fig1, strcat('bitmaps\', filename_0, '_'), 'jpg');
    saveas(fig2, ['bitmaps\' filename_0 '3D'], 'jpg');
end
Telephone Channel
MATLAB code
%Telephone channel simulation.
%Reads a recorded word, splits it into overlapping Hamming-windowed frames,
%zeroes every DFT bin outside the fl..fh band, resynthesises the signal by
%overlap-add, plots original vs. filtered spectra and saves the result.
filename_0 = 'excelente';
filename = strcat('outputs\', filename_0, '.wav');
x = wavread(filename);
x = x(:,1);                       %use first channel only, as a column

fl = 300;                         %low cutoff freq. (Hz)
fh = 3500;                        %high cutoff freq. (Hz)
fs = 16000;                       %sampling rate (Hz)
overlap = 75;                     %overlap percentage between frames
w_length = 512;                   %Lw = window length (samples)
N = 1024;                         %FFT length

fl_bin = round(fl * N / fs);      %DFT bin of the low cutoff
fh_bin = round(fh * N / fs);      %DFT bin of the high cutoff
N_DFT = N - w_length;             %total zero padding per frame
x_length = length(x);             %Lx = length of the whole input signal
w = hamming(w_length);            %analysis window (column vector)

shift_length = ceil(((100 - overlap)/100) * w_length);   %hop size
%max(0, ...) keeps the frame count valid for inputs shorter than one window.
p_times = max(0, ceil((x_length - w_length) / shift_length));
w_times = p_times + 1;                                   %number of frames
new_x_length = p_times * shift_length + w_length;
x(x_length + 1:new_x_length) = 0;                        %pad last frame

%Framing and windowing:-------------
mat = zeros(w_length, w_times);
for i = 1:w_times
    w_start = ((i - 1) * shift_length) + 1;
    mat(:,i) = x(w_start:w_start + w_length - 1) .* w;
end

%Centre each frame inside N samples: N_DFT/2 zeros before and after.
mat_defi = zeros(N, w_times);
mat_defi((N_DFT/2) + 1:(N_DFT/2) + w_length, :) = mat;

mat_defi_DFT = fft(mat_defi, N);

%Filtering------------------------
%Zeroing a complex bin is the same as zeroing its magnitude and keeping
%its phase, so the spectrum is masked directly.
OUT_DFT = mat_defi_DFT;
OUT_DFT(1:fl_bin, :) = 0;                    %zeros at the beginning
OUT_DFT(N - fl_bin + 2:N, :) = 0;            %zeros at the end (mirror)
OUT_DFT(fh_bin:(N - fh_bin + 2), :) = 0;     %zeros above fh (both halves)

%Resynthesis:-----------------------------------------------------
%The mask is conjugate-symmetric, so the inverse is real up to rounding.
%real() drops that residue and .' transposes without conjugating.
out_mat_1 = real(ifft(OUT_DFT, N)).';
new_window_size = N_DFT + w_length;
%Overlap-add straight into the output instead of building a
%frames-by-length matrix and summing it.
out = zeros(1, (shift_length * p_times) + new_window_size);
for m = 1:w_times
    idx = (1:new_window_size) + ((m - 1) * shift_length);
    out(idx) = out(idx) + out_mat_1(m,:);
end
peak = max(abs(out));             %normalise by absolute peak
if peak > 0
    out = out ./ peak;
end

%Plots:-------------------------------------------------------------
figure('Color', 'w');
%Plotting channel response:
length_DFT_OUT = length(out);
half = floor(length_DFT_OUT / 2);
OUT_mag = abs(fft(out));
X_mag = abs(fft(x, length_DFT_OUT));
freq = (0:half - 1) .* (fs / length_DFT_OUT);   %bin 1 of the FFT is 0 Hz
X_log = 20*log10(X_mag(1:half) / length_DFT_OUT);
plot(freq, X_log, 'r');
hold on;
OUT_log = 20*log10(OUT_mag(1:half) / length_DFT_OUT);
plot(freq, OUT_log);
axis([1 fs/2 -120 -30]);
title('Spoken Sentence Magnitud Spectrum');
ylabel('Magnitude(dB)');
xlabel('Frequency(Hz)');
legend('original signal', 'filtered signal');

%Saving wavfiles:
wavwrite(out, fs, strcat('outputs\', filename_0, '_filt'));
Unvoiced Fricative Synthesis
MATLAB code
%Unvoiced fricative synthesis.
%White noise is passed through four cascaded resonators (formants), the
%resulting magnitude spectrum is plotted and the sound is saved.
fs = 16000;            %sampling rate (Hz)
time = 1;              %time in seconds

%Input arguments:-------------------------------------------
%Formant1:
BW1 = 300;
fc1 = 2605;
G_dB1 = 1;             %Gain in dB
%Formant2:
BW2 = 500;
fc2 = 3300;
G_dB2 = 6;             %Gain in dB
%Formant3:
BW3 = 300;
fc3 = 4031;
G_dB3 = 3;             %Gain in dB
%Formant4:
BW4 = 2000;
fc4 = 7500;
G_dB4 = 15;            %Gain in dB

%Generator:------------------------------------------------------------
x = rand(time * fs, 1);
x = (2*x - 1);         %uniform noise in [-1, 1]

%Cascade of the four resonators.
x1 = res_filter(x,  fc1, BW1, G_dB1, fs);
x2 = res_filter(x1, fc2, BW2, G_dB2, fs);
x3 = res_filter(x2, fc3, BW3, G_dB3, fs);
x4 = res_filter(x3, fc4, BW4, G_dB4, fs);
x1 = x4;

%Spectrum plot:---------------------------------------
N = 2048;
X1_mag = abs(fft(x1, N));
freq = (0:N/2 - 1) * fs / N;                 %bin 1 of the FFT is 0 Hz
%log10, not the natural log, is required for a dB scale.
plot(freq, 20*log10(X1_mag(1:N/2) / N));
grid on;
xlabel('Frequency (Hz)');
ylabel('Magnitude of the synthesized phoneme (dB)');
axis([0 fs/2 -96 -24]);

x_defi = x1 / max(abs(x1));                  %peak-normalise before saving
wavwrite(x_defi, fs, 'sh_filtered');
Function res_filter
function x1 = res_filter(x,fc,BW,G_dB,fs)
%RES_FILTER Two-pole resonator with peak normalisation and gain.
%   x1 = res_filter(x,fc,BW,G_dB,fs) filters x with the recursion
%       y(n) = A*x(n) + B*y(n-1) + C*y(n-2)
%   scales the result so its absolute peak is 1, then multiplies it by
%   the linear gain that corresponds to G_dB.
%
%   x     input signal
%   fc    resonance centre frequency, same unit as fs
%   BW    resonance bandwidth, same unit as fs
%   G_dB  gain applied after normalisation, in dB
%   fs    sampling rate
%
%   Returns x1, the filtered signal, the same size as x.

%Getting coefficients:---------------------------------------
G = 10^(G_dB/20);                 %dB -> linear amplitude gain
%BW = 2*fc - 4*fc/((2^BW_oct)+1);
r = exp(-2*pi*BW/fs);
C = -r;
B = 2*r * cos(2*pi*fc/fs);
A = 1 - B - C;

%Filtering:------------------------------------------------------------
x1 = filter([A 0 0], [1 -B -C], x);

%normalizing.
peak = max(abs(x1(:)));
if peak > 0                       %avoid 0/0 on all-zero input
    x1 = x1 / peak;
end
%The gain had been swallowed into the comment above and was never applied.
x1 = G * x1;
Time-Varying Tube Synthesis
%Time-varying tube synthesis.
%White noise is fed through seven time-varying resonators, called with
%k = 1..7; the outputs are summed, normalised and saved with the input.
fs = 44100;              %Sampling rate. Any other can be used.
%This assignment had been swallowed into the comment on the line above,
%leaving total_time undefined.
total_time = 4.999999;   %in seconds. In 5 seconds it blows up.
total_samples = floor(fs * total_time);
x = 2*rand(1, total_samples) - 1;        %Noise generator

%Filtering:--------------------------------------------------------------
%Accumulate the seven resonator outputs (formerly y1..y7).
yn = zeros(1, total_samples);
for k = 1:7
    yn = yn + movingResFilter(k, x, fs);
end

%Output:-------------------------------------------
yn = yn ./ max(abs(yn));
wavwrite(yn, fs, 'output')
wavwrite(x, fs, 'input')
Function movingResFilter
function yn = movingResFilter(k,x,fs)
%MOVINGRESFILTER Two-pole resonator whose centre frequency varies in time.
%   yn = movingResFilter(k,x,fs) filters x sample by sample with
%       yn(i) = A*x(i) + B*yn(i-1) + C*yn(i-2)
%   where the coefficients are recomputed for every sample from
%       fc = k / (alpha - betha*t),  BW = fc/Q,  t = i/fs.
%
%   k   numerator of the centre-frequency law (callers pass 1..7)
%   x   input signal (vector)
%   fs  sampling rate
%
%   Returns yn, a 1-by-length(x) row vector. Samples for which fc is not
%   inside (0, fs/2) are set to 0.
Q = 20;
alpha = 2E-3;
betha = 4E-4;
yn_2 = 0;
yn_1 = 0;
yn = zeros(1,length(x));     %Allocating to improve speed !
for i = 1:length(x)
    t = i/fs;
    %fc = (k/(alpha - betha * t)) * (1 + 0.018 * sin(2*pi*2.5*t)); %For vibrating !
    fc = (k/(alpha - betha * t));
    %Antialiasing protection. The fc > 0 test also rejects t > alpha/betha,
    %where fc and BW go negative and the recursion becomes unstable.
    if (fc > 0) && (fc < (fs/2))
        BW = fc/Q;
        r = exp(-2*pi*BW/fs);
        C = -r;
        B = 2*r * cos(2*pi*fc/fs);
        A = 1 - B - C;
        yn(i) = A * x(i) + B * yn_1 + C * yn_2;
    else
        yn(i) = 0;
    end
    yn_2 = yn_1;
    yn_1 = yn(i);
end