Студопедия
Случайная страница | ТОМ-1 | ТОМ-2 | ТОМ-3
Архитектура | Биология | География | Другое | Иностранные языки
Информатика | История | Культура | Литература | Математика
Медицина | Механика | Образование | Охрана труда | Педагогика
Политика | Право | Программирование | Психология | Религия
Социология | Спорт | Строительство | Физика | Философия
Финансы | Химия | Экология | Экономика | Электроника

Листинг myVAD.m



Читайте также:
  1. Листинг 11.2. Структура данных игрока.
  2. Листинг 11.3. Функции полета мухи.
  3. Листинг 11.4. Муравейник (ANTS.С).
  4. Листинг 11.5. Падение мяча (BALL.C).
  5. Листинг 11.6. Идеальный газ (GAS.C).
  6. Листинг 12.2. Шпионим за часами (SPY.C).
  7. Листинг 12.3. Автономное управление светом.

 

function trimmedX = myVAD(x)

 

% Syntax: trimmedSample = myVAD(samplex);

% This function accepts an audio sample 'samplex' as input and returns a

% trimmed down version with non-speech sections trimmed off. Also known as

% voice activity detection, it utilises the algorithm due to Rabiner &

% Sambur (1975)

 

Ini = 0.1; % Initial silence duration in seconds

Ts = 0.01; % Frame width in seconds

Tsh = 0.005; % Frame shift in seconds

Fs = 16000; % Sampling Frequency

counter1 = 0;

counter2 = 0;

counter3 = 0;

counter4 = 0;

ZCRCountf = 0; % Stores forward count of crossing rate > IZCT

ZCRCountb = 0; % As above, for backward count

ZTh = 40; % Zero crossing comparison rate for threshold

w_sam = fix(Ts*Fs); % No of Samples/window

o_sam = fix(Tsh*Fs); % No of samples/overlap

lengthX = length(x);

segs = fix((lengthX-w_sam)/o_sam)+1; % Number of segments in speech signal

sil = fix((Ini-Ts)/Tsh)+1; % Number of segments in silent period

win = hamming(w_sam);

Limit = o_sam*(segs-1)+1; % Start index of last segment

 

FrmIndex = 1:o_sam:Limit; % Vector containing starting index for each segment

ZCR_Vector = zeros(1,segs); % Vector to hold zero crossing rate for all segments

 

% Below code computes and returns zero crossing rates for all segments in

% speech sample

for t = 1:segs

ZCRCounter = 0;

nextIndex = (t-1)*o_sam+1;

for r = nextIndex+1:(nextIndex+w_sam-1)

if (x(r) >= 0) && (x(r-1) >= 0)

 

elseif (x(r) >= 0) && (x(r-1) < 0)

ZCRCounter = ZCRCounter + 1;

elseif (x(r) < 0) && (x(r-1) < 0)

elseif (x(r) < 0) && (x(r-1) >= 0)

ZCRCounter = ZCRCounter + 1;

end

end

ZCR_Vector(t) = ZCRCounter;

end

% Below code computes and returns frame energy for all segments in speech

% sample

Erg_Vector = zeros(1,segs);

for u = 1:segs

nextIndex = (u-1)*o_sam+1;

Energy = x(nextIndex:nextIndex+w_sam-1).*win;

Erg_Vector(u) = sum(abs(Energy));

end

 

IMN = mean(Erg_Vector(1:sil)); % Mean silence energy (noise energy)

IMX = max(Erg_Vector); % Maximum energy for entire utterance

I1 = 0.03 * (IMX-IMN) + IMN; % I1 & I2 are Initial thresholds

I2 = 4 * IMN;

ITL = min(I1,I2); % Lower energy threshold

ITU = 5 * ITL; % Upper energy threshold

IZC = mean(ZCR_Vector(1:sil)); % mean zero crossing rate for silence region

stdev = std(ZCR_Vector(1:sil)); % standard deviation of crossing rate for

% silence region

IZCT = min(ZTh,IZC+2*stdev); % Zero crossing rate threshold

indexi = zeros(1,lengthX); % Four single-row vectors are created

indexj = indexi; % in these lines to facilitate computation below

indexk = indexi;

indexl = indexi;

 

% Search forward for frame with energy greater than ITU

for i = 1:length(Erg_Vector)

if (Erg_Vector(i) > ITU)

counter1 = counter1 + 1;

indexi(counter1) = i;

end

end

ITUs = indexi(1);

% Search further forward for frame with energy greater than ITL

for j = ITUs:-1:1

if (Erg_Vector(j) < ITL)

counter2 = counter2 + 1;

indexj(counter2) = j;

end

end

start = indexj(1)+1;

 

Erg_Vectorf = fliplr(Erg_Vector);% Flips round the energy vector

% Search forward for frame with energy greater than ITU

% This is equivalent to searching backward from last sample for energy > ITU

for k = 1:length(Erg_Vectorf)

if (Erg_Vectorf(k) > ITU)

counter3 = counter3 + 1;

indexk(counter3) = k;

end

end

ITUf = indexk(1);

% Search further forward for frame with energy greater than ITL

for l = ITUf:-1:1

if (Erg_Vectorf(l) < ITL)

counter4 = counter4 + 1;

indexl(counter4) = l;

end

end

 

finish = length(Erg_Vector)-indexl(1)+1;% Tentative finish index

 

% Search back from start index for crossing rates higher than IZCT

 

BackSearch = min(start,25);

for m = start:-1:start-BackSearch+1

rate = ZCR_Vector(m);

if rate > IZCT

ZCRCountb = ZCRCountb + 1;

realstart = m;

end

end

if ZCRCountb > 3

start = realstart; % If IZCT is exceeded in more than 3 frames

% set start to last index where IZCT is exceeded

end

 

% Search forward from finish index for crossing rates higher than IZCT

FwdSearch = min(length(Erg_Vector)-finish,25);

for n = finish+1:finish+FwdSearch

rate = ZCR_Vector(n);

if rate > IZCT

ZCRCountf = ZCRCountf + 1;

realfinish = n;

end

end

if ZCRCountf > 3

finish = realfinish; % If IZCT is exceeded in more than 3 frames

% set finish to last index where IZCT is exceeded

end

 

x_start = FrmIndex(start); % actual sample index for frame 'start'

x_finish = FrmIndex(finish-1); % actual sample index for frame 'finish'

trimmedX = x(x_start:x_finish); %T rim speech sample by start and finish indices

 


Дата добавления: 2015-07-11; просмотров: 44 | Нарушение авторских прав






mybiblioteka.su - 2015-2024 год. (0.008 сек.)