Question: I need a clear, detailed explanation of the following MATLAB code.
function outClass = knnclassify(sample, TRAIN, group, K, distance,rule,base)
% KNNCLASSIFY classifies each row of SAMPLE by a K-nearest-neighbor vote
% over the rows of TRAIN, whose class labels are given in GROUP.
%
%   sample   - m-by-d matrix of points to classify
%   TRAIN    - n-by-d matrix of labeled training points
%   group    - label for each row of TRAIN (numeric, logical, char matrix,
%              cell array of strings, or categorical)
%   K        - number of neighbors to consult (default 1)
%   distance - 'euclidean' (default), 'cityblock', 'cosine',
%              'correlation', or 'hamming'; unambiguous prefixes accepted
%   rule     - tie-breaking rule: 'nearest' (default), 'random',
%              'farthest', or 'consensus'
%   base     - path of a file to which the neighbor distances and indices
%              are written.  NOTE(review): BASE is used unconditionally in
%              the file-writing section below, so calling this function
%              with fewer than 7 arguments will fail at fopen — confirm
%              all callers pass BASE.
%
% Returns OUTCLASS, the predicted label for each row of SAMPLE, converted
% back to the same type as GROUP.
bioinfochecknargin(nargin,3,mfilename)
% Map arbitrary labels onto integer class indices 1..ngroups; GROUPS holds
% the label text for each index.
[gindex,groups] = grp2idx(group);
% Discard training rows whose label is missing (grp2idx returns NaN).
nans = find(isnan(gindex));
if ~isempty(nans)
TRAIN(nans,:) = [];
gindex(nans) = [];
end
ngroups = length(groups);
[n,d] = size(TRAIN);
% GROUP must label every training row, and SAMPLE must live in the same
% d-dimensional space as TRAIN.
if size(gindex,1) ~= n
error('Bioinfo:knnclassify:BadGroupLength',...
'The length of GROUP must equal the number of rows in TRAINING.');
elseif size(sample,2) ~= d
error('Bioinfo:knnclassify:SampleTrainingSizeMismatch',...
'SAMPLE and TRAINING must have the same number of columns.');
end
m = size(sample,1);
% --- Validate K: numeric scalar, >= 1, not NaN; default is 1. ---
if nargin < 4
K = 1;
elseif ~isnumeric(K)
error('Bioinfo:knnclassify:KNotNumeric',...
'K must be numeric.');
end
if ~isscalar(K)
error('Bioinfo:knnclassify:KNotScalar',...
'K must be a scalar.');
end
if K<1
error('Bioinfo:knnclassify:KLessThanOne',...
'K must be greater than or equal to 1.');
end
if isnan(K)
error('Bioinfo:knnclassify:KNaN',...
'K cannot be NaN.');
end
% --- Resolve the distance name, allowing unambiguous prefixes. ---
if nargin < 5 || isempty(distance)
distance = 'euclidean';
end
if ischar(distance)
distNames = {'euclidean','cityblock','cosine','correlation','hamming'};
i = find(strncmpi(distance, distNames,numel(distance)));
if length(i) > 1
error('Bioinfo:knnclassify:AmbiguousDistance', ...
'Ambiguous ''distance'' parameter value: %s.', distance);
elseif isempty(i)
error('Bioinfo:knnclassify:UnknownDistance', ...
'Unknown ''distance'' parameter value: %s.', distance);
end
distance = distNames{i};
else
error('Bioinfo:knnclassify:InvalidDistance', ...
'The ''distance'' parameter value must be a string.');
end
% --- Resolve the tie-breaking rule, also by prefix. ---
if nargin < 6
rule = 'nearest';
elseif ischar(rule)
% Accept the common misspelling 'concensus' by patching it to
% 'consensus' before the prefix match.
if strncmpi(rule,'conc',4)
rule(4) = 's';
end
ruleNames = {'random','nearest','farthest','consensus'};
i = find(strncmpi(rule, ruleNames,numel(rule)));
if isempty(i)
error('Bioinfo:knnclassify:UnknownRule', ...
'Unknown ''Rule'' parameter value: %s.', rule);
end
rule = ruleNames{i};
else
error('Bioinfo:knnclassify:InvalidRule', ...
'The ''rule'' parameter value must be a string.');
end
% Find, for each sample row, its K nearest training rows (distances
% ascending) and their indices into TRAIN.
[dSorted,dIndex] = distfun(sample,TRAIN,distance,K);
if K >1
% Convert neighbor indices to class indices; force a row vector when
% there is a single sample so the (outer,inner) indexing below works.
classes = gindex(dIndex);
if size(classes,2) == 1
classes = classes';
end
% counts(i,g) = number of the K neighbors of sample i in class g.
counts = zeros(m,ngroups);
for outer = 1:m
for inner = 1:K
counts(outer,classes(outer,inner)) = counts(outer,classes(outer,inner)) + 1;
end
end
% L is the winning vote count; outClass the winning class index
% (max breaks ties by taking the lowest class index).
[L,outClass] = max(counts,[],2);
if strcmp(rule,'consensus')
% Consensus requires all K votes to agree; otherwise assign a
% synthetic "no consensus" class ngroups+1.
noconsensus = (L~=K);
if any(noconsensus)
outClass(noconsensus) = ngroups+1;
if isnumeric(group) || islogical(group)
groups(end+1) = {'NaN'};
else
groups(end+1) = {''};
end
end
else % we need to check case where L <= K/2 for possible ties
checkRows = find(L<=(K/2));
for i = 1:numel(checkRows)
% Classes whose vote count equals the winner's are tied.
ties = counts(checkRows(i),:) == L(checkRows(i));
numTies = sum(ties);
if numTies > 1
choice = find(ties);
switch rule
case 'random'
% Pick one of the tied classes uniformly at random.
tb = randsample(numTies,1);
outClass(checkRows(i)) = choice(tb);
case 'nearest'
% Walk the neighbors nearest-first; the first one
% belonging to a tied class wins.
for inner = 1:K
if ismember(classes(checkRows(i),inner),choice)
outClass(checkRows(i)) = classes(checkRows(i),inner);
break
end
end
case 'farthest'
% Walk the neighbors farthest-first; the first one
% belonging to a tied class wins.
for inner = K:-1:1
if ismember(classes(checkRows(i),inner),choice)
outClass(checkRows(i)) = classes(checkRows(i),inner);
break
end
end
end
end
end
end
else
% K == 1: the single nearest neighbor decides directly.
outClass = gindex(dIndex);
end
% --- Convert class indices back to the caller's label type. ---
if isa(group,'categorical')
labels = getlabels(group);
if isa(group,'nominal')
groups = nominal(groups,[],labels);
else
groups = ordinal(groups,[],getlabels(group));
end
outClass = groups(outClass);
elseif isnumeric(group) || islogical(group)
% grp2idx stored numeric labels as strings; convert back.
groups = str2num(char(groups));
outClass = groups(outClass);
elseif ischar(group)
groups = char(groups);
outClass = groups(outClass,:);
else
outClass = groups(outClass);
end
% --- Persist the neighbor distances and indices to the file BASE. ---
% NOTE(review): num2str on a matrix yields a char matrix, and fprintf with
% '%s ' flattens it column-wise; the round-trip through dlmread and the
% second write then reformats the data tab-separated.  Confirm this is the
% intended file format — the vertcat(MM) call is a no-op on a single input.
fid = fopen(base, 'w');
fprintf(fid,'%s ', num2str(dSorted));
fprintf(fid,'%s ', num2str(dIndex));
fclose(fid);
MM = dlmread(base);
MN = vertcat(MM);
fid = fopen(base, 'w');
fprintf(fid,'%d\t%d ', MN);
fclose(fid);
function [dSorted,dIndex] = distfun(Sample, Train, dist,K)
% For every row of SAMPLE, find its K nearest rows of TRAIN under the
% distance measure DIST.
%   Sample  - m-by-d matrix of query points
%   Train   - n-by-d matrix of training points
%   dist    - 'euclidean','cityblock','cosine','correlation', or 'hamming'
%   K       - number of neighbors to keep per query row
% Returns dSorted (m-by-K distances, ascending per row) and dIndex
% (m-by-K row indices into Train).
numSample = size(Sample,1);
dSorted = zeros(numSample,K);
dIndex = zeros(numSample,K);
switch dist
case 'euclidean'
% Squared Euclidean distance: squaring preserves neighbor order.
for row = 1:numSample
diffs = bsxfun(@minus,Train,Sample(row,:));
[dSorted(row,:),dIndex(row,:)] = getBestK(sum(diffs.^2, 2),K);
end
case 'cityblock'
for row = 1:numSample
diffs = bsxfun(@minus,Train,Sample(row,:));
[dSorted(row,:),dIndex(row,:)] = getBestK(sum(abs(diffs), 2),K);
end
case {'cosine'}
normSample = sqrt(sum(Sample.^2, 2));
normTrain = sqrt(sum(Train.^2, 2));
% Near-zero rows make the cosine ill-defined; warn the caller.
if any(min(normTrain) <= eps(max(normTrain))) || any(min(normSample) <= eps(max(normSample)))
warning('Bioinfo:knnclassify:ConstantDataForCos', ...
['Some points have small relative magnitudes, making them ', ...
'effectively zero. Either remove those points, or choose a ', ...
'distance other than ''cosine''.']);
end
% Normalize the training rows once; each query row is normalized
% on use via normSample.
Train = bsxfun(@rdivide,Train,normTrain);
for row = 1:numSample
cosDist = 1 - (Train * Sample(row,:)') ./ normSample(row);
[dSorted(row,:),dIndex(row,:)] = getBestK(cosDist,K);
end
case {'correlation'}
% Center every row, then proceed exactly as for cosine distance.
Sample = bsxfun(@minus,Sample,mean(Sample,2));
Train = bsxfun(@minus,Train,mean(Train,2));
normSample = sqrt(sum(Sample.^2, 2));
normTrain = sqrt(sum(Train.^2, 2));
% Rows with near-zero spread have no meaningful correlation.
if any(min(normTrain) <= eps(max(normTrain))) || any(min(normSample) <= eps(max(normSample)))
warning('Bioinfo:knnclassify:ConstantDataForCorr', ...
['Some points have small relative standard deviations, making them ', ...
'effectively constant. Either remove those points, or choose a ', ...
'distance other than ''correlation''.']);
end
Train = bsxfun(@rdivide,Train,normTrain);
for row = 1:numSample
corrDist = 1 - (Train * Sample(row,:)') ./ normSample(row);
[dSorted(row,:),dIndex(row,:)] = getBestK(corrDist,K);
end
case 'hamming'
% Hamming distance is defined here only for 0/1 data.
if ~all(ismember(Sample(:),[0 1]))||~all(ismember(Train(:),[0 1]))
error('Bioinfo:knnclassify:HammingNonBinary',...
'Non-binary data cannot be classified using Hamming distance.');
end
p = size(Sample,2);
for row = 1:numSample
hamDist = sum(abs(bsxfun(@minus,Train,Sample(row,:))), 2) / p;
[dSorted(row,:),dIndex(row,:)] = getBestK(hamDist,K);
end
end
function [sorted,index] = getBestK(Dk,K)
% Return the K smallest entries of the distance vector Dk, ascending,
% together with their positions in Dk.
%   Dk     - column vector of distances to every training point
%   K      - number of neighbors requested
if K>1
% Full sort, then keep the leading K entries.
[allSorted,allIndex] = sort(Dk);
sorted = allSorted(1:K);
index = allIndex(1:K);
else
% Single neighbor: min avoids the cost of a full sort.
[sorted,index] = min(Dk);
end
Step-by-Step Solution
There are 3 steps involved.
Get step-by-step solutions from verified subject-matter experts.
