Question:

Hi,

I need a clear, detailed explanation of this MATLAB code, please:

function outClass = knnclassify(sample, TRAIN, group, K, distance, rule, base)
% Classify each row of SAMPLE by a K-nearest-neighbour vote over the labelled
% rows of TRAIN, then write the neighbour distances and indices to the file BASE.

% Require at least the first three input arguments.
bioinfochecknargin(nargin, 3, mfilename)

% Convert the group labels into integer indices (gindex) plus the list of
% unique group names (groups).
[gindex, groups] = grp2idx(group);

% Remove training rows whose group label is missing (NaN index).
nans = find(isnan(gindex));
if ~isempty(nans)
    TRAIN(nans,:) = [];
    gindex(nans) = [];
end
ngroups = length(groups);

% Check that the labels match the training set and that the samples have the
% same number of features (columns) as the training data.
[n,d] = size(TRAIN);
if size(gindex,1) ~= n
    error('Bioinfo:knnclassify:BadGroupLength',...
        'The length of GROUP must equal the number of rows in TRAINING.');
elseif size(sample,2) ~= d
    error('Bioinfo:knnclassify:SampleTrainingSizeMismatch',...
        'SAMPLE and TRAINING must have the same number of columns.');
end
m = size(sample,1);

% Validate K: default is 1, and it must be a numeric scalar >= 1 that is not NaN.
if nargin < 4
    K = 1;
elseif ~isnumeric(K)
    error('Bioinfo:knnclassify:KNotNumeric',...
        'K must be numeric.');
end
if ~isscalar(K)
    error('Bioinfo:knnclassify:KNotScalar',...
        'K must be a scalar.');
end
if K < 1
    error('Bioinfo:knnclassify:KLessThanOne',...
        'K must be greater than or equal to 1.');
end
if isnan(K)
    error('Bioinfo:knnclassify:KNaN',...
        'K cannot be NaN.');
end

% Validate the distance metric: default is 'euclidean'; unambiguous,
% case-insensitive partial matches of the supported names are accepted.
if nargin < 5 || isempty(distance)
    distance = 'euclidean';
end
if ischar(distance)
    distNames = {'euclidean','cityblock','cosine','correlation','hamming'};
    i = find(strncmpi(distance, distNames, numel(distance)));
    if length(i) > 1
        error('Bioinfo:knnclassify:AmbiguousDistance', ...
            'Ambiguous ''distance'' parameter value: %s.', distance);
    elseif isempty(i)
        error('Bioinfo:knnclassify:UnknownDistance', ...
            'Unknown ''distance'' parameter value: %s.', distance);
    end
    distance = distNames{i};
else
    error('Bioinfo:knnclassify:InvalidDistance', ...
        'The ''distance'' parameter value must be a string.');
end

% Validate the tie-breaking rule: default is 'nearest'; the 4th character of
% 'conc...' is rewritten to 's' so that the misspelling 'concensus' is tolerated.
if nargin < 6
    rule = 'nearest';
elseif ischar(rule)
    if strncmpi(rule,'conc',4)
        rule(4) = 's';
    end
    ruleNames = {'random','nearest','farthest','consensus'};
    i = find(strncmpi(rule, ruleNames, numel(rule)));
    if isempty(i)
        error('Bioinfo:knnclassify:UnknownRule', ...
            'Unknown ''Rule'' parameter value: %s.', rule);
    end
    rule = ruleNames{i};
else
    error('Bioinfo:knnclassify:InvalidRule', ...
        'The ''rule'' parameter value must be a string.');
end

% For every sample, find its K nearest training points: dSorted holds the
% sorted distances and dIndex the corresponding row indices into TRAIN.
[dSorted, dIndex] = distfun(sample, TRAIN, distance, K);

if K > 1
    % classes(i,j) is the group index of the j-th nearest neighbour of sample i.
    classes = gindex(dIndex);
    if size(classes,2) == 1
        classes = classes';
    end
    % Count the votes each group receives from the K neighbours.
    counts = zeros(m, ngroups);
    for outer = 1:m
        for inner = 1:K
            counts(outer,classes(outer,inner)) = counts(outer,classes(outer,inner)) + 1;
        end
    end
    % L is the winning vote count and outClass the winning group index.
    [L, outClass] = max(counts, [], 2);
    if strcmp(rule,'consensus')
        % Consensus rule: all K neighbours must agree; otherwise the sample is
        % assigned to an extra group (labelled 'NaN' or empty) appended to groups.
        noconsensus = (L ~= K);
        if any(noconsensus)
            outClass(noconsensus) = ngroups + 1;
            if isnumeric(group) || islogical(group)
                groups(end+1) = {'NaN'};
            else
                groups(end+1) = {''};
            end
        end
    else
        % A tie is only possible when the winning count is <= K/2, so only
        % those rows need to be checked.
        checkRows = find(L <= (K/2));
        for i = 1:numel(checkRows)
            ties = counts(checkRows(i),:) == L(checkRows(i));
            numTies = sum(ties);
            if numTies > 1
                choice = find(ties);
                switch rule
                    case 'random'
                        % Pick one of the tied groups at random.
                        tb = randsample(numTies,1);
                        outClass(checkRows(i)) = choice(tb);
                    case 'nearest'
                        % Use the closest neighbour belonging to one of the
                        % tied groups to break the tie.
                        for inner = 1:K
                            if ismember(classes(checkRows(i),inner),choice)
                                outClass(checkRows(i)) = classes(checkRows(i),inner);
                                break
                            end
                        end
                    case 'farthest'
                        % Use the farthest neighbour belonging to one of the
                        % tied groups to break the tie.
                        for inner = K:-1:1
                            if ismember(classes(checkRows(i),inner),choice)
                                outClass(checkRows(i)) = classes(checkRows(i),inner);
                                break
                            end
                        end
                end
            end
        end
    end
else
    % K == 1: simply take the class of the single nearest neighbour.
    outClass = gindex(dIndex);
end

% Convert the numeric class indices back to the same type as GROUP
% (categorical, numeric/logical, character array, or cell array of strings).
if isa(group,'categorical')
    labels = getlabels(group);
    if isa(group,'nominal')
        groups = nominal(groups,[],labels);
    else
        groups = ordinal(groups,[],getlabels(group));
    end
    outClass = groups(outClass);
elseif isnumeric(group) || islogical(group)
    groups = str2num(char(groups));
    outClass = groups(outClass);
elseif ischar(group)
    groups = char(groups);
    outClass = groups(outClass,:);
else
    outClass = groups(outClass);
end

% Write the neighbour distances and then the neighbour indices to the text
% file BASE, read the numbers back with dlmread, and rewrite them to BASE
% separated by tabs.
fid = fopen(base, 'w');
fprintf(fid,'%s ', num2str(dSorted));
fprintf(fid,'%s ', num2str(dIndex));
fclose(fid);
MM = dlmread(base);
MN = vertcat(MM);
fid = fopen(base, 'w');
fprintf(fid,'%d\t%d ', MN);
fclose(fid);

function [dSorted, dIndex] = distfun(Sample, Train, dist, K)
% For every row of Sample, return the distances (dSorted) and row indices
% (dIndex) of its K nearest rows in Train under the chosen metric.
numSample = size(Sample,1);
dSorted = zeros(numSample,K);
dIndex = zeros(numSample,K);
switch dist
    case 'euclidean'
        % We actually calculate the squared Euclidean distance; the ranking
        % (and hence the K nearest neighbours) is unchanged.
        for i = 1:numSample
            Dk = sum(bsxfun(@minus,Train,Sample(i,:)).^2, 2);
            [dSorted(i,:),dIndex(i,:)] = getBestK(Dk,K);
        end
    case 'cityblock'
        % Sum of absolute coordinate differences (Manhattan distance).
        for i = 1:numSample
            Dk = sum(abs(bsxfun(@minus,Train,Sample(i,:))), 2);
            [dSorted(i,:),dIndex(i,:)] = getBestK(Dk,K);
        end
    case {'cosine'}
        % One minus the cosine of the angle between the two row vectors; warn
        % if any row has near-zero norm, since the metric is then meaningless.
        normSample = sqrt(sum(Sample.^2, 2));
        normTrain = sqrt(sum(Train.^2, 2));
        if any(min(normTrain) <= eps(max(normTrain))) || any(min(normSample) <= eps(max(normSample)))
            warning('Bioinfo:knnclassify:ConstantDataForCos', ...
                ['Some points have small relative magnitudes, making them ', ...
                'effectively zero. Either remove those points, or choose a ', ...
                'distance other than ''cosine''.']);
        end
        Train = Train ./ normTrain(:,ones(1,size(Train,2)));
        for i = 1:numSample
            Dk = 1 - (Train * Sample(i,:)') ./ normSample(i);
            [dSorted(i,:),dIndex(i,:)] = getBestK(Dk,K);
        end
    case {'correlation'}
        % Same as the cosine distance, but each row is mean-centred first,
        % i.e. one minus the linear correlation between the rows.
        Sample = bsxfun(@minus,Sample,mean(Sample,2));
        Train = bsxfun(@minus,Train,mean(Train,2));
        normSample = sqrt(sum(Sample.^2, 2));
        normTrain = sqrt(sum(Train.^2, 2));
        if any(min(normTrain) <= eps(max(normTrain))) || any(min(normSample) <= eps(max(normSample)))
            warning('Bioinfo:knnclassify:ConstantDataForCorr', ...
                ['Some points have small relative standard deviations, making them ', ...
                'effectively constant. Either remove those points, or choose a ', ...
                'distance other than ''correlation''.']);
        end
        Train = Train ./ normTrain(:,ones(1,size(Train,2)));
        for i = 1:numSample
            Dk = 1 - (Train * Sample(i,:)') ./ normSample(i);
            [dSorted(i,:),dIndex(i,:)] = getBestK(Dk,K);
        end
    case 'hamming'
        % Fraction of coordinates that differ; only defined for 0/1 data.
        if ~all(ismember(Sample(:),[0 1])) || ~all(ismember(Train(:),[0 1]))
            error('Bioinfo:knnclassify:HammingNonBinary',...
                'Non-binary data cannot be classified using Hamming distance.');
        end
        p = size(Sample,2);
        for i = 1:numSample
            Dk = sum(abs(bsxfun(@minus,Train,Sample(i,:))), 2) / p;
            [dSorted(i,:),dIndex(i,:)] = getBestK(Dk,K);
        end
end

function [sorted,index] = getBestK(Dk,K)
% Return the K smallest distances in Dk and their indices; for K == 1 a
% simple min avoids the cost of a full sort.
if K > 1
    [sorted,index] = sort(Dk);
    sorted = sorted(1:K);
    index = index(1:K);
else
    [sorted,index] = min(Dk);
end
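For reference, here is a minimal sketch of how the function above might be called, assuming the code is saved as knnclassify.m on the MATLAB path (it then shadows the Bioinformatics Toolbox function of the same name) and that bioinfochecknargin and grp2idx are available from the Bioinformatics and Statistics Toolboxes. The training data, labels, and the output file name 'neighbours.txt' are made up purely for illustration:

% Hypothetical example: two well-separated classes in 2-D.
TRAIN  = [1.0 1.0; 1.2 0.9; 8.0 8.0; 7.9 8.2];   % one training point per row
group  = {'A'; 'A'; 'B'; 'B'};                    % class label for each row of TRAIN
sample = [1.1 1.0];                               % one query point to classify

% 3 nearest neighbours, (squared) Euclidean distance, 'nearest' tie-break;
% the neighbour distances and indices are also dumped to 'neighbours.txt'.
outClass = knnclassify(sample, TRAIN, group, 3, 'euclidean', 'nearest', 'neighbours.txt')
% Two of the three nearest neighbours are labelled 'A', so outClass = {'A'}.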
