-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathcopulaClassification.m
More file actions
105 lines (88 loc) · 2.98 KB
/
copulaClassification.m
File metadata and controls
105 lines (88 loc) · 2.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
function [ TY ] = copulaClassification( family, method, TX, X, Y )
%COPULACLASSIFICATION Classifier based on copula. Uses given copula family
%and fitting method to model each class of the input sample X. Uses MAP
%rule to classify each sample from testing set TX. Returns vector TY of
%chosen classes for each sample from TX.
% Size and dimensions of the training dataset
n = size(X, 1);
% Size of the test dataset
t = size(TX, 1);
% Obtain list of classes
K = unique(Y);
numClasses = numel(K);
% Test that classes are ordered
assert(isequal(K, sort(K)), 'Classes do not have the right order.');
% Compute prior probabilities for each class
prior = zeros(numClasses, 1);
for i=1:numClasses
prior(i) = sum(Y == K(i)) / n;
end
% Fit a copula for each class depending on the fit method
L = cell(numClasses, 1);
for i=1:numClasses
dbg('copulacls', 3, 'Fitting copulas for class %d.\n', i);
L{i} = likelihoodForClass(family, method, X(Y == i, :), TX);
end
% Compute posterior probabilities for each class.
posterior = zeros(t, numClasses);
for i=1:numClasses
posterior(:, i) = prod(L{i}, 2) * prior(i);
end
% For each sample choose class with the highest likelihood and if they are
% same use highest copula likelihood, otherwise choose randomly.
TY = zeros(t, 1);
for i=1:t
maxIndices = allmax(posterior(i, :));
if numel(maxIndices) == 1
TY(i) = maxIndices;
else
maxIndices = allmax(cellfun(@(x) x(i,1), L));
if numel(maxIndices) == 1
TY(i) = maxIndices;
else
% Choose random value
TY(i) = randi(numClasses);
end
end
end
end
function [ L ] = likelihoodForClass(family, method, X, TX)
%LIKELIHOODFORCLASS Models data of the single class using copula and
%computes the likelihood of testing sample for this class.
% Run preprocessing if required.
if ismember(family, {'claytonhac*', 'gumbelhac*', 'frankhac*'})
P = hac.preprocess( family(1:end-4), X, method );
X = X * P;
% Test data need to be preprocessed too
TX = TX * P;
family = family(1:end-1);
end
% Uniform data
if strcmp(method, 'CML')
U = pseudoObservations(X);
elseif strcmp(method, 'IFM')
margins = fitMargins(X);
U = probabilityTransform(X, margins);
else
error('Unknown method %s', method);
end
% Fit copula to uniformed data
copulaparams = copula.fit(family, U);
% Compute likelihood for estimated copula
d = size(X, 2);
t = size(TX, 1);
L = zeros(t, d+1);
if strcmp(method, 'CML')
L(:,1) = copula.pdf(copulaparams, empiricalCdf(X, TX));
L(:,2:d+1) = kernelDensity(X, TX);
elseif strcmp(method, 'IFM')
L(:,1) = copula.pdf(copulaparams, probabilityTransform(TX, margins));
L(:,2:d+1) = density(TX, margins);
else
error('Unknown method %s', method);
end
end
function [ xi ] = allmax( x )
%ALLMAX Given a vector x provides indices of all maximum values.
xi = find(x == max(x));
end