-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdemo_1_USPEC.m
104 lines (90 loc) · 3.91 KB
/
demo_1_USPEC.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This is a demo for the U-SPEC algorithm, which is proposed in the %
% following paper: %
% %
% D. Huang, C.-D. Wang, J.-S. Wu, J.-H. Lai, and C.-K. Kwoh. %
% "Ultra-Scalable Spectral Clustering and Ensemble Clustering." %
% IEEE Transactions on Knowledge and Data Engineering, 2020. %
% DOI: https://doi.org/10.1109/TKDE.2019.2903410 %
% %
% The code has been tested in Matlab R2016a and Matlab R2016b. %
% Website: https://www.researchgate.net/publication/330760669 %
% Written by Huang Dong. ([email protected]) %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function demo_1_USPEC()
%DEMO_1_USPEC Run the U-SPEC algorithm multiple times and show its average performance.
%
%   demo_1_USPEC() loads the dataset selected below from 'data_<name>.mat'
%   (variables 'fea' and 'gt'), runs USPEC on it cntTimes times with the
%   number of clusters set to the number of distinct values in 'gt', and
%   prints the NMI score of each run (as returned by computeNMI) followed
%   by the mean NMI over all runs.
%
%   If the .mat file is not found and the selected name is one of the
%   synthetic datasets, synthesizeLargescaleDatasets is called first;
%   otherwise a message is displayed and the function returns.
%
%   The function takes no inputs and returns no outputs; all results are
%   written to the command window.

% NOTE: 'clear all' is intentionally not called here. A function starts
% with an empty workspace, so it would only purge globals, persistent
% variables, cached functions and breakpoints of the caller's session.
close all;

%% Load the data.
% Please uncomment the dataset that you want to use and comment the other ones.
% dataName = 'PenDigits'; % Real datasets
% dataName = 'USPS';
% dataName = 'Letters';
% dataName = 'MNIST';
% dataName = 'Covertype';
dataName = 'TB1M'; % Synthetic datasets
% dataName = 'SF2M';
% dataName = 'CC5M';
% dataName = 'CG10M';
% dataName = 'Flower20M';

% Names of the datasets that can be generated on demand.
syntheticNames = {'TB1M', 'SF2M', 'CC5M', 'CG10M', 'Flower20M'};

% Load the dataset.
dataNameFull = ['data_',dataName,'.mat'];
% Restrict the lookup to files so that variables, classes, etc. with a
% matching name are not considered.
if exist(dataNameFull, 'file') ~= 2
    if ismember(dataName, syntheticNames)
        synthesizeLargescaleDatasets(dataName);
        pause(0.01); % Pause for a while to plot the figure of the synthesized dataset.
    else
        disp('The dataset doesn''t exist!');
        return;
    end
end

% Pre-declare the variables so that they exist (as empty) even if the
% .mat file does not contain them; load only warns in that case.
gt = [];
fea = [];
load(dataNameFull,'fea','gt');
if isempty(fea) || isempty(gt)
    % Without features or ground-truth labels neither the clustering nor
    % the NMI evaluation below can be carried out.
    disp(['The file ',dataNameFull,' doesn''t provide non-empty ''fea'' and ''gt''!']);
    return;
end
[N, d] = size(fea);

%% Set up
k = numel(unique(gt)); % The number of clusters
cntTimes = 20; % The number of times that the USPEC algorithm will be run.

%% Run USPEC
nmiScores = zeros(cntTimes,1); % One NMI score per run.
disp('.');
disp(['N = ',num2str(N)]);
disp('.');
for runIdx = 1:cntTimes
    disp('**************************************************************');
    disp(['Run ', num2str(runIdx),':']);
    disp('**************************************************************');
    disp('.');
    disp('Performing U-SPEC ...');
    disp('.');

    % You can use the default parameters (p=1000, KNN=5, distance = 'euclidean')
    tic;
    Label = USPEC(fea, k);
    toc;

    % Or you can set up parameters by yourself.
    % tic;
    % distance = 'euclidean';
    % p = 1000; % Number of representatives
    % KNN = 5; % Number of nearest neighbors
    % Label = USPEC(fea, k, distance, p, KNN);
    % toc;

    % If you are dealing with text datasets or other very high dimensional
    % datasets, the cosine distance is suggested to be used.
    % tic;
    % distance = 'cosine';
    % Label = USPEC(fea, k, distance);
    % toc;

    disp('.');
    disp('--------------------------------------------------------------');
    % Compare the clustering result against the ground-truth labels.
    nmiScores(runIdx) = computeNMI(Label,gt);
    disp(['The NMI score at Run ',num2str(runIdx), ': ',num2str(nmiScores(runIdx))]);
    disp('--------------------------------------------------------------');
end

%% Report the average performance over all runs.
disp('**************************************************************');
disp([' ** Average Performance over ',num2str(cntTimes),' runs on the ',dataName,' dataset **']);
disp(['Sample size: N = ', num2str(N)]);
disp(['Dimension: d = ', num2str(d)]);
disp('--------------------------------------------------------------');
disp(['Average NMI score: ',num2str(mean(nmiScores))]);
disp('--------------------------------------------------------------');
disp('**************************************************************');