% Cross-validation driver comparing two SVM kernels via svmCVOneVsRest:
%   1. 'tranrbf' - RBF kernel "under translation", evaluated once per cached
%      heat-kernel file found in heat_savedir and per gamma in trbfgamma.
%   2. 'stndrbf' - off-the-shelf RBF kernel, evaluated per gamma in rbfgamma.
% Results (tacc, acc), the gamma grids and the per-file (c,t) parameters are
% saved to [workdir 'acc_svm_rbf_q.7k.mat'].
%
% Requires the helpers on ./helper/ (normalizeRows, transD2tf,
% svmCVOneVsRest, clear_except) and the .mat files referenced below.

addpath('./helper/');
workdir = './matfiles/';
heat_savedir = [workdir 'heat_kernels_q/'];
acc_savedir = [workdir 'acc/'];   % NOTE(review): not used anywhere in this script

%% --- Prep For CV -------------------------------------------------------------
% The commented-out code below is what produced OneVsRest.7k.mat; it is kept
% for reference. The cached result is loaded instead of being recomputed.
%maxNumCachedPerClass = 4000;
%numFolds = 20;
%numTrainPerClass = 250;
%numTestPerClass = 100;
%c1labels = {'M11\d*','M12\d*','M13\d*','M14\d*'};
%load([workdir 'topics.mat']);
%load([workdir 'member.7k.mat']);
%
%% class1 will have label '1' while class2 will have label '-1'
%[ids trn_samps tst_samps trn_zs tst_zs] = prepOneVsRest( c1labels, topics, member, ...
%    maxNumCachedPerClass, numFolds, numTrainPerClass, numTestPerClass );
%
%clear topics member;
%% save all variables here
%save([workdir 'OneVsRest.7k.mat']);

% Presumably provides ids, trn_samps, tst_samps, trn_zs, tst_zs and numFolds
% (they are used below but never assigned in this script) - verify against
% the contents of the .mat file.
load([workdir 'OneVsRest.7k.mat']);

%% --- Initialization ----------------------------------------------------------
load([workdir 'tf.7k.mat']);
load([workdir 'doclen.7k.mat']);   % a row vector equiv to sum(tc,1)
tf = tf(:,ids);                    % prune unused documents (cols)
doclen = doclen(ids);              % prune unused documents (cols)

%% remove unused words (rows)
%% this is not necessary since it would only affect the transD2 computation,
%% which never divides by the doc-freq of words
%inactivewords = find(~any(tf,2));
%tf(inactivewords,:) = []; % (rows)
%tc(inactivewords,:) = []; % (rows)

filelist = dir(heat_savedir);
ptrn = 'Hc(\d+)t(\d+)';            % kernel file names encode c and t (x100)
trbfgamma = [.125 .25 .5 1 2];

% --- Loop through parameters to test translation kernel ----------------------
fprintf('CV RBF Kernel under translation\n');
ii = 0;                            % counts kernel files actually processed
for fln = {filelist.name}          % fln is a 1x1 cell holding one file name
    file = char(fln);
    match = regexp(file,ptrn,'tokens');
    % jvd: by re-checking file existence, we can control execution by removing,
    % but not adding kernels
    % Short-circuit || so exist() is only evaluated for names that match.
    if numel(match)==0 || ~exist([heat_savedir file],'file')
        continue;
    end
    ii = ii+1;
    fprintf('%s\n',file);

    % The tokens are pure digit strings (\d+), so str2double is sufficient
    % and avoids the eval-based str2num.
    c = str2double(match{1}{1})/100;
    t = str2double(match{1}{2})/100;

    % Presumably defines the heat kernel H used below - TODO confirm; a bare
    % load also brings in any other variables stored in the file.
    load([heat_savedir file]);
    params(:,ii) = [c;t];          % consolidating

    % --- Optional Feature selection (on T) -----------------------------------
    % H(inactivewords,inactivewords) = []; % not needed, see above
    % jvd: make sure normalize last!
    T = normalizeRows(H);

    % cache the expected dists^2 using only relevant docs (pruned above) and
    % words that occur
    [ED2 A] = transD2tf(tf,doclen,T);

    % --- RBF Kernel ----------------------------------------------------------
    for jj=1:length(trbfgamma)
        fprintf('\ttrbfgamma = %f\n',trbfgamma(jj));
        % no need for specific flags, except maybe: "-c 15.0" or something
        TAcc = svmCVOneVsRest( trn_samps, tst_samps, trn_zs, tst_zs, ...
            tf, numFolds, '', 'tranrbf', ED2, A, trbfgamma(jj) );
        tacc(:,jj,ii) = TAcc;      % indexed (:, gamma, kernel file)
    end
end

% --- off the shelf RBF kernel ------------------------------------------------
fprintf('CV RBF Kernel\n');
rbfgamma = [.05 .08 .125 .25 .375 .5 .625 .75 .875 1 2];
for ii=1:length(rbfgamma)
    fprintf('\trbfgamma = %f\n',rbfgamma(ii));
    % jvd: TODO: fix svmCVOneVsRest so that it takes a gamma for stndrbf
    TAcc = svmCVOneVsRest( trn_samps, tst_samps, trn_zs, tst_zs, ...
        tf, numFolds, '', 'stndrbf', [], [], rbfgamma(ii) );
    % Store the result just computed; the original read the undefined
    % variable 'Acc' here.
    acc(:,ii) = TAcc;
end

% Keep only the results and the parameters needed to interpret them, then
% save the remaining workspace.
clear_except tacc acc trbfgamma rbfgamma params workdir;
save([workdir 'acc_svm_rbf_q.7k.mat']);