% Prepare the reduced ("7k") Reuters data set.
%
% Loads the corpus .mat files, hands them to shrinkByInfrequent, flips the
% resulting matrices to a documents-as-columns layout, and writes every
% resulting variable to its own '<name>.7k.mat' file under workdir.
% NOTE(review): the exact semantics of shrinkByInfrequent and normalizeRows
% live in ./helper/ and are not visible here -- confirm against those files.

addpath('./helper/');
workdir = './matfiles/';

% --- Load corpus inputs -------------------------------------------------
% Presumably these files define the variables docs, voc, tf and member
% (those are the names consumed by the call below) -- TODO confirm.
load('/noback/lowbow/corpora/reuters/matFiles/docs.mat');
load('/noback/lowbow/corpora/reuters/matFiles/voc.mat');
load('/noback/lowbow/corpora/reuters/matFiles/tf.mat');      % jvd: a misnomer!
load('/noback/lowbow/corpora/reuters/matFiles/member.mat');

% --- Shrink the data set ------------------------------------------------
% NOTE(review): 7000 presumably relates to the '7k' suffix used in the
% output file names below -- verify against shrinkByInfrequent.
[doc, voc, tc, member, infrequent, nulldoc, wordHistThresh] = ...
    shrinkByInfrequent(docs, voc, tf, member, 7000);

% Switch to docs-as-columns.
tc     = tc';
member = member';

% --- Persist the direct outputs, one variable per file -------------------
save([workdir 'doc.7k.mat'],        'doc');
save([workdir 'voc.7k.mat'],        'voc');
save([workdir 'tc.7k.mat'],         'tc');
save([workdir 'member.7k.mat'],     'member');
save([workdir 'infrequent.7k.mat'], 'infrequent');
save([workdir 'nulldoc.7k.mat'],    'nulldoc');

% --- Derived quantities --------------------------------------------------
% Column sums of tc (one value per column, i.e. per document in the
% docs-as-columns layout).
doclen = sum(tc, 1);
save([workdir 'doclen.7k.mat'], 'doclen');

% Row sums of tc (one value per row).
wordcnt = sum(tc, 2);
save([workdir 'wordcnt.7k.mat'], 'wordcnt');

% Overwrites the loaded tf: normalizeRows is applied to the transposed tc
% and the result is transposed back, so the output matches tc's layout.
tf = normalizeRows(tc')';
save([workdir 'tf.7k.mat'], 'tf');

fprintf('wordHistThresh = %f\n', wordHistThresh);