#!/usr/bin/perl -W
#
# Convert a tab-separated document index into flat temporary files and then
# start MATLAB on a pipe.
#
# Usage: <script> INDEX_FILE[.gz]
#
# Every index line has the form "<doc-id>\t<topic>\t<topic>...".  For each
# line the first line of $doc_path/<doc-id> is read, and the script writes:
#   $id_file      the document id, one per row
#   $docs_file    the first line of the document, one per row
#   $tfd_file     "<row>\t<word>\t<count>" for every distinct word of that line
#   $topics_file  each distinct topic, in order of first appearance
#   $member_file  "<row>\t<topic-index>\t1" for every topic of the row
# Row numbers and topic indices both start at 1.
use strict;
use IO::Zlib;

#if( $#ARGV!=3 ) {
#  warn "incorrect number of arguments";
#  exit(1);
#}
# Only the index file is required; fail early with a usable message instead
# of tripping over an undefined file name further down.
die "usage: $0 INDEX_FILE[.gz]\n" unless @ARGV;

my $ind_file = $ARGV[0];
#my $output_path = $ARGV[1];
#my $voc_file = $ARGV[2];

# jvd: open either compressed output streams or regular txt files
#open(FILE,"| gzip >$filename") or die "couldn't open $filename: $!";
#local ( *WRD, *IND, *VOC );
local ( *IND );
if( $ind_file =~ /\.gz$/ ) {
    # A tied IO::Zlib handle lets the gzip-compressed index be read through
    # the same <IND> loop as a plain text file.
    tie(*IND, 'IO::Zlib', $ind_file, "rb") or die "couldn't open $ind_file: $!";
}
else {
    open(IND, '<', $ind_file) or die "couldn't open $ind_file: $!";
}

# NOTE(review): fixed, predictable names in /tmp can collide between
# concurrent runs and are open to symlink attacks.  They are left unchanged
# because code outside the visible part of this script may rely on these
# exact paths -- confirm before switching to File::Temp.
my $member_file = '/tmp/member1234.tmp';
my $id_file     = '/tmp/id1234.tmp';
my $topics_file = '/tmp/top1234.tmp';
my $tfd_file    = '/tmp/tfd1234.tmp';
my $docs_file   = '/tmp/docs1234.tmp';
my $doc_path    = './numericFiles';

# Three-argument open keeps characters in the path from being interpreted as
# an open mode.
open(my $member_fh, '>', $member_file) or die "couldn't open $member_file: $!";
open(my $id_fh,     '>', $id_file)     or die "couldn't open $id_file: $!";
open(my $topics_fh, '>', $topics_file) or die "couldn't open $topics_file: $!";
open(my $tfd_fh,    '>', $tfd_file)    or die "couldn't open $tfd_file: $!";
open(my $docs_fh,   '>', $docs_file)   or die "couldn't open $docs_file: $!";

my %topics;            # topic name -> 1-based index, in order of first appearance
my $topics_cnt = 0;    # number of distinct topics seen so far
my $row_cnt    = 0;    # 1-based number of the document row being written

while( my $line = <IND> ) {
    chomp $line;
    my @row = split("\t", $line);
    #print join(">\t<",@row) . "\n";next;
    my ($doc_id, @doc_topics) = @row;

    # A blank line (or one without an id) names no document; skip it before
    # the row counter advances so that all output files stay row-aligned.
    next unless defined $doc_id && $doc_id ne '';

    ++$row_cnt;
    print {$id_fh} "$doc_id\n";

    # Only the first line of the document file is used.
    open(my $doc_fh, '<', "$doc_path/$doc_id")
        or die "couldn't open $doc_path/$doc_id: $!";
    my $doc = <$doc_fh>;
    close($doc_fh);

    # An empty file yields undef and a last line may lack its newline;
    # normalise both so the docs file gets exactly one line per row.
    $doc = '' unless defined $doc;
    chomp $doc;
    print {$docs_fh} "$doc\n";

    # Term frequencies of the whitespace-separated words, written in sorted
    # word order.
    my %tfd;
    ++$tfd{$_} for split(" ", $doc);
    foreach my $word (sort keys %tfd) {
        print {$tfd_fh} "$row_cnt\t$word\t$tfd{$word}\n";
    }

    # Assign the next index to a topic the first time it is seen, then record
    # the membership of this row in that topic.
    foreach my $top (@doc_topics) {
        if( !exists $topics{$top} ) {
            $topics{$top} = ++$topics_cnt;
            print {$topics_fh} "$top\n";
        }
        print {$member_fh} "$row_cnt\t$topics{$top}\t1\n";
    }
}

# Buffered write errors only surface at close, so check every output handle.
close($member_fh) or die "couldn't close $member_file: $!";
close($id_fh)     or die "couldn't close $id_file: $!";
close($topics_fh) or die "couldn't close $topics_file: $!";
close($tfd_fh)    or die "couldn't close $tfd_file: $!";
close($docs_fh)   or die "couldn't close $docs_file: $!";

#open(MATLAB,"|-","/usr/local/bin/matlab -nodesktop -nosplash")
open(MATLAB,"|-","/usr/local/bin/matlab -nodesktop -nosplash >/dev/null")
    or die "couldn't open matlab: $!";

# NOTE(review): the text piped to MATLAB is cut off in the visible source;
# the start of the statement is reproduced exactly as found.
print MATLAB <