
ajmcello
Novice
May 8, 2010, 9:57 PM
Post #1 of 1
(1073 views)
|
Sort program runs but is slow
|
Can't Post
|
|
What it does: runs through a file, sorts, and splits it alphabetically into files with words that are equal or less than 200. In this example I'm using the dict file, and I use it several times, so excuse the redunancy since I didn't want to include the actual source files. It works, but it takes a very long time. Can someone help me get it working faster? Thanks much. Gzip'd file: http://www.mediafire.com/download.php?zykjlzmktjn Code:
#!/usr/bin/perl -w use Text::CSV; use File::Copy; use File::stat; use POSIX qw(strftime); my $green_dir = "tmp/list/green"; my $file_green = "tmp/all_green.txt"; my $blue_dir = "tmp/list/blue"; my $file_blue = "tmp/all_blue.txt"; my $green_blue_dir = "tmp/list/green_blue"; my $file_green_blue = "tmp/all_green_blue.txt"; my $allelse_dir = "tmp/list/allelse"; my $file_allelse = "tmp/allelse.txt"; my $all_dir = "tmp/list/all"; my $file_all = "tmp/all.txt"; my $max = 200; $cnt = 0; $cnt2 = 2; sub rem_green { $buf = "rm -f $green_dir/*"; system($buf); } sub rem_blue { $buf = "rm -f $blue_dir/*"; system($buf); } sub rem_green_blue { $buf = "rm -f $green_blue_dir/*"; system($buf); } sub rem_allelse { $buf = "rm -f $allelse_dir/*"; system($buf); } sub rem_all { $buf = "rm -f $all_dir/*"; system($buf); } @files = ( 'A', 'C', 'B', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ); @fl = ( 'A', 'C', 'B', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ); if ( $ARGV[0] eq "-o" ) { $file_name = $file_blue; $file_dir = $blue_dir; rem_blue(); } if ( $ARGV[0] eq "-p" ) { $file_name = $file_green; $file_dir = $green_dir; rem_green(); } if ( $ARGV[0] eq "-po" ) { $file_name = $file_green_blue; $file_dir = $green_blue_dir; rem_green_blue(); } if ( $ARGV[0] eq "-a" ) { $file_name = $file_all; $file_dir = $all_dir; rem_all(); } if ( $ARGV[0] eq "-ae" ) { $file_name = $file_allelse; $file_dir = $allelse_dir; rem_allelse(); } $files_cnt = 0; $fl_cnt = 0; foreach (@files) { $file = $_; $file .= "_1.txt"; chomp($file); unlink("$file_dir/$file"); open( IN, "<", "$file_name" ); while (<IN>) { $word = $_; chomp($word); foreach (@fl) { $fl = $_; chomp($fl); if ( $word =~ /^[$fl]/ && $file =~ /^[$fl]/ ) { if ( $cnt == $max ) { $file =~ s/_.*//; $file .= "_$cnt2.txt"; $cnt2++; $cnt = 0; unlink("$file_dir/$file"); } print "$file_dir $file\n"; open( OUT, ">>", "$file_dir/$file" ) or die $!; print OUT "$word\n"; close(OUT); $cnt++; $fl_cnt++; } $fl_cnt = 0; } $files_cnt++; } $files_cnt = 0; $cnt = 0; $cnt2 = 2; } close(IN);
|