#!/usr/local/bin/perl
#
# Find the k-mers that occur in the female dump file but not in the male one.
#
# Both inputs are read as text, one record per line in the form
# "<kmer> <count>" (single space separator).  The files are consumed a block
# at a time: each block is added to a per-file hash, then every k-mer present
# in both hashes is removed from both.  Whatever is left in the female hash
# once BOTH files are exhausted is written to the output file as
# "<kmer> <count>" lines.
use strict;
use warnings;

# Input/output paths.  The progress messages report these names so that what
# is printed always matches what was actually opened.
my $male_file   = 'M.dumpy';
my $female_file = 'F.dumpy';
my $out_file    = 'kmer_differencey';

open(my $male_fh, '<', $male_file)
    or die "Cannot open $male_file for reading: $!\n";
print "File $male_file opened \n";

open(my $female_fh, '<', $female_file)
    or die "Cannot open $female_file for reading: $!\n";
print "File $female_file opened \n";

open(my $out_fh, '>', $out_file)
    or die "Cannot open $out_file for writing: $!\n";
print "Output file $out_file opened \n";

# Number of records read from each file per iteration.
my $blocksize = 1000;
print "Blocksize set to $blocksize \n";

# Running total of k-mers found in both files, and the iteration counter.
my $commontally = 0;
my $iteration   = 0;

# k-mer => count for records not (yet) matched in the other file.
my %mhash;
my %fhash;

# read_block($fh, \%hash, $max_records, $label)
#
# Reads up to $max_records well-formed "<kmer> <count>" lines from $fh and
# stores them in %hash.  Stops early at end of file.  Blank or malformed
# lines are skipped with a warning that names $label.  Returns the number of
# records stored.
sub read_block {
    my ($fh, $hash_ref, $max_records, $label) = @_;

    my $added = 0;
    while ($added < $max_records) {
        my $line = <$fh>;
        last unless defined $line;      # end of file

        # Strip the line ending so the count is stored (and later printed)
        # without a trailing newline.
        $line =~ s/\r?\n\z//;

        my ($kmer, $count) = split(/ /, $line);
        unless (defined $kmer && length $kmer && defined $count) {
            warn "Skipping malformed record at $label line $.\n";
            next;
        }

        $hash_ref->{$kmer} = $count;
        $added++;
    }

    return $added;
}

# Progress through the files one block at a time.  Keep going until BOTH
# files are exhausted: stopping when only one ends would leave the rest of
# the other file unread and produce wrong "unique" results.
until (eof($male_fh) && eof($female_fh)) {
    $iteration++;
    print "\nITERATION $iteration \n\n";

    # Read the next block of male records.
    my $male_added = read_block($male_fh, \%mhash, $blocksize, $male_file);
    print "$male_added records from file $male_file added to male hash \n";
    print " now " . scalar(keys %mhash) . " records \n";

    # Read the next block of female records.
    my $female_added
        = read_block($female_fh, \%fhash, $blocksize, $female_file);
    print "$female_added records from file $female_file added to female hash \n";
    print " now " . scalar(keys %fhash) . " records \n";

    # Remove every k-mer that is present in both hashes.
    print "Checking F k-mers for match in M k-mer set \n";
    my $common = 0;

    # Deleting the key most recently returned by each() is documented as
    # safe, so the female hash can be pruned while it is being walked.
    while (my ($fkey) = each %fhash) {
        next unless exists $mhash{$fkey};

        $commontally++;
        $common++;
        delete $mhash{$fkey};
        delete $fhash{$fkey};
    }

    print "$common records common to both M and F deleted \n";
    print "Male hash has now " . scalar(keys %mhash) . " records \n";
    print "Female hash has now " . scalar(keys %fhash) . " records \n";
}

# All comparisons done: write the k-mers unique to the female file.
foreach my $kmer (keys %fhash) {
    print {$out_fh} "$kmer $fhash{$kmer}\n";
}

# Buffered write errors only surface at close, so check it.
close($out_fh) or die "Error closing $out_file: $!\n";
print "\n\nFile $out_file closed \n";
close($male_fh);
print "File $male_file closed \n";
close($female_fh);
print "File $female_file closed \n";

print "\nTotal kmers in common: $commontally, \n";
print "Total kmers unique to F: " . scalar(keys %fhash) . " \n";
print "Refer to output file $out_file for unique female kmers";
print "\n\n\n Dats all folks";