=pod

# Perl script.
# Author: Kishor Dhaygude.
# Please save this text file with a .pl extension before running the script.
#
# GenomeThreader software was used to obtain cDNA/EST-based spliced alignments
# with the C. floridanus genome.
#
# A) Alignment of transcripts (Evidence-TA) with the C. floridanus genome.
#    Command used:
#      /Software/gth-1.5.2-Linux_x86_64-64bit/bin/gth -intermediate -xmlout -gzip -o cflo.inter.gz -genomic cflo_v3.3.fa -cdna TAContigs.fasta -v
#    where
#      cflo_v3.3.fa    = genome of C. floridanus
#      TAContigs.fasta = Evidence-TA from this project's assembly
#
# B) Self-BLAST of the TA contigs against themselves to generate an alignment
#    matrix. This matrix is used for isoform prediction. Example rows
#    (the script splits each row on tab characters):
#kmr25_comp62777_c0_seq1 2356 kmr29_comp88609_c0_seq3 2810 113 1.000 6.5162e-53 0.05 0.04
#kmr25_comp62777_c0_seq1 2356 kmr29_comp88609_c0_seq4 2771 113 1.000 6.5162e-53 0.05 0.04
#kmr25_comp62777_c0_seq1 2356 kmr29_comp88609_c0_seq1 2891 113 1.000 6.5162e-53 0.05 0.04
#
# Save this output as blast_matrix_Input.txt.
# Run the script in a Linux/macOS terminal with the following command:
#   perl Script.pl blast_matrix_Input.txt >result_output.txt
#
# This script is similar to the following awk one-liner:
#   awk '{if($1!=temp && flag==1){print gene"\t"count"\t"name;count=0;name="";}temp=$1;flag=1;gene=$1;count++;name=name$3",";}' Isoform_Parse_blast_aln100.txt >Isoform_count.txt
#
# The script starts after the "=cut" line. All lines above it are comments.
=cut

# Groups contigs from a tab-separated self-BLAST table into clusters.
#
# Usage:
#   perl Script.pl blast_matrix_Input.txt > result_output.txt
#
# Input:
#   Column 1 is the query contig ID and column 3 is the subject contig ID.
#   No other column is read.
#
# Output (STDOUT), one line per cluster:
#   <representative ID> TAB <member count> TAB <member IDs, each followed by ",">
#
# Side effects:
#   Writes Isoform_TA.txt and Isoform_TA_sorted.txt in the current directory
#   and runs the external `sort` program.

use strict;
use warnings;

my $PAIR_FILE   = 'Isoform_TA.txt';
my $SORTED_FILE = 'Isoform_TA_sorted.txt';

my $blast_file = $ARGV[0];
die "Usage: $0 blast_matrix_Input.txt > result_output.txt\n"
    unless defined $blast_file;

############################################
# 1. Read the BLAST table.
############################################
my %subjects_of;        # query ID   -> { subject ID => 'NA' }
my %seen_as_query;      # query ID   -> number of accepted rows
my %seen_as_subject;    # subject ID -> number of accepted rows

open my $blast_fh, '<', $blast_file
    or die "Cannot open '$blast_file' for reading: $!\n";

while (my $row = <$blast_fh>) {
    chomp $row;
    my @field = split /\t/, $row;

    # A blank or truncated row has no subject column; skipping it avoids
    # creating a contig whose ID is the empty string.
    next if @field < 3;

    my ($query, $subject) = @field[0, 2];

    # Skip the row when its subject was already accepted as a query AND its
    # query was already accepted as a subject.
    next if exists $seen_as_query{$subject} && $seen_as_subject{$query};

    $subjects_of{$query}{$subject} = 'NA';
    $seen_as_query{$query}++;
    $seen_as_subject{$subject}++;
}
close $blast_fh
    or die "Cannot close '$blast_file': $!\n";

############################################
# 2. Invert the table: subject ID -> { query ID => count }.
############################################
my %queries_of;
for my $query (keys %subjects_of) {
    for my $subject (keys %{ $subjects_of{$query} }) {
        $queries_of{$subject}{$query}++;
    }
}

############################################
# 3. Write "<owner> TAB <contig>" pairs.
#    Each contig is written at most once, under the first query (in sorted
#    order) that reaches it.
############################################
my %emitted;    # contig ID -> already written to $PAIR_FILE

open my $pair_fh, '>', $PAIR_FILE
    or die "Cannot open '$PAIR_FILE' for writing: $!\n";

for my $query (sort keys %subjects_of) {
    for my $subject (sort keys %{ $subjects_of{$query} }) {

        # Every subject is a key of %queries_of (built in step 2), so this
        # list is never empty; it includes $query itself.
        for my $other_query (sort keys %{ $queries_of{$subject} }) {
            next if exists $emitted{$other_query};
            print {$pair_fh} $query, "\t", $other_query, "\n";
            $emitted{$other_query}++;
        }

        next if exists $emitted{$subject};
        print {$pair_fh} $query, "\t", $subject, "\n";
        $emitted{$subject}++;
    }
}

# Close before sorting so the external program sees the complete file.
close $pair_fh
    or die "Cannot close '$PAIR_FILE': $!\n";

############################################
# 4. Sort the pairs by contig (column 2), then owner (column 1).
#    List-form system() avoids the shell; -o replaces the redirect.
############################################
system('sort', '-k2,2', '-k1,1', '-o', $SORTED_FILE, $PAIR_FILE) == 0
    or die "sort failed on '$PAIR_FILE' (status $?)\n";

############################################
# 5. Load the sorted pairs.
############################################
my %rep_of;     # contig ID -> representative ID recorded so far
my @reps;       # representative for pair k
my @members;    # member for pair k

open my $sorted_fh, '<', $SORTED_FILE
    or die "Cannot open '$SORTED_FILE' for reading: $!\n";

while (my $row = <$sorted_fh>) {
    chomp $row;
    my ($owner, $contig) = split /\t/, $row;

    if (exists $rep_of{$contig}) {
        # The contig already has a representative: attach the owner to it.
        push @reps,    $rep_of{$contig};
        push @members, $owner;
        $rep_of{$owner} = $rep_of{$contig};
    }
    else {
        push @reps,    $owner;
        push @members, $contig;
        $rep_of{$contig} = $owner;
    }
}
close $sorted_fh
    or die "Cannot close '$SORTED_FILE': $!\n";

######################################
# 6. Re-point each pair at its representative's own representative and
#    collect the members per final representative.
#    NOTE(review): this follows a single hop per pair, not a full chain
#    walk -- confirm that is sufficient for the intended clustering.
######################################
my %gene;     # representative ID -> "member1,member2,..."
my %count;    # representative ID -> number of members

for my $k (0 .. $#reps) {
    if (exists $rep_of{ $reps[$k] }) {
        $reps[$k] = $rep_of{ $reps[$k] };

        # Record the new mapping so later pairs that use this member as
        # their representative pick it up.
        $rep_of{ $members[$k] } = $rep_of{ $reps[$k] };
    }
    $gene{ $reps[$k] } .= $members[$k] . ",";
    $count{ $reps[$k] }++;
}

############################################
# 7. Report: representative, member count, member list.
############################################
for my $rep (sort keys %gene) {
    print $rep, "\t", $count{$rep}, "\t", $gene{$rep}, "\n";
}

exit(0);
######################################################