use strict;
use warnings;

# Annotate the gene column of an expression matrix with a biotype label.
#
# Pass 1 reads the GTF file and builds a map  symbol -> "symbol|label":
#   * genes whose gene_biotype is exactly "protein_coding" keep that label;
#   * genes whose gene_biotype matches one of the non-coding categories
#     listed below are labelled "lncRNA", unless the same symbol has already
#     been seen as protein_coding.
# Pass 2 copies the header line of the expression matrix unchanged and then
# writes only those rows whose first column is present in the map, with the
# first column replaced by the "symbol|label" string.
#
# The result depends only on the contents of the two input files.

my $gtfFile = "human.gtf";
my $expFile = "geneMatrix.txt";
my $outFile = "biotype.txt";

my %hash        = ();    # symbol -> "symbol|label" written to the output
my %proteinHash = ();    # symbols already seen with biotype protein_coding

# ---- Pass 1: collect biotype labels from the GTF attributes ----------------
open(my $gtf_fh, '<', $gtfFile) or die "Cannot open $gtfFile: $!";
while (my $line = <$gtf_fh>) {
    chomp($line);

    # Only lines carrying gene_id, gene_name and gene_biotype (in that
    # order) are considered; everything else is skipped.
    next
        unless $line =~ /gene_id \"(.+?)\"\;.+gene_name "(.+?)"\;.+gene_biotype \"(.+?)\"\;/;
    my ($symbol, $biotype) = ($2, $3);

    # Drop a trailing ".suffix" from the symbol, if there is one.
    $symbol =~ s/(.+)\..+/$1/g;

    if ($biotype eq "protein_coding") {
        $proteinHash{$symbol} = 1;
        $hash{$symbol}        = "$symbol|$biotype";
    }
    elsif ($biotype =~ /3prime_overlapping_ncrna|ambiguous_orf|ncrna_host|non_coding|processed_transcript|retained_intron|antisense|sense_overlapping|sense_intronic|bidirectional_promoter_lncrna|lincRNA/) {
        # A symbol already recorded as protein_coding keeps that label.
        unless (exists $proteinHash{$symbol}) {
            $hash{$symbol} = "$symbol|lncRNA";
        }
    }
}
close($gtf_fh);

# ---- Pass 2: relabel the expression matrix ---------------------------------
open(my $exp_fh, '<', $expFile) or die "Cannot open $expFile: $!";
open(my $out_fh, '>', $outFile) or die "Cannot open $outFile for writing: $!";
while (my $line = <$exp_fh>) {
    # $. counts lines of the handle read last, i.e. the expression matrix;
    # its first line is the header and is copied through as-is.
    if ($. == 1) {
        print {$out_fh} $line;
        next;
    }
    chomp($line);
    my @arr = split(/\t/, $line);

    # Same suffix stripping as applied to the GTF symbols above.
    $arr[0] =~ s/(.+)\..+/$1/g;

    # Rows whose gene has no recorded label are dropped from the output.
    if (exists $hash{ $arr[0] }) {
        $arr[0] = $hash{ $arr[0] };
        print {$out_fh} join("\t", @arr) . "\n";
    }
}
# Buffered write errors only surface at close, so check it.
close($out_fh) or die "Cannot close $outFile: $!";
close($exp_fh);