#!/bin/bash
#
# Run eggNOG-mapper (emapper.py, diamond mode) on a FASTA file and derive
# three tab-separated tables from its annotation output:
#
#   <outdir>/eggNOG.txt           query <TAB> orthologous-group id
#   <outdir>/eggNOG_Category.txt  query <TAB> value of annotation column 7
#   <outdir>/GO.txt               query <TAB> GO ids joined by ';'
#
# Intermediate files live in <outdir>/eggnogmap_tmp, which is wiped at the
# start of every run.
#
# Usage: see usage() below, or run with -h.
#
# NOTE(review): `set -e` was commented out in the original script, so it
# stays off here; the critical steps are checked explicitly instead.
#set -e

type="proteins"
script_path=$(dirname "$(realpath "$0")")
database_dir="/Business/psn_company/t10/DATA/EGGNOG_DB/EGGNOG_SpDB"
conda_root="/Business/psn_company/t10/software/miniconda3"

# software: accepted values for -t (only the keys are used, for validation).
# db_path:  accepted values for -s, mapped to the eggNOG data directory.
declare -A software db_path
software["CDS"]="diamond blastx"
software["proteins"]="diamond blastp"
db_path["animal"]="$database_dir/Metazoa"
db_path["plant"]="$database_dir/Viridiplantae"
db_path["fungi"]="$database_dir/Fungi"
db_path["prokaryotes"]="$database_dir/Bacteria"

# Print the help text to stdout and terminate with status 1.
usage() {
  printf 'Usage:\n'
  printf '\t%s -i cds.fa -o outdir -t CDS -s animal\n' "$0"
  printf 'OPTIONS:\n'
  printf '\t-h: help information\n'
  printf '\t-i: fasta file\n'
  printf '\t-o: output dir\n'
  printf '\t-t: proteins or CDS default:proteins\n'
  printf '\t-s: species [ animal plant fungi prokaryotes ]\n'
  exit 1
}

# Print a message to stderr and terminate with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

while getopts 'hs:t:i:o:' OPT; do
  case "$OPT" in
    h) usage ;;
    s) species="$OPTARG" ;;
    t) type="$OPTARG" ;;
    i) fasta="$OPTARG" ;;
    o) outdir="$OPTARG" ;;
    *) usage ;;   # getopts reports unknown options / missing arguments as '?'
  esac
done

# -s, -i and -o are mandatory (this also covers the "no arguments" case).
if [[ -z "$species" || -z "$fasta" || -z "$outdir" ]]; then
  usage
fi

# The emptiness test on $type comes first so that an empty key is never used
# as an associative-array subscript (bash rejects that with an error).
if [[ -z "$type" || -z "${db_path[$species]}" || -z "${software[$type]}" ]]; then
  die "ERROR:unknown -t $type or -s $species"
fi

# Fail before touching the output directory if the input cannot be read.
[[ -r "$fasta" ]] || die "ERROR:cannot read fasta file $fasta"

tmp_dir="$outdir/eggnogmap_tmp"
annotations="$tmp_dir/Annotation.emapper.annotations"

# Start from an empty scratch directory so stale results are never reused.
if [[ -d "$tmp_dir" ]]; then
  rm -rf -- "$tmp_dir"
fi
mkdir -p -- "$tmp_dir" || die "ERROR:cannot create directory $tmp_dir"

# Activate the conda environment that ships emapper.py. The status is not
# checked here; a broken environment surfaces as an emapper failure below.
source "$conda_root/bin/activate" eggnog-mapper

emapper_args=(
  -i "$fasta"
  --output Annotation
  --output_dir "$tmp_dir"
  -m diamond
  --data_dir "${db_path[$species]}"
  --itype "$type"
  --cpu 16
  --usemem
  --sensmode more-sensitive
  --dbmem
)
"$conda_root/envs/eggnog-mapper/bin/emapper.py" "${emapper_args[@]}" \
  || die "ERROR:emapper.py failed for $fasta"
[[ -f "$annotations" ]] || die "ERROR:missing emapper output $annotations"

# Build query / OG / column-7 triples:
#   1. keep annotation columns 1, 5 and 7
#      (presumably query, eggNOG_OGs and COG_category - confirm against the
#      emapper version in use);
#   2. drop '#' comment and header lines;
#   3. drop rows where both remaining columns are '-';
#   4. keep only the last entry of the comma-separated column 5 and strip its
#      '@...' suffix (done on that field only, so a query id that contains
#      '@' is left intact);
#   5. keep the first row per query.
cut -f1,5,7 < "$annotations" \
  | sed '/^#/d' \
  | grep -vP '\t-\t-' \
  | awk 'BEGIN { FS = OFS = "\t" }
         {
           n = split($2, ogs, ",")
           og = ogs[n]
           sub(/@.*/, "", og)
           print $1, og, $3
         }' \
  | awk '!seen[$1]++' \
  > "$tmp_dir/eggnog.txt"

cut -f1,2 < "$tmp_dir/eggnog.txt" > "$outdir/eggNOG.txt"
cut -f1,3 < "$tmp_dir/eggnog.txt" > "$outdir/eggNOG_Category.txt"

# Build the GO table from annotation columns 1 and 10: collect every
# GO:NNNNNNN id on the row, sort them, join them with ';', and keep the first
# row per query.
sed '/^#/d' < "$annotations" \
  | cut -f1,10 \
  | grep GO \
  | perl -ne 'chomp;@a=split/\t/;@b=$a[1]=~/(GO:\d{7})/g;foreach(@b){$c{$a[0]}{$_}=1};print $a[0]."\t".join(";",sort keys %{$c{$a[0]}})."\n"' \
  | awk -F '\t' '!seen[$1]++' \
  > "$outdir/GO.txt"