# eggNOG_GO.sh (2.3 KB)
  #!/bin/bash
  # Runs emapper.py on a FASTA file and derives eggNOG, category and GO tables
  # from its annotations output. Run with -h for the list of options.
  #
  # NOTE(review): errexit is left disabled, as it was originally; the reason is
  # not visible in this file.
  #set -e

  # Default for -t; replaced when the option is given on the command line.
  type="proteins"

  # Directory that holds this script, with symlinks resolved by realpath.
  # Both expansions are quoted so a path containing whitespace survives.
  script_path=$(dirname "$(realpath "$0")")

  # Parent directory of the per-kingdom data directories listed in db_path.
  database_dir="/Business/psn_company/t10/DATA/EGGNOG_DB/EGGNOG_SpDB"

  # software: keyed by the accepted -t values.
  # db_path:  keyed by the accepted -s values, mapping each to a data directory.
  declare -A software db_path

  software[CDS]="diamond blastx"
  software[proteins]="diamond blastp"

  db_path[animal]="$database_dir/Metazoa"
  db_path[plant]="$database_dir/Viridiplantae"
  db_path[fungi]="$database_dir/Fungi"
  db_path[prokaryotes]="$database_dir/Bacteria"
  # Print the command-line help on stdout, then terminate the script with
  # exit status 1. Takes no arguments.
  #
  # printf with %s is used for the script name so that backslashes in $0 are
  # printed literally instead of being expanded as escape sequences.
  usage() {
    printf 'Usage:\n'
    printf '\t%s -i cds.fa -o outdir -t CDS -s animal\n' "$0"
    printf 'OPTIONS:\n'
    # One tab-indented line per option; printf reuses the format per argument.
    printf '\t%s\n' \
      '-h: help information' \
      '-i: fasta file' \
      '-o: output dir' \
      '-t: proteins or CDS default:proteins' \
      '-s: species [ animal plant fungi prokaryotes ]'
    exit 1
  }
  # Parse command-line options into the global variables species (-s),
  # type (-t), fasta (-i) and outdir (-o).
  # Arguments: the option words to parse (normally "$@").
  # -h, an unknown option, or an option missing its argument all end in a call
  # to usage.
  parse_args() {
    # Restart the scan at the first word so the function can be called again.
    OPTIND=1
    while getopts 'hs:t:i:o:' OPT; do
      case "$OPT" in
        s) species=$OPTARG ;;
        t) type=$OPTARG ;;
        i) fasta=$OPTARG ;;
        o) outdir=$OPTARG ;;
        # Covers h and the "?" that getopts reports for a rejected option.
        # An explicit default arm replaces an unquoted ? pattern, which is a
        # glob matching any single character rather than a literal "?".
        *) usage ;;
      esac
    done
  }

  parse_args "$@"
  # -s, -i and -o are mandatory. The $# test additionally rejects a run with
  # no arguments at all, even if those variables were inherited from the
  # environment.
  if [[ -z $species || -z $fasta || -z $outdir ]] || (( $# == 0 )); then
    usage
  fi

  # The -s value must be a key of db_path and the -t value a key of software.
  if [[ -z ${db_path[$species]} || -z ${software[$type]} ]]; then
    echo "ERROR:unknown -t $type or -s $species" >&2
    exit 1
  fi

  # Stop before any existing results are touched when the input cannot be read.
  if [[ ! -r $fasta || -d $fasta ]]; then
    echo "ERROR:cannot read fasta file $fasta" >&2
    exit 1
  fi
  # Scratch directory for the emapper output. ${outdir:?} aborts the script
  # rather than letting an empty outdir expand to /eggnogmap_tmp, which the
  # rm -rf below would then delete.
  tmp_dir="${outdir:?output dir is not set}/eggnogmap_tmp"

  # Start from an empty scratch directory so files from an earlier run are
  # never mistaken for the results of this one.
  if [[ -d $tmp_dir ]]; then
    rm -rf -- "$tmp_dir"
  fi
  mkdir -p -- "$tmp_dir" || exit 1

  source /Business/psn_company/t10/software/miniconda3/bin/activate eggnog-mapper

  # Arguments are kept in an array so values containing whitespace reach
  # emapper.py as single words.
  emapper_args=(
    -i "$fasta"
    --output Annotation
    --output_dir "$tmp_dir"
    -m diamond
    --data_dir "${db_path[$species]}"
    --itype "$type"
    --cpu 16
    --usemem
    --sensmode more-sensitive
    --dbmem
  )

  # Without this check a failed run would go on to produce empty result tables
  # and still exit successfully.
  if ! /Business/psn_company/t10/software/miniconda3/envs/eggnog-mapper/bin/emapper.py "${emapper_args[@]}"; then
    echo "ERROR:emapper.py failed" >&2
    exit 1
  fi
  # Print a three-column, tab-separated table built from the emapper
  # annotations file named by $1:
  #   1. column 1 of the input,
  #   2. the last comma-separated entry of column 5, cut at its first "@",
  #   3. column 7 of the input.
  # (Columns 1, 5 and 7 are presumably query, eggNOG_OGs and COG_category;
  # confirm against the installed emapper version.)
  # Lines starting with "#" are skipped, as are rows whose column 5 is "-" and
  # whose column 7 starts with "-". Only the first remaining row per column-1
  # value is printed.
  extract_eggnog_table() {
    awk '
      BEGIN { FS = OFS = "\t" }
      /^#/ { next }
      $5 == "-" && $7 ~ /^-/ { next }
      {
        n = split($5, groups, ",")
        og = groups[n]
        # Strip the "@..." suffix from the group id only, so that an "@" in
        # column 1 cannot swallow the rest of the row.
        sub(/@.*/, "", og)
        if (!($1 in seen)) {
          seen[$1]
          print $1, og, $7
        }
      }
    ' "$1"
  }

  # Full table in the scratch directory, then one two-column file per result.
  extract_eggnog_table "$outdir/eggnogmap_tmp/Annotation.emapper.annotations" \
    > "$outdir/eggnogmap_tmp/eggnog.txt"
  cut -f1,2 "$outdir/eggnogmap_tmp/eggnog.txt" > "$outdir/eggNOG.txt"
  cut -f1,3 "$outdir/eggnogmap_tmp/eggnog.txt" > "$outdir/eggNOG_Category.txt"
  # Print one "id<TAB>GO:..;GO:.." line per entry of the emapper annotations
  # file named by $1. The id is column 1; the GO list is every distinct
  # GO:NNNNNNN term found in column 10, sorted and joined with ";".
  # (Column 10 is presumably the GOs column; confirm against the installed
  # emapper version.)
  # Lines starting with "#" are skipped. A row must contain at least one GO
  # term in column 10 to count, so "GO" appearing inside an id no longer yields
  # a row with an empty list. Only the first counted row per id is printed.
  extract_go_terms() {
    perl -F'\t' -lane '
      next if /^#/;
      my $gos = defined $F[9] ? $F[9] : "";
      my %terms;
      $terms{$_} = 1 for $gos =~ /GO:\d{7}/g;
      next unless %terms;
      next if $seen{$F[0]}++;
      print join("\t", $F[0], join(";", sort keys %terms));
    ' "$1"
  }

  extract_go_terms "$outdir/eggnogmap_tmp/Annotation.emapper.annotations" \
    > "$outdir/GO.txt"