#!/bin/bash #set -e type="proteins" script_path=$(dirname `realpath $0`) database_dir="/Business/psn_company/Work/Transcriptome/Datum/Public/Database/genome_prepare/NR" declare -A software database_dir software["CDS"]="diamond blastx" software["proteins"]="diamond blastp" database_dir["animal"]=$database_dir/diamond_index/Metazoa.fa.dmnd database_dir["plant"]=$database_dir/diamond_index/Viridiplantae.fa.dmnd database_dir["fungi"]=$database_dir/diamond_index/Fungi.fa.dmnd database_dir["prokaryotes"]=$database_dir/diamond_index/Bacteria.fa.dmnd database_dir["virus"]=$database_dir/diamond_index/Virus.fa.dmnd usage(){ echo -e "Usage:" echo -e "\t$0 -i cds.fa -o outdir -t CDS -s animal" echo -e "OPTIONS:" echo -e "\t-h: help information" echo -e "\t-i: fasta file" echo -e "\t-o: output dir" echo -e "\t-t: proteins or CDS default:proteins" echo -e "\t-s: species [ animal plant fungi prokaryotes virus taxid(9606 10096 ..) ]" exit 1 } while getopts 'hs:t:i:o:' OPT;do case $OPT in h) usage;; s) species="$OPTARG";; t) type="$OPTARG";; i) fasta="$OPTARG";; o) outdir="$OPTARG";; ?) usage;; esac done #echo $species $type $fasta $outdir if [[ ! $species ]] || [[ ! $fasta ]] || [[ ! $outdir ]] || [ $# == 0 ] then usage fi if [[ ! ${software[${type}]} ]] then echo "ERROR:unknown -t $type " exit 1 fi mkdir -p $outdir case $species in animal|plant|fungi|prokaryotes|virus) ${software[${type}]} -q ${fasta} -d ${database_dir[${species}]} -o $outdir/blast.$species.txt -e 0.00001 \ --outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore stitle \ -p 12 --more-sensitive --max-target-seqs 5 ;; [0-9]*) /Business/psn_company/t04/hudabang/bin/yaogu/bin/taxonkit list --data-dir /Business/psn_company/t04/.taxonkit \ --indent "" -i $species -n -o $outdir/$species.taxid.txt sed -i 's/ /\t/' $outdir/$species.taxid.txt head -1 $outdir/$species.taxid.txt | sed "s/^/taxid and name are : /" awk 'BEGIN{FS=OFS="\t"}FNR==NR{a[$1]=1}FNR!=NR{if(a[$3])print $2}' $outdir/$species.taxid.txt \ $database_dir/prot.accession2taxid > $outdir/$species.accession.txt perl $script_path/extract_fa_by_list.pl $outdir/$species.accession.txt $database_dir/nr $outdir/$species.fa diamond makedb --in $outdir/$species.fa -d $outdir/$species.fa ${software[${type}]} -q ${fasta} -d $outdir/$species.fa.dmnd -o $outdir/blast.$species.txt -e 0.00001 \ --outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore stitle \ -p 12 --more-sensitive --max-target-seqs 5 ;; *) echo "ERROR:unknown -s $species" usage ;; esac awk 'BEGIN{FS=OFS="\t"}{if(!a[$1]){print $1,$13;a[$1]=1}}' $outdir/blast.$species.txt > $outdir/NR.txt