1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
#!/bin/bash
# Search a FASTA file against a Swiss-Prot DIAMOND database and derive a
# two-column query/description table from the hits.
#set -e

# Defaults; the -t and -o command-line options override them.
type="proteins"
outdir="."

# Directory that contains this script. Both expansions are quoted so a script
# path containing whitespace is resolved correctly.
script_path=$(dirname "$(realpath "$0")")

# Despite its name this is the path of the DIAMOND database *file* (.dmnd)
# that is handed to `diamond -d`, not a directory.
database_dir="/Business/psn_company/Work/Transcriptome/Datum/Public/Database/genome_prepare/Swissprot/uniprot_sprot.fasta.dmnd"

# software maps the value of -t to the DIAMOND command (program + sub-command)
# used for that kind of input. db_path is declared alongside it but is not
# used anywhere in the visible part of the script.
declare -A software db_path
software["CDS"]="diamond blastx"
software["proteins"]="diamond blastp"
#######################################
# Print the command-line help and terminate the script.
# Arguments: $1 - exit status to terminate with (optional, default 1)
# Outputs:   the help text; on stdout when the status is 0, otherwise on
#            stderr so that it does not pollute captured output
# Returns:   never returns; exits with the requested status
#######################################
usage(){
  local status=${1:-1}
  local fd=2
  if [[ "$status" == 0 ]]; then
    fd=1
  fi
  {
    printf 'Usage:\n'
    printf '\t%s -i cds.fa -o outdir -t CDS\n' "$0"
    printf 'OPTIONS:\n'
    printf '\t-h: help information\n'
    printf '\t-i: fasta file\n'
    printf '\t-o: output dir;default:./\n'
    printf '\t-t: proteins or CDS default:proteins\n'
  } >&"$fd"
  exit "$status"
}
# Parse the command-line options: -h, -t TYPE, -i FASTA, -o OUTDIR.
# getopts runs in its default (non-silent) mode, so it prints its own message
# for an unknown option or a missing argument and then sets OPT to "?", which
# is handled by the default arm below.
while getopts 'ht:i:o:' OPT; do
  case "$OPT" in
    h)
      # Asking for help is not an error. usage terminates the shell it runs in
      # with its own status, so it is run in a subshell and the script then
      # exits 0 itself. The 0 is passed in case usage accepts a status
      # argument; it is harmless if usage ignores its arguments.
      (usage 0)
      exit 0
      ;;
    t) type="$OPTARG" ;;
    i) fasta="$OPTARG" ;;
    o) outdir="$OPTARG" ;;
    *) usage ;;
  esac
done
# An input FASTA (-i) is mandatory; without it only the help can be shown.
if [[ -z "${fasta:-}" ]]; then
  usage
fi

# -t must be one of the keys of the software map. The emptiness test comes
# first because an empty associative-array subscript is itself a bash error.
if [[ -z "$type" || -z "${software[$type]:-}" ]]; then
  echo "ERROR:unknown -t $type" >&2
  exit 1
fi

# Fail early with a clear message instead of letting the search tool trip
# over an unreadable query file.
if [[ ! -r "$fasta" ]]; then
  echo "ERROR:cannot read fasta file $fasta" >&2
  exit 1
fi

mkdir -p -- "$outdir" || exit 1

blast_out="$outdir/blast.swissprot.txt"

# The map value holds program and sub-command in one string (for example
# "diamond blastp"); split it into words explicitly so that every other
# argument can be quoted.
read -r -a search_cmd <<< "${software[$type]}"

# NOTE(review): the commented-out `cat <<eof` / `eof` pair looks like a
# dry-run switch that prints the commands instead of running them - confirm
# before relying on it.
#cat <<eof
# Tabular output (--outfmt 6) with 13 columns; column 13 is stitle.
if ! "${search_cmd[@]}" -q "$fasta" -d "$database_dir" -o "$blast_out" -e 0.00001 \
  --outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore stitle \
  -p 12 --more-sensitive --max-target-seqs 5; then
  # Do not post-process a missing or partial hit table.
  echo "ERROR:${software[$type]} failed" >&2
  exit 1
fi

# Keep only the first line seen for each query id and write the query id
# (column 1) together with the subject title (column 13).
awk -F'\t' -v OFS='\t' '!seen[$1]++ { print $1, $13 }' "$blast_out" > "$outdir/Swissprot.txt"
#eof
|