Swissprot.sh 1.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. #!/bin/bash
  2. #set -e
  3. type="proteins"
  4. outdir="."
  5. script_path=$(dirname `realpath $0`)
  6. database_dir="/Business/psn_company/Work/Transcriptome/Datum/Public/Database/genome_prepare/Swissprot/uniprot_sprot.fasta.dmnd"
  7. declare -A software db_path
  8. software["CDS"]="diamond blastx"
  9. software["proteins"]="diamond blastp"
  10. usage(){
  11. echo -e "Usage:"
  12. echo -e "\t$0 -i cds.fa -o outdir -t CDS"
  13. echo -e "OPTIONS:"
  14. echo -e "\t-h: help information"
  15. echo -e "\t-i: fasta file"
  16. echo -e "\t-o: output dir;default:./"
  17. echo -e "\t-t: proteins or CDS default:proteins"
  18. exit 1
  19. }
  20. while getopts 'ht:i:o:' OPT;do
  21. case $OPT in
  22. h) usage;;
  23. t) type="$OPTARG";;
  24. i) fasta="$OPTARG";;
  25. o) outdir="$OPTARG";;
  26. ?) usage;;
  27. esac
  28. done
  29. if [[ ! $fasta ]]
  30. then
  31. usage
  32. fi
  33. if [[ ! ${software[${type}]} ]]
  34. then
  35. echo "ERROR:unknown -t $type"
  36. exit 1
  37. fi
  38. mkdir -p $outdir
  39. #cat <<eof
  40. ${software[${type}]} -q ${fasta} -d ${database_dir} -o $outdir/blast.swissprot.txt -e 0.00001 \
  41. --outfmt 6 qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore stitle \
  42. -p 12 --more-sensitive --max-target-seqs 5
  43. awk 'BEGIN{FS=OFS="\t"}{if(!a[$1]){print $1,$13;a[$1]=1}}' $outdir/blast.swissprot.txt > $outdir/Swissprot.txt
  44. #eof