From 11769c4eec9c44cc719807f78ce21a591ea388be Mon Sep 17 00:00:00 2001 From: yangzy51 Date: Fri, 28 Jun 2024 11:20:22 +0800 Subject: [PATCH] InterProScan --- 2024/06/26/chenge-first-blog/index.html | 9 +- 2024/06/26/hello-world/index.html | 4 +- 2024/06/28/InterProScan/index.html | 394 ++++++++++++++++++++++++ archives/2024/06/index.html | 26 +- archives/2024/index.html | 26 +- archives/index.html | 26 +- images/favicon.ico | Bin 0 -> 3159 bytes index.html | 71 ++++- search.xml | 40 +++ 9 files changed, 580 insertions(+), 16 deletions(-) create mode 100644 2024/06/28/InterProScan/index.html create mode 100644 images/favicon.ico create mode 100644 search.xml diff --git a/2024/06/26/chenge-first-blog/index.html b/2024/06/26/chenge-first-blog/index.html index f00d569..d8682d3 100644 --- a/2024/06/26/chenge-first-blog/index.html +++ b/2024/06/26/chenge-first-blog/index.html @@ -17,7 +17,7 @@ @@ -214,7 +214,10 @@

-
+
+
@@ -286,7 +289,7 @@

diff --git a/2024/06/26/hello-world/index.html b/2024/06/26/hello-world/index.html index 9b69a8d..eb40bbd 100644 --- a/2024/06/26/hello-world/index.html +++ b/2024/06/26/hello-world/index.html @@ -17,7 +17,7 @@ @@ -309,7 +309,7 @@

- 2 + 3 日志 diff --git a/2024/06/28/InterProScan/index.html b/2024/06/28/InterProScan/index.html new file mode 100644 index 0000000..00ca147 --- /dev/null +++ b/2024/06/28/InterProScan/index.html @@ -0,0 +1,394 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + InterProScan | Bio_Learning + + + + + + + + + + + + +
+
+ +
+
+ + +
+ + + +

Bio_Learning

+ +
+

yang51的学习笔记

+
+ + +
+ + + + + + + + + +
+
+ + +
+ + 0% +
+ + +
+
+
+ + +
+ + + + + +
+ + + + + +
+

+ InterProScan +

+ + +
+ + + + +
+ + +

conda安装

安装:

1
2
conda install -c bioconda interproscan
#https://anaconda.org/bioconda/interproscan
+

下载、检查md5文件并解压数据库

1
2
3
4
5
6
7
8
# get the md5 of the databases
wget https://ftp.ebi.ac.uk/pub/databases/interpro/iprscan/5/5.62-94.0/interproscan-5.62-94.0-64-bit.tar.gz.md5
# get the databases (with core because much faster to download)
wget https://ftp.ebi.ac.uk/pub/databases/interpro/iprscan/5/5.62-94.0/interproscan-5.62-94.0-64-bit.tar.gz
# checksum
md5sum -c interproscan-5.62-94.0-64-bit.tar.gz.md5
# untar gz
tar xvzf interproscan-5.62-94.0-64-bit.tar.gz
+

更改数据库

1
2
3
4
5
# remove old DB 根据conda安装路径的数据库删除
rm -rf /home/yangzy/miniconda3/share/InterProScan/data/
# copy paste the new db 把
cp -r /home/yangzy/miniconda3/envs/interproscan/interproscan-5.62-94.0/data /home/yangzy/miniconda3/share/InterProScan/
#将wget到的数据库复制过去
+ +

测试

1
./interproscan.sh -i test_proteins.fasta -f tsv
+

如果没有报错,则表示InterProScan能正常运行了
一般常用的参数有这些:
-appl, --applications 用于指定使用Interpro中哪些数据库,默认全部数据库
-b, --output-file-base 用于指定输出文件的路径or文件夹,默认是输入文件的路径
-f, --formats 用于指定输出文件的后缀,蛋白序列默认输出TSV, XML and GFF3
-i, --input 输入文件,一般要为fasta格式,不要带有其他特殊符号

+

TBtools

使用TBtools中的Batch Protein Annotation in InterPro插件
参考:https://mp.weixin.qq.com/s/4tIlpUekwHRlPe4yySFnFA

+

上传1w条序列文件后,在分析到两千多条时显示超时错误:

+

咨询插件创作者得到反馈:
不建议一次1w 太多了 对InterPro网站造成一定负担 可以少量多次 比如1000条分10次 另外一次量过多也容易产生ip被封的风险

+ +
+ + + + + + + +
+ + + + + + +
+ + + + +
+ + + + + + + + +
+
+ +
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/archives/2024/06/index.html b/archives/2024/06/index.html index d154220..366329f 100644 --- a/archives/2024/06/index.html +++ b/archives/2024/06/index.html @@ -17,7 +17,7 @@ @@ -159,7 +159,7 @@

Bio_Learning

- 嗯..! 目前共计 2 篇日志。 继续努力。 + 嗯..! 目前共计 3 篇日志。 继续努力。
@@ -167,6 +167,26 @@

Bio_Learning

2024
+ +
@@ -279,7 +299,7 @@

Bio_Learning

diff --git a/archives/2024/index.html b/archives/2024/index.html index 5d4e0c4..0dc13ce 100644 --- a/archives/2024/index.html +++ b/archives/2024/index.html @@ -17,7 +17,7 @@ @@ -159,7 +159,7 @@

Bio_Learning

- 嗯..! 目前共计 2 篇日志。 继续努力。 + 嗯..! 目前共计 3 篇日志。 继续努力。
@@ -167,6 +167,26 @@

Bio_Learning

2024
+ +
@@ -279,7 +299,7 @@

Bio_Learning

diff --git a/archives/index.html b/archives/index.html index d606fbc..2621672 100644 --- a/archives/index.html +++ b/archives/index.html @@ -17,7 +17,7 @@ @@ -159,7 +159,7 @@

Bio_Learning

- 嗯..! 目前共计 2 篇日志。 继续努力。 + 嗯..! 目前共计 3 篇日志。 继续努力。
@@ -167,6 +167,26 @@

Bio_Learning

2024
+ +
@@ -279,7 +299,7 @@

Bio_Learning

diff --git a/images/favicon.ico b/images/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..d0d85b74da0b04ea5844ca66c91eb58698191c73 GIT binary patch literal 3159 zcmbVO3piA1A3tolw4qR`O-@4+&D}6FnG{2;1~tlc(>Qa6S>|Fc#xNn$P4iNb`P4QR1x`aGfi%?mDS@B|!xgj@h+vR{!J{D@ zjsRLxFdQPu9Ogi)K?06I#NzQ-90iRhP|a~vf+hInK_TKqYz}oZ-TsR(WJN;-NF+ik z7Aup4hw&w>P&8j`_?3YUiy;wLDB%kD;1navPau`h zP>8CZZQu!KX!+tVenLEk4P^n!yRBqM&KvYzPYn!918R5hGaq3|1H* zkO;&9g8xE2^ZPFX5Z^KwGcx{G7ang$gjix1f|&8eA%BY&yM+m1>}FUjkcuGKE(B52 zaLO7X)m8+vBm$9}KoImbQm$WB1_>k#2{du%^4S8Jc-5Z{z;u=brlF9inWJ$QXad;{ zkEh~@R3gy~N2cO%pP>u^o68CNTPTTYPWcNI$r(0F!um%r8=`UqA|4Adn9F1N!&o8T z9|g|1k!mXl5{MANh<3zjEf@@{BVR0G@gdlePD3F!W4K&4l|W%bBtH@k&0>?uXmgx7 zgl2JAcr*t`By%kAY>47#@%21i07<7J`}I8g|2*#^;v%KR3i{VPQw2Ab5>yAS7ztL` zmx^G*!Cz)UTyQ!Is4QqIyJ;wBDy%RY^<|s;uRZXYRu%vwr2iw_pJ8GFM5sXF{@2lhWroc{Nxn*9n#8o|_brUfC3nI;DFkhN&wXgy+B>N^T(*biGp+0UVwqpdCMh{%bLysa==&LdOLM; zk8|rKQ=s+Gqt1{j&|UM1?*hOtORZd9yNiCTCuNIL^CL*}?&}U}K~`S1l3tS5QT}gH z-Z6LiId$^Kx#Z>tMg82_Ks2BVM7fwH#}zpx>Wt`AE4A}}yK^;BAsv5`INr+fe%!Ag z-bflkB{qN*gY6khcXjq!a^uYNDicPJZ^^i)3F zm{F})SP|7%d%4HD)nL#Z>#|B-TcP>8&yB+?(P_PgyvCgU8X4y%FP-Ue4ZZw|uP^46 z;|M#3+nrP!Q!{xH4?cLUlrmLruotG3>Hw>Y;}ZD1rTS@j{6|uBd4N%t&l_c5#^+BCPtXSo3+ymhm*RyYl_LG z@6MjE#I!i&?S(X@tP*fnKHw#mAW!MZrn&E>3*B{U46?%h0&te9}`&~ zz4r_5G=}9|UDD~Suiva!+&k7p`;KU&C4RYI>82s`ja*4yix{}x5S<9$nD7e2Cq$tDx(5>68CU} zV7Z%sX|RV?pfLs98I7iCKI8mj4LkWB6-^B7s)rE>GOJ@l0-? 
zHMmV7-Ba$aV-xK^O8f-SawlTeba$txUhU8QxEYvzJH2yXmgq8y9avN2cYaL6>ZK*& z`48EWAC6O61{SM7;Z}s&%%{9dhv^Ix-57&KHTtyyjkl{vg_H`nl7ifnrl+fa^8v+7*~edO;$K7eXI`5l8~|Dc*qe8Wy`70%^VB{BH@ zvXQ{US2nRQyur!w(=~;!_d7k@20Y}KMUB7nwLD{=f4_KCMYU4RsSn~f&~@YeuN7P> z7`FAD*dl#w|G0T<(EIpJPU67<&*Bajx80`AT{X?#oG8D#@(mdVeKl*;w9pd5<8Rt^YzYHOaw4AdppnH?^u9llaG@BuVo6zMCT1TEMW@vFJTRSWN} zTXnrBcRWy0cUg2icw1T4v5OW5a1rd`Q_P|K@}e9Mgt zu@~f=MVgD(9}JTMZ$3rc0P)rrqATRLn-WL+-dtIp6I8TfOBrS&-==KuNy|)Fee?4; zx*@=rze+nnQDJvu`#d1PymMGuMcSfsyt3>?`YR2)p8(;&GwykN8nJUyGS0Fx@-fn0 zVO%7PU6`Gz6@#dF!emZ;aB@9d%;+jyn5{-$+L{h`nGRcSx>@q5Il1edMb@#rv$e_a zmJEg8S{cVaT2Z~E*wZ1^r*PchR^w2P(W=zBJ)zeL=XDYc=dGJJt8p^dln_-_yP-#| zK6bZ8D|F=PbE5xJG3HFy;QRLZVCGzvv^uYc1**#3isFjXfPCA^tlHz2O4ZL*cY?|a Y6<4pYGN$2?>ASz9oiqJ{&Gwi-05HZg2LJ#7 literal 0 HcmV?d00001 diff --git a/index.html b/index.html index 157dbac..f024c20 100644 --- a/index.html +++ b/index.html @@ -17,7 +17,7 @@ @@ -155,6 +155,73 @@

Bio_Learning

+
+ + + + + +
+

+ + +

+ + +
+ + + + +
+ + +

conda安装

安装:

1
2
conda install -c bioconda interproscan
#https://anaconda.org/bioconda/interproscan
+

下载、检查md5文件并解压数据库

1
2
3
4
5
6
7
8
# get the md5 of the databases
wget https://ftp.ebi.ac.uk/pub/databases/interpro/iprscan/5/5.62-94.0/interproscan-5.62-94.0-64-bit.tar.gz.md5
# get the databases (with core because much faster to download)
wget https://ftp.ebi.ac.uk/pub/databases/interpro/iprscan/5/5.62-94.0/interproscan-5.62-94.0-64-bit.tar.gz
# checksum
md5sum -c interproscan-5.62-94.0-64-bit.tar.gz.md5
# untar gz
tar xvzf interproscan-5.62-94.0-64-bit.tar.gz
+

更改数据库

1
2
3
4
5
# remove old DB 根据conda安装路径的数据库删除
rm -rf /home/yangzy/miniconda3/share/InterProScan/data/
# copy paste the new db 把
cp -r /home/yangzy/miniconda3/envs/interproscan/interproscan-5.62-94.0/data /home/yangzy/miniconda3/share/InterProScan/
#将wget到的数据库复制过去
+ +

测试

1
./interproscan.sh -i test_proteins.fasta -f tsv
+

如果没有报错,则表示InterProScan能正常运行了
一般常用的参数有这些:
-appl, --applications 用于指定使用Interpro中哪些数据库,默认全部数据库
-b, --output-file-base 用于指定输出文件的路径or文件夹,默认是输入文件的路径
-f, --formats 用于指定输出文件的后缀,蛋白序列默认输出TSV, XML and GFF3
-i, --input 输入文件,一般要为fasta格式,不要带有其他特殊符号

+

TBtools

使用TBtools中的Batch Protein Annotation in InterPro插件
参考:https://mp.weixin.qq.com/s/4tIlpUekwHRlPe4yySFnFA

+

上传1w条序列文件后,在分析到两千多条时显示超时错误:

+

咨询插件创作者得到反馈:
不建议一次1w 太多了 对InterPro网站造成一定负担 可以少量多次 比如1000条分10次 另外一次量过多也容易产生ip被封的风险

+ + +
+ + + + +
+
+
+
+ + + + + + +
diff --git a/search.xml b/search.xml new file mode 100644 index 0000000..6b48296 --- /dev/null +++ b/search.xml @@ -0,0 +1,40 @@ + + + + Hello World + /2024/06/26/hello-world/ + Welcome to Hexo! This is your very first post. Check documentation for more info. If you get any problems when using Hexo, you can find the answer in troubleshooting or you can ask me on GitHub.

+

Quick Start

Create a new post

$ hexo new "My New Post"
+ +

More info: Writing

+

Run server

$ hexo server
+ +

More info: Server

+

Generate static files

$ hexo generate
+ +

More info: Generating

+

Deploy to remote sites

$ hexo deploy
+ +

More info: Deployment

+]]>
+
+ + 'chenge-first-blog' + /2024/06/26/chenge-first-blog/ + + + + InterProScan + /2024/06/28/InterProScan/ + conda安装

安装:

conda install -c bioconda interproscan
#https://anaconda.org/bioconda/interproscan
+

下载、检查md5文件并解压数据库

# get the md5 of the databases
wget https://ftp.ebi.ac.uk/pub/databases/interpro/iprscan/5/5.62-94.0/interproscan-5.62-94.0-64-bit.tar.gz.md5
# get the databases (with core because much faster to download)
wget https://ftp.ebi.ac.uk/pub/databases/interpro/iprscan/5/5.62-94.0/interproscan-5.62-94.0-64-bit.tar.gz
# checksum
md5sum -c interproscan-5.62-94.0-64-bit.tar.gz.md5
# untar gz
tar xvzf interproscan-5.62-94.0-64-bit.tar.gz
+

更改数据库

# remove old DB 根据conda安装路径的数据库删除
rm -rf /home/yangzy/miniconda3/share/InterProScan/data/
# copy paste the new db 把
cp -r /home/yangzy/miniconda3/envs/interproscan/interproscan-5.62-94.0/data /home/yangzy/miniconda3/share/InterProScan/
#将wget到的数据库复制过去
+ +

测试

./interproscan.sh -i test_proteins.fasta -f tsv
+

如果没有报错,则表示InterProScan能正常运行了
一般常用的参数有这些:
-appl, --applications 用于指定使用Interpro中哪些数据库,默认全部数据库
-b, --output-file-base 用于指定输出文件的路径or文件夹,默认是输入文件的路径
-f, --formats 用于指定输出文件的后缀,蛋白序列默认输出TSV, XML and GFF3
-i, --input 输入文件,一般要为fasta格式,不要带有其他特殊符号

+

TBtools

使用TBtools中的Batch Protein Annotation in InterPro插件
参考:https://mp.weixin.qq.com/s/4tIlpUekwHRlPe4yySFnFA

+

上传1w条序列文件后,在分析到两千多条时显示超时错误:

+

咨询插件创作者得到反馈:
不建议一次1w 太多了 对InterPro网站造成一定负担 可以少量多次 比如1000条分10次 另外一次量过多也容易产生ip被封的风险

+]]>
+
+