Skip to content

Commit

Permalink
升级为1.2版本,请重新fork
Browse files Browse the repository at this point in the history
  • Loading branch information
luyishisi committed Jan 10, 2017
1 parent b29a953 commit 0f05924
Show file tree
Hide file tree
Showing 54 changed files with 45 additions and 859 deletions.
Binary file modified .DS_Store
Binary file not shown.
Binary file removed 10.selement/rewifi/Wed-Nov-30-15:55:19-2016.png
Binary file not shown.
Binary file removed 10.selement/so_gold/Mon-Nov-28-17:43:13-2016.png
Binary file not shown.
193 changes: 0 additions & 193 deletions 2.代理/SpiderGaoDe-BJ-10-day (复件).py

This file was deleted.

20 changes: 9 additions & 11 deletions 2.代理/get_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# 日期:编写日期2016/11/10
# 语言:Python 2.7.x
# 操作:python get_proxy.py
# 功能:从www.gatherproxy.com网站采集代理信息并存入数据库
# 功能:从www.gatherproxy.com网站采集代理信息并存入数据库
#-------------------------------------------------------------------------
import requests,re,json
import sys,os,time,MySQLdb,MySQLdb
Expand All @@ -19,11 +19,11 @@
Type = sys.getfilesystemencoding()

# 数据库设置
MYSQL_HOST = '171.15.132.56'
MYSQL_DBNAME = 'DataBase_GD'
MYSQL_USER = 'luyishisi'
MYSQL_HOST = ''
MYSQL_DBNAME = ''
MYSQL_USER = ''
MYSQL_PASSWD = ''
MYSQL_PORT= 33306
MYSQL_PORT= 3306

# 此处修改数据库插入修改语句
install_str = '''
Expand Down Expand Up @@ -90,12 +90,12 @@ def re_html_code(html_code,proxy_list_json):
Last_test_time = json_list['PROXY_LAST_UPDATE']
proxy_status = '1'
Remarks = 'ly'
# `id`, `proxy_ip`, `proxy_port`, `proxy_country`, `proxy_type`, `addtime`, `Last_test_time`, `proxy_status`, `Remarks`
# `id`, `proxy_ip`, `proxy_port`, `proxy_country`, `proxy_type`, `addtime`, `Last_test_time`, `proxy_status`, `Remarks`

list_i = [PROXY_IP,PROXY_PORT,PROXY_COUNTRY,PROXY_TYPE,addtime,Last_test_time,proxy_status,Remarks]

proxy_list_json.append(list_i)

# print proxy_list_json
return proxy_list_json

Expand All @@ -109,7 +109,7 @@ def re_html_code(html_code,proxy_list_json):
print Exception,e

url = "http://www.gatherproxy.com/zh/proxylist/country/?c=China"

try:
html_code = get_request(url,headers)
proxy_list_json = []
Expand All @@ -120,5 +120,3 @@ def re_html_code(html_code,proxy_list_json):
insert_ll(install_str,i,conn,cur)
except Exception,e:
print Exception,e


9 changes: 4 additions & 5 deletions 2.代理/get_proxy_all_cn.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,11 @@
Type = sys.getfilesystemencoding()

# 数据库设置
MYSQL_HOST = '171.15.132.56'
MYSQL_DBNAME = 'DataBase_GD'
MYSQL_USER = 'luyishisi'
MYSQL_HOST = ''
MYSQL_DBNAME = ''
MYSQL_USER = ''
MYSQL_PASSWD = ''
MYSQL_PORT= 33306

MYSQL_PORT= 3306
# 此处修改数据库插入修改语句
install_str = '''
INSERT INTO proxy( `proxy_ip`, `proxy_port`, `proxy_country`, `proxy_type`, `addtime`, `Last_test_time`, `proxy_status`, `Remarks` )
Expand Down
8 changes: 4 additions & 4 deletions 2.代理/get_proxy_yanzheng.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@
Type = sys.getfilesystemencoding()

# 数据库设置
MYSQL_HOST = '171.15.132.56'
MYSQL_DBNAME = 'DataBase_GD'
MYSQL_USER = 'luyishisi'
MYSQL_HOST = ''
MYSQL_DBNAME = ''
MYSQL_USER = ''
MYSQL_PASSWD = ''
MYSQL_PORT= 33306
MYSQL_PORT= 3306

# 此处修改数据库插入修改语句
install_str = '''
Expand Down
8 changes: 4 additions & 4 deletions 2.代理/xicidaili.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@
Type = sys.getfilesystemencoding()

# 数据库设置
MYSQL_HOST = '171.15.132.56'
MYSQL_DBNAME = 'DataBase_GD'
MYSQL_USER = 'luyishisi'
MYSQL_HOST = ''
MYSQL_DBNAME = ''
MYSQL_USER = ''
MYSQL_PASSWD = ''
MYSQL_PORT= 33306
MYSQL_PORT= 3306

# 此处修改数据库插入修改语句
install_str = '''
Expand Down
4 changes: 1 addition & 3 deletions 3.代码模板/多线程/README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
此代码已经失效,但是其效率之高刷新采集速度记录
采用分布式该速度为4进程200线程下
SpiderGaoDe-mode : 1kw一天
进行数据库优化后
SpiderGaoDe-mysql++.py : 4kw一天
请看项目根目录下UrlSpider项目
Loading

0 comments on commit 0f05924

Please sign in to comment.