diff --git a/.DS_Store b/.DS_Store index 3c9e5515..e8dba5d6 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/10.selement/rewifi/Wed-Nov-30-15:55:19-2016.png b/10.selement/rewifi/Wed-Nov-30-15:55:19-2016.png deleted file mode 100644 index 7208dd9f..00000000 Binary files a/10.selement/rewifi/Wed-Nov-30-15:55:19-2016.png and /dev/null differ diff --git a/10.selement/so_gold/Mon-Nov-28-17:43:13-2016.png b/10.selement/so_gold/Mon-Nov-28-17:43:13-2016.png deleted file mode 100644 index 153c4081..00000000 Binary files a/10.selement/so_gold/Mon-Nov-28-17:43:13-2016.png and /dev/null differ diff --git "a/2.\344\273\243\347\220\206/SpiderGaoDe-BJ-10-day (\345\244\215\344\273\266).py" "b/2.\344\273\243\347\220\206/SpiderGaoDe-BJ-10-day (\345\244\215\344\273\266).py" deleted file mode 100644 index ac6b52d3..00000000 --- "a/2.\344\273\243\347\220\206/SpiderGaoDe-BJ-10-day (\345\244\215\344\273\266).py" +++ /dev/null @@ -1,193 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -#------------------------------------------------------------------------- -# 程序:SpiderGaoDe.py -# 版本:0.1 -# 作者:*** -# 日期:编写日期2016/6/22 -# 语言:Python 2.7.x -# 操作:python SpiderGaoDe.py Table -# 功能:由IP获取高德地图中的经纬度 -# 表结构(id, ip, lon_gd, lat_gd, datetime, flag) -#------------------------------------------------------------------------- -import re ,os ,sys ,time ,json ,random ,MySQLdb ,requesocks ,threading - -#-------------------------------------------------- -#中文编码设置 -reload(sys) -sys.setdefaultencoding('utf-8') -Type = sys.getfilesystemencoding() - -session = requesocks.session() -# session.proxies = {'http':'socks5://127.0.0.1:9050','https':'socks5://127.0.0.1:9050'} -#------------------------------------------------ -# 可修改的全局变量参数--Start. -#Table = "TW_ALL_IP_BLOCK_GD_20161107_ip"# sys.argv[1] # 表名称需修改 -Table = "country_apnic_bgp_flag_jp_BD_GD"# sys.argv[1] # 表名称需修改 -#Table = 'GD_BJ_10_day_' + time.ctime().split(' ')[2] -HOST, USER, PASSWD, DB, PORT = '171.15.132.56', 'luyishisi', '', 'DataBase_GD', 33306 - -select_sql = "SELECT id, ip FROM %s WHERE flag IS NULL AND lat_gd IS NULL ORDER BY RAND() Limit 30000;" # 可修改 -Update_sql = "UPDATE %s SET datetime=now(), lon_gd='%s', lat_gd='%s', flag=%s WHERE id =%s;" # 可修改 - -THREAD_COUNT = 50 # 可修改 -schedule = 0 -ErrorList = [] -WarnList = [] -# 可修改全局变量参数--End. -#------------------------------------------------ - -class Handle_HTML(threading.Thread): - """docstring for Handle_HTML""" - def __init__(self, lock, ThreadID, tasklist, Total_TaskNum): - super(Handle_HTML, self).__init__() - self.lock = lock - self.ThreadID = ThreadID - self.tasklist = tasklist - self.Total_TaskNum = Total_TaskNum - - def run(self): - - global schedule - global ErrorList - connect, cursor = ConnectDB() - self.lock.acquire() - print "The Thread tasklist number :", len(self.tasklist) - self.lock.release() - total = len(self.tasklist) - - # Host: ditu.amap.com - # Connection: keep-alive - # Pragma: no-cache - # Cache-Control: no-cache - # Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 - # Upgrade-Insecure-Requests: 1 - # Referer:http://ditu.amap.com/ - # User-Agent: Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.107 Safari/537.36 - # Accept-Encoding: deflate, sdch - # Accept-Language: zh-CN,zh;q=0.8,en;q=0.6,en-US;q=0.4 - # X-Forwarded-For: 43.224.40.10 - - user_agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36' - - for (id, ip) in self.tasklist: - self.lock.acquire() - time_Now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) - print "Tread-%s:" % self.ThreadID, time_Now, "Already Completed:[%s] ,Also remaining:[%s]" % (schedule, self.Total_TaskNum - schedule) - self.lock.release() - - headers = { - 'User-Agent': user_agent, - 'Referer':'http://ditu.amap.com/', - 'X-Forwarded-For': ip - } - URL = 'http://ditu.amap.com/service/pl/pl.json?rand=' + str(random.random()) - lon, lat = '', '' - - try: - time.sleep(random.uniform(0, 1)) - response = session.get(URL, headers=headers) - result = response.text.encode('utf-8') - result = json.loads(result) - if result.has_key('lat'): - lon, lat = result['lng'], result['lat'] - print result['cip'], lon, lat - cursor.execute(Update_sql % (Table, lon, lat, 1, id)) - else: - cursor.execute(Update_sql % (Table, lon, lat, 0, id)) - connect.commit() - except Exception as e: - time.sleep(random.uniform(0, 3)) - ErrorList.append("The ip is :[%s] Error:%s\n result:%s" %(ip, e, result)) - # print "The ip is :[%s] Error:%s\n result:%s" %(ip, e, result) - self.lock.acquire() - schedule += 1 - self.lock.release() - connect.commit() - connect.close() - - -def ConnectDB(): - "Connect MySQLdb and Print version." - connect, cursor = None, None - while True: - try: - connect = MySQLdb.connect( - host=HOST, user=USER, passwd=PASSWD, db=DB, port=PORT, charset='utf8') - cursor = connect.cursor() - # cursor.execute("SELECT VERSION()") - # data = cursor.fetchone() - # print "Database version:%s\n"%data - break - except MySQLdb.Error, e: - print "Error %d: %s" % (e.args[0], e.args[1]) - return connect, cursor - - -def Thread_Handle(taskList, Total_TaskNum): - - global THREAD_COUNT - lock = threading.Lock() - WorksThread = [] - every_thread_number = len(taskList) / THREAD_COUNT - if every_thread_number == 0: - THREAD_COUNT = len(taskList) - every_thread_number = 1 - - for i in range(THREAD_COUNT): - if i != THREAD_COUNT - 1: - source_list = taskList[ - i * every_thread_number: (i + 1) * every_thread_number] - Work = Handle_HTML(lock, i, source_list, Total_TaskNum) - else: - source_list = taskList[i * every_thread_number:] - Work = Handle_HTML(lock, i, source_list, Total_TaskNum) - Work.start() - WorksThread.append(Work) - for Work in WorksThread: - Work.join() - - -def main(): - global ErrorList - global WarnList - connect, cursor = ConnectDB() - try: - #cursor.execute( "create table DataBase_GD.%s as SELECT * FROM DataBase_GD.GD_BJ_10_day_0;" % Table ) - pass - except Exception,e: - print Exception,e - cursor.execute( - "SELECT COUNT(*) FROM %s WHERE flag IS NULL;" % Table ) - #create table DataBase_GD.GD_BJ_10_day_0 as SELECT * FROM DataBase_RTB.GD_BJ_10_day_0; - TaskNum = cursor.fetchall() - #TaskNum = 98914 #表的大小 - connect.close() - if TaskNum[0][0] == 0: - print "Warning:There is no need to do the task!!!" - else: - Total_TaskNum = int(TaskNum[0][0]) - while True: - connect, cursor = ConnectDB() - try: - if cursor.execute(select_sql % Table): - rows = cursor.fetchall() - Thread_Handle(rows, Total_TaskNum) - else: - break - except Exception, e: - print e - connect.close() - print "_____************_____" - if ErrorList : - for error in ErrorList: - print error - print "Error:", len(ErrorList), "Warning:",len(WarnList) - -if __name__ == '__main__': - - print "The Program start time:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) - start = time.time() - main() - print "The Program end time:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "[%s]" % (time.time() - start) - # raw_input("Please enter any key to exit!") diff --git "a/2.\344\273\243\347\220\206/get_proxy.py" "b/2.\344\273\243\347\220\206/get_proxy.py" index 94eff229..e9a94caf 100644 --- "a/2.\344\273\243\347\220\206/get_proxy.py" +++ "b/2.\344\273\243\347\220\206/get_proxy.py" @@ -7,7 +7,7 @@ # 日期:编写日期2016/11/10 # 语言:Python 2.7.x # 操作:python referer_forge.py -# 功能:从www.gatherproxy.com网站采集代理信息并存入数据库 +# 功能:从www.gatherproxy.com网站采集代理信息并存入数据库 #------------------------------------------------------------------------- import requests,re,json import sys,os,time,MySQLdb,MySQLdb @@ -19,11 +19,11 @@ Type = sys.getfilesystemencoding() # 数据库设置 -MYSQL_HOST = '171.15.132.56' -MYSQL_DBNAME = 'DataBase_GD' -MYSQL_USER = 'luyishisi' +MYSQL_HOST = '' +MYSQL_DBNAME = '' +MYSQL_USER = '' MYSQL_PASSWD = '' -MYSQL_PORT= 33306 +MYSQL_PORT= 3306 # 此处修改数据库插入修改语句 install_str = ''' @@ -90,12 +90,12 @@ def re_html_code(html_code,proxy_list_json): Last_test_time = json_list['PROXY_LAST_UPDATE'] proxy_status = '1' Remarks = 'ly' - # `id`, `proxy_ip`, `proxy_port`, `proxy_country`, `proxy_type`, `addtime`, `Last_test_time`, `proxy_status`, `Remarks` + # `id`, `proxy_ip`, `proxy_port`, `proxy_country`, `proxy_type`, `addtime`, `Last_test_time`, `proxy_status`, `Remarks` list_i = [PROXY_IP,PROXY_PORT,PROXY_COUNTRY,PROXY_TYPE,addtime,Last_test_time,proxy_status,Remarks] - + proxy_list_json.append(list_i) - + # print proxy_list_json return proxy_list_json @@ -109,7 +109,7 @@ def re_html_code(html_code,proxy_list_json): print Exception,e url = "http://www.gatherproxy.com/zh/proxylist/country/?c=China" - + try: html_code = get_request(url,headers) proxy_list_json = [] @@ -120,5 +120,3 @@ def re_html_code(html_code,proxy_list_json): insert_ll(install_str,i,conn,cur) except Exception,e: print Exception,e - - diff --git "a/2.\344\273\243\347\220\206/get_proxy_all_cn.py" "b/2.\344\273\243\347\220\206/get_proxy_all_cn.py" index c3d2dae1..b0c8f89a 100644 --- "a/2.\344\273\243\347\220\206/get_proxy_all_cn.py" +++ "b/2.\344\273\243\347\220\206/get_proxy_all_cn.py" @@ -19,12 +19,11 @@ Type = sys.getfilesystemencoding() # 数据库设置 -MYSQL_HOST = '171.15.132.56' -MYSQL_DBNAME = 'DataBase_GD' -MYSQL_USER = 'luyishisi' +MYSQL_HOST = '' +MYSQL_DBNAME = '' +MYSQL_USER = '' MYSQL_PASSWD = '' -MYSQL_PORT= 33306 - +MYSQL_PORT= 3306 # 此处修改数据库插入修改语句 install_str = ''' INSERT INTO proxy( `proxy_ip`, `proxy_port`, `proxy_country`, `proxy_type`, `addtime`, `Last_test_time`, `proxy_status`, `Remarks` ) diff --git "a/2.\344\273\243\347\220\206/get_proxy_yanzheng.py" "b/2.\344\273\243\347\220\206/get_proxy_yanzheng.py" index ec057d5d..3ba27fab 100644 --- "a/2.\344\273\243\347\220\206/get_proxy_yanzheng.py" +++ "b/2.\344\273\243\347\220\206/get_proxy_yanzheng.py" @@ -19,11 +19,11 @@ Type = sys.getfilesystemencoding() # 数据库设置 -MYSQL_HOST = '171.15.132.56' -MYSQL_DBNAME = 'DataBase_GD' -MYSQL_USER = 'luyishisi' +MYSQL_HOST = '' +MYSQL_DBNAME = '' +MYSQL_USER = '' MYSQL_PASSWD = '' -MYSQL_PORT= 33306 +MYSQL_PORT= 3306 # 此处修改数据库插入修改语句 install_str = ''' diff --git "a/2.\344\273\243\347\220\206/xicidaili.py" "b/2.\344\273\243\347\220\206/xicidaili.py" index de63e704..b9faf30f 100644 --- "a/2.\344\273\243\347\220\206/xicidaili.py" +++ "b/2.\344\273\243\347\220\206/xicidaili.py" @@ -19,11 +19,11 @@ Type = sys.getfilesystemencoding() # 数据库设置 -MYSQL_HOST = '171.15.132.56' -MYSQL_DBNAME = 'DataBase_GD' -MYSQL_USER = 'luyishisi' +MYSQL_HOST = '' +MYSQL_DBNAME = '' +MYSQL_USER = '' MYSQL_PASSWD = '' -MYSQL_PORT= 33306 +MYSQL_PORT= 3306 # 此处修改数据库插入修改语句 install_str = ''' diff --git "a/3.\344\273\243\347\240\201\346\250\241\346\235\277/\345\244\232\347\272\277\347\250\213/README.md" "b/3.\344\273\243\347\240\201\346\250\241\346\235\277/\345\244\232\347\272\277\347\250\213/README.md" index 2949b127..be2ab5ec 100644 --- "a/3.\344\273\243\347\240\201\346\250\241\346\235\277/\345\244\232\347\272\277\347\250\213/README.md" +++ "b/3.\344\273\243\347\240\201\346\250\241\346\235\277/\345\244\232\347\272\277\347\250\213/README.md" @@ -1,5 +1,3 @@ 此代码已经失效,但是其效率之高刷新采集速度记录 采用分布式该速度为4进程200线程下 -SpiderGaoDe-mode : 1kw一天 -进行数据库优化后 -SpiderGaoDe-mysql++.py : 4kw一天 +请看项目根目录下UrlSpider项目 diff --git "a/3.\344\273\243\347\240\201\346\250\241\346\235\277/\345\244\232\347\272\277\347\250\213/SpiderGaoDe-mode.py" "b/3.\344\273\243\347\240\201\346\250\241\346\235\277/\345\244\232\347\272\277\347\250\213/SpiderGaoDe-mode.py" deleted file mode 100644 index 9440d7a3..00000000 --- "a/3.\344\273\243\347\240\201\346\250\241\346\235\277/\345\244\232\347\272\277\347\250\213/SpiderGaoDe-mode.py" +++ /dev/null @@ -1,174 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -#------------------------------------------------------------------------- -# 程序:SpiderGaoDe.py -# 版本:0.1 -# 作者:*** -# 日期:编写日期2016/6/22 -# 语言:Python 2.7.x -# 操作:python SpiderGaoDe.py Table -# 功能:由IP获取地图中的经纬度 -# 表结构(id, ip, lon_gd, lat_gd, datetime, flag) -# 本接口已经失效, -#------------------------------------------------------------------------- -import re ,os ,sys ,time ,json ,random ,MySQLdb ,requesocks ,threading - -#-------------------------------------------------- -#中文编码设置 -reload(sys) -sys.setdefaultencoding('utf-8') -Type = sys.getfilesystemencoding() - -session = requesocks.session() -# session.proxies = {'http':'socks5://127.0.0.1:9050','https':'socks5://127.0.0.1:9050'} -#------------------------------------------------ -# 可修改的全局变量参数--Start. -Table = "TW_ALL_IP_BLOCK_GD_20161107_ip"# sys.argv[1] # 表名称需修改 -HOST, USER, PASSWD, DB, PORT = '127.0.0.1', 'name', 'passwd', 'TW_ISP', 3306 - -select_sql = "SELECT id, ip FROM %s WHERE flag IS NULL AND lat_gd IS NULL ORDER BY RAND() Limit 30000;" # 可修改 -Update_sql = "UPDATE %s SET datetime=now(), lon_gd='%s', lat_gd='%s', flag=%s WHERE id =%s;" # 可修改 - -THREAD_COUNT = 50 # 可修改 -schedule = 0 -ErrorList = [] -WarnList = [] -# 可修改全局变量参数--End. -#------------------------------------------------ - -class Handle_HTML(threading.Thread): - """docstring for Handle_HTML""" - def __init__(self, lock, ThreadID, tasklist, Total_TaskNum): - super(Handle_HTML, self).__init__() - self.lock = lock - self.ThreadID = ThreadID - self.tasklist = tasklist - self.Total_TaskNum = Total_TaskNum - - def run(self): - - global schedule, ErrorList - connect, cursor = ConnectDB()#建立链接 - self.lock.acquire() - print "The Thread tasklist number :", len(self.tasklist) - self.lock.release() - total = len(self.tasklist) - - user_agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36' - - for (id, ip) in self.tasklist: - self.lock.acquire()#锁定 - time_Now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) - print "Tread-%s:" % self.ThreadID, time_Now, "Already Completed:[%s] ,Also remaining:[%s]" % (schedule, self.Total_TaskNum - schedule) #? - self.lock.release()#释放 - - headers = { - 'User-Agent': user_agent, - 'Referer':'', - 'X-Forwarded-For': ip #通过伪造此字段来修改ip位置 - } - URL = '已经失效' + str(random.random()) - lon, lat = '', '' - - try: - #--------------------------------- - # 发出请求并且抽取需要的数据并返回 - time.sleep(random.uniform(0, 1))#每个进程小休息一会 - response = session.get(URL, headers=headers)#发请求 - result = response.text.encode('utf-8')#改编码 - result = json.loads(result) - if result.has_key('lat'): - lon, lat = result['lng'], result['lat'] - print result['cip'], lon, lat - cursor.execute(Update_sql % (Table, lon, lat, 1, id)) - else: - cursor.execute(Update_sql % (Table, lon, lat, 0, id)) - connect.commit() - except Exception as e: - time.sleep(random.uniform(0, 3)) - ErrorList.append("The ip is :[%s] Error:%s\n result:%s" %(ip, e, result)) - # print "The ip is :[%s] Error:%s\n result:%s" %(ip, e, result) - self.lock.acquire() #? - schedule += 1 - self.lock.release() - connect.commit() - connect.close() - - -def ConnectDB(): - "Connect MySQLdb and Print version." - connect, cursor = None, None - while True: - try: - connect = MySQLdb.connect( - host=HOST, user=USER, passwd=PASSWD, db=DB, port=PORT, charset='utf8') - cursor = connect.cursor() - # cursor.execute("SELECT VERSION()") - # data = cursor.fetchone() - # print "Database version:%s\n"%data - break - except MySQLdb.Error, e: - print "Error %d: %s" % (e.args[0], e.args[1]) - return connect, cursor - - -def Thread_Handle(taskList, Total_TaskNum): - - global THREAD_COUNT - lock = threading.Lock() - WorksThread = [] - every_thread_number = len(taskList) / THREAD_COUNT - if every_thread_number == 0: - THREAD_COUNT = len(taskList) - every_thread_number = 1 - - for i in range(THREAD_COUNT): - if i != THREAD_COUNT - 1: - source_list = taskList[ - i * every_thread_number: (i + 1) * every_thread_number] - Work = Handle_HTML(lock, i, source_list, Total_TaskNum) - else: - source_list = taskList[i * every_thread_number:] - Work = Handle_HTML(lock, i, source_list, Total_TaskNum) - Work.start() - WorksThread.append(Work) - for Work in WorksThread: - Work.join() - - -def main(): - global ErrorList - global WarnList - connect, cursor = ConnectDB() - cursor.execute( - "SELECT COUNT(*) FROM %s WHERE flag IS NULL;" % Table) - TaskNum = cursor.fetchall() - connect.close() - if TaskNum[0][0] == 0: - print "Warning:There is no need to do the task!!!" - else: - Total_TaskNum = int(TaskNum[0][0]) - while True: - connect, cursor = ConnectDB() - try: - if cursor.execute(select_sql % Table): - rows = cursor.fetchall() - Thread_Handle(rows, Total_TaskNum) - else: - break - except Exception, e: - print e - connect.close() - print "_____************_____" - if ErrorList : - for error in ErrorList: - print error - print "Error:", len(ErrorList), "Warning:",len(WarnList) - -if __name__ == '__main__': - - print "The Program start time:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) - start = time.time() - main() - print "The Program end time:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "[%s]" % (time.time() - start) - # raw_input("Please enter any key to exit!") diff --git "a/3.\344\273\243\347\240\201\346\250\241\346\235\277/\345\244\232\347\272\277\347\250\213/SpiderGaoDe-mysql++.py" "b/3.\344\273\243\347\240\201\346\250\241\346\235\277/\345\244\232\347\272\277\347\250\213/SpiderGaoDe-mysql++.py" deleted file mode 100644 index 224f730c..00000000 --- "a/3.\344\273\243\347\240\201\346\250\241\346\235\277/\345\244\232\347\272\277\347\250\213/SpiderGaoDe-mysql++.py" +++ /dev/null @@ -1,224 +0,0 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- -#------------------------------------------------------------------------- -# 程序:SpiderGaoDe.py -# 版本:0.1 -# 作者:*** -# 日期:编写日期2016/12/7 -# 语言:Python 2.7.x -# 操作:python SpiderGaoDe.py -# 功能:由IP获取高德地图中的经纬度 -# 表结构(id, ip, lon_gd, lat_gd, datetime, flag) -# 接口已经失效 采用数据库批量插入优化等表结构优化 -#------------------------------------------------------------------------- -import re ,os ,sys ,time ,json ,random ,MySQLdb ,requesocks ,threading - -#-------------------------------------------------- -#中文编码设置 -reload(sys) -sys.setdefaultencoding('utf-8') -Type = sys.getfilesystemencoding() - -session = requesocks.session() -# session.proxies = {'http':'socks5://127.0.0.1:9050','https':'socks5://127.0.0.1:9050'} -#------------------------------------------------ -# 可修改的全局变量参数--Start. -#Table = "TW_ALL_IP_BLOCK_GD_20161107_ip"# sys.argv[1] # 表名称需修改 -#Table = "cn_ip_blk_single_ip_20161207_1_run"# sys.argv[1] # 表名称需修改 -Table = "table"# sys.argv[1] # 表名称需修改 -#HOST, USER, PASSWD, DB, PORT = '', '', '', '', 33306 -HOST, USER, PASSWD, DB, PORT = '', '', '', '', 3306 - -#select_sql = "SELECT id, ip FROM %s WHERE flag IS NULL AND lat_gd IS NULL ORDER BY RAND() Limit 300000;" # 可修改 -select_sql = "SELECT id,ip FROM %s where flag = 3 limit 30000;" # 在数据库中i已经打乱了. -Update_sql = "UPDATE "+Table+" SET lon_gd=%s, lat_gd=%s, flag=%s WHERE id =%s;" # 可修改 - -THREAD_COUNT = 50 #可修改 -schedule = 0 -ErrorList = [] -WarnList = [] -# 可修改全局变量参数--End. -#------------------------------------------------ - -class Handle_HTML(threading.Thread): - """docstring for Handle_HTML""" - def __init__(self, lock, ThreadID, tasklist, Total_TaskNum): - super(Handle_HTML, self).__init__() - self.lock = lock - self.ThreadID = ThreadID - self.tasklist = tasklist - self.Total_TaskNum = Total_TaskNum - - def run(self): - - global schedule - global ErrorList - connect, cursor = ConnectDB() - self.lock.acquire() - print "The Thread tasklist number :", len(self.tasklist) - self.lock.release() - total = len(self.tasklist) - user_agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36' - date_list = [] - i = 0 - for (id, ip) in self.tasklist: - self.lock.acquire() - time_Now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) - print "Tread-%s:" % self.ThreadID, time_Now, "Already Completed:[%s] ,Also remaining:[%s]" % (schedule, self.Total_TaskNum - schedule) - self.lock.release() - - headers = { - 'User-Agent': user_agent, - 'Referer':'', - 'X-Forwarded-For': ip, - 'Accept':'*/*', - 'Accept-Encoding':'gzip, deflate, sdch', - 'Accept-Language':'zh-CN,zh;q=0.8', - 'Cache-Control':'no-cache', - 'Connection':'keep-alive', - 'Host':'ditu.amap.com', - 'Pragma':'no-cache', - 'Referer':'' - #User-Agent:Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/53.0.2785.143 Chrome/53.0.2785.143 Safari/537.36 - } - URL = '接口已经失效' + str(random.random()) - #locator - #print URL - lon, lat = '0', '0' - i += 1 - #print '*************************************',ip,i#,date_list - try: - time.sleep(random.uniform(0, 1)) - response = session.get(URL, headers=headers) - #print response - result = response.text.encode('utf-8') - #print result - result = json.loads(result) - #print result - if result.has_key('lat'): - lon, lat = result['lng'], result['lat'] - #print result['cip'], lon, lat - #print Update_sql % (Table, lon, lat, 1, id) - #cursor.execute(Update_sql % (Table, lon, lat, 1, id)) - #connect.commit() - #time.sleep(5) - date_list.append([lon,lat,1,id]) - else: - #print result['cip'], lon, lat - #lon = '0' - #lat = '0' - date_list.append([lon,lat,0,id]) - #print Update_sql % (Table, lon, lat, 1, id) - #cursor.execute(Update_sql % (Table, lon, lat, 0, id)) - - except Exception as e: - print e - time.sleep(random.uniform(0, 3)) - ErrorList.append("The ip is :[%s] Error:%s\n result:%s" %(ip, e, result)) - try: - sql_num = int(random.uniform(200, 300)) #随机一个限制数,200-300 到则进行插入 - if(i >= sql_num): - - i = 0 - cursor.executemany(Update_sql , date_list) - connect.commit() - date_list = [] - print 'uptime:10 ',time.ctime(),'&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&',sql_num - #print 'uptime:300 ',time.ctime() - except Exception as e: - print Exception,e - time.sleep(random.uniform(0, 3)) - #time.sleep(10) - ErrorList.append("The ip is :[%s] Error:%s\n result:%s" %(ip, e, result)) - # print "The ip is :[%s] Error:%s\n result:%s" %(ip, e, result) - self.lock.acquire() - schedule += 1 - self.lock.release() - cursor.executemany(Update_sql , date_list)#大爷的注释,,这里要保存一次 - connect.commit() - connect.close() - - -def ConnectDB(): - "Connect MySQLdb and Print version." - connect, cursor = None, None - while True: - try: - connect = MySQLdb.connect( - host=HOST, user=USER, passwd=PASSWD, db=DB, port=PORT, charset='utf8') - cursor = connect.cursor() - # cursor.execute("SELECT VERSION()") - # data = cursor.fetchone() - # print "Database version:%s\n"%data - break - except MySQLdb.Error, e: - print "Error %d: %s" % (e.args[0], e.args[1]) - return connect, cursor - - -def Thread_Handle(taskList, Total_TaskNum): - - global THREAD_COUNT - lock = threading.Lock() - WorksThread = [] - every_thread_number = len(taskList) / THREAD_COUNT - if every_thread_number == 0: - THREAD_COUNT = len(taskList) - every_thread_number = 1 - - for i in range(THREAD_COUNT): - if i != THREAD_COUNT - 1: - source_list = taskList[ - i * every_thread_number: (i + 1) * every_thread_number] - Work = Handle_HTML(lock, i, source_list, Total_TaskNum) - else: - source_list = taskList[i * every_thread_number:] - Work = Handle_HTML(lock, i, source_list, Total_TaskNum) - Work.start() - WorksThread.append(Work) - for Work in WorksThread: - Work.join() - - -def main(): - global ErrorList - global WarnList - connect, cursor = ConnectDB() - try: - #cursor.execute( "create table DataBase_GD.%s as SELECT * FROM DataBase_GD.GD_BJ_10_day_0;" % Table ) - pass - except Exception,e: - print Exception,e - cursor.execute( - "SELECT COUNT(*) FROM %s WHERE flag = 3 ;" % Table) - #create table DataBase_GD.GD_BJ_10_day_0 as SELECT * FROM DataBase_RTB.GD_BJ_10_day_0; - TaskNum = cursor.fetchall() - #TaskNum = 98914 #表的大小 - connect.close() - if TaskNum[0][0] == 0: - print "Warning:There is no need to do the task!!!" - else: - Total_TaskNum = int(TaskNum[0][0]) - while True: - connect, cursor = ConnectDB() - try: - if cursor.execute(select_sql % Table): - rows = cursor.fetchall() - Thread_Handle(rows, Total_TaskNum) - else: - break - except Exception, e: - print e - connect.close() - print "_____************_____" - if ErrorList : - for error in ErrorList: - print error - print "Error:", len(ErrorList), "Warning:",len(WarnList) - -if __name__ == '__main__': - print "The Program start time:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) - start = time.time() - main() - print "The Program end time:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "[%s]" % (time.time() - start) - # raw_input("Please enter any key to exit!") diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/Ghost/GetLonlat.py" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/Ghost/GetLonlat.py" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/Ghost/GetLonlat.py" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/Ghost/GetLonlat.py" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/Ghost/open_url_no_display.py" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/Ghost/open_url_no_display.py" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/Ghost/open_url_no_display.py" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/Ghost/open_url_no_display.py" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/Ghost/test_environment/header.png" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/Ghost/test_environment/header.png" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/Ghost/test_environment/header.png" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/Ghost/test_environment/header.png" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/Ghost/test_environment/input_seach_value.py" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/Ghost/test_environment/input_seach_value.py" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/Ghost/test_environment/input_seach_value.py" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/Ghost/test_environment/input_seach_value.py" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/Ghost/test_environment/main.html" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/Ghost/test_environment/main.html" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/Ghost/test_environment/main.html" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/Ghost/test_environment/main.html" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/Ghost/test_environment/open_baidu.py" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/Ghost/test_environment/open_baidu.py" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/Ghost/test_environment/open_baidu.py" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/Ghost/test_environment/open_baidu.py" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/Ghost/test_environment/open_url_no_display.py" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/Ghost/test_environment/open_url_no_display.py" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/Ghost/test_environment/open_url_no_display.py" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/Ghost/test_environment/open_url_no_display.py" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/README.md" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/README.md" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/README.md" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/README.md" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/python_ghost/README.md" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/python_ghost/README.md" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/python_ghost/README.md" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/python_ghost/README.md" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/python_ghost/test.py" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/python_ghost/test.py" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/python_ghost/test.py" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/python_ghost/test.py" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/LICENSE" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/LICENSE" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/LICENSE" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/LICENSE" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/README.md" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/README.md" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/README.md" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/README.md" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/basicgeetestcrack.py" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/basicgeetestcrack.py" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/basicgeetestcrack.py" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/basicgeetestcrack.py" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/crack-geetest.gif" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/crack-geetest.gif" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/crack-geetest.gif" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/crack-geetest.gif" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/geetest.py" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/geetest.py" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/geetest.py" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/geetest.py" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/geetest_exp_normal.py" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/geetest_exp_normal.py" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/geetest_exp_normal.py" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/geetest_exp_normal.py" diff --git "a/5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/industry_and_commerce.py" "b/5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/industry_and_commerce.py" similarity index 100% rename from "5.GHOST\347\240\264\350\247\243\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/industry_and_commerce.py" rename to "5.\346\236\201\351\252\214\351\252\214\350\257\201\347\240\201\350\265\204\346\272\220\346\261\207\346\200\273/\344\274\201\344\270\232\344\277\241\346\201\257\346\237\245\350\257\242\347\240\264\350\247\243/industry_and_commerce.py" diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/5.yougetsignal-ip\345\257\271\345\272\224\345\237\237\345\220\215/SpiderGaoDe-mode0.py" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/5.yougetsignal-ip\345\257\271\345\272\224\345\237\237\345\220\215/mosw.py" similarity index 95% rename from "6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/5.yougetsignal-ip\345\257\271\345\272\224\345\237\237\345\220\215/SpiderGaoDe-mode0.py" rename to "6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/5.yougetsignal-ip\345\257\271\345\272\224\345\237\237\345\220\215/mosw.py" index da752160..a436852c 100644 --- "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/5.yougetsignal-ip\345\257\271\345\272\224\345\237\237\345\220\215/SpiderGaoDe-mode0.py" +++ "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/5.yougetsignal-ip\345\257\271\345\272\224\345\237\237\345\220\215/mosw.py" @@ -27,7 +27,7 @@ # 可修改的全局变量参数--Start. Table = "domain_ip_test"# sys.argv[1] # 表名称需修改 #HOST, USER, PASSWD, DB, PORT = '127.0.0.1', 'name', 'passwd', 'TW_ISP', 3306 -HOST, USER, PASSWD, DB, PORT = '192.168.1.114', 'luyishisi', 'LUYIluyi', "DataBase_GD", 3306 # 需修改 +HOST, USER, PASSWD, DB, PORT = '', '', '', "", 3306 # 需修改 select_sql = "SELECT id, ip FROM %s WHERE flag IS NULL AND lat_gd IS NULL ORDER BY RAND() Limit 300;" # 可修改 Update_sql = "UPDATE %s SET datetime=now(), lon_gd='%s', lat_gd='%s', flag=%s WHERE id =%s;" # 可修改 @@ -97,7 +97,6 @@ def run(self): 'X-Forwarded-For': ip } - #URL = 'http://ditu.amap.com/service/pl/pl.json?rand=' + str(random.random()) URL = "http://domains.yougetsignal.com/domains.php" #payload = "remoteAddress=www.baidu.com" diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558252.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558252.jpg" deleted file mode 100644 index ebeddc4d..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558252.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558264.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558264.jpg" deleted file mode 100644 index 6c7f74fb..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558264.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558272.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558272.jpg" deleted file mode 100644 index 7c6682bd..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558272.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558278.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558278.jpg" deleted file mode 100644 index 9d221743..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558278.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558289.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558289.jpg" deleted file mode 100644 index 6d952979..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558289.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558293.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558293.jpg" deleted file mode 100644 index 84eaaa2b..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558293.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558311.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558311.jpg" deleted file mode 100644 index 0de48d57..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558311.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558319.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558319.jpg" deleted file mode 100644 index 8c7c12fc..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558319.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558344.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558344.jpg" deleted file mode 100644 index a5f509d0..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558344.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558368.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558368.jpg" deleted file mode 100644 index 9695135c..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558368.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558387.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558387.jpg" deleted file mode 100644 index 0e89dd07..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558387.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558395.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558395.jpg" deleted file mode 100644 index 099c91e3..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558395.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558401.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558401.jpg" deleted file mode 100644 index 24b7bea0..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558401.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558428.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558428.jpg" deleted file mode 100644 index 4a6edf4e..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558428.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558447.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558447.jpg" deleted file mode 100644 index 7e2e0601..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1472558447.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1480916957.jpg" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1480916957.jpg" deleted file mode 100644 index 16b87448..00000000 Binary files "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/7.\345\246\271\345\255\220\345\233\276\347\275\221/requests_redis/pic/1480916957.jpg" and /dev/null differ diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/8.\346\213\211\351\222\251\347\275\221/10.html" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/8.\346\213\211\351\222\251\347\275\221/10.html" deleted file mode 100644 index 87d7a7dc..00000000 --- "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/8.\346\213\211\351\222\251\347\275\221/10.html" +++ /dev/null @@ -1 +0,0 @@ -"begin!\n10\n9\n8\n7\n6\njietu_3\nable to load the address!\n5\njietu_4\nasd\n4\n3\njietu_5\n2\n1\njietu_6\n\n \n\n\n\n\n\n\n\n\n\n花生共和招聘-北京花生时尚技术有限公司招聘-拉勾网\n\n\n\n\n \n \n \n \n\n\n \n \n \n \n\n\n \n \n\n \n \n
\n \n \n
\n\n\n \n \n\n\n
\n\n \n
\n\n
\n\n \n\n \n \n\n \n
\n\n
\n\n\n \n
\n\n
\n \n \n\n
\n\n
\n\n\n
\n\n \n\n \n\n\n
\n
\n \""花生共和Logo\""\n
\n
\n

\n \n 花生共和\n \n

\n \n \n \n \n \n 拉勾认证\n \n
\n 流行趋势,实用穿衣搭配的时尚百科。
\n
\n
\n
    \n
  • \n \n 暂无\n \n
    \n \n 招聘职位\n \n
  • \n
  • \n \n 暂无\n \n \n 简历及时处理率\n \n
  • \n
  • \n \n 暂无\n \n
    \n \n 简历处理用时\n \n
  • \n
  • \n \n 暂无\n \n
    \n \n 面试评价\n \n
  • \n
  • \n 30天前
    \n \n 企业最近登录\n \n
  • \n
\n
\n
\n
\n
\n\n \n\n
\n
\n \n
\n 分享\n \n \n
\n \""移动端公司主页二维码\""\n
\n
\n
\n
\n
\n\n
\n
\n
\n \n\n
\n
公司产品
\n
\n
\n \""产品图片\""\n
\n

\n \n

\n
    \n
\n
\n 流行趋势,实用穿衣搭配的时尚百科。\n
\n
\n
\n
\n
\n\n \n\n
\n
公司介绍
\n
\n
\n 花生共和团队组建于2011年下半年,并在2012年1月份正式上线,旨在为用户提供实用的穿衣搭配的时尚百科。只需一个时尚关键词,就可以获得从时尚知识点、资讯、单品、搭配灵感及时尚搭配心得的时尚全解决方案。让用户可以根据自己的身材、肤色、气质找到适合自己的时尚搭配风格。\n 展开\n
\n\n
\n
\n
\n
\n\n \n\n\n\n \n\n
\n
\n
面试评价
\n
\n \n 该公司近2个月内未收到过面试评价\n
\n
\n\n \n\n
\n
公司位置
\n
\n
© 2016 AutoNavi - GS(2016)710号
\n
\n
    \n
  • \n

    \n 1\n 北京市\n \n

    \n

    \n 北京北京\n\n

    \n
  • \n
\n
\n 该公司共有 1 个地址\n
\n
\n
\n
\n\n \n \n\n\n\n
\n
\n\n
\n \n\n
\n
公司基本信息
\n
\n
    \n
  • \n \n 文化娱乐\n
  • \n
  • \n \n A轮\n
  • \n
  • \n \n 15-50人\n
  • \n
  • \n \n 北京\n
  • \n
\n
\n
\n\n\n \n\n
\n
管理团队
\n
\n
\n
    \n \n
  • \n \""创始人头像\""\n

    \n 杨琳\n

    \n

    COO

    \n
    在花生共和之前,杨琳曾在Google,eLong,奥美等公司任职,主要的专业方向是市场营销,行业分析师。毕业于英国伯明翰大学市场营销专业。
    \n
  • \n
  • \n \""创始人头像\""\n

    \n 储方天\n

    \n

    CTO

    \n
    毕业于纽约大学计算机专业。加入花生共和前曾在美国高盛集团任职11年,主要负责美国股票自营部门的技术团队管理工作。2007年受集团任命来北京协助高华证券开拓中国A股市场业务,负责启动高端理财,第三方基金代销等重要项目。
    \n
  • \n
  • \n \""创始人头像\""\n

    \n 梁汉成\n

    \n

    CEO

    \n
    2008年,梁汉成从美国Tripadvisor总部到中国帮助创建了TripAdvisor中国区网站到到网。在那里他带领了在线营销与商务拓展团队。到到网在两年内从4个员工增长到了60多个员工,流量从无增长到800多万独立访问。TripAdvisor之前,梁汉成在美国IBM做过科研与产品研发,在纽约做过金融衍生品操盘手,高盛银行做过分析师。\n\n梁汉成在美国康奈尔大学拿的本科工程院学士学位,美国哥伦比亚大学工程硕士学位。
    \n
\n
\n
\n
\n
\n
\n
\n
\n
\n
\n\n \n\n
\n
公司标签
\n
\n
\n
    \n
  • \n 五险一金\n
  • \n
  • \n 免费早餐\n
  • \n
  • \n 扁平化管理\n
  • \n
  • \n 培训体系\n
  • \n
  • \n 漂亮\n
  • \n
\n
\n
\n
\n\n\n \n\n
\n
\n
\n
\n 公司产品\n
\n
\n
\n
\n 公司介绍\n
\n
\n
\n
\n 面试评价\n
\n
\n
\n
\n 公司位置\n
\n \n \n \n
\n
\n
\n\n \n\n\n\n
\n
\n
\n 我要反馈\n
\n
\n\n
\n
\n \n
\n 拉勾APPnew\n 拉勾微博\n 拉勾微信\n 版本更新\n 帮助中心\n 联系我们\n 招聘解决方案\n 服务热线:4006-2828-35 (9:00 -18:00)\n
\n
\n ©2016 Lagou \n 京ICP备14023790号-2\n 京公网安备11010802017116号\n
\n
\n
\n\n\n\n\n\n\n\n\n\n\n
\n" diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/8.\346\213\211\351\222\251\347\275\221/20.html" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/8.\346\213\211\351\222\251\347\275\221/20.html" deleted file mode 100644 index 7150d5c7..00000000 --- "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/8.\346\213\211\351\222\251\347\275\221/20.html" +++ /dev/null @@ -1,2 +0,0 @@ -找工作-互联网招聘求职网-拉勾网
本网站的Coder和PM私奔啦~~~具体范围团结湖附近,想八卦请看关于我们

回首页| 返回上页

= sql_num): - i = 0 - cursor.executemany(Update_sql , date_list) - connect.commit() - date_list = [] - print 'uptime:10 ',time.ctime(),'&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&',sql_num - except Exception as e: - print Exception,e - time.sleep(random.uniform(0, 3)) - ErrorList.append("The url is :[%s] Error:%s\n result:%s" %(url, e, result)) - self.lock.acquire() - schedule += 1 - self.lock.release() - - cursor.executemany(Update_sql , date_list)#大爷的注释,,这里要保存一次 - connect.commit() - connect.close() - - -def ConnectDB(): - "Connect MySQLdb and Print version." - connect, cursor = None, None - while True: - try: - connect = MySQLdb.connect( - host=HOST, user=USER, passwd=PASSWD, db=DB, port=PORT, charset='utf8') - cursor = connect.cursor() - break - except MySQLdb.Error, e: - print "Error %d: %s" % (e.args[0], e.args[1]) - return connect, cursor - - -def Thread_Handle(taskList, Total_TaskNum): - - global THREAD_COUNT - lock = threading.Lock() - WorksThread = [] - every_thread_number = len(taskList) / THREAD_COUNT - if every_thread_number == 0: - THREAD_COUNT = len(taskList) - every_thread_number = 1 - - for i in range(THREAD_COUNT): - if i != THREAD_COUNT - 1: - source_list = taskList[ - i * every_thread_number: (i + 1) * every_thread_number] - Work = Handle_HTML(lock, i, source_list, Total_TaskNum) - else: - source_list = taskList[i * every_thread_number:] - Work = Handle_HTML(lock, i, source_list, Total_TaskNum) - Work.start() - WorksThread.append(Work) - for Work in WorksThread: - Work.join() - - -def main(): - global ErrorList - global WarnList - connect, cursor = ConnectDB() - try: - #cursor.execute( "create table DataBase_GD.%s as SELECT * FROM DataBase_GD.GD_BJ_10_day_0;" % Table ) - pass - except Exception,e: - print Exception,e - cursor.execute( - "SELECT COUNT(*) FROM %s WHERE flag = 3 ;" % Table) - #create table DataBase_GD.GD_BJ_10_day_0 as SELECT * FROM DataBase_RTB.GD_BJ_10_day_0; - TaskNum = cursor.fetchall() - #TaskNum = 98914 #表的大小 - connect.close() - if TaskNum[0][0] == 0: - print "Warning:There is no need to do the task!!!" - else: - Total_TaskNum = int(TaskNum[0][0]) - while True: - connect, cursor = ConnectDB() - try: - if cursor.execute(select_sql % Table): - rows = cursor.fetchall() - Thread_Handle(rows, Total_TaskNum) - else: - break - except Exception, e: - print e - connect.close() - print "_____************_____" - if ErrorList : - for error in ErrorList: - print error - print "Error:", len(ErrorList), "Warning:",len(WarnList) - -if __name__ == '__main__': - print "The Program start time:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) - start = time.time() - main() - print "The Program end time:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "[%s]" % (time.time() - start) - raw_input("Please enter any key to exit!") diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/8.\346\213\211\351\222\251\347\275\221/Spider_phantomjs.py" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/8.\346\213\211\351\222\251\347\275\221/Spider_phantomjs.py" index 9e52141e..b83d5f7f 100644 --- "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/8.\346\213\211\351\222\251\347\275\221/Spider_phantomjs.py" +++ "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/8.\346\213\211\351\222\251\347\275\221/Spider_phantomjs.py" @@ -29,7 +29,7 @@ #------------------------------------------------ # 可修改的全局变量参数--Start. Table = "lagou_tb"# sys.argv[1] # 表名称需修改 -HOST, USER, PASSWD, DB, PORT = '127.0.0.1', 'root', 'luyi123', 'my_db', 3306 +HOST, USER, PASSWD, DB, PORT = '', '', '', '', 3306 select_sql = "SELECT id,url FROM %s where flag = 3 limit 30000;" # 在数据库将url打乱了. Update_sql = "UPDATE "+Table+" SET data1=%s, flag=%s WHERE id =%s;" # 可修改 diff --git "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/xicidaili.com/README.md" "b/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/xicidaili.com/README.md" deleted file mode 100644 index f7c8cf00..00000000 --- "a/6.\347\210\254\350\231\253\351\241\271\347\233\256\346\272\220\347\240\201/xicidaili.com/README.md" +++ /dev/null @@ -1,2 +0,0 @@ -网站分析: -使用普通请求 diff --git "a/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\345\233\275\345\244\226\344\273\243\347\220\206\347\275\221\347\253\231gatherproxy/get_proxy.py" "b/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\345\233\275\345\244\226\344\273\243\347\220\206\347\275\221\347\253\231gatherproxy/get_proxy.py" index 94eff229..e9a94caf 100644 --- "a/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\345\233\275\345\244\226\344\273\243\347\220\206\347\275\221\347\253\231gatherproxy/get_proxy.py" +++ "b/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\345\233\275\345\244\226\344\273\243\347\220\206\347\275\221\347\253\231gatherproxy/get_proxy.py" @@ -7,7 +7,7 @@ # 日期:编写日期2016/11/10 # 语言:Python 2.7.x # 操作:python referer_forge.py -# 功能:从www.gatherproxy.com网站采集代理信息并存入数据库 +# 功能:从www.gatherproxy.com网站采集代理信息并存入数据库 #------------------------------------------------------------------------- import requests,re,json import sys,os,time,MySQLdb,MySQLdb @@ -19,11 +19,11 @@ Type = sys.getfilesystemencoding() # 数据库设置 -MYSQL_HOST = '171.15.132.56' -MYSQL_DBNAME = 'DataBase_GD' -MYSQL_USER = 'luyishisi' +MYSQL_HOST = '' +MYSQL_DBNAME = '' +MYSQL_USER = '' MYSQL_PASSWD = '' -MYSQL_PORT= 33306 +MYSQL_PORT= 3306 # 此处修改数据库插入修改语句 install_str = ''' @@ -90,12 +90,12 @@ def re_html_code(html_code,proxy_list_json): Last_test_time = json_list['PROXY_LAST_UPDATE'] proxy_status = '1' Remarks = 'ly' - # `id`, `proxy_ip`, `proxy_port`, `proxy_country`, `proxy_type`, `addtime`, `Last_test_time`, `proxy_status`, `Remarks` + # `id`, `proxy_ip`, `proxy_port`, `proxy_country`, `proxy_type`, `addtime`, `Last_test_time`, `proxy_status`, `Remarks` list_i = [PROXY_IP,PROXY_PORT,PROXY_COUNTRY,PROXY_TYPE,addtime,Last_test_time,proxy_status,Remarks] - + proxy_list_json.append(list_i) - + # print proxy_list_json return proxy_list_json @@ -109,7 +109,7 @@ def re_html_code(html_code,proxy_list_json): print Exception,e url = "http://www.gatherproxy.com/zh/proxylist/country/?c=China" - + try: html_code = get_request(url,headers) proxy_list_json = [] @@ -120,5 +120,3 @@ def re_html_code(html_code,proxy_list_json): insert_ll(install_str,i,conn,cur) except Exception,e: print Exception,e - - diff --git "a/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\345\233\275\345\244\226\344\273\243\347\220\206\347\275\221\347\253\231gatherproxy/get_proxy_all_cn.py" "b/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\345\233\275\345\244\226\344\273\243\347\220\206\347\275\221\347\253\231gatherproxy/get_proxy_all_cn.py" index c3d2dae1..dee37674 100644 --- "a/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\345\233\275\345\244\226\344\273\243\347\220\206\347\275\221\347\253\231gatherproxy/get_proxy_all_cn.py" +++ "b/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\345\233\275\345\244\226\344\273\243\347\220\206\347\275\221\347\253\231gatherproxy/get_proxy_all_cn.py" @@ -19,11 +19,11 @@ Type = sys.getfilesystemencoding() # 数据库设置 -MYSQL_HOST = '171.15.132.56' -MYSQL_DBNAME = 'DataBase_GD' -MYSQL_USER = 'luyishisi' +MYSQL_HOST = '' +MYSQL_DBNAME = '' +MYSQL_USER = '' MYSQL_PASSWD = '' -MYSQL_PORT= 33306 +MYSQL_PORT= 3306 # 此处修改数据库插入修改语句 install_str = ''' diff --git "a/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\345\233\275\345\244\226\344\273\243\347\220\206\347\275\221\347\253\231gatherproxy/get_proxy_yanzheng.py" "b/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\345\233\275\345\244\226\344\273\243\347\220\206\347\275\221\347\253\231gatherproxy/get_proxy_yanzheng.py" index ec057d5d..3ba27fab 100644 --- "a/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\345\233\275\345\244\226\344\273\243\347\220\206\347\275\221\347\253\231gatherproxy/get_proxy_yanzheng.py" +++ "b/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\345\233\275\345\244\226\344\273\243\347\220\206\347\275\221\347\253\231gatherproxy/get_proxy_yanzheng.py" @@ -19,11 +19,11 @@ Type = sys.getfilesystemencoding() # 数据库设置 -MYSQL_HOST = '171.15.132.56' -MYSQL_DBNAME = 'DataBase_GD' -MYSQL_USER = 'luyishisi' +MYSQL_HOST = '' +MYSQL_DBNAME = '' +MYSQL_USER = '' MYSQL_PASSWD = '' -MYSQL_PORT= 33306 +MYSQL_PORT= 3306 # 此处修改数据库插入修改语句 install_str = ''' diff --git "a/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\350\245\277\345\210\272/xicidaili.py" "b/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\350\245\277\345\210\272/xicidaili.py" index de63e704..b9faf30f 100644 --- "a/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\350\245\277\345\210\272/xicidaili.py" +++ "b/7.IP\346\233\264\346\215\242\346\212\200\346\234\257/1.\344\273\243\347\220\206/\351\207\207\351\233\206\344\273\243\347\220\206\347\275\221\347\253\231/\350\245\277\345\210\272/xicidaili.py" @@ -19,11 +19,11 @@ Type = sys.getfilesystemencoding() # 数据库设置 -MYSQL_HOST = '171.15.132.56' -MYSQL_DBNAME = 'DataBase_GD' -MYSQL_USER = 'luyishisi' +MYSQL_HOST = '' +MYSQL_DBNAME = '' +MYSQL_USER = '' MYSQL_PASSWD = '' -MYSQL_PORT= 33306 +MYSQL_PORT= 3306 # 此处修改数据库插入修改语句 install_str = '''