建设银行网站官网网址_外贸流程ppt_百度如何制作个人网页_注册深圳公司不在深圳经营
class ImgcrawlingDownloaderMiddleware:
    """Scrapy downloader middleware that rotates User-Agent headers and,
    on request failure, retries through a randomly chosen proxy.

    Not all middleware methods need to be defined; if one is missing,
    Scrapy acts as if the middleware does not modify the passed objects.
    """

    # User-Agent pool: one entry is picked at random per request.
    user_agent_list = [
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:22.0) Gecko/20130328 Firefox/22.0",
        "Opera/9.80 (Windows NT 6.1; U; fi) Presto/2.7.62 Version/11.00",
        "Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
    ]
    # Proxy pool for http:// requests.
    PROXY_http = ['192.168.12.5', '10.25.2.40', '10.24.5.66']
    # Proxy pool for https:// requests.
    PROXY_https = ['192.168.12.5', '10.25.2.40', '10.24.5.66']

    @classmethod
    def from_crawler(cls, crawler):
        """Standard Scrapy hook: build the middleware and wire signals."""
        s = cls()
        crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
        return s

    def process_request(self, request, spider):
        """Intercept every outgoing request and assign a random User-Agent.

        Returning None lets Scrapy continue processing the request.
        """
        request.headers['User-Agent'] = random.choice(self.user_agent_list)
        return None

    def process_response(self, request, response, spider):
        """Intercept every response from the downloader.

        Must either return a Response object, return a Request object,
        or raise IgnoreRequest. Passes the response through unchanged.
        """
        return response

    def process_exception(self, request, exception, spider):
        """Intercept requests that raised an exception and retry via proxy.

        Chooses the proxy pool matching the URL scheme, writes it into
        ``request.meta['proxy']``, and returns the request so Scrapy
        reschedules it.
        """
        # BUGFIX: the original compared the *list* from split(':') to the
        # string 'http' (always False), so the http branch was unreachable.
        # Compare the scheme component instead.
        if request.url.split(':')[0] == 'http':
            request.meta['proxy'] = 'http://' + random.choice(self.PROXY_http)
        else:
            request.meta['proxy'] = 'https://' + random.choice(self.PROXY_https)
        # Return the modified request so it is re-sent through the proxy.
        return request

    def spider_opened(self, spider):
        """Signal callback: log when the spider starts."""
        spider.logger.info("Spider opened: %s" % spider.name)