900字范文,内容丰富有趣,生活中的好帮手!
900字范文 > python httplib urllib urllib2区别(一撇)

python httplib urllib urllib2区别(一撇)

时间:2019-03-09 21:15:24

相关推荐

python httplib urllib urllib2区别(一撇)

目录:

urlencode & quote & unquote (url 中带中文参数)

python httplib urllib urllib2区别(一撇)

python post请求实例 & json -- str互相转化(application/x-www-form-urlencoded \ multipart/form-data)

1, 前言:

python提供很多种非常友好的访问网页内容的方法:python2.x 有 httplib、urllib 和 urllib2;python3.x 又提供了 urllib.request(另有常用的第三方库 requests)。同时,每种方法下面又分为 GET、POST、PUT、DELETE 等 method。

一时间江湖上充斥着“五门八派”的各种方法,令初学者眼花缭乱,不知如何下手,如何学起。

但是,有一点需要提醒的是:无论哪一种方案或方法,存在即有其合理性;哪一种方法用着上手、得心应手,才是王道!

2, 下面我们比较一下python2.x 中的三种方法,先上实例,之后分析

(1)实例

import jsonimport sysimport hashlibimport urllibimport httplib ### none using now def generate_json_list():reload(sys)sys.setdefaultencoding('gbk')print "[",flag=Falsefor line in sys.stdin:if flag:print ",",else:flag=Trueline=line.strip()items=line.split("\t")out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""}out["createdAt"]=items[0]out["scale"]=items[1]out["channel"]=items[2]out["word"]=items[3]print json.dumps(out,encoding="gbk").decode("unicode-escape"),print "]"import urllib2def import_out_hotwords(key, json_str, out):HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSingle"#HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSin"#print "2--", json_strvalue={"configKey":key,"configValue":json_str}data=urllib.urlencode(value)print >> sys.stderr, "### 3params", value, datareq = urllib2.Request(HOST, data)req.add_header("content-type", "application/x-www-form-urlencoded")req.get_method = lambda : 'PUT'response = None try:response = urllib2.urlopen(req, timeout=5)if response.code == 200:print "insertSingle Succ: ", out["word"], out["channel"], out["key"]response.close()except urllib2.URLError as e:if hasattr(e, 'code'):print 'Error code:',e.codeelif hasattr(e, 'reason'):print 'Reason:',e.reasonfinally:if response:response.close()def import_out_hotwords_2(key, json_str, out):HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSingle"#HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSin"#print "2--", json_strvalue={"configKey":key,"configValue":json_str}data=urllib.urlencode(value)print >> sys.stderr, "## 2params", value, datareq = urllib2.Request(HOST, data)req.add_header("content-type", "application/x-www-form-urlencoded")req.get_method = lambda : 'PUT'response = None try:response = urllib2.urlopen(req, timeout=5)if response.code == 200:print "insertSingle Succ: ", out["word"], out["channel"], out["key"]response.close()except urllib2.URLError as e:if hasattr(e, 'code'):print 'Error 
code:',e.codeelif hasattr(e, 'reason'):print 'Reason:',e.reasonfinally:if response:response.close()def import_out_hotwords_old(key, json_str, out):HOST = "10.129.232.109:5005"conn = httplib.HTTPConnection(HOST)#print "2--", json_strvalue={"configKey":key,"configValue":json_str}data=urllib.urlencode(value)#print dataheaders = {'content-type': 'application/x-www-form-urlencoded','cache-control': 'no-cache'}conn.request("PUT", "/api/externalHotWords/insertSingle", body=data, headers=headers)handler = conn.getresponse()if handler.status == 200:print "insertSingle Succ: ", out["word"], out["channel"], out["key"]#if handler.read().decode('utf8').encode('gbk')[0] == "OK":# print "insertSingle Succ: ", json_strconn.close()def generate_json():reload(sys)sys.setdefaultencoding('gbk')for line in sys.stdin:line=line.strip()items=line.split("\t")if len(items) < 4:continueout={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""}out["createdAt"]=items[0]#out["scale"]=items[1]out["channel"]=items[2]out["word"]=items[3]key = hashlib.md5((items[3] + items[2])).hexdigest()key = "externalHotWords_" + keyout["key"] = keyjson_str = json.dumps(out,encoding="gbk")#.decode("unicode-escape")#import_out_hotwords(key, urllib.quote(json_str.decode('gbk', 'ignore').encode('utf8')), out)import_out_hotwords_2(key, json_str, out)def generate_json_old():reload(sys)sys.setdefaultencoding('gbk')for line in sys.stdin:line=line.strip()items=line.split("\t")if len(items) < 4:continueout={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""}out["createdAt"]=items[0]#out["scale"]=items[1]out["channel"]=items[2]out["word"]=items[3]key = hashlib.md5((items[3] + items[2])).hexdigest()out["key"] = "externalHotWords_" + keyjson_str = json.dumps(out,encoding="gbk").decode("unicode-escape")#json_str = out#print "1--", json_str## return 'req=' + urllib.quote(reqinfo.decode('gbk', 'ignore').encode('utf8'))import_out_hotwords(key, urllib.quote(json_str.decode('gbk', 
'ignore').encode('utf8')), out)#import_out_hotwords(key, json_str)if __name__=="__main__":#generate_json_list()generate_json()

下面的实例存在一个小问题:二次编码问题,首先对out进行json.dumps() 的json_str转化(正确),之后对json_str进行urllib.quote() (第一次编码);最后在

value={"configKey":key,"configValue":json_str} 之后有urllib.urlencode() (第二次编码)

格式一:configValue=%7B%27scale%27%3A+%27%27%2C+%27word%27%3A+%27%5Cxb2%5Cxe2%5Cxca%5Cxd4soso%27%2C+%27channel%27%3A+%27360_%5Cxca%5Cxb5%5Cxca%5Cxb1%5Cxc8%5Cxc8%5Cxb5%5Cxe3%27%2C+%27key%27%3A+%27externalHotWords_ed9f4ea3b7ff116c67366f7a576bcb08%27%2C+%27type%27%3A+%27%27%2C+%27createdAt%27%3A+%27-06-07+11%3A22%3A32%27%7D&configKey=ed9f4ea3b7ff116c67366f7a576bcb08

格式二:configValue=%257B%2522scale%2522%253A%2520%2522%2522%252C%2520%2522word%2522%253A%2520%2522%25E6%25B5%258B%25E8%25AF%2595soso%2522%252C%2520%2522channel%2522%253A%2520%2522360_%25E5%25AE%259E%25E6%2597%25B6%25E7%2583%25AD%25E7%2582%25B9%2522%252C%2520%2522key%2522%253A%2520%2522externalHotWords_ed9f4ea3b7ff116c67366f7a576bcb08%2522%252C%2520%2522type%2522%253A%2520%2522%2522%252C%2520%2522createdAt%2522%253A%2520%2522-06-07%25%253A22%253A32%2522%257D&configKey=ed9f4ea3b7ff116c67366f7a576bcb08

显然格式二是对格式一再次进行了编码(因为第一次编码时 { --> %7B;第二次编码时 % --> %25,于是 %7B --> %257B)

import jsonimport sysimport hashlibimport urllibimport httplib ### none using now def generate_json_list():reload(sys)sys.setdefaultencoding('gbk')print "[",flag=Falsefor line in sys.stdin:if flag:print ",",else:flag=Trueline=line.strip()items=line.split("\t")out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""}out["createdAt"]=items[0]out["scale"]=items[1]out["channel"]=items[2]out["word"]=items[3]print json.dumps(out,encoding="gbk").decode("unicode-escape"),print "]"import urllib2def import_out_hotwords(key, json_str, out):HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSingle"#HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSin"#print "2--", json_strvalue={"configKey":key,"configValue":json_str}data=urllib.urlencode(value)req = urllib2.Request(HOST, data)req.add_header("content-type", "application/x-www-form-urlencoded")req.get_method = lambda : 'PUT'response = None try:response = urllib2.urlopen(req, timeout=5)if response.code == 200:print "insertSingle Succ: ", out["word"], out["channel"], out["key"]response.close()except urllib2.URLError as e:if hasattr(e, 'code'):print 'Error code:',e.codeelif hasattr(e, 'reason'):print 'Reason:',e.reasonfinally:if response:response.close()def import_out_hotwords_old(key, json_str, out):HOST = "10.129.232.109:5005"conn = httplib.HTTPConnection(HOST)#print "2--", json_strvalue={"configKey":key,"configValue":json_str}data=urllib.urlencode(value)#print dataheaders = {'content-type': 'application/x-www-form-urlencoded','cache-control': 'no-cache'}conn.request("PUT", "/api/externalHotWords/insertSingle", body=data, headers=headers)handler = conn.getresponse()if handler.status == 200:print "insertSingle Succ: ", out["word"], out["channel"], out["key"]#if handler.read().decode('utf8').encode('gbk')[0] == "OK":# print "insertSingle Succ: ", json_strconn.close()def generate_json():reload(sys)sys.setdefaultencoding('gbk')for line in sys.stdin:line=line.strip()items=line.split("\t")if 
len(items) < 4:continueout={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""}out["createdAt"]=items[0]#out["scale"]=items[1]out["channel"]=items[2]out["word"]=items[3]key = hashlib.md5((items[3] + items[2])).hexdigest()out["key"] = "externalHotWords_" + keyjson_str = json.dumps(out,encoding="gbk").decode("unicode-escape")#json_str = out#print "1--", json_str## return 'req=' + urllib.quote(reqinfo.decode('gbk', 'ignore').encode('utf8'))import_out_hotwords(key, urllib.quote(json_str.decode('gbk', 'ignore').encode('utf8')), out)#import_out_hotwords(key, json_str)if __name__=="__main__":#generate_json_list()generate_json()cat

CMD:cat tmp | python generate_json2.py

[@10.134.105.160 HotRankingLoggers]# vi tmp

-06-07 11:22:32 6964 360_实时热点 测试APP

-06-07 11:22:32 6498 360_实时热点 测试soso

(2)分析(参考《python的httplib、urllib和urllib2的区别及用法》)

urllib和urllib2

urllib 和urllib2都是接受URL请求的相关模块,但是urllib2可以接受一个Request类的实例来设置URL请求的headers,urllib仅可以接受URL。

这意味着,只使用urllib时,你不可以伪装你的User Agent字符串等。

urllib提供urlencode方法用来生成GET查询字符串,而urllib2没有。这是urllib常和urllib2一起使用的原因。

目前的大部分http请求都是通过urllib2来访问的

httplib

httplib实现了HTTP和HTTPS的客户端协议,一般不直接使用,在python更高层的封装模块中(urllib,urllib2)使用了它的http实现。

(3)详解

urllib简单用法

1. google=urllib.urlopen('')

2.print'httpheader:/n',google.info()

3.print'httpstatus:',google.getcode()

4.print'url:',google.geturl()

5.forlineingoogle:#就像在操作本地文件

6.printline,

7. google.close()

urllib2简单用法

import urllib2

# Python 2: simplest urllib2 usage -- open a URL and read the whole body.
# Placeholder URL: the original address was stripped from the listing, and
# urlopen('') raises ValueError (unknown url type).
response = urllib2.urlopen('http://www.example.com/')
html = response.read()
response.close()

实际步骤:

1、urllib2.Request()的功能是构造一个请求信息,返回的req就是一个构造好的请求

2、urllib2.urlopen()的功能是发送刚刚构造好的请求req,并返回一个文件类的对象response,包括了所有的返回信息。

3、通过response.read()可以读取到response里面的html,通过response.info()可以读到一些额外的信息。如下:

1. #!/usr/bin/envpython

2.importurllib2

3. req=urllib2.Request("")

4.response=urllib2.urlopen(req)

5. html=response.read()

6.printhtml

有时你会碰到,程序也对,但是服务器拒绝你的访问。这是为什么呢?问题出在请求中的头信息(header)。 有的服务端有洁癖,不喜欢程序来触摸它。这个时候你需要将你的程序伪装成浏览器来发出请求。请求的方式就包含在header中。常见的情形:

import urllib
import urllib2

# Python 2: send form data with a browser-like User-Agent header.
# Placeholder host: the listing only kept the path '/cgi-bin/register.cgi',
# which urllib2 rejects because it has no scheme.
url = 'http://www.example.com/cgi-bin/register.cgi'
# Written into the request headers so the server sees a browser.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values = {'name': 'who', 'password': '123456'}  # the POST data
headers = {'User-Agent': user_agent}
data = urllib.urlencode(values)
# Passing data makes urllib2 issue a POST instead of a GET.
req = urllib2.Request(url, data, headers)
response = urllib2.urlopen(req)
the_page = response.read()
response.close()

values是post数据

GET方法

例如百度:

百度是通过 http://www.baidu.com/s?wd=XXX 来进行查询的,这样我们需要将 {'wd': 'xxx'} 这个字典进行 urlencode

1.#coding:utf-8

2.importurllib

3.importurllib2

4.url='/s'

5.values={'wd':'D_in'}

6.data=urllib.urlencode(values)

7.printdata

8.url2=url+'?'+data

9.response=urllib2.urlopen(url2)

10.the_page=response.read()

11.printthe_page

POST方法

import urllib
import urllib2

# Python 2: POST request -- the encoded form data goes into the body.
# Placeholder host: the listing only kept the path '/cgi-bin/register.cgi'.
url = 'http://www.example.com/cgi-bin/register.cgi'
# Written into the request headers so the server sees a browser.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values = {'name': 'who', 'password': '123456'}  # the POST data
headers = {'User-Agent': user_agent}
data = urllib.urlencode(values)  # url-encode the POST data
req = urllib2.Request(url, data, headers)
response = urllib2.urlopen(req)
the_page = response.read()
response.close()

urllib2带cookie的使用

1.#coding:utf-8

2.importurllib2,urllib

3.importcookielib

4.

5.url=r'/ajaxLogin'

6.

7. #创建一个cj的cookie的容器

8.cj=cookielib.CookieJar()

9.opener=urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

10.#将要POST出去的数据进行编码

11.data=urllib.urlencode({"email":email,"password":pass})

12.r=opener.open(url,data)

13.printcj

httplib简单用法

1. #!/usr/bin/envpython

2.#-*-coding:utf-8-*-

3.importhttplib

4.importurllib

5.

6.defsendhttp():

7. data=urllib.urlencode({'@number':12524,'@type':'issue','@action':'show'})

8.headers={"Content-type":"application/x-www-form-urlencoded",

9. "Accept":"text/plain"}

10.conn=httplib.HTTPConnection('')

11. conn.request('POST','/',data,headers)

12.httpres=conn.getresponse()

13.printhttpres.status

14.printhttpres.reason

15.printhttpres.read()

16.

17.if__name__=='__main__':

18.sendhttp()

3,get put post delete 方法(参考自《python urllib2对http的get,put,post,delete》)

# GET:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import urllib2


def get():
    """Send a GET request with urllib2 and return the response body."""
    # Page address. Placeholder: the original URL was stripped from the
    # listing, and urlopen('') raises ValueError (unknown url type).
    URL = 'http://www.example.com/'
    response = urllib2.urlopen(URL)  # urlopen without data sends a GET
    try:
        return response.read()  # the page returned by the server
    finally:
        response.close()

#POST:

#!/usr/bin/env python

# -*- coding:utf-8 -*-

import urllib

import urllib2

def post():

URL ='http://umbra.nascom.nasa.gov/cgi-bin/eit-catalog.cgi' #页面的地址

values ={'obs_year':'','obs_month':'March',#post的值

'obs_day':'8','start_year':''

,'start_month':'March','start_day':'8'

,'start_hour':'All Hours','stop_year':''

,'stop_month':'March','stop_day':'8'

,'stop_hour':'All Hours','xsize':'All'

,'ysize':'All','wave':'all'

,'filter':'all','object':'all'

,'xbin':'all','ybin':'all'

,'highc':'all'}

data =urllib.urlencode(values) #适用urllib对数据进行格式化编码

printdata #输出查看编码后的数据格式

req =urllib2.Request(URL, data) #生成页面请求的完整数据

response =urllib2.urlopen(req) #发送页面请求

returnresponse.read() #获取服务器返回的页面信息

# PUT
import urllib2

# Python 2: urllib2 has no PUT helper, so get_method is overridden.
# Placeholder URL: the original address was stripped from the listing, and
# Request('') raises ValueError (unknown url type).
request = urllib2.Request('http://www.example.com/resource',
                          data='your_put_data')
request.add_header('Content-Type', 'your/contenttype')
request.get_method = lambda: 'PUT'
response = urllib2.urlopen(request)

# DELETE
import urllib2

# Python 2: same get_method override as for PUT, without a request body.
# Placeholder address of the resource to delete. The original listing used
# `uri` without defining it, which raises NameError.
uri = 'http://www.example.com/resource'
request = urllib2.Request(uri)
request.get_method = lambda: 'DELETE'
response = urllib2.urlopen(request)

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。