1
2 """
3 This module contains the HTTP fetcher interface and several implementations.
4 """
5
6 __all__ = ['fetch', 'getDefaultFetcher', 'setDefaultFetcher', 'HTTPResponse',
7 'HTTPFetcher', 'createHTTPFetcher', 'HTTPFetchingError',
8 'HTTPError']
9
10 import urllib2
11 import time
12 import cStringIO
13 import sys
14
15 import openid
16 import openid.urinorm
17
18
19
20 try:
21 import httplib2
22 except ImportError:
23
24 httplib2 = None
25
26
27 try:
28 import pycurl
29 except ImportError:
30 pycurl = None
31
32 USER_AGENT = "python-openid/%s (%s)" % (openid.__version__, sys.platform)
33 MAX_RESPONSE_KB = 1024
34
def fetch(url, body=None, headers=None):
    """Fetch C{url} using the module-level default fetcher.

    This convenience wrapper is all most users need.

    @raises Exception: any exceptions that may be raised by the default
        fetcher
    """
    return getDefaultFetcher().fetch(url, body, headers)
43
45 """Create a default HTTP fetcher instance
46
47 prefers Curl to urllib2."""
48 if pycurl is None:
49 fetcher = Urllib2Fetcher()
50 else:
51 fetcher = CurlHTTPFetcher()
52
53 return fetcher
54
55
56
57
58 _default_fetcher = None
59
73
75 """Set the default fetcher
76
77 @param fetcher: The fetcher to use as the default HTTP fetcher
78 @type fetcher: HTTPFetcher
79
80 @param wrap_exceptions: Whether to wrap exceptions thrown by the
        fetcher with HTTPFetchingError so that they may be caught
82 easier. By default, exceptions will be wrapped. In general,
83 unwrapped fetchers are useful for debugging of fetching errors
84 or if your fetcher raises well-known exceptions that you would
85 like to catch.
86 @type wrap_exceptions: bool
87 """
88 global _default_fetcher
89 if fetcher is None or not wrap_exceptions:
90 _default_fetcher = fetcher
91 else:
92 _default_fetcher = ExceptionWrappingFetcher(fetcher)
93
95 """Whether the currently set HTTP fetcher is a Curl HTTP fetcher."""
96 return isinstance(getDefaultFetcher(), CurlHTTPFetcher)
97
99 """XXX document attributes"""
100 headers = None
101 status = None
102 body = None
103 final_url = None
104
105 - def __init__(self, final_url=None, status=None, headers=None, body=None):
110
112 return "<%s status %s for %s>" % (self.__class__.__name__,
113 self.status,
114 self.final_url)
115
117 """
118 This class is the interface for openid HTTP fetchers. This
119 interface is only important if you need to write a new fetcher for
120 some reason.
121 """
122
123 - def fetch(self, url, body=None, headers=None):
124 """
125 This performs an HTTP POST or GET, following redirects along
126 the way. If a body is specified, then the request will be a
127 POST. Otherwise, it will be a GET.
128
129
130 @param headers: HTTP headers to include with the request
131 @type headers: {str:str}
132
133 @return: An object representing the server's HTTP response. If
134 there are network or protocol errors, an exception will be
135 raised. HTTP error responses, like 404 or 500, do not
136 cause exceptions.
137
138 @rtype: L{HTTPResponse}
139
140 @raise Exception: Different implementations will raise
141 different errors based on the underlying HTTP library.
142 """
143 raise NotImplementedError
144
146 return url.startswith('http://') or url.startswith('https://')
147
149 """Exception that is wrapped around all exceptions that are raised
150 by the underlying fetcher when using the ExceptionWrappingFetcher
151
152 @ivar why: The exception that caused this exception
153 """
155 Exception.__init__(self, why)
156 self.why = why
157
159 """Fetcher that wraps another fetcher, causing all exceptions
160
161 @cvar uncaught_exceptions: Exceptions that should be exposed to the
162 user if they are raised by the fetch call
163 """
164
165 uncaught_exceptions = (SystemExit, KeyboardInterrupt, MemoryError)
166
168 self.fetcher = fetcher
169
170 - def fetch(self, *args, **kwargs):
171 try:
172 return self.fetcher.fetch(*args, **kwargs)
173 except self.uncaught_exceptions:
174 raise
175 except:
176 exc_cls, exc_inst = sys.exc_info()[:2]
177 if exc_inst is None:
178
179 exc_inst = exc_cls
180
181 raise HTTPFetchingError(why=exc_inst)
182
184 """An C{L{HTTPFetcher}} that uses urllib2.
185 """
186
187
188
189 urlopen = staticmethod(urllib2.urlopen)
190
191 - def fetch(self, url, body=None, headers=None):
192 if not _allowedURL(url):
193 raise ValueError('Bad URL scheme: %r' % (url,))
194
195 if headers is None:
196 headers = {}
197
198 headers.setdefault(
199 'User-Agent',
200 "%s Python-urllib/%s" % (USER_AGENT, urllib2.__version__,))
201
202 req = urllib2.Request(url, data=body, headers=headers)
203 try:
204 f = self.urlopen(req)
205 try:
206 return self._makeResponse(f)
207 finally:
208 f.close()
209 except urllib2.HTTPError, why:
210 try:
211 return self._makeResponse(why)
212 finally:
213 why.close()
214
216 resp = HTTPResponse()
217 resp.body = urllib2_response.read(MAX_RESPONSE_KB * 1024)
218 resp.final_url = urllib2_response.geturl()
219 resp.headers = dict(urllib2_response.info().items())
220
221 if hasattr(urllib2_response, 'code'):
222 resp.status = urllib2_response.code
223 else:
224 resp.status = 200
225
226 return resp
227
229 """
230 This exception is raised by the C{L{CurlHTTPFetcher}} when it
231 encounters an exceptional situation fetching a URL.
232 """
233 pass
234
235
237 """
238 An C{L{HTTPFetcher}} that uses pycurl for fetching.
239 See U{http://pycurl.sourceforge.net/}.
240 """
241 ALLOWED_TIME = 20
242
247
249 header_file.seek(0)
250
251
252 unused_http_status_line = header_file.readline().lower ()
253 if unused_http_status_line.startswith('http/1.1 100 '):
254 unused_http_status_line = header_file.readline()
255 unused_http_status_line = header_file.readline()
256
257 lines = [line.strip() for line in header_file]
258
259
260 empty_line = lines.pop()
261 if empty_line:
262 raise HTTPError("No blank line at end of headers: %r" % (line,))
263
264 headers = {}
265 for line in lines:
266 try:
267 name, value = line.split(':', 1)
268 except ValueError:
269 raise HTTPError(
270 "Malformed HTTP header line in response: %r" % (line,))
271
272 value = value.strip()
273
274
275 name = name.lower()
276 headers[name] = value
277
278 return headers
279
281
282
283 return _allowedURL(url)
284
    def fetch(self, url, body=None, headers=None):
        """Fetch the URL with pycurl, following redirects by hand while
        enforcing an overall deadline of ALLOWED_TIME seconds across all
        redirect hops.

        @raises HTTPError: for disallowed URLs, redirects without a
            Location header, or overall timeout
        """
        # Absolute wall-clock deadline, and the remaining per-attempt
        # time budget ("off") that shrinks as redirects are followed.
        stop = int(time.time()) + self.ALLOWED_TIME
        off = self.ALLOWED_TIME

        if headers is None:
            headers = {}

        headers.setdefault('User-Agent',
                           "%s %s" % (USER_AGENT, pycurl.version,))

        # pycurl wants headers as a list of "Name: value" strings.
        header_list = []
        if headers is not None:
            # NOTE(review): headers can no longer be None here, so this
            # guard is redundant (but harmless).
            for header_name, header_value in headers.iteritems():
                header_list.append('%s: %s' % (header_name, header_value))

        c = pycurl.Curl()
        try:
            # Avoid signal-based timeouts, which are unsafe with threads.
            c.setopt(pycurl.NOSIGNAL, 1)

            if header_list:
                c.setopt(pycurl.HTTPHEADER, header_list)

            # Presence of a body means POST; otherwise the request is a
            # GET.
            if body is not None:
                c.setopt(pycurl.POST, 1)
                c.setopt(pycurl.POSTFIELDS, body)

            while off > 0:
                if not self._checkURL(url):
                    raise HTTPError("Fetching URL not allowed: %r" % (url,))

                data = cStringIO.StringIO()
                def write_data(chunk):
                    # Returning 0 from the write callback makes curl
                    # abort the transfer once the response exceeds the
                    # size cap.
                    if data.tell() > 1024*MAX_RESPONSE_KB:
                        return 0
                    else:
                        return data.write(chunk)

                response_header_data = cStringIO.StringIO()
                c.setopt(pycurl.WRITEFUNCTION, write_data)
                c.setopt(pycurl.HEADERFUNCTION, response_header_data.write)
                # Each hop gets only the remaining time budget.
                c.setopt(pycurl.TIMEOUT, off)
                c.setopt(pycurl.URL, openid.urinorm.urinorm(url))

                c.perform()

                response_headers = self._parseHeaders(response_header_data)
                code = c.getinfo(pycurl.RESPONSE_CODE)
                if code in [301, 302, 303, 307]:
                    url = response_headers.get('location')
                    if url is None:
                        raise HTTPError(
                            'Redirect (%s) returned without a location' % code)

                    # Redirects are always followed with a GET in this
                    # implementation, even for 307.
                    c.setopt(pycurl.POST, 0)

                else:
                    # Non-redirect status: package up the final response.
                    resp = HTTPResponse()
                    resp.headers = response_headers
                    resp.status = code
                    resp.final_url = url
                    resp.body = data.getvalue()
                    return resp

                # Recompute the remaining budget before the next hop.
                off = stop - int(time.time())

            raise HTTPError("Timed out fetching: %r" % (url,))
        finally:
            c.close()
357
359 """A fetcher that uses C{httplib2} for performing HTTP
360 requests. This implementation supports HTTP caching.
361
362 @see: http://bitworking.org/projects/httplib2/
363 """
364
366 """@param cache: An object suitable for use as an C{httplib2}
367 cache. If a string is passed, it is assumed to be a
368 directory name.
369 """
370 if httplib2 is None:
371 raise RuntimeError('Cannot find httplib2 library. '
372 'See http://bitworking.org/projects/httplib2/')
373
374 super(HTTPLib2Fetcher, self).__init__()
375
376
377 self.httplib2 = httplib2.Http(cache)
378
379
380
381 self.httplib2.force_exception_to_status_code = False
382
383 - def fetch(self, url, body=None, headers=None):
384 """Perform an HTTP request
385
386 @raises Exception: Any exception that can be raised by httplib2
387
388 @see: C{L{HTTPFetcher.fetch}}
389 """
390 if body:
391 method = 'POST'
392 else:
393 method = 'GET'
394
395 if headers is None:
396 headers = {}
397
398
399
400 if not (url.startswith('http://') or url.startswith('https://')):
401 raise ValueError('URL is not a HTTP URL: %r' % (url,))
402
403 httplib2_response, content = self.httplib2.request(
404 url, method, body=body, headers=headers)
405
406
407
408
409
410
411
412 try:
413 final_url = httplib2_response['content-location']
414 except KeyError:
415
416 assert not httplib2_response.previous
417
418
419 assert httplib2_response.status != 200
420 final_url = url
421
422 return HTTPResponse(
423 body=content,
424 final_url=final_url,
425 headers=dict(httplib2_response.items()),
426 status=httplib2_response.status,
427 )
428