Package openid :: Module urinorm
[frames | no frames]

Source Code for Module openid.urinorm

import re
import sys
  2   
  3  # from appendix B of rfc 3986 (http://www.ietf.org/rfc/rfc3986.txt) 
  4  uri_pattern = r'^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?' 
  5  uri_re = re.compile(uri_pattern) 
  6   
  7  # gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@" 
  8  # 
  9  # sub-delims  = "!" / "$" / "&" / "'" / "(" / ")" 
 10  #                  / "*" / "+" / "," / ";" / "=" 
 11  # 
 12  # unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~" 
 13   
 14  uri_illegal_char_re = re.compile( 
 15      "[^-A-Za-z0-9:/?#[\]@!$&'()*+,;=._~%]", re.UNICODE) 
 16   
 17  authority_pattern = r'^([^@]*@)?([^:]*)(:.*)?' 
 18  authority_re = re.compile(authority_pattern) 
 19   
 20   
 21  pct_encoded_pattern = r'%([0-9A-Fa-f]{2})' 
 22  pct_encoded_re = re.compile(pct_encoded_pattern) 
 23   
 24  try: 
 25      unichr(0x10000) 
 26  except ValueError: 
 27      # narrow python build 
 28      UCSCHAR = [ 
 29          (0xA0, 0xD7FF), 
 30          (0xF900, 0xFDCF), 
 31          (0xFDF0, 0xFFEF), 
 32          ] 
 33   
 34      IPRIVATE = [ 
 35          (0xE000, 0xF8FF), 
 36          ] 
 37  else: 
 38      UCSCHAR = [ 
 39          (0xA0, 0xD7FF), 
 40          (0xF900, 0xFDCF), 
 41          (0xFDF0, 0xFFEF), 
 42          (0x10000, 0x1FFFD), 
 43          (0x20000, 0x2FFFD), 
 44          (0x30000, 0x3FFFD), 
 45          (0x40000, 0x4FFFD), 
 46          (0x50000, 0x5FFFD), 
 47          (0x60000, 0x6FFFD), 
 48          (0x70000, 0x7FFFD), 
 49          (0x80000, 0x8FFFD), 
 50          (0x90000, 0x9FFFD), 
 51          (0xA0000, 0xAFFFD), 
 52          (0xB0000, 0xBFFFD), 
 53          (0xC0000, 0xCFFFD), 
 54          (0xD0000, 0xDFFFD), 
 55          (0xE1000, 0xEFFFD), 
 56          ] 
 57   
 58      IPRIVATE = [ 
 59          (0xE000, 0xF8FF), 
 60          (0xF0000, 0xFFFFD), 
 61          (0x100000, 0x10FFFD), 
 62          ] 
 63   
 64   
 65  _unreserved = [False] * 256 
 66  for _ in range(ord('A'), ord('Z') + 1): _unreserved[_] = True 
 67  for _ in range(ord('0'), ord('9') + 1): _unreserved[_] = True 
 68  for _ in range(ord('a'), ord('z') + 1): _unreserved[_] = True 
 69  _unreserved[ord('-')] = True 
 70  _unreserved[ord('.')] = True 
 71  _unreserved[ord('_')] = True 
 72  _unreserved[ord('~')] = True 
 73   
 74   
 75  _escapeme_re = re.compile('[%s]' % (''.join( 
 76      map(lambda (m, n): u'%s-%s' % (unichr(m), unichr(n)), 
 77          UCSCHAR + IPRIVATE)),)) 
 78   
 79   
80 -def _pct_escape_unicode(char_match):
81 c = char_match.group() 82 return ''.join(['%%%X' % (ord(octet),) for octet in c.encode('utf-8')])
83 84
def _pct_encoded_replace_unreserved(mo):
    """Substitution callback that normalizes one percent-encoded octet.

    Group 1 of ``mo`` is read as a hexadecimal number.  If the
    ``_unreserved`` table marks that octet, the decoded character is
    returned; otherwise the matched escape is returned upper-cased.  If a
    ValueError occurs along the way, the matched text is returned unchanged.
    """
    matched = mo.group()
    try:
        octet = int(mo.group(1), 16)
        return chr(octet) if _unreserved[octet] else matched.upper()
    except ValueError:
        return matched
95 96
97 -def _pct_encoded_replace(mo):
98 try: 99 return chr(int(mo.group(1), 16)) 100 except ValueError: 101 return mo.group()
102 103
def remove_dot_segments(path):
    """Return ``path`` with its "." and ".." segments resolved.

    The input is consumed from the left.  Dot segments are dropped, a
    ".." segment also discards the most recently kept segment (if any),
    and every other segment -- together with its leading "/" when it has
    one -- is kept.  The kept segments are concatenated and returned.
    """
    kept = []
    rest = path

    while rest:
        # Leading "../" or "./": drop the prefix.
        if rest.startswith('../'):
            rest = rest[3:]
            continue
        if rest.startswith('./'):
            rest = rest[2:]
            continue

        # "/./" prefix or a trailing "/.": collapse to "/".
        if rest.startswith('/./'):
            rest = rest[2:]
            continue
        if rest == '/.':
            rest = '/'
            continue

        # "/../" prefix or a trailing "/..": collapse to "/" and forget
        # the last kept segment, when there is one.
        if rest.startswith('/../') or rest == '/..':
            rest = '/' if rest == '/..' else rest[3:]
            del kept[-1:]
            continue

        # A lone "." or ".." is simply discarded.
        if rest in ('..', '.'):
            rest = ''
            continue

        # Ordinary segment: keep everything up to (not including) the
        # next "/" that follows an optional leading "/".
        search_from = 1 if rest[0] == '/' else 0
        cut = rest.find('/', search_from)
        if cut == -1:
            cut = len(rest)
        kept.append(rest[:cut])
        rest = rest[cut:]

    return ''.join(kept)
137 138
def urinorm(uri):
    """Normalize an absolute HTTP or HTTPS URI.

    Steps performed, in order:

    * a ``unicode`` input has the characters matched by ``_escapeme_re``
      percent-escaped and is then encoded to ASCII;
    * the scheme and host are lower-cased; a host containing "%" is
      additionally percent-decoded and IDNA-encoded;
    * an empty port, or the scheme's default port (80 for http, 443 for
      https), is removed;
    * percent-escapes in the path are normalized and dot segments are
      removed; an empty path becomes "/";
    * query and fragment are appended as they were matched.

    :param uri: the URI to normalize (Python 2 ``str`` or ``unicode``)
    :returns: the normalized URI
    :raises ValueError: if the URI contains an illegal character, has no
        scheme, uses a scheme other than http/https, or has no authority
    """
    if isinstance(uri, unicode):
        escaped = _escapeme_re.sub(_pct_escape_unicode, uri)
        uri = escaped.encode('ascii')

    bad_char = uri_illegal_char_re.search(uri)
    if bad_char:
        raise ValueError('Illegal characters in URI: %r at position %s' %
                         (bad_char.group(), bad_char.start()))

    parts = uri_re.match(uri)

    # --- scheme ---------------------------------------------------------
    scheme = parts.group(2)
    if scheme is None:
        raise ValueError('No scheme specified')

    scheme = scheme.lower()
    if scheme != 'http' and scheme != 'https':
        raise ValueError('Not an absolute HTTP or HTTPS URI: %r' % (uri,))

    # --- authority ------------------------------------------------------
    raw_authority = parts.group(4)
    if raw_authority is None:
        raise ValueError('Not an absolute URI: %r' % (uri,))

    authority_parts = authority_re.match(raw_authority)
    if authority_parts is None:
        raise ValueError('URI does not have a valid authority: %r' % (uri,))

    userinfo, host, port = authority_parts.groups()
    userinfo = '' if userinfo is None else userinfo

    host = host.lower()
    if '%' in host:
        # Decode the escapes, then re-express the host name via IDNA.
        host = pct_encoded_re.sub(_pct_encoded_replace, host)
        host = unicode(host, 'utf-8').encode('idna')

    # Scheme is known to be http or https at this point.
    default_port = {'http': ':80', 'https': ':443'}[scheme]
    if not port or port == ':' or port == default_port:
        port = ''

    # --- path -----------------------------------------------------------
    path = pct_encoded_re.sub(_pct_encoded_replace_unreserved,
                              parts.group(5))
    path = remove_dot_segments(path) or '/'

    # --- query / fragment (kept with their "?" / "#" prefix) -------------
    query = parts.group(6) or ''
    fragment = parts.group(8) or ''

    return ''.join(
        [scheme, '://', userinfo, host, port, path, query, fragment])
203