openid.server.trustroot

# -*- test-case-name: openid.test.test_rpverify -*-
"""
This module contains the C{L{TrustRoot}} class, which helps handle
trust root checking.  This module is used by the
C{L{openid.server.server}} module, but it is also available to server
implementers who wish to use it for additional trust root checking.

It also implements relying party return_to URL verification, based on
the realm.
"""

__all__ = [
    'TrustRoot',
    'RP_RETURN_TO_URL_TYPE',
    'extractReturnToURLs',
    'returnToMatches',
    'verifyReturnTo',
    ]

from openid import oidutil
from openid import urinorm
from openid.yadis import services

from urlparse import urlparse, urlunparse
import re

############################################
# URL schemes a trust root may use; TrustRoot.parse rejects any other
# scheme.
_protocols = ['http', 'https']

# Known top-level domains.  TrustRoot.isSane reports a trust root as
# not sane when the last label of its host is not in this list.
_top_level_domains = [
    'ac', 'ad', 'ae', 'aero', 'af', 'ag', 'ai', 'al', 'am', 'an',
    'ao', 'aq', 'ar', 'arpa', 'as', 'asia', 'at', 'au', 'aw',
    'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh', 'bi',
    'biz', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw',
    'by', 'bz', 'ca', 'cat', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci',
    'ck', 'cl', 'cm', 'cn', 'co', 'com', 'coop', 'cr', 'cu', 'cv',
    'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz', 'ec',
    'edu', 'ee', 'eg', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk',
    'fm', 'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh',
    'gi', 'gl', 'gm', 'gn', 'gov', 'gp', 'gq', 'gr', 'gs', 'gt',
    'gu', 'gw', 'gy', 'hk', 'hm', 'hn', 'hr', 'ht', 'hu', 'id',
    'ie', 'il', 'im', 'in', 'info', 'int', 'io', 'iq', 'ir', 'is',
    'it', 'je', 'jm', 'jo', 'jobs', 'jp', 'ke', 'kg', 'kh', 'ki',
    'km', 'kn', 'kp', 'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc',
    'li', 'lk', 'lr', 'ls', 'lt', 'lu', 'lv', 'ly', 'ma', 'mc',
    'md', 'me', 'mg', 'mh', 'mil', 'mk', 'ml', 'mm', 'mn', 'mo',
    'mobi', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'museum', 'mv',
    'mw', 'mx', 'my', 'mz', 'na', 'name', 'nc', 'ne', 'net', 'nf',
    'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu', 'nz', 'om', 'org',
    'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr',
    'pro', 'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru',
    'rw', 'sa', 'sb', 'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj',
    'sk', 'sl', 'sm', 'sn', 'so', 'sr', 'st', 'su', 'sv', 'sy',
    'sz', 'tc', 'td', 'tel', 'tf', 'tg', 'th', 'tj', 'tk', 'tl',
    'tm', 'tn', 'to', 'tp', 'tr', 'travel', 'tt', 'tv', 'tw',
    'tz', 'ua', 'ug', 'uk', 'us', 'uy', 'uz', 'va', 'vc', 've',
    'vg', 'vi', 'vn', 'vu', 'wf', 'ws', 'xn--0zwm56d',
    'xn--11b5bs3a9aj6g', 'xn--80akhbyknj4f', 'xn--9t4b11yi5a',
    'xn--deba0ad', 'xn--g6w251d', 'xn--hgbk6aj7f53bba',
    'xn--hlcj6aya9esc7a', 'xn--jxalpdlp', 'xn--kgbechtv',
    'xn--zckzah', 'ye', 'yt', 'yu', 'za', 'zm', 'zw']

# Built from RFC 3986, section 3.2.2.  Used to reject hosts with invalid
# characters: the whole host must consist of the listed characters
# and/or '%' followed by two characters.
# NOTE(review): the escape branch accepts any two ASCII alphanumerics
# after '%', not only hexadecimal digits -- confirm this is intended.
host_segment_re = re.compile(
    r"(?:[-a-zA-Z0-9!$&'\*+,;=._~]|%[a-zA-Z0-9]{2})+$")

class RealmVerificationRedirected(Exception):
    """Attempting to verify this realm resulted in a redirect.

    @ivar relying_party_url: The discovery URL that was requested.

    @ivar rp_url_after_redirects: The URL that was reached after
        following redirects.

    @since: 2.1.0
    """

    def __init__(self, relying_party_url, rp_url_after_redirects):
        # Hand both values to the base class so that C{args} is
        # populated.  On Python 2 the base __init__ is what fills in
        # C{args}; without this call the exception has an empty C{args}
        # tuple, an uninformative repr(), and cannot be re-created by
        # pickle (which calls the class with C{args}).
        Exception.__init__(self, relying_party_url, rp_url_after_redirects)
        self.relying_party_url = relying_party_url
        self.rp_url_after_redirects = rp_url_after_redirects

    def __str__(self):
        return ("Attempting to verify %r resulted in "
                "redirect to %r" %
                (self.relying_party_url,
                 self.rp_url_after_redirects))

81 82

def _parseURL(url):
    """Normalize a URL and split it into the parts used for realm matching.

    @param url: The URL (or trust root pattern) to take apart.

    @return: C{None} when C{urinorm.urinorm} raises C{ValueError}, when
        the network location contains more than one colon, when the
        port is not made up of digits, or when the host does not match
        C{host_segment_re}.  Otherwise a C{(proto, host, port, path)}
        tuple in which C{host} is lower-cased, C{port} is the port
        string (C{''} when no port was given) and C{path} is the path
        with any params, query and fragment re-attached.

    @rtype: C{NoneType} or C{tuple}
    """
    try:
        normalized = urinorm.urinorm(url)
    except ValueError:
        return None

    proto, netloc, path, params, query, frag = urlparse(normalized)
    if not path:
        # Python <2.4 does not parse URLs with no path properly: the
        # query string can end up glued to the network location.
        if not query and '?' in netloc:
            netloc, query = netloc.split('?', 1)

        path = '/'

    # Everything after the authority is compared as a single string.
    full_path = urlunparse(('', '', path, params, query, frag))

    netloc_parts = netloc.split(':')
    if len(netloc_parts) == 1:
        # No colon at all: there is no port.
        host = netloc
        port = ''
    elif len(netloc_parts) == 2:
        host, port = netloc_parts
        if re.match(r'\d+$', port) is None:
            return None
    else:
        # More than one colon in the network location is not accepted.
        return None

    host = host.lower()
    if host_segment_re.match(host) is None:
        return None

    return proto, host, port, full_path

115

class TrustRoot(object):
    """
    This class represents an OpenID trust root.  The C{L{parse}}
    classmethod accepts a trust root string, producing a
    C{L{TrustRoot}} object.  The method OpenID server implementers
    would be most likely to use is the C{L{isSane}} method, which
    checks the trust root for given patterns that indicate that the
    trust root is too broad or points to a local network resource.

    @ivar unparsed: The trust root string this object was built from.

    @ivar proto: The URL scheme.

    @ivar wildcard: Whether the trust root started with a C{*}
        wildcard.

    @ivar host: The host.  For wildcard trust roots the leading C{*}
        has been removed, so C{*.example.com} is stored as
        C{.example.com}.

    @ivar port: The port as a string, C{''} when none was given.

    @ivar path: The path, including any query string.

    @sort: parse, isSane
    """

    def __init__(self, unparsed, proto, wildcard, host, port, path):
        self.unparsed = unparsed
        self.proto = proto
        self.wildcard = wildcard
        self.host = host
        self.port = port
        self.path = path

    def isSane(self):
        """
        This method checks to see if a trust root represents a
        reasonable (sane) set of URLs.  'http://*.com/', for example
        is not a reasonable pattern, as it cannot meaningfully specify
        the site claiming it.  This function attempts to find many
        related examples, but it can only work via heuristics.
        Negative responses from this method should be treated as
        advisory, used only to alert the user to examine the trust
        root carefully.


        @return: Whether the trust root is sane

        @rtype: C{bool}
        """
        if self.host == 'localhost':
            return True

        host_parts = self.host.split('.')
        if self.wildcard:
            # The stored host of a wildcard root starts with '.', so
            # the first piece is the empty string left by the star.
            assert host_parts[0] == '', host_parts
            del host_parts[0]

        # If it's an absolute domain name, remove the empty string
        # from the end.
        if host_parts and not host_parts[-1]:
            del host_parts[-1]

        # Nothing left to judge, or adjacent dots in the host.
        if not host_parts or '' in host_parts:
            return False

        tld = host_parts[-1]
        if tld not in _top_level_domains:
            return False

        # A bare top-level domain is never sane.
        if len(host_parts) == 1:
            return False

        if self.wildcard:
            if len(tld) == 2 and len(host_parts[-2]) <= 3:
                # It's a 2-letter tld with a short second to last segment
                # so there needs to be more than two segments specified
                # (e.g. *.co.uk is insane)
                return len(host_parts) > 2

        # Passed all tests for insanity.
        return True

    def validateURL(self, url):
        """
        Validates a URL against this trust root.

        The scheme and port must be equal to the trust root's.  The
        host must be equal to the trust root's host or, for a wildcard
        trust root, be that domain or end with it.  The path must be
        equal to the trust root's path or extend it at a separator
        boundary.


        @param url: The URL to check

        @type url: C{str}


        @return: Whether the given URL is within this trust root.

        @rtype: C{bool}
        """
        url_parts = _parseURL(url)
        if url_parts is None:
            return False

        proto, host, port, path = url_parts

        if proto != self.proto:
            return False

        if port != self.port:
            return False

        # A concrete URL can never contain a wildcard itself.
        if '*' in host:
            return False

        if self.wildcard:
            # self.host starts with '.', so "*.example.com" accepts
            # both "foo.example.com" and the bare "example.com".
            host_ok = (host.endswith(self.host) or
                       ('.' + host) == self.host)
        else:
            host_ok = (host == self.host)

        if not host_ok:
            return False

        if path == self.path:
            return True

        # must be equal up to the length of the trust root's path, at
        # least
        path_len = len(self.path)
        if path[:path_len] != self.path:
            return False

        # These characters must be on the boundary between the end
        # of the trust root's path and the start of the URL's
        # path.
        if '?' in self.path:
            allowed = '&'
        else:
            allowed = '?/'

        return (self.path[-1] in allowed or
                path[path_len] in allowed)

    def parse(cls, trust_root):
        """
        This method creates a C{L{TrustRoot}} instance from the given
        input, if possible.


        @param trust_root: This is the trust root to parse into a
            C{L{TrustRoot}} object.

        @type trust_root: C{str}


        @return: A C{L{TrustRoot}} instance if trust_root parses as a
            trust root, C{None} otherwise.

        @rtype: C{NoneType} or C{L{TrustRoot}}
        """
        url_parts = _parseURL(trust_root)
        if url_parts is None:
            return None

        proto, host, port, path = url_parts

        # check for valid protocol
        if proto not in _protocols:
            return None

        # check for URI fragment
        if path.find('#') != -1:
            return None

        # extract wildcard if it is there
        if host.find('*', 1) != -1:
            # wildcard must be at start of domain: *.foo.com, not foo.*.com
            return None

        if host.startswith('*'):
            # Starts with star, so must have a dot after it (if a
            # domain is specified)
            if len(host) > 1 and host[1] != '.':
                return None

            # Keep the leading dot: "*.foo.com" is stored as ".foo.com"
            host = host[1:]
            wildcard = True
        else:
            wildcard = False

        # we have a valid trust root
        return cls(trust_root, proto, wildcard, host, port, path)

    parse = classmethod(parse)

    def checkSanity(cls, trust_root_string):
        """str -> bool

        is this a sane trust root?  A string that does not parse as a
        trust root is reported as not sane.
        """
        trust_root = cls.parse(trust_root_string)
        if trust_root is None:
            return False

        return trust_root.isSane()

    checkSanity = classmethod(checkSanity)

    def checkURL(cls, trust_root, url):
        """quick func for validating a url against a trust root.  See the
        TrustRoot class if you need more control."""
        tr = cls.parse(trust_root)
        return tr is not None and tr.validateURL(url)

    checkURL = classmethod(checkURL)

    def buildDiscoveryURL(self):
        """Return a discovery URL for this realm.

        For a wildcard realm the star is replaced by "www"; an explicit
        port in the realm is kept.  A realm without a wildcard is
        returned as the string it was originally built from.

        This function does not check to make sure that the realm is
        valid.  Its behaviour on invalid inputs is undefined.

        @rtype: str

        @returns: The URL upon which relying party discovery should be run
            in order to verify the return_to URL

        @since: 2.1.0
        """
        if not self.wildcard:
            return self.unparsed

        # Use "www." in place of the star
        assert self.host.startswith('.'), self.host
        netloc = 'www' + self.host
        if self.port:
            # Only the star is substituted; dropping the port would
            # send discovery to a different endpoint than the realm
            # names.
            netloc = '%s:%s' % (netloc, self.port)

        return '%s://%s%s' % (self.proto, netloc, self.path)

    def __repr__(self):
        return "TrustRoot(%r, %r, %r, %r, %r, %r)" % (
            self.unparsed, self.proto, self.wildcard, self.host, self.port,
            self.path)

    def __str__(self):
        return repr(self)

# The service type URI for relying party discovery, used in realm
# verification.  _extractReturnURL keeps only the endpoints that
# match this type.
#
# XXX: This should probably live somewhere else (like in
# openid.consumer or openid.yadis somewhere)
RP_RETURN_TO_URL_TYPE = 'http://specs.openid.net/auth/2.0/return_to'

def _extractReturnURL(endpoint):
    """Pick the URL out of a relying party return_to endpoint.

    This function is intended to be used as a filter for the Yadis
    filtering interface.

    @see: C{L{openid.yadis.services}}
    @see: C{L{openid.yadis.filters}}

    @param endpoint: An XRDS BasicServiceEndpoint, as returned by
        performing Yadis discovery.

    @returns: The endpoint URL when the endpoint matches
        C{RP_RETURN_TO_URL_TYPE}, C{None} for any other endpoint.
    @rtype: str or NoneType
    """
    if not endpoint.matchTypes([RP_RETURN_TO_URL_TYPE]):
        # Not a relying party return_to endpoint.
        return None

    return endpoint.uri

380

def returnToMatches(allowed_return_to_urls, return_to):
    """Is the return_to URL under one of the supplied allowed
    return_to URLs?

    @param allowed_return_to_urls: The return_to patterns to try.

    @param return_to: The return_to URL to check.

    @return: C{True} as soon as one pattern matches, C{False} when
        none of them does.

    @since: 2.1.0
    """
    for candidate in allowed_return_to_urls:
        # A return_to pattern works the same as a realm, except that
        # it's not allowed to use a wildcard.  We'll model this by
        # parsing it as a realm, and not trying to match it if it has
        # a wildcard.
        pattern = TrustRoot.parse(candidate)

        if pattern is None:
            # Does not parse as a trust root
            continue

        if pattern.wildcard:
            # Wildcards are not allowed here
            continue

        if pattern.validateURL(return_to):
            # Matches the return_to that we passed in
            return True

    # No URL in the list matched
    return False

408

def getAllowedReturnURLs(relying_party_url):
    """Given a relying party discovery URL return a list of return_to URLs.

    @param relying_party_url: The URL to run discovery on.

    @raises RealmVerificationRedirected: When the URL reported by
        C{services.getServiceEndpoints} differs from
        C{relying_party_url}.

    @since: 2.1.0
    """
    discovered = services.getServiceEndpoints(
        relying_party_url, _extractReturnURL)
    final_url, return_to_urls = discovered

    if final_url == relying_party_url:
        return return_to_urls

    # Verification caused a redirect
    raise RealmVerificationRedirected(relying_party_url, final_url)

# The _vrfy parameter of verifyReturnTo exists only to make testing
# easier: a test can pass a stand-in for getAllowedReturnURLs.

def verifyReturnTo(realm_str, return_to, _vrfy=getAllowedReturnURLs):
    """Verify that a return_to URL is valid for the given realm.

    This function builds a discovery URL, performs Yadis discovery on
    it, makes sure that the URL does not redirect, parses out the
    return_to URLs, and finally checks to see if the current return_to
    URL matches one of the discovered return_to URLs.

    @param realm_str: The realm (trust root) string.

    @param return_to: The return_to URL to verify.

    @param _vrfy: Callable that maps a discovery URL to the list of
        allowed return_to URLs.

    @raises DiscoveryFailure: When Yadis discovery fails
    @returns: True if the return_to URL is valid for the realm; False
        when the realm does not parse, discovery was redirected, or no
        discovered URL matches.

    @since: 2.1.0
    """
    import sys

    realm = TrustRoot.parse(realm_str)
    if realm is None:
        # The realm does not parse as a URL pattern
        return False

    try:
        allowable_urls = _vrfy(realm.buildDiscoveryURL())
    except RealmVerificationRedirected:
        # Fetch the exception through sys.exc_info() instead of the
        # "except E, err" form, which only parses on Python 2; this
        # spelling is accepted by every Python version.
        oidutil.log(str(sys.exc_info()[1]))
        return False

    if returnToMatches(allowable_urls, return_to):
        return True

    oidutil.log("Failed to validate return_to %r for realm %r, was not "
                "in %s" % (return_to, realm_str, allowable_urls))
    return False

455

Source Code for Module openid.server.trustroot