1   
  2  """Utility functions for handling XRIs. 
  3   
  4  @see: XRI Syntax v2.0 at the U{OASIS XRI Technical Committee<http://www.oasis-open.org/committees/tc_home.php?wg_abbrev=xri>} 
  5  """ 
  6   
import re
import sys
  8   
  9  XRI_AUTHORITIES = ['!', '=', '@', '+', '$', '('] 
 10   
 11  try: 
 12      unichr(0x10000) 
 13  except ValueError: 
 14       
 15      UCSCHAR = [ 
 16          (0xA0, 0xD7FF), 
 17          (0xF900, 0xFDCF), 
 18          (0xFDF0, 0xFFEF), 
 19          ] 
 20   
 21      IPRIVATE = [ 
 22          (0xE000, 0xF8FF), 
 23          ] 
 24  else: 
 25      UCSCHAR = [ 
 26          (0xA0, 0xD7FF), 
 27          (0xF900, 0xFDCF), 
 28          (0xFDF0, 0xFFEF), 
 29          (0x10000, 0x1FFFD), 
 30          (0x20000, 0x2FFFD), 
 31          (0x30000, 0x3FFFD), 
 32          (0x40000, 0x4FFFD), 
 33          (0x50000, 0x5FFFD), 
 34          (0x60000, 0x6FFFD), 
 35          (0x70000, 0x7FFFD), 
 36          (0x80000, 0x8FFFD), 
 37          (0x90000, 0x9FFFD), 
 38          (0xA0000, 0xAFFFD), 
 39          (0xB0000, 0xBFFFD), 
 40          (0xC0000, 0xCFFFD), 
 41          (0xD0000, 0xDFFFD), 
 42          (0xE1000, 0xEFFFD), 
 43          ] 
 44   
 45      IPRIVATE = [ 
 46          (0xE000, 0xF8FF), 
 47          (0xF0000, 0xFFFFD), 
 48          (0x100000, 0x10FFFD), 
 49          ] 
 50   
 51   
 52  _escapeme_re = re.compile('[%s]' % (''.join( 
 53      map(lambda (m, n): u'%s-%s' % (unichr(m), unichr(n)), 
 54          UCSCHAR + IPRIVATE)),)) 
 55   
 56   
 58      """Determine if this identifier is an XRI or URI. 
 59   
 60      @returns: C{"XRI"} or C{"URI"} 
 61      """ 
 62      if identifier.startswith('xri://') or ( 
 63          identifier and identifier[0] in XRI_AUTHORITIES): 
 64          return "XRI" 
 65      else: 
 66          return "URI" 
  67   
 68   
 70      """Transform an XRI to IRI-normal form.""" 
 71      if not xri.startswith('xri://'): 
 72          xri = 'xri://' + xri 
 73      return escapeForIRI(xri) 
  74   
 75   
 76  _xref_re = re.compile('\((.*?)\)') 
 77   
 78   
 80      """Escape things that need to be escaped if they're in a cross-reference. 
 81      """ 
 82      xref = xref_match.group() 
 83      xref = xref.replace('/', '%2F') 
 84      xref = xref.replace('?', '%3F') 
 85      xref = xref.replace('#', '%23') 
 86      return xref 
  87   
 88   
 90      """Escape things that need to be escaped when transforming to an IRI.""" 
 91      xri = xri.replace('%', '%25') 
 92      xri = _xref_re.sub(_escape_xref, xri) 
 93      return xri 
  94   
 95   
 99   
100   
102      c = char_match.group() 
103      return ''.join(['%%%X' % (ord(octet),) for octet in c.encode('utf-8')]) 
 104   
105   
107      """Transform an IRI to a URI by escaping unicode.""" 
108       
109      return _escapeme_re.sub(_percentEscapeUnicode, iri) 
 110   
111   
113      """Is this provider ID authoritative for this XRI? 
114   
115      @returntype: bool 
116      """ 
117       
118      lastbang = canonicalID.rindex('!') 
119      parent = canonicalID[:lastbang] 
120      return parent == providerID 
 121   
122   
124      """Return the root authority for an XRI. 
125   
126      Example:: 
127   
128          rootAuthority("xri://@example") == "xri://@" 
129   
130      @type xri: unicode 
131      @returntype: unicode 
132      """ 
133      if xri.startswith('xri://'): 
134          xri = xri[6:] 
135      authority = xri.split('/', 1)[0] 
136      if authority[0] == '(': 
137           
138           
139           
140           
141           
142          root = authority[:authority.index(')') + 1] 
143      elif authority[0] in XRI_AUTHORITIES: 
144           
145          root = authority[0] 
146      else: 
147           
148          segments = authority.split('!') 
149          segments = reduce(list.__add__, 
150              map(lambda s: s.split('*'), segments)) 
151          root = segments[0] 
152   
153      return XRI(root) 
 154   
155   
157      """An XRI object allowing comparison of XRI. 
158   
 159      Ideally, this would do full normalization and provide comparison 
160      operators as per XRI Syntax.  Right now, it just does a bit of 
161      canonicalization by ensuring the xri scheme is present. 
162   
163      @param xri: an xri string 
164      @type xri: unicode 
165      """ 
166      if not xri.startswith('xri://'): 
167          xri = 'xri://' + xri 
168      return xri 
 169