1
2 """Utility functions for handling XRIs.
3
4 @see: XRI Syntax v2.0 at the U{OASIS XRI Technical Committee<http://www.oasis-open.org/committees/tc_home.php?wg_abbrev=xri>}
5 """
6
7 import re
8
# First characters that mark an identifier as an XRI rather than a URI;
# '(' is the character that opens a cross-reference.
XRI_AUTHORITIES = ['!', '=', '@', '+', '$', '(']

# ``unichr`` only exists on Python 2; on Python 3 the same job is done by
# ``chr``.  Resolve it once so the rest of the module works on both.
try:
    _unichr = unichr
except NameError:
    _unichr = chr

# Inclusive (first, last) code point ranges.  The names and values appear
# to follow the ``ucschar`` and ``iprivate`` productions of RFC 3987.
try:
    # Probe whether a code point above U+FFFF fits in a single character.
    _unichr(0x10000)
except ValueError:
    # Narrow build: only ranges inside the Basic Multilingual Plane can be
    # expressed in a character class.
    UCSCHAR = [
        (0xA0, 0xD7FF),
        (0xF900, 0xFDCF),
        (0xFDF0, 0xFFEF),
    ]

    IPRIVATE = [
        (0xE000, 0xF8FF),
    ]
else:
    UCSCHAR = [
        (0xA0, 0xD7FF),
        (0xF900, 0xFDCF),
        (0xFDF0, 0xFFEF),
        (0x10000, 0x1FFFD),
        (0x20000, 0x2FFFD),
        (0x30000, 0x3FFFD),
        (0x40000, 0x4FFFD),
        (0x50000, 0x5FFFD),
        (0x60000, 0x6FFFD),
        (0x70000, 0x7FFFD),
        (0x80000, 0x8FFFD),
        (0x90000, 0x9FFFD),
        (0xA0000, 0xAFFFD),
        (0xB0000, 0xBFFFD),
        (0xC0000, 0xCFFFD),
        (0xD0000, 0xDFFFD),
        (0xE1000, 0xEFFFD),
    ]

    IPRIVATE = [
        (0xE000, 0xF8FF),
        (0xF0000, 0xFFFFD),
        (0x100000, 0x10FFFD),
    ]


# One character class covering every UCSCHAR and IPRIVATE range; used to
# find the characters that are percent-escaped when an IRI becomes a URI.
_escapeme_re = re.compile(u'[%s]' % u''.join(
    u'%s-%s' % (_unichr(first), _unichr(last))
    for first, last in UCSCHAR + IPRIVATE))
55
56
58 """Determine if this identifier is an XRI or URI.
59
60 @returns: C{"XRI"} or C{"URI"}
61 """
62 if identifier.startswith('xri://') or (
63 identifier and identifier[0] in XRI_AUTHORITIES):
64 return "XRI"
65 else:
66 return "URI"
67
68
70 """Transform an XRI to IRI-normal form."""
71 if not xri.startswith('xri://'):
72 xri = 'xri://' + xri
73 return escapeForIRI(xri)
74
75
# Lazily matches one parenthesised cross-reference.  Nesting is not
# understood: the match always ends at the first closing parenthesis.
_xref_re = re.compile(r'\((.*?)\)')


def _escape_xref(xref_match):
    """Escape things that need to be escaped if they're in a cross-reference.

    @param xref_match: a match produced by C{_xref_re}, covering one
        parenthesised cross-reference
    @returns: the matched text with C{/}, C{?} and C{#} replaced by
        C{%2F}, C{%3F} and C{%23}
    """
    xref = xref_match.group()
    for char, escaped in (('/', '%2F'), ('?', '%3F'), ('#', '%23')):
        xref = xref.replace(char, escaped)
    return xref


def escapeForIRI(xri):
    """Escape things that need to be escaped when transforming to an IRI.

    @param xri: the XRI text to escape
    @returns: C{xri} with every C{%} written as C{%25} and with C{/},
        C{?} and C{#} percent-encoded inside cross-references
    """
    # Literal percent signs are handled first so that the escapes added
    # for cross-references below are not themselves escaped again.
    xri = xri.replace('%', '%25')
    return _xref_re.sub(_escape_xref, xri)
94
95
99
100
def _percentEscapeUnicode(char_match):
    """Percent-encode the UTF-8 octets of the text matched by C{char_match}.

    @param char_match: a regular expression match object; its whole
        matched text is escaped
    @returns: one C{%XX} escape (two upper-case hex digits) per UTF-8
        octet of the matched text, concatenated
    """
    # bytearray yields integers on both Python 2 and Python 3, whereas
    # iterating the encoded string gives 1-char strings on 2 and ints on 3.
    octets = bytearray(char_match.group().encode('utf-8'))
    # %02X keeps the leading zero for octets below 0x10.
    return ''.join(['%%%02X' % (octet,) for octet in octets])
104
105
107 """Transform an IRI to a URI by escaping unicode."""
108
109 return _escapeme_re.sub(_percentEscapeUnicode, iri)
110
111
113 """Is this provider ID authoritative for this XRI?
114
115 @returntype: bool
116 """
117
118 lastbang = canonicalID.rindex('!')
119 parent = canonicalID[:lastbang]
120 return parent == providerID
121
122
def rootAuthority(xri):
    """Return the root authority for an XRI.

    Example::

        rootAuthority("xri://@example") == "xri://@"

    @type xri: unicode
    @returntype: unicode

    @raise IndexError: if the authority part of C{xri} is empty
    @raise ValueError: if the authority opens a cross-reference with
        C{(} but contains no C{)}
    """
    if xri.startswith('xri://'):
        xri = xri[6:]
    # The authority is everything before the first '/'.
    authority = xri.split('/', 1)[0]
    if authority[0] == '(':
        # Cross-reference: the root runs up to and including the first ')'.
        # XXX: a nested cross-reference contains an earlier ')' and is
        # therefore cut short here.
        root = authority[:authority.index(')') + 1]
    elif authority[0] in XRI_AUTHORITIES:
        # The leading authority character on its own is the root.
        root = authority[0]
    else:
        # Anything else: the root is the text before the first '!' or '*'.
        root = authority.split('!', 1)[0].split('*', 1)[0]

    return XRI(root)
154
155
def XRI(xri):
    """An XRI object allowing comparison of XRI.

    Ideally, this would do full normalization and provide comparison
    operators as per XRI Syntax.  Right now, it just does a bit of
    canonicalization by ensuring the xri scheme is present.

    @param xri: an xri string
    @type xri: unicode

    @returns: C{xri} unchanged if it already starts with C{xri://},
        otherwise C{xri} with C{xri://} prepended
    """
    scheme = 'xri://'
    return xri if xri.startswith(scheme) else scheme + xri
169