1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
1 |
# Bazaar-NG -- distributed version control
|
2 |
#
|
|
3 |
# Copyright (C) 2006 by Canonical Ltd
|
|
4 |
#
|
|
5 |
# This program is free software; you can redistribute it and/or modify
|
|
6 |
# it under the terms of the GNU General Public License as published by
|
|
7 |
# the Free Software Foundation; either version 2 of the License, or
|
|
8 |
# (at your option) any later version.
|
|
9 |
#
|
|
10 |
# This program is distributed in the hope that it will be useful,
|
|
11 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13 |
# GNU General Public License for more details.
|
|
14 |
#
|
|
15 |
# You should have received a copy of the GNU General Public License
|
|
16 |
# along with this program; if not, write to the Free Software
|
|
17 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
18 |
||
19 |
"""A collection of function for handling URL operations."""
|
|
20 |
||
1685.1.49
by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname |
21 |
import os |
1711.4.5
by John Arbash Meinel
the _posix_* routines should use posixpath not os.path, so tests pass on win32 |
22 |
from posixpath import split as _posix_split, normpath as _posix_normpath |
1685.1.50
by John Arbash Meinel
Added an re for handling scheme paths. |
23 |
import re |
24 |
import sys |
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
25 |
import urllib |
26 |
||
27 |
import bzrlib.errors as errors |
|
28 |
import bzrlib.osutils |
|
29 |
||
30 |
||
1685.1.49
by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname |
31 |
def basename(url, exclude_trailing_slash=True):
    """Return the final path component of a URL.

    :param url: The URL to inspect.
    :param exclude_trailing_slash: When true, a URL that looks like
        "path/to/foo/" yields 'foo' rather than ''.
    :return: Only the last component of the URL. This is '' when the
        trailing slash is not excluded, or when the URL is at its root.
    """
    # split() always hands back a (parent, child) pair; we want the child.
    _parent, tail = split(url, exclude_trailing_slash=exclude_trailing_slash)
    return tail
|
42 |
||
43 |
||
44 |
def dirname(url, exclude_trailing_slash=True):
    """Return the parent directory of the given URL.

    :param url: Relative or absolute URL
    :param exclude_trailing_slash: Drop a final slash before splitting
        (http://host/foo/ is treated as http://host/foo, while
        http://host/ stays http://host/)
    :return: The URL with its last path chunk removed
    """
    # TODO: jam 20060502 This was named dirname to be consistent
    #       with the os functions, but maybe "parent" would be better
    parent, _tail = split(url, exclude_trailing_slash=exclude_trailing_slash)
    return parent
|
56 |
||
57 |
||
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
58 |
def escape(relpath): |
59 |
"""Escape relpath to be a valid url."""
|
|
60 |
if isinstance(relpath, unicode): |
|
61 |
relpath = relpath.encode('utf-8') |
|
62 |
# After quoting and encoding, the path should be perfectly
|
|
63 |
# safe as a plain ASCII string, str() just enforces this
|
|
64 |
return str(urllib.quote(relpath)) |
|
65 |
||
66 |
||
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
67 |
def file_relpath(base, path):
    """Compute the relative sub-portion of a file:// URL.

    Both arguments are assumed to be fully specified file:// URLs. They
    are converted to local paths, the relative path is computed with
    bzrlib.osutils.relpath, and the result is escaped again.
    """
    assert len(base) >= MIN_ABS_FILEURL_LENGTH, ('Length of base must be equal or'
        ' exceed the platform minimum url length (which is %d)' %
        MIN_ABS_FILEURL_LENGTH)

    local_base = local_path_from_url(base)
    local_path = local_path_from_url(path)
    relative = bzrlib.osutils.relpath(local_base, local_path)
    return escape(relative)
|
79 |
||
80 |
||
1685.1.49
by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname |
81 |
def _find_scheme_and_separator(url):
    """Locate the scheme separator (://) and the first path separator.

    This is just a helper function for other path utilities.
    It could probably be replaced by urlparse.

    :return: (scheme_length, first_path_slash). Both are None when url
        has no scheme; first_path_slash is None when there is no '/'
        after the '://'. Otherwise first_path_slash is an index into url.
    """
    match = _url_scheme_re.match(url)
    if match is None:
        return None, None

    scheme_len = len(match.group('scheme'))

    # Find the path separating slash (first slash after the ://)
    slash_in_path = match.group('path').find('/')
    if slash_in_path == -1:
        return scheme_len, None
    # Convert the offset within the path into an offset within url by
    # skipping the scheme and the 3 characters of '://'.
    return scheme_len, slash_in_path + scheme_len + 3
|
1685.1.49
by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname |
100 |
|
101 |
||
1685.1.55
by John Arbash Meinel
Adding bzrlib.urlutils.join() to handle joining URLs |
102 |
def join(base, *args):
    """Create a URL by joining sections.

    This will normalize '..', assuming that paths are absolute
    (it assumes no symlinks in either path)

    If any of *args is an absolute URL, it replaces everything joined
    so far.
    Example:
        join('http://foo', 'http://bar') => 'http://bar'
        join('http://foo', 'bar') => 'http://foo/bar'
        join('http://foo', 'bar', '../baz') => 'http://foo/baz'

    :raises errors.InvalidURLJoin: if a '..' would climb above the first
        path element.
    """
    scheme = None
    base_match = _url_scheme_re.match(base)
    if base_match:
        scheme = base_match.group('scheme')
        segments = base_match.group('path').split('/')
    else:
        segments = base.split('/')

    for arg in args:
        arg_match = _url_scheme_re.match(arg)
        if arg_match:
            # Absolute URL: start over from it
            scheme = arg_match.group('scheme')
            segments = arg_match.group('path').split('/')
            continue

        for chunk in arg.split('/'):
            if chunk == '.':
                continue
            if chunk != '..':
                segments.append(chunk)
                continue
            # chunk is '..': never pop the first element (the host portion)
            if len(segments) < 2:
                raise errors.InvalidURLJoin('Cannot go above root',
                        base, args)
            segments.pop()

    joined = '/'.join(segments)
    if scheme is None:
        return joined
    return scheme + '://' + joined
|
145 |
||
146 |
||
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
147 |
# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
|
148 |
def _posix_local_path_from_url(url):
    """Convert a url like file:///path/to/foo into /path/to/foo

    :raises errors.InvalidURL: if url does not start with file:///
    """
    prefix = 'file://'
    if not url.startswith(prefix + '/'):
        raise errors.InvalidURL(url, 'local urls must start with file:///')
    # Only 2 of the 3 slashes are removed, so the path keeps its leading '/'
    return unescape(url[len(prefix):])
|
154 |
||
155 |
||
156 |
def _posix_local_path_to_url(path):
    """Convert a local path like ./foo into a URL like file:///path/to/foo

    The path is made absolute, normalized and escaped, so unicode and
    special characters end up URL-quoted.
    """
    # The posixpath routines are used explicitly (not os.path) so that
    # this can be tested on non-posix platforms.
    absolute = bzrlib.osutils._posix_abspath(path)
    normalized = _posix_normpath(absolute)
    return 'file://' + escape(normalized)
|
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
165 |
|
166 |
||
167 |
def _win32_local_path_from_url(url):
    """Convert a url like file:///C:/path/to/foo into C:/path/to/foo

    :param url: A file:/// URL whose path starts with a drive letter,
        followed by ':' or '|', followed by '/'.
    :return: The unescaped local path, with an upper-cased drive letter.
    :raises errors.InvalidURL: if url does not start with file:///, or
        if it lacks a well-formed drive specification (including URLs
        too short to contain one, such as 'file:///').
    """
    if not url.startswith('file:///'):
        raise errors.InvalidURL(url, 'local urls must start with file:///')
    # We strip off all 3 slashes
    win32_url = url[len('file:///'):]
    # The length is checked first so that short inputs are reported as
    # InvalidURL rather than failing with IndexError on the indexing below.
    if (len(win32_url) < 3
        or win32_url[0] not in ('abcdefghijklmnopqrstuvwxyz'
                                'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        or win32_url[1] not in '|:'
        or win32_url[2] != '/'):
        raise errors.InvalidURL(url, 'Win32 file urls start with'
                ' file:///X:/, where X is a valid drive letter')
    # The separator is always emitted as ':' even if the URL used '|'
    return win32_url[0].upper() + u':' + unescape(win32_url[2:])
180 |
||
181 |
||
182 |
def _win32_local_path_to_url(path):
    """Convert a local path like ./foo into a URL like file:///C:/path/to/foo

    The path is made absolute, normalized, converted to forward slashes
    and escaped. The drive letter is upper-cased and left unescaped.
    """
    # The nt-specific helpers are used explicitly so that this can be
    # tested on non-win32 platforms.
    # FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname
    #       which actually strips trailing space characters.
    #       The worst part is that under linux ntpath.abspath has different
    #       semantics, since 'nt' is not an available module.
    absolute = bzrlib.osutils._win32_abspath(path)
    win32_path = bzrlib.osutils._nt_normpath(absolute).replace('\\', '/')
    drive_letter = win32_path[0].upper()
    # win32_path[1] is skipped and a literal ':' emitted in its place
    remainder = win32_path[2:]
    return 'file:///' + drive_letter + ':' + escape(remainder)
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
196 |
|
197 |
||
198 |
# Select the platform-specific local path <-> URL converters, together
# with the length of the shortest absolute file URL on this platform.
if sys.platform == 'win32':
    local_path_to_url = _win32_local_path_to_url
    local_path_from_url = _win32_local_path_from_url
    MIN_ABS_FILEURL_LENGTH = len('file:///C:/')
else:
    local_path_to_url = _posix_local_path_to_url
    local_path_from_url = _posix_local_path_from_url
    MIN_ABS_FILEURL_LENGTH = len('file:///')
1685.1.48
by John Arbash Meinel
Updated strip_trailing_slash to support lots more url stuff, added tests |
207 |
|
208 |
||
1685.1.50
by John Arbash Meinel
Added an re for handling scheme paths. |
209 |
# Matches "scheme://path". The scheme must be at least 2 characters long
# and may not contain ':' or '/'; everything after '://' is the path group.
_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')
210 |
||
211 |
||
212 |
def normalize_url(url):
    """Make sure that a path string is in fully normalized URL form.

    This handles URLs which have unicode characters, spaces,
    special characters, etc.

    It has two basic modes of operation, depending on whether the
    supplied string starts with a url specifier (scheme://) or not.
    If it does not have a specifier it is considered a local path,
    and will be converted into a file:/// url. Non-ascii characters
    will be encoded using utf-8.
    If it does have a url specifier, it will be treated as a "hybrid"
    URL. Basically, a URL that should have URL special characters already
    escaped (like +?&# etc), but may have unicode characters, etc
    which would not be valid in a real URL.

    :param url: Either a hybrid URL or a local path
    :return: A normalized URL which only includes 7-bit ASCII characters.
    :raises errors.InvalidURL: if url is a non-unicode string with a
        scheme and contains a character outside the URL-safe set.
    """
    m = _url_scheme_re.match(url)
    if not m:
        return local_path_to_url(url)
    if not isinstance(url, unicode):
        # A plain str URL must already be fully escaped; verify and
        # return it untouched.
        for c in url:
            if c not in _url_safe_characters:
                raise errors.InvalidURL(url, 'URLs can only contain specific'
                                    ' safe characters (not %r)' % c)
        return url
    # We have a unicode (hybrid) url
    scheme = m.group('scheme')
    path = list(m.group('path'))

    # Replace every unsafe character, in place, by the %XX escapes of
    # its utf-8 bytes. The list never changes length, so enumerating it
    # while assigning is safe.
    for i, char in enumerate(path):
        if char not in _url_safe_characters:
            utf8_bytes = char.encode('utf-8')
            path[i] = ''.join(['%%%02X' % ord(b) for b in utf8_bytes])
    return scheme + '://' + ''.join(path)
|
1685.1.50
by John Arbash Meinel
Added an re for handling scheme paths. |
249 |
|
250 |
||
1685.1.70
by Wouter van Heyst
working on get_parent, set_parent and relative urls, broken |
251 |
def relative_url(base, other):
    """Return a path to other from base.

    If other is unrelated to base (either one lacks a scheme followed by
    a path slash, or their scheme/host prefixes differ), other is
    returned unchanged. Otherwise a relative path is returned, which is
    "." when both refer to the same location.
    This assumes no symlinks as part of the url.
    """
    dummy, base_first_slash = _find_scheme_and_separator(base)
    if base_first_slash is None:
        return other

    dummy, other_first_slash = _find_scheme_and_separator(other)
    if other_first_slash is None:
        return other

    # Everything before the first path slash is scheme + host; if that
    # differs the two URLs are unrelated.
    if base[:base_first_slash] != other[:other_first_slash]:
        return other

    base_path = base[base_first_slash+1:]
    if base_path.endswith('/'):
        base_path = base_path[:-1]
    other_path = other[other_first_slash+1:]

    base_sections = base_path.split('/')
    if base_sections == ['']:
        base_sections = []
    other_sections = other_path.split('/')
    if other_sections == ['']:
        other_sections = []

    # Count the leading sections the two paths have in common
    match_len = 0
    for b, o in zip(base_sections, other_sections):
        if b != o:
            break
        match_len += 1

    # Climb out of what remains of base, then descend into other
    output_sections = ['..'] * (len(base_sections) - match_len)
    output_sections.extend(other_sections[match_len:])

    return "/".join(output_sections) or "."
|
296 |
||
297 |
||
1685.1.49
by John Arbash Meinel
Added bzrlib.urlutils.split and basename + dirname |
298 |
def split(url, exclude_trailing_slash=True):
    """Split a URL into its parent directory and a child directory.

    :param url: A relative or absolute URL
    :param exclude_trailing_slash: Strip off a final '/' if it is part
        of the path (but not if it is part of the protocol specification)

    :return: (parent_url, child_dir).  child_dir may be the empty string
        if we're at the root.
    :raises errors.InvalidURL: on win32, for a file:/// URL whose path
        does not pass the drive letter check.
    """
    scheme_loc, first_path_slash = _find_scheme_and_separator(url)

    if first_path_slash is None and scheme_loc is not None:
        # Scheme with no path
        return url, ''

    if first_path_slash is None:
        # Relative path
        if exclude_trailing_slash and url.endswith('/'):
            url = url[:-1]
        return _posix_split(url)

    # We have a fully defined path
    url_base = url[:first_path_slash] # http://host, file://
    path = url[first_path_slash:] # /file/foo

    if sys.platform == 'win32' and url.startswith('file:///'):
        # Move the drive letter from the path to the base
        # path is currently /C:/foo
        # NOTE(review): an empty slice is "in" any string, so a path too
        # short to hold the drive separator passes this check -- confirm
        # whether that is intended.
        if not (path[2:3] in ':|' and path[3:4] in '\\/'):
            raise errors.InvalidURL(url,
                'win32 file:/// paths need a drive letter')
        url_base += path[0:3] # file:// + /C:
        path = path[3:] # /foo

    # A lone '/' is the root and must be kept
    if exclude_trailing_slash and len(path) > 1 and path.endswith('/'):
        path = path[:-1]
    head, tail = _posix_split(path)
    return url_base + head, tail
|
338 |
||
1685.1.46
by John Arbash Meinel
Sorting functions by name. |
339 |
|
1685.1.47
by John Arbash Meinel
s comes before u |
340 |
def strip_trailing_slash(url):
    """Strip trailing slash, except for root paths.

    The definition of 'root path' is platform-dependent.
    This assumes that all URLs are valid netloc urls, such that they
    form:
    scheme://host/path
    It searches for ://, and then refuses to remove the next '/'.
    It can also handle relative paths
    Examples:
        path/to/foo       => path/to/foo
        path/to/foo/      => path/to/foo
        http://host/path/ => http://host/path
        http://host/path  => http://host/path
        http://host/      => http://host/
        file:///          => file:///
        file:///foo/      => file:///foo
        # This is unique on win32 platforms, and is the only URL
        # format which does it differently: the drive root is kept.
        file:///C:/       => file:///C:/
    """
    if not url.endswith('/'):
        # Nothing to do
        return url

    stripped = url[:-1]

    if sys.platform == 'win32' and url.startswith('file:///'):
        # This gets handled specially, because the 'top-level'
        # of a win32 path is actually the drive letter
        if len(url) > MIN_ABS_FILEURL_LENGTH:
            return stripped
        return url

    scheme_loc, first_path_slash = _find_scheme_and_separator(url)
    if scheme_loc is None:
        # This is a relative path, as it has no scheme
        # so just chop off the last character
        return stripped

    only_root_slash = (first_path_slash is None
                       or first_path_slash == len(url)-1)
    if only_root_slash:
        # Don't chop off anything if the only slash is the path
        # separating slash
        return url

    return stripped
384 |
||
1685.1.47
by John Arbash Meinel
s comes before u |
385 |
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
386 |
def unescape(url): |
387 |
"""Unescape relpath from url format.
|
|
388 |
||
389 |
This returns a Unicode path from a URL
|
|
390 |
"""
|
|
391 |
# jam 20060427 URLs are supposed to be ASCII only strings
|
|
392 |
# If they are passed in as unicode, urllib.unquote
|
|
393 |
# will return a UNICODE string, which actually contains
|
|
394 |
# utf-8 bytes. So we have to ensure that they are
|
|
395 |
# plain ASCII strings, or the final .decode will
|
|
396 |
# try to encode the UNICODE => ASCII, and then decode
|
|
397 |
# it into utf-8.
|
|
398 |
try: |
|
399 |
url = str(url) |
|
400 |
except UnicodeError, e: |
|
401 |
raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,)) |
|
1685.1.80
by Wouter van Heyst
more code cleanup |
402 |
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
403 |
unquoted = urllib.unquote(url) |
404 |
try: |
|
405 |
unicode_path = unquoted.decode('utf-8') |
|
406 |
except UnicodeError, e: |
|
407 |
raise errors.InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,)) |
|
408 |
return unicode_path |
|
409 |
||
410 |
||
411 |
# These are characters that if escaped, should stay that way
_no_decode_chars = ';/?:@&=+$,#'
_no_decode_ords = [ord(c) for c in _no_decode_chars]
# Two-digit hex codes of those characters: all lower-case forms first,
# then all upper-case forms.
_no_decode_hex = (['%02x' % o for o in _no_decode_ords]
                + ['%02X' % o for o in _no_decode_ords])

# Maps a two-digit hex code (either case) to the character it stands for
_hex_display_map = {}
for o in range(256):
    _hex_display_map['%02x' % o] = chr(o)
    _hex_display_map['%02X' % o] = chr(o)
# These entries get mapped to themselves (they stay escaped)
_hex_display_map.update(
    dict((code, '%' + code) for code in _no_decode_hex))

# These characters should not be escaped
_url_safe_characters = set('abcdefghijklmnopqrstuvwxyz'
                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                           '0123456789' '_.-/'
                           ';?:@&=+$,%#')
|
1685.1.45
by John Arbash Meinel
Moved url functions into bzrlib.urlutils |
426 |
|
427 |
||
1685.1.54
by John Arbash Meinel
url_for_display now makes sure output can be properly encoded. |
428 |
def unescape_for_display(url, encoding):
    """Decode what you can for a URL, so that we get a nice looking path.

    This will turn file:// urls into local paths, and try to decode
    any portions of a http:// style url that it can.

    Any sections of the URL which can't be represented in the encoding or
    need to stay as escapes are left alone.

    :param url: A 7-bit ASCII URL
    :param encoding: The final output encoding

    :return: A unicode string which can be safely encoded into the
        specified encoding.
    """
    assert encoding is not None, 'you cannot specify None for the display encoding.'
    if url.startswith('file://'):
        try:
            path = local_path_from_url(url)
            # Only a check that the path is representable; the encoded
            # bytes themselves are discarded.
            path.encode(encoding)
            return path
        except UnicodeError:
            return url

    # Split into sections to try to decode utf-8
    res = url.split('/')
    # Section 0 is skipped and always left as it is.
    for i in xrange(1, len(res)):
        pieces = res[i].split('%')
        # pieces[0] precedes the first '%'; every later piece starts
        # with what should be a two-digit hex code.
        for j in xrange(1, len(pieces)):
            item = pieces[j]
            hex_code = item[:2]
            rest = item[2:]
            try:
                pieces[j] = _hex_display_map[hex_code] + rest
            except KeyError:
                # Put back the percent symbol
                pieces[j] = '%' + item
            except UnicodeDecodeError:
                pieces[j] = unichr(int(hex_code, 16)) + rest
        unescaped = ''.join(pieces)

        try:
            decoded = unescaped.decode('utf-8')
        except UnicodeDecodeError:
            # If this path segment cannot be properly utf-8 decoded
            # after doing unescaping we will just leave it alone
            continue

        try:
            decoded.encode(encoding)
        except UnicodeEncodeError:
            # If this chunk cannot be encoded in the local
            # encoding, then we should leave it alone
            continue

        # Otherwise take the url decoded one
        res[i] = decoded
    return u'/'.join(res)