~bzr-pqm/bzr/bzr.dev : contents of bzrlib/urlutils.py at revision 4226.2.1

~bzr-pqm/bzr/bzr.dev : (revision 4226.2.1)

1861.2.6 by Alexander Belchenko branding: change Bazaar-NG to Bazaar	1	# Bazaar -- distributed version control
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	2	#
2052.3.2 by John Arbash Meinel Change Copyright .. by Canonical to Copyright ... Canonical	3	# Copyright (C) 2006 Canonical Ltd
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	4	#
	5	# This program is free software; you can redistribute it and/or modify
	6	# it under the terms of the GNU General Public License as published by
	7	# the Free Software Foundation; either version 2 of the License, or
	8	# (at your option) any later version.
	9	#
	10	# This program is distributed in the hope that it will be useful,
	11	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	# GNU General Public License for more details.
	14	#
	15	# You should have received a copy of the GNU General Public License
	16	# along with this program; if not, write to the Free Software
4183.7.1 by Sabin Iacob update FSF mailing address	17	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	18
	19	"""A collection of function for handling URL operations."""
	20
1685.1.49 by John Arbash Meinel Added bzrlib.urlutils.split and basename + dirname	21	import os
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	22	import re
	23	import sys
1996.3.12 by John Arbash Meinel Change how 'revision' is imported to avoid problems later	24
	25	from bzrlib.lazy_import import lazy_import
	26	lazy_import(globals(), """
	27	from posixpath import split as _posix_split, normpath as _posix_normpath
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	28	import urllib
3242.3.26 by Aaron Bentley Implement rebase_url	29	import urlparse
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	30
1996.3.12 by John Arbash Meinel Change how 'revision' is imported to avoid problems later	31	from bzrlib import (
	32	errors,
	33	osutils,
	34	)
	35	""")
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	36
	37
def basename(url, exclude_trailing_slash=True):
    """Return the final path component of a URL.

    :param url: The URL in question.
    :param exclude_trailing_slash: Passed through to split(). When true, a
        URL that looks like "path/to/foo/" yields 'foo' rather than ''.
    :return: The second element of split(url), i.e. the last component.
        This can be '' when the trailing slash is not excluded, or when
        the URL is at its root.
    """
    _parent, last_component = split(
        url, exclude_trailing_slash=exclude_trailing_slash)
    return last_component
	49
	50
	51	def dirname(url, exclude_trailing_slash=True):
	52	"""Return the parent directory of the given path.
	53
	54	:param url: Relative or absolute URL
	55	:param exclude_trailing_slash: Remove a final slash
	56	(treat http://host/foo/ as http://host/foo, but
	57	http://host/ stays http://host/)
	58	:return: Everything in the URL except the last path chunk
	59	"""
	60	# TODO: jam 20060502 This was named dirname to be consistent
	61	# with the os functions, but maybe "parent" would be better
	62	return split(url, exclude_trailing_slash=exclude_trailing_slash)[0]
	63
	64
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	65	def escape(relpath):
	66	"""Escape relpath to be a valid url."""
	67	if isinstance(relpath, unicode):
	68	relpath = relpath.encode('utf-8')
	69	# After quoting and encoding, the path should be perfectly
	70	# safe as a plain ASCII string, str() just enforces this
4098.3.1 by Jonathan Lange Don't escape tildes	71	return str(urllib.quote(relpath, safe='/~'))
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	72
	73
def file_relpath(base, path):
    """Compute just the relative sub-portion of a url.

    Both arguments are converted with local_path_from_url(), so they are
    expected to be fully specified file:// URLs.

    :param base: The file:// URL the result is relative to.
    :param path: The file:// URL to express relative to base.
    :return: The escaped result of osutils.relpath() on the two local paths.
    :raises ValueError: If base is shorter than MIN_ABS_FILEURL_LENGTH.
    """
    if len(base) < MIN_ABS_FILEURL_LENGTH:
        template = ('Length of base must be equal or'
                    ' exceed the platform minimum url length (which is %d)')
        raise ValueError(template % MIN_ABS_FILEURL_LENGTH)
    local_base = local_path_from_url(base)
    local_path = local_path_from_url(path)
    return escape(osutils.relpath(local_base, local_path))
1685.1.46 by John Arbash Meinel Sorting functions by name.	86
1685.1.46 by John Arbash Meinel Sorting functions by name.	87
def _find_scheme_and_separator(url):
    """Locate the scheme separator (://) and the first path separator.

    This is just a helper function for other path utilities.
    It could probably be replaced by urlparse.

    :return: A pair (scheme_end, first_path_slash).  Both are None when
        url does not match _url_scheme_re.  Otherwise scheme_end is the
        length of the scheme (the offset of '://'), and first_path_slash
        is the offset in url of the first '/' after the '://', or None
        when there is no such slash.
    """
    found = _url_scheme_re.match(url)
    if found is None:
        return None, None

    scheme_len = len(found.group('scheme'))
    # Offset of the first slash inside the part that follows '://'
    slash_in_path = found.group('path').find('/')
    if slash_in_path < 0:
        return scheme_len, None
    # Translate back into an offset in the full url
    return scheme_len, scheme_len + len('://') + slash_in_path
1685.1.49 by John Arbash Meinel Added bzrlib.urlutils.split and basename + dirname	107
	108
def join(base, *args):
    """Create a URL by joining sections.

    This will normalize '..', assuming that paths are absolute
    (it assumes no symlinks in either path)

    If any of *args is an absolute URL, it replaces whatever was
    accumulated before it.
    Example:
        join('http://foo', 'http://bar') => 'http://bar'
        join('http://foo', 'bar') => 'http://foo/bar'
        join('http://foo', 'bar', '../baz') => 'http://foo/baz'

    :param base: The starting URL or URL fragment.
    :param args: Further sections; relative ones are combined with
        joinpath().
    :return: base unchanged when no args are given, otherwise the joined
        URL.
    """
    if not args:
        return base

    scheme = None
    base_match = _url_scheme_re.match(base)
    if base_match:
        scheme = base_match.group('scheme')
        segments = base_match.group('path').split('/')
        if segments[-1:] == ['']:
            # Strip off a trailing slash
            # This helps both when we are at the root, and when
            # 'base' has an extra slash at the end
            segments = segments[:-1]
    else:
        segments = base.split('/')

    if scheme is not None and len(segments) >= 1:
        # The first segment is the host; what follows is represented as an
        # absolute path, which it must be given the presence of a scheme.
        host = segments[:1]
        segments = [''] + segments[1:]
        remove_root = True
    else:
        # create an empty host, but dont alter the path - this might be a
        # relative url fragment.
        host = []
        remove_root = False

    for arg in args:
        arg_match = _url_scheme_re.match(arg)
        if not arg_match:
            # Relative section: let joinpath() do the '.'/'..' handling.
            segments = joinpath('/'.join(segments), arg).split('/')
            continue

        # Absolute URL: discard any previous scheme, host and path.
        # this skips .. normalisation, making http://host/../../..
        # be rather strange.
        # XXX: duplicates mess from earlier in this function.  This URL
        # manipulation code needs some cleaning up.
        scheme = arg_match.group('scheme')
        segments = arg_match.group('path').split('/')
        if scheme is not None and len(segments) >= 1:
            host = segments[:1]
            # url scheme implies absolute path.
            segments = [''] + segments[1:]
        else:
            # no url scheme we take the path as is.
            host = []

    if remove_root and segments[0:1] == ['']:
        del segments[0]
    if host:
        # Remove the leading slash from the path, so long as it isn't also the
        # trailing slash, which we want to keep if present.
        if segments and segments[0] == '' and len(segments) > 1:
            del segments[0]
        segments = host + segments

    joined = '/'.join(segments)
    if scheme is None:
        return joined
    return scheme + '://' + joined
	184
	185
def joinpath(base, *args):
    """Join URL path segments to a URL path segment.

    This is somewhat like osutils.joinpath, but intended for URLs.

    XXX: this duplicates some normalisation logic, and also duplicates a lot of
    path handling logic that already exists in some Transport implementations.
    We really should try to have exactly one place in the code base responsible
    for combining paths of URLs.

    :param base: The starting path; one trailing '/' is dropped.
    :param args: Path sections to apply in order.  A section starting with
        '/' restarts the path, '.' chunks are skipped and '..' removes the
        previous chunk.
    :return: The combined path, or '/' when only the root remains.
    :raises errors.InvalidURLJoin: If a '..' is applied while the path is
        exactly the root.
    """
    segments = base.split('/')
    if len(segments) > 1 and segments[-1] == '':
        # If the path ends in a trailing /, remove it.
        segments.pop()

    for arg in args:
        if arg.startswith('/'):
            # An absolute section throws away what was accumulated so far.
            segments = []
        for chunk in arg.split('/'):
            if chunk == '.':
                continue
            if chunk != '..':
                segments.append(chunk)
                continue
            if segments == ['']:
                raise errors.InvalidURLJoin('Cannot go above root',
                        base, args)
            segments.pop()

    if segments == ['']:
        return '/'
    return '/'.join(segments)
	217
	218
# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'
def _posix_local_path_from_url(url):
    """Convert a url like file:///path/to/foo into /path/to/foo

    :raises errors.InvalidURL: If url does not start with 'file:///'.
    """
    if url.startswith('file:///'):
        # Only 'file://' is removed, so the third slash survives as the
        # leading slash of the local path.
        scheme_prefix_len = len('file://')
        return unescape(url[scheme_prefix_len:])
    raise errors.InvalidURL(url, 'local urls must start with file:///')
	226
	227
	228	def _posix_local_path_to_url(path):
	229	"""Convert a local path like ./foo into a URL like file:///path/to/foo
	230
	231	This also handles transforming escaping unicode characters, etc.
	232	"""
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	233	# importing directly from posixpath allows us to test this
1685.1.46 by John Arbash Meinel Sorting functions by name.	234	# on non-posix platforms
1711.4.5 by John Arbash Meinel the _posix_* routines should use posixpath not os.path, so tests pass on win32	235	return 'file://' + escape(_posix_normpath(
1996.3.12 by John Arbash Meinel Change how 'revision' is imported to avoid problems later	236	osutils._posix_abspath(path)))
1685.1.46 by John Arbash Meinel Sorting functions by name.	237
	238
	239	def _win32_local_path_from_url(url):
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	240	"""Convert a url like file:///C:/path/to/foo into C:/path/to/foo"""
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	241	if not url.startswith('file://'):
	242	raise errors.InvalidURL(url, 'local urls must start with file:///, '
	243	'UNC path urls must start with file://')
1685.1.46 by John Arbash Meinel Sorting functions by name.	244	# We strip off all 3 slashes
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	245	win32_url = url[len('file:'):]
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	246	# check for UNC path: //HOST/path
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	247	if not win32_url.startswith('///'):
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	248	if (win32_url[2] == '/'
	249	or win32_url[3] in '\|:'):
	250	raise errors.InvalidURL(url, 'Win32 UNC path urls'
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	251	' have form file://HOST/path')
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	252	return unescape(win32_url)
3503.1.2 by adwi2 Permits Windows to serve all paths on all drives.	253
	254	# allow empty paths so we can serve all roots
	255	if win32_url == '///':
	256	return '/'
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	257
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	258	# usual local path with drive letter
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	259	if (win32_url[3] not in ('abcdefghijklmnopqrstuvwxyz'
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	260	'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	261	or win32_url[4] not in '\|:'
	262	or win32_url[5] != '/'):
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	263	raise errors.InvalidURL(url, 'Win32 file urls start with'
1711.4.8 by John Arbash Meinel switch to prefering lowercase drive letters, since that matches os.getcwd() drive letters	264	' file:///x:/, where x is a valid drive letter')
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	265	return win32_url[3].upper() + u':' + unescape(win32_url[5:])
1685.1.46 by John Arbash Meinel Sorting functions by name.	266
	267
	268	def _win32_local_path_to_url(path):
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	269	"""Convert a local path like ./foo into a URL like file:///C:/path/to/foo
1685.1.46 by John Arbash Meinel Sorting functions by name.	270
	271	This also handles transforming escaping unicode characters, etc.
	272	"""
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	273	# importing directly from ntpath allows us to test this
1711.4.4 by John Arbash Meinel Fix some broken tests because of stupid ntpath.abspath behavior	274	# on non-win32 platform
	275	# FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname
	276	# which actually strips trailing space characters.
	277	# The worst part is that under linux ntpath.abspath has different
	278	# semantics, since 'nt' is not an available module.
3503.1.1 by Adrian Wilkins Add a couple of special cases to urlutils._win32_path_(from\|to)_url	279	if path == '/':
3503.1.2 by adwi2 Permits Windows to serve all paths on all drives.	280	return 'file:///'
3503.1.1 by Adrian Wilkins Add a couple of special cases to urlutils._win32_path_(from\|to)_url	281
2279.4.2 by Alexander Belchenko Don't do normpath after abspath, because this function is called inside abspath	282	win32_path = osutils._win32_abspath(path)
2162.2.2 by Alexander Belchenko Support for win32 UNC path (like: \\HOST\path)	283	# check for UNC path \\HOST\path
	284	if win32_path.startswith('//'):
2162.2.7 by Alexander Belchenko Win32 UNC path \\HOST\path mapped to URL file://HOST/path	285	return 'file:' + escape(win32_path)
3234.3.1 by Alexander Belchenko ensure that local_path_to_url() always returns plain string, not unicode.	286	return ('file:///' + str(win32_path[0].upper()) + ':' +
	287	escape(win32_path[2:]))
1685.1.46 by John Arbash Meinel Sorting functions by name.	288
	289
	290	local_path_to_url = _posix_local_path_to_url
	291	local_path_from_url = _posix_local_path_from_url
1685.1.48 by John Arbash Meinel Updated strip_trailing_slash to support lots more url stuff, added tests	292	MIN_ABS_FILEURL_LENGTH = len('file:///')
1711.4.17 by John Arbash Meinel [merge] bzr.dev 1790	293	WIN32_MIN_ABS_FILEURL_LENGTH = len('file:///C:/')
1685.1.46 by John Arbash Meinel Sorting functions by name.	294
	295	if sys.platform == 'win32':
	296	local_path_to_url = _win32_local_path_to_url
	297	local_path_from_url = _win32_local_path_from_url
	298
1711.2.44 by John Arbash Meinel Factor out another win32 special case and add platform independent tests for it.	299	MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH
1685.1.48 by John Arbash Meinel Updated strip_trailing_slash to support lots more url stuff, added tests	300
	301
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	302	_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	303	_url_hex_escapes_re = re.compile(r'(%[0-9a-fA-F]{2})')
	304
	305
	306	def _unescape_safe_chars(matchobj):
	307	"""re.sub callback to convert hex-escapes to plain characters (if safe).
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	308
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	309	e.g. '%7E' will be converted to '~'.
	310	"""
	311	hex_digits = matchobj.group(0)[1:]
	312	char = chr(int(hex_digits, 16))
	313	if char in _url_dont_escape_characters:
	314	return char
	315	else:
	316	return matchobj.group(0).upper()
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	317
	318
	319	def normalize_url(url):
	320	"""Make sure that a path string is in fully normalized URL form.
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	321
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	322	This handles URLs which have unicode characters, spaces,
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	323	special characters, etc.
	324
	325	It has two basic modes of operation, depending on whether the
	326	supplied string starts with a url specifier (scheme://) or not.
	327	If it does not have a specifier it is considered a local path,
	328	and will be converted into a file:/// url. Non-ascii characters
	329	will be encoded using utf-8.
	330	If it does have a url specifier, it will be treated as a "hybrid"
	331	URL. Basically, a URL that should have URL special characters already
	332	escaped (like +?&# etc), but may have unicode characters, etc
	333	which would not be valid in a real URL.
	334
	335	:param url: Either a hybrid URL or a local path
	336	:return: A normalized URL which only includes 7-bit ASCII characters.
	337	"""
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	338	m = _url_scheme_re.match(url)
	339	if not m:
	340	return local_path_to_url(url)
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	341	scheme = m.group('scheme')
	342	path = m.group('path')
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	343	if not isinstance(url, unicode):
	344	for c in url:
	345	if c not in _url_safe_characters:
1685.1.53 by John Arbash Meinel Updated normalize_url	346	raise errors.InvalidURL(url, 'URLs can only contain specific'
1685.1.53 by John Arbash Meinel Updated normalize_url	347	' safe characters (not %r)' % c)
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	348	path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
	349	return str(scheme + '://' + ''.join(path))
	350
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	351	# We have a unicode (hybrid) url
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	352	path_chars = list(path)
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	353
2208.4.1 by Andrew Bennetts normalize_url should normalise escaping of unreserved characters, like '~'.	354	for i in xrange(len(path_chars)):
	355	if path_chars[i] not in _url_safe_characters:
	356	chars = path_chars[i].encode('utf-8')
	357	path_chars[i] = ''.join(
	358	['%%%02X' % ord(c) for c in path_chars[i].encode('utf-8')])
	359	path = ''.join(path_chars)
	360	path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)
	361	return str(scheme + '://' + path)
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	362
	363
def relative_url(base, other):
    """Return a path to other from base.

    If other is unrelated to base, return other. Else return a relative path.
    This assumes no symlinks as part of the url.

    :param base: The URL the result should be relative to.
    :param other: The URL to reach.
    :return: other itself when either URL lacks a scheme or a path slash,
        when the prefixes up to the first path slash differ, or (on win32
        file:// urls) when the two characters after that slash differ.
        Otherwise a '/'-joined relative path, or '.' when that is empty.
    """
    _unused, base_slash = _find_scheme_and_separator(base)
    if base_slash is None:
        return other

    _unused, other_slash = _find_scheme_and_separator(other)
    if other_slash is None:
        return other

    # this takes care of differing schemes or hosts
    base_prefix = base[:base_slash]
    other_prefix = other[:other_slash]
    if base_prefix != other_prefix:
        return other
    if sys.platform == 'win32' and base_prefix == 'file://':
        # A location on another drive cannot be reached relatively
        base_drive = base[base_slash+1:base_slash+3]
        other_drive = other[other_slash+1:other_slash+3]
        if base_drive != other_drive:
            return other

    base_path = base[base_slash+1:]
    other_path = other[other_slash+1:]
    if base_path.endswith('/'):
        base_path = base_path[:-1]

    base_sections = base_path.split('/')
    if base_sections == ['']:
        base_sections = []
    other_sections = other_path.split('/')
    if other_sections == ['']:
        other_sections = []

    # Count the leading sections that both paths share
    shared = 0
    for base_part, other_part in zip(base_sections, other_sections):
        if base_part != other_part:
            break
        shared += 1

    # Climb out of what is left of base, then descend into other
    climb = ['..'] * (len(base_sections) - shared)
    return "/".join(climb + other_sections[shared:]) or "."
	414
	415
1711.2.43 by John Arbash Meinel Split out win32 specific code so that it can be tested on all platforms.	416	def _win32_extract_drive_letter(url_base, path):
	417	"""On win32 the drive letter needs to be added to the url base."""
	418	# Strip off the drive letter
	419	# path is currently /C:/foo
	420	if len(path) < 3 or path[2] not in ':\|' or path[3] != '/':
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	421	raise errors.InvalidURL(url_base + path,
1711.2.43 by John Arbash Meinel Split out win32 specific code so that it can be tested on all platforms.	422	'win32 file:/// paths need a drive letter')
	423	url_base += path[0:3] # file:// + /C:
	424	path = path[3:] # /foo
	425	return url_base, path
	426
	427
def split(url, exclude_trailing_slash=True):
    """Split a URL into its parent directory and a child directory.

    :param url: A relative or absolute URL
    :param exclude_trailing_slash: Strip off a final '/' if it is part
        of the path (but not if it is part of the protocol specification)

    :return: (parent_url, child_dir).  child_dir may be the empty string if
        we're at the root.
    """
    scheme_loc, first_path_slash = _find_scheme_and_separator(url)

    if first_path_slash is None and scheme_loc is not None:
        # Scheme with no path
        return url, ''

    if first_path_slash is None:
        # Relative path
        if exclude_trailing_slash and url.endswith('/'):
            url = url[:-1]
        return _posix_split(url)

    # We have a fully defined path
    prefix = url[:first_path_slash] # http://host, file://
    remainder = url[first_path_slash:] # /file/foo

    if sys.platform == 'win32' and url.startswith('file:///'):
        # Move the drive letter onto the prefix:
        # file:// and /C:/foo become file:///C: and /foo
        prefix, remainder = _win32_extract_drive_letter(prefix, remainder)

    if exclude_trailing_slash and len(remainder) > 1 \
            and remainder.endswith('/'):
        remainder = remainder[:-1]
    parent, child = _posix_split(remainder)
    return prefix + parent, child
	466
1685.1.46 by John Arbash Meinel Sorting functions by name.	467
def _win32_strip_local_trailing_slash(url):
    """Drop the last character of url unless it is only a drive root.

    A url no longer than WIN32_MIN_ABS_FILEURL_LENGTH (the length of
    'file:///C:/') is returned unchanged.
    """
    if len(url) <= WIN32_MIN_ABS_FILEURL_LENGTH:
        return url
    return url[:-1]
	474
	475
def strip_trailing_slash(url):
    """Strip trailing slash, except for root paths.

    The definition of 'root path' is platform-dependent.
    This assumes that all URLs are valid netloc urls, such that they
    form:
    scheme://host/path
    It searches for ://, and then refuses to remove the next '/'.
    It can also handle relative paths
    Examples:
        path/to/foo       => path/to/foo
        path/to/foo/      => path/to/foo
        http://host/path/ => http://host/path
        http://host/path  => http://host/path
        http://host/      => http://host/
        file:///          => file:///
        file:///foo/      => file:///foo
        # On win32, file:// urls are handled separately and a drive
        # root is left alone.
        file:///c:/       => file:///c:/
    """
    if not url.endswith('/'):
        # Nothing to do
        return url

    if sys.platform == 'win32' and url.startswith('file://'):
        return _win32_strip_local_trailing_slash(url)

    without_slash = url[:-1]
    scheme_loc, first_path_slash = _find_scheme_and_separator(url)
    if scheme_loc is None:
        # This is a relative path, as it has no scheme
        # so just chop off the last character
        return without_slash

    last_index = len(url) - 1
    if first_path_slash is None or first_path_slash == last_index:
        # Don't chop off anything if the only slash is the path
        # separating slash
        return url

    return without_slash
	515
1685.1.47 by John Arbash Meinel s comes before u	516
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	517	def unescape(url):
	518	"""Unescape relpath from url format.
	519
	520	This returns a Unicode path from a URL
	521	"""
	522	# jam 20060427 URLs are supposed to be ASCII only strings
	523	# If they are passed in as unicode, urllib.unquote
	524	# will return a UNICODE string, which actually contains
	525	# utf-8 bytes. So we have to ensure that they are
	526	# plain ASCII strings, or the final .decode will
	527	# try to encode the UNICODE => ASCII, and then decode
	528	# it into utf-8.
	529	try:
	530	url = str(url)
	531	except UnicodeError, e:
	532	raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))
1685.1.80 by Wouter van Heyst more code cleanup	533
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	534	unquoted = urllib.unquote(url)
	535	try:
	536	unicode_path = unquoted.decode('utf-8')
	537	except UnicodeError, e:
	538	raise errors.InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))
	539	return unicode_path
	540
	541
	542	# These are characters that if escaped, should stay that way
	543	_no_decode_chars = ';/?:@&=+$,#'
	544	_no_decode_ords = [ord(c) for c in _no_decode_chars]
3943.8.1 by Marius Kruger remove all trailing whitespace from bzr source	545	_no_decode_hex = (['%02x' % o for o in _no_decode_ords]
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	546	+ ['%02X' % o for o in _no_decode_ords])
1685.1.50 by John Arbash Meinel Added an re for handling scheme paths.	547	_hex_display_map = dict(([('%02x' % o, chr(o)) for o in range(256)]
	548	+ [('%02X' % o, chr(o)) for o in range(256)]))
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	549	#These entries get mapped to themselves
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	550	_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)
1685.1.51 by John Arbash Meinel Working on getting normalize_url working.	551
# Characters that never need percent-encoding; if they do show up encoded,
# decoding them is always safe.
_url_dont_escape_characters = set(''.join([
    "abcdefghijklmnopqrstuvwxyz",  # Lowercase alpha
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",  # Uppercase alpha
    "0123456789",  # Numbers
    "-._~",  # Unreserved characters
    ]))
	560
# Characters that are left as they are (not escaped) in a URL
_url_safe_characters = set(''.join([
    "abcdefghijklmnopqrstuvwxyz",  # Lowercase alpha
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ",  # Uppercase alpha
    "0123456789",  # Numbers
    "_.-!~*'()",  # Unreserved characters
    "/;?:@&=+$,",  # Reserved characters
    "%#",  # Extra reserved characters
    ]))
1685.1.45 by John Arbash Meinel Moved url functions into bzrlib.urlutils	570
def unescape_for_display(url, encoding):
    """Undo as much URL escaping as is safe, to get a readable location.

    file:// URLs are passed to local_path_from_url; the resulting path is
    returned when it can be encoded in ``encoding``, otherwise the URL is
    returned untouched.  For every other URL each '/'-separated section
    after the first is unescaped on its own.  A section is only replaced
    when the unescaped form is valid utf-8 and can be represented in
    ``encoding``; escapes listed in _hex_display_map as protected stay
    escaped.

    :param url: A 7-bit ASCII URL
    :param encoding: The final output encoding

    :return: A unicode string which can be safely encoded into the
        specified encoding.
    :raises ValueError: if encoding is None.
    """
    if encoding is None:
        raise ValueError('you cannot specify None for the display encoding')
    if url.startswith('file://'):
        try:
            path = local_path_from_url(url)
            # Only checking that the path is representable; the encoded
            # bytes themselves are thrown away.
            path.encode(encoding)
        except UnicodeError:
            return url
        else:
            return path

    # Work section by section so one undecodable section does not spoil
    # the others.  Section 0 (the scheme part) is never touched.
    sections = url.split('/')
    for section_index in xrange(1, len(sections)):
        pieces = sections[section_index].split('%')
        # Every piece after the first followed a '%' in the section.
        for piece_index in xrange(1, len(pieces)):
            piece = pieces[piece_index]
            hex_code = piece[:2]
            remainder = piece[2:]
            try:
                replacement = _hex_display_map[hex_code] + remainder
            except KeyError:
                # Not a valid escape: put back the percent symbol
                replacement = '%' + piece
            except UnicodeDecodeError:
                replacement = unichr(int(hex_code, 16)) + remainder
            pieces[piece_index] = replacement
        unescaped = ''.join(pieces)
        try:
            decoded = unescaped.decode('utf-8')
        except UnicodeDecodeError:
            # This section is not valid utf-8 once unescaped, so its
            # escaped form is kept.
            continue
        try:
            decoded.encode(encoding)
        except UnicodeEncodeError:
            # The section cannot be shown in the local encoding, so its
            # escaped form is kept.
            continue
        # Safe to display: take the decoded form
        sections[section_index] = decoded
    return u'/'.join(sections)
2512.4.1 by Ian Clatworthy Fixes #115491 - 'branch lp:projname' now creates ./projname as exected	627
	628
	629	def derive_to_location(from_location):
	630	"""Derive a TO_LOCATION given a FROM_LOCATION.
	631
	632	The normal case is a FROM_LOCATION of http://foo/bar => bar.
	633	The Right Thing for some logical destinations may differ though
	634	because no / may be present at all. In that case, the result is
	635	the full name without the scheme indicator, e.g. lp:foo-bar => foo-bar.
	636	This latter case also applies when a Windows drive
	637	is used without a path, e.g. c:foo-bar => foo-bar.
	638	If no /, path separator or : is found, the from_location is returned.
	639	"""
	640	if from_location.find("/") >= 0 or from_location.find(os.sep) >= 0:
	641	return os.path.basename(from_location.rstrip("/\\"))
	642	else:
	643	sep = from_location.find(":")
	644	if sep > 0:
	645	return from_location[sep+1:]
	646	else:
	647	return from_location
3242.3.26 by Aaron Bentley Implement rebase_url	648
3242.3.35 by Aaron Bentley Cleanups and documentation	649
def _is_absolute(url):
    """Return True if joining url onto a base path yields url itself.

    The check joins url onto the dummy base '/foo' with osutils.pathjoin
    and compares the result with url.

    NOTE(review): presumably pathjoin discards the base exactly when url is
    an absolute path -- confirm against osutils.
    """
    joined = osutils.pathjoin('/foo', url)
    return joined == url
	652
3242.3.35 by Aaron Bentley Cleanups and documentation	653
def rebase_url(url, old_base, new_base):
    """Re-express a path relative to old_base as a path relative to new_base.

    :param url: A path relative to old_base.  Full URLs (anything with a
        scheme) and absolute paths are returned unaltered.
    :param old_base: The base URL that url is currently relative to.
    :param new_base: The base URL the result should be relative to.
    :return: A relative path.
    :raises errors.InvalidRebaseURLs: if the first two urlparse fields
        (scheme and network location) of the two bases differ.
    """
    scheme, _separator = _find_scheme_and_separator(url)
    if scheme is not None or _is_absolute(url):
        return url

    old_parts = urlparse.urlparse(old_base)
    new_parts = urlparse.urlparse(new_base)
    # Fields 0 and 1 are scheme and netloc; field 2 is the path
    if old_parts[:2] != new_parts[:2]:
        raise errors.InvalidRebaseURLs(old_base, new_base)

    # Resolve url against the old base path first, then express that
    # location relative to the new base path.
    target_path = join(old_parts[2], url)
    return determine_relative_path(new_parts[2], target_path)
3242.3.26 by Aaron Bentley Implement rebase_url	671
	672
	673	def determine_relative_path(from_path, to_path):
	674	"""Determine a relative path from from_path to to_path."""
	675	from_segments = osutils.splitpath(from_path)
	676	to_segments = osutils.splitpath(to_path)
	677	count = -1
	678	for count, (from_element, to_element) in enumerate(zip(from_segments,
	679	to_segments)):
	680	if from_element != to_element:
	681	break
	682	else:
	683	count += 1
	684	unique_from = from_segments[count:]
	685	unique_to = to_segments[count:]
	686	segments = (['..'] * len(unique_from) + unique_to)
	687	if len(segments) == 0:
	688	return '.'
	689	return osutils.pathjoin(*segments)