~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/urlutils.py

Committer: John Arbash Meinel
Date: 2006-05-02 22:43:51 UTC
mto: This revision was merged to the branch mainline in revision 1752.
Revision ID: john@arbash-meinel.com-20060502224351-2aab4b966d21acb5

Added bzrlib.urlutils.split and basename + dirname

files modified:
bzrlib/tests/test_urlutils.py

bzrlib/urlutils.py

Show diffs side-by-side

added added

removed removed

bzrlib/urlutils.py

"""A collection of function for handling URL operations."""

import os

from posixpath import split as _posix_split

import urllib

import sys

import bzrlib.osutils

def basename(url, exclude_trailing_slash=True):
    """Return the last component of a URL.

    This is the second element of the pair returned by split().

    :param url: The URL in question
    :param exclude_trailing_slash: If the url looks like "path/to/foo/"
        ignore the final slash and return 'foo' rather than ''
    :return: Just the final component of the URL. This can return ''
        if you don't exclude_trailing_slash, or if you are at the
        root of the URL.
    """
    return split(url, exclude_trailing_slash=exclude_trailing_slash)[1]

def dirname(url, exclude_trailing_slash=True):
    """Return the parent directory of the given path.

    This is the first element of the pair returned by split().

    :param url: Relative or absolute URL
    :param exclude_trailing_slash: Remove a final slash
        (treat http://host/foo/ as http://host/foo, but
        http://host/ stays http://host/)
    :return: Everything in the URL except the last path chunk
    """
    # TODO: jam 20060502 This was named dirname to be consistent
    #       with the os functions, but maybe "parent" would be better
    return split(url, exclude_trailing_slash=exclude_trailing_slash)[0]

def escape(relpath):

"""Escape relpath to be a valid url."""

if isinstance(relpath, unicode):

return escape(bzrlib.osutils.relpath(base, path))

def _find_scheme_and_separator(url):

"""Find the scheme separator (://) and the first path separator

This is just a helper functions for other path utilities.

It could probably be replaced by urlparse

"""

scheme_loc = url.find('://')

if scheme_loc == -1:

return None, None

# Find the path separating slash

# (first slash after the ://)

first_path_slash = url.find('/', scheme_loc+3)

if first_path_slash == -1:

return scheme_loc, None

return scheme_loc, first_path_slash

# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'

def _posix_local_path_from_url(url):

100

"""Convert a url like file:///path/to/foo into /path/to/foo"""

107

154

MIN_ABS_FILEURL_LENGTH = len('file:///C|/')

108

155

109

156

110

def basename(url, exclude_trailing_slash=True):

111

"""Return the last component of a URL.

157

def split(url, exclude_trailing_slash=True):

158

"""Split a URL into its parent directory and a child directory.

112

159

113

:param url: The URL in question

114

:param exclude_trailing_slash: If the url looks like "path/to/foo/"

115

ignore the final slash and return 'foo' rather than ''

160

:param url: A relative or absolute URL

161

:param exclude_trailing_slash: Strip off a final '/' if it is part

162

of the path (but not if it is part of the protocol specification)

116

163

"""

117

if exclude_trailing_slash:

118

url = strip_trailing_slash(url)

164

scheme_loc, first_path_slash = _find_scheme_and_separator(url)

165

166

if first_path_slash is None:

167

# We have either a relative path, or no separating slash

168

if scheme_loc is None:

169

# Relative path

170

if exclude_trailing_slash and url.endswith('/'):

171

url = url[:-1]

172

return _posix_split(url)

173

else:

174

# Scheme with no path

175

return url, ''

176

177

# We have a fully defined path

178

url_base = url[:first_path_slash] # http://host, file://

179

path = url[first_path_slash:] # /file/foo

180

181

if sys.platform == 'win32' and url.startswith('file:///'):

182

# Strip off the drive letter

183

if path[2:3] not in '\\/':

184

raise InvalidURL(url,

185

'win32 file:/// paths need a drive letter')

186

url_base += path[1:4] # file:///C|/

187

path = path[3:]

188

189

if exclude_trailing_slash and len(path) > 1 and path.endswith('/'):

190

path = path[:-1]

191

head, tail = _posix_split(path)

192

return url_base + head, tail

193

119

194

120

195

def strip_trailing_slash(url):

121

196

"""Strip trailing slash, except for root paths.

146

221

# of a win32 path is actually the drive letter

147

222

if len(url) > MIN_ABS_FILEURL_LENGTH:

148

223

return url[:-1]

149

scheme_loc = url.find('://')

150

if scheme_loc == -1:

224

scheme_loc, first_path_slash = _find_scheme_and_separator(url)

225

if scheme_loc is None:

151

226

# This is a relative path, as it has no scheme

152

227

# so just chop off the last character

153

228

return url[:-1]

154

229

155

# Find the path separating slash

156

# (first slash after the ://)

157

first_path_slash = url.find('/', scheme_loc+3)

158

if first_path_slash == -1 or first_path_slash == len(url)-1:

230

if first_path_slash is None or first_path_slash == len(url)-1:

159

231

# Don't chop off anything if the only slash is the path

160

232

# separating slash

161

233

return url

Older »