~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/urlutils.py

Committer: Alexander Belchenko
Date: 2010-06-17 08:53:15 UTC
mfrom: (5300 +trunk)
mto: (5303.2.1 integration)
mto: This revision was merged to the branch mainline in revision 5305.
Revision ID: bialix@ukr.net-20100617085315-hr8186zck57zn35s

merge bzr.dev; fix NEWS

files added:
bzrlib/tests/per_interbranch/test_copy_content_into.py

bzrlib/tests/per_interbranch/test_get.py

files modified:
INSTALL

NEWS

bzrlib/_annotator_py.py

bzrlib/annotate.py

bzrlib/branch.py

bzrlib/breakin.py

bzrlib/builtins.py

bzrlib/bundle/bundle_data.py

bzrlib/cleanup.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/errors.py

bzrlib/filters/eol.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/knit.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/osutils.py

bzrlib/plugin.py

bzrlib/plugins/bash_completion/bashcomp.py

bzrlib/plugins/bash_completion/tests/test_bashcomp.py

bzrlib/recordcounter.py

bzrlib/smart/medium.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_pull.py

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/script.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_cleanup.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_import_tariff.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_log.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_script.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_urlutils.py

bzrlib/textmerge.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/remote.py

bzrlib/transport/ssh.py

bzrlib/urlutils.py

bzrlib/win32utils.py

doc/developers/HACKING.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/code-style.txt

doc/developers/planned-change-integration.txt

doc/developers/testing.txt

doc/en/admin-guide/code-browsing.txt

doc/en/admin-guide/introduction.txt

doc/en/admin-guide/upgrade.txt

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/upgrade-guide/overview.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/version_info.txt

tools/check-newsbugs.py

Show diffs side-by-side

added added

removed removed

bzrlib/urlutils.py

101

first_path_slash = path.find('/')

102

if first_path_slash == -1:

103

return len(scheme), None

104

return len(scheme), first_path_slash+len(scheme)+3

104

return len(scheme), first_path_slash+m.start('path')

105

106

107

def is_url(url):

108

"""Tests whether a URL is in actual fact a URL."""

109

return _url_scheme_re.match(url) is not None

105

110

106

111

107

112

def join(base, *args):

118

123

"""

119

124

if not args:

120

125

return base

121

match = _url_scheme_re.match(base)

122

scheme = None

123

if match:

124

scheme = match.group('scheme')

125

path = match.group('path').split('/')

126

if path[-1:] == ['']:

127

# Strip off a trailing slash

128

# This helps both when we are at the root, and when

129

# 'base' has an extra slash at the end

130

path = path[:-1]

131

else:

132

path = base.split('/')

133

134

if scheme is not None and len(path) >= 1:

135

host = path[:1]

136

# the path should be represented as an abs path.

137

# we know this must be absolute because of the presence of a URL scheme.

138

remove_root = True

139

path = [''] + path[1:]

140

else:

141

# create an empty host, but dont alter the path - this might be a

142

# relative url fragment.

143

host = []

144

remove_root = False

145

126

scheme_end, path_start = _find_scheme_and_separator(base)

127

if scheme_end is None and path_start is None:

128

path_start = 0

129

elif path_start is None:

130

path_start = len(base)

131

path = base[path_start:]

146

132

for arg in args:

147

match = _url_scheme_re.match(arg)

148

if match:

149

# Absolute URL

150

scheme = match.group('scheme')

151

# this skips .. normalisation, making http://host/../../..

152

# be rather strange.

153

path = match.group('path').split('/')

154

# set the host and path according to new absolute URL, discarding

155

# any previous values.

156

# XXX: duplicates mess from earlier in this function. This URL

157

# manipulation code needs some cleaning up.

158

if scheme is not None and len(path) >= 1:

159

host = path[:1]

160

path = path[1:]

161

# url scheme implies absolute path.

162

path = [''] + path

163

else:

164

# no url scheme we take the path as is.

165

host = []

133

arg_scheme_end, arg_path_start = _find_scheme_and_separator(arg)

134

if arg_scheme_end is None and arg_path_start is None:

135

arg_path_start = 0

136

elif arg_path_start is None:

137

arg_path_start = len(arg)

138

if arg_scheme_end is not None:

139

base = arg

140

path = arg[arg_path_start:]

141

scheme_end = arg_scheme_end

142

path_start = arg_path_start

166

143

else:

167

path = '/'.join(path)

168

144

path = joinpath(path, arg)

169

path = path.split('/')

170

if remove_root and path[0:1] == ['']:

171

del path[0]

172

if host:

173

# Remove the leading slash from the path, so long as it isn't also the

174

# trailing slash, which we want to keep if present.

175

if path and path[0] == '' and len(path) > 1:

176

del path[0]

177

path = host + path

178

179

if scheme is None:

180

return '/'.join(path)

181

return scheme + '://' + '/'.join(path)

145

return base[:path_start] + path

182

146

183

147

184

148

def joinpath(base, *args):

278

242

# on non-win32 platform

279

243

# FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname

280

244

# which actually strips trailing space characters.

281

# The worst part is that under linux ntpath.abspath has different

245

# The worst part is that on linux ntpath.abspath has different

282

246

# semantics, since 'nt' is not an available module.

283

247

if path == '/':

284

248

return 'file:///'

303

267

MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH

304

268

305

269

306

_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')

270

_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,}):(//)?(?P<path>.*)$')

307

271

_url_hex_escapes_re = re.compile(r'(%[0-9a-fA-F]{2})')

308

272

309

273

339

303

:param url: Either a hybrid URL or a local path

340

304

:return: A normalized URL which only includes 7-bit ASCII characters.

341

305

"""

342

m = _url_scheme_re.match(url)

343

if not m:

306

scheme_end, path_start = _find_scheme_and_separator(url)

307

if scheme_end is None:

344

308

return local_path_to_url(url)

345

scheme = m.group('scheme')

346

path = m.group('path')

309

prefix = url[:path_start]

310

path = url[path_start:]

347

311

if not isinstance(url, unicode):

348

312

for c in url:

349

313

if c not in _url_safe_characters:

350

314

raise errors.InvalidURL(url, 'URLs can only contain specific'

351

315

' safe characters (not %r)' % c)

352

316

path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)

353

return str(scheme + '://' + ''.join(path))

317

return str(prefix + ''.join(path))

354

318

355

319

# We have a unicode (hybrid) url

356

320

path_chars = list(path)

362

326

['%%%02X' % ord(c) for c in path_chars[i].encode('utf-8')])

363

327

path = ''.join(path_chars)

364

328

path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)

365

return str(scheme + '://' + path)

329

return str(prefix + path)

366

330

367

331

368

332

def relative_url(base, other):

Older »