~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/urlutils.py

Committer: Robert Collins
Date: 2010-06-25 04:49:54 UTC
mfrom: (5215.3.10 find-bzrdirs-EPERM-handling)
mto: This revision was merged to the branch mainline in revision 5323.
Revision ID: robertc@robertcollins.net-20100625044954-prx0188xfpdgj11g

Merge prerequisite branch and tweak test to be more compact and faster.

files added:
bzrlib/plugins/bash_completion

bzrlib/plugins/bash_completion/README.txt

bzrlib/plugins/bash_completion/__init__.py

bzrlib/plugins/bash_completion/bashcomp.py

bzrlib/plugins/bash_completion/tests

bzrlib/plugins/bash_completion/tests/__init__.py

bzrlib/plugins/bash_completion/tests/test_bashcomp.py

bzrlib/recordcounter.py

bzrlib/tests/fixtures.py

bzrlib/tests/matchers.py

bzrlib/tests/per_branch/test_config.py

bzrlib/tests/per_interbranch/test_copy_content_into.py

bzrlib/tests/per_interbranch/test_get.py

bzrlib/tests/per_tree/test_locking.py

bzrlib/tests/test_fixtures.py

bzrlib/tests/test_matchers.py

bzrlib/transport/gio_transport.py

contrib/bash/bzr

contrib/zsh/README

doc/developers/code-review.txt

doc/developers/code-style.txt

files removed:
contrib/bash/bzr

contrib/bash/bzr.simple

contrib/zsh/_bzr

files modified:
INSTALL

NEWS

bzrlib/__init__.py

bzrlib/_annotator_py.py

bzrlib/_chk_map_py.py

bzrlib/_chk_map_pyx.pyx

bzrlib/_dirstate_helpers_pyx.pyx

bzrlib/_readdir_pyx.pyx

bzrlib/_static_tuple_c.h

bzrlib/_static_tuple_c.pxd

bzrlib/add.py

bzrlib/annotate.py

bzrlib/branch.py

bzrlib/breakin.py

bzrlib/builtins.py

bzrlib/bundle/bundle_data.py

bzrlib/bzrdir.py

bzrlib/chk_map.py

bzrlib/cleanup.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/crash.py

bzrlib/diff.py

bzrlib/doc_generate/autodoc_man.py

bzrlib/errors.py

bzrlib/filters/eol.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/help_topics/__init__.py

bzrlib/help_topics/en/configuration.txt

bzrlib/help_topics/en/content-filters.txt

bzrlib/hooks.py

bzrlib/index.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lock.py

bzrlib/lockdir.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/osutils.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_propose.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_lp_directory.py

bzrlib/plugins/launchpad/test_lp_open.py

bzrlib/plugins/launchpad/test_lp_service.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/reconcile.py

bzrlib/remote.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/repofmt/pack_repo.py

bzrlib/repository.py

bzrlib/revisiontree.py

bzrlib/shelf_ui.py

bzrlib/smart/branch.py

bzrlib/smart/bzrdir.py

bzrlib/smart/medium.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/store/text.py

bzrlib/switch.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_alias.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_check.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_deleted.py

bzrlib/tests/blackbox/test_dpush.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_filesystem_cicp.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_modified.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_reconfigure.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_rmbranch.py

bzrlib/tests/blackbox/test_shelve.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_unknowns.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/features.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_break_lock.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_pull.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_bzrdir_colo/test_unsupported.py

bzrlib/tests/per_interbranch/__init__.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interbranch/test_update_revisions.py

bzrlib/tests/per_pack_repository.py

bzrlib/tests/per_repository/test_commit_builder.py

bzrlib/tests/per_repository/test_refresh_data.py

bzrlib/tests/per_repository/test_repository.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_inv.py

bzrlib/tests/per_versionedfile.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/script.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test__chk_map.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test__walkdirs_win32.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cleanup.py

bzrlib/tests/test_cmdline.py

bzrlib/tests/test_commands.py

bzrlib/tests/test_diff.py

bzrlib/tests/test_directory_service.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_help.py

bzrlib/tests/test_import_tariff.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_log.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_script.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_win32utils.py

bzrlib/textmerge.py

bzrlib/trace.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/ftp/_gssapi.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib2_wrappers.py

bzrlib/transport/local.py

bzrlib/transport/remote.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/treebuilder.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/urlutils.py

bzrlib/weavefile.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml_serializer.py

doc/developers/HACKING.txt

doc/developers/bug-handling.txt

doc/developers/case-insensitive-file-systems.txt

doc/developers/contribution-quickstart.txt

doc/developers/index-plain.txt

doc/developers/index.txt

doc/developers/integration.txt

doc/developers/overview.txt

doc/developers/planned-change-integration.txt

doc/developers/releasing.txt

doc/developers/testing.txt

doc/en/admin-guide/code-browsing.txt

doc/en/admin-guide/introduction.txt

doc/en/admin-guide/upgrade.txt

doc/en/conf.py

doc/en/tutorials/using_bazaar_with_launchpad.txt

doc/en/upgrade-guide/overview.txt

doc/en/user-guide/branching_a_project.txt

doc/en/user-guide/configuring_bazaar.txt

doc/en/user-guide/installing_bazaar.txt

doc/en/user-guide/introducing_bazaar.txt

doc/en/user-guide/plugins.txt

doc/en/user-guide/setting_up_email.txt

doc/en/user-guide/version_info.txt

setup.py

tools/check-newsbugs.py

Show diffs side-by-side

added added

removed removed

bzrlib/urlutils.py

101

first_path_slash = path.find('/')

102

if first_path_slash == -1:

103

return len(scheme), None

104

return len(scheme), first_path_slash+len(scheme)+3

104

return len(scheme), first_path_slash+m.start('path')

105

106

107

def is_url(url):

108

"""Tests whether a URL is in actual fact a URL."""

109

return _url_scheme_re.match(url) is not None

105

110

106

111

107

112

def join(base, *args):

118

123

"""

119

124

if not args:

120

125

return base

121

match = _url_scheme_re.match(base)

122

scheme = None

123

if match:

124

scheme = match.group('scheme')

125

path = match.group('path').split('/')

126

if path[-1:] == ['']:

127

# Strip off a trailing slash

128

# This helps both when we are at the root, and when

129

# 'base' has an extra slash at the end

130

path = path[:-1]

131

else:

132

path = base.split('/')

133

134

if scheme is not None and len(path) >= 1:

135

host = path[:1]

136

# the path should be represented as an abs path.

137

# we know this must be absolute because of the presence of a URL scheme.

138

remove_root = True

139

path = [''] + path[1:]

140

else:

141

# create an empty host, but dont alter the path - this might be a

142

# relative url fragment.

143

host = []

144

remove_root = False

145

126

scheme_end, path_start = _find_scheme_and_separator(base)

127

if scheme_end is None and path_start is None:

128

path_start = 0

129

elif path_start is None:

130

path_start = len(base)

131

path = base[path_start:]

146

132

for arg in args:

147

match = _url_scheme_re.match(arg)

148

if match:

149

# Absolute URL

150

scheme = match.group('scheme')

151

# this skips .. normalisation, making http://host/../../..

152

# be rather strange.

153

path = match.group('path').split('/')

154

# set the host and path according to new absolute URL, discarding

155

# any previous values.

156

# XXX: duplicates mess from earlier in this function. This URL

157

# manipulation code needs some cleaning up.

158

if scheme is not None and len(path) >= 1:

159

host = path[:1]

160

path = path[1:]

161

# url scheme implies absolute path.

162

path = [''] + path

163

else:

164

# no url scheme we take the path as is.

165

host = []

133

arg_scheme_end, arg_path_start = _find_scheme_and_separator(arg)

134

if arg_scheme_end is None and arg_path_start is None:

135

arg_path_start = 0

136

elif arg_path_start is None:

137

arg_path_start = len(arg)

138

if arg_scheme_end is not None:

139

base = arg

140

path = arg[arg_path_start:]

141

scheme_end = arg_scheme_end

142

path_start = arg_path_start

166

143

else:

167

path = '/'.join(path)

168

144

path = joinpath(path, arg)

169

path = path.split('/')

170

if remove_root and path[0:1] == ['']:

171

del path[0]

172

if host:

173

# Remove the leading slash from the path, so long as it isn't also the

174

# trailing slash, which we want to keep if present.

175

if path and path[0] == '' and len(path) > 1:

176

del path[0]

177

path = host + path

178

179

if scheme is None:

180

return '/'.join(path)

181

return scheme + '://' + '/'.join(path)

145

return base[:path_start] + path

182

146

183

147

184

148

def joinpath(base, *args):

278

242

# on non-win32 platform

279

243

# FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname

280

244

# which actually strips trailing space characters.

281

# The worst part is that under linux ntpath.abspath has different

245

# The worst part is that on linux ntpath.abspath has different

282

246

# semantics, since 'nt' is not an available module.

283

247

if path == '/':

284

248

return 'file:///'

303

267

MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH

304

268

305

269

306

_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')

270

_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,}):(//)?(?P<path>.*)$')

307

271

_url_hex_escapes_re = re.compile(r'(%[0-9a-fA-F]{2})')

308

272

309

273

339

303

:param url: Either a hybrid URL or a local path

340

304

:return: A normalized URL which only includes 7-bit ASCII characters.

341

305

"""

342

m = _url_scheme_re.match(url)

343

if not m:

306

scheme_end, path_start = _find_scheme_and_separator(url)

307

if scheme_end is None:

344

308

return local_path_to_url(url)

345

scheme = m.group('scheme')

346

path = m.group('path')

309

prefix = url[:path_start]

310

path = url[path_start:]

347

311

if not isinstance(url, unicode):

348

312

for c in url:

349

313

if c not in _url_safe_characters:

350

314

raise errors.InvalidURL(url, 'URLs can only contain specific'

351

315

' safe characters (not %r)' % c)

352

316

path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)

353

return str(scheme + '://' + ''.join(path))

317

return str(prefix + ''.join(path))

354

318

355

319

# We have a unicode (hybrid) url

356

320

path_chars = list(path)

362

326

['%%%02X' % ord(c) for c in path_chars[i].encode('utf-8')])

363

327

path = ''.join(path_chars)

364

328

path = _url_hex_escapes_re.sub(_unescape_safe_chars, path)

365

return str(scheme + '://' + path)

329

return str(prefix + path)

366

330

367

331

368

332

def relative_url(base, other):

469

433

return url_base + head, tail

470

434

471

435

436

def split_segment_parameters_raw(url):

437

"""Split the subsegment of the last segment of a URL.

438

439

:param url: A relative or absolute URL

440

:return: (url, subsegments)

441

"""

442

(parent_url, child_dir) = split(url)

443

subsegments = child_dir.split(",")

444

if len(subsegments) == 1:

445

return (url, [])

446

return (join(parent_url, subsegments[0]), subsegments[1:])

447

448

449

def split_segment_parameters(url):

450

"""Split the segment parameters of the last segment of a URL.

451

452

:param url: A relative or absolute URL

453

:return: (url, segment_parameters)

454

"""

455

(base_url, subsegments) = split_segment_parameters_raw(url)

456

parameters = {}

457

for subsegment in subsegments:

458

(key, value) = subsegment.split("=", 1)

459

parameters[key] = value

460

return (base_url, parameters)

461

462

463

def join_segment_parameters_raw(base, *subsegments):

464

"""Create a new URL by adding subsegments to an existing one.

465

466

This adds the specified subsegments to the last path in the specified

467

base URL. The subsegments should be bytestrings.

468

469

:note: You probably want to use join_segment_parameters instead.

470

"""

471

if not subsegments:

472

return base

473

for subsegment in subsegments:

474

if type(subsegment) is not str:

475

raise TypeError("Subsegment %r is not a bytestring" % subsegment)

476

if "," in subsegment:

477

raise errors.InvalidURLJoin(", exists in subsegments",

478

base, subsegments)

479

return ",".join((base,) + subsegments)

480

481

482

def join_segment_parameters(url, parameters):

483

"""Create a new URL by adding segment parameters to an existing one.

484

485

The parameters of the last segment in the URL will be updated; if a

486

parameter with the same key already exists it will be overwritten.

487

488

:param url: A URL, as string

489

:param parameters: Dictionary of parameters, keys and values as bytestrings

490

"""

491

(base, existing_parameters) = split_segment_parameters(url)

492

new_parameters = {}

493

new_parameters.update(existing_parameters)

494

for key, value in parameters.iteritems():

495

if type(key) is not str:

496

raise TypeError("parameter key %r is not a bytestring" % key)

497

if type(value) is not str:

498

raise TypeError("parameter value %r for %s is not a bytestring" %

499

(key, value))

500

if "=" in key:

501

raise errors.InvalidURLJoin("= exists in parameter key", url,

502

parameters)

503

new_parameters[key] = value

504

return join_segment_parameters_raw(base,

505

*["%s=%s" % item for item in sorted(new_parameters.items())])

506

507

472

508

def _win32_strip_local_trailing_slash(url):

473

509

"""Strip slashes after the drive letter"""

474

510

if len(url) > WIN32_MIN_ABS_FILEURL_LENGTH:

Older »