~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/urlutils.py

Committer: Martin Pool
Date: 2005-09-13 02:11:41 UTC
Revision ID: mbp@sourcefrog.net-20050913021141-263bfc2655ac3ed2

- store inventories in weave

- put more intelligence into WeaveStore

files added:
bzrlib/mdiff.py

bzrlib/merge_core.py

bzrlib/meta_store.py

bzrlib/remotebranch.py

bzrlib/revfile.py

bzrlib/upgrade.py

bzrlib/util/urlgrabber

bzrlib/util/urlgrabber/__init__.py

bzrlib/util/urlgrabber/byterange.py

bzrlib/util/urlgrabber/grabber.py

bzrlib/util/urlgrabber/keepalive.py

bzrlib/util/urlgrabber/mirror.py

bzrlib/util/urlgrabber/progress.py

doc/Makefile

doc/adoption.txt

doc/bitkeeper.txt

doc/changelogs.txt

doc/cherry-picking.txt

doc/cmdref.txt

doc/common-format.txt

doc/compared-aegis.txt

doc/compared-codeville.txt

doc/compared-cvsnt.txt

doc/compared-opencm.txt

doc/compared-prcs.txt

doc/compared-teamware.txt

doc/compression.txt

doc/config-specs.txt

doc/conflicts.txt

doc/costs.txt

doc/darcs.txt

doc/deadly-sins.txt

doc/default.css

doc/design.txt

doc/extra-commands.txt

doc/formats.txt

doc/hashes.txt

doc/ignore.txt

doc/index.txt

doc/interrupted.txt

doc/intro.txt

doc/inventory.txt

doc/join-branches.txt

doc/kill-version.txt

doc/layers.txt

doc/library-interface.txt

doc/merge.txt

doc/mirroring.txt

doc/monotone.txt

doc/news.txt

doc/optional-edit.txt

doc/partial-commit.txt

doc/pool.txt

doc/purpose.txt

doc/python.txt

doc/quilt.txt

doc/quotes.txt

doc/random.txt

doc/requirements.txt

doc/revfile-annotation.txt

doc/revfile.txt

doc/revision-syntax.txt

doc/rollup.txt

doc/scalability.txt

doc/security.txt

doc/shared-branches.txt

doc/short-demo.txt

doc/split-join-files.txt

doc/supportability.txt

doc/svk.txt

doc/switch-in-branch.txt

doc/tagging.txt

doc/taxonomy.txt

doc/thanks.txt

doc/todo-from-arch.txt

doc/unchanged.txt

doc/unrelated-merge.txt

doc/usability.txt

doc/use-cases.txt

doc/web-interface.txt

doc/workflow.txt

doc/yaml.txt

notes

notes/inventory-v2-sample.xml

notes/inventory-v2.rnc

notes/new-inventory-sample.xml

notes/performance.txt

notes/revfile.txt

notes/schemas.xml

patches

patches/annotate3.patch

patches/annotate4.patch

patches/cache-remote-revisions.diff

patches/find-touching-from-seq.diff

patches/meta-data-in-inventory.patch

patches/ndiff.patch

patches/pending-merge.patch

patches/plugins-no-plugins.patch

patches/progress.diff

patches/symlink-support.patch

testbzr

testsweet.py

files removed:
BRANCH.TODO

COPYING.txt

INSTALL

NEWS.developers

bzr.ico

bzrlib/annotate.py

bzrlib/benchmarks

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_add.py

bzrlib/benchmarks/bench_bench.py

bzrlib/benchmarks/bench_cache_utf8.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_inventory.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_osutils.py

bzrlib/benchmarks/bench_rocks.py

bzrlib/benchmarks/bench_sftp.py

bzrlib/benchmarks/bench_status.py

bzrlib/benchmarks/bench_transform.py

bzrlib/benchmarks/bench_workingtree.py

bzrlib/benchmarks/bench_xml.py

bzrlib/benchmarks/tree_creator

bzrlib/benchmarks/tree_creator/__init__.py

bzrlib/benchmarks/tree_creator/heavily_merged.py

bzrlib/benchmarks/tree_creator/kernel_like.py

bzrlib/benchmarks/tree_creator/simple_many_commit.py

bzrlib/bundle

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

bzrlib/bundle/serializer

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bzrdir.py

bzrlib/cache_utf8.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/decorators.py

bzrlib/doc

bzrlib/doc/__init__.py

bzrlib/doc/api

bzrlib/doc/api/__init__.py

bzrlib/doc/api/branch.txt

bzrlib/doc/api/transport.txt

bzrlib/export

bzrlib/export/__init__.py

bzrlib/export/dir_exporter.py

bzrlib/export/tar_exporter.py

bzrlib/export/zip_exporter.py

bzrlib/gpg.py

bzrlib/graph.py

bzrlib/identitymap.py

bzrlib/ignores.py

bzrlib/inspect_for_copy.py

bzrlib/inter.py

bzrlib/iterablefile.py

bzrlib/knit.py

bzrlib/lazy_import.py

bzrlib/lockable_files.py

bzrlib/lockdir.py

bzrlib/lsprof.py

bzrlib/memorytree.py

bzrlib/mutabletree.py

bzrlib/option.py

bzrlib/patches.py

bzrlib/patiencediff.py

bzrlib/plugins/launchpad

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/lp_registration.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/reconcile.py

bzrlib/repository.py

bzrlib/revisionspec.py

bzrlib/revisiontree.py

bzrlib/rio.py

bzrlib/sign_my_commits.py

bzrlib/store

bzrlib/store/revision

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/text.py

bzrlib/store/versioned

bzrlib/symbol_versioning.py

bzrlib/testament.py

bzrlib/tests/EncodingAdapter.py

bzrlib/tests/HTTPTestUtil.py

bzrlib/tests/blackbox

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_bundle.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_ignored.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_locale.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations

bzrlib/tests/branch_implementations/__init__.py

bzrlib/tests/branch_implementations/test_bound_sftp.py

bzrlib/tests/branch_implementations/test_break_lock.py

bzrlib/tests/branch_implementations/test_http.py

bzrlib/tests/branch_implementations/test_locking.py

bzrlib/tests/branch_implementations/test_permissions.py

bzrlib/tests/branch_implementations/test_pull.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations

bzrlib/tests/bzrdir_implementations/__init__.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/interrepository_implementations

bzrlib/tests/interrepository_implementations/__init__.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/intertree_implementations

bzrlib/tests/intertree_implementations/__init__.py

bzrlib/tests/intertree_implementations/test_compare.py

bzrlib/tests/interversionedfile_implementations

bzrlib/tests/interversionedfile_implementations/__init__.py

bzrlib/tests/interversionedfile_implementations/test_join.py

bzrlib/tests/lock_helpers.py

bzrlib/tests/repository_implementations

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_break_lock.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/repository_implementations/test_reconcile.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/repository_implementations/test_revision.py

bzrlib/tests/revisionstore_implementations

bzrlib/tests/revisionstore_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/stub_sftp.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_api.py

bzrlib/tests/test_atomicfile.py

bzrlib/tests/test_bad_files.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_command.py

bzrlib/tests/test_commit.py

bzrlib/tests/test_commit_merge.py

bzrlib/tests/test_config.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_decorators.py

bzrlib/tests/test_doc_generate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_escaped_store.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_gpg.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_http.py

bzrlib/tests/test_http_response.py

bzrlib/tests/test_identitymap.py

bzrlib/tests/test_ignores.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lazy_import.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_memorytree.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_missing.py

bzrlib/tests/test_msgeditor.py

bzrlib/tests/test_nonascii.py

bzrlib/tests/test_options.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_patch.py

bzrlib/tests/test_patches.py

bzrlib/tests/test_patches_data

bzrlib/tests/test_patches_data/diff

bzrlib/tests/test_patches_data/diff-2

bzrlib/tests/test_patches_data/diff-3

bzrlib/tests/test_patches_data/diff-4

bzrlib/tests/test_patches_data/diff-5

bzrlib/tests/test_patches_data/diff-6

bzrlib/tests/test_patches_data/insert_top.patch

bzrlib/tests/test_patches_data/mod

bzrlib/tests/test_patches_data/mod-2

bzrlib/tests/test_patches_data/mod-3

bzrlib/tests/test_patches_data/mod-4

bzrlib/tests/test_patches_data/mod-5

bzrlib/tests/test_patches_data/mod-6

bzrlib/tests/test_patches_data/orig

bzrlib/tests/test_patches_data/orig-2

bzrlib/tests/test_patches_data/orig-3

bzrlib/tests/test_patches_data/orig-4

bzrlib/tests/test_patches_data/orig-5

bzrlib/tests/test_patches_data/orig-6

bzrlib/tests/test_patches_data/patchtext.patch

bzrlib/tests/test_permissions.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_reconcile.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_revert.py

bzrlib/tests/test_revisiontree.py

bzrlib/tests/test_rio.py

bzrlib/tests/test_sampler.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_setup.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_source.py

bzrlib/tests/test_status.py

bzrlib/tests/test_symbol_versioning.py

bzrlib/tests/test_testament.py

bzrlib/tests/test_textfile.py

bzrlib/tests/test_textmerge.py

bzrlib/tests/test_trace.py

bzrlib/tests/test_transactions.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_tree.py

bzrlib/tests/test_treebuilder.py

bzrlib/tests/test_tsort.py

bzrlib/tests/test_tuned_gzip.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_upgrade.py

bzrlib/tests/test_urlutils.py

bzrlib/tests/test_version.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_workingtree.py

bzrlib/tests/tree_implementations

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/treeshape.py

bzrlib/tests/workingtree_implementations

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_basis_inventory.py

bzrlib/tests/workingtree_implementations/test_break_lock.py

bzrlib/tests/workingtree_implementations/test_changes_from.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_executable.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_is_control_filename.py

bzrlib/tests/workingtree_implementations/test_is_ignored.py

bzrlib/tests/workingtree_implementations/test_locking.py

bzrlib/tests/workingtree_implementations/test_merge_from_branch.py

bzrlib/tests/workingtree_implementations/test_mkdir.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_pull.py

bzrlib/tests/workingtree_implementations/test_put_file.py

bzrlib/tests/workingtree_implementations/test_unversion.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/textfile.py

bzrlib/textmerge.py

bzrlib/transactions.py

bzrlib/transform.py

bzrlib/transport

bzrlib/transport/__init__.py

bzrlib/transport/decorator.py

bzrlib/transport/fakenfs.py

bzrlib/transport/fakevfat.py

bzrlib/transport/ftp.py

bzrlib/transport/http

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_pycurl_errors.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/http/response.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/readonly.py

bzrlib/transport/sftp.py

bzrlib/transport/smart.py

bzrlib/transport/ssh.py

bzrlib/treebuilder.py

bzrlib/tsort.py

bzrlib/tuned_gzip.py

bzrlib/ui

bzrlib/ui/text.py

bzrlib/uncommit.py

bzrlib/urlutils.py

bzrlib/util/configobj

bzrlib/util/configobj/__init__.py

bzrlib/util/configobj/configobj.py

bzrlib/util/configobj/docs

bzrlib/util/configobj/docs/BSD-LICENSE.txt

bzrlib/util/configobj/docs/configobj.txt

bzrlib/util/configobj/docs/validate.txt

bzrlib/version.py

bzrlib/versionedfile.py

bzrlib/weave_commands.py

bzrlib/win32console.py

bzrlib/xml4.py

bzrlib/xml6.py

doc/README.1st

doc/centralized_workflow.txt

doc/configuration.txt

doc/default.css

doc/index.txt

doc/plugins.txt

doc/server.txt

doc/setting_up_email.txt

doc/specifying_revisions.txt

doc/using_aliases.txt

generate_docs.py

profile_imports.py

tools/__init__.py

tools/biobench.py

tools/capture_tree.py

tools/doc_generate

tools/doc_generate/__init__.py

tools/doc_generate/autodoc_bash_completion.py

tools/doc_generate/autodoc_rstx.py

tools/riodemo.py

tools/rst2html.py

tools/trace-revisions

tools/win32

tools/win32/__init__.py

tools/win32/bazaar.url

tools/win32/bzr-win32-bdist-postinstall.py

tools/win32/bzr.iss.cog

tools/win32/bzr_postinstall.py

tools/win32/file_version.py

tools/win32/ostools.py

tools/win32/start_bzr.bat

files renamed:
tools/doc_generate/autodoc_man.py => bzr-man.py

bzrlib/bundle/__init__.py => bzrlib/changeset.py

bzrlib/tests/ => bzrlib/selftest/

bzrlib/tests/blackbox/test_too_much.py => bzrlib/selftest/blackbox.py

bzrlib/tests/test_plugins.py => bzrlib/selftest/plugins.py

bzrlib/tests/branch_implementations/test_parent.py => bzrlib/selftest/test_parent.py

bzrlib/tests/branch_implementations/test_branch.py => bzrlib/selftest/testbranch.py

bzrlib/tests/test_diff.py => bzrlib/selftest/testdiff.py

bzrlib/tests/test_fetch.py => bzrlib/selftest/testfetch.py

bzrlib/tests/test_hashcache.py => bzrlib/selftest/testhashcache.py

bzrlib/tests/test_inv.py => bzrlib/selftest/testinv.py

bzrlib/tests/test_log.py => bzrlib/selftest/testlog.py

bzrlib/tests/test_merge3.py => bzrlib/selftest/testmerge3.py

bzrlib/tests/test_revision.py => bzrlib/selftest/testrevision.py

bzrlib/tests/test_revisionnamespaces.py => bzrlib/selftest/testrevisionnamespaces.py

bzrlib/tests/blackbox/test_status.py => bzrlib/selftest/teststatus.py

bzrlib/tests/test_store.py => bzrlib/selftest/teststore.py

bzrlib/tests/blackbox/test_versioning.py => bzrlib/selftest/versioning.py

bzrlib/tests/test_whitebox.py => bzrlib/selftest/whitebox.py

bzrlib/store/__init__.py => bzrlib/store.py

bzrlib/ui/__init__.py => bzrlib/ui.py

bzrlib/store/versioned/__init__.py => bzrlib/weavestore.py

bzrlib/xml_serializer.py => bzrlib/xml.py

bzrlib/upgrade.py => tools/history2weaves.py

bzrlib/tests/test_weave.py => tools/testweave.py

doc/tutorial.txt => tutorial.txt

files modified:
.bzrignore

.rsyncexclude

HACKING

Makefile

NEWS

README

TODO

build-api

bzr *

bzrlib/__init__.py

bzrlib/add.py

bzrlib/atomicfile.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/check.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/delta.py

bzrlib/diff.py

bzrlib/errors.py

bzrlib/externalcommand.py

bzrlib/fetch.py

bzrlib/hashcache.py

bzrlib/help.py

bzrlib/info.py

bzrlib/intset.py

bzrlib/inventory.py

bzrlib/lock.py

bzrlib/log.py

bzrlib/merge.py

bzrlib/merge3.py

bzrlib/missing.py

bzrlib/msgeditor.py

bzrlib/osutils.py

bzrlib/patch.py

bzrlib/plugin.py

bzrlib/progress.py

bzrlib/revision.py

bzrlib/selftest/TestUtil.py

bzrlib/selftest/__init__.py

bzrlib/selftest/test_merge_core.py

bzrlib/selftest/test_smart_add.py

bzrlib/selftest/test_xml.py

bzrlib/shellcomplete.py

bzrlib/status.py

bzrlib/textinv.py

bzrlib/textui.py

bzrlib/trace.py

bzrlib/tree.py

bzrlib/util/elementtree/ElementTree.py

bzrlib/weave.py *

bzrlib/weavefile.py

bzrlib/workingtree.py

bzrlib/xml5.py

contrib/newinventory.py

contrib/pwk

contrib/zsh/_bzr

setup.py

tools/convertfile.py

tools/convertinv.py

tools/history2revfiles.py

tools/http_client.py

tools/weavebench.py

Show diffs side-by-side

added added

removed removed

bzrlib/urlutils.py

# Bazaar -- distributed version control

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""A collection of functions for handling URL operations."""

import os

from posixpath import split as _posix_split, normpath as _posix_normpath

import re

import sys

import urllib

import bzrlib.errors as errors

import bzrlib.osutils

def basename(url, exclude_trailing_slash=True):
    """Return the final path component of a URL.

    :param url: The URL to inspect
    :param exclude_trailing_slash: When true, a URL such as "path/to/foo/"
        yields 'foo' instead of ''
    :return: The last component only. This is '' when the URL is at its
        root, or when it ends in a slash and exclude_trailing_slash is
        false.
    """
    _parent, last_component = split(
        url, exclude_trailing_slash=exclude_trailing_slash)
    return last_component

def dirname(url, exclude_trailing_slash=True):
    """Return the URL of the directory containing the given URL.

    :param url: Relative or absolute URL
    :param exclude_trailing_slash: Drop a final slash before splitting
        (http://host/foo/ is handled like http://host/foo, while
        http://host/ is left as http://host/)
    :return: The URL with its last path chunk removed
    """
    # TODO: jam 20060502 This was named dirname to be consistent
    #       with the os functions, but maybe "parent" would be better
    parent, _last_component = split(
        url, exclude_trailing_slash=exclude_trailing_slash)
    return parent

def escape(relpath):

"""Escape relpath to be a valid url."""

if isinstance(relpath, unicode):

relpath = relpath.encode('utf-8')

# After quoting and encoding, the path should be perfectly

# safe as a plain ASCII string, str() just enforces this

return str(urllib.quote(relpath))

def file_relpath(base, path):
    """Return the escaped path of one file:// URL relative to another.

    Both arguments are expected to be fully specified file:// URLs.

    :param base: The file:// URL to measure from
    :param path: The file:// URL to express relative to base
    :return: The escaped relative path
    """
    too_short_message = ('Length of base must be equal or'
        ' exceed the platform minimum url length (which is %d)' %
        MIN_ABS_FILEURL_LENGTH)
    assert len(base) >= MIN_ABS_FILEURL_LENGTH, too_short_message

    local_base = local_path_from_url(base)
    local_path = local_path_from_url(path)
    relative = bzrlib.osutils.relpath(local_base, local_path)
    return escape(relative)

def _find_scheme_and_separator(url):
    """Locate the scheme separator (://) and the first path slash.

    This is a helper function for the other path utilities.
    It could probably be replaced by urlparse.

    :param url: The URL to examine
    :return: (scheme_length, first_path_slash_index). Both are None when
        url has no scheme; the second is None when there is no slash
        after the '://'.
    """
    match = _url_scheme_re.match(url)
    if match is None:
        return None, None

    scheme_length = len(match.group('scheme'))
    # Position of the first slash after the '://', relative to the start
    # of the text following it.
    slash_offset = match.group('path').find('/')
    if slash_offset < 0:
        return scheme_length, None
    # The 3 accounts for the '://' between the scheme and the rest.
    return scheme_length, slash_offset + scheme_length + 3

100

101

102

def join(base, *args):
    """Build a URL by joining base with each of the following sections.

    '.' and '..' chunks in the sections are normalized, assuming that
    paths are absolute (and that no symlinks are involved).

    A section that is itself an absolute URL replaces everything
    collected so far.
    Example:
        join('http://foo', 'http://bar') => 'http://bar'
        join('http://foo', 'bar') => 'http://foo/bar'
        join('http://foo', 'bar', '../baz') => 'http://foo/baz'

    :param base: The starting URL or relative path
    :param args: Further sections to append, in order
    :return: The joined URL
    :raises errors.InvalidURLJoin: if a '..' would remove the first
        remaining path element
    """
    scheme = None
    base_match = _url_scheme_re.match(base)
    if base_match:
        scheme = base_match.group('scheme')
        path = base_match.group('path').split('/')
        if path[-1:] == ['']:
            # Drop the empty chunk left by a trailing slash. This helps
            # both at the root and when 'base' ends with an extra slash.
            path = path[:-1]
    else:
        path = base.split('/')

    for arg in args:
        arg_match = _url_scheme_re.match(arg)
        if arg_match:
            # Absolute URL: start over from it. '..' normalisation is
            # skipped here, making http://host/../../.. rather strange.
            scheme = arg_match.group('scheme')
            path = arg_match.group('path').split('/')
            continue

        for chunk in arg.split('/'):
            if chunk == '.':
                continue
            if chunk != '..':
                path.append(chunk)
                continue
            if len(path) < 2:
                # Only the host portion is left; refuse to pop it.
                raise errors.InvalidURLJoin('Cannot go above root',
                        base, args)
            path.pop()

    joined_path = '/'.join(path)
    if scheme is None:
        return joined_path
    return scheme + '://' + joined_path

152

153

154

# jam 20060502 Sorted to 'l' because the final target is 'local_path_from_url'

155

def _posix_local_path_from_url(url):
    """Turn a url such as file:///path/to/foo into /path/to/foo.

    :param url: A URL beginning with file:///
    :return: The unescaped local path
    :raises errors.InvalidURL: if url does not begin with file:///
    """
    if url.startswith('file:///'):
        # Only 'file://' is removed, so the path keeps its leading slash.
        return unescape(url[len('file://'):])
    raise errors.InvalidURL(url, 'local urls must start with file:///')

161

162

163

def _posix_local_path_to_url(path):
    """Turn a local path such as ./foo into a URL like file:///path/to/foo.

    The path is made absolute, normalized and escaped.

    :param path: A local path
    :return: The matching file:// URL
    """
    # The posix helpers are used explicitly so this can be tested on
    # non-posix platforms.
    absolute_path = bzrlib.osutils._posix_abspath(path)
    normalized_path = _posix_normpath(absolute_path)
    return 'file://' + escape(normalized_path)

172

173

174

def _win32_local_path_from_url(url):
    """Convert a url like file:///C:/path/to/foo into C:/path/to/foo

    :param url: A URL of the form file:///x:/... or file:///x|/...
        where x is a drive letter
    :return: The unescaped local path, with an upper-case drive letter
        followed by ':'
    :raises errors.InvalidURL: if url does not start with file:///, or
        if what follows is not a drive letter, then ':' or '|', then '/'
    """
    if not url.startswith('file:///'):
        raise errors.InvalidURL(url, 'local urls must start with file:///')
    # We strip off all 3 slashes
    win32_url = url[len('file:///'):]
    # The length check comes first so that short inputs such as
    # 'file:///' or 'file:///C:' are reported as InvalidURL rather than
    # failing with IndexError on the subscripts below.
    if (len(win32_url) < 3
        or win32_url[0] not in ('abcdefghijklmnopqrstuvwxyz'
                                'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
        or win32_url[1] not in '|:'
        or win32_url[2] != '/'):
        raise errors.InvalidURL(url, 'Win32 file urls start with'
                ' file:///x:/, where x is a valid drive letter')
    # The separator is always emitted as ':', even if the URL used '|'.
    return win32_url[0].upper() + u':' + unescape(win32_url[2:])

187

188

189

def _win32_local_path_to_url(path):
    """Turn a local path such as ./foo into a URL like file:///C:/path/to/foo.

    The path is made absolute, normalized and escaped, and backslashes
    become forward slashes.

    :param path: A local path
    :return: The matching file:/// URL with an upper-case drive letter
    """
    # The nt helpers are used explicitly so this can be tested on
    # non-win32 platforms.
    # FIXME: It turns out that on nt, ntpath.abspath uses nt._getfullpathname
    #       which actually strips trailing space characters.
    #       The worst part is that under linux ntpath.abspath has different
    #       semantics, since 'nt' is not an available module.
    absolute_path = bzrlib.osutils._win32_abspath(path)
    win32_path = bzrlib.osutils._nt_normpath(absolute_path).replace('\\', '/')
    drive_letter = win32_path[0].upper()
    # win32_path[1] is skipped; the ':' is written explicitly instead.
    return 'file:///' + drive_letter + ':' + escape(win32_path[2:])

203

204

205

# Platform dispatch: start with the POSIX implementations and replace
# them with the win32 ones below when running on win32.
local_path_to_url = _posix_local_path_to_url
local_path_from_url = _posix_local_path_from_url
# Length of the shortest absolute file URL on each platform.
MIN_ABS_FILEURL_LENGTH = len('file:///')
WIN32_MIN_ABS_FILEURL_LENGTH = len('file:///C:/')

if sys.platform == 'win32':
    local_path_to_url = _win32_local_path_to_url
    local_path_from_url = _win32_local_path_from_url

    MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH


# Matches 'scheme://rest'. The scheme must be at least two characters,
# none of them ':' or '/'; everything after '://' is captured as 'path'.
# NOTE(review): the two-character minimum presumably keeps a drive letter
# from being read as a scheme -- confirm.
_url_scheme_re = re.compile(r'^(?P<scheme>[^:/]{2,})://(?P<path>.*)$')

218

219

220

def normalize_url(url):
    """Make sure that a path string is in fully normalized URL form.

    This handles URLs which have unicode characters, spaces,
    special characters, etc.

    It has two basic modes of operation, depending on whether the
    supplied string starts with a url specifier (scheme://) or not.
    If it does not have a specifier it is considered a local path,
    and will be converted into a file:/// url. Non-ascii characters
    will be encoded using utf-8.
    If it does have a url specifier, it will be treated as a "hybrid"
    URL. Basically, a URL that should have URL special characters already
    escaped (like +?&# etc), but may have unicode characters, etc
    which would not be valid in a real URL.

    :param url: Either a hybrid URL or a local path
    :return: A normalized URL which only includes 7-bit ASCII characters.
    :raises errors.InvalidURL: if url has a scheme, is not a unicode
        string, and contains a character outside the URL-safe set
    """
    m = _url_scheme_re.match(url)
    if not m:
        return local_path_to_url(url)
    if not isinstance(url, unicode):
        # A byte-string URL must already be fully escaped: check it and
        # return it unchanged.
        for c in url:
            if c not in _url_safe_characters:
                raise errors.InvalidURL(url, 'URLs can only contain specific'
                                    ' safe characters (not %r)' % c)
        return url
    # We have a unicode (hybrid) url
    scheme = m.group('scheme')
    path = list(m.group('path'))

    for i, char in enumerate(path):
        if char not in _url_safe_characters:
            # Encode the character once and percent-escape each of the
            # resulting utf-8 bytes.
            utf8_bytes = char.encode('utf-8')
            path[i] = ''.join(['%%%02X' % ord(c) for c in utf8_bytes])
    return scheme + '://' + ''.join(path)

257

258

259

def relative_url(base, other):
    """Express other as a path relative to base.

    When other cannot be related to base (either URL lacks a scheme or a
    path slash, or their scheme-and-host parts differ), other is returned
    unchanged. This assumes no symlinks as part of the url.

    :param base: The URL to start from
    :param other: The URL to reach
    :return: A relative path from base to other ('.' if they are the
        same location), or other itself when the two are unrelated
    """
    _unused, base_first_slash = _find_scheme_and_separator(base)
    if base_first_slash is None:
        return other

    _unused, other_first_slash = _find_scheme_and_separator(other)
    if other_first_slash is None:
        return other

    # Comparing everything before the first path slash takes care of
    # differing schemes or hosts.
    if base[:base_first_slash] != other[:other_first_slash]:
        return other

    base_path = base[base_first_slash+1:]
    if base_path.endswith('/'):
        base_path = base_path[:-1]
    other_path = other[other_first_slash+1:]

    base_sections = base_path.split('/')
    if base_sections == ['']:
        base_sections = []
    other_sections = other_path.split('/')
    if other_sections == ['']:
        other_sections = []

    # Count how many leading sections the two paths have in common.
    match_len = 0
    for base_chunk, other_chunk in zip(base_sections, other_sections):
        if base_chunk != other_chunk:
            break
        match_len += 1

    # Go up once for every base section past the common prefix, then
    # down into what remains of other.
    output_sections = ['..'] * (len(base_sections) - match_len)
    output_sections.extend(other_sections[match_len:])

    return "/".join(output_sections) or "."

304

305

306

def _win32_extract_drive_letter(url_base, path):

307

"""On win32 the drive letter needs to be added to the url base."""

308

# Strip off the drive letter

309

# path is currently /C:/foo

310

if len(path) < 3 or path[2] not in ':|' or path[3] != '/':

311

raise errors.InvalidURL(url_base + path,

312

'win32 file:/// paths need a drive letter')

313

url_base += path[0:3] # file:// + /C:

314

path = path[3:] # /foo

315

return url_base, path

316

317

318

def split(url, exclude_trailing_slash=True):
    """Split a URL into its parent directory and its last component.

    :param url: A relative or absolute URL
    :param exclude_trailing_slash: Strip off a final '/' if it is part
        of the path (but not if it is part of the protocol specification)

    :return: (parent_url, child_dir).  child_dir may be the empty string
        if we're at the root.
    """
    scheme_loc, first_path_slash = _find_scheme_and_separator(url)

    if first_path_slash is None:
        if scheme_loc is not None:
            # There is a scheme but no path after it.
            return url, ''
        # No scheme at all, so this is a relative path.
        if exclude_trailing_slash and url.endswith('/'):
            url = url[:-1]
        return _posix_split(url)

    # A fully defined URL: separate it at the first path slash.
    url_base = url[:first_path_slash] # http://host, file://
    path = url[first_path_slash:] # /file/foo

    if sys.platform == 'win32' and url.startswith('file:///'):
        # Move the drive letter from the path onto the base:
        # file:// and /C:/foo become file:///C: and /foo
        url_base, path = _win32_extract_drive_letter(url_base, path)

    # A path that is just '/' keeps its slash.
    if exclude_trailing_slash and len(path) > 1 and path.endswith('/'):
        path = path[:-1]
    head, tail = _posix_split(path)
    return url_base + head, tail

356

357

358

def _win32_strip_local_trailing_slash(url):
    """Drop the last character of url unless it is only a drive root.

    :param url: A win32 file:/// URL; the caller has already checked
        that it ends with '/'
    :return: url without its final character, or url unchanged when it
        is no longer than 'file:///C:/'
    """
    if len(url) <= WIN32_MIN_ABS_FILEURL_LENGTH:
        return url
    return url[:-1]

364

365

366

def strip_trailing_slash(url):
    """Remove a trailing slash from url, except for root paths.

    What counts as a 'root path' is platform-dependent.
    URLs with a scheme are assumed to have the form:
        scheme://host/path
    The first '/' after the '://' is never removed.
    Relative paths are handled as well.
    Examples:
        path/to/foo       => path/to/foo
        path/to/foo/      => path/to/foo
        http://host/path/ => http://host/path
        http://host/path  => http://host/path
        http://host/      => http://host/
        file:///          => file:///
        file:///foo/      => file:///foo
        # On win32 a file:/// URL no longer than file:///C:/ is
        # returned unchanged.
        file:///C:/       => file:///C:/

    :param url: A relative or absolute URL
    :return: url without its trailing slash where one may be removed
    """
    if not url.endswith('/'):
        return url
    if sys.platform == 'win32' and url.startswith('file:///'):
        return _win32_strip_local_trailing_slash(url)

    scheme_loc, first_path_slash = _find_scheme_and_separator(url)
    has_scheme = scheme_loc is not None
    if has_scheme:
        if first_path_slash is None or first_path_slash == len(url)-1:
            # The only slash is the one separating host and path.
            return url

    # Either a relative path, or an absolute URL with a deeper path.
    return url[:-1]

405

406

407

def unescape(url):
    """Unescape relpath from url format.

    This returns a Unicode path from a URL

    :param url: A URL, or part of one, that converts to a plain str
    :return: The percent-unquoted text, decoded from utf-8
    :raises errors.InvalidURL: if str(url) raises UnicodeError, or if the
        unquoted bytes cannot be decoded as utf-8
    """
    # jam 20060427 URLs are supposed to be ASCII only strings
    #       If they are passed in as unicode, urllib.unquote
    #       will return a UNICODE string, which actually contains
    #       utf-8 bytes. So we have to ensure that they are
    #       plain ASCII strings, or the final .decode will
    #       try to encode the UNICODE => ASCII, and then decode
    #       it into utf-8.
    try:
        url = str(url)
    except UnicodeError, e:
        raise errors.InvalidURL(url, 'URL was not a plain ASCII url: %s' % (e,))

    unquoted = urllib.unquote(url)
    try:
        # NOTE(review): the message below says "encode" although this
        # step decodes.
        unicode_path = unquoted.decode('utf-8')
    except UnicodeError, e:
        raise errors.InvalidURL(url, 'Unable to encode the URL as utf-8: %s' % (e,))
    return unicode_path

430

431

432

# These are characters that if escaped, should stay that way
_no_decode_chars = ';/?:@&=+$,#'
_no_decode_ords = [ord(c) for c in _no_decode_chars]
# Two-digit hex codes of those characters, in lower and upper case.
_no_decode_hex = (['%02x' % o for o in _no_decode_ords]
                + ['%02X' % o for o in _no_decode_ords])
# Maps a two-digit hex code to the character it stands for. Keys exist
# for the all-lower-case and all-upper-case spellings only, so a
# mixed-case code such as 'aB' is not a key.
_hex_display_map = dict(([('%02x' % o, chr(o)) for o in range(256)]
                    + [('%02X' % o, chr(o)) for o in range(256)]))
#These entries get mapped to themselves
_hex_display_map.update((hex,'%'+hex) for hex in _no_decode_hex)

# These characters should not be escaped
_url_safe_characters = set('abcdefghijklmnopqrstuvwxyz'
                           'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                           '0123456789' '_.-/'
                           ';?:@&=+$,%#')

447

448

449

def unescape_for_display(url, encoding):
    """Decode what you can for a URL, so that we get a nice looking path.

    This will turn file:// urls into local paths, and try to decode
    any portions of a http:// style url that it can.

    Any sections of the URL which can't be represented in the encoding or
    need to stay as escapes are left alone.

    :param url: A 7-bit ASCII URL
    :param encoding: The final output encoding

    :return: A unicode string which can be safely encoded into the
        specified encoding.
    """
    assert encoding is not None, 'you cannot specify None for the display encoding.'
    if url.startswith('file://'):
        try:
            path = local_path_from_url(url)
            # The encoded result is discarded; this only checks that the
            # path can be represented in the target encoding.
            path.encode(encoding)
            return path
        except UnicodeError:
            # Fall back to the URL as given.
            return url

    # Split into sections to try to decode utf-8
    res = url.split('/')
    # Index 0 (the text before the first '/') is never touched.
    for i in xrange(1, len(res)):
        escaped_chunks = res[i].split('%')
        # Every chunk after the first followed a '%', so its first two
        # characters are treated as a hex escape.
        for j in xrange(1, len(escaped_chunks)):
            item = escaped_chunks[j]
            try:
                escaped_chunks[j] = _hex_display_map[item[:2]] + item[2:]
            except KeyError:
                # Put back the percent symbol
                escaped_chunks[j] = '%' + item
            except UnicodeDecodeError:
                # NOTE(review): presumably reached when a non-ASCII byte
                # from the map is concatenated with unicode text --
                # confirm.
                escaped_chunks[j] = unichr(int(item[:2], 16)) + item[2:]
        unescaped = ''.join(escaped_chunks)
        try:
            decoded = unescaped.decode('utf-8')
        except UnicodeDecodeError:
            # If this path segment cannot be properly utf-8 decoded
            # after doing unescaping we will just leave it alone
            pass
        else:
            try:
                decoded.encode(encoding)
            except UnicodeEncodeError:
                # If this chunk cannot be encoded in the local
                # encoding, then we should leave it alone
                pass
            else:
                # Otherwise take the url decoded one
                res[i] = decoded
    return u'/'.join(res)

Older »