~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/osutils.py

Committer: Brad Crittenden
Date: 2007-02-26 20:56:10 UTC
mfrom: (2300 +trunk)
mto: (2293.1.5 bzr.dev)
mto: This revision was merged to the branch mainline in revision 2311.
Revision ID: brad.crittenden@canonical.com-20070226205610-44oatbxrjjz3ajwy

merge

files added:
bzrlib/tests/repository_implementations/test_iter_reverse_revision_history.py

files modified:
NEWS

bzrlib/add.py

bzrlib/branch.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/serializer/v08.py

bzrlib/cache_utf8.py

bzrlib/generate_ids.py

bzrlib/identitymap.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lockable_files.py

bzrlib/mutabletree.py

bzrlib/osutils.py

bzrlib/plugin.py

bzrlib/repofmt/knitrepo.py

bzrlib/repository.py

bzrlib/revisiontree.py

bzrlib/store/__init__.py

bzrlib/store/revision/__init__.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/store/versioned/__init__.py

bzrlib/testament.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/branch_implementations/test_branch.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/repository_implementations/test_commit_builder.py

bzrlib/tests/repository_implementations/test_fileid_involved.py

bzrlib/tests/test_cache_utf8.py

bzrlib/tests/test_generate_ids.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations/__init__.py

bzrlib/tests/tree_implementations/test_test_trees.py

bzrlib/tests/workingtree_implementations/test_get_parent_ids.py

bzrlib/tests/workingtree_implementations/test_set_root_id.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/tree.py

bzrlib/versionedfile.py

bzrlib/workingtree.py

bzrlib/xml5.py

bzrlib/xml6.py

Show diffs side-by-side

added added

removed removed

bzrlib/osutils.py

import unicodedata

from bzrlib import (

cache_utf8,

errors,

win32utils,

)

813

814

# 2) Isn't one of ' \t\r\n' which are characters we sometimes use as

814

815

# separators

815

816

# 3) '\xa0' isn't unicode safe since it is >128.

816

# So we are following textwrap's example and hard-coding our own.

817

# We probably could ignore \v and \f, too.

818

for ch in u' \t\n\r\v\f':

817

818

# This should *not* be a unicode set of characters in case the source

819

# string is not a Unicode string. We can auto-up-cast the characters since

820

# they are ascii, but we don't want to auto-up-cast the string in case it

821

# is utf-8

822

for ch in ' \t\n\r\v\f':

819

823

if ch in s:

820

824

return True

821

825

else:

881

885

raise errors.BzrBadParameterNotUnicode(unicode_or_utf8_string)

882

886

883

887

888

def safe_utf8(unicode_or_utf8_string):

889

"""Coerce unicode_or_utf8_string to a utf8 string.

890

891

If it is a str, it is returned.

892

If it is Unicode, it is encoded into a utf-8 string.

893

"""

894

if isinstance(unicode_or_utf8_string, str):

895

# TODO: jam 20070209 This is overkill, and probably has an impact on

896

# performance if we are dealing with lots of apis that want a

897

# utf-8 revision id

898

try:

899

# Make sure it is a valid utf-8 string

900

unicode_or_utf8_string.decode('utf-8')

901

except UnicodeDecodeError:

902

raise errors.BzrBadParameterNotUnicode(unicode_or_utf8_string)

903

return unicode_or_utf8_string

904

return unicode_or_utf8_string.encode('utf-8')

905

906

907

def safe_revision_id(unicode_or_utf8_string):

908

"""Revision ids should now be utf8, but at one point they were unicode.

909

910

This is the same as safe_utf8, except it uses the cached encode functions

911

to save a little bit of performance.

912

"""

913

if unicode_or_utf8_string is None:

914

return None

915

if isinstance(unicode_or_utf8_string, str):

916

# TODO: jam 20070209 Eventually just remove this check.

917

try:

918

utf8_str = cache_utf8.get_cached_utf8(unicode_or_utf8_string)

919

except UnicodeDecodeError:

920

raise errors.BzrBadParameterNotUnicode(unicode_or_utf8_string)

921

return utf8_str

922

return cache_utf8.encode(unicode_or_utf8_string)

923

924

925

# TODO: jam 20070217 We start by just re-using safe_revision_id, but ultimately

926

# we want to use a different dictionary cache, because trapping file ids

927

# and revision ids in the same dict seemed to have a noticeable effect on

928

# performance.

929

safe_file_id = safe_revision_id

930

931

884

932

_platform_normalizes_filenames = False

885

933

if sys.platform == 'darwin':

886

934

_platform_normalizes_filenames = True

Older »