813
814
# 2) Isn't one of ' \t\r\n' which are characters we sometimes use as
815
816
# 3) '\xa0' isn't unicode safe since it is >128.
816
# So we are following textwrap's example and hard-coding our own.
817
# We probably could ignore \v and \f, too.
818
for ch in u' \t\n\r\v\f':
818
# This should *not* be a unicode set of characters in case the source
819
# string is not a Unicode string. We can auto-up-cast the characters since
820
# they are ascii, but we don't want to auto-up-cast the string in case it
822
for ch in ' \t\n\r\v\f':
881
885
raise errors.BzrBadParameterNotUnicode(unicode_or_utf8_string)
888
def safe_utf8(unicode_or_utf8_string):
889
"""Coerce unicode_or_utf8_string to a utf8 string.
891
If it is a str, it is returned.
892
If it is Unicode, it is encoded into a utf-8 string.
894
if isinstance(unicode_or_utf8_string, str):
895
# TODO: jam 20070209 This is overkill, and probably has an impact on
896
# performance if we are dealing with lots of apis that want a
899
# Make sure it is a valid utf-8 string
900
unicode_or_utf8_string.decode('utf-8')
901
except UnicodeDecodeError:
902
raise errors.BzrBadParameterNotUnicode(unicode_or_utf8_string)
903
return unicode_or_utf8_string
904
return unicode_or_utf8_string.encode('utf-8')
907
def safe_revision_id(unicode_or_utf8_string):
908
"""Revision ids should now be utf8, but at one point they were unicode.
910
This is the same as safe_utf8, except it uses the cached encode functions
911
to save a little bit of performance.
913
if unicode_or_utf8_string is None:
915
if isinstance(unicode_or_utf8_string, str):
916
# TODO: jam 20070209 Eventually just remove this check.
918
utf8_str = cache_utf8.get_cached_utf8(unicode_or_utf8_string)
919
except UnicodeDecodeError:
920
raise errors.BzrBadParameterNotUnicode(unicode_or_utf8_string)
922
return cache_utf8.encode(unicode_or_utf8_string)
925
# TODO: jam 20070217 We start by just re-using safe_revision_id, but ultimately
926
# we want to use a different dictionary cache, because trapping file ids
927
# and revision ids in the same dict seemed to have a noticeable effect on
929
safe_file_id = safe_revision_id
884
932
_platform_normalizes_filenames = False
885
933
if sys.platform == 'darwin':
886
934
_platform_normalizes_filenames = True