904
905
return unicode_or_utf8_string.encode('utf-8')
907
def safe_revision_id(unicode_or_utf8_string):
908
_revision_id_warning = ('Unicode revision ids were deprecated in bzr 0.15.'
909
' Revision id generators should be creating utf8'
913
def safe_revision_id(unicode_or_utf8_string, warn=True):
    """Revision ids should now be utf8, but at one point they were unicode.

    :param unicode_or_utf8_string: A possibly Unicode revision_id. (can also
        be utf8 or None).
    :param warn: Functions that are sanitizing user data can set warn=False
        to skip the deprecation warning.
    :return: None or a utf8 revision id.
    """
    # Exact class comparison: only None and plain 'str' objects are handed
    # back untouched; everything else goes through the cached encoder.
    if (unicode_or_utf8_string is None
        or unicode_or_utf8_string.__class__ == str):
        return unicode_or_utf8_string
    if warn:
        # NOTE(review): the tail of this call is not visible in the source
        # under review; stacklevel=2 is assumed so that the warning is
        # attributed to the caller -- confirm.
        symbol_versioning.warn(_revision_id_warning, DeprecationWarning,
                               stacklevel=2)
    return cache_utf8.encode(unicode_or_utf8_string)
930
_file_id_warning = ('Unicode file ids were deprecated in bzr 0.15. File id'
931
' generators should be creating utf8 file ids.')
934
def safe_file_id(unicode_or_utf8_string, warn=True):
    """File ids should now be utf8, but at one point they were unicode.

    This is the same as safe_utf8, except it uses the cached encode functions
    to save a little bit of performance.

    :param unicode_or_utf8_string: A possibly Unicode file_id. (can also be
        utf8 or None).
    :param warn: Functions that are sanitizing user data can set warn=False
        to skip the deprecation warning.
    :return: None or a utf8 file id.
    """
    # Exact class comparison: only None and plain 'str' objects are handed
    # back untouched; everything else goes through the cached encoder.
    if (unicode_or_utf8_string is None
        or unicode_or_utf8_string.__class__ == str):
        return unicode_or_utf8_string
    if warn:
        # NOTE(review): the tail of this call is not visible in the source
        # under review; stacklevel=2 is assumed so that the warning is
        # attributed to the caller -- confirm.
        symbol_versioning.warn(_file_id_warning, DeprecationWarning,
                               stacklevel=2)
    return cache_utf8.encode(unicode_or_utf8_string)
925
# TODO: jam 20070217 We start by just re-using safe_revision_id, but ultimately
926
# we want to use a different dictionary cache, because trapping file ids
927
# and revision ids in the same dict seemed to have a noticeable effect on
929
safe_file_id = safe_revision_id
932
954
_platform_normalizes_filenames = False
933
955
if sys.platform == 'darwin':
934
956
_platform_normalizes_filenames = True
1079
1101
# depending on top and prefix - i.e. ./foo and foo as a pair leads to
1080
1102
# potentially confusing output. We should make this more robust - but
1081
1103
# not at a speed cost. RBC 20060731
1084
1105
_directory = _directory_kind
1085
1106
_listdir = os.listdir
1086
pending = [(prefix, "", _directory, None, top)]
1107
_kind_from_mode = _formats.get
1108
pending = [(safe_unicode(prefix), "", _directory, None, safe_unicode(top))]
1089
currentdir = pending.pop()
1090
1110
# 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
1093
relroot = currentdir[0] + '/'
1096
for name in sorted(_listdir(top)):
1097
abspath = top + '/' + name
1098
statvalue = lstat(abspath)
1099
dirblock.append((relroot + name, name,
1100
file_kind_from_stat_mode(statvalue.st_mode),
1101
statvalue, abspath))
1102
yield (currentdir[0], top), dirblock
1103
# push the user specified dirs from dirblock
1104
for dir in reversed(dirblock):
1105
if dir[2] == _directory:
1111
relroot, _, _, _, top = pending.pop()
1113
relprefix = relroot + u'/'
1116
top_slash = top + u'/'
1119
append = dirblock.append
1120
for name in sorted(_listdir(top)):
1121
abspath = top_slash + name
1122
statvalue = _lstat(abspath)
1123
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
1124
append((relprefix + name, name, kind, statvalue, abspath))
1125
yield (relroot, top), dirblock
1127
# push the user specified dirs from dirblock
1128
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1131
def _walkdirs_utf8(top, prefix=""):
    """Walk a tree, yielding every entry with utf-8 encoded names.

    The information is the same as that produced by walkdirs(), except that
    each entry is yielded in utf-8. On platforms which have a filesystem
    encoding of utf8 the paths are returned as exact byte-strings.

    :return: yields a tuple of (dir_info, [file_info])
        dir_info is (utf8_relpath, path-from-top)
        file_info is (utf8_relpath, utf8_name, kind, lstat, path-from-top)
        if top is an absolute path, path-from-top is also an absolute path.
        path-from-top might be unicode or utf8, but it is the correct path to
        pass to os functions to affect the file in question. (such as
        os.lstat)
    """
    fs_encoding = sys.getfilesystemencoding()
    # Filesystem encodings whose names need no transcoding (utf8 and ascii).
    byte_exact_encodings = ('UTF-8', 'US-ASCII', 'ANSI_X3.4-1968')
    must_transcode = (sys.platform == 'win32'
                      or fs_encoding not in byte_exact_encodings)
    if must_transcode:
        walker = _walkdirs_unicode_to_utf8
    else:
        walker = _walkdirs_fs_utf8
    return walker(top, prefix=prefix)
1153
def _walkdirs_fs_utf8(top, prefix=""):
    """See _walkdirs_utf8.

    This sub-function is called when we know the filesystem is already in utf8
    encoding. So we don't need to transcode filenames.

    :param top: Directory to start walking from; passed through safe_utf8.
    :param prefix: Relative path to report for top; passed through safe_utf8.
    :return: A generator of ((relroot, top), dirblock) pairs, one per
        directory. Each dirblock entry is
        (relpath, name, kind, lstat result, abspath), in sorted name order.
    """
    # Bind the helpers used in the inner loop to locals.
    _lstat = os.lstat
    _directory = _directory_kind
    _listdir = os.listdir
    _kind_from_mode = _formats.get
    # Same value as octal 0170000: keeps only the file-type bits of st_mode.
    _file_type_mask = 0xF000

    # 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
    # But we don't actually use 1-3 in pending, so set them to None
    pending = [(safe_utf8(prefix), None, None, None, safe_utf8(top))]
    while pending:
        relroot, _, _, _, top = pending.pop()
        # An empty relroot must not give its children a leading '/'.
        if relroot:
            relprefix = relroot + '/'
        else:
            relprefix = ''
        top_slash = top + '/'

        dirblock = []
        append = dirblock.append
        for name in sorted(_listdir(top)):
            abspath = top_slash + name
            statvalue = _lstat(abspath)
            kind = _kind_from_mode(statvalue.st_mode & _file_type_mask,
                                   'unknown')
            append((relprefix + name, name, kind, statvalue, abspath))
        yield (relroot, top), dirblock

        # push the user specified dirs from dirblock
        # (reversed, because pending is popped from the end: this way the
        # first directory in sorted order is the next one visited)
        pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1188
def _walkdirs_unicode_to_utf8(top, prefix=""):
    """See _walkdirs_utf8

    Because Win32 has a Unicode api, all of the 'path-from-top' entries will be
    Unicode paths.
    This is currently the fallback code path when the filesystem encoding is
    not UTF-8. It may be better to implement an alternative so that we can
    safely handle paths that are not properly decodable in the current
    encoding.

    :param top: Directory to start walking from; passed through safe_unicode.
    :param prefix: Relative path to report for top; passed through safe_utf8.
    :return: A generator of ((relroot, top), dirblock) pairs, one per
        directory. Each dirblock entry is
        (utf8 relpath, utf8 name, kind, lstat result, abspath), where abspath
        is top joined with the name exactly as os.listdir returned it.
    """
    _utf8_encode = codecs.getencoder('utf8')
    # Bind the helpers used in the inner loop to locals.
    _lstat = os.lstat
    _directory = _directory_kind
    _listdir = os.listdir
    _kind_from_mode = _formats.get
    # Same value as octal 0170000: keeps only the file-type bits of st_mode.
    _file_type_mask = 0xF000

    # 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
    # Entries 1-3 are not read back from pending, so they start as None.
    pending = [(safe_utf8(prefix), None, None, None, safe_unicode(top))]
    while pending:
        relroot, _, _, _, top = pending.pop()
        # An empty relroot must not give its children a leading '/'.
        if relroot:
            relprefix = relroot + '/'
        else:
            relprefix = ''
        top_slash = top + u'/'

        dirblock = []
        append = dirblock.append
        for name in sorted(_listdir(top)):
            # The encoder returns (encoded, length consumed); keep the bytes.
            name_utf8 = _utf8_encode(name)[0]
            abspath = top_slash + name
            statvalue = _lstat(abspath)
            kind = _kind_from_mode(statvalue.st_mode & _file_type_mask,
                                   'unknown')
            append((relprefix + name_utf8, name_utf8, kind, statvalue,
                    abspath))
        yield (relroot, top), dirblock

        # push the user specified dirs from dirblock
        # (reversed, because pending is popped from the end: this way the
        # first directory in sorted order is the next one visited)
        pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1109
1227
def copy_tree(from_path, to_path, handlers={}):