904
925
return unicode_or_utf8_string.encode('utf-8')
907
def safe_revision_id(unicode_or_utf8_string):
928
_revision_id_warning = ('Unicode revision ids were deprecated in bzr 0.15.'
929
' Revision id generators should be creating utf8'
933
def safe_revision_id(unicode_or_utf8_string, warn=True):
908
934
"""Revision ids should now be utf8, but at one point they were unicode.
936
:param unicode_or_utf8_string: A possibly Unicode revision_id. (can also be
938
:param warn: Functions that are sanitizing user data can set warn=False
939
:return: None or a utf8 revision id.
941
if (unicode_or_utf8_string is None
942
or unicode_or_utf8_string.__class__ == str):
943
return unicode_or_utf8_string
945
symbol_versioning.warn(_revision_id_warning, DeprecationWarning,
947
return cache_utf8.encode(unicode_or_utf8_string)
950
_file_id_warning = ('Unicode file ids were deprecated in bzr 0.15. File id'
951
' generators should be creating utf8 file ids.')
954
def safe_file_id(unicode_or_utf8_string, warn=True):
955
"""File ids should now be utf8, but at one point they were unicode.
910
957
This is the same as safe_utf8, except it uses the cached encode functions
911
958
to save a little bit of performance.
960
:param unicode_or_utf8_string: A possibly Unicode file_id. (can also be
962
:param warn: Functions that are sanitizing user data can set warn=False
963
:return: None or a utf8 file id.
913
if unicode_or_utf8_string is None:
915
if isinstance(unicode_or_utf8_string, str):
916
# TODO: jam 20070209 Eventually just remove this check.
918
utf8_str = cache_utf8.get_cached_utf8(unicode_or_utf8_string)
919
except UnicodeDecodeError:
920
raise errors.BzrBadParameterNotUnicode(unicode_or_utf8_string)
965
if (unicode_or_utf8_string is None
966
or unicode_or_utf8_string.__class__ == str):
967
return unicode_or_utf8_string
969
symbol_versioning.warn(_file_id_warning, DeprecationWarning,
922
971
return cache_utf8.encode(unicode_or_utf8_string)
943
992
On platforms where the system does not normalize filenames
944
993
(Windows, Linux), you have to access a file by its exact path.
946
Internally, bzr only supports NFC/NFKC normalization, since that is
995
Internally, bzr only supports NFC normalization, since that is
947
996
the standard for XML documents.
949
998
So return the normalized path, and a flag indicating if the file
950
999
can be accessed by that path.
953
return unicodedata.normalize('NFKC', unicode(path)), True
1002
return unicodedata.normalize('NFC', unicode(path)), True
956
1005
def _inaccessible_normalized_filename(path):
    """Normalize path to NFC and report whether it is still accessible.

    On platforms where the system does not normalize filenames
    (Windows, Linux), you have to access a file by its exact path, so the
    normalized name is only usable when it is identical to the input.

    :param path: The path to normalize; it is passed through unicode()
        before normalization.
    :return: A tuple (normalized, accessible).  normalized is the NFC form
        of path; accessible is True only when normalized == path.
    """
    # The docstring is written out here: assigning to a local named __doc__
    # inside the body never attaches documentation to the function.
    normalized = unicodedata.normalize('NFC', unicode(path))
    return normalized, normalized == path
1072
1121
# depending on top and prefix - i.e. ./foo and foo as a pair leads to
1073
1122
# potentially confusing output. We should make this more robust - but
1074
1123
# not at a speed cost. RBC 20060731
1077
1125
_directory = _directory_kind
1078
1126
_listdir = os.listdir
1079
pending = [(prefix, "", _directory, None, top)]
1127
_kind_from_mode = _formats.get
1128
pending = [(safe_unicode(prefix), "", _directory, None, safe_unicode(top))]
1082
currentdir = pending.pop()
1083
1130
# 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
1086
relroot = currentdir[0] + '/'
1089
for name in sorted(_listdir(top)):
1090
abspath = top + '/' + name
1091
statvalue = lstat(abspath)
1092
dirblock.append((relroot + name, name,
1093
file_kind_from_stat_mode(statvalue.st_mode),
1094
statvalue, abspath))
1095
yield (currentdir[0], top), dirblock
1096
# push the user specified dirs from dirblock
1097
for dir in reversed(dirblock):
1098
if dir[2] == _directory:
1131
relroot, _, _, _, top = pending.pop()
1133
relprefix = relroot + u'/'
1136
top_slash = top + u'/'
1139
append = dirblock.append
1140
for name in sorted(_listdir(top)):
1141
abspath = top_slash + name
1142
statvalue = _lstat(abspath)
1143
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
1144
append((relprefix + name, name, kind, statvalue, abspath))
1145
yield (relroot, top), dirblock
1147
# push the user specified dirs from dirblock
1148
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1151
def _walkdirs_utf8(top, prefix=""):
    """Walk a tree, reporting every directory with utf-8 encoded names.

    The information produced matches walkdirs(), except that each entry is
    given in utf-8. On platforms which have a filesystem encoding of utf8
    the paths are returned as exact byte-strings.

    :param top: Passed through unchanged to the selected implementation.
    :param prefix: Passed through unchanged to the selected implementation.
    :return: yields a tuple of (dir_info, [file_info])
        dir_info is (utf8_relpath, path-from-top)
        file_info is (utf8_relpath, utf8_name, kind, lstat, path-from-top)
        if top is an absolute path, path-from-top is also an absolute path.
        path-from-top might be unicode or utf8, but it is the correct path to
        pass to os functions to affect the file in question. (such as os.lstat)
    """
    # Filesystem encodings for which names are handed to the utf8 walker
    # without transcoding.
    byte_safe_encodings = ('UTF-8', 'US-ASCII', 'ANSI_X3.4-1968') # ascii
    encoding_name = _fs_enc.upper()
    needs_transcoding = (sys.platform == 'win32'
                         or encoding_name not in byte_safe_encodings)
    if needs_transcoding:
        walker = _walkdirs_unicode_to_utf8
    else:
        walker = _walkdirs_fs_utf8
    return walker(top, prefix=prefix)
1173
def _walkdirs_fs_utf8(top, prefix=""):
1174
"""See _walkdirs_utf8.
1176
This sub-function is called when we know the filesystem is already in utf8
1177
encoding. So we don't need to transcode filenames.
1180
_directory = _directory_kind
1181
_listdir = os.listdir
1182
_kind_from_mode = _formats.get
1184
# 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
1185
# But we don't actually use 1-3 in pending, so set them to None
1186
pending = [(safe_utf8(prefix), None, None, None, safe_utf8(top))]
1188
relroot, _, _, _, top = pending.pop()
1190
relprefix = relroot + '/'
1193
top_slash = top + '/'
1196
append = dirblock.append
1197
for name in sorted(_listdir(top)):
1198
abspath = top_slash + name
1199
statvalue = _lstat(abspath)
1200
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
1201
append((relprefix + name, name, kind, statvalue, abspath))
1202
yield (relroot, top), dirblock
1204
# push the user specified dirs from dirblock
1205
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1208
def _walkdirs_unicode_to_utf8(top, prefix=""):
1209
"""See _walkdirs_utf8
1211
Because Win32 has a Unicode api, all of the 'path-from-top' entries will be
1213
This is currently the fallback code path when the filesystem encoding is
1214
not UTF-8. It may be better to implement an alternative so that we can
1215
safely handle paths that are not properly decodable in the current
1218
_utf8_encode = codecs.getencoder('utf8')
1220
_directory = _directory_kind
1221
_listdir = os.listdir
1222
_kind_from_mode = _formats.get
1224
pending = [(safe_utf8(prefix), None, None, None, safe_unicode(top))]
1226
relroot, _, _, _, top = pending.pop()
1228
relprefix = relroot + '/'
1231
top_slash = top + u'/'
1234
append = dirblock.append
1235
for name in sorted(_listdir(top)):
1236
name_utf8 = _utf8_encode(name)[0]
1237
abspath = top_slash + name
1238
statvalue = _lstat(abspath)
1239
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
1240
append((relprefix + name_utf8, name_utf8, kind, statvalue, abspath))
1241
yield (relroot, top), dirblock
1243
# push the user specified dirs from dirblock
1244
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1102
1247
def copy_tree(from_path, to_path, handlers={}):
1250
1407
# The pathjoin for '.' is a workaround for Python bug #1213894.
1251
1408
# (initial path components aren't dereferenced)
1252
1409
return pathjoin(realpath(pathjoin('.', parent)), base)
1412
def supports_mapi():
    """Report whether MAPI can be used to launch a mail client.

    :return: True when sys.platform is "win32", False otherwise.
    """
    running_on_win32 = (sys.platform == "win32")
    return running_on_win32
1417
def resource_string(package, resource_name):
    """Load a resource from a package and return it as a string.

    Note: Only packages that start with bzrlib are currently supported.

    This is designed to be a lightweight implementation of resource
    loading in a way which is API compatible with the same API from
    pkg_resources. See
    http://peak.telecommunity.com/DevCenter/PkgResources#basic-resource-access.
    If and when pkg_resources becomes a standard library, this routine
    could presumably defer to it.

    :param package: Dotted package name; either "bzrlib" itself or a name
        beginning with "bzrlib.".
    :param resource_name: Path of the resource relative to the package.
    :return: The contents of the resource file, read in 'rU' mode.
    :raises errors.BzrError: If package is not within bzrlib.
    """
    # Check package name is within bzrlib
    if package == "bzrlib":
        resource_relpath = resource_name
    elif package.startswith("bzrlib."):
        # Turn the sub-package part of the dotted name into a relative path.
        package = package[len("bzrlib."):].replace('.', os.sep)
        resource_relpath = pathjoin(package, resource_name)
    else:
        raise errors.BzrError('resource package %s not in bzrlib' % package)

    # Map the resource to a file and read its contents
    base = dirname(bzrlib.__file__)
    if getattr(sys, 'frozen', None):    # bzr.exe
        base = abspath(pathjoin(base, '..', '..'))
    filename = pathjoin(base, resource_relpath)
    # Close the handle explicitly instead of relying on garbage collection;
    # try/finally keeps this working on interpreters without 'with'.
    resource_file = open(filename, 'rU')
    try:
        return resource_file.read()
    finally:
        resource_file.close()