171
246
rename_func(tmp_name, new)
173
# Default is to just use the python builtins
174
abspath = os.path.abspath
175
realpath = os.path.realpath
249
# In Python 2.4.2 and older, os.path.abspath and os.path.realpath
250
# choke on a Unicode string containing a relative path if
251
# os.getcwd() returns a non-sys.getdefaultencoding()-encoded
253
_fs_enc = sys.getfilesystemencoding() or 'utf-8'
254
def _posix_abspath(path):
255
# jam 20060426 rather than encoding to fsencoding
256
# copy posixpath.abspath, but use os.getcwdu instead
257
if not posixpath.isabs(path):
258
path = posixpath.join(getcwd(), path)
259
return posixpath.normpath(path)
262
def _posix_realpath(path):
    """Resolve path with posixpath.realpath via the filesystem encoding.

    The path is encoded with _fs_enc before the call and the answer is
    decoded with the same encoding.
    """
    fs_path = path.encode(_fs_enc)
    resolved = posixpath.realpath(fs_path)
    return resolved.decode(_fs_enc)
266
def _win32_fixdrive(path):
    """Return path with its drive letter forced to upper case.

    win32 is inconsistent about the case of the drive letter it returns, and
    even if it were consistent the user might type the other case, so the
    drive is always upper-cased:

    * running python.exe under cmd.exe returns a capital C:\\
    * running win32 python inside a cygwin shell returns a lowercase c:\\
    """
    drive_part, remainder = _nt_splitdrive(path)
    return drive_part.upper() + remainder
279
def _win32_abspath(path):
    """Return the absolute path with '/' separators and a fixed drive."""
    # Real _nt_abspath doesn't have a problem with a unicode cwd
    absolute = _nt_abspath(unicode(path))
    return _win32_fixdrive(absolute.replace('\\', '/'))
284
def _win98_abspath(path):
285
"""Return the absolute version of a path.
286
Windows 98 safe implementation (python reimplementation
287
of Win32 API function GetFullPathNameW)
292
# \\HOST\path => //HOST/path
293
# //HOST/path => //HOST/path
294
# path => C:/cwd/path
297
# check for absolute path
298
drive = _nt_splitdrive(path)[0]
299
if drive == '' and path[:2] not in('//','\\\\'):
301
# we cannot simply os.path.join cwd and path
302
# because os.path.join('C:','/path') produce '/path'
303
# and this is incorrect
304
if path[:1] in ('/','\\'):
305
cwd = _nt_splitdrive(cwd)[0]
307
path = cwd + '\\' + path
308
return _win32_fixdrive(_nt_normpath(path).replace('\\', '/'))
311
def _win32_realpath(path):
    """Return the real path with '/' separators and a fixed drive."""
    # Real _nt_realpath doesn't have a problem with a unicode cwd
    resolved = _nt_realpath(unicode(path))
    return _win32_fixdrive(resolved.replace('\\', '/'))
316
def _win32_pathjoin(*args):
    """Join the components with _nt_join, then switch to '/' separators."""
    joined = _nt_join(*args)
    return joined.replace('\\', '/')
320
def _win32_normpath(path):
    """Return the normalized path with '/' separators and a fixed drive."""
    normalized = _nt_normpath(unicode(path))
    return _win32_fixdrive(normalized.replace('\\', '/'))
325
return _win32_fixdrive(os.getcwdu().replace('\\', '/'))
328
def _win32_mkdtemp(*args, **kwargs):
    """Call tempfile.mkdtemp and return a '/'-separated, drive-fixed path.

    All positional and keyword arguments are passed through unchanged.
    """
    created = tempfile.mkdtemp(*args, **kwargs)
    return _win32_fixdrive(created.replace('\\', '/'))
332
def _win32_rename(old, new):
333
"""We expect to be able to atomically replace 'new' with old.
335
On win32, if new exists, it must be moved out of the way first,
339
fancy_rename(old, new, rename_func=os.rename, unlink_func=os.unlink)
341
if e.errno in (errno.EPERM, errno.EACCES, errno.EBUSY, errno.EINVAL):
342
# If we try to rename a non-existent file onto cwd, we get
343
# EPERM or EACCES instead of ENOENT, this will raise ENOENT
344
# if the old path doesn't exist, sometimes we get EACCES
345
# On Linux, we seem to get EBUSY, on Mac we get EINVAL
351
return unicodedata.normalize('NFC', os.getcwdu())
354
# Default is to just use the python builtins, but these can be rebound on
355
# particular platforms.
356
abspath = _posix_abspath
357
realpath = _posix_realpath
176
358
pathjoin = os.path.join
177
359
normpath = os.path.normpath
178
360
getcwd = os.getcwdu
179
mkdtemp = tempfile.mkdtemp
180
361
rename = os.rename
181
362
dirname = os.path.dirname
182
363
basename = os.path.basename
184
if os.name == "posix":
185
# In Python 2.4.2 and older, os.path.abspath and os.path.realpath
186
# choke on a Unicode string containing a relative path if
187
# os.getcwd() returns a non-sys.getdefaultencoding()-encoded
189
_fs_enc = sys.getfilesystemencoding()
191
return os.path.abspath(path.encode(_fs_enc)).decode(_fs_enc)
194
return os.path.realpath(path.encode(_fs_enc)).decode(_fs_enc)
364
split = os.path.split
365
splitext = os.path.splitext
366
# These were already imported into local scope
367
# mkdtemp = tempfile.mkdtemp
368
# rmtree = shutil.rmtree
370
MIN_ABS_PATHLENGTH = 1
196
373
if sys.platform == 'win32':
197
# We need to use the Unicode-aware os.path.abspath and
198
# os.path.realpath on Windows systems.
200
return os.path.abspath(path).replace('\\', '/')
203
return os.path.realpath(path).replace('\\', '/')
206
return os.path.join(*args).replace('\\', '/')
209
return os.path.normpath(path).replace('\\', '/')
212
return os.getcwdu().replace('\\', '/')
214
def mkdtemp(*args, **kwargs):
215
return tempfile.mkdtemp(*args, **kwargs).replace('\\', '/')
217
def rename(old, new):
218
fancy_rename(old, new, rename_func=os.rename, unlink_func=os.unlink)
374
if win32utils.winver == 'Windows 98':
375
abspath = _win98_abspath
377
abspath = _win32_abspath
378
realpath = _win32_realpath
379
pathjoin = _win32_pathjoin
380
normpath = _win32_normpath
381
getcwd = _win32_getcwd
382
mkdtemp = _win32_mkdtemp
383
rename = _win32_rename
385
MIN_ABS_PATHLENGTH = 3
387
def _win32_delete_readonly(function, path, excinfo):
388
"""Error handler for shutil.rmtree function [for win32]
389
Helps to remove files and dirs marked as read-only.
391
exception = excinfo[1]
392
if function in (os.remove, os.rmdir) \
393
and isinstance(exception, OSError) \
394
and exception.errno == errno.EACCES:
400
def rmtree(path, ignore_errors=False, onerror=_win32_delete_readonly):
401
"""Replacer for shutil.rmtree: could remove readonly dirs/files"""
402
return shutil.rmtree(path, ignore_errors, onerror)
404
f = win32utils.get_unicode_argv # special function or None
408
elif sys.platform == 'darwin':
412
def get_terminal_encoding():
413
"""Find the best encoding for printing to the screen.
415
This attempts to check both sys.stdout and sys.stdin to see
416
what encoding they are in, and if that fails it falls back to
417
osutils.get_user_encoding().
418
The problem is that on Windows, locale.getpreferredencoding()
419
is not the same encoding as that used by the console:
420
http://mail.python.org/pipermail/python-list/2003-May/162357.html
422
On my standard US Windows XP, the preferred encoding is
423
cp1252, but the console is cp437
425
from bzrlib.trace import mutter
426
output_encoding = getattr(sys.stdout, 'encoding', None)
427
if not output_encoding:
428
input_encoding = getattr(sys.stdin, 'encoding', None)
429
if not input_encoding:
430
output_encoding = get_user_encoding()
431
mutter('encoding stdout as osutils.get_user_encoding() %r',
434
output_encoding = input_encoding
435
mutter('encoding stdout as sys.stdin encoding %r', output_encoding)
437
mutter('encoding stdout as sys.stdout encoding %r', output_encoding)
438
if output_encoding == 'cp0':
439
# invalid encoding (cp0 means 'no codepage' on Windows)
440
output_encoding = get_user_encoding()
441
mutter('cp0 is invalid encoding.'
442
' encoding stdout as osutils.get_user_encoding() %r',
446
codecs.lookup(output_encoding)
448
sys.stderr.write('bzr: warning:'
449
' unknown terminal encoding %s.\n'
450
' Using encoding %s instead.\n'
451
% (output_encoding, get_user_encoding())
453
output_encoding = get_user_encoding()
455
return output_encoding
221
458
def normalizepath(f):
222
if hasattr(os.path, 'realpath'):
459
if getattr(os.path, 'realpath', None) is not None:
584
1029
os.path.commonprefix (python2.4) has a bad bug that it works just
585
1030
on string prefixes, assuming that '/u' is a prefix of '/u2'. This
586
avoids that problem."""
1031
avoids that problem.
1034
if len(base) < MIN_ABS_PATHLENGTH:
1035
# must have space for e.g. a drive letter
1036
raise ValueError('%r is too short to calculate a relative path'
587
1039
rp = abspath(path)
591
while len(head) >= len(base):
1044
if len(head) <= len(base) and head != base:
1045
raise errors.PathNotChild(rp, base)
592
1046
if head == base:
594
head, tail = os.path.split(head)
1048
head, tail = split(head)
598
# XXX This should raise a NotChildPath exception, as its not tied
600
raise PathNotChild(rp, base)
1053
return pathjoin(*reversed(s))
1058
def _cicp_canonical_relpath(base, path):
1059
"""Return the canonical path relative to base.
1061
Like relpath, but on case-insensitive-case-preserving file-systems, this
1062
will return the relpath as stored on the file-system rather than in the
1063
case specified in the input string, for all existing portions of the path.
1065
This will cause O(N) behaviour if called for every path in a tree; if you
1066
have a number of paths to convert, you should use canonical_relpaths().
1068
# TODO: it should be possible to optimize this for Windows by using the
1069
# win32 API FindFiles function to look for the specified name - but using
1070
# os.listdir() still gives us the correct, platform agnostic semantics in
1073
rel = relpath(base, path)
1074
# '.' will have been turned into ''
1078
abs_base = abspath(base)
1080
_listdir = os.listdir
1082
# use an explicit iterator so we can easily consume the rest on early exit.
1083
bit_iter = iter(rel.split('/'))
1084
for bit in bit_iter:
1086
for look in _listdir(current):
1087
if lbit == look.lower():
1088
current = pathjoin(current, look)
1091
# got to the end, nothing matched, so we just return the
1092
# non-existing bits as they were specified (the filename may be
1093
# the target of a move, for example).
1094
current = pathjoin(current, bit, *list(bit_iter))
1096
return current[len(abs_base)+1:]
1098
# XXX - TODO - we need better detection/integration of case-insensitive
1099
# file-systems; Linux often sees FAT32 devices (or NFS-mounted OSX
1100
# filesystems), for example, so could probably benefit from the same basic
1101
# support there. For now though, only Windows and OSX get that support, and
1102
# they get it for *all* file-systems!
1103
if sys.platform in ('win32', 'darwin'):
1104
canonical_relpath = _cicp_canonical_relpath
1106
canonical_relpath = relpath
1108
def canonical_relpaths(base, paths):
    """Canonicalize a sequence of paths relative to base.

    The intent is for this implementation to use a cache, vastly speeding
    up multiple transformations in the same directory.

    :param base: Passed as the first argument to canonical_relpath.
    :param paths: An iterable of paths to canonicalize.
    :return: A list with canonical_relpath(base, p) for each p in paths.
    """
    # but for now, we haven't optimized...
    canonical = []
    for one_path in paths:
        canonical.append(canonical_relpath(base, one_path))
    return canonical
608
1117
def safe_unicode(unicode_or_utf8_string):
609
1118
"""Coerce unicode_or_utf8_string into unicode.
611
1120
If it is unicode, it is returned.
612
Otherwise it is decoded from utf-8. If a decoding error
613
occurs, it is wrapped as a If the decoding fails, the exception is wrapped
614
as a BzrBadParameter exception.
1121
Otherwise it is decoded from utf-8. If decoding fails, the exception is
1122
wrapped in a BzrBadParameterNotUnicode exception.
616
1124
if isinstance(unicode_or_utf8_string, unicode):
617
1125
return unicode_or_utf8_string
619
1127
return unicode_or_utf8_string.decode('utf8')
620
1128
except UnicodeDecodeError:
621
raise BzrBadParameterNotUnicode(unicode_or_utf8_string)
1129
raise errors.BzrBadParameterNotUnicode(unicode_or_utf8_string)
1132
def safe_utf8(unicode_or_utf8_string):
1133
"""Coerce unicode_or_utf8_string to a utf8 string.
1135
If it is a str, it is returned.
1136
If it is Unicode, it is encoded into a utf-8 string.
1138
if isinstance(unicode_or_utf8_string, str):
1139
# TODO: jam 20070209 This is overkill, and probably has an impact on
1140
# performance if we are dealing with lots of apis that want a
1143
# Make sure it is a valid utf-8 string
1144
unicode_or_utf8_string.decode('utf-8')
1145
except UnicodeDecodeError:
1146
raise errors.BzrBadParameterNotUnicode(unicode_or_utf8_string)
1147
return unicode_or_utf8_string
1148
return unicode_or_utf8_string.encode('utf-8')
1151
_revision_id_warning = ('Unicode revision ids were deprecated in bzr 0.15.'
1152
' Revision id generators should be creating utf8'
1156
def safe_revision_id(unicode_or_utf8_string, warn=True):
1157
"""Revision ids should now be utf8, but at one point they were unicode.
1159
:param unicode_or_utf8_string: A possibly Unicode revision_id. (can also be
1161
:param warn: Functions that are sanitizing user data can set warn=False
1162
:return: None or a utf8 revision id.
1164
if (unicode_or_utf8_string is None
1165
or unicode_or_utf8_string.__class__ == str):
1166
return unicode_or_utf8_string
1168
symbol_versioning.warn(_revision_id_warning, DeprecationWarning,
1170
return cache_utf8.encode(unicode_or_utf8_string)
1173
_file_id_warning = ('Unicode file ids were deprecated in bzr 0.15. File id'
1174
' generators should be creating utf8 file ids.')
1177
def safe_file_id(unicode_or_utf8_string, warn=True):
1178
"""File ids should now be utf8, but at one point they were unicode.
1180
This is the same as safe_utf8, except it uses the cached encode functions
1181
to save a little bit of performance.
1183
:param unicode_or_utf8_string: A possibly Unicode file_id. (can also be
1185
:param warn: Functions that are sanitizing user data can set warn=False
1186
:return: None or a utf8 file id.
1188
if (unicode_or_utf8_string is None
1189
or unicode_or_utf8_string.__class__ == str):
1190
return unicode_or_utf8_string
1192
symbol_versioning.warn(_file_id_warning, DeprecationWarning,
1194
return cache_utf8.encode(unicode_or_utf8_string)
1197
# Only 'darwin' (Mac OSX) is flagged as normalizing unicode filenames.
_platform_normalizes_filenames = (sys.platform == 'darwin')
1202
def normalizes_filenames():
    """Report whether this platform normalizes unicode filenames.

    Mac OSX does; Windows and Linux do not.

    :return: The module-level _platform_normalizes_filenames flag.
    """
    return _platform_normalizes_filenames
1210
def _accessible_normalized_filename(path):
1211
"""Get the unicode normalized path, and if you can access the file.
1213
On platforms where the system normalizes filenames (Mac OSX),
1214
you can access a file by any path which will normalize correctly.
1215
On platforms where the system does not normalize filenames
1216
(Windows, Linux), you have to access a file by its exact path.
1218
Internally, bzr only supports NFC normalization, since that is
1219
the standard for XML documents.
1221
So return the normalized path, and a flag indicating if the file
1222
can be accessed by that path.
1225
return unicodedata.normalize('NFC', unicode(path)), True
1228
def _inaccessible_normalized_filename(path):
1229
__doc__ = _accessible_normalized_filename.__doc__
1231
normalized = unicodedata.normalize('NFC', unicode(path))
1232
return normalized, normalized == path
1235
if _platform_normalizes_filenames:
1236
normalized_filename = _accessible_normalized_filename
1238
normalized_filename = _inaccessible_normalized_filename
624
1241
def terminal_width():
625
1242
"""Return estimated terminal width."""
627
# TODO: Do something smart on Windows?
629
# TODO: Is there anything that gets a better update when the window
630
# is resized while the program is running? We could use the Python termcap
1243
if sys.platform == 'win32':
1244
return win32utils.get_console_size()[0]
633
return int(os.environ['COLUMNS'])
634
except (IndexError, KeyError, ValueError):
1247
import struct, fcntl, termios
1248
s = struct.pack('HHHH', 0, 0, 0, 0)
1249
x = fcntl.ioctl(1, termios.TIOCGWINSZ, s)
1250
width = struct.unpack('HHHH', x)[1]
1255
width = int(os.environ['COLUMNS'])
637
1264
def supports_executable():
    """Return False on win32 and True on every other platform."""
    if sys.platform == "win32":
        return False
    return True
1268
def supports_posix_readonly():
    """Return True if 'readonly' has POSIX semantics, False otherwise.

    Notably, a win32 readonly file cannot be deleted, unlike POSIX where the
    directory controls creation/deletion, etc.

    And under win32, readonly means that the directory itself cannot be
    deleted. The contents of a readonly directory can be changed, unlike
    POSIX where files in readonly directories cannot be added, deleted or
    renamed.
    """
    if sys.platform == "win32":
        return False
    return True
1281
def set_or_unset_env(env_variable, value):
1282
"""Modify the environment, setting or removing the env_variable.
1284
:param env_variable: The environment variable in question
1285
:param value: The value to set the environment to. If None, then
1286
the variable will be removed.
1287
:return: The original value of the environment variable.
1289
orig_val = os.environ.get(env_variable)
1291
if orig_val is not None:
1292
del os.environ[env_variable]
1294
if isinstance(value, unicode):
1295
value = value.encode(get_user_encoding())
1296
os.environ[env_variable] = value
1300
_validWin32PathRE = re.compile(r'^([A-Za-z]:[/\\])?[^:<>*"?\|]*$')
1303
def check_legal_path(path):
1304
"""Check whether the supplied path is legal.
1305
This is only required on Windows, so we don't test on other platforms
1308
if sys.platform != "win32":
1310
if _validWin32PathRE.match(path) is None:
1311
raise errors.IllegalPath(path)
1314
_WIN32_ERROR_DIRECTORY = 267 # Similar to errno.ENOTDIR
1316
def _is_error_enotdir(e):
1317
"""Check if this exception represents ENOTDIR.
1319
Unfortunately, python is very inconsistent about the exception
1320
here. The cases are:
1321
1) Linux, Mac OSX all versions seem to set errno == ENOTDIR
1322
2) Windows, Python2.4, uses errno == ERROR_DIRECTORY (267)
1323
which is the windows error code.
1324
3) Windows, Python2.5 uses errno == EINVAL and
1325
winerror == ERROR_DIRECTORY
1327
:param e: An Exception object (expected to be OSError with an errno
1328
attribute, but we should be able to cope with anything)
1329
:return: True if this represents an ENOTDIR error. False otherwise.
1331
en = getattr(e, 'errno', None)
1332
if (en == errno.ENOTDIR
1333
or (sys.platform == 'win32'
1334
and (en == _WIN32_ERROR_DIRECTORY
1335
or (en == errno.EINVAL
1336
and getattr(e, 'winerror', None) == _WIN32_ERROR_DIRECTORY)
1342
def walkdirs(top, prefix=""):
1343
"""Yield data about all the directories in a tree.
1345
This yields all the data about the contents of a directory at a time.
1346
After each directory has been yielded, if the caller has mutated the list
1347
to exclude some directories, they are then not descended into.
1349
The data yielded is of the form:
1350
((directory-relpath, directory-path-from-top),
1351
[(relpath, basename, kind, lstat, path-from-top), ...]),
1352
- directory-relpath is the relative path of the directory being returned
1353
with respect to top. prefix is prepended to this.
1354
- directory-path-from-root is the path including top for this directory.
1355
It is suitable for use with os functions.
1356
- relpath is the relative path within the subtree being walked.
1357
- basename is the basename of the path
1358
- kind is the kind of the file now. If unknown then the file is not
1359
present within the tree - but it may be recorded as versioned. See
1361
- lstat is the stat data *if* the file was statted.
1362
- planned, not implemented:
1363
path_from_tree_root is the path from the root of the tree.
1365
:param prefix: Prefix the relpaths that are yielded with 'prefix'. This
1366
allows one to walk a subtree but get paths that are relative to a tree
1368
:return: an iterator over the dirs.
1370
#TODO there is a bit of a smell where the results of the directory-
1371
# summary in this, and the path from the root, may not agree
1372
# depending on top and prefix - i.e. ./foo and foo as a pair leads to
1373
# potentially confusing output. We should make this more robust - but
1374
# not at a speed cost. RBC 20060731
1376
_directory = _directory_kind
1377
_listdir = os.listdir
1378
_kind_from_mode = file_kind_from_stat_mode
1379
pending = [(safe_unicode(prefix), "", _directory, None, safe_unicode(top))]
1381
# 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
1382
relroot, _, _, _, top = pending.pop()
1384
relprefix = relroot + u'/'
1387
top_slash = top + u'/'
1390
append = dirblock.append
1392
names = sorted(_listdir(top))
1394
if not _is_error_enotdir(e):
1398
abspath = top_slash + name
1399
statvalue = _lstat(abspath)
1400
kind = _kind_from_mode(statvalue.st_mode)
1401
append((relprefix + name, name, kind, statvalue, abspath))
1402
yield (relroot, top), dirblock
1404
# push the user specified dirs from dirblock
1405
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1408
class DirReader(object):
    """An interface for reading directories."""

    def top_prefix_to_starting_dir(self, top, prefix=""):
        """Convert top and prefix into a starting dir entry.

        :param top: A utf8 path
        :param prefix: An optional utf8 path to prefix output relative paths
        :return: A tuple starting with prefix, and ending with top in the
            reader's native form.
        :raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError(self.top_prefix_to_starting_dir)

    def read_dir(self, prefix, top):
        """Read a specific dir.

        :param prefix: A utf8 prefix to be prepended to the path basenames.
        :param top: A natively encoded path to read.
        :return: A list of the directory's contents. Each item contains:
            (utf8_relpath, utf8_name, kind, lstatvalue, native_abspath)
        :raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError(self.read_dir)
1433
_selected_dir_reader = None
1436
def _walkdirs_utf8(top, prefix=""):
1437
"""Yield data about all the directories in a tree.
1439
This yields the same information as walkdirs() only each entry is yielded
1440
in utf-8. On platforms which have a filesystem encoding of utf8 the paths
1441
are returned as exact byte-strings.
1443
:return: yields a tuple of (dir_info, [file_info])
1444
dir_info is (utf8_relpath, path-from-top)
1445
file_info is (utf8_relpath, utf8_name, kind, lstat, path-from-top)
1446
if top is an absolute path, path-from-top is also an absolute path.
1447
path-from-top might be unicode or utf8, but it is the correct path to
1448
pass to os functions to affect the file in question. (such as os.lstat)
1450
global _selected_dir_reader
1451
if _selected_dir_reader is None:
1452
fs_encoding = _fs_enc.upper()
1453
if sys.platform == "win32" and win32utils.winver == 'Windows NT':
1454
# Win98 doesn't have unicode apis like FindFirstFileW
1455
# TODO: We possibly could support Win98 by falling back to the
1456
# original FindFirstFile, and using TCHAR instead of WCHAR,
1457
# but that gets a bit tricky, and requires custom compiling
1460
from bzrlib._walkdirs_win32 import Win32ReadDir
1461
_selected_dir_reader = Win32ReadDir()
1464
elif fs_encoding in ('UTF-8', 'US-ASCII', 'ANSI_X3.4-1968'):
1465
# ANSI_X3.4-1968 is a form of ASCII
1467
from bzrlib._readdir_pyx import UTF8DirReader
1468
_selected_dir_reader = UTF8DirReader()
1472
if _selected_dir_reader is None:
1473
# Fallback to the python version
1474
_selected_dir_reader = UnicodeDirReader()
1476
# 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
1477
# But we don't actually use 1-3 in pending, so set them to None
1478
pending = [[_selected_dir_reader.top_prefix_to_starting_dir(top, prefix)]]
1479
read_dir = _selected_dir_reader.read_dir
1480
_directory = _directory_kind
1482
relroot, _, _, _, top = pending[-1].pop()
1485
dirblock = sorted(read_dir(relroot, top))
1486
yield (relroot, top), dirblock
1487
# push the user specified dirs from dirblock
1488
next = [d for d in reversed(dirblock) if d[2] == _directory]
1490
pending.append(next)
1493
class UnicodeDirReader(DirReader):
1494
"""A dir reader for non-utf8 file systems, which transcodes."""
1496
__slots__ = ['_utf8_encode']
1499
self._utf8_encode = codecs.getencoder('utf8')
1501
def top_prefix_to_starting_dir(self, top, prefix=""):
1502
"""See DirReader.top_prefix_to_starting_dir."""
1503
return (safe_utf8(prefix), None, None, None, safe_unicode(top))
1505
def read_dir(self, prefix, top):
1506
"""Read a single directory from a non-utf8 file system.
1508
top, and the abspath element in the output are unicode, all other paths
1509
are utf8. Local disk IO is done via unicode calls to listdir etc.
1511
This is currently the fallback code path when the filesystem encoding is
1512
not UTF-8. It may be better to implement an alternative so that we can
1513
safely handle paths that are not properly decodable in the current
1516
See DirReader.read_dir for details.
1518
_utf8_encode = self._utf8_encode
1520
_listdir = os.listdir
1521
_kind_from_mode = file_kind_from_stat_mode
1524
relprefix = prefix + '/'
1527
top_slash = top + u'/'
1530
append = dirblock.append
1531
for name in sorted(_listdir(top)):
1533
name_utf8 = _utf8_encode(name)[0]
1534
except UnicodeDecodeError:
1535
raise errors.BadFilenameEncoding(
1536
_utf8_encode(relprefix)[0] + name, _fs_enc)
1537
abspath = top_slash + name
1538
statvalue = _lstat(abspath)
1539
kind = _kind_from_mode(statvalue.st_mode)
1540
append((relprefix + name_utf8, name_utf8, kind, statvalue, abspath))
1544
def copy_tree(from_path, to_path, handlers={}):
1545
"""Copy all of the entries in from_path into to_path.
1547
:param from_path: The base directory to copy.
1548
:param to_path: The target directory. If it does not exist, it will
1550
:param handlers: A dictionary of functions, which takes a source and
1551
destinations for files, directories, etc.
1552
It is keyed on the file kind, such as 'directory', 'symlink', or 'file'
1553
'file', 'directory', and 'symlink' should always exist.
1554
If they are missing, they will be replaced with 'os.mkdir()',
1555
'os.readlink() + os.symlink()', and 'shutil.copy2()', respectively.
1557
# Now, just copy the existing cached tree to the new location
1558
# We use a cheap trick here.
1559
# Absolute paths are prefixed with the first parameter
1560
# relative paths are prefixed with the second.
1561
# So we can get both the source and target returned
1562
# without any extra work.
1564
def copy_dir(source, dest):
1567
def copy_link(source, dest):
1568
"""Copy the contents of a symlink"""
1569
link_to = os.readlink(source)
1570
os.symlink(link_to, dest)
1572
real_handlers = {'file':shutil.copy2,
1573
'symlink':copy_link,
1574
'directory':copy_dir,
1576
real_handlers.update(handlers)
1578
if not os.path.exists(to_path):
1579
real_handlers['directory'](from_path, to_path)
1581
for dir_info, entries in walkdirs(from_path, prefix=to_path):
1582
for relpath, name, kind, st, abspath in entries:
1583
real_handlers[kind](abspath, relpath)
1586
def path_prefix_key(path):
    """Generate a prefix-order path key for path.

    Sorting paths by this key orders them the same way that walkdirs does.

    :return: A tuple of (dirname(path), path).
    """
    parent = dirname(path)
    return (parent, path)
1594
def compare_paths_prefix_order(path_a, path_b):
    """Compare path_a and path_b in the same order walkdirs uses.

    :return: The cmp() result of the two paths' path_prefix_key values.
    """
    return cmp(path_prefix_key(path_a), path_prefix_key(path_b))
1601
_cached_user_encoding = None
1604
def get_user_encoding(use_cache=True):
1605
"""Find out what the preferred user encoding is.
1607
This is generally the encoding that is used for command line parameters
1608
and file contents. This may be different from the terminal encoding
1609
or the filesystem encoding.
1611
:param use_cache: Enable cache for detected encoding.
1612
(This parameter is turned on by default,
1613
and required only for selftesting)
1615
:return: A string defining the preferred user encoding
1617
global _cached_user_encoding
1618
if _cached_user_encoding is not None and use_cache:
1619
return _cached_user_encoding
1621
if sys.platform == 'darwin':
1622
# python locale.getpreferredencoding() always return
1623
# 'mac-roman' on darwin. That's a lie.
1624
sys.platform = 'posix'
1626
if os.environ.get('LANG', None) is None:
1627
# If LANG is not set, we end up with 'ascii', which is bad
1628
# ('mac-roman' is more than ascii), so we set a default which
1629
# will give us UTF-8 (which appears to work in all cases on
1630
# OSX). Users are still free to override LANG of course, as
1631
# long as it give us something meaningful. This work-around
1632
# *may* not be needed with python 3k and/or OSX 10.5, but will
1633
# work with them too -- vila 20080908
1634
os.environ['LANG'] = 'en_US.UTF-8'
1637
sys.platform = 'darwin'
1642
user_encoding = locale.getpreferredencoding()
1643
except locale.Error, e:
1644
sys.stderr.write('bzr: warning: %s\n'
1645
' Could not determine what text encoding to use.\n'
1646
' This error usually means your Python interpreter\n'
1647
' doesn\'t support the locale set by $LANG (%s)\n'
1648
" Continuing with ascii encoding.\n"
1649
% (e, os.environ.get('LANG')))
1650
user_encoding = 'ascii'
1652
# Windows returns 'cp0' to indicate there is no code page. So we'll just
1653
# treat that as ASCII, and not support printing unicode characters to the
1656
# For python scripts run under vim, we get '', so also treat that as ASCII
1657
if user_encoding in (None, 'cp0', ''):
1658
user_encoding = 'ascii'
1662
codecs.lookup(user_encoding)
1664
sys.stderr.write('bzr: warning:'
1665
' unknown encoding %s.'
1666
' Continuing with ascii encoding.\n'
1669
user_encoding = 'ascii'
1672
_cached_user_encoding = user_encoding
1674
return user_encoding
1677
def get_host_name():
1678
"""Return the current unicode host name.
1680
This is meant to be used in place of socket.gethostname() because that
1681
behaves inconsistently on different platforms.
1683
if sys.platform == "win32":
1685
return win32utils.get_host_name()
1688
return socket.gethostname().decode(get_user_encoding())
1691
def recv_all(socket, bytes):
1692
"""Receive an exact number of bytes.
1694
Regular Socket.recv() may return less than the requested number of bytes,
1695
dependning on what's in the OS buffer. MSG_WAITALL is not available
1696
on all platforms, but this should work everywhere. This will return
1697
less than the requested amount if the remote end closes.
1699
This isn't optimized and is intended mostly for use in testing.
1702
while len(b) < bytes:
1703
new = until_no_eintr(socket.recv, bytes - len(b))
1710
def send_all(socket, bytes, report_activity=None):
1711
"""Send all bytes on a socket.
1713
Regular socket.sendall() can give socket error 10053 on Windows. This
1714
implementation sends no more than 64k at a time, which avoids this problem.
1716
:param report_activity: Call this as bytes are read, see
1717
Transport._report_activity
1720
for pos in xrange(0, len(bytes), chunk_size):
1721
block = bytes[pos:pos+chunk_size]
1722
if report_activity is not None:
1723
report_activity(len(block), 'write')
1724
until_no_eintr(socket.sendall, block)
1727
def dereference_path(path):
    """Determine the real path to a file.

    Every parent element is dereferenced, but the final component (the file
    itself) is left exactly as given.

    :param path: The original path. May be absolute or relative.
    :return: the real path *to* the file
    """
    parent_dir, leaf = os.path.split(path)
    # Joining onto '.' is a workaround for Python bug #1213894
    # (initial path components aren't dereferenced).
    real_parent = realpath(pathjoin('.', parent_dir))
    return pathjoin(real_parent, leaf)
1741
def supports_mapi():
    """Return True if we can use MAPI to launch a mail client.

    This is reported as True only when sys.platform is "win32".
    """
    on_win32 = (sys.platform == "win32")
    return on_win32
1746
def resource_string(package, resource_name):
1747
"""Load a resource from a package and return it as a string.
1749
Note: Only packages that start with bzrlib are currently supported.
1751
This is designed to be a lightweight implementation of resource
1752
loading in a way which is API compatible with the same API from
1754
http://peak.telecommunity.com/DevCenter/PkgResources#basic-resource-access.
1755
If and when pkg_resources becomes a standard library, this routine
1758
# Check package name is within bzrlib
1759
if package == "bzrlib":
1760
resource_relpath = resource_name
1761
elif package.startswith("bzrlib."):
1762
package = package[len("bzrlib."):].replace('.', os.sep)
1763
resource_relpath = pathjoin(package, resource_name)
1765
raise errors.BzrError('resource package %s not in bzrlib' % package)
1767
# Map the resource to a file and read its contents
1768
base = dirname(bzrlib.__file__)
1769
if getattr(sys, 'frozen', None): # bzr.exe
1770
base = abspath(pathjoin(base, '..', '..'))
1771
filename = pathjoin(base, resource_relpath)
1772
return open(filename, 'rU').read()
1775
def file_kind_from_stat_mode_thunk(mode):
1776
global file_kind_from_stat_mode
1777
if file_kind_from_stat_mode is file_kind_from_stat_mode_thunk:
1779
from bzrlib._readdir_pyx import UTF8DirReader
1780
file_kind_from_stat_mode = UTF8DirReader().kind_from_mode
1782
from bzrlib._readdir_py import (
1783
_kind_from_mode as file_kind_from_stat_mode
1785
return file_kind_from_stat_mode(mode)
1786
file_kind_from_stat_mode = file_kind_from_stat_mode_thunk
1789
def file_kind(f, _lstat=os.lstat):
1791
return file_kind_from_stat_mode(_lstat(f).st_mode)
1793
if getattr(e, 'errno', None) in (errno.ENOENT, errno.ENOTDIR):
1794
raise errors.NoSuchFile(f)
1798
def until_no_eintr(f, *a, **kw):
1799
"""Run f(*a, **kw), retrying if an EINTR error occurs."""
1800
# Borrowed from Twisted's twisted.python.util.untilConcludes function.
1804
except (IOError, OSError), e:
1805
if e.errno == errno.EINTR:
1809
def re_compile_checked(re_string, flags=0, where=""):
1810
"""Return a compiled re, or raise a sensible error.
1812
This should only be used when compiling user-supplied REs.
1814
:param re_string: Text form of regular expression.
1815
:param flags: eg re.IGNORECASE
1816
:param where: Message explaining to the user the context where
1817
it occurred, eg 'log search filter'.
1819
# from https://bugs.launchpad.net/bzr/+bug/251352
1821
re_obj = re.compile(re_string, flags)
1826
where = ' in ' + where
1827
# despite the name 'error' is a type
1828
raise errors.BzrCommandError('Invalid regular expression%s: %r: %s'
1829
% (where, re_string, e))
1832
if sys.platform == "win32":
1835
return msvcrt.getch()
1840
fd = sys.stdin.fileno()
1841
settings = termios.tcgetattr(fd)
1844
ch = sys.stdin.read(1)
1846
termios.tcsetattr(fd, termios.TCSADRAIN, settings)
1850
if sys.platform == 'linux2':
1851
def _local_concurrency():
1853
prefix = 'processor'
1854
for line in file('/proc/cpuinfo', 'rb'):
1855
if line.startswith(prefix):
1856
concurrency = int(line[line.find(':')+1:]) + 1
1858
elif sys.platform == 'darwin':
1859
def _local_concurrency():
1860
return subprocess.Popen(['sysctl', '-n', 'hw.availcpu'],
1861
stdout=subprocess.PIPE).communicate()[0]
1862
elif sys.platform[0:7] == 'freebsd':
1863
def _local_concurrency():
1864
return subprocess.Popen(['sysctl', '-n', 'hw.ncpu'],
1865
stdout=subprocess.PIPE).communicate()[0]
1866
elif sys.platform == 'sunos5':
1867
def _local_concurrency():
1868
return subprocess.Popen(['psrinfo', '-p',],
1869
stdout=subprocess.PIPE).communicate()[0]
1870
elif sys.platform == "win32":
1871
def _local_concurrency():
1872
# This appears to return the number of cores.
1873
return os.environ.get('NUMBER_OF_PROCESSORS')
1875
def _local_concurrency():
1880
_cached_local_concurrency = None
1882
def local_concurrency(use_cache=True):
1883
"""Return how many processes can be run concurrently.
1885
Rely on platform specific implementations and default to 1 (one) if
1886
anything goes wrong.
1888
global _cached_local_concurrency
1889
if _cached_local_concurrency is not None and use_cache:
1890
return _cached_local_concurrency
1893
concurrency = _local_concurrency()
1894
except (OSError, IOError):
1897
concurrency = int(concurrency)
1898
except (TypeError, ValueError):
1901
_cached_concurrency = concurrency