201
259
# choke on a Unicode string containing a relative path if
202
260
# os.getcwd() returns a non-sys.getdefaultencoding()-encoded
204
_fs_enc = sys.getfilesystemencoding()
262
_fs_enc = sys.getfilesystemencoding() or 'utf-8'
205
263
def _posix_abspath(path):
206
return os.path.abspath(path.encode(_fs_enc)).decode(_fs_enc)
207
# jam 20060426 This is another possibility which mimics
208
# os.path.abspath, only uses unicode characters instead
209
# if not os.path.isabs(path):
210
# return os.path.join(os.getcwdu(), path)
264
# jam 20060426 rather than encoding to fsencoding
265
# copy posixpath.abspath, but use os.getcwdu instead
266
if not posixpath.isabs(path):
267
path = posixpath.join(getcwd(), path)
268
return posixpath.normpath(path)
214
271
def _posix_realpath(path):
215
return os.path.realpath(path.encode(_fs_enc)).decode(_fs_enc)
272
return posixpath.realpath(path.encode(_fs_enc)).decode(_fs_enc)
275
def _win32_fixdrive(path):
    """Force drive letters to be consistent.

    win32 is inconsistent about whether it returns lower or upper case
    drive letters, and even if it were consistent the user might type the
    other one, so the drive is always forced to upper case.

    Running python.exe under cmd.exe returns a capital C:\\
    Running win32 python inside a cygwin shell returns a lowercase c:\\

    :param path: The path whose drive prefix should be normalized.
    :return: path with the drive part (as split off by _nt_splitdrive)
        upper-cased and the remainder left untouched.
    """
    drive, rest = _nt_splitdrive(path)
    return drive.upper() + rest
218
288
def _win32_abspath(path):
219
return _nt_abspath(path.encode(_fs_enc)).decode(_fs_enc).replace('\\', '/')
289
# Real _nt_abspath doesn't have a problem with a unicode cwd
290
return _win32_fixdrive(_nt_abspath(unicode(path)).replace('\\', '/'))
293
def _win98_abspath(path):
294
"""Return the absolute version of a path.
295
Windows 98 safe implementation (python reimplementation
296
of Win32 API function GetFullPathNameW)
301
# \\HOST\path => //HOST/path
302
# //HOST/path => //HOST/path
303
# path => C:/cwd/path
306
# check for absolute path
307
drive = _nt_splitdrive(path)[0]
308
if drive == '' and path[:2] not in('//','\\\\'):
310
# we cannot simply os.path.join cwd and path
311
# because os.path.join('C:','/path') produces '/path'
312
# and this is incorrect
313
if path[:1] in ('/','\\'):
314
cwd = _nt_splitdrive(cwd)[0]
316
path = cwd + '\\' + path
317
return _win32_fixdrive(_nt_normpath(path).replace('\\', '/'))
319
if win32utils.winver == 'Windows 98':
320
_win32_abspath = _win98_abspath
222
323
def _win32_realpath(path):
223
return _nt_realpath(path.encode(_fs_enc)).decode(_fs_enc).replace('\\', '/')
324
# Real _nt_realpath doesn't have a problem with a unicode cwd
325
return _win32_fixdrive(_nt_realpath(unicode(path)).replace('\\', '/'))
226
328
def _win32_pathjoin(*args):
732
1033
return _platform_normalizes_filenames
1036
def _accessible_normalized_filename(path):
    """Get the unicode normalized path, and if you can access the file.

    On platforms where the system normalizes filenames (Mac OSX),
    you can access a file by any path which will normalize correctly.
    On platforms where the system does not normalize filenames
    (Windows, Linux), you have to access a file by its exact path.

    Internally, bzr only supports NFC normalization, since that is
    the standard for XML documents.

    So return the normalized path, and a flag indicating if the file
    can be accessed by that path.

    :param path: The path to normalize; it is coerced with unicode().
    :return: (normalized_path, True). The flag is always True in this
        variant.
    """
    return unicodedata.normalize('NFC', unicode(path)), True
1054
def _inaccessible_normalized_filename(path):
    """Get the unicode normalized path, and if you can access the file.

    This variant reports the file as accessible only when the given path
    is already equal to its NFC-normalized form.

    :param path: The path to normalize; it is coerced with unicode().
    :return: (normalized_path, accessible) where accessible is True only
        if normalized_path compares equal to the path passed in.
    """
    # A real docstring is used here: assigning to __doc__ inside the body
    # would only bind an unused local name, not document the function.
    normalized = unicodedata.normalize('NFC', unicode(path))
    return normalized, normalized == path
735
1061
if _platform_normalizes_filenames:
736
def unicode_filename(path):
737
"""Make sure 'path' is a properly normalized filename.
739
On platforms where the system normalizes filenames (Mac OSX),
740
you can access a file by any path which will normalize
742
Internally, bzr only supports NFC/NFKC normalization, since
743
that is the standard for XML documents.
744
So we return a normalized path, and indicate this has been
747
:return: (path, is_normalized) Return a path which can
748
access the file, and whether or not this path is
751
return unicodedata.normalize('NFKC', path), True
1062
normalized_filename = _accessible_normalized_filename
753
def unicode_filename(path):
754
"""Make sure 'path' is a properly normalized filename.
756
On platforms where the system does not normalize filenames
757
(Windows, Linux), you have to access a file by its exact path.
758
Internally, bzr only supports NFC/NFKC normalization, since
759
that is the standard for XML documents.
760
So we return the original path, and indicate if this is
763
:return: (path, is_normalized) Return a path which can
764
access the file, and whether or not this path is
767
return path, unicodedata.normalize('NFKC', path) == path
1064
normalized_filename = _inaccessible_normalized_filename
770
1067
def terminal_width():
771
1068
"""Return estimated terminal width."""
772
1069
if sys.platform == 'win32':
773
import bzrlib.win32console
774
return bzrlib.win32console.get_console_size()[0]
1070
return win32utils.get_console_size()[0]
777
1073
import struct, fcntl, termios
816
1173
to exclude some directories, they are then not descended into.
818
1175
The data yielded is of the form:
819
[(relpath, basename, kind, lstat, path_from_top), ...]
1176
((directory-relpath, directory-path-from-top),
1177
[(relpath, basename, kind, lstat, path-from-top), ...]),
1178
- directory-relpath is the relative path of the directory being returned
1179
with respect to top. prefix is prepended to this.
1180
- directory-path-from-top is the path including top for this directory.
1181
It is suitable for use with os functions.
1182
- relpath is the relative path within the subtree being walked.
1183
- basename is the basename of the path
1184
- kind is the kind of the file now. If unknown then the file is not
1185
present within the tree - but it may be recorded as versioned. See
1187
- lstat is the stat data *if* the file was statted.
1188
- planned, not implemented:
1189
path_from_tree_root is the path from the root of the tree.
821
1191
:param prefix: Prefix the relpaths that are yielded with 'prefix'. This
822
1192
allows one to walk a subtree but get paths that are relative to a tree
823
1193
rooted higher up.
824
1194
:return: an iterator over the dirs.
1196
#TODO there is a bit of a smell where the results of the directory-
1197
# summary in this, and the path from the root, may not agree
1198
# depending on top and prefix - i.e. ./foo and foo as a pair leads to
1199
# potentially confusing output. We should make this more robust - but
1200
# not at a speed cost. RBC 20060731
828
1202
_directory = _directory_kind
830
pending = [(prefix, "", _directory, None, top)]
1203
_listdir = os.listdir
1204
_kind_from_mode = _formats.get
1205
pending = [(safe_unicode(prefix), "", _directory, None, safe_unicode(top))]
833
currentdir = pending.pop()
834
1207
# 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
837
relroot = currentdir[0] + '/'
1208
relroot, _, _, _, top = pending.pop()
1210
relprefix = relroot + u'/'
1213
top_slash = top + u'/'
1216
append = dirblock.append
1218
names = sorted(_listdir(top))
1220
if not _is_error_enotdir(e):
1224
abspath = top_slash + name
1225
statvalue = _lstat(abspath)
1226
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
1227
append((relprefix + name, name, kind, statvalue, abspath))
1228
yield (relroot, top), dirblock
1230
# push the user specified dirs from dirblock
1231
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1234
_real_walkdirs_utf8 = None
1236
def _walkdirs_utf8(top, prefix=""):
1237
"""Yield data about all the directories in a tree.
1239
This yields the same information as walkdirs() only each entry is yielded
1240
in utf-8. On platforms which have a filesystem encoding of utf8 the paths
1241
are returned as exact byte-strings.
1243
:return: yields a tuple of (dir_info, [file_info])
1244
dir_info is (utf8_relpath, path-from-top)
1245
file_info is (utf8_relpath, utf8_name, kind, lstat, path-from-top)
1246
if top is an absolute path, path-from-top is also an absolute path.
1247
path-from-top might be unicode or utf8, but it is the correct path to
1248
pass to os functions to affect the file in question. (such as os.lstat)
1250
global _real_walkdirs_utf8
1251
if _real_walkdirs_utf8 is None:
1252
fs_encoding = _fs_enc.upper()
1253
if win32utils.winver == 'Windows NT':
1254
# Win98 doesn't have unicode apis like FindFirstFileW
1255
# TODO: We possibly could support Win98 by falling back to the
1256
# original FindFirstFile, and using TCHAR instead of WCHAR,
1257
# but that gets a bit tricky, and requires custom compiling
1260
from bzrlib._walkdirs_win32 import _walkdirs_utf8_win32_find_file
1262
_real_walkdirs_utf8 = _walkdirs_unicode_to_utf8
1264
_real_walkdirs_utf8 = _walkdirs_utf8_win32_find_file
1265
elif fs_encoding not in ('UTF-8', 'US-ASCII', 'ANSI_X3.4-1968'):
1266
# ANSI_X3.4-1968 is a form of ASCII
1267
_real_walkdirs_utf8 = _walkdirs_unicode_to_utf8
1269
_real_walkdirs_utf8 = _walkdirs_fs_utf8
1270
return _real_walkdirs_utf8(top, prefix=prefix)
1273
def _walkdirs_fs_utf8(top, prefix=""):
1274
"""See _walkdirs_utf8.
1276
This sub-function is called when we know the filesystem is already in utf8
1277
encoding. So we don't need to transcode filenames.
1280
_directory = _directory_kind
1281
# Use C accelerated directory listing.
1282
_listdir = _read_dir
1283
_kind_from_mode = _formats.get
1285
# 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
1286
# But we don't actually use 1-3 in pending, so set them to None
1287
pending = [(safe_utf8(prefix), None, None, None, safe_utf8(top))]
1289
relroot, _, _, _, top = pending.pop()
1291
relprefix = relroot + '/'
1294
top_slash = top + '/'
1297
append = dirblock.append
1298
# read_dir supplies in should-stat order.
1299
for _, name in sorted(_listdir(top)):
1300
abspath = top_slash + name
1301
statvalue = _lstat(abspath)
1302
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
1303
append((relprefix + name, name, kind, statvalue, abspath))
1305
yield (relroot, top), dirblock
1307
# push the user specified dirs from dirblock
1308
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1311
def _walkdirs_unicode_to_utf8(top, prefix=""):
1312
"""See _walkdirs_utf8
1314
Because Win32 has a Unicode api, all of the 'path-from-top' entries will be
1316
This is currently the fallback code path when the filesystem encoding is
1317
not UTF-8. It may be better to implement an alternative so that we can
1318
safely handle paths that are not properly decodable in the current
1321
_utf8_encode = codecs.getencoder('utf8')
1323
_directory = _directory_kind
1324
_listdir = os.listdir
1325
_kind_from_mode = _formats.get
1327
pending = [(safe_utf8(prefix), None, None, None, safe_unicode(top))]
1329
relroot, _, _, _, top = pending.pop()
1331
relprefix = relroot + '/'
1334
top_slash = top + u'/'
1337
append = dirblock.append
840
1338
for name in sorted(_listdir(top)):
841
abspath = top + '/' + name
842
statvalue = lstat(abspath)
843
dirblock.append ((relroot + name, name, file_kind_from_stat_mode(statvalue.st_mode), statvalue, abspath))
1339
name_utf8 = _utf8_encode(name)[0]
1340
abspath = top_slash + name
1341
statvalue = _lstat(abspath)
1342
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
1343
append((relprefix + name_utf8, name_utf8, kind, statvalue, abspath))
1344
yield (relroot, top), dirblock
845
1346
# push the user specified dirs from dirblock
846
for dir in reversed(dirblock):
847
if dir[2] == _directory:
1347
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1350
def copy_tree(from_path, to_path, handlers={}):
1351
"""Copy all of the entries in from_path into to_path.
1353
:param from_path: The base directory to copy.
1354
:param to_path: The target directory. If it does not exist, it will
1356
:param handlers: A dictionary of functions, which takes a source and
1357
destinations for files, directories, etc.
1358
It is keyed on the file kind, such as 'directory', 'symlink', or 'file'
1359
'file', 'directory', and 'symlink' should always exist.
1360
If they are missing, they will be replaced with 'shutil.copy2()',
1361
'os.mkdir()', and 'os.readlink() + os.symlink()', respectively.
1363
# Now, just copy the existing cached tree to the new location
1364
# We use a cheap trick here.
1365
# Absolute paths are prefixed with the first parameter
1366
# relative paths are prefixed with the second.
1367
# So we can get both the source and target returned
1368
# without any extra work.
1370
def copy_dir(source, dest):
1373
def copy_link(source, dest):
1374
"""Copy the contents of a symlink"""
1375
link_to = os.readlink(source)
1376
os.symlink(link_to, dest)
1378
real_handlers = {'file':shutil.copy2,
1379
'symlink':copy_link,
1380
'directory':copy_dir,
1382
real_handlers.update(handlers)
1384
if not os.path.exists(to_path):
1385
real_handlers['directory'](from_path, to_path)
1387
for dir_info, entries in walkdirs(from_path, prefix=to_path):
1388
for relpath, name, kind, st, abspath in entries:
1389
real_handlers[kind](abspath, relpath)
1392
def path_prefix_key(path):
    """Generate a prefix-order path key for path.

    This can be used to sort paths in the same way that walkdirs does.

    :param path: The path to build a key for.
    :return: A (dirname(path), path) tuple, so paths sort first by their
        containing directory and then by the full path.
    """
    return (dirname(path), path)
1400
def compare_paths_prefix_order(path_a, path_b):
    """Compare path_a and path_b to generate the same order walkdirs uses.

    :param path_a: First path to compare.
    :param path_b: Second path to compare.
    :return: The cmp() result of the two paths' path_prefix_key() values.
    """
    key_a = path_prefix_key(path_a)
    key_b = path_prefix_key(path_b)
    return cmp(key_a, key_b)
1407
_cached_user_encoding = None
1410
def get_user_encoding(use_cache=True):
1411
"""Find out what the preferred user encoding is.
1413
This is generally the encoding that is used for command line parameters
1414
and file contents. This may be different from the terminal encoding
1415
or the filesystem encoding.
1417
:param use_cache: Enable cache for detected encoding.
1418
(This parameter is turned on by default,
1419
and required only for selftesting)
1421
:return: A string defining the preferred user encoding
1423
global _cached_user_encoding
1424
if _cached_user_encoding is not None and use_cache:
1425
return _cached_user_encoding
1427
if sys.platform == 'darwin':
1428
# work around egregious python 2.4 bug
1429
sys.platform = 'posix'
1433
sys.platform = 'darwin'
1438
user_encoding = locale.getpreferredencoding()
1439
except locale.Error, e:
1440
sys.stderr.write('bzr: warning: %s\n'
1441
' Could not determine what text encoding to use.\n'
1442
' This error usually means your Python interpreter\n'
1443
' doesn\'t support the locale set by $LANG (%s)\n'
1444
" Continuing with ascii encoding.\n"
1445
% (e, os.environ.get('LANG')))
1446
user_encoding = 'ascii'
1448
# Windows returns 'cp0' to indicate there is no code page. So we'll just
1449
# treat that as ASCII, and not support printing unicode characters to the
1452
# For python scripts run under vim, we get '', so also treat that as ASCII
1453
if user_encoding in (None, 'cp0', ''):
1454
user_encoding = 'ascii'
1458
codecs.lookup(user_encoding)
1460
sys.stderr.write('bzr: warning:'
1461
' unknown encoding %s.'
1462
' Continuing with ascii encoding.\n'
1465
user_encoding = 'ascii'
1468
_cached_user_encoding = user_encoding
1470
return user_encoding
1473
def get_host_name():
1474
"""Return the current unicode host name.
1476
This is meant to be used in place of socket.gethostname() because that
1477
behaves inconsistently on different platforms.
1479
if sys.platform == "win32":
1481
return win32utils.get_host_name()
1484
return socket.gethostname().decode(get_user_encoding())
1487
def recv_all(socket, bytes):
1488
"""Receive an exact number of bytes.
1490
Regular Socket.recv() may return less than the requested number of bytes,
1491
depending on what's in the OS buffer. MSG_WAITALL is not available
1492
on all platforms, but this should work everywhere. This will return
1493
less than the requested amount if the remote end closes.
1495
This isn't optimized and is intended mostly for use in testing.
1498
while len(b) < bytes:
1499
new = socket.recv(bytes - len(b))
1506
def send_all(socket, bytes):
1507
"""Send all bytes on a socket.
1509
Regular socket.sendall() can give socket error 10053 on Windows. This
1510
implementation sends no more than 64k at a time, which avoids this problem.
1513
for pos in xrange(0, len(bytes), chunk_size):
1514
socket.sendall(bytes[pos:pos+chunk_size])
1517
def dereference_path(path):
    """Determine the real path to a file.

    All parent elements are dereferenced, but the final path element
    itself is not.

    :param path: The original path. May be absolute or relative.
    :return: the real path *to* the file
    """
    parent, base = os.path.split(path)
    # The pathjoin for '.' is a workaround for Python bug #1213894.
    # (initial path components aren't dereferenced)
    return pathjoin(realpath(pathjoin('.', parent)), base)
1531
def supports_mapi():
    """Return True if we can use MAPI to launch a mail client.

    The check is simply whether sys.platform is "win32".
    """
    return sys.platform == "win32"
1536
def resource_string(package, resource_name):
    """Load a resource from a package and return it as a string.

    Note: Only packages that start with bzrlib are currently supported.

    This is designed to be a lightweight implementation of resource
    loading in a way which is API compatible with the equivalent API
    described at
    http://peak.telecommunity.com/DevCenter/PkgResources#basic-resource-access.

    :param package: Dotted package name; must be "bzrlib" or start with
        "bzrlib.".
    :param resource_name: Name of the resource, relative to the package.
    :return: The contents of the resource file, read in 'rU' mode.
    :raises errors.BzrError: If package is not within bzrlib.
    """
    # Check package name is within bzrlib
    if package == "bzrlib":
        resource_relpath = resource_name
    elif package.startswith("bzrlib."):
        package = package[len("bzrlib."):].replace('.', os.sep)
        resource_relpath = pathjoin(package, resource_name)
    else:
        raise errors.BzrError('resource package %s not in bzrlib' % package)

    # Map the resource to a file and read its contents
    base = dirname(bzrlib.__file__)
    if getattr(sys, 'frozen', None):    # bzr.exe
        base = abspath(pathjoin(base, '..', '..'))
    filename = pathjoin(base, resource_relpath)
    # Close the handle explicitly instead of relying on garbage collection.
    f = open(filename, 'rU')
    try:
        return f.read()
    finally:
        f.close()
1566
from bzrlib._readdir_pyx import read_dir as _read_dir
1568
from bzrlib._readdir_py import read_dir as _read_dir