344
333
"""We expect to be able to atomically replace 'new' with old.
346
335
On win32, if new exists, it must be moved out of the way first,
350
339
fancy_rename(old, new, rename_func=os.rename, unlink_func=os.unlink)
351
340
except OSError, e:
352
341
if e.errno in (errno.EPERM, errno.EACCES, errno.EBUSY, errno.EINVAL):
353
# If we try to rename a non-existent file onto cwd, we get
354
# EPERM or EACCES instead of ENOENT, this will raise ENOENT
342
# If we try to rename a non-existent file onto cwd, we get
343
# EPERM or EACCES instead of ENOENT, this will raise ENOENT
355
344
# if the old path doesn't exist, sometimes we get EACCES
356
345
# On Linux, we seem to get EBUSY, on Mac we get EINVAL
532
def pumpfile(fromfile, tofile):
532
def pumpfile(from_file, to_file, read_length=-1, buff_size=32768,
533
report_activity=None, direction='read'):
533
534
"""Copy contents of one file to another.
536
The read_length can either be -1 to read to end-of-file (EOF) or
537
it can specify the maximum number of bytes to read.
539
The buff_size represents the maximum size for each read operation
540
performed on from_file.
542
:param report_activity: Call this as bytes are read, see
543
Transport._report_activity
544
:param direction: Will be passed to report_activity
535
546
:return: The number of bytes copied.
540
b = fromfile.read(BUFSIZE)
550
# read specified number of bytes
552
while read_length > 0:
553
num_bytes_to_read = min(read_length, buff_size)
555
block = from_file.read(num_bytes_to_read)
559
if report_activity is not None:
560
report_activity(len(block), direction)
563
actual_bytes_read = len(block)
564
read_length -= actual_bytes_read
565
length += actual_bytes_read
569
block = from_file.read(buff_size)
573
if report_activity is not None:
574
report_activity(len(block), direction)
580
def pump_string_file(bytes, file_handle, segment_size=None):
581
"""Write bytes to file_handle in many smaller writes.
583
:param bytes: The string to write.
584
:param file_handle: The file to write to.
586
# Write data in chunks rather than all at once, because very large
587
# writes fail on some platforms (e.g. Windows with SMB mounted
590
segment_size = 5242880 # 5MB
591
segments = range(len(bytes) / segment_size + 1)
592
write = file_handle.write
593
for segment_index in segments:
594
segment = buffer(bytes, segment_index * segment_size, segment_size)
548
598
def file_iterator(input_file, readsize=32768):
550
600
b = input_file.read(readsize)
628
697
:param timezone: How to display the time: 'utc', 'original' for the
629
698
timezone specified by offset, or 'local' for the process's current
631
:param show_offset: Whether to append the timezone.
632
:param date_fmt: strftime format.
700
:param date_fmt: strftime format.
701
:param show_offset: Whether to append the timezone.
703
(date_fmt, tt, offset_str) = \
704
_format_date(t, offset, timezone, date_fmt, show_offset)
705
date_fmt = date_fmt.replace('%a', weekdays[tt[6]])
706
date_str = time.strftime(date_fmt, tt)
707
return date_str + offset_str
709
def format_local_date(t, offset=0, timezone='original', date_fmt=None,
711
"""Return a unicode date string formatted according to the current locale.
713
:param t: Seconds since the epoch.
714
:param offset: Timezone offset in seconds east of utc.
715
:param timezone: How to display the time: 'utc', 'original' for the
716
timezone specified by offset, or 'local' for the process's current
718
:param date_fmt: strftime format.
719
:param show_offset: Whether to append the timezone.
721
(date_fmt, tt, offset_str) = \
722
_format_date(t, offset, timezone, date_fmt, show_offset)
723
date_str = time.strftime(date_fmt, tt)
724
if not isinstance(date_str, unicode):
725
date_str = date_str.decode(get_user_encoding(), 'replace')
726
return date_str + offset_str
728
def _format_date(t, offset, timezone, date_fmt, show_offset):
634
729
if timezone == 'utc':
635
730
tt = time.gmtime(t)
772
868
return pathjoin(*p)
871
def parent_directories(filename):
872
"""Return the list of parent directories, deepest first.
874
For example, parent_directories("a/b/c") -> ["a/b", "a"].
877
parts = splitpath(dirname(filename))
879
parents.append(joinpath(parts))
885
from bzrlib._chunks_to_lines_pyx import chunks_to_lines
887
from bzrlib._chunks_to_lines_py import chunks_to_lines
775
890
def split_lines(s):
776
891
"""Split s into lines, but without removing the newline characters."""
892
# Trivially convert a fulltext into a 'chunked' representation, and let
893
# chunks_to_lines do the heavy lifting.
894
if isinstance(s, str):
895
# chunks_to_lines only supports 8-bit strings
896
return chunks_to_lines([s])
898
return _split_lines(s)
902
"""Split s into lines, but without removing the newline characters.
904
This supports Unicode or plain string objects.
777
906
lines = s.split('\n')
778
907
result = [line + '\n' for line in lines[:-1]]
1040
def _cicp_canonical_relpath(base, path):
1041
"""Return the canonical path relative to base.
1043
Like relpath, but on case-insensitive-case-preserving file-systems, this
1044
will return the relpath as stored on the file-system rather than in the
1045
case specified in the input string, for all existing portions of the path.
1047
This will cause O(N) behaviour if called for every path in a tree; if you
1048
have a number of paths to convert, you should use canonical_relpaths().
1050
# TODO: it should be possible to optimize this for Windows by using the
1051
# win32 API FindFiles function to look for the specified name - but using
1052
# os.listdir() still gives us the correct, platform agnostic semantics in
1055
rel = relpath(base, path)
1056
# '.' will have been turned into ''
1060
abs_base = abspath(base)
1062
_listdir = os.listdir
1064
# use an explicit iterator so we can easily consume the rest on early exit.
1065
bit_iter = iter(rel.split('/'))
1066
for bit in bit_iter:
1068
for look in _listdir(current):
1069
if lbit == look.lower():
1070
current = pathjoin(current, look)
1073
# got to the end, nothing matched, so we just return the
1074
# non-existing bits as they were specified (the filename may be
1075
# the target of a move, for example).
1076
current = pathjoin(current, bit, *list(bit_iter))
1078
return current[len(abs_base)+1:]
1080
# XXX - TODO - we need better detection/integration of case-insensitive
1081
# file-systems; Linux often sees FAT32 devices (or NFS-mounted OSX
1082
# filesystems), for example, so could probably benefit from the same basic
1083
# support there. For now though, only Windows and OSX get that support, and
1084
# they get it for *all* file-systems!
1085
if sys.platform in ('win32', 'darwin'):
1086
canonical_relpath = _cicp_canonical_relpath
1088
canonical_relpath = relpath
1090
def canonical_relpaths(base, paths):
1091
"""Create an iterable to canonicalize a sequence of relative paths.
1093
The intent is for this implementation to use a cache, vastly speeding
1094
up multiple transformations in the same directory.
1096
# but for now, we haven't optimized...
1097
return [canonical_relpath(base, p) for p in paths]
892
1099
def safe_unicode(unicode_or_utf8_string):
893
1100
"""Coerce unicode_or_utf8_string into unicode.
895
1102
If it is unicode, it is returned.
896
Otherwise it is decoded from utf-8. If a decoding error
897
occurs, the exception is wrapped
898
as a BzrBadParameter exception.
1103
Otherwise it is decoded from utf-8. If decoding fails, the exception is
1104
wrapped in a BzrBadParameterNotUnicode exception.
900
1106
if isinstance(unicode_or_utf8_string, unicode):
901
1107
return unicode_or_utf8_string
1087
1293
raise errors.IllegalPath(path)
1296
_WIN32_ERROR_DIRECTORY = 267 # Similar to errno.ENOTDIR
1298
def _is_error_enotdir(e):
1299
"""Check if this exception represents ENOTDIR.
1301
Unfortunately, python is very inconsistent about the exception
1302
here. The cases are:
1303
1) Linux, Mac OSX all versions seem to set errno == ENOTDIR
1304
2) Windows, Python2.4, uses errno == ERROR_DIRECTORY (267)
1305
which is the windows error code.
1306
3) Windows, Python2.5 uses errno == EINVAL and
1307
winerror == ERROR_DIRECTORY
1309
:param e: An Exception object (expected to be OSError with an errno
1310
attribute, but we should be able to cope with anything)
1311
:return: True if this represents an ENOTDIR error. False otherwise.
1313
en = getattr(e, 'errno', None)
1314
if (en == errno.ENOTDIR
1315
or (sys.platform == 'win32'
1316
and (en == _WIN32_ERROR_DIRECTORY
1317
or (en == errno.EINVAL
1318
and getattr(e, 'winerror', None) == _WIN32_ERROR_DIRECTORY)
1090
1324
def walkdirs(top, prefix=""):
1091
1325
"""Yield data about all the directories in a tree.
1093
1327
This yields all the data about the contents of a directory at a time.
1094
1328
After each directory has been yielded, if the caller has mutated the list
1095
1329
to exclude some directories, they are then not descended into.
1097
1331
The data yielded is of the form:
1098
1332
((directory-relpath, directory-path-from-top),
1099
1333
[(relpath, basename, kind, lstat, path-from-top), ...]),
1100
1334
- directory-relpath is the relative path of the directory being returned
1101
1335
with respect to top. prefix is prepended to this.
1102
- directory-path-from-root is the path including top for this directory.
1336
- directory-path-from-root is the path including top for this directory.
1103
1337
It is suitable for use with os functions.
1104
1338
- relpath is the relative path within the subtree being walked.
1105
1339
- basename is the basename of the path
1107
1341
present within the tree - but it may be recorded as versioned. See
1108
1342
versioned_kind.
1109
1343
- lstat is the stat data *if* the file was statted.
1110
- planned, not implemented:
1344
- planned, not implemented:
1111
1345
path_from_tree_root is the path from the root of the tree.
1113
:param prefix: Prefix the relpaths that are yielded with 'prefix'. This
1347
:param prefix: Prefix the relpaths that are yielded with 'prefix'. This
1114
1348
allows one to walk a subtree but get paths that are relative to a tree
1115
1349
rooted higher up.
1116
1350
:return: an iterator over the dirs.
1118
1352
#TODO there is a bit of a smell where the results of the directory-
1119
# summary in this, and the path from the root, may not agree
1353
# summary in this, and the path from the root, may not agree
1120
1354
# depending on top and prefix - i.e. ./foo and foo as a pair leads to
1121
1355
# potentially confusing output. We should make this more robust - but
1122
1356
# not at a speed cost. RBC 20060731
1123
1357
_lstat = os.lstat
1124
1358
_directory = _directory_kind
1125
1359
_listdir = os.listdir
1126
_kind_from_mode = _formats.get
1360
_kind_from_mode = file_kind_from_stat_mode
1127
1361
pending = [(safe_unicode(prefix), "", _directory, None, safe_unicode(top))]
1129
1363
# 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
1138
1372
append = dirblock.append
1139
for name in sorted(_listdir(top)):
1140
abspath = top_slash + name
1141
statvalue = _lstat(abspath)
1142
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
1143
append((relprefix + name, name, kind, statvalue, abspath))
1374
names = sorted(_listdir(top))
1376
if not _is_error_enotdir(e):
1380
abspath = top_slash + name
1381
statvalue = _lstat(abspath)
1382
kind = _kind_from_mode(statvalue.st_mode)
1383
append((relprefix + name, name, kind, statvalue, abspath))
1144
1384
yield (relroot, top), dirblock
1146
1386
# push the user specified dirs from dirblock
1147
1387
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1390
class DirReader(object):
1391
"""An interface for reading directories."""
1393
def top_prefix_to_starting_dir(self, top, prefix=""):
1394
"""Converts top and prefix to a starting dir entry
1396
:param top: A utf8 path
1397
:param prefix: An optional utf8 path to prefix output relative paths
1399
:return: A tuple starting with prefix, and ending with the native
1402
raise NotImplementedError(self.top_prefix_to_starting_dir)
1404
def read_dir(self, prefix, top):
1405
"""Read a specific dir.
1407
:param prefix: A utf8 prefix to be prepended to the path basenames.
1408
:param top: A natively encoded path to read.
1409
:return: A list of the directory's contents. Each item contains:
1410
(utf8_relpath, utf8_name, kind, lstatvalue, native_abspath)
1412
raise NotImplementedError(self.read_dir)
1415
_selected_dir_reader = None
1150
1418
def _walkdirs_utf8(top, prefix=""):
1151
1419
"""Yield data about all the directories in a tree.
1161
1429
path-from-top might be unicode or utf8, but it is the correct path to
1162
1430
pass to os functions to affect the file in question. (such as os.lstat)
1164
fs_encoding = _fs_enc.upper()
1165
if (sys.platform == 'win32' or
1166
fs_encoding not in ('UTF-8', 'US-ASCII', 'ANSI_X3.4-1968')): # ascii
1167
return _walkdirs_unicode_to_utf8(top, prefix=prefix)
1169
return _walkdirs_fs_utf8(top, prefix=prefix)
1172
def _walkdirs_fs_utf8(top, prefix=""):
1173
"""See _walkdirs_utf8.
1175
This sub-function is called when we know the filesystem is already in utf8
1176
encoding. So we don't need to transcode filenames.
1179
_directory = _directory_kind
1180
_listdir = os.listdir
1181
_kind_from_mode = _formats.get
1432
global _selected_dir_reader
1433
if _selected_dir_reader is None:
1434
fs_encoding = _fs_enc.upper()
1435
if sys.platform == "win32" and win32utils.winver == 'Windows NT':
1436
# Win98 doesn't have unicode apis like FindFirstFileW
1437
# TODO: We possibly could support Win98 by falling back to the
1438
# original FindFirstFile, and using TCHAR instead of WCHAR,
1439
# but that gets a bit tricky, and requires custom compiling
1442
from bzrlib._walkdirs_win32 import Win32ReadDir
1443
_selected_dir_reader = Win32ReadDir()
1446
elif fs_encoding in ('UTF-8', 'US-ASCII', 'ANSI_X3.4-1968'):
1447
# ANSI_X3.4-1968 is a form of ASCII
1449
from bzrlib._readdir_pyx import UTF8DirReader
1450
_selected_dir_reader = UTF8DirReader()
1454
if _selected_dir_reader is None:
1455
# Fallback to the python version
1456
_selected_dir_reader = UnicodeDirReader()
1183
1458
# 0 - relpath, 1- basename, 2- kind, 3- stat, 4-toppath
1184
1459
# But we don't actually use 1-3 in pending, so set them to None
1185
pending = [(safe_utf8(prefix), None, None, None, safe_utf8(top))]
1460
pending = [[_selected_dir_reader.top_prefix_to_starting_dir(top, prefix)]]
1461
read_dir = _selected_dir_reader.read_dir
1462
_directory = _directory_kind
1187
relroot, _, _, _, top = pending.pop()
1189
relprefix = relroot + '/'
1192
top_slash = top + '/'
1195
append = dirblock.append
1196
for name in sorted(_listdir(top)):
1197
abspath = top_slash + name
1198
statvalue = _lstat(abspath)
1199
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
1200
append((relprefix + name, name, kind, statvalue, abspath))
1464
relroot, _, _, _, top = pending[-1].pop()
1467
dirblock = sorted(read_dir(relroot, top))
1201
1468
yield (relroot, top), dirblock
1203
1469
# push the user specified dirs from dirblock
1204
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1207
def _walkdirs_unicode_to_utf8(top, prefix=""):
1208
"""See _walkdirs_utf8
1210
Because Win32 has a Unicode api, all of the 'path-from-top' entries will be
1212
This is currently the fallback code path when the filesystem encoding is
1213
not UTF-8. It may be better to implement an alternative so that we can
1214
safely handle paths that are not properly decodable in the current
1217
_utf8_encode = codecs.getencoder('utf8')
1219
_directory = _directory_kind
1220
_listdir = os.listdir
1221
_kind_from_mode = _formats.get
1223
pending = [(safe_utf8(prefix), None, None, None, safe_unicode(top))]
1225
relroot, _, _, _, top = pending.pop()
1227
relprefix = relroot + '/'
1470
next = [d for d in reversed(dirblock) if d[2] == _directory]
1472
pending.append(next)
1475
class UnicodeDirReader(DirReader):
1476
"""A dir reader for non-utf8 file systems, which transcodes."""
1478
__slots__ = ['_utf8_encode']
1481
self._utf8_encode = codecs.getencoder('utf8')
1483
def top_prefix_to_starting_dir(self, top, prefix=""):
1484
"""See DirReader.top_prefix_to_starting_dir."""
1485
return (safe_utf8(prefix), None, None, None, safe_unicode(top))
1487
def read_dir(self, prefix, top):
1488
"""Read a single directory from a non-utf8 file system.
1490
top, and the abspath element in the output are unicode, all other paths
1491
are utf8. Local disk IO is done via unicode calls to listdir etc.
1493
This is currently the fallback code path when the filesystem encoding is
1494
not UTF-8. It may be better to implement an alternative so that we can
1495
safely handle paths that are not properly decodable in the current
1498
See DirReader.read_dir for details.
1500
_utf8_encode = self._utf8_encode
1502
_listdir = os.listdir
1503
_kind_from_mode = file_kind_from_stat_mode
1506
relprefix = prefix + '/'
1230
1509
top_slash = top + u'/'
1233
1512
append = dirblock.append
1234
1513
for name in sorted(_listdir(top)):
1235
name_utf8 = _utf8_encode(name)[0]
1515
name_utf8 = _utf8_encode(name)[0]
1516
except UnicodeDecodeError:
1517
raise errors.BadFilenameEncoding(
1518
_utf8_encode(relprefix)[0] + name, _fs_enc)
1236
1519
abspath = top_slash + name
1237
1520
statvalue = _lstat(abspath)
1238
kind = _kind_from_mode(statvalue.st_mode & 0170000, 'unknown')
1521
kind = _kind_from_mode(statvalue.st_mode)
1239
1522
append((relprefix + name_utf8, name_utf8, kind, statvalue, abspath))
1240
yield (relroot, top), dirblock
1242
# push the user specified dirs from dirblock
1243
pending.extend(d for d in reversed(dirblock) if d[2] == _directory)
1246
1526
def copy_tree(from_path, to_path, handlers={}):
1247
1527
"""Copy all of the entries in from_path into to_path.
1249
:param from_path: The base directory to copy.
1529
:param from_path: The base directory to copy.
1250
1530
:param to_path: The target directory. If it does not exist, it will
1252
1532
:param handlers: A dictionary of functions, which takes a source and
1378
1684
while len(b) < bytes:
1379
new = socket.recv(bytes - len(b))
1685
new = until_no_eintr(socket.recv, bytes - len(b))
1386
def send_all(socket, bytes):
1692
def send_all(socket, bytes, report_activity=None):
1387
1693
"""Send all bytes on a socket.
1389
1695
Regular socket.sendall() can give socket error 10053 on Windows. This
1390
1696
implementation sends no more than 64k at a time, which avoids this problem.
1698
:param report_activity: Call this as bytes are read, see
1699
Transport._report_activity
1392
1701
chunk_size = 2**16
1393
1702
for pos in xrange(0, len(bytes), chunk_size):
1394
socket.sendall(bytes[pos:pos+chunk_size])
1703
block = bytes[pos:pos+chunk_size]
1704
if report_activity is not None:
1705
report_activity(len(block), 'write')
1706
until_no_eintr(socket.sendall, block)
1397
1709
def dereference_path(path):
1440
1752
base = abspath(pathjoin(base, '..', '..'))
1441
1753
filename = pathjoin(base, resource_relpath)
1442
1754
return open(filename, 'rU').read()
1757
def file_kind_from_stat_mode_thunk(mode):
1758
global file_kind_from_stat_mode
1759
if file_kind_from_stat_mode is file_kind_from_stat_mode_thunk:
1761
from bzrlib._readdir_pyx import UTF8DirReader
1762
file_kind_from_stat_mode = UTF8DirReader().kind_from_mode
1764
from bzrlib._readdir_py import (
1765
_kind_from_mode as file_kind_from_stat_mode
1767
return file_kind_from_stat_mode(mode)
1768
file_kind_from_stat_mode = file_kind_from_stat_mode_thunk
1771
def file_kind(f, _lstat=os.lstat):
1773
return file_kind_from_stat_mode(_lstat(f).st_mode)
1775
if getattr(e, 'errno', None) in (errno.ENOENT, errno.ENOTDIR):
1776
raise errors.NoSuchFile(f)
1780
def until_no_eintr(f, *a, **kw):
1781
"""Run f(*a, **kw), retrying if an EINTR error occurs."""
1782
# Borrowed from Twisted's twisted.python.util.untilConcludes function.
1786
except (IOError, OSError), e:
1787
if e.errno == errno.EINTR:
1791
def re_compile_checked(re_string, flags=0, where=""):
1792
"""Return a compiled re, or raise a sensible error.
1794
This should only be used when compiling user-supplied REs.
1796
:param re_string: Text form of regular expression.
1797
:param flags: eg re.IGNORECASE
1798
:param where: Message explaining to the user the context where
1799
it occurred, eg 'log search filter'.
1801
# from https://bugs.launchpad.net/bzr/+bug/251352
1803
re_obj = re.compile(re_string, flags)
1808
where = ' in ' + where
1809
# despite the name 'error' is a type
1810
raise errors.BzrCommandError('Invalid regular expression%s: %r: %s'
1811
% (where, re_string, e))
1814
if sys.platform == "win32":
1817
return msvcrt.getch()
1822
fd = sys.stdin.fileno()
1823
settings = termios.tcgetattr(fd)
1826
ch = sys.stdin.read(1)
1828
termios.tcsetattr(fd, termios.TCSADRAIN, settings)
1832
if sys.platform == 'linux2':
1833
def _local_concurrency():
1835
prefix = 'processor'
1836
for line in file('/proc/cpuinfo', 'rb'):
1837
if line.startswith(prefix):
1838
concurrency = int(line[line.find(':')+1:]) + 1
1840
elif sys.platform == 'darwin':
1841
def _local_concurrency():
1842
return subprocess.Popen(['sysctl', '-n', 'hw.availcpu'],
1843
stdout=subprocess.PIPE).communicate()[0]
1844
elif sys.platform[0:7] == 'freebsd':
1845
def _local_concurrency():
1846
return subprocess.Popen(['sysctl', '-n', 'hw.ncpu'],
1847
stdout=subprocess.PIPE).communicate()[0]
1848
elif sys.platform == 'sunos5':
1849
def _local_concurrency():
1850
return subprocess.Popen(['psrinfo', '-p',],
1851
stdout=subprocess.PIPE).communicate()[0]
1852
elif sys.platform == "win32":
1853
def _local_concurrency():
1854
# This appears to return the number of cores.
1855
return os.environ.get('NUMBER_OF_PROCESSORS')
1857
def _local_concurrency():
1862
_cached_local_concurrency = None
1864
def local_concurrency(use_cache=True):
1865
"""Return how many processes can be run concurrently.
1867
Rely on platform specific implementations and default to 1 (one) if
1868
anything goes wrong.
1870
global _cached_local_concurrency
1871
if _cached_local_concurrency is not None and use_cache:
1872
return _cached_local_concurrency
1875
concurrency = _local_concurrency()
1876
except (OSError, IOError):
1879
concurrency = int(concurrency)
1880
except (TypeError, ValueError):
1883
_cached_concurrency = concurrency