~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/transport/http/__init__.py

  • Committer: v.ladeuil+lp at free
  • Date: 2006-10-06 17:07:58 UTC
  • mto: (2145.1.1 keepalive)
  • mto: This revision was merged to the branch mainline in revision 2146.
  • Revision ID: v.ladeuil+lp@free.fr-20061006170758-dcfe0c203afa330c
Implements Range header handling for GET requests. Fix a test.

* bzrlib/transport/http/__init__.py:
(TestingHTTPRequestHandler.parse_ranges,
TestingHTTPRequestHandler.send_range_content,
TestingHTTPRequestHandler.get_single_range,
TestingHTTPRequestHandler.get_multiple_ranges,
TestingHTTPRequestHandler.do_GET): New methods. Implement Range
header handling for GET requests.

* bzrlib/tests/test_transport_implementations.py:
(TransportTests.test_readv): Add a test with a single range.

* bzrlib/tests/test_fetch.py:
(TestHttpFetch._count_log_matches): GET can succeed with a 206 code.
(TestHttpFetch.test_weaves_are_retrieved_once): log_pattern was not
used here.

Show diffs side-by-side

added added

removed removed

Lines of Context:
24
24
import mimetools
25
25
import os
26
26
import posixpath
 
27
import random
27
28
import re
28
29
import sys
29
30
import urlparse
31
32
from warnings import warn
32
33
 
33
34
# TODO: load these only when running http tests
34
 
import BaseHTTPServer, SimpleHTTPServer, socket, time
 
35
import BaseHTTPServer
 
36
from SimpleHTTPServer import SimpleHTTPRequestHandler
 
37
import socket
35
38
import threading
 
39
import time
36
40
 
37
41
from bzrlib import errors
38
42
from bzrlib.errors import (TransportNotPossible, NoSuchFile,
418
422
        return 'path %s is not in %s' % self.args
419
423
 
420
424
 
421
 
class TestingHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
 
425
class TestingHTTPRequestHandler(SimpleHTTPRequestHandler):
422
426
 
423
427
    def log_message(self, format, *args):
424
428
        self.server.test_case.log('webserver - %s - - [%s] %s "%s" "%s"',
462
466
        method = getattr(self, mname)
463
467
        method()
464
468
 
 
469
    # Matches one 'start-end' range spec made of two decimal offsets,
    # e.g. '0-499'; the offsets are captured as groups 'start' and 'end'.
    _range_regexp = re.compile(r'^(?P<start>\d+)-(?P<end>\d+)$')
 
470
    # Matches one '-N' suffix spec, e.g. '-500'; the decimal count is
    # captured as group 'tail'.
    _tail_regexp = re.compile(r'^-(?P<tail>\d+)$')
 
471
 
 
472
    def parse_ranges(self, ranges_header):
 
473
        """Parse the range header value and returns ranges and tail"""
 
474
        tail = 0
 
475
        ranges = []
 
476
        assert ranges_header.startswith('bytes=')
 
477
        ranges_header = ranges_header[len('bytes='):]
 
478
        for range_str in ranges_header.split(','):
 
479
            range_match = self._range_regexp.match(range_str)
 
480
            if range_match is not None:
 
481
                ranges.append((int(range_match.group('start')),
 
482
                               int(range_match.group('end'))))
 
483
            else:
 
484
                tail_match = self._tail_regexp.match(range_str)
 
485
                if tail_match is not None:
 
486
                    tail = int(tail_match.group('tail'))
 
487
        return tail, ranges
 
488
 
 
489
    def send_range_content(self, file, start, length):
 
490
        file.seek(start)
 
491
        self.wfile.write(file.read(length))
 
492
 
 
493
    def get_single_range(self, file, file_size, start, end):
 
494
        self.send_response(206)
 
495
        length = end - start + 1
 
496
        self.send_header('Accept-Ranges', 'bytes')
 
497
        self.send_header("Content-Length", "%d" % length)
 
498
 
 
499
        self.send_header("Content-type", 'application/octet-stream')
 
500
        self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
 
501
                                                              end,
 
502
                                                              file_size))
 
503
        self.end_headers()
 
504
        self.send_range_content(file, start, length)
 
505
 
 
506
    def get_multiple_ranges(self, file, file_size, ranges):
 
507
        self.send_response(206)
 
508
        self.send_header('Accept-Ranges', 'bytes')
 
509
        boundary = "%d" % random.randint(0,0x7FFFFFFF)
 
510
        self.send_header("Content-Type",
 
511
                         "multipart/byteranges; boundary=%s" % boundary)
 
512
        self.end_headers()
 
513
        for (start, end) in ranges:
 
514
            self.wfile.write("--%s\r\n" % boundary)
 
515
            self.send_header("Content-type", 'application/octet-stream')
 
516
            self.send_header("Content-Range", "bytes %d-%d/%d" % (start,
 
517
                                                                  end,
 
518
                                                                  file_size))
 
519
            self.end_headers()
 
520
            self.send_range_content(file, start, end - start + 1)
 
521
            self.wfile.write("--%s\r\n" % boundary)
 
522
            pass
 
523
 
 
524
    def do_GET(self):
 
525
        """Serve a GET request.
 
526
 
 
527
        Handles the Range header.
 
528
        """
 
529
 
 
530
        path = self.translate_path(self.path)
 
531
        ranges_header_value = self.headers.get('Range')
 
532
        if ranges_header_value is None or os.path.isdir(path):
 
533
            # Let the mother class handle most cases
 
534
            return SimpleHTTPRequestHandler.do_GET(self)
 
535
 
 
536
        try:
 
537
            # Always read in binary mode. Opening files in text
 
538
            # mode may cause newline translations, making the
 
539
            # actual size of the content transmitted *less* than
 
540
            # the content-length!
 
541
            file = open(path, 'rb')
 
542
        except IOError:
 
543
            self.send_error(404, "File not found")
 
544
            return None
 
545
 
 
546
        file_size = os.fstat(file.fileno())[6]
 
547
        tail, ranges = self.parse_ranges(ranges_header_value)
 
548
        # Normalize tail into ranges
 
549
        if tail != 0:
 
550
            ranges.append((file_size - tail, file_size))
 
551
 
 
552
        ranges_valid = True
 
553
        if len(ranges) == 0:
 
554
            ranges_valid = False
 
555
        else:
 
556
            for (start, end) in ranges:
 
557
                if start >= file_size or end >= file_size:
 
558
                    ranges_valid = False
 
559
                    break
 
560
        if not ranges_valid:
 
561
            # RFC2616 14-16 says that invalid Range headers
 
562
            # should be ignored and in that case, the whole file
 
563
            # should be returned as if no Range header was
 
564
            # present
 
565
            file.close() # Will be reopened by the following call
 
566
            return SimpleHTTPRequestHandler.do_GET(self)
 
567
 
 
568
        if len(ranges) == 1:
 
569
            (start, end) = ranges[0]
 
570
            self.get_single_range(file, file_size, start, end)
 
571
        else:
 
572
            self.get_multiple_ranges(file, file_size, ranges)
 
573
        file.close()
 
574
 
465
575
    if sys.platform == 'win32':
466
576
        # On win32 you cannot access non-ascii filenames without
467
577
        # decoding them into unicode first.
580
690
        # etc
581
691
        return 'http://127.0.0.1:1/'
582
692
 
 
693
 
583
694
class WallRequestHandler(TestingHTTPRequestHandler):
584
695
    """Whatever request comes in, close the connection"""
585
696