~bzr-pqm/bzr/bzr.dev

Committer: John Arbash Meinel
Date: 2007-07-11 00:01:54 UTC
mto: This revision was merged to the branch mainline in revision 2643.
Revision ID: john@arbash-meinel.com-20070711000154-4et8yf8si3jgxmgc

(broken) Try to properly implement DirState._bisect*
Involves rewriting some helper functions.
Currently something is wrong.

files modified:
bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/dirstate.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_dirstate.py

Show diffs side-by-side

added added

removed removed

bzrlib/dirstate.py

199

200

"""

201

202

203

202

import bisect

204

203

import binascii

205

204

import errno

423

422

if self._id_index:

424

423

self._id_index.setdefault(entry_key[2], set()).add(entry_key)

425

424

426

def _bisect(self, dir_name_list):

425

def _bisect(self, paths):

427

426

"""Bisect through the disk structure for specific rows.

428

427

429

:param dir_name_list: A list of (dir, name) pairs.

430

:return: A dict mapping (dir, name) => entry for found entries. Missing

428

:param paths: A list of paths to find

429

:return: A dict mapping path => entries for found entries. Missing

431

430

entries will not be in the map.

431

The list is not sorted, and entries will be populated

432

based on when they were read.

432

433

"""

433

434

self._requires_lock()

434

435

# We need the file pointer to be right after the initial header block

454

455

found = {}

455

456

457

# Avoid infinite seeking

457

max_count = 30*len(dir_name_list)

458

max_count = 30*len(paths)

458

459

count = 0

459

460

# pending is a list of places to look.

460

461

# each entry is a tuple of low, high, dir_names

462

463

# high -> the last byte offset (inclusive)

463

464

# dir_names -> The list of (dir, name) pairs that should be found in

464

465

# the [low, high] range

465

pending = [(low, high, dir_name_list)]

466

pending = [(low, high, paths)]

466

467

468

page_size = self._bisect_page_size

468

469

521

522

# Find what entries we are looking for, which occur before and

522

523

# after this first record.

523

524

after = start

524

first_dir_name = (first_fields[1], first_fields[2])

525

first_loc = bisect.bisect_left(cur_files, first_dir_name)

525

if first_fields[1]:

526

first_path = first_fields[1] + '/' + first_fields[2]

527

else:

528

first_path = first_fields[2]

529

first_loc = bisect_path_left(cur_files, first_path)

526

530

527

531

# These exist before the current location

528

532

pre = cur_files[:first_loc]

545

549

else:

546

550

after = mid + len(block)

547

551

548

last_dir_name = (last_fields[1], last_fields[2])

549

last_loc = bisect.bisect_right(post, last_dir_name)

552

if last_fields[1]:

553

last_path = last_fields[1] + '/' + last_fields[2]

554

else:

555

last_path = last_fields[2]

556

last_loc = bisect_path_right(post, last_path)

550

557

551

558

middle_files = post[:last_loc]

552

559

post = post[last_loc:]

557

564

# Either we will find them here, or we can mark them as

558

565

# missing.

559

566

560

if middle_files[0] == first_dir_name:

567

if middle_files[0] == first_path:

561

568

# We might need to go before this location

562

pre.append(first_dir_name)

563

if middle_files[-1] == last_dir_name:

564

post.insert(0, last_dir_name)

569

pre.append(first_path)

570

if middle_files[-1] == last_path:

571

post.insert(0, last_path)

565

572

566

573

# Find out what paths we have

567

paths = {first_dir_name:[first_fields]}

568

# last_dir_name might == first_dir_name so we need to be

574

paths = {first_path:[first_fields]}

575

# last_path might == first_path so we need to be

569

576

# careful if we should append rather than overwrite

570

577

if last_entry_num != first_entry_num:

571

paths.setdefault(last_dir_name, []).append(last_fields)

578

paths.setdefault(last_path, []).append(last_fields)

572

579

for num in xrange(first_entry_num+1, last_entry_num):

573

580

# TODO: jam 20070223 We are already splitting here, so

574

581

# shouldn't we just split the whole thing rather

575

582

# than doing the split again in add_one_record?

576

583

fields = entries[num].split('\0')

577

dir_name = (fields[1], fields[2])

578

paths.setdefault(dir_name, []).append(fields)

584

if fields[1]:

585

path = fields[1] + '/' + fields[2]

586

else:

587

path = fields[2]

588

paths.setdefault(path, []).append(fields)

579

589

580

for dir_name in middle_files:

581

for fields in paths.get(dir_name, []):

590

for path in middle_files:

591

for fields in paths.get(path, []):

582

592

# offset by 1 because of the opening '\0'

583

593

# consider changing fields_to_entry to avoid the

584

594

# extra list slice

585

595

entry = fields_to_entry(fields[1:])

586

found.setdefault(dir_name, []).append(entry)

596

found.setdefault(path, []).append(entry)

587

597

588

598

# Now we have split up everything into pre, middle, and post, and

589

599

# we have handled everything that fell in 'middle'.

705

715

# after this first record.

706

716

after = start

707

717

first_dir = first_fields[1]

708

first_loc = bisect.bisect_left(cur_dirs, first_dir)

718

first_loc = bisect_path_left(cur_dirs, first_dir)

709

719

710

720

# These exist before the current location

711

721

pre = cur_dirs[:first_loc]

729

739

after = mid + len(block)

730

740

731

741

last_dir = last_fields[1]

732

last_loc = bisect.bisect_right(post, last_dir)

742

last_loc = bisect_path_right(post, last_dir)

733

743

734

744

middle_files = post[:last_loc]

735

745

post = post[last_loc:]

779

789

780

790

return found

781

791

782

def _bisect_recursive(self, dir_name_list):

792

def _bisect_recursive(self, paths):

783

793

"""Bisect for entries for all paths and their children.

784

794

785

795

This will use bisect to find all records for the supplied paths. It

798

808

# Directories that have been read

799

809

processed_dirs = set()

800

810

# Get the ball rolling with the first bisect for all entries.

801

newly_found = self._bisect(dir_name_list)

811

newly_found = self._bisect(paths)

802

812

803

813

while newly_found:

804

814

# Directories that need to be read

2267

2277

from bzrlib._dirstate_helpers_c import (

2268

2278

_read_dirblocks_c as _read_dirblocks,

2269

2279

bisect_dirblock_c as bisect_dirblock,

2280

bisect_path_left_c as bisect_path_left,

2281

bisect_path_right_c as bisect_path_right,

2270

2282

cmp_by_dirs_c as cmp_by_dirs,

2271

2283

)

2272

2284

except ImportError:

2273

2285

from bzrlib._dirstate_helpers_py import (

2274

2286

_read_dirblocks_py as _read_dirblocks,

2275

2287

bisect_dirblock_py as bisect_dirblock,

2288

bisect_path_left_py as bisect_path_left,

2289

bisect_path_right_py as bisect_path_right,

2276

2290

cmp_by_dirs_py as cmp_by_dirs,

2277

2291

)

Older »