~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/tests/test__groupcompress.py

Committer: John Arbash Meinel
Date: 2011-05-12 11:38:39 UTC
mto: (5755.2.10 2.4-max-entries-gc-602614)
mto: This revision was merged to the branch mainline in revision 5870.
Revision ID: john@arbash-meinel.com-20110512113839-reasdnirl4889ptb

Add a max_entries_per_source to DeltaIndex

This changes the sampling rate in the create_delta_from_source.
This isn't exposed higher up yet, but it works so far.

files modified:
bzrlib/_groupcompress_pyx.pyx

bzrlib/delta.h

bzrlib/diff-delta.c

bzrlib/tests/test__groupcompress.py

Show diffs side-by-side

added added

removed removed

bzrlib/tests/test__groupcompress.py

264

di = self._gc_module.DeltaIndex('test text\n')

265

self.assertEqual('DeltaIndex(1, 10)', repr(di))

266

267

def test__dump_no_index(self):

268

di = self._gc_module.DeltaIndex()

269

self.assertEqual(None, di._dump_index())

270

271

def test__dump_index_simple(self):

272

di = self._gc_module.DeltaIndex()

273

di.add_source(_text1, 0)

274

self.assertFalse(di._has_index())

275

self.assertEqual(None, di._dump_index())

276

_ = di.make_delta(_text1)

277

self.assertTrue(di._has_index())

278

hash_list, entry_list = di._dump_index()

279

self.assertEqual(16, len(hash_list))

280

self.assertEqual(68, len(entry_list))

281

just_entries = [(idx, text_offset, hash_val)

282

for idx, (text_offset, hash_val)

283

in enumerate(entry_list)

284

if text_offset != 0 or hash_val != 0]

285

rabin_hash = self._gc_module.rabin_hash

286

self.assertEqual([(8, 16, rabin_hash(_text1[1:17])),

287

(25, 48, rabin_hash(_text1[33:49])),

288

(34, 32, rabin_hash(_text1[17:33])),

289

(47, 64, rabin_hash(_text1[49:65])),

290

], just_entries)

291

# This ensures that the hash map points to the location we expect it to

292

for entry_idx, text_offset, hash_val in just_entries:

293

self.assertEqual(entry_idx, hash_list[hash_val & 0xf])

294

295

def test__dump_index_two_sources(self):

296

di = self._gc_module.DeltaIndex()

297

di.add_source(_text1, 0)

298

di.add_source(_text2, 2)

299

start2 = len(_text1) + 2

300

self.assertTrue(di._has_index())

301

hash_list, entry_list = di._dump_index()

302

self.assertEqual(16, len(hash_list))

303

self.assertEqual(68, len(entry_list))

304

just_entries = [(idx, text_offset, hash_val)

305

for idx, (text_offset, hash_val)

306

in enumerate(entry_list)

307

if text_offset != 0 or hash_val != 0]

308

rabin_hash = self._gc_module.rabin_hash

309

self.assertEqual([(8, 16, rabin_hash(_text1[1:17])),

310

(9, start2+16, rabin_hash(_text2[1:17])),

311

(25, 48, rabin_hash(_text1[33:49])),

312

(30, start2+64, rabin_hash(_text2[49:65])),

313

(34, 32, rabin_hash(_text1[17:33])),

314

(35, start2+32, rabin_hash(_text2[17:33])),

315

(43, start2+48, rabin_hash(_text2[33:49])),

316

(47, 64, rabin_hash(_text1[49:65])),

317

], just_entries)

318

# Each entry should be in the appropriate hash bucket.

319

for entry_idx, text_offset, hash_val in just_entries:

320

hash_idx = hash_val & 0xf

321

self.assertTrue(

322

hash_list[hash_idx] <= entry_idx < hash_list[hash_idx+1])

323

267

324

def test_first_add_source_doesnt_index_until_make_delta(self):

268

325

di = self._gc_module.DeltaIndex()

269

326

self.assertFalse(di._has_index())

275

332

self.assertTrue(di._has_index())

276

333

self.assertEqual('N\x90/\x1fdiffer from\nagainst other text\n', delta)

277

334

335

def test_add_source_max_entries(self):

336

di = self._gc_module.DeltaIndex()

337

di._max_entries_per_source = 3

338

di.add_source(_text1, 0) # (77 bytes -1) // 3 = 25 byte stride

339

di.add_source(_text3, 3) # (135 bytes -1) // 3 = 44 byte stride

340

start2 = len(_text1) + 3

341

hash_list, entry_list = di._dump_index()

342

self.assertEqual(16, len(hash_list))

343

self.assertEqual(67, len(entry_list))

344

just_entries = sorted([(text_offset, hash_val)

345

for text_offset, hash_val in entry_list

346

if text_offset != 0 or hash_val != 0])

347

rabin_hash = self._gc_module.rabin_hash

348

self.assertEqual([(25, rabin_hash(_text1[10:26])),

349

(50, rabin_hash(_text1[35:51])),

350

(75, rabin_hash(_text1[60:76])),

351

(start2+44, rabin_hash(_text3[29:45])),

352

(start2+88, rabin_hash(_text3[73:89])),

353

(start2+132, rabin_hash(_text3[117:133])),

354

], just_entries)

355

278

356

def test_second_add_source_triggers_make_index(self):

279

357

di = self._gc_module.DeltaIndex()

280

358

self.assertFalse(di._has_index())

Older »