~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/blackbox/test_non_ascii.py

Committer: John Arbash Meinel
Date: 2006-01-13 06:04:00 UTC
mto: (1685.1.1 bzr-encoding)
mto: This revision was merged to the branch mainline in revision 1752.
Revision ID: john@arbash-meinel.com-20060113060400-e24615f222244238

Hooked up EncodingAdapter, and updated test_non_ascii.

files modified:
bzrlib/tests/EncodingAdapter.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_non_ascii.py

Show diffs side-by-side

added added

removed removed

bzrlib/tests/blackbox/test_non_ascii.py

from bzrlib.tests import TestCaseInTempDir, TestSkipped

from bzrlib.trace import mutter, note

class TestNonAscii(TestCaseInTempDir):

"""Test that bzr handles files/committers/etc which are non-ascii."""

# This will be the tested encoding

_encoding = 'utf-8'

def setUp(self):

super(TestNonAscii, self).setUp()

self._orig_email = os.environ.get('BZREMAIL', None)

email = _erik + u' <joe@foo.com>'

try:

os.environ['BZREMAIL'] = email.encode(bzrlib.user_encoding)

except UnicodeEncodeError:

note('Unable to test unicode in BZREMAIL')

# Do the rest of the tests, just don't expect

# _erik to exist in the email

os.environ['BZREMAIL'] = 'Erik Bagfors <joe@foo.com>'

self.email_name = 'Erik Bagfors'

self._orig_encoding = bzrlib.user_encoding

bzrlib.user_encoding = self.encoding

email = self.info['committer'] + ' <joe@foo.com>'

os.environ['BZREMAIL'] = email.encode(bzrlib.user_encoding)

self.create_base()

def tearDown(self):

if self._orig_email is not None:

os.environ['BZREMAIL'] = self._orig_email

else:

self.email_name = _erik

if os.environ.get('BZREMAIL', None) is not None:

del os.environ['BZREMAIL']

bzrlib.user_encoding = self._orig_encoding

super(TestNonAscii, self).tearDown()

def create_base(self):

bzr = self.run_bzr

bzr('init')

open('a', 'wb').write('foo\n')

bzr('add', 'a')

bzr('commit', '-m', 'adding a')

open('b', 'wb').write(_shrimp_sandwich.encode('utf-8') + '\n')

open('b', 'wb').write('non-ascii \xFF\xFF\xFC\xFB\x00 in b\n')

bzr('add', 'b')

bzr('commit', '-m', u'Creating a ' + _shrimp_sandwich)

fname = _juju + '.txt'

bzr('commit', '-m', self.info['message'])

fname = self.info['filename']

try:

open(fname, 'wb').write('unicode filename\n')

except UnicodeEncodeError:

note('Unable to create an arabic filename')

fname = _juju_alt + '.txt'

try:

open(fname, 'wb').write('unicode filename\n')

except UnicodeEncodeError:

raise TestSkipped("can't create an arabic or european filename"

" in filesystem encoding %s" % sys.getfilesystemencoding())

else:

self.juju = _juju_alt

else:

self.juju = _juju

raise TestSkipped(('Unable to represent filename %r'

' in filesystem encoding %s')

% (fname, sys.getfilesystemencoding()))

bzr('add', fname)

bzr('commit', '-m', u'And an unicode file\n')

def tearDown(self):

if self._orig_email is not None:

os.environ['BZREMAIL'] = self._orig_email

else:

if os.environ.get('BZREMAIL', None) is not None:

del os.environ['BZREMAIL']

super(TestNonAscii, self).tearDown()

def try_character_set(self, charset, filename):

"""Try to create a file in a given character set."""

try:

open(filename, 'wb').write('adding %s\n' % (charset,))

except UnicodeEncodeError:

raise TestSkipped('Cannot create %s filename.' % (charset,))

bzr = self.run_bzr_decode

bzr('add', filename)

txt = bzr('added')

self.assertEqual(filename+'\n', txt)

bzr('commit', '-m', u'adding ' + filename)

txt = bzr('log')

100

self.assertNotEqual(-1, txt.find(filename))

101

102

def test_russian(self):

103

self.try_character_set('russian', _alexander)

104

105

def test_kanji(self):

106

self.try_character_set('kanji', _nihonjin)

107

108

def test_swedish(self):

109

self.try_character_set('swedish', _shrimp_sandwich)

110

111

def test_status(self):

112

bzr = self.run_bzr_decode

113

114

open(self.juju + '.txt', 'ab').write('added something\n')

open(self.info['filename'], 'ab').write('added something\n')

115

txt = bzr('status')

116

self.assertEqual(u'modified:\n \u062c\u0648\u062c\u0648.txt\n' , txt)

self.assertEqual(u'modified:\n %s\n' % (self.info['filename'],), txt)

117

118

def test_cat(self):

119

# bzr cat shouldn't change the contents

120

# using run_bzr since that doesn't decode

121

txt = self.run_bzr('cat', 'b')[0]

122

self.assertEqual(_shrimp_sandwich.encode('utf-8') + '\n', txt)

self.assertEqual('non-ascii \xFF\xFF\xFC\xFB\x00 in b\n', txt)

123

124

txt = self.run_bzr('cat', self.juju + '.txt')[0]

txt = self.run_bzr('cat', self.info['filename'])[0]

125

self.assertEqual('unicode filename\n', txt)

126

127

def test_cat_revision(self):

128

bzr = self.run_bzr_decode

129

130

txt = bzr('cat-revision', '-r', '1')

131

self.assertNotEqual(-1, txt.find(self.email_name))

self.assertNotEqual(-1, txt.find(self.info['committer']))

132

133

txt = bzr('cat-revision', '-r', '2')

134

self.assertNotEqual(-1, txt.find(_shrimp_sandwich))

self.assertNotEqual(-1, txt.find(self.info['message']))

135

136

def test_mkdir(self):

137

100

bzr = self.run_bzr_decode

138

101

139

txt = bzr('mkdir', _shrimp_sandwich)

140

self.assertEqual('added ' + _shrimp_sandwich + '\n', txt)

102

txt = bzr('mkdir', self.info['directory'])

103

self.assertEqual(u'added %s\n' % self.info['directory'], txt)

104

105

# The text should be garbled, but the command should succeed

106

txt = bzr('mkdir', self.info['directory'] + '2', encoding='ascii')

107

expected = u'added %s2\n' % (self.info['directory'],)

108

expected = expected.encode('ascii', 'replace')

109

self.assertEqual(expected, txt)

141

110

142

111

def test_relpath(self):

143

112

bzr = self.run_bzr_decode

144

113

145

txt = bzr('relpath', _shrimp_sandwich)

146

self.assertEqual(_shrimp_sandwich + '\n', txt)

114

txt = bzr('relpath', self.info['filename'])

115

self.assertEqual(self.info['filename'] + '\n', txt)

147

116

148

117

# TODO: jam 20060106 if relpath can return a munged string

149

118

# this text needs to be fixed

150

bzr('relpath', _shrimp_sandwich, encoding='ascii',

151

retcode=3)

119

bzr('relpath', self.info['filename'], encoding='ascii', retcode=3)

152

120

153

121

def test_inventory(self):

154

122

bzr = self.run_bzr_decode

155

123

156

124

txt = bzr('inventory')

157

self.assertEqual(['a', 'b', u'\u062c\u0648\u062c\u0648.txt'],

125

self.assertEqual(['a', 'b', self.info['filename']],

158

126

txt.splitlines())

159

127

160

128

# inventory should fail if unable to encode

170

138

bzr = self.run_bzr_decode

171

139

172

140

self.assertEqual('3\n', bzr('revno'))

141

self.assertEqual('3\n', bzr('revno', encoding='ascii'))

173

142

174

143

def test_revision_info(self):

175

144

bzr = self.run_bzr_decode

182

151

def test_mv(self):

183

152

bzr = self.run_bzr_decode

184

153

185

fname1 = self.juju + '.txt'

186

fname2 = self.juju + '2.txt'

154

fname1 = self.info['filename']

155

fname2 = self.info['filename'] + '2'

156

dirname = self.info['directory']

187

157

188

158

bzr('mv', 'a', fname1, retcode=3)

189

159

190

160

txt = bzr('mv', 'a', fname2)

191

self.assertEqual(u'a => ' + fname2 + '\n', txt)

161

self.assertEqual(u'a => %s\n' % fname2, txt)

192

162

self.failIfExists('a')

193

163

self.failUnlessExists(fname2)

194

164

195

165

bzr('commit', '-m', 'renamed to non-ascii')

196

166

197

bzr('mkdir', _shrimp_sandwich)

198

txt = bzr('mv', fname1, fname2, _shrimp_sandwich)

199

self.assertEqual([fname1 + ' => ' + _shrimp_sandwich + '/' + fname1,

200

fname2 + ' => ' + _shrimp_sandwich + '/' + fname2]

167

bzr('mkdir', dirname)

168

txt = bzr('mv', fname1, fname2, dirname)

169

self.assertEqual([u'%s => %s/%s' % (fname1, dirname, fname1),

170

u'%s => %s/%s' % (fname2, dirname, fname2)]

201

171

, txt.splitlines())

202

172

203

173

# The rename should still succeed

204

txt = bzr('mv', _shrimp_sandwich + '/' + fname2, 'a',

205

encoding='ascii')

174

newpath = u'%s/%s' % (dirname, fname2)

175

txt = bzr('mv', newpath, 'a', encoding='ascii')

206

176

self.failUnlessExists('a')

207

self.assertEqual('r?ksm?rg?s/????2.txt => a\n', txt)

177

self.assertEqual(newpath.encode('ascii', 'replace'), txt)

208

178

209

179

def test_branch(self):

210

180

# We should be able to branch into a directory that

211

181

# has a unicode name, even if we can't display the name

212

182

bzr = self.run_bzr_decode

213

183

214

bzr('branch', u'.', _shrimp_sandwich)

184

bzr('branch', u'.', self.info['directory'])

215

185

216

bzr('branch', u'.', _shrimp_sandwich + '2', encoding='ascii')

186

bzr('branch', u'.', self.info['directory'] + '2', encoding='ascii')

217

187

218

188

def test_pull(self):

219

189

# Make sure we can pull from paths that can't be encoded

220

190

bzr = self.run_bzr_decode

221

191

222

bzr('branch', '.', _shrimp_sandwich)

223

bzr('branch', _shrimp_sandwich, _shrimp_sandwich + '2')

192

dirname1 = self.info['directory']

193

dirname2 = self.info['directory'] + '2'

194

bzr('branch', '.', dirname1)

195

bzr('branch', dirname1, dirname2)

224

196

225

os.chdir(_shrimp_sandwich)

197

os.chdir(dirname1)

226

198

open('a', 'ab').write('more text\n')

227

199

bzr('commit', '-m', 'mod a')

228

200

229

201

pwd = os.getcwdu()

230

202

231

os.chdir('../' + _shrimp_sandwich + '2')

203

os.chdir(u'../' + dirname2)

232

204

txt = bzr('pull')

233

205

234

206

self.assertEqual(u'Using saved location: %s\n' % (pwd,), txt)

235

207

236

os.chdir('../' + _shrimp_sandwich)

208

os.chdir('../' + dirname1)

237

209

open('a', 'ab').write('and yet more\n')

238

# here we cheat. If self.erik is not _erik, then technically

239

# we would not be able to supply the argument, since sys.argv

240

# could not be decoded to those characters.

241

# but self.run_bzr takes the decoded string directly

242

bzr('commit', '-m', 'modifying a by ' + _erik)

210

bzr('commit', '-m', 'modifying a by ' + self.info['committer'])

243

211

244

os.chdir('../' + _shrimp_sandwich + '2')

212

os.chdir('../' + dirname2)

245

213

# We should be able to pull, even if our encoding is bad

246

214

bzr('pull', '--verbose', encoding='ascii')

247

215

250

218

# Make sure we can pull from paths that can't be encoded

251

219

bzr = self.run_bzr_decode

252

220

253

# ConfigObj has to be modified to make it allow unicode

254

# strings. It seems to have the functionality, but doesn't

255

# like to use it.

256

bzr('push', _shrimp_sandwich)

221

dirname = self.info['directory']

222

bzr('push', dirname)

257

223

258

224

open('a', 'ab').write('adding more text\n')

259

225

bzr('commit', '-m', 'added some stuff')

262

228

263

229

f = open('a', 'ab')

264

230

f.write('and a bit more: ')

265

f.write(_shrimp_sandwich.encode('utf-8'))

231

f.write(dirname.encode('utf-8'))

266

232

f.write('\n')

267

233

f.close()

268

bzr('commit', '-m', u'Added some ' + _shrimp_sandwich)

234

235

bzr('commit', '-m', u'Added some ' + dirname)

269

236

bzr('push', '--verbose', encoding='ascii')

270

237

271

bzr('push', '--verbose', _shrimp_sandwich + '2')

238

bzr('push', '--verbose', dirname + '2')

272

239

273

bzr('push', '--verbose', _shrimp_sandwich + '3',

274

encoding='ascii')

240

bzr('push', '--verbose', dirname + '3', encoding='ascii')

275

241

276

242

def test_renames(self):

277

243

bzr = self.run_bzr_decode

278

244

279

fname = self.juju + '2.txt'

245

fname = self.info['filename'] + '2'

280

246

bzr('mv', 'a', fname)

281

247

txt = bzr('renames')

282

self.assertEqual('a => ' + fname + '\n', txt)

248

self.assertEqual(u'a => %s\n' % fname, txt)

283

249

284

250

bzr('renames', retcode=3, encoding='ascii')

285

251

286

252

def test_remove(self):

287

253

bzr = self.run_bzr_decode

288

254

289

fname = self.juju + '.txt'

255

fname = self.info['filename']

290

256

txt = bzr('remove', fname, encoding='ascii')

291

257

292

258

def test_remove_verbose(self):

293

259

bzr = self.run_bzr_decode

294

260

295

261

raise TestSkipped('bzr remove --verbose uses tree.remove, which calls print directly.')

296

fname = self.juju + '.txt'

262

fname = self.info['filename']

297

263

txt = bzr('remove', '--verbose', fname, encoding='ascii')

298

264

299

265

def test_file_id(self):

300

266

bzr = self.run_bzr_decode

301

267

302

fname = self.juju + '.txt'

268

fname = self.info['filename']

303

269

txt = bzr('file-id', fname)

304

270

305

271

# TODO: jam 20060106 We don't support non-ascii file ids yet,

311

277

bzr = self.run_bzr_decode

312

278

313

279

# Create a directory structure

314

fname = self.juju + '.txt'

280

fname = self.info['filename']

315

281

bzr('mkdir', 'base')

316

bzr('mkdir', 'base/' + _shrimp_sandwich)

317

path = '/'.join(['base', _shrimp_sandwich, fname])

282

bzr('mkdir', 'base/' + self.info['dirname'])

283

path = '/'.join(['base', self.info['dirname'], fname])

318

284

bzr('mv', fname, path)

319

285

bzr('commit', '-m', 'moving things around')

320

286

348

314

def test_deleted(self):

349

315

bzr = self.run_bzr_decode

350

316

351

fname = self.juju + '.txt'

317

fname = self.info['filename']

352

318

os.remove(fname)

353

319

bzr('rm', fname)

354

320

366

332

def test_modified(self):

367

333

bzr = self.run_bzr_decode

368

334

369

fname = self.juju + '.txt'

335

fname = self.info['filename']

370

336

open(fname, 'ab').write('modified\n')

371

337

372

338

txt = bzr('modified')

377

343

def test_added(self):

378

344

bzr = self.run_bzr_decode

379

345

380

fname = self.juju + '2.txt'

346

fname = self.info['filename'] + '2'

381

347

open(fname, 'wb').write('added\n')

382

348

bzr('add', fname)

383

349

389

355

def test_root(self):

390

356

bzr = self.run_bzr_decode

391

357

358

dirname = self.info['directory']

392

359

bzr('root')

393

360

394

bzr('branch', u'.', _shrimp_sandwich)

361

bzr('branch', u'.', dirname)

395

362

396

os.chdir(_shrimp_sandwich)

363

os.chdir(dirname)

397

364

398

365

txt = bzr('root')

399

self.failUnless(txt.endswith(_shrimp_sandwich+'\n'))

366

self.failUnless(txt.endswith(dirname+'\n'))

400

367

401

368

txt = bzr('root', encoding='ascii', retcode=3)

402

369

403

370

def test_log(self):

404

371

bzr = self.run_bzr_decode

405

372

373

fname = self.info['filename']

374

406

375

txt = bzr('log')

407

self.assertNotEqual(-1, txt.find(self.email_name))

408

self.assertNotEqual(-1, txt.find(_shrimp_sandwich))

376

self.assertNotEqual(-1, txt.find(self.info['committer']))

377

self.assertNotEqual(-1, txt.find(self.info['message']))

409

378

410

379

txt = bzr('log', '--verbose')

411

self.assertNotEqual(-1, txt.find(self.juju))

380

self.assertNotEqual(-1, txt.find(fname))

412

381

413

382

# Make sure log doesn't fail even if we can't write out

414

383

txt = bzr('log', '--verbose', encoding='ascii')

415

self.assertEqual(-1, txt.find(self.juju))

416

self.assertNotEqual(-1, txt.find(self.juju.encode('ascii', 'replace')))

384

self.assertEqual(-1, txt.find(fname))

385

self.assertNotEqual(-1, txt.find(fname.encode('ascii', 'replace')))

417

386

418

387

def test_touching_revisions(self):

419

388

bzr = self.run_bzr_decode

420

389

421

fname = self.juju + '.txt'

390

fname = self.info['filename']

422

391

txt = bzr('touching-revisions', fname)

423

392

self.assertEqual(u' 3 added %s\n' % (fname,), txt)

424

393

425

fname_new = _shrimp_sandwich + '.txt'

426

bzr('mv', fname, fname_new)

427

bzr('commit', '-m', u'Renamed %s => %s' % (fname, fname_new))

394

fname2 = self.info['filename'] + '2'

395

bzr('mv', fname, fname2)

396

bzr('commit', '-m', u'Renamed %s => %s' % (fname, fname2))

428

397

429

txt = bzr('touching-revisions', fname_new)

398

txt = bzr('touching-revisions', fname2)

430

399

expected_txt = (u' 3 added %s\n'

431

400

u' 4 renamed %s => %s\n'

432

% (fname, fname, fname_new))

401

% (fname, fname, fname2))

433

402

self.assertEqual(expected_txt, txt)

434

403

435

txt = bzr('touching-revisions', fname_new, encoding='ascii')

404

txt = bzr('touching-revisions', fname2, encoding='ascii')

436

405

expected_ascii = expected_txt.encode('ascii', 'replace')

437

406

self.assertEqual(expected_ascii, txt)

438

407

440

409

bzr = self.run_bzr_decode

441

410

442

411

txt = bzr('ls')

443

self.assertEqual(['a', 'b', u'\u062c\u0648\u062c\u0648.txt'],

412

self.assertEqual(['a', 'b', self.info['filename']],

444

413

txt.splitlines())

445

414

txt = bzr('ls', '--null')

446

self.assertEqual(['a', 'b', u'\u062c\u0648\u062c\u0648.txt', ''],

415

self.assertEqual(['a', 'b', self.info['filename'], ''],

447

416

txt.split('\0'))

448

417

449

418

txt = bzr('ls', encoding='ascii', retcode=3)

452

421

def test_unknowns(self):

453

422

bzr = self.run_bzr_decode

454

423

455

fname = self.juju + '2.txt'

424

fname = self.info['filename'] + '2'

456

425

open(fname, 'wb').write('unknown\n')

457

426

427

# TODO: jam 20060112 bzr unknowns is the only one which

428

# quotes paths do we really want it to?

458

429

txt = bzr('unknowns')

459

430

self.assertEqual(u'"%s"\n' % (fname,), txt)

460

431

463

434

def test_ignore(self):

464

435

bzr = self.run_bzr_decode

465

436

466

fname2 = self.juju + '2.txt'

437

fname2 = self.info['filename'] + '2.txt'

467

438

open(fname2, 'wb').write('ignored\n')

468

439

469

440

txt = bzr('unknowns')

476

447

# This is the incorrect output

477

448

self.assertEqual(u'"%s"\n' % (fname2,), txt)

478

449

479

fname3 = self.juju + '3.txt'

450

fname3 = self.info['filename'] + '3.txt'

480

451

open(fname3, 'wb').write('unknown 3\n')

481

452

txt = bzr('unknowns')

482

453

# TODO: jam 20060107 This is the correct output

483

454

# self.assertEqual(u'"%s"\n' % (fname3,), txt)

484

455

# This is the incorrect output

485

self.assertEqual(u'"%s"\n"%s"\n' % (fname2,fname3,), txt)

456

self.assertEqual(u'"%s"\n"%s"\n' % (fname2, fname3,), txt)

486

457

487

458

# Ignore should not care what the encoding is

488

459

# (right now it doesn't print anything)

494

465

self.assertEqual(u'"%s"\n"%s"\n' % (fname2, fname3), txt)

495

466

496

467

# Now try a wildcard match

497

fname4 = self.juju + '4.txt'

468

fname4 = self.info['filename'] + '4.txt'

469

open(fname4, 'wb').write('unknown 4\n')

498

470

bzr('ignore', '*.txt')

499

471

txt = bzr('unknowns')

500

472

self.assertEqual('', txt)

501

473

502

474

os.remove('.bzrignore')

503

bzr('ignore', self.juju + '*')

475

bzr('ignore', self.info['filename'] + '*')

504

476

txt = bzr('unknowns')

505

477

# TODO: jam 20060107 This is the correct output

506

478

# self.assertEqual('', txt)

507

479

# This is the incorrect output

508

self.assertEqual(u'"%s"\n"%s"\n' % (fname2, fname3), txt)

480

self.assertEqual(u'"%s"\n"%s"\n"%s"\n' % (fname2, fname3, fname4), txt)

509

481

510

482

# TODO: jam 20060107 The best error we have right now is TestSkipped

511

483

# to indicate that this test is known to fail

Older »