~abentley/bzrtools/bzrtools.dev

147.1.3 by Robert Collins
test and deliver basic pending-merges into bzr so that merging is recorded
1
#!/usr/bin/env python
2
3
# pythfilter.py v1.5.5, written by Matthias Baas (baas@ira.uka.de)
4
5
# Doxygen filter which can be used to document Python source code.
6
# Classes (incl. methods) and functions can be documented.
7
# Every comment that begins with ## is literally turned into an
8
# Doxygen comment. Consecutive comment lines are turned into
9
# comment blocks (-> /** ... */).
10
# All the stuff is put inside a namespace with the same name as
11
# the source file.
12
13
# Conversions:
14
# ============
15
# ##-blocks                  ->  /** ... */
16
# "class name(base): ..."    ->  "class name : public base {...}"
17
# "def name(params): ..."    ->  "name(params) {...}"
18
19
# Changelog:
20
# 21.01.2003: Raw (r"") or unicode (u"") doc string will now be properly
21
#             handled. (thanks to Richard Laager for the patch)
22
# 22.12.2003: Fixed a bug where no function names would be output for "def"
23
#             blocks that were not in a class.
24
#             (thanks to Richard Laager for the patch)
25
# 12.12.2003: Implemented code to handle static and class methods with
26
#             this logic: Methods with "self" as the first argument are
27
#             non-static. Methods with "cls" are Python class methods,
28
#             which translate into static methods for Doxygen. Other
29
#             methods are assumed to be static methods. As should be
30
#             obvious, this logic doesn't take into account if the method
31
#             is actually setup as a classmethod() or a staticmethod(),
32
#             just if it follows the normal conventions.
33
#             (thanks to Richard Laager for the patch)
34
# 11.12.2003: Corrected #includes to use os.path.sep instead of ".". Corrected
35
#             namespace code to use "::" instead of ".".
36
#             (thanks to Richard Laager for the patch)
37
# 11.12.2003: Methods beginning with two underscores that end with
38
#             something other than two underscores are considered private
39
#             and are handled accordingly.
40
#             (thanks to Richard Laager for the patch)
41
# 03.12.2003: The first parameter of class methods (self) is removed from
42
#             the documentation.
43
# 03.11.2003: The module docstring will be used as namespace documentation
44
#             (thanks to Joe Bronkema for the patch)
45
# 08.07.2003: Namespaces get a default documentation so that the namespace
46
#             and its contents will show up in the generated documentation.
47
# 05.02.2003: Directories will be deleted during synchronization.
48
# 31.01.2003: -f option & filtering entire directory trees.
49
# 10.08.2002: In base classes the '.' will be replaced by '::'
50
# 18.07.2002: * and ** will be translated into arguments
51
# 18.07.2002: Argument lists may contain default values using constructors.
52
# 18.06.2002: Support for ## public:
53
# 21.01.2002: from ... import will be translated to "using namespace ...;"
54
#             TODO: "from ... import *" vs "from ... import names"
55
#             TODO: Using normal imports: name.name -> name::name
56
# 20.01.2002: #includes will be placed in front of the namespace
57
58
######################################################################
59
60
# The program is written as a state machine with the following states:
61
#
62
# - OUTSIDE               The current position is outside any comment,
63
#                         class definition or function.
64
#
65
# - BUILD_COMMENT         Begins with first "##".
66
#                         Ends with the first token that is no "##"
67
#                         at the same column as before.
68
#
69
# - BUILD_CLASS_DECL      Begins with "class".
70
#                         Ends with ":"
71
# - BUILD_CLASS_BODY      Begins just after BUILD_CLASS_DECL.
72
#                         The first following token (which is no comment)
73
#                         determines indentation depth.
74
#                         Ends with a token that has a smaller indentation.
75
#
76
# - BUILD_DEF_DECL        Begins with "def".
77
#                         Ends with ":".
78
# - BUILD_DEF_BODY        Begins just after BUILD_DEF_DECL.
79
#                         The first following token (which is no comment)
80
#                         determines indentation depth.
81
#                         Ends with a token that has a smaller indentation.
82
83
import getopt
84
import glob
85
import os.path
86
import shutil
87
import string
88
import sys
89
import token
90
import tokenize
91
92
from stat import *
93
94
# States of the token state machine (consecutive integers starting at 0).
(OUTSIDE,
 BUILD_COMMENT,
 BUILD_CLASS_DECL,
 BUILD_CLASS_BODY,
 BUILD_DEF_DECL,
 BUILD_DEF_BODY,
 IMPORT,
 IMPORT_OP,
 IMPORT_APPEND) = range(9)

# Stream that receives the generated output
outfile = sys.stdout

# Output fragments that have been produced but not yet written
outbuffer = []

# Output cursor (row and column) as tracked by output()
out_row = out_col = 0

# Scratch variables of rec_name_n_param()
name = param = doc_string = ""
record_state = bracket_counter = 0

# Start positions of the current class / def / import, each (row, column)
class_spos = def_spos = import_spos = (0, 0)

# Keyword that introduced the current import statement ("import" or "from")
import_token = ""

# Lines of the comment block that is currently being collected
comment_block = []
comment_finished = 0

# Module names collected from the current import statement
modules = []

# Stack of state-machine states; the last entry is the active state
stateStack = [OUTSIDE]

# Set once a module doc string has been seen
module_has_docstring = False

# Member protection bookkeeping
protection_level = "public"
private_member = False

# Name of the module namespace
namespace = ""
147
148
######################################################################
149
# Output string s. '\n' may only be at the end of the string (not
150
# somewhere in the middle).
151
#
152
# In: s    - String
153
#     spos - Startpos
154
######################################################################
155
def output(s, spos, immediate=0):
    """Emit the string s so that it starts at column spos[1].

    s may contain a newline only as its very last character.  Only the
    column part of spos is used; the row part is ignored.

    In: s         - String to emit
        spos      - Start position as a (row, column) tuple
        immediate - If true, write to outfile right away instead of
                    appending to outbuffer
    """
    global outbuffer, out_row, out_col, outfile

    # Left-pad with blanks up to the requested column.  When the output
    # cursor is already at or past that column, rjust() adds nothing.
    padded = s.rjust(spos[1] - out_col + len(s))
    if immediate:
        outfile.write(padded)
    else:
        outbuffer.append(padded)

    # Track the output cursor: a trailing newline starts a new row.
    if s.endswith("\n"):
        out_row = out_row + 1
        out_col = 0
    else:
        out_col = spos[1] + len(s)
168
169
170
######################################################################
171
# Records a name and parameters. The name is either a class name or
172
# a function name. Then the parameter is either the base class or
173
# the function parameters.
174
# The name is stored in the global variable "name", the parameters
175
# in "param".
176
# The variable "record_state" holds the current state of this internal
177
# state machine.
178
# The recording is started by calling start_recording().
179
#
180
# In: type, tok
181
######################################################################
182
def rec_name_n_param(type, tok):
    """Feed one token to the name/parameter recorder.

    The recorder is a small state machine driven by the global
    "record_state":

      0 - idle, the token is ignored
      1 - the token is stored as "name"
      2 - wait for "(" (parameters follow) or ":" (no parameters)
      3 - accumulate tokens in "param" until the matching ")"
      4 - look for a doc string; any other token ends the recording

    In: type - Token type
        tok  - Token text
    """
    global record_state, name, param, doc_string, bracket_counter

    state = record_state
    # State 0: Do nothing.
    if state == 0:
        return
    # State 1: Remember name.
    if state == 1:
        name = tok
        record_state = 2
    # State 2: Wait for opening bracket or colon
    elif state == 2:
        if tok == '(':
            bracket_counter = 1
            record_state = 3
        elif tok == ':':
            record_state = 4
    # State 3: Store parameter (or base class) and wait for an ending bracket
    elif state == 3:
        # Line breaks and comments inside a multi-line parameter list are
        # not part of the parameters.
        if type == tokenize.NL or type == tokenize.COMMENT:
            return
        if tok == '*' or tok == '**':
            tok = ''
        if tok == '(':
            bracket_counter = bracket_counter + 1
        elif tok == ')':
            bracket_counter = bracket_counter - 1
        if bracket_counter == 0:
            record_state = 4
        else:
            param = param + tok
    # State 4: Look for doc string
    elif state == 4:
        # NOTE(review): tokenize reports operators with type OP, so the
        # SLASHEQUAL test presumably never matches; it may have been meant
        # as tokenize.NL - confirm before changing, because tok_eater()
        # relies on which token ends the recording.
        if type in (token.NEWLINE, token.INDENT, token.SLASHEQUAL):
            return
        if tok == ":":
            return
        if type == token.STRING:
            doc_string = _strip_string_literal(tok)
        record_state = 0


def _strip_string_literal(literal):
    """Return the text of a string literal without prefix and quotes."""
    text = literal
    # Drop prefix letters (raw, unicode, bytes, ...) in either case.
    while text[:1] in ('r', 'R', 'u', 'U', 'b', 'B', 'f', 'F'):
        text = text[1:]
    # Remove exactly one pair of delimiters; triple quotes are tried first.
    for quote in ('"""', "'''", '"', "'"):
        if (len(text) >= 2 * len(quote)
                and text.startswith(quote) and text.endswith(quote)):
            return text[len(quote):len(text) - len(quote)]
    return text
225
226
######################################################################
227
# Starts the recording of a name & param part.
228
# The function rec_name_n_param() has to be fed with tokens. After
229
# the necessary tokens are fed the name and parameters can be found
230
# in the global variables "name" and "param".
231
######################################################################
232
def start_recording():
    """Reset the recorder and arm it so the next token is taken as name.

    Clears the globals "name", "param" and "doc_string" and puts
    "record_state" into state 1.
    """
    global record_state, param, name, doc_string
    name = param = doc_string = ""
    record_state = 1
238
239
######################################################################
240
# Test if recording is finished
241
######################################################################
242
def is_recording_finished():
    """Return True when the recorder is idle (record_state is 0)."""
    global record_state
    return record_state == 0
245
246
######################################################################
247
## Gather comment block
248
######################################################################
249
def gather_comment(type, tok, spos):
    """Collect "##" comments into the global comment block.

    A token that is not a comment marks the current block as finished.
    When the next comment arrives after that, the finished block is
    output via print_comment() before the new one is started.  Only
    comments that start with exactly two "#" are collected; their text
    after the "##" is appended to comment_block.

    In: type - Token type
        tok  - Token text
        spos - Start position (row, column) of the token
    """
    global comment_block, comment_finished

    if type != tokenize.COMMENT:
        comment_finished = 1
        return

    # Output old comment block if a new one is started.
    if comment_finished:
        print_comment(spos)
        comment_finished = 0
    if tok.startswith("##") and not tok.startswith("###"):
        comment_block.append(tok[2:])
260
261
######################################################################
262
## Output comment block and empty buffer.
263
######################################################################
264
def print_comment(spos):
    """Output the collected comment block as /** ... */ and empty it.

    Nothing is output when the block is empty.  Entries that do not end
    in a newline get one appended, so every entry occupies its own output
    line and output() only ever sees a newline at the end of a string.

    In: spos - Position (row, column) at which the block is output
    """
    global comment_block, comment_finished

    if comment_block:
        output("/**\n", spos)
        for entry in comment_block:
            if not entry.endswith("\n"):
                entry = entry + "\n"
            output(entry, spos)
        output("*/\n", spos)
    comment_block = []
    comment_finished = 0
273
274
######################################################################
275
def set_state(s):
    """Replace the state on top of stateStack with s."""
    global stateStack
    stateStack[-1] = s
278
279
######################################################################
280
def get_state():
    """Return the state on top of stateStack."""
    global stateStack
    return stateStack[-1]
283
284
######################################################################
285
def push_state(s):
    """Push s onto stateStack, making it the active state."""
    global stateStack
    stateStack.append(s)
288
289
######################################################################
290
def pop_state():
    """Remove the state on top of stateStack (nothing is returned)."""
    global stateStack
    stateStack.pop()
293
294
295
######################################################################
296
def tok_eater(type, tok, spos, epos, line):
    """Consume one token and advance the filter's state machine.

    The arguments are the five fields of a tokenize token.  Every token
    is first offered to rec_name_n_param().  It is then either handled as
    a "## private:" / "## protected:" / "## public:" marker or passed to
    gather_comment().  Finally it is interpreted according to the state
    on top of stateStack.

    In: type - Token type
        tok  - Token text
        spos - Start position (row, column)
        epos - End position (not used)
        line - Source line (not used)
    """
    global stateStack, name, param, class_spos, def_spos, import_spos
    global doc_string, modules, import_token, module_has_docstring
    global protection_level, private_member

    rec_name_n_param(type, tok)

    # Protection markers may be written with arbitrary blanks.
    marker = tok.strip().replace(" ", "")
    if marker == "##private:":
        protection_level = "private"
        output("private:\n", spos)
    elif marker == "##protected:":
        protection_level = "protected"
        output("protected:\n", spos)
    elif marker == "##public:":
        protection_level = "public"
        output("public:\n", spos)
    else:
        gather_comment(type, tok, spos)

    state = get_state()

    # OUTSIDE
    if state == OUTSIDE:
        if tok == "class":
            start_recording()
            class_spos = spos
            push_state(BUILD_CLASS_DECL)
        elif tok == "def":
            start_recording()
            def_spos = spos
            push_state(BUILD_DEF_DECL)
        elif tok == "import" or tok == "from":
            import_token = tok
            import_spos = spos
            modules = []
            push_state(IMPORT)
        elif spos[1] == 0 and tok[:3] == '"""':
            # Capture module docstring as namespace documentation
            module_has_docstring = True
            comment_block.append("\\namespace %s\n" % namespace)
            comment_block.append(tok[3:-3])
            print_comment(spos)

    # IMPORT
    elif state == IMPORT:
        if type == token.NAME:
            modules.append(tok)
            set_state(IMPORT_OP)
    # IMPORT_OP
    elif state == IMPORT_OP:
        if tok == ".":
            set_state(IMPORT_APPEND)
        elif tok == ",":
            set_state(IMPORT)
        else:
            # Only "from ..." imports are translated.
            if import_token == "from":
                for m in modules:
                    output('using namespace ' + m.replace('.', '::') + ';\n',
                           import_spos)
            pop_state()
    # IMPORT_APPEND
    elif state == IMPORT_APPEND:
        if type == token.NAME:
            modules[-1] += "." + tok
            set_state(IMPORT_OP)
    # BUILD_CLASS_DECL
    elif state == BUILD_CLASS_DECL:
        if is_recording_finished():
            s = "class " + name
            if param != "":
                s = s + " : public " + param.replace('.', '::')
            if doc_string != "":
                comment_block.append(doc_string)
            print_comment(class_spos)
            output(s + "\n", class_spos)
            output("{\n", (class_spos[0] + 1, class_spos[1]))
            protection_level = "public"
            output("  public:\n", (class_spos[0] + 2, class_spos[1]))
            set_state(BUILD_CLASS_BODY)
            # The token that ended the recording has been consumed here.
            # If it is the "def" of the first method (class without doc
            # string), BUILD_CLASS_BODY would never see it, so the method
            # recording is started right away.
            if tok == "def":
                start_recording()
                def_spos = spos
                push_state(BUILD_DEF_DECL)
    # BUILD_CLASS_BODY
    elif state == BUILD_CLASS_BODY:
        # The first significant token at or left of the class column ends
        # the class.  (The original also compared against the literal 40,
        # which is not a type tokenize reports.)
        if (type not in (token.INDENT, token.NEWLINE,
                         tokenize.NL, tokenize.COMMENT)
                and spos[1] <= class_spos[1]):
            output("}; // end of class\n", (out_row + 1, class_spos[1]))
            pop_state()
        elif tok == "def":
            start_recording()
            def_spos = spos
            push_state(BUILD_DEF_DECL)
    # BUILD_DEF_DECL
    elif state == BUILD_DEF_DECL:
        if is_recording_finished():
            s = ''
            # Do we document a class method? then remove the 'self' parameter
            if BUILD_CLASS_BODY in stateStack:
                params = param.split(",")
                if params[0] == 'self':
                    param = ",".join(params[1:])
                else:
                    # No "self": treated as static; "cls" is dropped too
                    s = 'static '
                    if params[0] == 'cls':
                        param = ",".join(params[1:])
                s = s + name + "(" + param + ");\n"
                # "__name" without trailing "__" is a private member
                if (name.startswith('__') and not name.endswith('__')
                        and protection_level != 'private'):
                    private_member = True
                    output("  private:\n", (def_spos[0] + 2, def_spos[1]))
            else:
                s = name + "(" + param + ");\n"
            if doc_string != "":
                comment_block.append(doc_string)
            print_comment(def_spos)
            output(s, def_spos)
            set_state(BUILD_DEF_BODY)
    # BUILD_DEF_BODY
    elif state == BUILD_DEF_BODY:
        # The first significant token at or left of the def column ends
        # the function body.
        if (type not in (token.INDENT, token.NEWLINE, tokenize.NL)
                and spos[1] <= def_spos[1]):
            # Restore the protection level after a private member
            if private_member and protection_level != 'private':
                private_member = False
                output("  " + protection_level + ":\n",
                       (def_spos[0] + 2, def_spos[1]))
            pop_state()
424
425
426
def dump(filename):
    """Copy the contents of the file "filename" unchanged to sys.stdout.

    The file is closed again even if writing fails.
    """
    with open(filename) as f:
        for s in f:
            sys.stdout.write(s)
431
432
def filter(filename):
    """Filter the Python file "filename" and write the result to outfile.

    The output is wrapped in a namespace named after the file (without
    directory and extension).  If the module has no doc string, a default
    namespace documentation is appended.  The whole output is collected in
    outbuffer and written to outfile at the end.
    """
    global name, module_has_docstring, namespace

    path, name = os.path.split(filename)
    root, ext = os.path.splitext(name)

    output("namespace " + root + " {\n", (0, 0))

    # Set the module name for tok_eater(), which reads "namespace" when it
    # turns the module doc string into the namespace documentation.
    name = root
    namespace = root

    sys.stderr.write('Filtering "' + filename + '"...')
    with open(filename) as f:
        # generate_tokens() yields the same 5-field tokens that were passed
        # to the callback by tokenize.tokenize(readline, tokeneater).
        for tok_info in tokenize.generate_tokens(f.readline):
            tok_eater(*tok_info)
    print_comment((0, 0))

    output("\n", (0, 0))
    output("}  // end of namespace\n", (0, 0))

    if not module_has_docstring:
        # Put in default namespace documentation
        output('/** \\namespace ' + root + ' \n', (0, 0))
        output('    \\brief Module "%s" */\n' % (root), (0, 0))

    for s in outbuffer:
        outfile.write(s)
459
460
461
def filterFile(filename, out=sys.stdout):
    """Filter one file and report the result on sys.stderr.

    Files with a ".py" extension (in any letter case, as in convert())
    are run through filter(); everything else is passed to dump().  "out"
    becomes the global outfile used by filter().  dump() always writes to
    sys.stdout, so "out" has no effect on files that are dumped.

    An IOError is not propagated; its message is written to sys.stderr.
    """
    global outfile

    outfile = out

    try:
        root, ext = os.path.splitext(filename)

        if ext.lower() == ".py":
            filter(filename)
        else:
            dump(filename)

        sys.stderr.write("OK\n")
    except IOError as e:
        # strerror can be missing when the error carries a message only
        sys.stderr.write("%s\n" % (e.strerror or e))
477
478
479
######################################################################
480
481
# preparePath
482
def preparePath(path):
    """Prepare a path.

    Checks if the path exists and creates it, including all missing
    parent directories, if it does not exist.  An empty path stands for
    the current directory and is left alone.
    """
    if path == "" or os.path.exists(path):
        return
    # makedirs() also copes with a trailing separator, where creating the
    # parent first and then calling mkdir() on the full path would fail.
    os.makedirs(path)
492
493
# isNewer
494
def isNewer(file1, file2):
    """Check if file1 is newer than file2.

    file1 must be an existing file.  If file2 does not exist, file1 counts
    as newer.  Otherwise the modification times of both files are
    compared.
    """
    if not os.path.exists(file2):
        return True
    mtime1 = os.stat(file1)[ST_MTIME]
    mtime2 = os.stat(file2)[ST_MTIME]
    return mtime1 > mtime2
502
503
# convert
504
def convert(srcpath, destpath):
    """Convert a Python source tree into a C++ stub tree.

    All *.py files in srcpath (including sub-directories) are filtered
    and written to destpath. If destpath exists, only the files
    that have been modified are filtered again. Files that were deleted
    from srcpath are also deleted in destpath if they are still present.
    The function returns the number of processed *.py files.

    Each file is filtered by running this script again with the -f option
    in a separate process whose standard output is the destination file.
    The program exits with status 1 if a source file and its destination
    have the same name.
    """
    # Local import: only this function needs the module.
    import subprocess

    count = 0
    sfiles = glob.glob(os.path.join(srcpath, "*"))
    dfiles = glob.glob(os.path.join(destpath, "*"))

    # Names present in destpath; whatever is still in here after the loop
    # has no counterpart in srcpath any more.
    leftovers = set(os.path.basename(df) for df in dfiles)

    for srcfile in sfiles:
        basename = os.path.basename(srcfile)
        leftovers.discard(basename)

        # Is it a subdirectory?
        if os.path.isdir(srcfile):
            sdir = os.path.join(srcpath, basename)
            ddir = os.path.join(destpath, basename)
            count += convert(sdir, ddir)
            continue

        # Check the extension (only *.py will be converted)
        root, ext = os.path.splitext(srcfile)
        if ext.lower() != ".py":
            continue

        destfile = os.path.join(destpath, basename)
        if destfile == srcfile:
            sys.stdout.write("WARNING: Input and output names are identical!\n")
            sys.exit(1)

        count += 1

        if isNewer(srcfile, destfile):
            preparePath(os.path.dirname(destfile))
            # Pass the arguments as a list so that blanks or shell
            # metacharacters in a path cannot break or alter the command.
            command = [sys.executable or "python", sys.argv[0], "-f", srcfile]
            with open(destfile, "w") as out:
                subprocess.call(command, stdout=out)

    # Delete obsolete files in destpath
    for df in sorted(leftovers):
        dname = os.path.join(destpath, df)
        if os.path.isdir(dname):
            try:
                shutil.rmtree(dname)
            except EnvironmentError:
                sys.stdout.write("Can't remove obsolete directory '%s'\n" % dname)
        else:
            try:
                os.remove(dname)
            except EnvironmentError:
                sys.stdout.write("Can't remove obsolete file '%s'\n" % dname)

    return count
568
569
570
######################################################################
571
######################################################################
572
######################################################################
573
574
def main(argv=None):
    """Command line entry point.

    With the -f option the remaining arguments, joined by blanks, name a
    single file that is filtered to standard output.  Without it exactly
    two arguments (input and output directory) are expected and the whole
    source tree is converted.  Invalid options or a wrong number of
    arguments end the program with exit status 1.

    In: argv - Argument list without the program name
               (default: sys.argv[1:])
    """
    if argv is None:
        argv = sys.argv[1:]

    try:
        opts, args = getopt.getopt(argv, "hf", ["help"])
    except getopt.GetoptError as e:
        sys.stdout.write("%s\n" % e)
        sys.exit(1)

    filter_file = False
    for o, a in opts:
        if o == "-f":
            filter_file = True

    if filter_file:
        # Filter the specified file and print the result to stdout
        filename = " ".join(args)
        filterFile(filename)
        return

    if len(args) != 2:
        sys.stderr.write("%s options input output\n" % (os.path.basename(sys.argv[0])))
        sys.exit(1)

    # Filter an entire Python source tree
    sys.stdout.write('"%s" -> "%s"\n\n' % (args[0], args[1]))
    c = convert(args[0], args[1])
    sys.stdout.write("%d files\n" % (c))


if __name__ == "__main__":
    main()
600