123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305 |
- #!/usr/bin/env python3
- #
- # Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
- #
- # Extract documentation from C++ header files to use it in Python bindings
- #
- import os
- import sys
- import platform
- import re
- import textwrap
- from clang import cindex
- from clang.cindex import CursorKind
- from collections import OrderedDict
- from threading import Thread, Semaphore
- from multiprocessing import cpu_count
- RECURSE_LIST = [
- CursorKind.TRANSLATION_UNIT,
- CursorKind.NAMESPACE,
- CursorKind.CLASS_DECL,
- CursorKind.STRUCT_DECL,
- CursorKind.ENUM_DECL,
- CursorKind.CLASS_TEMPLATE
- ]
- PRINT_LIST = [
- CursorKind.CLASS_DECL,
- CursorKind.STRUCT_DECL,
- CursorKind.ENUM_DECL,
- CursorKind.ENUM_CONSTANT_DECL,
- CursorKind.CLASS_TEMPLATE,
- CursorKind.FUNCTION_DECL,
- CursorKind.FUNCTION_TEMPLATE,
- CursorKind.CONVERSION_FUNCTION,
- CursorKind.CXX_METHOD,
- CursorKind.CONSTRUCTOR,
- CursorKind.FIELD_DECL
- ]
- CPP_OPERATORS = {
- '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
- '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=':
- 'imod', '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
- '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift', '>>':
- 'rshift', '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
- '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul', '/':
- 'div', '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign', '()': 'call'
- }
- CPP_OPERATORS = OrderedDict(
- sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0])))
- job_count = cpu_count()
- job_semaphore = Semaphore(job_count)
- output = []
- def d(s):
- return s.decode('utf8')
- def sanitize_name(name):
- name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
- for k, v in CPP_OPERATORS.items():
- name = name.replace('operator%s' % k, 'operator_%s' % v)
- name = re.sub('<.*>', '', name)
- name = ''.join([ch if ch.isalnum() else '_' for ch in name])
- name = re.sub('_$', '', re.sub('_+', '_', name))
- return '__doc_' + name
- def process_comment(comment):
- result = ''
- # Remove C++ comment syntax
- leading_spaces = float('inf')
- for s in comment.expandtabs(tabsize=4).splitlines():
- s = s.strip()
- if s.startswith('/*'):
- s = s[2:].lstrip('*')
- elif s.endswith('*/'):
- s = s[:-2].rstrip('*')
- elif s.startswith('///'):
- s = s[3:]
- if s.startswith('*'):
- s = s[1:]
- if len(s) > 0:
- leading_spaces = min(leading_spaces, len(s) - len(s.lstrip()))
- result += s + '\n'
- if leading_spaces != float('inf'):
- result2 = ""
- for s in result.splitlines():
- result2 += s[leading_spaces:] + '\n'
- result = result2
- # Doxygen tags
- cpp_group = '([\w:]+)'
- param_group = '([\[\w:\]]+)'
- s = result
- s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
- s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
- s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
- s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
- s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
- s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
- s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
- r'\n\n$Parameter ``\2``:\n\n', s)
- s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
- r'\n\n$Template parameter ``\2``:\n\n', s)
- for in_, out_ in {
- 'return': 'Returns',
- 'author': 'Author',
- 'authors': 'Authors',
- 'copyright': 'Copyright',
- 'date': 'Date',
- 'remark': 'Remark',
- 'sa': 'See also',
- 'see': 'See also',
- 'extends': 'Extends',
- 'throw': 'Throws',
- 'throws': 'Throws'
- }.items():
- s = re.sub(r'\\%s\s*' % in_, r'\n\n$%s:\n\n' % out_, s)
- s = re.sub(r'\\details\s*', r'\n\n', s)
- s = re.sub(r'\\brief\s*', r'', s)
- s = re.sub(r'\\short\s*', r'', s)
- s = re.sub(r'\\ref\s*', r'', s)
- s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
- r"```\n\1\n```\n", s, flags=re.DOTALL)
- # HTML/TeX tags
- s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
- s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
- s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
- s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
- s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
- s = re.sub(r'<li>', r'\n\n* ', s)
- s = re.sub(r'</?ul>', r'', s)
- s = re.sub(r'</li>', r'\n\n', s)
- s = s.replace('``true``', '``True``')
- s = s.replace('``false``', '``False``')
- # Re-flow text
- wrapper = textwrap.TextWrapper()
- wrapper.expand_tabs = True
- wrapper.replace_whitespace = True
- wrapper.drop_whitespace = True
- wrapper.width = 70
- wrapper.initial_indent = wrapper.subsequent_indent = ''
- result = ''
- in_code_segment = False
- for x in re.split(r'(```)', s):
- if x == '```':
- if not in_code_segment:
- result += '```\n'
- else:
- result += '\n```\n\n'
- in_code_segment = not in_code_segment
- elif in_code_segment:
- result += x.strip()
- else:
- for y in re.split(r'(?: *\n *){2,}', x):
- wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
- if len(wrapped) > 0 and wrapped[0] == '$':
- result += wrapped[1:] + '\n'
- wrapper.initial_indent = \
- wrapper.subsequent_indent = ' ' * 4
- else:
- if len(wrapped) > 0:
- result += wrapped + '\n\n'
- wrapper.initial_indent = wrapper.subsequent_indent = ''
- return result.rstrip().lstrip('\n')
- def extract(filename, node, prefix):
- if not (node.location.file is None or
- os.path.samefile(d(node.location.file.name), filename)):
- return 0
- if node.kind in RECURSE_LIST:
- sub_prefix = prefix
- if node.kind != CursorKind.TRANSLATION_UNIT:
- if len(sub_prefix) > 0:
- sub_prefix += '_'
- sub_prefix += d(node.spelling)
- for i in node.get_children():
- extract(filename, i, sub_prefix)
- if node.kind in PRINT_LIST:
- comment = d(node.raw_comment) if node.raw_comment is not None else ''
- comment = process_comment(comment)
- sub_prefix = prefix
- if len(sub_prefix) > 0:
- sub_prefix += '_'
- if len(node.spelling) > 0:
- name = sanitize_name(sub_prefix + d(node.spelling))
- global output
- output.append((name, filename, comment))
- class ExtractionThread(Thread):
- def __init__(self, filename, parameters):
- Thread.__init__(self)
- self.filename = filename
- self.parameters = parameters
- job_semaphore.acquire()
- def run(self):
- print('Processing "%s" ..' % self.filename, file=sys.stderr)
- try:
- index = cindex.Index(
- cindex.conf.lib.clang_createIndex(False, True))
- tu = index.parse(self.filename, self.parameters)
- extract(self.filename, tu.cursor, '')
- finally:
- job_semaphore.release()
- if __name__ == '__main__':
- parameters = ['-x', 'c++', '-std=c++11']
- filenames = []
- if platform.system() == 'Darwin':
- dev_path = '/Applications/Xcode.app/Contents/Developer/'
- lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
- sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
- libclang = lib_dir + 'libclang.dylib'
- if os.path.exists(libclang):
- cindex.Config.set_library_path(os.path.dirname(libclang))
- if os.path.exists(sdk_dir):
- sysroot_dir = os.path.join(sdk_dir, next(os.walk(sdk_dir))[1][0])
- parameters.append('-isysroot')
- parameters.append(sysroot_dir)
- for item in sys.argv[1:]:
- if item.startswith('-'):
- parameters.append(item)
- else:
- filenames.append(item)
- if len(filenames) == 0:
- print('Syntax: %s [.. a list of header files ..]' % sys.argv[0])
- exit(-1)
- print('''/*
- This file contains docstrings for the Python bindings.
- Do not edit! These were automatically extracted by mkdoc.py
- */
- #define __EXPAND(x) x
- #define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT
- #define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
- #define __CAT1(a, b) a ## b
- #define __CAT2(a, b) __CAT1(a, b)
- #define __DOC1(n1) __doc_##n1
- #define __DOC2(n1, n2) __doc_##n1##_##n2
- #define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3
- #define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4
- #define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5
- #define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
- #define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
- #define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))
- #if defined(__GNUG__)
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wunused-variable"
- #endif
- ''')
- output.clear()
- for filename in filenames:
- thr = ExtractionThread(filename, parameters)
- thr.start()
- print('Waiting for jobs to finish ..', file=sys.stderr)
- for i in range(job_count):
- job_semaphore.acquire()
- name_ctr = 1
- name_prev = None
- for name, _, comment in list(sorted(output, key=lambda x: (x[0], x[1]))):
- if name == name_prev:
- name_ctr += 1
- name = name + "_%i" % name_ctr
- else:
- name_prev = name
- name_ctr = 1
- print('\nstatic const char *%s =%sR"doc(%s)doc";' %
- (name, '\n' if '\n' in comment else ' ', comment))
- print('''
- #if defined(__GNUG__)
- #pragma GCC diagnostic pop
- #endif
- ''')
|