antglob.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206
  1. #!/usr/bin/env python
  2. # encoding: utf-8
  3. # Copyright 2009 Baptiste Lepilleur and The JsonCpp Authors
  4. # Distributed under MIT license, or public domain if desired and
  5. # recognized in your jurisdiction.
  6. # See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
from __future__ import print_function
import fnmatch
import os
import os.path
import re
try:
    from dircache import listdir  # Python 2 only: dircache was removed in Python 3
except ImportError:
    from os import listdir
# Whitespace-separated fnmatch patterns used as the default value of the
# prune_dirs parameter of glob(): a directory whose name matches one of them
# is not descended into during the traversal.
prune_dirs = '.git .bzr .hg .svn _MTN _darcs CVS SCCS '
# Legacy fnmatch-based exclusion list, left disabled (prune_pats is not defined
# in this module):
##exclude_pats = prune_pats + '*~ #*# .#* %*% ._* .gitignore .cvsignore vssver.scc .DS_Store'.split()
# Whitespace-separated ant patterns used as the default value of the excludes
# parameter of glob(): an entry whose path matches one of them is left out of
# the result. Matching a directory here does not stop glob() from descending
# into it; only prune_dirs does that.
default_excludes = '''
**/*~
**/#*#
**/.#*
**/%*%
**/._*
**/CVS
**/CVS/**
**/.cvsignore
**/SCCS
**/SCCS/**
**/vssver.scc
**/.svn
**/.svn/**
**/.git
**/.git/**
**/.gitignore
**/.bzr
**/.bzr/**
**/.hg
**/.hg/**
**/_MTN
**/_MTN/**
**/_darcs
**/_darcs/**
**/.DS_Store '''
  46. DIR = 1
  47. FILE = 2
  48. DIR_LINK = 4
  49. FILE_LINK = 8
  50. LINKS = DIR_LINK | FILE_LINK
  51. ALL_NO_LINK = DIR | FILE
  52. ALL = DIR | FILE | LINKS
  53. _ANT_RE = re.compile(r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)')
  54. def ant_pattern_to_re(ant_pattern):
  55. """Generates a regular expression from the ant pattern.
  56. Matching convention:
  57. **/a: match 'a', 'dir/a', 'dir1/dir2/a'
  58. a/**/b: match 'a/b', 'a/c/b', 'a/d/c/b'
  59. *.py: match 'script.py' but not 'a/script.py'
  60. """
  61. rex = ['^']
  62. next_pos = 0
  63. sep_rex = r'(?:/|%s)' % re.escape(os.path.sep)
  64. ## print 'Converting', ant_pattern
  65. for match in _ANT_RE.finditer(ant_pattern):
  66. ## print 'Matched', match.group()
  67. ## print match.start(0), next_pos
  68. if match.start(0) != next_pos:
  69. raise ValueError("Invalid ant pattern")
  70. if match.group(1): # /**/
  71. rex.append(sep_rex + '(?:.*%s)?' % sep_rex)
  72. elif match.group(2): # **/
  73. rex.append('(?:.*%s)?' % sep_rex)
  74. elif match.group(3): # /**
  75. rex.append(sep_rex + '.*')
  76. elif match.group(4): # *
  77. rex.append('[^/%s]*' % re.escape(os.path.sep))
  78. elif match.group(5): # /
  79. rex.append(sep_rex)
  80. else: # somepath
  81. rex.append(re.escape(match.group(6)))
  82. next_pos = match.end()
  83. rex.append('$')
  84. return re.compile(''.join(rex))
  85. def _as_list(l):
  86. if isinstance(l, basestring):
  87. return l.split()
  88. return l
  89. def glob(dir_path,
  90. includes = '**/*',
  91. excludes = default_excludes,
  92. entry_type = FILE,
  93. prune_dirs = prune_dirs,
  94. max_depth = 25):
  95. include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
  96. exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
  97. prune_dirs = [p.replace('/',os.path.sep) for p in _as_list(prune_dirs)]
  98. dir_path = dir_path.replace('/',os.path.sep)
  99. entry_type_filter = entry_type
  100. def is_pruned_dir(dir_name):
  101. for pattern in prune_dirs:
  102. if fnmatch.fnmatch(dir_name, pattern):
  103. return True
  104. return False
  105. def apply_filter(full_path, filter_rexs):
  106. """Return True if at least one of the filter regular expression match full_path."""
  107. for rex in filter_rexs:
  108. if rex.match(full_path):
  109. return True
  110. return False
  111. def glob_impl(root_dir_path):
  112. child_dirs = [root_dir_path]
  113. while child_dirs:
  114. dir_path = child_dirs.pop()
  115. for entry in listdir(dir_path):
  116. full_path = os.path.join(dir_path, entry)
  117. ## print 'Testing:', full_path,
  118. is_dir = os.path.isdir(full_path)
  119. if is_dir and not is_pruned_dir(entry): # explore child directory ?
  120. ## print '===> marked for recursion',
  121. child_dirs.append(full_path)
  122. included = apply_filter(full_path, include_filter)
  123. rejected = apply_filter(full_path, exclude_filter)
  124. if not included or rejected: # do not include entry ?
  125. ## print '=> not included or rejected'
  126. continue
  127. link = os.path.islink(full_path)
  128. is_file = os.path.isfile(full_path)
  129. if not is_file and not is_dir:
  130. ## print '=> unknown entry type'
  131. continue
  132. if link:
  133. entry_type = is_file and FILE_LINK or DIR_LINK
  134. else:
  135. entry_type = is_file and FILE or DIR
  136. ## print '=> type: %d' % entry_type,
  137. if (entry_type & entry_type_filter) != 0:
  138. ## print ' => KEEP'
  139. yield os.path.join(dir_path, entry)
  140. ## else:
  141. ## print ' => TYPE REJECTED'
  142. return list(glob_impl(dir_path))
  143. if __name__ == "__main__":
  144. import unittest
  145. class AntPatternToRETest(unittest.TestCase):
  146. ## def test_conversion(self):
  147. ## self.assertEqual('^somepath$', ant_pattern_to_re('somepath').pattern)
  148. def test_matching(self):
  149. test_cases = [ ('path',
  150. ['path'],
  151. ['somepath', 'pathsuffix', '/path', '/path']),
  152. ('*.py',
  153. ['source.py', 'source.ext.py', '.py'],
  154. ['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c']),
  155. ('**/path',
  156. ['path', '/path', '/a/path', 'c:/a/path', '/a/b/path', '//a/path', '/a/path/b/path'],
  157. ['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix', 'a/somepath']),
  158. ('path/**',
  159. ['path/a', 'path/path/a', 'path//'],
  160. ['path', 'somepath/a', 'a/path', 'a/path/a', 'pathsuffix/a']),
  161. ('/**/path',
  162. ['/path', '/a/path', '/a/b/path/path', '/path/path'],
  163. ['path', 'path/', 'a/path', '/pathsuffix', '/somepath']),
  164. ('a/b',
  165. ['a/b'],
  166. ['somea/b', 'a/bsuffix', 'a/b/c']),
  167. ('**/*.py',
  168. ['script.py', 'src/script.py', 'a/b/script.py', '/a/b/script.py'],
  169. ['script.pyc', 'script.pyo', 'a.py/b']),
  170. ('src/**/*.py',
  171. ['src/a.py', 'src/dir/a.py'],
  172. ['a/src/a.py', '/src/a.py']),
  173. ]
  174. for ant_pattern, accepted_matches, rejected_matches in list(test_cases):
  175. def local_path(paths):
  176. return [ p.replace('/',os.path.sep) for p in paths ]
  177. test_cases.append((ant_pattern, local_path(accepted_matches), local_path(rejected_matches)))
  178. for ant_pattern, accepted_matches, rejected_matches in test_cases:
  179. rex = ant_pattern_to_re(ant_pattern)
  180. print('ant_pattern:', ant_pattern, ' => ', rex.pattern)
  181. for accepted_match in accepted_matches:
  182. print('Accepted?:', accepted_match)
  183. self.assertTrue(rex.match(accepted_match) is not None)
  184. for rejected_match in rejected_matches:
  185. print('Rejected?:', rejected_match)
  186. self.assertTrue(rex.match(rejected_match) is None)
  187. unittest.main()