123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206 |
- #!/usr/bin/env python
- # encoding: utf-8
- # Copyright 2009 Baptiste Lepilleur and The JsonCpp Authors
- # Distributed under MIT license, or public domain if desired and
- # recognized in your jurisdiction.
- # See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
from __future__ import print_function

import fnmatch
import os
import os.path
import re

try:
    from dircache import listdir
except ImportError:  # dircache only exists on Python 2
    from os import listdir
# Whitespace-separated fnmatch expressions naming the directories that glob()
# does not descend into by default (version-control metadata directories).
prune_dirs = '.git .bzr .hg .svn _MTN _darcs CVS SCCS '

# Whitespace-separated ant-style patterns that glob() rejects by default:
# editor backup/lock files, version-control metadata and OS droppings.
default_excludes = '''
**/*~
**/#*#
**/.#*
**/%*%
**/._*
**/CVS
**/CVS/**
**/.cvsignore
**/SCCS
**/SCCS/**
**/vssver.scc
**/.svn
**/.svn/**
**/.git
**/.git/**
**/.gitignore
**/.bzr
**/.bzr/**
**/.hg
**/.hg/**
**/_MTN
**/_MTN/**
**/_darcs
**/_darcs/**
**/.DS_Store '''

# Entry-type bit flags accepted by glob(entry_type=...); combine them with `|`.
DIR = 1 << 0        # regular directory
FILE = 1 << 1       # regular file
DIR_LINK = 1 << 2   # symbolic link resolving to a directory
FILE_LINK = 1 << 3  # symbolic link resolving to a file
ALL_NO_LINK = DIR | FILE
LINKS = DIR_LINK | FILE_LINK
ALL = ALL_NO_LINK | LINKS
# Tokenizer for ant patterns.  Capture groups, in order:
#   1: '/**/'   2: '**/'   3: '/**'   4: '*'   5: '/'   6: literal text
_ANT_RE = re.compile(r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)')


def ant_pattern_to_re(ant_pattern):
    """Compile an ant-style path pattern into an anchored regular expression.

    Matching convention:
      **/a:    matches 'a', 'dir/a', 'dir1/dir2/a'
      a/**/b:  matches 'a/b', 'a/c/b', 'a/d/c/b'
      *.py:    matches 'script.py' but not 'a/script.py'

    Both '/' and os.path.sep are accepted as directory separators in the
    paths being matched.

    Returns the compiled pattern object.
    Raises ValueError if the tokens found in ant_pattern are not contiguous.
    """
    escaped_sep = re.escape(os.path.sep)
    any_sep = r'(?:/|%s)' % escaped_sep
    # (capture group of _ANT_RE, regex fragment emitted for that token)
    wildcard_fragments = (
        (1, any_sep + '(?:.*%s)?' % any_sep),  # /**/ : separator + optional sub-path
        (2, '(?:.*%s)?' % any_sep),            # **/  : optional leading sub-path
        (3, any_sep + '.*'),                   # /**  : separator + anything
        (4, '[^/%s]*' % escaped_sep),          # *    : anything but a separator
        (5, any_sep),                          # /    : a single separator
    )
    pieces = ['^']
    expected_start = 0
    for token in _ANT_RE.finditer(ant_pattern):
        if token.start(0) != expected_start:
            raise ValueError("Invalid ant pattern")
        expected_start = token.end()
        for group_index, fragment in wildcard_fragments:
            if token.group(group_index):
                pieces.append(fragment)
                break
        else:
            # No wildcard/separator group matched: literal path text
            # (possibly empty), which must be matched verbatim.
            pieces.append(re.escape(token.group(6)))
    pieces.append('$')
    return re.compile(''.join(pieces))
- def _as_list(l):
- if isinstance(l, basestring):
- return l.split()
- return l
def glob(dir_path,
         includes='**/*',
         excludes=default_excludes,
         entry_type=FILE,
         prune_dirs=prune_dirs,
         max_depth=25):
    """Recursively list the entries below dir_path that pass the ant filters.

    Parameters:
      dir_path:   root directory of the traversal ('/' is converted to
                  os.path.sep).
      includes:   ant patterns (whitespace-separated string or sequence); an
                  entry must match at least one of them.
      excludes:   ant patterns (same forms); an entry matching any of them is
                  rejected.
      entry_type: bit mask of DIR, FILE, DIR_LINK and FILE_LINK selecting
                  which kinds of entries are returned.
      prune_dirs: fnmatch expressions (same forms) tested against directory
                  names; matching directories are not descended into.
      max_depth:  accepted for interface compatibility.
                  NOTE(review): this value is never consulted, so the
                  traversal depth is currently unbounded.

    Include and exclude patterns are matched against the joined path of each
    entry, i.e. including the dir_path prefix.

    Returns a list of the matching paths, each built by joining the
    containing directory with the entry name.
    """
    include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
    exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
    prune_dirs = [p.replace('/', os.path.sep) for p in _as_list(prune_dirs)]
    dir_path = dir_path.replace('/', os.path.sep)
    entry_type_filter = entry_type

    def is_pruned_dir(dir_name):
        """Return True if dir_name matches one of the prune expressions."""
        return any(fnmatch.fnmatch(dir_name, pattern) for pattern in prune_dirs)

    def apply_filter(full_path, filter_rexs):
        """Return True if at least one of the filter regular expressions matches full_path."""
        return any(rex.match(full_path) for rex in filter_rexs)

    def glob_impl(root_dir_path):
        # Iterative traversal: directories still to be visited are kept on a
        # stack, so the most recently discovered directory is explored first.
        pending_dirs = [root_dir_path]
        while pending_dirs:
            current_dir = pending_dirs.pop()
            # os.listdir replaces dircache.listdir (module removed in
            # Python 3); sorted() keeps the name-ordered listing that
            # dircache.listdir provided.
            for entry in sorted(os.listdir(current_dir)):
                full_path = os.path.join(current_dir, entry)
                is_dir = os.path.isdir(full_path)
                # A directory is queued for exploration even when it is
                # itself filtered out of the results below.
                if is_dir and not is_pruned_dir(entry):
                    pending_dirs.append(full_path)
                if not apply_filter(full_path, include_filter):
                    continue
                if apply_filter(full_path, exclude_filter):
                    continue
                is_file = os.path.isfile(full_path)
                if not is_file and not is_dir:
                    continue  # neither a file nor a directory: unknown entry type
                if os.path.islink(full_path):
                    found_type = FILE_LINK if is_file else DIR_LINK
                else:
                    found_type = FILE if is_file else DIR
                if (found_type & entry_type_filter) != 0:
                    yield full_path

    return list(glob_impl(dir_path))
if __name__ == "__main__":
    import unittest

    class AntPatternToRETest(unittest.TestCase):
        """Checks ant_pattern_to_re against tables of accepted/rejected paths."""

        def test_matching(self):
            # Each case: (ant pattern, paths that must match, paths that must not).
            # NOTE(review): '/path' appears twice in the first rejected list.
            test_cases = [
                ('path',
                 ['path'],
                 ['somepath', 'pathsuffix', '/path', '/path']),
                ('*.py',
                 ['source.py', 'source.ext.py', '.py'],
                 ['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c']),
                ('**/path',
                 ['path', '/path', '/a/path', 'c:/a/path', '/a/b/path', '//a/path', '/a/path/b/path'],
                 ['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix', 'a/somepath']),
                ('path/**',
                 ['path/a', 'path/path/a', 'path//'],
                 ['path', 'somepath/a', 'a/path', 'a/path/a', 'pathsuffix/a']),
                ('/**/path',
                 ['/path', '/a/path', '/a/b/path/path', '/path/path'],
                 ['path', 'path/', 'a/path', '/pathsuffix', '/somepath']),
                ('a/b',
                 ['a/b'],
                 ['somea/b', 'a/bsuffix', 'a/b/c']),
                ('**/*.py',
                 ['script.py', 'src/script.py', 'a/b/script.py', '/a/b/script.py'],
                 ['script.pyc', 'script.pyo', 'a.py/b']),
                ('src/**/*.py',
                 ['src/a.py', 'src/dir/a.py'],
                 ['a/src/a.py', '/src/a.py']),
            ]

            def local_path(paths):
                # Same paths spelled with the platform's native separator.
                return [p.replace('/', os.path.sep) for p in paths]

            # Every case is run a second time with native-separator paths.
            native_cases = [
                (pattern, local_path(accepted), local_path(rejected))
                for pattern, accepted, rejected in test_cases
            ]
            for ant_pattern, accepted_matches, rejected_matches in test_cases + native_cases:
                rex = ant_pattern_to_re(ant_pattern)
                print('ant_pattern:', ant_pattern, ' => ', rex.pattern)
                for candidate in accepted_matches:
                    print('Accepted?:', candidate)
                    self.assertTrue(rex.match(candidate) is not None)
                for candidate in rejected_matches:
                    print('Rejected?:', candidate)
                    self.assertTrue(rex.match(candidate) is None)

    unittest.main()
|