Eclectic Media Git klaus / 909a534
Support for Exuberant Ctags Jonas Haag 5 years ago
8 changed file(s) with 345 addition(s) and 8 deletion(s). Raw diff Collapse all Expand all
3232 default=False, action='store_true')
3333 parser.add_argument('-B', '--with-browser', help="specify which browser to use with --browser",
3434 metavar='BROWSER', default=None)
35 parser.add_argument('--ctags', help="enable ctags for which revisions? default: none. "
36 "WARNING: Don't use 'ALL' for public servers!",
37 choices=['none', 'tags-and-branches', 'ALL'], default='none')
3538
3639 parser.add_argument('repos', help='repositories to serve',
3740 metavar='DIR', nargs='*', type=git_repository)
5255 args = make_parser().parse_args()
5356
5457 if args.htdigest and not args.smarthttp:
55 print >> sys.stderr, "--htdigest option has no effect without --smarthttp enabled"
58 print >> sys.stderr, "ERROR: --htdigest option has no effect without --smarthttp enabled"
5659 return 1
5760
5861 if not args.repos:
6164 if not args.site_name:
6265 args.site_name = '%s:%d' % (args.host, args.port)
6366
67 if args.ctags != 'none':
68 from klaus.ctags import check_have_exuberant_ctags
69 if not check_have_exuberant_ctags():
70 print >> sys.stderr, "ERROR: Exuberant ctags not installed (or 'ctags' binary isn't *Exuberant* ctags)"
71 return 1
72 try:
73 import ctags
74 except ImportError:
75 raise ImportError("Please install 'python-ctags' to enable ctags support.")
76
6477 app = make_app(
6578 args.repos,
6679 force_unicode(args.site_name or args.host),
6780 args.smarthttp,
68 args.htdigest
81 args.htdigest,
82 ctags_policy=args.ctags,
6983 )
7084
7185 if args.browser:
1414 'undefined': jinja2.StrictUndefined
1515 }
1616
17 def __init__(self, repo_paths, site_name, use_smarthttp):
17 def __init__(self, repo_paths, site_name, use_smarthttp, ctags_policy='none'):
1818 """(See `make_app` for parameter descriptions.)"""
1919 repo_objs = [FancyRepo(path) for path in repo_paths]
2020 self.repos = dict((repo.name, repo) for repo in repo_objs)
2121 self.site_name = site_name
2222 self.use_smarthttp = use_smarthttp
23 self.ctags_policy = ctags_policy
2324
2425 flask.Flask.__init__(self, __name__)
2526
6465 ]:
6566 self.add_url_rule(rule, view_func=getattr(views, endpoint))
6667
def should_use_ctags(self, git_repo, git_commit):
    """Decide whether ctags should be used for `git_commit` of `git_repo`.

    The decision depends on ``self.ctags_policy``:

    - 'none': never use ctags
    - 'ALL': always use ctags
    - 'tags-and-branches': use ctags only if the commit's id is among the
      SHAs returned by ``git_repo.get_tag_and_branch_shas()``

    :raises ValueError: if the policy is none of the above
    """
    policy = self.ctags_policy
    if policy == 'ALL':
        return True
    if policy == 'none':
        return False
    if policy == 'tags-and-branches':
        return git_commit.id in git_repo.get_tag_and_branch_shas()
    raise ValueError("Unknown ctags policy %r" % self.ctags_policy)
77
78
6779
6880 def make_app(repo_paths, site_name, use_smarthttp=False, htdigest_file=None,
69 require_browser_auth=False, disable_push=False, unauthenticated_push=False):
81 require_browser_auth=False, disable_push=False, unauthenticated_push=False,
82 ctags_policy='none'):
7083 """
7184 Returns a WSGI app with all the features (smarthttp, authentication)
7285 already patched in.
8497 are set, but push should not be supported.
8598 :param htdigest_file: A *file-like* object that contains the HTTP auth credentials.
8699 :param unauthenticated_push: Allow push'ing without authentication. DANGER ZONE!
100 :param ctags_policy: The ctags policy to use, may be one of:
101 - 'none': never use ctags
102 - 'tags-and-branches': use ctags for revisions that are the HEAD of
103 a tag or branc
104 - 'ALL': use ctags for all revisions, may result in high server load!
87105 """
88106 if unauthenticated_push:
89107 if not use_smarthttp:
99117 repo_paths,
100118 site_name,
101119 use_smarthttp,
120 ctags_policy,
102121 )
103122 app.wsgi_app = utils.SubUri(app.wsgi_app)
104123
0 import os
1 import subprocess
2 import shutil
3 import tempfile
4 from klaus.utils import check_output
5
6
def check_have_exuberant_ctags():
    """Check that the 'ctags' binary is *Exuberant* ctags (not etags etc).

    :return: True if running ``ctags --version`` succeeds and its output
             mentions "Exuberant"; False if the command fails or if no
             'ctags' executable can be started at all.
    """
    try:
        version_output = check_output(["ctags", "--version"], stderr=subprocess.PIPE)
    except (OSError, subprocess.CalledProcessError):
        # OSError: the 'ctags' executable could not be started (e.g. it is
        # not installed). CalledProcessError: it ran but exited non-zero,
        # which is what non-Exuberant implementations may do for '--version'.
        return False
    return "Exuberant" in version_output
13
14
def create_tagsfile(git_repo_path, git_rev):
    """Create a ctags tagsfile for the given Git repository and revision.

    This creates a temporary clone of the repository, checks out the revision,
    runs 'ctags -R' and deletes the temporary clone.

    If any of the steps fails, the (incomplete) tagsfile is removed again
    before the exception is propagated, so no temporary files are left behind.

    :param git_repo_path: path of the Git repository to clone
    :param git_rev: revision to check out in the temporary clone
    :return: path to the generated tagsfile
    :raises subprocess.CalledProcessError: if 'git' or 'ctags' exits non-zero
    """
    assert check_have_exuberant_ctags(), "'ctags' binary is missing or not *Exuberant* ctags"

    # mkstemp() hands back an *open* file descriptor. ctags writes to the
    # path on its own, so close the descriptor right away instead of leaking it.
    tagsfile_fd, target_tagsfile = tempfile.mkstemp()
    os.close(tagsfile_fd)
    try:
        checkout_tmpdir = tempfile.mkdtemp()
        try:
            subprocess.check_call(["git", "clone", "-q", "--shared", git_repo_path, checkout_tmpdir])
            subprocess.check_call(["git", "checkout", "-q", git_rev], cwd=checkout_tmpdir)
            # --fields=+l: record each tag's language in the tagsfile
            # -R: recurse, -n: line numbers instead of patterns, -o: output file
            subprocess.check_call(["ctags", "--fields=+l", "-Rno", target_tagsfile], cwd=checkout_tmpdir)
        finally:
            shutil.rmtree(checkout_tmpdir)
    except Exception:
        # Don't leave a half-written tagsfile behind; nobody would delete it.
        os.remove(target_tagsfile)
        raise
    return target_tagsfile
34
35
def delete_tagsfile(tagsfile_path):
    """Remove the tagsfile at `tagsfile_path` from disk."""
    os.unlink(tagsfile_path)
0 """A cache for tagsfiles generated by the 'ctags' command line tool.
1
2 We don't want to run the 'ctags' command line tool on each request as it may
3 take a lot of time. The following steps are necessary in order to create a
4 ctags tagsfile that can be read by Pygments:
5
6 1. Clone the repository to a temporary location and check out the branch/commit
7 the user is browsing, unless the branch is already checked out. (*)
8 2. Run 'ctags -R' on the temporary repository checkout.
9 3. Delete the temporary repository checkout.
10
11 To avoid going through these steps on each request, we cache the tagsfile
12 generated in step 2. The cache is on-disk and non-persistent, i.e. cleared
13 whenever the Python interpreter running klaus is shut down.
14
15 For large projects, the ctags tagsfiles may grow to sizes of multiple MiB, so
16 we have to set an upper limit on the size of the cache. Since tagsfiles are
17 represented as uncompressed ASCII files, we can increase the number of tagsfiles
18 we can cache by using compression. Of course, 'python-ctags', which is used by
19 Pygments to read the tagsfiles, can't deal with compressed tagsfiles, so we have
20 to uncompress them before actually using them. To avoid decompressing tagsfiles
21 on each request, we keep the tagsfiles that are most likely to be used (**) in
22 uncompressed form.
23
24 (*) We always create a clone in the current implementation;
25 this could be optimized in the future.
26 (**) "most likely": currently implemented as "most recently used"
27 """
28 import os
29 import shutil
30 import tempfile
31 import threading
32 import gzip
33 from dulwich.lru_cache import LRUSizeCache
34 from klaus.ctags import create_tagsfile, delete_tagsfile
35
36
# Good compression while taking only 10% more time than level 1
COMPRESSION_LEVEL = 4


def compress_tagsfile(uncompressed_tagsfile_path):
    """Compress an uncompressed tagsfile.

    The input file is left untouched; the gzip-compressed copy is written to
    a new temporary file which the caller is responsible for deleting.

    :param uncompressed_tagsfile_path: path of the tagsfile to compress
    :return: path to the compressed version of the tagsfile
    """
    # mkstemp() returns an already-open file descriptor. We only need the
    # path (gzip.open opens the file itself), so close the descriptor
    # immediately rather than leaking one per call.
    compressed_fd, compressed_tagsfile_path = tempfile.mkstemp()
    os.close(compressed_fd)
    with open(uncompressed_tagsfile_path, 'rb') as uncompressed:
        with gzip.open(compressed_tagsfile_path, 'wb', COMPRESSION_LEVEL) as compressed:
            shutil.copyfileobj(uncompressed, compressed)
    return compressed_tagsfile_path
51
52
def uncompress_tagsfile(compressed_tagsfile_path):
    """Uncompress a compressed tagsfile.

    The compressed file is left untouched; the uncompressed copy is written
    to a new temporary file which the caller is responsible for deleting.

    :param compressed_tagsfile_path: path of a gzip-compressed tagsfile
    :return: path to the uncompressed version of the tagsfile
    """
    # mkstemp() returns an already-open file descriptor. We reopen the file
    # by path below, so close the descriptor instead of leaking it.
    uncompressed_fd, uncompressed_tagsfile_path = tempfile.mkstemp()
    os.close(uncompressed_fd)
    with gzip.open(compressed_tagsfile_path, 'rb') as compressed:
        with open(uncompressed_tagsfile_path, 'wb') as uncompressed:
            shutil.copyfileobj(compressed, uncompressed)
    return uncompressed_tagsfile_path
63
64
MiB = 1024 * 1024


class CTagsCache(object):
    """A ctags cache. Both uncompressed and compressed entries are kept in
    temporary files created by `tempfile.mkstemp`. The files are deleted from
    disk when their entry is evicted or when the cache is cleared (`clear` is
    also invoked from `__del__`).

    :param uncompressed_max_bytes: Maximum size of the uncompressed cache sector
    :param compressed_max_bytes: Maximum size of the compressed cache sector

    The lifecycle of a cache entry is as follows.

    - When first created, a tagsfile is put into the uncompressed cache sector.
    - When free space is required for other uncompressed tagsfiles, it may be
      moved to the compressed cache sector. Gzip is used to compress the tagsfile.
    - When free space is required for other compressed tagsfiles, it may be
      evicted from the cache entirely.
    - When the tagsfile is requested and it's in the compressed cache sector,
      it is moved back to the uncompressed sector prior to using it.
    """

    def __init__(self, uncompressed_max_bytes=30*MiB, compressed_max_bytes=20*MiB):
        self.uncompressed_max_bytes = uncompressed_max_bytes
        self.compressed_max_bytes = compressed_max_bytes
        # Note: We use dulwich's LRU cache to store the tagsfile paths here,
        # but we could easily replace it by any other (LRU) cache implementation.
        # The cached values are file paths; an entry's size is its file size.
        self._uncompressed_cache = LRUSizeCache(uncompressed_max_bytes, compute_size=os.path.getsize)
        self._compressed_cache = LRUSizeCache(compressed_max_bytes, compute_size=os.path.getsize)
        # True only while `clear` is running, see `_clear_uncompressed_entry`.
        self._clearing = False
        self._lock = threading.Lock()

    def __del__(self):
        self.clear()

    def clear(self):
        """Clear both the uncompressed and compressed caches."""
        # Hold the same lock as `get_tagsfile` so a concurrent lookup never
        # observes the `_clearing` flag or a half-cleared cache.
        with self._lock:
            # Don't waste time moving tagsfiles from uncompressed to compressed
            # cache, but remove them directly instead:
            self._clearing = True
            try:
                self._uncompressed_cache.clear()
                self._compressed_cache.clear()
            finally:
                # Reset the flag even if a cleanup callback raised; otherwise
                # every later eviction would delete entries instead of
                # compressing them.
                self._clearing = False

    def get_tagsfile(self, git_repo_path, git_rev):
        """Get the ctags tagsfile for the given Git repository and revision.

        - If the tagsfile is still in cache, and in uncompressed form, return it
          without any further cost.
        - If the tagsfile is still in cache, but in compressed form, uncompress
          it, put it into uncompressed space, and return the uncompressed version.
        - If the tagsfile isn't in cache at all, create it, put it into
          uncompressed cache and return the newly created version.

        :param git_repo_path: path of the Git repository
        :param git_rev: full (40 character) SHA of the revision
        :return: path to the uncompressed tagsfile
        """
        # Always require full SHAs
        assert len(git_rev) == 40

        # Avoiding race conditions, The Sledgehammer Way
        with self._lock:
            if git_rev in self._uncompressed_cache:
                return self._uncompressed_cache[git_rev]

            if git_rev in self._compressed_cache:
                compressed_tagsfile_path = self._compressed_cache[git_rev]
                uncompressed_tagsfile_path = uncompress_tagsfile(compressed_tagsfile_path)
                # NOTE(review): this relies on private LRUSizeCache API;
                # presumably it drops the entry and runs its cleanup callback
                # (deleting the compressed file) -- verify against dulwich.
                self._compressed_cache._remove_node(self._compressed_cache._cache[git_rev])
            else:
                # Not in cache.
                uncompressed_tagsfile_path = create_tagsfile(git_repo_path, git_rev)
            self._uncompressed_cache.add(git_rev, uncompressed_tagsfile_path,
                                         self._clear_uncompressed_entry)
            return uncompressed_tagsfile_path

    def _clear_uncompressed_entry(self, git_rev, uncompressed_tagsfile_path):
        """Called by LRUSizeCache whenever an entry is to be evicted from
        uncompressed cache.

        Most of the times this happens when space is needed
        in uncompressed cache, in which case we move the tagsfile to compressed
        cache. When clearing the cache, we don't bother moving entries to
        compressed space; we delete them directly instead.
        """
        if not self._clearing:
            # If we're clearing the whole cache, don't waste time moving tagsfiles
            # from uncompressed to compressed cache, but remove them directly instead.
            self._compressed_cache.add(git_rev, compress_tagsfile(uncompressed_tagsfile_path),
                                       self._clear_compressed_entry)
        # In either case the uncompressed file itself is no longer needed.
        delete_tagsfile(uncompressed_tagsfile_path)

    def _clear_compressed_entry(self, git_rev, compressed_tagsfile_path):
        """Called by LRUSizeCache whenever an entry is to be evicted from
        compressed cache.

        This happens when space is needed for new compressed
        tagsfiles. We delete the evictee from the cache entirely.
        """
        delete_tagsfile(compressed_tagsfile_path)
00 from pygments import highlight
1 from pygments.lexers import get_lexer_for_filename, \
1 from pygments.lexers import get_lexer_by_name, get_lexer_for_filename, \
22 guess_lexer, ClassNotFound, TextLexer
33 from pygments.formatters import HtmlFormatter
44
55 from klaus import markup
66
77
# ctags language names for which a Pygments lexer of the same name is looked up.
CTAGS_SUPPORTED_LANGUAGES = (
    "Asm Awk Basic C C# C++ Cobol DosBatch Eiffel Erlang Fortran HTML Java "
    "JavaScript Lisp Lua Make Makefile MatLab OCaml PHP Pascal Perl Python "
    "REXX Ruby SML SQL Scheme Sh Tcl Tex VHDL Verilog Vim"
    # Not supported by Pygments: Asp Ant BETA Flex SLang Vera YACC
).split()

# Maps the name of a Pygments lexer to the corresponding ctags language name.
PYGMENTS_CTAGS_LANGUAGE_MAP = dict(
    (get_lexer_by_name(ctags_language).name, ctags_language)
    for ctags_language in CTAGS_SUPPORTED_LANGUAGES
)
15
16
817 class KlausDefaultFormatter(HtmlFormatter):
9 def __init__(self, **kwargs):
18 def __init__(self, language, ctags, **kwargs):
1019 HtmlFormatter.__init__(self, linenos='table', lineanchors='L',
1120 linespans='L', anchorlinenos=True, **kwargs)
21 self.language = language
22 if ctags:
23 # Use Pygments' ctags system but provide our own CTags instance
24 self.tagsfile = True # some trueish object
25 self._ctags = ctags
1226
1327 def _format_lines(self, tokensource):
1428 for tag, line in HtmlFormatter._format_lines(self, tokensource):
1731 line = '<span class=line>%s</span>' % line
1832 yield tag, line
1933
def _lookup_ctag(self, token):
    """Look up `token` in the tagsfile.

    :return: ``(file, lineNumber)`` of the first of the best matches, or
             ``(None, None)`` if there is no suitable match
    """
    candidates = self.get_best_ctags_matches(list(self._get_all_ctags_matches(token)))
    if candidates:
        winner = candidates[0]
        return winner['file'], winner['lineNumber']
    return None, None
2041
21 def pygmentize(code, filename, render_markup):
def _get_all_ctags_matches(self, token):
    """Yield one dict per tagsfile entry found for `token`.

    Each dict holds the 'file', 'lineNumber', 'kind' and 'language' fields
    of the entry.
    """
    from ctags import TagEntry
    wanted_fields = ('file', 'lineNumber', 'kind', 'language')
    # target "buffer" for ctags: find()/findNext() fill it in place, so the
    # fields are copied into a fresh dict before the next lookup.
    entry = TagEntry()
    found = self._ctags.find(entry, token, 0)
    while found:
        yield dict((field, entry[field]) for field in wanted_fields)
        found = self._ctags.findNext(entry)
50
def get_best_ctags_matches(self, matches):
    """Narrow `matches` down to those in the formatter's language.

    :param matches: list of match dicts, each having a 'language' key
    :return: `matches` unchanged if ``self.language`` is None, otherwise a
             new list with the matches whose language is ``self.language``
    """
    if self.language is None:
        return matches
    # Build a real list (not ``filter(...)``): callers test the result for
    # emptiness and index into it, which a lazy iterator doesn't support.
    return [match for match in matches if match['language'] == self.language]
56
57
class KlausPythonFormatter(KlausDefaultFormatter):
    """Formatter for Python code that ignores ctags matches for imports."""

    def get_best_ctags_matches(self, matches):
        """Return the best matches as chosen by the base class, minus imports.

        :param matches: list of match dicts, each having a 'kind' key
        :return: list of the remaining matches (ctags kind other than 'i')
        """
        # The first ctags match may be an import, which ctags sees as a
        # definition of the tag -- even though it might very well have found
        # the "real" definition of the tag. Import matches aren't very helpful:
        # In the best case, we are brought to the line where the tag is imported
        # in the same file. But it may also bring us to some completely unrelated
        # import of the tag in some other file. We change the tag lookup mechanics
        # so that import matches are dropped and only non-import matches remain.
        candidates = super(KlausPythonFormatter, self).get_best_ctags_matches(matches)
        # Build a real list (not ``filter(...)``): callers test the result for
        # emptiness and index into it, which a lazy iterator doesn't support.
        return [match for match in candidates if match['kind'] != 'i']
71
72
73 def pygmentize(code, filename, render_markup, ctags=None, ctags_baseurl=None):
2274 """Render code using Pygments, markup (markdown, rst, ...) using the
2375 corresponding renderer, if available.
2476
2577 :param code: the program code to highlight, str
2678 :param filename: name of the source file the code is taken from, str
2779 :param render_markup: whether to render markup if possible, bool
80 :param ctags: tagsfile obj used for source code hyperlinks, ``ctags.CTags``
81 :param ctags_baseurl: base url used for source code hyperlinks, str
2882 """
2983 if render_markup and markup.can_render(filename):
3084 return markup.render(filename, code)
3791 except ClassNotFound:
3892 lexer = TextLexer()
3993
40 return highlight(code, lexer, KlausFormatter())
94 formatter_cls = {
95 'Python': KlausPythonFormatter,
96 }.get(lexer.name, KlausDefaultFormatter)
97 if ctags:
98 ctags_urlscheme = ctags_baseurl + "%(path)s%(fname)s%(fext)s"
99 else:
100 ctags_urlscheme = None
101 formatter = formatter_cls(
102 language=PYGMENTS_CTAGS_LANGUAGE_MAP.get(lexer.name),
103 ctags=ctags,
104 tagurlformat=ctags_urlscheme,
105 )
106
107 return highlight(code, lexer, formatter)
9090 def get_tag_names(self):
9191 """Return a list of tag names of this repo, ordered by creation time."""
9292 return self.get_ref_names_ordered_by_last_commit('refs/tags')
93
def get_tag_and_branch_shas(self):
    """Return a list of SHAs of all tags and branches.

    The list contains the values of all refs below 'refs/tags/' followed by
    the values of all refs below 'refs/heads/'.
    """
    # NOTE(review): for annotated tags the ref presumably holds the SHA of
    # the tag object rather than of the tagged commit -- confirm whether
    # callers comparing against commit ids need the tags to be peeled.
    tag_shas = self.refs.as_dict('refs/tags/').values()
    branch_shas = self.refs.as_dict('refs/heads/').values()
    # Convert explicitly: dict views can't be concatenated with '+'.
    return list(tag_shas) + list(branch_shas)
9399
94100 def history(self, commit, path=None, max_commits=None, skip=0):
95101 """Return a list of all commits that affected `path`, starting at branch
187187 .blobview table, .blameview table { border: 1px solid #e0e0e0; }
188188 .blobview .code, .blameview .code { padding: 0; width: 100%; }
189189 .blobview .code .line, .blameview .code .line { padding: 0 5px 0 10px; }
190 .blobview .code a, .blameview .code a { color: inherit; }
190191 .blobview .linenos, .blameview .linenos { border: 1px solid #e0e0e0; padding: 0; }
191192
192193
66 from werkzeug.exceptions import NotFound
77
88 from dulwich.objects import Blob
9
10 try:
11 import ctags
12 from klaus import ctagscache
13 except ImportError:
14 ctags = None
915
1016 from klaus import markup, tarutils
1117 from klaus.highlighting import pygmentize
1218 from klaus.utils import parent_directory, subpaths, force_unicode, guess_is_binary, \
1319 guess_is_image, replace_dupes
20
21
22 if ctags:
23 CTAGS_CACHE = ctagscache.CTagsCache()
1424
1525
1626 def repo_list():
176186 class BaseFileView(TreeViewMixin, BaseBlobView):
177187 """Base for FileView and BlameView."""
def render_code(self, render_markup):
    """Render the current blob via `pygmentize`.

    If the application's ctags policy allows it for the current repository
    and commit, a tagsfile is fetched from the ctags cache and handed to
    `pygmentize` together with the base URL used for source code hyperlinks.

    :param render_markup: passed through to `pygmentize`
    """
    context = self.context
    ctags_args = {}

    if current_app.should_use_ctags(context['repo'], context['commit']):
        # Base URL for tag links: the URL of this view with an empty path.
        ctags_base_url = url_for(
            self.view_name,
            repo=context['repo'].name,
            rev=context['rev'],
            path=''
        )
        ctags_tagsfile = CTAGS_CACHE.get_tagsfile(
            context['repo'].path,
            context['commit'].id
        )
        ctags_args['ctags'] = ctags.CTags(ctags_tagsfile)
        ctags_args['ctags_baseurl'] = ctags_base_url

    code = force_unicode(context['blob_or_tree'].data)
    return pygmentize(code, context['filename'], render_markup, **ctags_args)
184215
185216 def make_template_context(self, *args):