James Moger
2012-09-10 fabe060d3a435f116128851f828e35c2af5fde67
commit | author | age
e31da0 1 /*
JM 2  * Copyright 2012 gitblit.com.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.gitblit;
17
d896e6 18 import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;
e31da0 19
d896e6 20 import java.io.ByteArrayOutputStream;
JM 21 import java.io.File;
22 import java.io.IOException;
23 import java.io.InputStream;
905d31 24 import java.lang.reflect.Method;
d896e6 25 import java.text.MessageFormat;
JM 26 import java.text.ParseException;
27 import java.util.ArrayList;
28 import java.util.Collections;
29 import java.util.Comparator;
30 import java.util.HashMap;
31 import java.util.LinkedHashSet;
32 import java.util.List;
33 import java.util.Map;
34 import java.util.Set;
35 import java.util.TreeMap;
36 import java.util.TreeSet;
37 import java.util.concurrent.ConcurrentHashMap;
38
39 import org.apache.lucene.analysis.Analyzer;
40 import org.apache.lucene.analysis.standard.StandardAnalyzer;
41 import org.apache.lucene.document.DateTools;
42 import org.apache.lucene.document.DateTools.Resolution;
43 import org.apache.lucene.document.Document;
44 import org.apache.lucene.document.Field;
45 import org.apache.lucene.document.Field.Index;
46 import org.apache.lucene.document.Field.Store;
47 import org.apache.lucene.index.IndexReader;
48 import org.apache.lucene.index.IndexWriter;
49 import org.apache.lucene.index.IndexWriterConfig;
50 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
51 import org.apache.lucene.index.MultiReader;
52 import org.apache.lucene.index.Term;
53 import org.apache.lucene.queryParser.QueryParser;
54 import org.apache.lucene.search.BooleanClause.Occur;
55 import org.apache.lucene.search.BooleanQuery;
56 import org.apache.lucene.search.IndexSearcher;
57 import org.apache.lucene.search.Query;
58 import org.apache.lucene.search.ScoreDoc;
59 import org.apache.lucene.search.TermQuery;
60 import org.apache.lucene.search.TopScoreDocCollector;
61 import org.apache.lucene.search.highlight.Fragmenter;
62 import org.apache.lucene.search.highlight.Highlighter;
63 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
64 import org.apache.lucene.search.highlight.QueryScorer;
65 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
66 import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
67 import org.apache.lucene.store.Directory;
68 import org.apache.lucene.store.FSDirectory;
69 import org.apache.lucene.util.Version;
70 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
71 import org.eclipse.jgit.lib.Constants;
a02998 72 import org.eclipse.jgit.lib.FileMode;
d896e6 73 import org.eclipse.jgit.lib.ObjectId;
JM 74 import org.eclipse.jgit.lib.ObjectLoader;
75 import org.eclipse.jgit.lib.ObjectReader;
e31da0 76 import org.eclipse.jgit.lib.Repository;
6ef2fc 77 import org.eclipse.jgit.lib.RepositoryCache.FileKey;
d896e6 78 import org.eclipse.jgit.revwalk.RevCommit;
JM 79 import org.eclipse.jgit.revwalk.RevTree;
80 import org.eclipse.jgit.revwalk.RevWalk;
81 import org.eclipse.jgit.storage.file.FileBasedConfig;
82 import org.eclipse.jgit.treewalk.EmptyTreeIterator;
83 import org.eclipse.jgit.treewalk.TreeWalk;
84 import org.eclipse.jgit.util.FS;
e31da0 85 import org.slf4j.Logger;
JM 86 import org.slf4j.LoggerFactory;
87
d896e6 88 import com.gitblit.Constants.SearchObjectType;
JM 89 import com.gitblit.models.IssueModel;
90 import com.gitblit.models.IssueModel.Attachment;
91 import com.gitblit.models.PathModel.PathChangeModel;
92 import com.gitblit.models.RefModel;
40ca5c 93 import com.gitblit.models.RepositoryModel;
d896e6 94 import com.gitblit.models.SearchResult;
JM 95 import com.gitblit.utils.ArrayUtils;
96 import com.gitblit.utils.IssueUtils;
e31da0 97 import com.gitblit.utils.JGitUtils;
d896e6 98 import com.gitblit.utils.StringUtils;
e31da0 99
JM 100 /**
d896e6 101  * The Lucene executor handles indexing and searching repositories.
e31da0 102  * 
JM 103  * @author James Moger
104  * 
105  */
106 public class LuceneExecutor implements Runnable {
d896e6 107     
JM 108         
ae8366 109     private static final int INDEX_VERSION = 5;
e31da0 110
d896e6 111     private static final String FIELD_OBJECT_TYPE = "type";
JM 112     private static final String FIELD_ISSUE = "issue";
113     private static final String FIELD_PATH = "path";
114     private static final String FIELD_COMMIT = "commit";
115     private static final String FIELD_BRANCH = "branch";
116     private static final String FIELD_SUMMARY = "summary";
117     private static final String FIELD_CONTENT = "content";
118     private static final String FIELD_AUTHOR = "author";
119     private static final String FIELD_COMMITTER = "committer";
120     private static final String FIELD_DATE = "date";
121     private static final String FIELD_TAG = "tag";
122     private static final String FIELD_LABEL = "label";
123     private static final String FIELD_ATTACHMENT = "attachment";
124
125     private static final String CONF_FILE = "lucene.conf";
126     private static final String LUCENE_DIR = "lucene";
127     private static final String CONF_INDEX = "index";
128     private static final String CONF_VERSION = "version";
129     private static final String CONF_ALIAS = "aliases";
130     private static final String CONF_BRANCH = "branches";
131         
132     private static final Version LUCENE_VERSION = Version.LUCENE_35;
133     
e31da0 134     private final Logger logger = LoggerFactory.getLogger(LuceneExecutor.class);
d896e6 135     
JM 136     private final IStoredSettings storedSettings;
137     private final File repositoriesFolder;
138     
139     private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
140     private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();
141     
f1d2ad 142     private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip";
JM 143     private Set<String> excludedExtensions;
144     
d896e6 145     public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) {
JM 146         this.storedSettings = settings;
147         this.repositoriesFolder = repositoriesFolder;
462488 148         String exts = luceneIgnoreExtensions;
JM 149         if (settings != null) {
150             exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts);
151         }
152         excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
e31da0 153     }
JM 154
155     /**
273cb9 156      * Run is executed by the Gitblit executor service.  Because this is called 
JM 157      * by an executor service, calls will queue - i.e. there can never be
158      * concurrent execution of repository index updates.
e31da0 159      */
JM 160     @Override
161     public void run() {
7db092 162         if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) {
JM 163             // Lucene indexing is disabled
164             return;
165         }
f1d2ad 166         // reload the excluded extensions
JM 167         String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions);
168         excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
169
40ca5c 170         for (String repositoryName: GitBlit.self().getRepositoryList()) {
JM 171             RepositoryModel model = GitBlit.self().getRepositoryModel(repositoryName);
172             if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
173                 Repository repository = GitBlit.self().getRepository(model.name);
174                 index(model, repository);                
175                 repository.close();
176                 System.gc();
e31da0 177             }
JM 178         }
179     }
180
181     /**
182      * Synchronously indexes a repository. This may build a complete index of a
183      * repository or it may update an existing index.
184      * 
3d0494 185      * @param name
e31da0 186      *            the name of the repository
JM 187      * @param repository
188      *            the repository object
189      */
9f6ef3 190     private void index(RepositoryModel model, Repository repository) {
e31da0 191         try {
40ca5c 192             if (shouldReindex(repository)) {
JM 193                 // (re)build the entire index
194                 IndexResult result = reindex(model, repository);
195
196                 if (result.success) {
197                     if (result.commitCount > 0) {
198                         String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
199                         logger.info(MessageFormat.format(msg, model.name, result.commitCount,
200                                 result.blobCount, result.branchCount, result.duration()));
e31da0 201                     }
JM 202                 } else {
40ca5c 203                     String msg = "Could not build {0} Lucene index!";
JM 204                     logger.error(MessageFormat.format(msg, model.name));
e31da0 205                 }
JM 206             } else {
40ca5c 207                 // update the index with latest commits
JM 208                 IndexResult result = updateIndex(model, repository);
209                 if (result.success) {
210                     if (result.commitCount > 0) {
211                         String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
212                         logger.info(MessageFormat.format(msg, model.name, result.commitCount,
213                                 result.blobCount, result.branchCount, result.duration()));
214                     }
215                 } else {
216                     String msg = "Could not update {0} Lucene index!";
217                     logger.error(MessageFormat.format(msg, model.name));
218                 }
e31da0 219             }
JM 220         } catch (Throwable t) {
40ca5c 221             logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t);
e31da0 222         }
JM 223     }
e6637c 224     
JM 225     /**
226      * Close the writer/searcher objects for a repository.
227      * 
228      * @param repositoryName
229      */
8e9988 230     public synchronized void close(String repositoryName) {
JM 231         try {
232             IndexSearcher searcher = searchers.remove(repositoryName);
233             if (searcher != null) {
234                 searcher.getIndexReader().close();
235             }
236         } catch (Exception e) {
237             logger.error("Failed to close index searcher for " + repositoryName, e);
238         }
239         
e6637c 240         try {
JM 241             IndexWriter writer = writers.remove(repositoryName);
242             if (writer != null) {
243                 writer.close();
244             }
245         } catch (Exception e) {
246             logger.error("Failed to close index writer for " + repositoryName, e);
8e9988 247         }        
e6637c 248     }
b938ae 249
JM 250     /**
251      * Close all Lucene indexers.
252      * 
253      */
8e9988 254     public synchronized void close() {
d896e6 255         // close all writers
JM 256         for (String writer : writers.keySet()) {
257             try {
258                 writers.get(writer).close(true);
259             } catch (Throwable t) {
260                 logger.error("Failed to close Lucene writer for " + writer, t);
261             }
262         }
263         writers.clear();
264
265         // close all searchers
266         for (String searcher : searchers.keySet()) {
267             try {
8e9988 268                 searchers.get(searcher).getIndexReader().close();
d896e6 269             } catch (Throwable t) {
JM 270                 logger.error("Failed to close Lucene searcher for " + searcher, t);
271             }
272         }
273         searchers.clear();
274     }
275
276     
277     /**
278      * Deletes the Lucene index for the specified repository.
279      * 
280      * @param repositoryName
281      * @return true, if successful
282      */
283     public boolean deleteIndex(String repositoryName) {
284         try {
8e9988 285             // close any open writer/searcher
JM 286             close(repositoryName);
287
d896e6 288             // delete the index folder
eb741a 289             File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repositoryName), FS.DETECTED);
d896e6 290             File luceneIndex = new File(repositoryFolder, LUCENE_DIR);
JM 291             if (luceneIndex.exists()) {
292                 org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
293                         org.eclipse.jgit.util.FileUtils.RECURSIVE);
294             }
295             // delete the config file
296             File luceneConfig = new File(repositoryFolder, CONF_FILE);
297             if (luceneConfig.exists()) {
298                 luceneConfig.delete();
299             }
300             return true;
301         } catch (IOException e) {
302             throw new RuntimeException(e);
303         }
304     }
305     
306     /**
307      * Returns the author for the commit, if this information is available.
308      * 
309      * @param commit
310      * @return an author or unknown
311      */
312     private String getAuthor(RevCommit commit) {
313         String name = "unknown";
314         try {
315             name = commit.getAuthorIdent().getName();
316             if (StringUtils.isEmpty(name)) {
317                 name = commit.getAuthorIdent().getEmailAddress();
318             }
319         } catch (NullPointerException n) {                        
320         }
321         return name;
322     }
323     
324     /**
325      * Returns the committer for the commit, if this information is available.
326      * 
327      * @param commit
328      * @return an committer or unknown
329      */
330     private String getCommitter(RevCommit commit) {
331         String name = "unknown";
332         try {
333             name = commit.getCommitterIdent().getName();
334             if (StringUtils.isEmpty(name)) {
335                 name = commit.getCommitterIdent().getEmailAddress();
336             }
337         } catch (NullPointerException n) {                        
338         }
339         return name;
340     }
905d31 341     
JM 342     /**
343      * Get the tree associated with the given commit.
344      *
345      * @param walk
346      * @param commit
347      * @return tree
348      * @throws IOException
349      */
9f6ef3 350     private RevTree getTree(final RevWalk walk, final RevCommit commit)
905d31 351             throws IOException {
JM 352         final RevTree tree = commit.getTree();
353         if (tree != null) {
354             return tree;
355         }
356         walk.parseHeaders(commit);
357         return commit.getTree();
358     }
d896e6 359
JM 360     /**
361      * Construct a keyname from the branch.
362      * 
363      * @param branchName
364      * @return a keyname appropriate for the Git config file format
365      */
366     private String getBranchKey(String branchName) {
367         return StringUtils.getSHA1(branchName);
368     }
369
370     /**
371      * Returns the Lucene configuration for the specified repository.
372      * 
373      * @param repository
374      * @return a config object
375      */
376     private FileBasedConfig getConfig(Repository repository) {
377         File file = new File(repository.getDirectory(), CONF_FILE);
378         FileBasedConfig config = new FileBasedConfig(file, FS.detect());
379         return config;
380     }
381
382     /**
383      * Reads the Lucene config file for the repository to check the index
384      * version. If the index version is different, then rebuild the repository
385      * index.
386      * 
387      * @param repository
388      * @return true of the on-disk index format is different than INDEX_VERSION
389      */
9f6ef3 390     private boolean shouldReindex(Repository repository) {
d896e6 391         try {
JM 392             FileBasedConfig config = getConfig(repository);
393             config.load();
394             int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
395             // reindex if versions do not match
396             return indexVersion != INDEX_VERSION;
397         } catch (Throwable t) {
398         }
399         return true;
400     }
401
402
403     /**
404      * This completely indexes the repository and will destroy any existing
405      * index.
406      * 
407      * @param repositoryName
408      * @param repository
409      * @return IndexResult
410      */
40ca5c 411     public IndexResult reindex(RepositoryModel model, Repository repository) {
8e9988 412         IndexResult result = new IndexResult();        
40ca5c 413         if (!deleteIndex(model.name)) {
d896e6 414             return result;
JM 415         }
fa0afc 416         try {
JM 417             String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
d896e6 418             FileBasedConfig config = getConfig(repository);
JM 419             Set<String> indexedCommits = new TreeSet<String>();
40ca5c 420             IndexWriter writer = getIndexWriter(model.name);
d896e6 421             // build a quick lookup of tags
JM 422             Map<String, List<String>> tags = new HashMap<String, List<String>>();
423             for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
424                 if (!tag.isAnnotatedTag()) {
425                     // skip non-annotated tags
426                     continue;
427                 }
428                 if (!tags.containsKey(tag.getObjectId())) {
429                     tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
430                 }
431                 tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
432             }
433             
434             ObjectReader reader = repository.newObjectReader();
435
436             // get the local branches
437             List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
438             
439             // sort them by most recently updated
440             Collections.sort(branches, new Comparator<RefModel>() {
441                 @Override
442                 public int compare(RefModel ref1, RefModel ref2) {
443                     return ref2.getDate().compareTo(ref1.getDate());
444                 }
445             });
446             
447             // reorder default branch to first position
448             RefModel defaultBranch = null;
449             ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
450             for (RefModel branch :  branches) {
451                 if (branch.getObjectId().equals(defaultBranchId)) {
1aabf0 452                     defaultBranch = branch;
d896e6 453                     break;
JM 454                 }
455             }
456             branches.remove(defaultBranch);
457             branches.add(0, defaultBranch);
458             
459             // walk through each branch
460             for (RefModel branch : branches) {
40ca5c 461
1aabf0 462                 boolean indexBranch = false;
JM 463                 if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
464                         && branch.equals(defaultBranch)) {
465                     // indexing "default" branch
466                     indexBranch = true;
467                 } else if (IssueUtils.GB_ISSUES.equals(branch)) {
468                     // skip the GB_ISSUES branch because it is indexed later
469                     // note: this is different than updateIndex
470                     indexBranch = false;
471                 } else {
472                     // normal explicit branch check
473                     indexBranch = model.indexedBranches.contains(branch.getName());
474                 }
475                 
40ca5c 476                 // if this branch is not specifically indexed then skip
1aabf0 477                 if (!indexBranch) {
d896e6 478                     continue;
JM 479                 }
480
481                 String branchName = branch.getName();
482                 RevWalk revWalk = new RevWalk(reader);
483                 RevCommit tip = revWalk.parseCommit(branch.getObjectId());
484                 String tipId = tip.getId().getName();
485
486                 String keyName = getBranchKey(branchName);
487                 config.setString(CONF_ALIAS, null, keyName, branchName);
488                 config.setString(CONF_BRANCH, null, keyName, tipId);
489
490                 // index the blob contents of the tree
491                 TreeWalk treeWalk = new TreeWalk(repository);
492                 treeWalk.addTree(tip.getTree());
493                 treeWalk.setRecursive(true);                                
494                 
495                 Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
496                 while (treeWalk.next()) {
749110 497                     // ensure path is not in a submodule
a02998 498                     if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
PA 499                         paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
500                     }
d896e6 501                 }                
JM 502
503                 ByteArrayOutputStream os = new ByteArrayOutputStream();
504                 byte[] tmp = new byte[32767];
505
506                 RevWalk commitWalk = new RevWalk(reader);
507                 commitWalk.markStart(tip);
508                 
509                 RevCommit commit;
510                 while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
511                     TreeWalk diffWalk = new TreeWalk(reader);
512                     int parentCount = commit.getParentCount();
513                     switch (parentCount) {
514                     case 0:
515                         diffWalk.addTree(new EmptyTreeIterator());
516                         break;
517                     case 1:
518                         diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
519                         break;
520                     default:
521                         // skip merge commits
522                         continue;
523                     }
524                     diffWalk.addTree(getTree(commitWalk, commit));
525                     diffWalk.setFilter(ANY_DIFF);
526                     diffWalk.setRecursive(true);
527                     while ((paths.size() > 0) && diffWalk.next()) {
528                         String path = diffWalk.getPathString();
529                         if (!paths.containsKey(path)) {
530                             continue;
531                         }
532                         
533                         // remove path from set
534                         ObjectId blobId = paths.remove(path);
535                         result.blobCount++;
536                         
537                         // index the blob metadata
538                         String blobAuthor = getAuthor(commit);
539                         String blobCommitter = getCommitter(commit);
540                         String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
541                                 Resolution.MINUTE);
542                         
543                         Document doc = new Document();
544                         doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
545                         doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
546                         doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
547                         doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
548                         doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));
549                         doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));
550                         doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED));                    
551
552                         // determine extension to compare to the extension
553                         // blacklist
554                         String ext = null;
555                         String name = path.toLowerCase();
556                         if (name.indexOf('.') > -1) {
557                             ext = name.substring(name.lastIndexOf('.') + 1);
558                         }
559
560                         // index the blob content
561                         if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {                            
562                             ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
462488 563                             InputStream in = ldr.openStream();                        
d896e6 564                             int n;
JM 565                             while ((n = in.read(tmp)) > 0) {
566                                 os.write(tmp, 0, n);
567                             }
568                             in.close();
569                             byte[] content = os.toByteArray();
fa0afc 570                             String str = StringUtils.decodeString(content, encodings);                            
d896e6 571                             doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
JM 572                             os.reset();
573                         }                            
574                         
575                         // add the blob to the index
576                         writer.addDocument(doc);
577                     }
578                 }
579
580                 os.close();
581
582                 // index the tip commit object
583                 if (indexedCommits.add(tipId)) {
584                     Document doc = createDocument(tip, tags.get(tipId));
585                     doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
586                     writer.addDocument(doc);
587                     result.commitCount += 1;
588                     result.branchCount += 1;
589                 }
590
591                 // traverse the log and index the previous commit objects
592                 RevWalk historyWalk = new RevWalk(reader);
593                 historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
594                 RevCommit rev;
595                 while ((rev = historyWalk.next()) != null) {
596                     String hash = rev.getId().getName();
597                     if (indexedCommits.add(hash)) {
598                         Document doc = createDocument(rev, tags.get(hash));
599                         doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
600                         writer.addDocument(doc);
601                         result.commitCount += 1;
602                     }
603                 }
604             }
605
606             // finished
607             reader.release();
608             
609             // this repository has a gb-issues branch, index all issues
610             if (IssueUtils.getIssuesBranch(repository) != null) {
611                 List<IssueModel> issues = IssueUtils.getIssues(repository, null);
612                 if (issues.size() > 0) {
613                     result.branchCount += 1;
614                 }
615                 for (IssueModel issue : issues) {
616                     result.issueCount++;
617                     Document doc = createDocument(issue);
618                     writer.addDocument(doc);
619                 }
620             }
621
622             // commit all changes and reset the searcher
623             config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
624             config.save();
625             writer.commit();
8e9988 626             resetIndexSearcher(model.name);
d896e6 627             result.success();
JM 628         } catch (Exception e) {
40ca5c 629             logger.error("Exception while reindexing " + model.name, e);
d896e6 630         }
JM 631         return result;
632     }
633     
634     /**
635      * Incrementally update the index with the specified commit for the
636      * repository.
637      * 
638      * @param repositoryName
639      * @param repository
640      * @param branch
641      *            the fully qualified branch name (e.g. refs/heads/master)
642      * @param commit
643      * @return true, if successful
644      */
645     private IndexResult index(String repositoryName, Repository repository, 
646             String branch, RevCommit commit) {
647         IndexResult result = new IndexResult();
648         try {
ae9e15 649             String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
d896e6 650             List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
JM 651             String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
652                     Resolution.MINUTE);
653             IndexWriter writer = getIndexWriter(repositoryName);
654             for (PathChangeModel path : changedPaths) {
88fb67 655                 if (path.isSubmodule()) {
JM 656                     continue;
657                 }
d896e6 658                 // delete the indexed blob
856091 659                 deleteBlob(repositoryName, branch, path.name);
d896e6 660
JM 661                 // re-index the blob
662                 if (!ChangeType.DELETE.equals(path.changeType)) {
663                     result.blobCount++;
664                     Document doc = new Document();
665                     doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
666                             Index.NOT_ANALYZED));
667                     doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
668                     doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
669                     doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));
670                     doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
671                     doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
672                     doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
673
674                     // determine extension to compare to the extension
675                     // blacklist
676                     String ext = null;
677                     String name = path.name.toLowerCase();
678                     if (name.indexOf('.') > -1) {
679                         ext = name.substring(name.lastIndexOf('.') + 1);
680                     }
681
682                     if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
683                         // read the blob content
684                         String str = JGitUtils.getStringContent(repository, commit.getTree(),
ae9e15 685                                 path.path, encodings);
749110 686                         if (str != null) {
JM 687                             doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
688                             writer.addDocument(doc);
689                         }
d896e6 690                     }
JM 691                 }
692             }
693             writer.commit();
261024 694             
JM 695             // get any annotated commit tags
696             List<String> commitTags = new ArrayList<String>();
33ceba 697             for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
261024 698                 if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
JM 699                     commitTags.add(ref.displayName);
700                 }
701             }
702             
703             // create and write the Lucene document
704             Document doc = createDocument(commit, commitTags);
cdbbda 705             doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
d896e6 706             result.commitCount++;
JM 707             result.success = index(repositoryName, doc);
708         } catch (Exception e) {
709             logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e);
710         }
711         return result;
712     }
713
714     /**
715      * Incrementally update the index with the specified issue for the
716      * repository.
717      * 
718      * @param repositoryName
719      * @param issue
720      * @return true, if successful
721      */
722     public boolean index(String repositoryName, IssueModel issue) {
723         try {
724             // delete the old issue from the index, if exists
725             deleteIssue(repositoryName, issue.id);
726             Document doc = createDocument(issue);
727             return index(repositoryName, doc);
728         } catch (Exception e) {
729             logger.error(MessageFormat.format("Error while indexing issue {0} in {1}", issue.id, repositoryName), e);
730         }
731         return false;
732     }
733     
734     /**
735      * Delete an issue from the repository index.
736      * 
737      * @param repositoryName
738      * @param issueId
739      * @throws Exception
87ee94 740      * @return true, if deleted, false if no record was deleted
d896e6 741      */
87ee94 742     private boolean deleteIssue(String repositoryName, String issueId) throws Exception {
d896e6 743         BooleanQuery query = new BooleanQuery();
JM 744         Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.issue.name());
745         query.add(new TermQuery(objectTerm), Occur.MUST);
746         Term issueidTerm = new Term(FIELD_ISSUE, issueId);
747         query.add(new TermQuery(issueidTerm), Occur.MUST);
748         
749         IndexWriter writer = getIndexWriter(repositoryName);
87ee94 750         int numDocsBefore = writer.numDocs();
d896e6 751         writer.deleteDocuments(query);
JM 752         writer.commit();
87ee94 753         int numDocsAfter = writer.numDocs();
JM 754         if (numDocsBefore == numDocsAfter) {
755             logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
756             return false;
757         } else {
758             logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
759             return true;
760         }
d896e6 761     }
JM 762     
763     /**
764      * Delete a blob from the specified branch of the repository index.
765      * 
766      * @param repositoryName
767      * @param branch
768      * @param path
769      * @throws Exception
87ee94 770      * @return true, if deleted, false if no record was deleted
d896e6 771      */
87ee94 772     public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception {
JM 773         String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH);
774         String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path);
d896e6 775         
87ee94 776         BooleanQuery query = new BooleanQuery();
JM 777         StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
778         QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
779         query.add(qp.parse(q), Occur.MUST);
780
d896e6 781         IndexWriter writer = getIndexWriter(repositoryName);
87ee94 782         int numDocsBefore = writer.numDocs();
JM 783         writer.deleteDocuments(query);        
d896e6 784         writer.commit();
87ee94 785         int numDocsAfter = writer.numDocs();
JM 786         if (numDocsBefore == numDocsAfter) {
787             logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
788             return false;
789         } else {
790             logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
791             return true;
792         }
d896e6 793     }
JM 794
795     /**
796      * Updates a repository index incrementally from the last indexed commits.
797      * 
40ca5c 798      * @param model
d896e6 799      * @param repository
JM 800      * @return IndexResult
801      */
9f6ef3 802     private IndexResult updateIndex(RepositoryModel model, Repository repository) {
d896e6 803         IndexResult result = new IndexResult();
JM 804         try {
805             FileBasedConfig config = getConfig(repository);
806             config.load();
807
808             // build a quick lookup of annotated tags
809             Map<String, List<String>> tags = new HashMap<String, List<String>>();
810             for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
811                 if (!tag.isAnnotatedTag()) {
812                     // skip non-annotated tags
813                     continue;
814                 }
815                 if (!tags.containsKey(tag.getObjectId())) {
816                     tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
817                 }
818                 tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
819             }
820
821             // detect branch deletion
822             // first assume all branches are deleted and then remove each
823             // existing branch from deletedBranches during indexing
824             Set<String> deletedBranches = new TreeSet<String>();
825             for (String alias : config.getNames(CONF_ALIAS)) {
826                 String branch = config.getString(CONF_ALIAS, null, alias);
827                 deletedBranches.add(branch);
828             }
829
1aabf0 830             // get the local branches
d896e6 831             List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
1aabf0 832             
JM 833             // sort them by most recently updated
834             Collections.sort(branches, new Comparator<RefModel>() {
835                 @Override
836                 public int compare(RefModel ref1, RefModel ref2) {
837                     return ref2.getDate().compareTo(ref1.getDate());
838                 }
839             });
840                         
841             // reorder default branch to first position
842             RefModel defaultBranch = null;
843             ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
844             for (RefModel branch :  branches) {
845                 if (branch.getObjectId().equals(defaultBranchId)) {
846                     defaultBranch = branch;
847                     break;
848                 }
849             }
850             branches.remove(defaultBranch);
851             branches.add(0, defaultBranch);
852             
853             // walk through each branches
d896e6 854             for (RefModel branch : branches) {
JM 855                 String branchName = branch.getName();
856
1aabf0 857                 boolean indexBranch = false;
JM 858                 if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
859                         && branch.equals(defaultBranch)) {
860                     // indexing "default" branch
861                     indexBranch = true;
862                 } else if (IssueUtils.GB_ISSUES.equals(branch)) {
863                     // update issues modified on the GB_ISSUES branch
864                     // note: this is different than reindex
865                     indexBranch = true;
866                 } else {
867                     // normal explicit branch check
868                     indexBranch = model.indexedBranches.contains(branch.getName());
869                 }
870                 
871                 // if this branch is not specifically indexed then skip
872                 if (!indexBranch) {
40ca5c 873                     continue;
JM 874                 }
875                 
d896e6 876                 // remove this branch from the deletedBranches set
JM 877                 deletedBranches.remove(branchName);
1aabf0 878                 
d896e6 879                 // determine last commit
JM 880                 String keyName = getBranchKey(branchName);
881                 String lastCommit = config.getString(CONF_BRANCH, null, keyName);
882
883                 List<RevCommit> revs;
884                 if (StringUtils.isEmpty(lastCommit)) {
885                     // new branch/unindexed branch, get all commits on branch
886                     revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
887                 } else {
888                     // pre-existing branch, get changes since last commit
889                     revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
890                 }
891
892                 if (revs.size() > 0) {
893                     result.branchCount += 1;
894                 }
895                 
40ca5c 896                 // track the issue ids that we have already indexed
JM 897                 Set<String> indexedIssues = new TreeSet<String>();
898                 
d896e6 899                 // reverse the list of commits so we start with the first commit                
JM 900                 Collections.reverse(revs);
40ca5c 901                 for (RevCommit commit : revs) {                    
JM 902                     if (IssueUtils.GB_ISSUES.equals(branch)) {
903                         // only index an issue once during updateIndex
904                         String issueId = commit.getShortMessage().substring(2).trim();
905                         if (indexedIssues.contains(issueId)) {
906                             continue;
907                         }
908                         indexedIssues.add(issueId);
909                         
910                         IssueModel issue = IssueUtils.getIssue(repository, issueId);
911                         if (issue == null) {
912                             // issue was deleted, remove from index
87ee94 913                             if (!deleteIssue(model.name, issueId)) {
JM 914                                 logger.error(MessageFormat.format("Failed to delete issue {0} from Lucene index!", issueId));
915                             }
40ca5c 916                         } else {
JM 917                             // issue was updated
918                             index(model.name, issue);
919                             result.issueCount++;
920                         }
921                     } else {
922                         // index a commit
923                         result.add(index(model.name, repository, branchName, commit));
924                     }
d896e6 925                 }
JM 926
927                 // update the config
928                 config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
929                 config.setString(CONF_ALIAS, null, keyName, branchName);
930                 config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
931                 config.save();
932             }
933
934             // the deletedBranches set will normally be empty by this point
935             // unless a branch really was deleted and no longer exists
936             if (deletedBranches.size() > 0) {
937                 for (String branch : deletedBranches) {
40ca5c 938                     IndexWriter writer = getIndexWriter(model.name);
d896e6 939                     writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
JM 940                     writer.commit();
941                 }
942             }
943             result.success = true;
944         } catch (Throwable t) {
40ca5c 945             logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
d896e6 946         }
JM 947         return result;
948     }
905d31 949     
d896e6 950     /**
JM 951      * Creates a Lucene document from an issue.
952      * 
953      * @param issue
954      * @return a Lucene document
955      */
956     private Document createDocument(IssueModel issue) {
957         Document doc = new Document();
958         doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.issue.name(), Store.YES,
959                 Field.Index.NOT_ANALYZED));
960         doc.add(new Field(FIELD_ISSUE, issue.id, Store.YES, Index.ANALYZED));
961         doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.ANALYZED));
962         doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE),
963                 Store.YES, Field.Index.NO));
964         doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.ANALYZED));
965         List<String> attachments = new ArrayList<String>();
966         for (Attachment attachment : issue.getAttachments()) {
967             attachments.add(attachment.name.toLowerCase());
968         }
969         doc.add(new Field(FIELD_ATTACHMENT, StringUtils.flattenStrings(attachments), Store.YES,
970                 Index.ANALYZED));
971         doc.add(new Field(FIELD_SUMMARY, issue.summary, Store.YES, Index.ANALYZED));
972         doc.add(new Field(FIELD_CONTENT, issue.toString(), Store.YES, Index.ANALYZED));
973         doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(issue.getLabels()), Store.YES,
974                 Index.ANALYZED));
975         return doc;
976     }
977
978     /**
979      * Creates a Lucene document for a commit
980      * 
981      * @param commit
982      * @param tags
983      * @return a Lucene document
984      */
985     private Document createDocument(RevCommit commit, List<String> tags) {
986         Document doc = new Document();
987         doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), Store.YES,
988                 Index.NOT_ANALYZED));
989         doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
990         doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
991                 Resolution.MINUTE), Store.YES, Index.NO));
992         doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
993         doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
994         doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), Store.YES, Index.ANALYZED));
995         doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.YES, Index.ANALYZED));
996         if (!ArrayUtils.isEmpty(tags)) {
997             doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), Store.YES, Index.ANALYZED));
998         }
999         return doc;
1000     }
1001
1002     /**
1003      * Incrementally index an object for the repository.
1004      * 
1005      * @param repositoryName
1006      * @param doc
1007      * @return true, if successful
1008      */
1009     private boolean index(String repositoryName, Document doc) {
1010         try {            
1011             IndexWriter writer = getIndexWriter(repositoryName);
1012             writer.addDocument(doc);
1013             writer.commit();
8e9988 1014             resetIndexSearcher(repositoryName);
d896e6 1015             return true;
JM 1016         } catch (Exception e) {
1017             logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
1018         }
1019         return false;
1020     }
1021
d04009 1022     private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException {
d896e6 1023         SearchResult result = new SearchResult();
d04009 1024         result.hitId = hitId;
JM 1025         result.totalHits = totalHits;
d896e6 1026         result.score = score;
JM 1027         result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
1028         result.summary = doc.get(FIELD_SUMMARY);        
1029         result.author = doc.get(FIELD_AUTHOR);
1030         result.committer = doc.get(FIELD_COMMITTER);
1031         result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
1032         result.branch = doc.get(FIELD_BRANCH);
1033         result.commitId = doc.get(FIELD_COMMIT);
1034         result.issueId = doc.get(FIELD_ISSUE);
1035         result.path = doc.get(FIELD_PATH);
1036         if (doc.get(FIELD_TAG) != null) {
1037             result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
1038         }
1039         if (doc.get(FIELD_LABEL) != null) {
1040             result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL));
1041         }
1042         return result;
1043     }
1044
1045     private synchronized void resetIndexSearcher(String repository) throws IOException {
1046         IndexSearcher searcher = searchers.remove(repository);
1047         if (searcher != null) {
8e9988 1048             searcher.getIndexReader().close();
d896e6 1049         }
JM 1050     }
1051
1052     /**
1053      * Gets an index searcher for the repository.
1054      * 
1055      * @param repository
1056      * @return
1057      * @throws IOException
1058      */
1059     private IndexSearcher getIndexSearcher(String repository) throws IOException {
1060         IndexSearcher searcher = searchers.get(repository);
1061         if (searcher == null) {
1062             IndexWriter writer = getIndexWriter(repository);
1063             searcher = new IndexSearcher(IndexReader.open(writer, true));
1064             searchers.put(repository, searcher);
1065         }
1066         return searcher;
1067     }
1068
1069     /**
1070      * Gets an index writer for the repository. The index will be created if it
1071      * does not already exist or if forceCreate is specified.
1072      * 
1073      * @param repository
1074      * @return an IndexWriter
1075      * @throws IOException
1076      */
1077     private IndexWriter getIndexWriter(String repository) throws IOException {
6ef2fc 1078         IndexWriter indexWriter = writers.get(repository);                
JM 1079         File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED);
d896e6 1080         File indexFolder = new File(repositoryFolder, LUCENE_DIR);
JM 1081         Directory directory = FSDirectory.open(indexFolder);        
1082
1083         if (indexWriter == null) {
1084             if (!indexFolder.exists()) {
1085                 indexFolder.mkdirs();
1086             }
1087             StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
1088             IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
1089             config.setOpenMode(OpenMode.CREATE_OR_APPEND);
1090             indexWriter = new IndexWriter(directory, config);
1091             writers.put(repository, indexWriter);
1092         }
1093         return indexWriter;
1094     }
1095
1096     /**
1097      * Searches the specified repositories for the given text or query
1098      * 
1099      * @param text
1100      *            if the text is null or empty, null is returned
d04009 1101      * @param page
JM 1102      *            the page number to retrieve. page is 1-indexed.
1103      * @param pageSize
1104      *            the number of elements to return for this page
d896e6 1105      * @param repositories
JM 1106      *            a list of repositories to search. if no repositories are
1107      *            specified null is returned.
1108      * @return a list of SearchResults in order from highest to the lowest score
1109      * 
1110      */
d04009 1111     public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) {
d896e6 1112         if (ArrayUtils.isEmpty(repositories)) {
JM 1113             return null;
1114         }
d04009 1115         return search(text, page, pageSize, repositories.toArray(new String[0]));
d896e6 1116     }
JM 1117     
1118     /**
1119      * Searches the specified repositories for the given text or query
1120      * 
1121      * @param text
1122      *            if the text is null or empty, null is returned
d04009 1123      * @param page
JM 1124      *            the page number to retrieve. page is 1-indexed.
1125      * @param pageSize
1126      *            the number of elements to return for this page
d896e6 1127      * @param repositories
JM 1128      *            a list of repositories to search. if no repositories are
1129      *            specified null is returned.
1130      * @return a list of SearchResults in order from highest to the lowest score
1131      * 
d04009 1132      */
JM 1133     public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
d896e6 1134         if (StringUtils.isEmpty(text)) {
JM 1135             return null;
1136         }
1137         if (ArrayUtils.isEmpty(repositories)) {
1138             return null;
1139         }
1140         Set<SearchResult> results = new LinkedHashSet<SearchResult>();
1141         StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
1142         try {
1143             // default search checks summary and content
1144             BooleanQuery query = new BooleanQuery();
1145             QueryParser qp;
1146             qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
1147             qp.setAllowLeadingWildcard(true);
1148             query.add(qp.parse(text), Occur.SHOULD);
1149
1150             qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
1151             qp.setAllowLeadingWildcard(true);
1152             query.add(qp.parse(text), Occur.SHOULD);
87ee94 1153             
d896e6 1154             IndexSearcher searcher;
JM 1155             if (repositories.length == 1) {
1156                 // single repository search
1157                 searcher = getIndexSearcher(repositories[0]);
1158             } else {
1159                 // multiple repository search
1160                 List<IndexReader> readers = new ArrayList<IndexReader>();
1161                 for (String repository : repositories) {
1162                     IndexSearcher repositoryIndex = getIndexSearcher(repository);
1163                     readers.add(repositoryIndex.getIndexReader());
1164                 }
1165                 IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
905d31 1166                 MultiSourceReader reader = new MultiSourceReader(rdrs);
d896e6 1167                 searcher = new IndexSearcher(reader);
JM 1168             }
87ee94 1169             
d896e6 1170             Query rewrittenQuery = searcher.rewrite(query);
87ee94 1171             logger.debug(rewrittenQuery.toString());
JM 1172
d04009 1173             TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
d896e6 1174             searcher.search(rewrittenQuery, collector);
d04009 1175             int offset = Math.max(0, (page - 1) * pageSize);
JM 1176             ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
1177             int totalHits = collector.getTotalHits();
d896e6 1178             for (int i = 0; i < hits.length; i++) {
JM 1179                 int docId = hits[i].doc;
1180                 Document doc = searcher.doc(docId);
d04009 1181                 SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
905d31 1182                 if (repositories.length == 1) {
JM 1183                     // single repository search
1184                     result.repository = repositories[0];
1185                 } else {
1186                     // multi-repository search
1187                     MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
1188                     int index = reader.getSourceIndex(docId);
1189                     result.repository = repositories[index];
1190                 }
d896e6 1191                 String content = doc.get(FIELD_CONTENT);                
JM 1192                 result.fragment = getHighlightedFragment(analyzer, query, content, result);
1193                 results.add(result);
1194             }
1195         } catch (Exception e) {
1196             logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
1197         }
1198         return new ArrayList<SearchResult>(results);
1199     }
1200     
1201     /**
1202      * 
1203      * @param analyzer
1204      * @param query
1205      * @param content
1206      * @param result
1207      * @return
1208      * @throws IOException
1209      * @throws InvalidTokenOffsetsException
1210      */
1211     private String getHighlightedFragment(Analyzer analyzer, Query query,
1212             String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
12c31e 1213         if (content == null) {
JM 1214             content = "";
1215         }        
1216
1217         int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;
1218
d896e6 1219         QueryScorer scorer = new QueryScorer(query, "content");
12c31e 1220         Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength); 
d896e6 1221
JM 1222         // use an artificial delimiter for the token
9f6ef3 1223         String termTag = "!!--[";
JM 1224         String termTagEnd = "]--!!";
d896e6 1225         SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
JM 1226         Highlighter highlighter = new Highlighter(formatter, scorer);        
1227         highlighter.setTextFragmenter(fragmenter);
12c31e 1228
73fba6 1229         String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
d896e6 1230         if (ArrayUtils.isEmpty(fragments)) {
JM 1231             if (SearchObjectType.blob  == result.type) {
1232                 return "";
1233             }
12c31e 1234             // clip commit message
JM 1235             String fragment = content;
1236             if (fragment.length() > fragmentLength) {
1237                 fragment = fragment.substring(0, fragmentLength) + "...";
1238             }
1239             return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true) + "</pre>";
d896e6 1240         }
12c31e 1241         
2b67ec 1242         // make sure we have unique fragments
JM 1243         Set<String> uniqueFragments = new LinkedHashSet<String>();
1244         for (String fragment : fragments) {
1245             uniqueFragments.add(fragment);
1246         }
1247         fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);
1248         
d896e6 1249         StringBuilder sb = new StringBuilder();
JM 1250         for (int i = 0, len = fragments.length; i < len; i++) {
1251             String fragment = fragments[i];
12c31e 1252             String tag = "<pre class=\"text\">";
JM 1253
d896e6 1254             // resurrect the raw fragment from removing the artificial delimiters
12c31e 1255             String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
JM 1256
1257             // determine position of the raw fragment in the content
2b67ec 1258             int pos = content.indexOf(raw);
12c31e 1259                 
JM 1260             // restore complete first line of fragment
1261             int c = pos;
1262             while (c > 0) {
1263                 c--;
1264                 if (content.charAt(c) == '\n') {
1265                     break;
1266                 }
1267             }
1268             if (c > 0) {
1269                 // inject leading chunk of first fragment line
1270                 fragment = content.substring(c + 1, pos) + fragment;
1271             }
1272                 
1273             if (SearchObjectType.blob  == result.type) {
1274                 // count lines as offset into the content for this fragment
c2833a 1275                 int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));
12c31e 1276                 
JM 1277                 // create fragment tag with line number and language
1278                 String lang = "";
1279                 String ext = StringUtils.getFileExtension(result.path).toLowerCase();
1280                 if (!StringUtils.isEmpty(ext)) {
1281                     // maintain leading space!
1282                     lang = " lang-" + ext;
1283                 }
1284                 tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);
1285                                 
1286             }
d896e6 1287             
12c31e 1288             sb.append(tag);
JM 1289
d896e6 1290             // replace the artificial delimiter with html tags
9f6ef3 1291             String html = StringUtils.escapeForHtml(fragment, false);
JM 1292             html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
d896e6 1293             sb.append(html);
JM 1294             sb.append("</pre>");
1295             if (i < len - 1) {
1296                 sb.append("<span class=\"ellipses\">...</span><br/>");
1297             }
1298         }
1299         return sb.toString();
12c31e 1300     }    
d896e6 1301     
JM 1302     /**
1303      * Simple class to track the results of an index update. 
1304      */
1305     private class IndexResult {
1306         long startTime = System.currentTimeMillis();
1307         long endTime = startTime;
1308         boolean success;
1309         int branchCount;
1310         int commitCount;
1311         int blobCount;
1312         int issueCount;
1313         
1314         void add(IndexResult result) {
1315             this.branchCount += result.branchCount;
1316             this.commitCount += result.commitCount;
1317             this.blobCount += result.blobCount;
1318             this.issueCount += result.issueCount;            
1319         }
1320         
1321         void success() {
1322             success = true;
1323             endTime = System.currentTimeMillis();
1324         }
1325         
1326         float duration() {
1327             return (endTime - startTime)/1000f;
1328         }
b938ae 1329     }
905d31 1330     
JM 1331     /**
1332      * Custom subclass of MultiReader to identify the source index for a given
1333      * doc id.  This would not be necessary of there was a public method to
1334      * obtain this information.
1335      *  
1336      */
1337     private class MultiSourceReader extends MultiReader {
1338         
1339         final Method method;
1340         
1341         MultiSourceReader(IndexReader[] subReaders) {
1342             super(subReaders);
1343             Method m = null;
1344             try {
1345                 m = MultiReader.class.getDeclaredMethod("readerIndex", int.class);
1346                 m.setAccessible(true);
1347             } catch (Exception e) {
1348                 logger.error("Error getting readerIndex method", e);
1349             }
1350             method = m;
1351         }
1352         
1353         int getSourceIndex(int docId) {
1354             int index = -1;
1355             try {
1356                 Object o = method.invoke(this, docId);
1357                 index = (Integer) o;
1358             } catch (Exception e) {
1359                 logger.error("Error getting source index", e);
1360             }
1361             return index;
1362         }
1363     }
e31da0 1364 }