James Moger
2012-11-01 7ba85bfa11c7fcab21ada61650fe30763aafd7b0
commit | author | age
e31da0 1 /*
JM 2  * Copyright 2012 gitblit.com.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.gitblit;
17
d896e6 18 import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;
e31da0 19
d896e6 20 import java.io.ByteArrayOutputStream;
JM 21 import java.io.File;
22 import java.io.IOException;
23 import java.io.InputStream;
905d31 24 import java.lang.reflect.Method;
d896e6 25 import java.text.MessageFormat;
JM 26 import java.text.ParseException;
27 import java.util.ArrayList;
28 import java.util.Collections;
29 import java.util.Comparator;
30 import java.util.HashMap;
31 import java.util.LinkedHashSet;
32 import java.util.List;
33 import java.util.Map;
34 import java.util.Set;
35 import java.util.TreeMap;
36 import java.util.TreeSet;
37 import java.util.concurrent.ConcurrentHashMap;
38
39 import org.apache.lucene.analysis.Analyzer;
40 import org.apache.lucene.analysis.standard.StandardAnalyzer;
41 import org.apache.lucene.document.DateTools;
42 import org.apache.lucene.document.DateTools.Resolution;
43 import org.apache.lucene.document.Document;
44 import org.apache.lucene.document.Field;
45 import org.apache.lucene.document.Field.Index;
46 import org.apache.lucene.document.Field.Store;
47 import org.apache.lucene.index.IndexReader;
48 import org.apache.lucene.index.IndexWriter;
49 import org.apache.lucene.index.IndexWriterConfig;
50 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
51 import org.apache.lucene.index.MultiReader;
52 import org.apache.lucene.index.Term;
53 import org.apache.lucene.queryParser.QueryParser;
54 import org.apache.lucene.search.BooleanClause.Occur;
55 import org.apache.lucene.search.BooleanQuery;
56 import org.apache.lucene.search.IndexSearcher;
57 import org.apache.lucene.search.Query;
58 import org.apache.lucene.search.ScoreDoc;
59 import org.apache.lucene.search.TermQuery;
60 import org.apache.lucene.search.TopScoreDocCollector;
61 import org.apache.lucene.search.highlight.Fragmenter;
62 import org.apache.lucene.search.highlight.Highlighter;
63 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
64 import org.apache.lucene.search.highlight.QueryScorer;
65 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
66 import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
67 import org.apache.lucene.store.Directory;
68 import org.apache.lucene.store.FSDirectory;
69 import org.apache.lucene.util.Version;
70 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
71 import org.eclipse.jgit.lib.Constants;
a02998 72 import org.eclipse.jgit.lib.FileMode;
d896e6 73 import org.eclipse.jgit.lib.ObjectId;
JM 74 import org.eclipse.jgit.lib.ObjectLoader;
75 import org.eclipse.jgit.lib.ObjectReader;
e31da0 76 import org.eclipse.jgit.lib.Repository;
6ef2fc 77 import org.eclipse.jgit.lib.RepositoryCache.FileKey;
d896e6 78 import org.eclipse.jgit.revwalk.RevCommit;
JM 79 import org.eclipse.jgit.revwalk.RevTree;
80 import org.eclipse.jgit.revwalk.RevWalk;
81 import org.eclipse.jgit.storage.file.FileBasedConfig;
82 import org.eclipse.jgit.treewalk.EmptyTreeIterator;
83 import org.eclipse.jgit.treewalk.TreeWalk;
84 import org.eclipse.jgit.util.FS;
e31da0 85 import org.slf4j.Logger;
JM 86 import org.slf4j.LoggerFactory;
87
d896e6 88 import com.gitblit.Constants.SearchObjectType;
JM 89 import com.gitblit.models.IssueModel;
90 import com.gitblit.models.IssueModel.Attachment;
91 import com.gitblit.models.PathModel.PathChangeModel;
92 import com.gitblit.models.RefModel;
40ca5c 93 import com.gitblit.models.RepositoryModel;
d896e6 94 import com.gitblit.models.SearchResult;
JM 95 import com.gitblit.utils.ArrayUtils;
96 import com.gitblit.utils.IssueUtils;
e31da0 97 import com.gitblit.utils.JGitUtils;
d896e6 98 import com.gitblit.utils.StringUtils;
e31da0 99
JM 100 /**
d896e6 101  * The Lucene executor handles indexing and searching repositories.
e31da0 102  * 
JM 103  * @author James Moger
104  * 
105  */
106 public class LuceneExecutor implements Runnable {
d896e6 107     
JM 108         
ae8366 109     private static final int INDEX_VERSION = 5;
e31da0 110
d896e6 111     private static final String FIELD_OBJECT_TYPE = "type";
JM 112     private static final String FIELD_ISSUE = "issue";
113     private static final String FIELD_PATH = "path";
114     private static final String FIELD_COMMIT = "commit";
115     private static final String FIELD_BRANCH = "branch";
116     private static final String FIELD_SUMMARY = "summary";
117     private static final String FIELD_CONTENT = "content";
118     private static final String FIELD_AUTHOR = "author";
119     private static final String FIELD_COMMITTER = "committer";
120     private static final String FIELD_DATE = "date";
121     private static final String FIELD_TAG = "tag";
122     private static final String FIELD_LABEL = "label";
123     private static final String FIELD_ATTACHMENT = "attachment";
124
125     private static final String CONF_FILE = "lucene.conf";
126     private static final String LUCENE_DIR = "lucene";
127     private static final String CONF_INDEX = "index";
128     private static final String CONF_VERSION = "version";
129     private static final String CONF_ALIAS = "aliases";
130     private static final String CONF_BRANCH = "branches";
131         
132     private static final Version LUCENE_VERSION = Version.LUCENE_35;
133     
e31da0 134     private final Logger logger = LoggerFactory.getLogger(LuceneExecutor.class);
d896e6 135     
JM 136     private final IStoredSettings storedSettings;
137     private final File repositoriesFolder;
138     
139     private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
140     private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();
141     
f1d2ad 142     private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip";
JM 143     private Set<String> excludedExtensions;
144     
d896e6 145     public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) {
JM 146         this.storedSettings = settings;
147         this.repositoriesFolder = repositoriesFolder;
462488 148         String exts = luceneIgnoreExtensions;
JM 149         if (settings != null) {
150             exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts);
151         }
152         excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
e31da0 153     }
JM 154
155     /**
273cb9 156      * Run is executed by the Gitblit executor service.  Because this is called 
JM 157      * by an executor service, calls will queue - i.e. there can never be
158      * concurrent execution of repository index updates.
e31da0 159      */
JM 160     @Override
161     public void run() {
7db092 162         if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) {
JM 163             // Lucene indexing is disabled
164             return;
165         }
f1d2ad 166         // reload the excluded extensions
JM 167         String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions);
168         excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
169
dad8b4 170         if (GitBlit.self().isCollectingGarbage()) {
JM 171             // busy collecting garbage, try again later
172             return;
173         }
174         
40ca5c 175         for (String repositoryName: GitBlit.self().getRepositoryList()) {
JM 176             RepositoryModel model = GitBlit.self().getRepositoryModel(repositoryName);
177             if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
178                 Repository repository = GitBlit.self().getRepository(model.name);
e92c6d 179                 if (repository == null) {
JM 180                     if (GitBlit.self().isCollectingGarbage(model.name)) {
181                         logger.info(MessageFormat.format("Skipping Lucene index of {0}, busy garbage collecting", repositoryName));
182                     }
183                     continue;
184                 }
40ca5c 185                 index(model, repository);                
JM 186                 repository.close();
187                 System.gc();
e31da0 188             }
JM 189         }
190     }
191
192     /**
193      * Synchronously indexes a repository. This may build a complete index of a
194      * repository or it may update an existing index.
195      * 
3d0494 196      * @param name
e31da0 197      *            the name of the repository
JM 198      * @param repository
199      *            the repository object
200      */
9f6ef3 201     private void index(RepositoryModel model, Repository repository) {
e31da0 202         try {
40ca5c 203             if (shouldReindex(repository)) {
JM 204                 // (re)build the entire index
205                 IndexResult result = reindex(model, repository);
206
207                 if (result.success) {
208                     if (result.commitCount > 0) {
209                         String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
210                         logger.info(MessageFormat.format(msg, model.name, result.commitCount,
211                                 result.blobCount, result.branchCount, result.duration()));
e31da0 212                     }
JM 213                 } else {
40ca5c 214                     String msg = "Could not build {0} Lucene index!";
JM 215                     logger.error(MessageFormat.format(msg, model.name));
e31da0 216                 }
JM 217             } else {
40ca5c 218                 // update the index with latest commits
JM 219                 IndexResult result = updateIndex(model, repository);
220                 if (result.success) {
221                     if (result.commitCount > 0) {
222                         String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
223                         logger.info(MessageFormat.format(msg, model.name, result.commitCount,
224                                 result.blobCount, result.branchCount, result.duration()));
225                     }
226                 } else {
227                     String msg = "Could not update {0} Lucene index!";
228                     logger.error(MessageFormat.format(msg, model.name));
229                 }
e31da0 230             }
JM 231         } catch (Throwable t) {
40ca5c 232             logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t);
e31da0 233         }
JM 234     }
e6637c 235     
JM 236     /**
237      * Close the writer/searcher objects for a repository.
238      * 
239      * @param repositoryName
240      */
8e9988 241     public synchronized void close(String repositoryName) {
JM 242         try {
243             IndexSearcher searcher = searchers.remove(repositoryName);
244             if (searcher != null) {
245                 searcher.getIndexReader().close();
246             }
247         } catch (Exception e) {
248             logger.error("Failed to close index searcher for " + repositoryName, e);
249         }
250         
e6637c 251         try {
JM 252             IndexWriter writer = writers.remove(repositoryName);
253             if (writer != null) {
254                 writer.close();
255             }
256         } catch (Exception e) {
257             logger.error("Failed to close index writer for " + repositoryName, e);
8e9988 258         }        
e6637c 259     }
b938ae 260
JM 261     /**
262      * Close all Lucene indexers.
263      * 
264      */
8e9988 265     public synchronized void close() {
d896e6 266         // close all writers
JM 267         for (String writer : writers.keySet()) {
268             try {
269                 writers.get(writer).close(true);
270             } catch (Throwable t) {
271                 logger.error("Failed to close Lucene writer for " + writer, t);
272             }
273         }
274         writers.clear();
275
276         // close all searchers
277         for (String searcher : searchers.keySet()) {
278             try {
8e9988 279                 searchers.get(searcher).getIndexReader().close();
d896e6 280             } catch (Throwable t) {
JM 281                 logger.error("Failed to close Lucene searcher for " + searcher, t);
282             }
283         }
284         searchers.clear();
285     }
286
287     
288     /**
289      * Deletes the Lucene index for the specified repository.
290      * 
291      * @param repositoryName
292      * @return true, if successful
293      */
294     public boolean deleteIndex(String repositoryName) {
295         try {
8e9988 296             // close any open writer/searcher
JM 297             close(repositoryName);
298
d896e6 299             // delete the index folder
eb741a 300             File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repositoryName), FS.DETECTED);
d896e6 301             File luceneIndex = new File(repositoryFolder, LUCENE_DIR);
JM 302             if (luceneIndex.exists()) {
303                 org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
304                         org.eclipse.jgit.util.FileUtils.RECURSIVE);
305             }
306             // delete the config file
307             File luceneConfig = new File(repositoryFolder, CONF_FILE);
308             if (luceneConfig.exists()) {
309                 luceneConfig.delete();
310             }
311             return true;
312         } catch (IOException e) {
313             throw new RuntimeException(e);
314         }
315     }
316     
317     /**
318      * Returns the author for the commit, if this information is available.
319      * 
320      * @param commit
321      * @return an author or unknown
322      */
323     private String getAuthor(RevCommit commit) {
324         String name = "unknown";
325         try {
326             name = commit.getAuthorIdent().getName();
327             if (StringUtils.isEmpty(name)) {
328                 name = commit.getAuthorIdent().getEmailAddress();
329             }
330         } catch (NullPointerException n) {                        
331         }
332         return name;
333     }
334     
335     /**
336      * Returns the committer for the commit, if this information is available.
337      * 
338      * @param commit
339      * @return an committer or unknown
340      */
341     private String getCommitter(RevCommit commit) {
342         String name = "unknown";
343         try {
344             name = commit.getCommitterIdent().getName();
345             if (StringUtils.isEmpty(name)) {
346                 name = commit.getCommitterIdent().getEmailAddress();
347             }
348         } catch (NullPointerException n) {                        
349         }
350         return name;
351     }
905d31 352     
JM 353     /**
354      * Get the tree associated with the given commit.
355      *
356      * @param walk
357      * @param commit
358      * @return tree
359      * @throws IOException
360      */
9f6ef3 361     private RevTree getTree(final RevWalk walk, final RevCommit commit)
905d31 362             throws IOException {
JM 363         final RevTree tree = commit.getTree();
364         if (tree != null) {
365             return tree;
366         }
367         walk.parseHeaders(commit);
368         return commit.getTree();
369     }
d896e6 370
JM 371     /**
372      * Construct a keyname from the branch.
373      * 
374      * @param branchName
375      * @return a keyname appropriate for the Git config file format
376      */
377     private String getBranchKey(String branchName) {
378         return StringUtils.getSHA1(branchName);
379     }
380
381     /**
382      * Returns the Lucene configuration for the specified repository.
383      * 
384      * @param repository
385      * @return a config object
386      */
387     private FileBasedConfig getConfig(Repository repository) {
388         File file = new File(repository.getDirectory(), CONF_FILE);
389         FileBasedConfig config = new FileBasedConfig(file, FS.detect());
390         return config;
391     }
392
393     /**
394      * Reads the Lucene config file for the repository to check the index
395      * version. If the index version is different, then rebuild the repository
396      * index.
397      * 
398      * @param repository
399      * @return true of the on-disk index format is different than INDEX_VERSION
400      */
9f6ef3 401     private boolean shouldReindex(Repository repository) {
d896e6 402         try {
JM 403             FileBasedConfig config = getConfig(repository);
404             config.load();
405             int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
406             // reindex if versions do not match
407             return indexVersion != INDEX_VERSION;
408         } catch (Throwable t) {
409         }
410         return true;
411     }
412
413
414     /**
415      * This completely indexes the repository and will destroy any existing
416      * index.
417      * 
418      * @param repositoryName
419      * @param repository
420      * @return IndexResult
421      */
40ca5c 422     public IndexResult reindex(RepositoryModel model, Repository repository) {
8e9988 423         IndexResult result = new IndexResult();        
40ca5c 424         if (!deleteIndex(model.name)) {
d896e6 425             return result;
JM 426         }
fa0afc 427         try {
JM 428             String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
d896e6 429             FileBasedConfig config = getConfig(repository);
JM 430             Set<String> indexedCommits = new TreeSet<String>();
40ca5c 431             IndexWriter writer = getIndexWriter(model.name);
d896e6 432             // build a quick lookup of tags
JM 433             Map<String, List<String>> tags = new HashMap<String, List<String>>();
434             for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
435                 if (!tag.isAnnotatedTag()) {
436                     // skip non-annotated tags
437                     continue;
438                 }
439                 if (!tags.containsKey(tag.getObjectId())) {
440                     tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
441                 }
442                 tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
443             }
444             
445             ObjectReader reader = repository.newObjectReader();
446
447             // get the local branches
448             List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
449             
450             // sort them by most recently updated
451             Collections.sort(branches, new Comparator<RefModel>() {
452                 @Override
453                 public int compare(RefModel ref1, RefModel ref2) {
454                     return ref2.getDate().compareTo(ref1.getDate());
455                 }
456             });
457             
458             // reorder default branch to first position
459             RefModel defaultBranch = null;
460             ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
461             for (RefModel branch :  branches) {
462                 if (branch.getObjectId().equals(defaultBranchId)) {
1aabf0 463                     defaultBranch = branch;
d896e6 464                     break;
JM 465                 }
466             }
467             branches.remove(defaultBranch);
468             branches.add(0, defaultBranch);
469             
470             // walk through each branch
471             for (RefModel branch : branches) {
40ca5c 472
1aabf0 473                 boolean indexBranch = false;
JM 474                 if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
475                         && branch.equals(defaultBranch)) {
476                     // indexing "default" branch
477                     indexBranch = true;
478                 } else if (IssueUtils.GB_ISSUES.equals(branch)) {
479                     // skip the GB_ISSUES branch because it is indexed later
480                     // note: this is different than updateIndex
481                     indexBranch = false;
482                 } else {
483                     // normal explicit branch check
484                     indexBranch = model.indexedBranches.contains(branch.getName());
485                 }
486                 
40ca5c 487                 // if this branch is not specifically indexed then skip
1aabf0 488                 if (!indexBranch) {
d896e6 489                     continue;
JM 490                 }
491
492                 String branchName = branch.getName();
493                 RevWalk revWalk = new RevWalk(reader);
494                 RevCommit tip = revWalk.parseCommit(branch.getObjectId());
495                 String tipId = tip.getId().getName();
496
497                 String keyName = getBranchKey(branchName);
498                 config.setString(CONF_ALIAS, null, keyName, branchName);
499                 config.setString(CONF_BRANCH, null, keyName, tipId);
500
501                 // index the blob contents of the tree
502                 TreeWalk treeWalk = new TreeWalk(repository);
503                 treeWalk.addTree(tip.getTree());
504                 treeWalk.setRecursive(true);                                
505                 
506                 Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
507                 while (treeWalk.next()) {
749110 508                     // ensure path is not in a submodule
a02998 509                     if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
PA 510                         paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
511                     }
d896e6 512                 }                
JM 513
514                 ByteArrayOutputStream os = new ByteArrayOutputStream();
515                 byte[] tmp = new byte[32767];
516
517                 RevWalk commitWalk = new RevWalk(reader);
518                 commitWalk.markStart(tip);
519                 
520                 RevCommit commit;
521                 while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
522                     TreeWalk diffWalk = new TreeWalk(reader);
523                     int parentCount = commit.getParentCount();
524                     switch (parentCount) {
525                     case 0:
526                         diffWalk.addTree(new EmptyTreeIterator());
527                         break;
528                     case 1:
529                         diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
530                         break;
531                     default:
532                         // skip merge commits
533                         continue;
534                     }
535                     diffWalk.addTree(getTree(commitWalk, commit));
536                     diffWalk.setFilter(ANY_DIFF);
537                     diffWalk.setRecursive(true);
538                     while ((paths.size() > 0) && diffWalk.next()) {
539                         String path = diffWalk.getPathString();
540                         if (!paths.containsKey(path)) {
541                             continue;
542                         }
543                         
544                         // remove path from set
545                         ObjectId blobId = paths.remove(path);
546                         result.blobCount++;
547                         
548                         // index the blob metadata
549                         String blobAuthor = getAuthor(commit);
550                         String blobCommitter = getCommitter(commit);
551                         String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
552                                 Resolution.MINUTE);
553                         
554                         Document doc = new Document();
555                         doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
556                         doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
557                         doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
558                         doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
559                         doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));
560                         doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));
561                         doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED));                    
562
563                         // determine extension to compare to the extension
564                         // blacklist
565                         String ext = null;
566                         String name = path.toLowerCase();
567                         if (name.indexOf('.') > -1) {
568                             ext = name.substring(name.lastIndexOf('.') + 1);
569                         }
570
571                         // index the blob content
572                         if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {                            
573                             ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
462488 574                             InputStream in = ldr.openStream();                        
d896e6 575                             int n;
JM 576                             while ((n = in.read(tmp)) > 0) {
577                                 os.write(tmp, 0, n);
578                             }
579                             in.close();
580                             byte[] content = os.toByteArray();
fa0afc 581                             String str = StringUtils.decodeString(content, encodings);                            
d896e6 582                             doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
JM 583                             os.reset();
584                         }                            
585                         
586                         // add the blob to the index
587                         writer.addDocument(doc);
588                     }
589                 }
590
591                 os.close();
592
593                 // index the tip commit object
594                 if (indexedCommits.add(tipId)) {
595                     Document doc = createDocument(tip, tags.get(tipId));
596                     doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
597                     writer.addDocument(doc);
598                     result.commitCount += 1;
599                     result.branchCount += 1;
600                 }
601
602                 // traverse the log and index the previous commit objects
603                 RevWalk historyWalk = new RevWalk(reader);
604                 historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
605                 RevCommit rev;
606                 while ((rev = historyWalk.next()) != null) {
607                     String hash = rev.getId().getName();
608                     if (indexedCommits.add(hash)) {
609                         Document doc = createDocument(rev, tags.get(hash));
610                         doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
611                         writer.addDocument(doc);
612                         result.commitCount += 1;
613                     }
614                 }
615             }
616
617             // finished
618             reader.release();
619             
620             // this repository has a gb-issues branch, index all issues
621             if (IssueUtils.getIssuesBranch(repository) != null) {
622                 List<IssueModel> issues = IssueUtils.getIssues(repository, null);
623                 if (issues.size() > 0) {
624                     result.branchCount += 1;
625                 }
626                 for (IssueModel issue : issues) {
627                     result.issueCount++;
628                     Document doc = createDocument(issue);
629                     writer.addDocument(doc);
630                 }
631             }
632
633             // commit all changes and reset the searcher
634             config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
635             config.save();
636             writer.commit();
8e9988 637             resetIndexSearcher(model.name);
d896e6 638             result.success();
JM 639         } catch (Exception e) {
40ca5c 640             logger.error("Exception while reindexing " + model.name, e);
d896e6 641         }
JM 642         return result;
643     }
644     
645     /**
646      * Incrementally update the index with the specified commit for the
647      * repository.
648      * 
649      * @param repositoryName
650      * @param repository
651      * @param branch
652      *            the fully qualified branch name (e.g. refs/heads/master)
653      * @param commit
654      * @return true, if successful
655      */
656     private IndexResult index(String repositoryName, Repository repository, 
657             String branch, RevCommit commit) {
658         IndexResult result = new IndexResult();
659         try {
ae9e15 660             String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
d896e6 661             List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
JM 662             String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
663                     Resolution.MINUTE);
664             IndexWriter writer = getIndexWriter(repositoryName);
665             for (PathChangeModel path : changedPaths) {
88fb67 666                 if (path.isSubmodule()) {
JM 667                     continue;
668                 }
d896e6 669                 // delete the indexed blob
856091 670                 deleteBlob(repositoryName, branch, path.name);
d896e6 671
JM 672                 // re-index the blob
673                 if (!ChangeType.DELETE.equals(path.changeType)) {
674                     result.blobCount++;
675                     Document doc = new Document();
676                     doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
677                             Index.NOT_ANALYZED));
678                     doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
679                     doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
680                     doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));
681                     doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
682                     doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
683                     doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
684
685                     // determine extension to compare to the extension
686                     // blacklist
687                     String ext = null;
688                     String name = path.name.toLowerCase();
689                     if (name.indexOf('.') > -1) {
690                         ext = name.substring(name.lastIndexOf('.') + 1);
691                     }
692
693                     if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
694                         // read the blob content
695                         String str = JGitUtils.getStringContent(repository, commit.getTree(),
ae9e15 696                                 path.path, encodings);
749110 697                         if (str != null) {
JM 698                             doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
699                             writer.addDocument(doc);
700                         }
d896e6 701                     }
JM 702                 }
703             }
704             writer.commit();
261024 705             
JM 706             // get any annotated commit tags
707             List<String> commitTags = new ArrayList<String>();
33ceba 708             for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
261024 709                 if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
JM 710                     commitTags.add(ref.displayName);
711                 }
712             }
713             
714             // create and write the Lucene document
715             Document doc = createDocument(commit, commitTags);
cdbbda 716             doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
d896e6 717             result.commitCount++;
JM 718             result.success = index(repositoryName, doc);
719         } catch (Exception e) {
720             logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e);
721         }
722         return result;
723     }
724
725     /**
726      * Incrementally update the index with the specified issue for the
727      * repository.
728      * 
729      * @param repositoryName
730      * @param issue
731      * @return true, if successful
732      */
733     public boolean index(String repositoryName, IssueModel issue) {
734         try {
735             // delete the old issue from the index, if exists
736             deleteIssue(repositoryName, issue.id);
737             Document doc = createDocument(issue);
738             return index(repositoryName, doc);
739         } catch (Exception e) {
740             logger.error(MessageFormat.format("Error while indexing issue {0} in {1}", issue.id, repositoryName), e);
741         }
742         return false;
743     }
744     
745     /**
746      * Delete an issue from the repository index.
747      * 
748      * @param repositoryName
749      * @param issueId
750      * @throws Exception
87ee94 751      * @return true, if deleted, false if no record was deleted
d896e6 752      */
87ee94 753     private boolean deleteIssue(String repositoryName, String issueId) throws Exception {
d896e6 754         BooleanQuery query = new BooleanQuery();
JM 755         Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.issue.name());
756         query.add(new TermQuery(objectTerm), Occur.MUST);
757         Term issueidTerm = new Term(FIELD_ISSUE, issueId);
758         query.add(new TermQuery(issueidTerm), Occur.MUST);
759         
760         IndexWriter writer = getIndexWriter(repositoryName);
87ee94 761         int numDocsBefore = writer.numDocs();
d896e6 762         writer.deleteDocuments(query);
JM 763         writer.commit();
87ee94 764         int numDocsAfter = writer.numDocs();
JM 765         if (numDocsBefore == numDocsAfter) {
766             logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
767             return false;
768         } else {
769             logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
770             return true;
771         }
d896e6 772     }
JM 773     
774     /**
775      * Delete a blob from the specified branch of the repository index.
776      * 
777      * @param repositoryName
778      * @param branch
779      * @param path
780      * @throws Exception
87ee94 781      * @return true, if deleted, false if no record was deleted
d896e6 782      */
87ee94 783     public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception {
JM 784         String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH);
785         String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path);
d896e6 786         
87ee94 787         BooleanQuery query = new BooleanQuery();
JM 788         StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
789         QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
790         query.add(qp.parse(q), Occur.MUST);
791
d896e6 792         IndexWriter writer = getIndexWriter(repositoryName);
87ee94 793         int numDocsBefore = writer.numDocs();
JM 794         writer.deleteDocuments(query);        
d896e6 795         writer.commit();
87ee94 796         int numDocsAfter = writer.numDocs();
JM 797         if (numDocsBefore == numDocsAfter) {
798             logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
799             return false;
800         } else {
801             logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
802             return true;
803         }
d896e6 804     }
JM 805
806     /**
807      * Updates a repository index incrementally from the last indexed commits.
808      * 
40ca5c 809      * @param model
d896e6 810      * @param repository
JM 811      * @return IndexResult
812      */
9f6ef3 813     private IndexResult updateIndex(RepositoryModel model, Repository repository) {
d896e6 814         IndexResult result = new IndexResult();
JM 815         try {
816             FileBasedConfig config = getConfig(repository);
817             config.load();
818
819             // build a quick lookup of annotated tags
820             Map<String, List<String>> tags = new HashMap<String, List<String>>();
821             for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
822                 if (!tag.isAnnotatedTag()) {
823                     // skip non-annotated tags
824                     continue;
825                 }
826                 if (!tags.containsKey(tag.getObjectId())) {
827                     tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
828                 }
829                 tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
830             }
831
832             // detect branch deletion
833             // first assume all branches are deleted and then remove each
834             // existing branch from deletedBranches during indexing
835             Set<String> deletedBranches = new TreeSet<String>();
836             for (String alias : config.getNames(CONF_ALIAS)) {
837                 String branch = config.getString(CONF_ALIAS, null, alias);
838                 deletedBranches.add(branch);
839             }
840
1aabf0 841             // get the local branches
d896e6 842             List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
1aabf0 843             
JM 844             // sort them by most recently updated
845             Collections.sort(branches, new Comparator<RefModel>() {
846                 @Override
847                 public int compare(RefModel ref1, RefModel ref2) {
848                     return ref2.getDate().compareTo(ref1.getDate());
849                 }
850             });
851                         
852             // reorder default branch to first position
853             RefModel defaultBranch = null;
854             ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
855             for (RefModel branch :  branches) {
856                 if (branch.getObjectId().equals(defaultBranchId)) {
857                     defaultBranch = branch;
858                     break;
859                 }
860             }
861             branches.remove(defaultBranch);
862             branches.add(0, defaultBranch);
863             
864             // walk through each branches
d896e6 865             for (RefModel branch : branches) {
JM 866                 String branchName = branch.getName();
867
1aabf0 868                 boolean indexBranch = false;
JM 869                 if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
870                         && branch.equals(defaultBranch)) {
871                     // indexing "default" branch
872                     indexBranch = true;
873                 } else if (IssueUtils.GB_ISSUES.equals(branch)) {
874                     // update issues modified on the GB_ISSUES branch
875                     // note: this is different than reindex
876                     indexBranch = true;
877                 } else {
878                     // normal explicit branch check
879                     indexBranch = model.indexedBranches.contains(branch.getName());
880                 }
881                 
882                 // if this branch is not specifically indexed then skip
883                 if (!indexBranch) {
40ca5c 884                     continue;
JM 885                 }
886                 
d896e6 887                 // remove this branch from the deletedBranches set
JM 888                 deletedBranches.remove(branchName);
1aabf0 889                 
d896e6 890                 // determine last commit
JM 891                 String keyName = getBranchKey(branchName);
892                 String lastCommit = config.getString(CONF_BRANCH, null, keyName);
893
894                 List<RevCommit> revs;
895                 if (StringUtils.isEmpty(lastCommit)) {
896                     // new branch/unindexed branch, get all commits on branch
897                     revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
898                 } else {
899                     // pre-existing branch, get changes since last commit
900                     revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
901                 }
902
903                 if (revs.size() > 0) {
904                     result.branchCount += 1;
905                 }
906                 
40ca5c 907                 // track the issue ids that we have already indexed
JM 908                 Set<String> indexedIssues = new TreeSet<String>();
909                 
d896e6 910                 // reverse the list of commits so we start with the first commit                
JM 911                 Collections.reverse(revs);
40ca5c 912                 for (RevCommit commit : revs) {                    
JM 913                     if (IssueUtils.GB_ISSUES.equals(branch)) {
914                         // only index an issue once during updateIndex
915                         String issueId = commit.getShortMessage().substring(2).trim();
916                         if (indexedIssues.contains(issueId)) {
917                             continue;
918                         }
919                         indexedIssues.add(issueId);
920                         
921                         IssueModel issue = IssueUtils.getIssue(repository, issueId);
922                         if (issue == null) {
923                             // issue was deleted, remove from index
87ee94 924                             if (!deleteIssue(model.name, issueId)) {
JM 925                                 logger.error(MessageFormat.format("Failed to delete issue {0} from Lucene index!", issueId));
926                             }
40ca5c 927                         } else {
JM 928                             // issue was updated
929                             index(model.name, issue);
930                             result.issueCount++;
931                         }
932                     } else {
933                         // index a commit
934                         result.add(index(model.name, repository, branchName, commit));
935                     }
d896e6 936                 }
JM 937
938                 // update the config
939                 config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
940                 config.setString(CONF_ALIAS, null, keyName, branchName);
941                 config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
942                 config.save();
943             }
944
945             // the deletedBranches set will normally be empty by this point
946             // unless a branch really was deleted and no longer exists
947             if (deletedBranches.size() > 0) {
948                 for (String branch : deletedBranches) {
40ca5c 949                     IndexWriter writer = getIndexWriter(model.name);
d896e6 950                     writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
JM 951                     writer.commit();
952                 }
953             }
954             result.success = true;
955         } catch (Throwable t) {
40ca5c 956             logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
d896e6 957         }
JM 958         return result;
959     }
905d31 960     
d896e6 961     /**
JM 962      * Creates a Lucene document from an issue.
963      * 
964      * @param issue
965      * @return a Lucene document
966      */
967     private Document createDocument(IssueModel issue) {
968         Document doc = new Document();
969         doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.issue.name(), Store.YES,
970                 Field.Index.NOT_ANALYZED));
971         doc.add(new Field(FIELD_ISSUE, issue.id, Store.YES, Index.ANALYZED));
972         doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.ANALYZED));
973         doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE),
974                 Store.YES, Field.Index.NO));
975         doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.ANALYZED));
976         List<String> attachments = new ArrayList<String>();
977         for (Attachment attachment : issue.getAttachments()) {
978             attachments.add(attachment.name.toLowerCase());
979         }
980         doc.add(new Field(FIELD_ATTACHMENT, StringUtils.flattenStrings(attachments), Store.YES,
981                 Index.ANALYZED));
982         doc.add(new Field(FIELD_SUMMARY, issue.summary, Store.YES, Index.ANALYZED));
983         doc.add(new Field(FIELD_CONTENT, issue.toString(), Store.YES, Index.ANALYZED));
984         doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(issue.getLabels()), Store.YES,
985                 Index.ANALYZED));
986         return doc;
987     }
988
989     /**
990      * Creates a Lucene document for a commit
991      * 
992      * @param commit
993      * @param tags
994      * @return a Lucene document
995      */
996     private Document createDocument(RevCommit commit, List<String> tags) {
997         Document doc = new Document();
998         doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), Store.YES,
999                 Index.NOT_ANALYZED));
1000         doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
1001         doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
1002                 Resolution.MINUTE), Store.YES, Index.NO));
1003         doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
1004         doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
1005         doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), Store.YES, Index.ANALYZED));
1006         doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.YES, Index.ANALYZED));
1007         if (!ArrayUtils.isEmpty(tags)) {
1008             doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), Store.YES, Index.ANALYZED));
1009         }
1010         return doc;
1011     }
1012
1013     /**
1014      * Incrementally index an object for the repository.
1015      * 
1016      * @param repositoryName
1017      * @param doc
1018      * @return true, if successful
1019      */
1020     private boolean index(String repositoryName, Document doc) {
1021         try {            
1022             IndexWriter writer = getIndexWriter(repositoryName);
1023             writer.addDocument(doc);
1024             writer.commit();
8e9988 1025             resetIndexSearcher(repositoryName);
d896e6 1026             return true;
JM 1027         } catch (Exception e) {
1028             logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
1029         }
1030         return false;
1031     }
1032
d04009 1033     private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException {
d896e6 1034         SearchResult result = new SearchResult();
d04009 1035         result.hitId = hitId;
JM 1036         result.totalHits = totalHits;
d896e6 1037         result.score = score;
JM 1038         result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
1039         result.summary = doc.get(FIELD_SUMMARY);        
1040         result.author = doc.get(FIELD_AUTHOR);
1041         result.committer = doc.get(FIELD_COMMITTER);
1042         result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
1043         result.branch = doc.get(FIELD_BRANCH);
1044         result.commitId = doc.get(FIELD_COMMIT);
1045         result.issueId = doc.get(FIELD_ISSUE);
1046         result.path = doc.get(FIELD_PATH);
1047         if (doc.get(FIELD_TAG) != null) {
1048             result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
1049         }
1050         if (doc.get(FIELD_LABEL) != null) {
1051             result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL));
1052         }
1053         return result;
1054     }
1055
1056     private synchronized void resetIndexSearcher(String repository) throws IOException {
1057         IndexSearcher searcher = searchers.remove(repository);
1058         if (searcher != null) {
8e9988 1059             searcher.getIndexReader().close();
d896e6 1060         }
JM 1061     }
1062
1063     /**
1064      * Gets an index searcher for the repository.
1065      * 
1066      * @param repository
1067      * @return an IndexSearcher for the repository, created and cached on first use
1068      * @throws IOException
1069      */
1070     private IndexSearcher getIndexSearcher(String repository) throws IOException {
1071         IndexSearcher searcher = searchers.get(repository);
1072         if (searcher == null) {
1073             IndexWriter writer = getIndexWriter(repository);
1074             searcher = new IndexSearcher(IndexReader.open(writer, true));
1075             searchers.put(repository, searcher);
1076         }
1077         return searcher;
1078     }
1079
1080     /**
1081      * Gets an index writer for the repository. The index will be created if it
1082      * does not already exist.
1083      * 
1084      * @param repository
1085      * @return an IndexWriter
1086      * @throws IOException
1087      */
1088     private IndexWriter getIndexWriter(String repository) throws IOException {
6ef2fc 1089         IndexWriter indexWriter = writers.get(repository);                
JM 1090         File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED);
d896e6 1091         File indexFolder = new File(repositoryFolder, LUCENE_DIR);
JM 1092         Directory directory = FSDirectory.open(indexFolder);        
1093
1094         if (indexWriter == null) {
1095             if (!indexFolder.exists()) {
1096                 indexFolder.mkdirs();
1097             }
1098             StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
1099             IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
1100             config.setOpenMode(OpenMode.CREATE_OR_APPEND);
1101             indexWriter = new IndexWriter(directory, config);
1102             writers.put(repository, indexWriter);
1103         }
1104         return indexWriter;
1105     }
1106
1107     /**
1108      * Searches the specified repositories for the given text or query
1109      * 
1110      * @param text
1111      *            if the text is null or empty, null is returned
d04009 1112      * @param page
JM 1113      *            the page number to retrieve. page is 1-indexed.
1114      * @param pageSize
1115      *            the number of elements to return for this page
d896e6 1116      * @param repositories
JM 1117      *            a list of repositories to search. if no repositories are
1118      *            specified null is returned.
1119      * @return a list of SearchResults in order from highest to the lowest score
1120      * 
1121      */
d04009 1122     public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) {
d896e6 1123         if (ArrayUtils.isEmpty(repositories)) {
JM 1124             return null;
1125         }
d04009 1126         return search(text, page, pageSize, repositories.toArray(new String[0]));
d896e6 1127     }
JM 1128     
1129     /**
1130      * Searches the specified repositories for the given text or query
1131      * 
1132      * @param text
1133      *            if the text is null or empty, null is returned
d04009 1134      * @param page
JM 1135      *            the page number to retrieve. page is 1-indexed.
1136      * @param pageSize
1137      *            the number of elements to return for this page
d896e6 1138      * @param repositories
JM 1139      *            a list of repositories to search. if no repositories are
1140      *            specified null is returned.
1141      * @return a list of SearchResults in order from highest to the lowest score
1142      * 
d04009 1143      */
JM 1144     public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
d896e6 1145         if (StringUtils.isEmpty(text)) {
JM 1146             return null;
1147         }
1148         if (ArrayUtils.isEmpty(repositories)) {
1149             return null;
1150         }
1151         Set<SearchResult> results = new LinkedHashSet<SearchResult>();
1152         StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
1153         try {
1154             // default search checks summary and content
1155             BooleanQuery query = new BooleanQuery();
1156             QueryParser qp;
1157             qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
1158             qp.setAllowLeadingWildcard(true);
1159             query.add(qp.parse(text), Occur.SHOULD);
1160
1161             qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
1162             qp.setAllowLeadingWildcard(true);
1163             query.add(qp.parse(text), Occur.SHOULD);
87ee94 1164             
d896e6 1165             IndexSearcher searcher;
JM 1166             if (repositories.length == 1) {
1167                 // single repository search
1168                 searcher = getIndexSearcher(repositories[0]);
1169             } else {
1170                 // multiple repository search
1171                 List<IndexReader> readers = new ArrayList<IndexReader>();
1172                 for (String repository : repositories) {
1173                     IndexSearcher repositoryIndex = getIndexSearcher(repository);
1174                     readers.add(repositoryIndex.getIndexReader());
1175                 }
1176                 IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
905d31 1177                 MultiSourceReader reader = new MultiSourceReader(rdrs);
d896e6 1178                 searcher = new IndexSearcher(reader);
JM 1179             }
87ee94 1180             
d896e6 1181             Query rewrittenQuery = searcher.rewrite(query);
87ee94 1182             logger.debug(rewrittenQuery.toString());
JM 1183
d04009 1184             TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
d896e6 1185             searcher.search(rewrittenQuery, collector);
d04009 1186             int offset = Math.max(0, (page - 1) * pageSize);
JM 1187             ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
1188             int totalHits = collector.getTotalHits();
d896e6 1189             for (int i = 0; i < hits.length; i++) {
JM 1190                 int docId = hits[i].doc;
1191                 Document doc = searcher.doc(docId);
d04009 1192                 SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
905d31 1193                 if (repositories.length == 1) {
JM 1194                     // single repository search
1195                     result.repository = repositories[0];
1196                 } else {
1197                     // multi-repository search
1198                     MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
1199                     int index = reader.getSourceIndex(docId);
1200                     result.repository = repositories[index];
1201                 }
d896e6 1202                 String content = doc.get(FIELD_CONTENT);                
JM 1203                 result.fragment = getHighlightedFragment(analyzer, query, content, result);
1204                 results.add(result);
1205             }
1206         } catch (Exception e) {
1207             logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
1208         }
1209         return new ArrayList<SearchResult>(results);
1210     }
1211     
1212     /**
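1213      * Builds the HTML fragment displayed for a search hit, highlighting the matched terms.
1214      * @param analyzer
1215      * @param query
1216      * @param content
1217      * @param result
1218      * @return the HTML fragment, or an empty string for a blob without highlighted fragments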
1219      * @throws IOException
1220      * @throws InvalidTokenOffsetsException
1221      */
1222     private String getHighlightedFragment(Analyzer analyzer, Query query,
1223             String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
12c31e 1224         if (content == null) {
JM 1225             content = "";
1226         }        
1227
1228         int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;
1229
d896e6 1230         QueryScorer scorer = new QueryScorer(query, "content");
12c31e 1231         Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength); 
d896e6 1232
JM 1233         // use an artificial delimiter for the token
9f6ef3 1234         String termTag = "!!--[";
JM 1235         String termTagEnd = "]--!!";
d896e6 1236         SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
JM 1237         Highlighter highlighter = new Highlighter(formatter, scorer);        
1238         highlighter.setTextFragmenter(fragmenter);
12c31e 1239
73fba6 1240         String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
d896e6 1241         if (ArrayUtils.isEmpty(fragments)) {
JM 1242             if (SearchObjectType.blob  == result.type) {
1243                 return "";
1244             }
12c31e 1245             // clip commit message
JM 1246             String fragment = content;
1247             if (fragment.length() > fragmentLength) {
1248                 fragment = fragment.substring(0, fragmentLength) + "...";
1249             }
1250             return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true) + "</pre>";
d896e6 1251         }
12c31e 1252         
2b67ec 1253         // make sure we have unique fragments
JM 1254         Set<String> uniqueFragments = new LinkedHashSet<String>();
1255         for (String fragment : fragments) {
1256             uniqueFragments.add(fragment);
1257         }
1258         fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);
1259         
d896e6 1260         StringBuilder sb = new StringBuilder();
JM 1261         for (int i = 0, len = fragments.length; i < len; i++) {
1262             String fragment = fragments[i];
12c31e 1263             String tag = "<pre class=\"text\">";
JM 1264
d896e6 1265             // resurrect the raw fragment from removing the artificial delimiters
12c31e 1266             String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
JM 1267
1268             // determine position of the raw fragment in the content
2b67ec 1269             int pos = content.indexOf(raw);
12c31e 1270                 
JM 1271             // restore complete first line of fragment
1272             int c = pos;
1273             while (c > 0) {
1274                 c--;
1275                 if (content.charAt(c) == '\n') {
1276                     break;
1277                 }
1278             }
1279             if (c > 0) {
1280                 // inject leading chunk of first fragment line
1281                 fragment = content.substring(c + 1, pos) + fragment;
1282             }
1283                 
1284             if (SearchObjectType.blob  == result.type) {
1285                 // count lines as offset into the content for this fragment
c2833a 1286                 int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));
12c31e 1287                 
JM 1288                 // create fragment tag with line number and language
1289                 String lang = "";
1290                 String ext = StringUtils.getFileExtension(result.path).toLowerCase();
1291                 if (!StringUtils.isEmpty(ext)) {
1292                     // maintain leading space!
1293                     lang = " lang-" + ext;
1294                 }
1295                 tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);
1296                                 
1297             }
d896e6 1298             
12c31e 1299             sb.append(tag);
JM 1300
d896e6 1301             // replace the artificial delimiter with html tags
9f6ef3 1302             String html = StringUtils.escapeForHtml(fragment, false);
JM 1303             html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
d896e6 1304             sb.append(html);
JM 1305             sb.append("</pre>");
1306             if (i < len - 1) {
1307                 sb.append("<span class=\"ellipses\">...</span><br/>");
1308             }
1309         }
1310         return sb.toString();
12c31e 1311     }    
d896e6 1312     
JM 1313     /**
1314      * Simple class to track the results of an index update. 
1315      */
1316     private class IndexResult {
1317         long startTime = System.currentTimeMillis();
1318         long endTime = startTime;
1319         boolean success;
1320         int branchCount;
1321         int commitCount;
1322         int blobCount;
1323         int issueCount;
1324         
1325         void add(IndexResult result) {
1326             this.branchCount += result.branchCount;
1327             this.commitCount += result.commitCount;
1328             this.blobCount += result.blobCount;
1329             this.issueCount += result.issueCount;            
1330         }
1331         
1332         void success() {
1333             success = true;
1334             endTime = System.currentTimeMillis();
1335         }
1336         
1337         float duration() {
1338             return (endTime - startTime)/1000f;
1339         }
b938ae 1340     }
905d31 1341     
JM 1342     /**
1343      * Custom subclass of MultiReader to identify the source index for a given
1344      * doc id.  This would not be necessary if there were a public method to
1345      * obtain this information.
1346      *  
1347      */
1348     private class MultiSourceReader extends MultiReader {
1349         
1350         final Method method;
1351         
1352         MultiSourceReader(IndexReader[] subReaders) {
1353             super(subReaders);
1354             Method m = null;
1355             try {
1356                 m = MultiReader.class.getDeclaredMethod("readerIndex", int.class);
1357                 m.setAccessible(true);
1358             } catch (Exception e) {
1359                 logger.error("Error getting readerIndex method", e);
1360             }
1361             method = m;
1362         }
1363         
1364         int getSourceIndex(int docId) {
1365             int index = -1;
1366             try {
1367                 Object o = method.invoke(this, docId);
1368                 index = (Integer) o;
1369             } catch (Exception e) {
1370                 logger.error("Error getting source index", e);
1371             }
1372             return index;
1373         }
1374     }
e31da0 1375 }