Paul Martin
2016-04-16 eecaad8b8e2c447429c31a01d49260ddd6b4ee03
commit | author | age
e31da0 1 /*
JM 2  * Copyright 2012 gitblit.com.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
7bf6e1 16 package com.gitblit.service;
e31da0 17
d896e6 18 import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;
e31da0 19
d896e6 20 import java.io.ByteArrayOutputStream;
JM 21 import java.io.File;
eecaad 22 import java.io.FileInputStream;
d896e6 23 import java.io.IOException;
JM 24 import java.io.InputStream;
25 import java.text.MessageFormat;
26 import java.text.ParseException;
27 import java.util.ArrayList;
28 import java.util.Collections;
29 import java.util.Comparator;
30 import java.util.HashMap;
31 import java.util.LinkedHashSet;
32 import java.util.List;
33 import java.util.Map;
34 import java.util.Set;
35 import java.util.TreeMap;
36 import java.util.TreeSet;
37 import java.util.concurrent.ConcurrentHashMap;
38
39 import org.apache.lucene.analysis.Analyzer;
40 import org.apache.lucene.analysis.standard.StandardAnalyzer;
41 import org.apache.lucene.document.DateTools;
42 import org.apache.lucene.document.DateTools.Resolution;
43 import org.apache.lucene.document.Document;
44 import org.apache.lucene.document.Field;
db9832 45 import org.apache.lucene.document.StringField;
JM 46 import org.apache.lucene.document.TextField;
47 import org.apache.lucene.index.DirectoryReader;
d896e6 48 import org.apache.lucene.index.IndexReader;
JM 49 import org.apache.lucene.index.IndexWriter;
50 import org.apache.lucene.index.IndexWriterConfig;
51 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
52 import org.apache.lucene.index.MultiReader;
53 import org.apache.lucene.index.Term;
db9832 54 import org.apache.lucene.queryparser.classic.QueryParser;
d896e6 55 import org.apache.lucene.search.BooleanClause.Occur;
JM 56 import org.apache.lucene.search.BooleanQuery;
57 import org.apache.lucene.search.IndexSearcher;
58 import org.apache.lucene.search.Query;
59 import org.apache.lucene.search.ScoreDoc;
60 import org.apache.lucene.search.TopScoreDocCollector;
61 import org.apache.lucene.search.highlight.Fragmenter;
62 import org.apache.lucene.search.highlight.Highlighter;
63 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
64 import org.apache.lucene.search.highlight.QueryScorer;
65 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
66 import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
67 import org.apache.lucene.store.Directory;
68 import org.apache.lucene.store.FSDirectory;
69 import org.apache.lucene.util.Version;
eecaad 70 import org.apache.tika.metadata.Metadata;
PM 71 import org.apache.tika.parser.AutoDetectParser;
72 import org.apache.tika.parser.ParseContext;
73 import org.apache.tika.parser.pdf.PDFParser;
74 import org.apache.tika.sax.BodyContentHandler;
d896e6 75 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
JM 76 import org.eclipse.jgit.lib.Constants;
a02998 77 import org.eclipse.jgit.lib.FileMode;
d896e6 78 import org.eclipse.jgit.lib.ObjectId;
JM 79 import org.eclipse.jgit.lib.ObjectLoader;
80 import org.eclipse.jgit.lib.ObjectReader;
e31da0 81 import org.eclipse.jgit.lib.Repository;
6ef2fc 82 import org.eclipse.jgit.lib.RepositoryCache.FileKey;
d896e6 83 import org.eclipse.jgit.revwalk.RevCommit;
JM 84 import org.eclipse.jgit.revwalk.RevTree;
85 import org.eclipse.jgit.revwalk.RevWalk;
86 import org.eclipse.jgit.storage.file.FileBasedConfig;
87 import org.eclipse.jgit.treewalk.EmptyTreeIterator;
88 import org.eclipse.jgit.treewalk.TreeWalk;
89 import org.eclipse.jgit.util.FS;
e31da0 90 import org.slf4j.Logger;
JM 91 import org.slf4j.LoggerFactory;
92
d896e6 93 import com.gitblit.Constants.SearchObjectType;
eecaad 94 import com.gitblit.GitBlit;
7bf6e1 95 import com.gitblit.IStoredSettings;
JM 96 import com.gitblit.Keys;
eecaad 97 import com.gitblit.manager.FilestoreManager;
PM 98 import com.gitblit.manager.IFilestoreManager;
db4f6b 99 import com.gitblit.manager.IRepositoryManager;
d896e6 100 import com.gitblit.models.PathModel.PathChangeModel;
JM 101 import com.gitblit.models.RefModel;
40ca5c 102 import com.gitblit.models.RepositoryModel;
d896e6 103 import com.gitblit.models.SearchResult;
JM 104 import com.gitblit.utils.ArrayUtils;
e31da0 105 import com.gitblit.utils.JGitUtils;
d896e6 106 import com.gitblit.utils.StringUtils;
e31da0 107
JM 108 /**
7bf6e1 109  * The Lucene service handles indexing and searching repositories.
699e71 110  *
e31da0 111  * @author James Moger
699e71 112  *
e31da0 113  */
7bf6e1 114 public class LuceneService implements Runnable {
699e71 115
JM 116
3a4470 117     private static final int INDEX_VERSION = 6;
e31da0 118
d896e6 119     private static final String FIELD_OBJECT_TYPE = "type";
JM 120     private static final String FIELD_PATH = "path";
121     private static final String FIELD_COMMIT = "commit";
122     private static final String FIELD_BRANCH = "branch";
123     private static final String FIELD_SUMMARY = "summary";
124     private static final String FIELD_CONTENT = "content";
125     private static final String FIELD_AUTHOR = "author";
126     private static final String FIELD_COMMITTER = "committer";
127     private static final String FIELD_DATE = "date";
128     private static final String FIELD_TAG = "tag";
129
130     private static final String CONF_FILE = "lucene.conf";
131     private static final String LUCENE_DIR = "lucene";
132     private static final String CONF_INDEX = "index";
133     private static final String CONF_VERSION = "version";
134     private static final String CONF_ALIAS = "aliases";
135     private static final String CONF_BRANCH = "branches";
699e71 136
3a4470 137     private static final Version LUCENE_VERSION = Version.LUCENE_4_10_0;
699e71 138
7bf6e1 139     private final Logger logger = LoggerFactory.getLogger(LuceneService.class);
699e71 140
d896e6 141     private final IStoredSettings storedSettings;
cacf8b 142     private final IRepositoryManager repositoryManager;
eecaad 143     private final IFilestoreManager filestoreManager;
PM 144     
d896e6 145     private final File repositoriesFolder;
699e71 146
d896e6 147     private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
JM 148     private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();
699e71 149
f1d2ad 150     private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip";
JM 151     private Set<String> excludedExtensions;
699e71 152
7bf6e1 153     public LuceneService(
cacf8b 154             IStoredSettings settings,
eecaad 155             IRepositoryManager repositoryManager, 
PM 156             IFilestoreManager filestoreManager) {
cacf8b 157
d896e6 158         this.storedSettings = settings;
cacf8b 159         this.repositoryManager = repositoryManager;
eecaad 160         this.filestoreManager = filestoreManager;
cacf8b 161         this.repositoriesFolder = repositoryManager.getRepositoriesFolder();
462488 162         String exts = luceneIgnoreExtensions;
JM 163         if (settings != null) {
164             exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts);
165         }
166         excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
e31da0 167     }
JM 168
169     /**
699e71 170      * Run is executed by the Gitblit executor service.  Because this is called
273cb9 171      * by an executor service, calls will queue - i.e. there can never be
JM 172      * concurrent execution of repository index updates.
e31da0 173      */
JM 174     @Override
175     public void run() {
7db092 176         if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) {
JM 177             // Lucene indexing is disabled
178             return;
179         }
f1d2ad 180         // reload the excluded extensions
JM 181         String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions);
182         excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
183
db4f6b 184         if (repositoryManager.isCollectingGarbage()) {
dad8b4 185             // busy collecting garbage, try again later
JM 186             return;
187         }
699e71 188
db4f6b 189         for (String repositoryName: repositoryManager.getRepositoryList()) {
JM 190             RepositoryModel model = repositoryManager.getRepositoryModel(repositoryName);
40ca5c 191             if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
db4f6b 192                 Repository repository = repositoryManager.getRepository(model.name);
e92c6d 193                 if (repository == null) {
db4f6b 194                     if (repositoryManager.isCollectingGarbage(model.name)) {
e92c6d 195                         logger.info(MessageFormat.format("Skipping Lucene index of {0}, busy garbage collecting", repositoryName));
JM 196                     }
197                     continue;
198                 }
699e71 199                 index(model, repository);
40ca5c 200                 repository.close();
JM 201                 System.gc();
e31da0 202             }
JM 203         }
204     }
205
206     /**
207      * Synchronously indexes a repository. This may build a complete index of a
208      * repository or it may update an existing index.
699e71 209      *
3ad13e 210      * @param displayName
e31da0 211      *            the name of the repository
JM 212      * @param repository
213      *            the repository object
214      */
9f6ef3 215     private void index(RepositoryModel model, Repository repository) {
e31da0 216         try {
40ca5c 217             if (shouldReindex(repository)) {
JM 218                 // (re)build the entire index
219                 IndexResult result = reindex(model, repository);
220
221                 if (result.success) {
222                     if (result.commitCount > 0) {
223                         String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
224                         logger.info(MessageFormat.format(msg, model.name, result.commitCount,
225                                 result.blobCount, result.branchCount, result.duration()));
e31da0 226                     }
JM 227                 } else {
40ca5c 228                     String msg = "Could not build {0} Lucene index!";
JM 229                     logger.error(MessageFormat.format(msg, model.name));
e31da0 230                 }
JM 231             } else {
40ca5c 232                 // update the index with latest commits
JM 233                 IndexResult result = updateIndex(model, repository);
234                 if (result.success) {
235                     if (result.commitCount > 0) {
236                         String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
237                         logger.info(MessageFormat.format(msg, model.name, result.commitCount,
238                                 result.blobCount, result.branchCount, result.duration()));
239                     }
240                 } else {
241                     String msg = "Could not update {0} Lucene index!";
242                     logger.error(MessageFormat.format(msg, model.name));
243                 }
e31da0 244             }
JM 245         } catch (Throwable t) {
40ca5c 246             logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t);
e31da0 247         }
JM 248     }
699e71 249
e6637c 250     /**
JM 251      * Close the writer/searcher objects for a repository.
699e71 252      *
e6637c 253      * @param repositoryName
JM 254      */
8e9988 255     public synchronized void close(String repositoryName) {
JM 256         try {
257             IndexSearcher searcher = searchers.remove(repositoryName);
258             if (searcher != null) {
259                 searcher.getIndexReader().close();
260             }
261         } catch (Exception e) {
262             logger.error("Failed to close index searcher for " + repositoryName, e);
263         }
699e71 264
e6637c 265         try {
JM 266             IndexWriter writer = writers.remove(repositoryName);
267             if (writer != null) {
268                 writer.close();
269             }
270         } catch (Exception e) {
271             logger.error("Failed to close index writer for " + repositoryName, e);
699e71 272         }
e6637c 273     }
b938ae 274
JM 275     /**
276      * Close all Lucene indexers.
699e71 277      *
b938ae 278      */
8e9988 279     public synchronized void close() {
d896e6 280         // close all writers
JM 281         for (String writer : writers.keySet()) {
282             try {
60110f 283                 writers.get(writer).close(true);
d896e6 284             } catch (Throwable t) {
JM 285                 logger.error("Failed to close Lucene writer for " + writer, t);
286             }
287         }
288         writers.clear();
289
290         // close all searchers
291         for (String searcher : searchers.keySet()) {
292             try {
8e9988 293                 searchers.get(searcher).getIndexReader().close();
d896e6 294             } catch (Throwable t) {
JM 295                 logger.error("Failed to close Lucene searcher for " + searcher, t);
296             }
297         }
298         searchers.clear();
299     }
300
699e71 301
d896e6 302     /**
JM 303      * Deletes the Lucene index for the specified repository.
699e71 304      *
d896e6 305      * @param repositoryName
JM 306      * @return true, if successful
307      */
308     public boolean deleteIndex(String repositoryName) {
309         try {
8e9988 310             // close any open writer/searcher
JM 311             close(repositoryName);
312
d896e6 313             // delete the index folder
eb741a 314             File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repositoryName), FS.DETECTED);
d896e6 315             File luceneIndex = new File(repositoryFolder, LUCENE_DIR);
JM 316             if (luceneIndex.exists()) {
317                 org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
318                         org.eclipse.jgit.util.FileUtils.RECURSIVE);
319             }
320             // delete the config file
321             File luceneConfig = new File(repositoryFolder, CONF_FILE);
322             if (luceneConfig.exists()) {
323                 luceneConfig.delete();
324             }
325             return true;
326         } catch (IOException e) {
327             throw new RuntimeException(e);
328         }
329     }
699e71 330
d896e6 331     /**
JM 332      * Returns the author for the commit, if this information is available.
699e71 333      *
d896e6 334      * @param commit
JM 335      * @return an author or unknown
336      */
337     private String getAuthor(RevCommit commit) {
338         String name = "unknown";
339         try {
340             name = commit.getAuthorIdent().getName();
341             if (StringUtils.isEmpty(name)) {
342                 name = commit.getAuthorIdent().getEmailAddress();
343             }
699e71 344         } catch (NullPointerException n) {
d896e6 345         }
JM 346         return name;
347     }
699e71 348
d896e6 349     /**
JM 350      * Returns the committer for the commit, if this information is available.
699e71 351      *
d896e6 352      * @param commit
JM 353      * @return an committer or unknown
354      */
355     private String getCommitter(RevCommit commit) {
356         String name = "unknown";
357         try {
358             name = commit.getCommitterIdent().getName();
359             if (StringUtils.isEmpty(name)) {
360                 name = commit.getCommitterIdent().getEmailAddress();
361             }
699e71 362         } catch (NullPointerException n) {
d896e6 363         }
JM 364         return name;
365     }
699e71 366
905d31 367     /**
JM 368      * Get the tree associated with the given commit.
369      *
370      * @param walk
371      * @param commit
372      * @return tree
373      * @throws IOException
374      */
9f6ef3 375     private RevTree getTree(final RevWalk walk, final RevCommit commit)
905d31 376             throws IOException {
JM 377         final RevTree tree = commit.getTree();
378         if (tree != null) {
379             return tree;
380         }
381         walk.parseHeaders(commit);
382         return commit.getTree();
383     }
d896e6 384
JM 385     /**
386      * Construct a keyname from the branch.
699e71 387      *
d896e6 388      * @param branchName
JM 389      * @return a keyname appropriate for the Git config file format
390      */
391     private String getBranchKey(String branchName) {
392         return StringUtils.getSHA1(branchName);
393     }
394
395     /**
396      * Returns the Lucene configuration for the specified repository.
699e71 397      *
d896e6 398      * @param repository
JM 399      * @return a config object
400      */
401     private FileBasedConfig getConfig(Repository repository) {
402         File file = new File(repository.getDirectory(), CONF_FILE);
403         FileBasedConfig config = new FileBasedConfig(file, FS.detect());
404         return config;
405     }
406
407     /**
408      * Reads the Lucene config file for the repository to check the index
409      * version. If the index version is different, then rebuild the repository
410      * index.
699e71 411      *
d896e6 412      * @param repository
JM 413      * @return true of the on-disk index format is different than INDEX_VERSION
414      */
9f6ef3 415     private boolean shouldReindex(Repository repository) {
d896e6 416         try {
JM 417             FileBasedConfig config = getConfig(repository);
418             config.load();
419             int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
420             // reindex if versions do not match
421             return indexVersion != INDEX_VERSION;
422         } catch (Throwable t) {
423         }
424         return true;
425     }
426
427
428     /**
429      * This completely indexes the repository and will destroy any existing
430      * index.
699e71 431      *
d896e6 432      * @param repositoryName
JM 433      * @param repository
434      * @return IndexResult
435      */
40ca5c 436     public IndexResult reindex(RepositoryModel model, Repository repository) {
699e71 437         IndexResult result = new IndexResult();
40ca5c 438         if (!deleteIndex(model.name)) {
d896e6 439             return result;
JM 440         }
fa0afc 441         try {
JM 442             String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
d896e6 443             FileBasedConfig config = getConfig(repository);
JM 444             Set<String> indexedCommits = new TreeSet<String>();
40ca5c 445             IndexWriter writer = getIndexWriter(model.name);
d896e6 446             // build a quick lookup of tags
JM 447             Map<String, List<String>> tags = new HashMap<String, List<String>>();
448             for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
449                 if (!tag.isAnnotatedTag()) {
450                     // skip non-annotated tags
451                     continue;
452                 }
d0bb38 453                 if (!tags.containsKey(tag.getReferencedObjectId().getName())) {
d896e6 454                     tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
JM 455                 }
456                 tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
457             }
699e71 458
d896e6 459             ObjectReader reader = repository.newObjectReader();
JM 460
461             // get the local branches
462             List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
699e71 463
d896e6 464             // sort them by most recently updated
JM 465             Collections.sort(branches, new Comparator<RefModel>() {
466                 @Override
467                 public int compare(RefModel ref1, RefModel ref2) {
468                     return ref2.getDate().compareTo(ref1.getDate());
469                 }
470             });
699e71 471
d896e6 472             // reorder default branch to first position
JM 473             RefModel defaultBranch = null;
474             ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
475             for (RefModel branch :  branches) {
476                 if (branch.getObjectId().equals(defaultBranchId)) {
1aabf0 477                     defaultBranch = branch;
d896e6 478                     break;
JM 479                 }
480             }
481             branches.remove(defaultBranch);
482             branches.add(0, defaultBranch);
699e71 483
d896e6 484             // walk through each branch
JM 485             for (RefModel branch : branches) {
40ca5c 486
1aabf0 487                 boolean indexBranch = false;
JM 488                 if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
489                         && branch.equals(defaultBranch)) {
490                     // indexing "default" branch
491                     indexBranch = true;
c134a0 492                 } else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
JM 493                     // skip internal meta branches
1aabf0 494                     indexBranch = false;
JM 495                 } else {
496                     // normal explicit branch check
497                     indexBranch = model.indexedBranches.contains(branch.getName());
498                 }
699e71 499
40ca5c 500                 // if this branch is not specifically indexed then skip
1aabf0 501                 if (!indexBranch) {
d896e6 502                     continue;
JM 503                 }
504
505                 String branchName = branch.getName();
506                 RevWalk revWalk = new RevWalk(reader);
507                 RevCommit tip = revWalk.parseCommit(branch.getObjectId());
508                 String tipId = tip.getId().getName();
509
510                 String keyName = getBranchKey(branchName);
511                 config.setString(CONF_ALIAS, null, keyName, branchName);
512                 config.setString(CONF_BRANCH, null, keyName, tipId);
513
514                 // index the blob contents of the tree
515                 TreeWalk treeWalk = new TreeWalk(repository);
516                 treeWalk.addTree(tip.getTree());
699e71 517                 treeWalk.setRecursive(true);
JM 518
d896e6 519                 Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
JM 520                 while (treeWalk.next()) {
749110 521                     // ensure path is not in a submodule
a02998 522                     if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
PA 523                         paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
524                     }
699e71 525                 }
d896e6 526
JM 527                 ByteArrayOutputStream os = new ByteArrayOutputStream();
528                 byte[] tmp = new byte[32767];
529
530                 RevWalk commitWalk = new RevWalk(reader);
531                 commitWalk.markStart(tip);
699e71 532
d896e6 533                 RevCommit commit;
JM 534                 while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
535                     TreeWalk diffWalk = new TreeWalk(reader);
536                     int parentCount = commit.getParentCount();
537                     switch (parentCount) {
538                     case 0:
539                         diffWalk.addTree(new EmptyTreeIterator());
540                         break;
541                     case 1:
542                         diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
543                         break;
544                     default:
545                         // skip merge commits
546                         continue;
547                     }
548                     diffWalk.addTree(getTree(commitWalk, commit));
549                     diffWalk.setFilter(ANY_DIFF);
550                     diffWalk.setRecursive(true);
551                     while ((paths.size() > 0) && diffWalk.next()) {
552                         String path = diffWalk.getPathString();
553                         if (!paths.containsKey(path)) {
554                             continue;
555                         }
eecaad 556 //TODO: Figure out filestore oid the path - bit more involved than updating the index
PM 557                         
d896e6 558                         // remove path from set
JM 559                         ObjectId blobId = paths.remove(path);
560                         result.blobCount++;
699e71 561
d896e6 562                         // index the blob metadata
JM 563                         String blobAuthor = getAuthor(commit);
564                         String blobCommitter = getCommitter(commit);
565                         String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
566                                 Resolution.MINUTE);
699e71 567
d896e6 568                         Document doc = new Document();
db9832 569                         doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
JM 570                         doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
571                         doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
572                         doc.add(new Field(FIELD_PATH, path, TextField.TYPE_STORED));
573                         doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED));
574                         doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED));
575                         doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED));
d896e6 576
JM 577                         // determine extension to compare to the extension
578                         // blacklist
579                         String ext = null;
580                         String name = path.toLowerCase();
581                         if (name.indexOf('.') > -1) {
582                             ext = name.substring(name.lastIndexOf('.') + 1);
583                         }
584
585                         // index the blob content
699e71 586                         if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
d896e6 587                             ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
699e71 588                             InputStream in = ldr.openStream();
d896e6 589                             int n;
JM 590                             while ((n = in.read(tmp)) > 0) {
591                                 os.write(tmp, 0, n);
592                             }
593                             in.close();
594                             byte[] content = os.toByteArray();
699e71 595                             String str = StringUtils.decodeString(content, encodings);
db9832 596                             doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
d896e6 597                             os.reset();
699e71 598                         }
JM 599
d896e6 600                         // add the blob to the index
JM 601                         writer.addDocument(doc);
602                     }
603                 }
604
605                 os.close();
606
607                 // index the tip commit object
608                 if (indexedCommits.add(tipId)) {
609                     Document doc = createDocument(tip, tags.get(tipId));
db9832 610                     doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
d896e6 611                     writer.addDocument(doc);
JM 612                     result.commitCount += 1;
613                     result.branchCount += 1;
614                 }
615
616                 // traverse the log and index the previous commit objects
617                 RevWalk historyWalk = new RevWalk(reader);
618                 historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
619                 RevCommit rev;
620                 while ((rev = historyWalk.next()) != null) {
621                     String hash = rev.getId().getName();
622                     if (indexedCommits.add(hash)) {
623                         Document doc = createDocument(rev, tags.get(hash));
db9832 624                         doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
d896e6 625                         writer.addDocument(doc);
JM 626                         result.commitCount += 1;
627                     }
628                 }
629             }
630
631             // finished
a1cee6 632             reader.close();
699e71 633
d896e6 634             // commit all changes and reset the searcher
JM 635             config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
636             config.save();
637             writer.commit();
8e9988 638             resetIndexSearcher(model.name);
d896e6 639             result.success();
JM 640         } catch (Exception e) {
40ca5c 641             logger.error("Exception while reindexing " + model.name, e);
d896e6 642         }
JM 643         return result;
644     }
699e71 645
/**
 * Incrementally update the index with the specified commit for the
 * repository.
 *
 * @param repositoryName
 * @param repository
 * @param branch
 *            the fully qualified branch name (e.g. refs/heads/master)
 * @param commit
 * @return the IndexResult describing the outcome of indexing this commit
 */
699e71 657     private IndexResult index(String repositoryName, Repository repository,
d896e6 658             String branch, RevCommit commit) {
JM 659         IndexResult result = new IndexResult();
660         try {
ae9e15 661             String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
d896e6 662             List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
JM 663             String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
664                     Resolution.MINUTE);
665             IndexWriter writer = getIndexWriter(repositoryName);
666             for (PathChangeModel path : changedPaths) {
88fb67 667                 if (path.isSubmodule()) {
JM 668                     continue;
669                 }
d896e6 670                 // delete the indexed blob
856091 671                 deleteBlob(repositoryName, branch, path.name);
d896e6 672
JM 673                 // re-index the blob
674                 if (!ChangeType.DELETE.equals(path.changeType)) {
675                     result.blobCount++;
676                     Document doc = new Document();
db9832 677                     doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
JM 678                     doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
679                     doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
680                     doc.add(new Field(FIELD_PATH, path.path, TextField.TYPE_STORED));
681                     doc.add(new Field(FIELD_DATE, revDate, StringField.TYPE_STORED));
682                     doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
683                     doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
d896e6 684
JM 685                     // determine extension to compare to the extension
686                     // blacklist
687                     String ext = null;
688                     String name = path.name.toLowerCase();
689                     if (name.indexOf('.') > -1) {
690                         ext = name.substring(name.lastIndexOf('.') + 1);
691                     }
692
693                     if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
eecaad 694                         String str = "";
d896e6 695                         // read the blob content
eecaad 696                         if (path.isFilestoreItem()) {
PM 697                             //Get file from filestore
698                             BodyContentHandler handler = new BodyContentHandler();
699                             Metadata metadata = new Metadata();
700                             PDFParser parser = new PDFParser();
701                             
702                             ParseContext parseContext = new ParseContext();
703                             File lfsFile = filestoreManager.getStoragePath(path.getFilestoreOid());
704                             FileInputStream inputstream = new FileInputStream(lfsFile);
705                             parser.parse(inputstream, handler, metadata, parseContext);
706                             str = handler.toString();
707                         } else {
708                             str = JGitUtils.getStringContent(repository, commit.getTree(),
ae9e15 709                                 path.path, encodings);
eecaad 710                         }
PM 711                         
749110 712                         if (str != null) {
db9832 713                             doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
749110 714                             writer.addDocument(doc);
JM 715                         }
d896e6 716                     }
JM 717                 }
718             }
719             writer.commit();
699e71 720
261024 721             // get any annotated commit tags
JM 722             List<String> commitTags = new ArrayList<String>();
33ceba 723             for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
261024 724                 if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
JM 725                     commitTags.add(ref.displayName);
726                 }
727             }
699e71 728
261024 729             // create and write the Lucene document
JM 730             Document doc = createDocument(commit, commitTags);
db9832 731             doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
d896e6 732             result.commitCount++;
JM 733             result.success = index(repositoryName, doc);
734         } catch (Exception e) {
735             logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e);
736         }
737         return result;
738     }
739
740     /**
741      * Delete a blob from the specified branch of the repository index.
699e71 742      *
d896e6 743      * @param repositoryName
JM 744      * @param branch
745      * @param path
746      * @throws Exception
87ee94 747      * @return true, if deleted, false if no record was deleted
d896e6 748      */
87ee94 749     public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception {
JM 750         String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH);
751         String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path);
699e71 752
87ee94 753         BooleanQuery query = new BooleanQuery();
60110f 754         StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
JM 755         QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
87ee94 756         query.add(qp.parse(q), Occur.MUST);
JM 757
d896e6 758         IndexWriter writer = getIndexWriter(repositoryName);
87ee94 759         int numDocsBefore = writer.numDocs();
699e71 760         writer.deleteDocuments(query);
d896e6 761         writer.commit();
87ee94 762         int numDocsAfter = writer.numDocs();
JM 763         if (numDocsBefore == numDocsAfter) {
764             logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
765             return false;
766         } else {
767             logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
768             return true;
769         }
d896e6 770     }
JM 771
772     /**
773      * Updates a repository index incrementally from the last indexed commits.
699e71 774      *
40ca5c 775      * @param model
d896e6 776      * @param repository
JM 777      * @return IndexResult
778      */
9f6ef3 779     private IndexResult updateIndex(RepositoryModel model, Repository repository) {
d896e6 780         IndexResult result = new IndexResult();
JM 781         try {
782             FileBasedConfig config = getConfig(repository);
783             config.load();
784
785             // build a quick lookup of annotated tags
786             Map<String, List<String>> tags = new HashMap<String, List<String>>();
787             for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
788                 if (!tag.isAnnotatedTag()) {
789                     // skip non-annotated tags
790                     continue;
791                 }
b1d77a 792                 if (!tags.containsKey(tag.getObjectId().getName())) {
d896e6 793                     tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
JM 794                 }
795                 tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
796             }
797
798             // detect branch deletion
799             // first assume all branches are deleted and then remove each
800             // existing branch from deletedBranches during indexing
801             Set<String> deletedBranches = new TreeSet<String>();
802             for (String alias : config.getNames(CONF_ALIAS)) {
803                 String branch = config.getString(CONF_ALIAS, null, alias);
804                 deletedBranches.add(branch);
805             }
806
1aabf0 807             // get the local branches
d896e6 808             List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
699e71 809
1aabf0 810             // sort them by most recently updated
JM 811             Collections.sort(branches, new Comparator<RefModel>() {
812                 @Override
813                 public int compare(RefModel ref1, RefModel ref2) {
814                     return ref2.getDate().compareTo(ref1.getDate());
815                 }
816             });
699e71 817
1aabf0 818             // reorder default branch to first position
JM 819             RefModel defaultBranch = null;
820             ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
821             for (RefModel branch :  branches) {
822                 if (branch.getObjectId().equals(defaultBranchId)) {
823                     defaultBranch = branch;
824                     break;
825                 }
826             }
827             branches.remove(defaultBranch);
828             branches.add(0, defaultBranch);
699e71 829
1aabf0 830             // walk through each branches
d896e6 831             for (RefModel branch : branches) {
JM 832                 String branchName = branch.getName();
833
1aabf0 834                 boolean indexBranch = false;
JM 835                 if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
836                         && branch.equals(defaultBranch)) {
837                     // indexing "default" branch
838                     indexBranch = true;
c134a0 839                 } else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
JM 840                     // ignore internal meta branches
a04808 841                     indexBranch = false;
1aabf0 842                 } else {
JM 843                     // normal explicit branch check
844                     indexBranch = model.indexedBranches.contains(branch.getName());
845                 }
699e71 846
1aabf0 847                 // if this branch is not specifically indexed then skip
JM 848                 if (!indexBranch) {
40ca5c 849                     continue;
JM 850                 }
699e71 851
d896e6 852                 // remove this branch from the deletedBranches set
JM 853                 deletedBranches.remove(branchName);
699e71 854
d896e6 855                 // determine last commit
JM 856                 String keyName = getBranchKey(branchName);
857                 String lastCommit = config.getString(CONF_BRANCH, null, keyName);
858
859                 List<RevCommit> revs;
860                 if (StringUtils.isEmpty(lastCommit)) {
861                     // new branch/unindexed branch, get all commits on branch
862                     revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
863                 } else {
864                     // pre-existing branch, get changes since last commit
865                     revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
866                 }
867
868                 if (revs.size() > 0) {
869                     result.branchCount += 1;
870                 }
699e71 871
JM 872                 // reverse the list of commits so we start with the first commit
d896e6 873                 Collections.reverse(revs);
699e71 874                 for (RevCommit commit : revs) {
a04808 875                     // index a commit
JM 876                     result.add(index(model.name, repository, branchName, commit));
d896e6 877                 }
JM 878
879                 // update the config
880                 config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
881                 config.setString(CONF_ALIAS, null, keyName, branchName);
882                 config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
883                 config.save();
884             }
885
886             // the deletedBranches set will normally be empty by this point
887             // unless a branch really was deleted and no longer exists
888             if (deletedBranches.size() > 0) {
889                 for (String branch : deletedBranches) {
40ca5c 890                     IndexWriter writer = getIndexWriter(model.name);
d896e6 891                     writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
JM 892                     writer.commit();
893                 }
894             }
895             result.success = true;
896         } catch (Throwable t) {
40ca5c 897             logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
d896e6 898         }
JM 899         return result;
900     }
699e71 901
d896e6 902     /**
JM 903      * Creates a Lucene document for a commit
699e71 904      *
d896e6 905      * @param commit
JM 906      * @param tags
907      * @return a Lucene document
908      */
909     private Document createDocument(RevCommit commit, List<String> tags) {
910         Document doc = new Document();
db9832 911         doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), StringField.TYPE_STORED));
JM 912         doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
d896e6 913         doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
db9832 914                 Resolution.MINUTE), StringField.TYPE_STORED));
JM 915         doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
916         doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
917         doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), TextField.TYPE_STORED));
918         doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), TextField.TYPE_STORED));
d896e6 919         if (!ArrayUtils.isEmpty(tags)) {
db9832 920             doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), TextField.TYPE_STORED));
d896e6 921         }
JM 922         return doc;
923     }
924
925     /**
926      * Incrementally index an object for the repository.
699e71 927      *
d896e6 928      * @param repositoryName
JM 929      * @param doc
930      * @return true, if successful
931      */
932     private boolean index(String repositoryName, Document doc) {
699e71 933         try {
d896e6 934             IndexWriter writer = getIndexWriter(repositoryName);
JM 935             writer.addDocument(doc);
936             writer.commit();
8e9988 937             resetIndexSearcher(repositoryName);
d896e6 938             return true;
JM 939         } catch (Exception e) {
940             logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
941         }
942         return false;
943     }
944
d04009 945     private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException {
d896e6 946         SearchResult result = new SearchResult();
d04009 947         result.hitId = hitId;
JM 948         result.totalHits = totalHits;
d896e6 949         result.score = score;
JM 950         result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
699e71 951         result.summary = doc.get(FIELD_SUMMARY);
d896e6 952         result.author = doc.get(FIELD_AUTHOR);
JM 953         result.committer = doc.get(FIELD_COMMITTER);
954         result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
955         result.branch = doc.get(FIELD_BRANCH);
956         result.commitId = doc.get(FIELD_COMMIT);
957         result.path = doc.get(FIELD_PATH);
958         if (doc.get(FIELD_TAG) != null) {
959             result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
960         }
961         return result;
962     }
963
964     private synchronized void resetIndexSearcher(String repository) throws IOException {
965         IndexSearcher searcher = searchers.remove(repository);
966         if (searcher != null) {
8e9988 967             searcher.getIndexReader().close();
d896e6 968         }
JM 969     }
970
971     /**
972      * Gets an index searcher for the repository.
699e71 973      *
d896e6 974      * @param repository
JM 975      * @return
976      * @throws IOException
977      */
978     private IndexSearcher getIndexSearcher(String repository) throws IOException {
979         IndexSearcher searcher = searchers.get(repository);
980         if (searcher == null) {
981             IndexWriter writer = getIndexWriter(repository);
db9832 982             searcher = new IndexSearcher(DirectoryReader.open(writer, true));
d896e6 983             searchers.put(repository, searcher);
JM 984         }
985         return searcher;
986     }
987
988     /**
989      * Gets an index writer for the repository. The index will be created if it
990      * does not already exist or if forceCreate is specified.
699e71 991      *
d896e6 992      * @param repository
JM 993      * @return an IndexWriter
994      * @throws IOException
995      */
996     private IndexWriter getIndexWriter(String repository) throws IOException {
699e71 997         IndexWriter indexWriter = writers.get(repository);
6ef2fc 998         File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED);
d896e6 999         File indexFolder = new File(repositoryFolder, LUCENE_DIR);
60110f 1000         Directory directory = FSDirectory.open(indexFolder);
d896e6 1001
JM 1002         if (indexWriter == null) {
1003             if (!indexFolder.exists()) {
1004                 indexFolder.mkdirs();
1005             }
60110f 1006             StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
JM 1007             IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
d896e6 1008             config.setOpenMode(OpenMode.CREATE_OR_APPEND);
JM 1009             indexWriter = new IndexWriter(directory, config);
1010             writers.put(repository, indexWriter);
1011         }
1012         return indexWriter;
1013     }
1014
1015     /**
1016      * Searches the specified repositories for the given text or query
699e71 1017      *
d896e6 1018      * @param text
JM 1019      *            if the text is null or empty, null is returned
d04009 1020      * @param page
JM 1021      *            the page number to retrieve. page is 1-indexed.
1022      * @param pageSize
1023      *            the number of elements to return for this page
d896e6 1024      * @param repositories
JM 1025      *            a list of repositories to search. if no repositories are
1026      *            specified null is returned.
1027      * @return a list of SearchResults in order from highest to the lowest score
699e71 1028      *
d896e6 1029      */
d04009 1030     public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) {
d896e6 1031         if (ArrayUtils.isEmpty(repositories)) {
JM 1032             return null;
1033         }
d04009 1034         return search(text, page, pageSize, repositories.toArray(new String[0]));
d896e6 1035     }
699e71 1036
d896e6 1037     /**
JM 1038      * Searches the specified repositories for the given text or query
699e71 1039      *
d896e6 1040      * @param text
JM 1041      *            if the text is null or empty, null is returned
d04009 1042      * @param page
JM 1043      *            the page number to retrieve. page is 1-indexed.
1044      * @param pageSize
1045      *            the number of elements to return for this page
d896e6 1046      * @param repositories
JM 1047      *            a list of repositories to search. if no repositories are
1048      *            specified null is returned.
1049      * @return a list of SearchResults in order from highest to the lowest score
699e71 1050      *
d04009 1051      */
JM 1052     public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
d896e6 1053         if (StringUtils.isEmpty(text)) {
JM 1054             return null;
1055         }
1056         if (ArrayUtils.isEmpty(repositories)) {
1057             return null;
1058         }
1059         Set<SearchResult> results = new LinkedHashSet<SearchResult>();
60110f 1060         StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
d896e6 1061         try {
JM 1062             // default search checks summary and content
1063             BooleanQuery query = new BooleanQuery();
1064             QueryParser qp;
60110f 1065             qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
d896e6 1066             qp.setAllowLeadingWildcard(true);
JM 1067             query.add(qp.parse(text), Occur.SHOULD);
1068
60110f 1069             qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
d896e6 1070             qp.setAllowLeadingWildcard(true);
JM 1071             query.add(qp.parse(text), Occur.SHOULD);
699e71 1072
d896e6 1073             IndexSearcher searcher;
JM 1074             if (repositories.length == 1) {
1075                 // single repository search
1076                 searcher = getIndexSearcher(repositories[0]);
1077             } else {
1078                 // multiple repository search
1079                 List<IndexReader> readers = new ArrayList<IndexReader>();
1080                 for (String repository : repositories) {
1081                     IndexSearcher repositoryIndex = getIndexSearcher(repository);
1082                     readers.add(repositoryIndex.getIndexReader());
1083                 }
1084                 IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
905d31 1085                 MultiSourceReader reader = new MultiSourceReader(rdrs);
d896e6 1086                 searcher = new IndexSearcher(reader);
JM 1087             }
699e71 1088
d896e6 1089             Query rewrittenQuery = searcher.rewrite(query);
87ee94 1090             logger.debug(rewrittenQuery.toString());
JM 1091
60110f 1092             TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
d896e6 1093             searcher.search(rewrittenQuery, collector);
d04009 1094             int offset = Math.max(0, (page - 1) * pageSize);
JM 1095             ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
1096             int totalHits = collector.getTotalHits();
d896e6 1097             for (int i = 0; i < hits.length; i++) {
JM 1098                 int docId = hits[i].doc;
1099                 Document doc = searcher.doc(docId);
d04009 1100                 SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
905d31 1101                 if (repositories.length == 1) {
JM 1102                     // single repository search
1103                     result.repository = repositories[0];
1104                 } else {
1105                     // multi-repository search
1106                     MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
1107                     int index = reader.getSourceIndex(docId);
1108                     result.repository = repositories[index];
1109                 }
699e71 1110                 String content = doc.get(FIELD_CONTENT);
d896e6 1111                 result.fragment = getHighlightedFragment(analyzer, query, content, result);
JM 1112                 results.add(result);
1113             }
1114         } catch (Exception e) {
1115             logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
1116         }
1117         return new ArrayList<SearchResult>(results);
1118     }
699e71 1119
d896e6 1120     /**
699e71 1121      *
d896e6 1122      * @param analyzer
JM 1123      * @param query
1124      * @param content
1125      * @param result
1126      * @return
1127      * @throws IOException
1128      * @throws InvalidTokenOffsetsException
1129      */
1130     private String getHighlightedFragment(Analyzer analyzer, Query query,
1131             String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
12c31e 1132         if (content == null) {
JM 1133             content = "";
699e71 1134         }
12c31e 1135
310a80 1136         int tabLength = storedSettings.getInteger(Keys.web.tabLength, 4);
12c31e 1137         int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;
JM 1138
d896e6 1139         QueryScorer scorer = new QueryScorer(query, "content");
699e71 1140         Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);
d896e6 1141
JM 1142         // use an artificial delimiter for the token
9f6ef3 1143         String termTag = "!!--[";
JM 1144         String termTagEnd = "]--!!";
d896e6 1145         SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
699e71 1146         Highlighter highlighter = new Highlighter(formatter, scorer);
d896e6 1147         highlighter.setTextFragmenter(fragmenter);
12c31e 1148
73fba6 1149         String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
d896e6 1150         if (ArrayUtils.isEmpty(fragments)) {
JM 1151             if (SearchObjectType.blob  == result.type) {
1152                 return "";
1153             }
12c31e 1154             // clip commit message
JM 1155             String fragment = content;
1156             if (fragment.length() > fragmentLength) {
1157                 fragment = fragment.substring(0, fragmentLength) + "...";
1158             }
310a80 1159             return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true, tabLength) + "</pre>";
d896e6 1160         }
699e71 1161
2b67ec 1162         // make sure we have unique fragments
JM 1163         Set<String> uniqueFragments = new LinkedHashSet<String>();
1164         for (String fragment : fragments) {
1165             uniqueFragments.add(fragment);
1166         }
1167         fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);
699e71 1168
d896e6 1169         StringBuilder sb = new StringBuilder();
JM 1170         for (int i = 0, len = fragments.length; i < len; i++) {
1171             String fragment = fragments[i];
12c31e 1172             String tag = "<pre class=\"text\">";
JM 1173
d896e6 1174             // resurrect the raw fragment from removing the artificial delimiters
12c31e 1175             String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
JM 1176
1177             // determine position of the raw fragment in the content
2b67ec 1178             int pos = content.indexOf(raw);
699e71 1179
12c31e 1180             // restore complete first line of fragment
JM 1181             int c = pos;
1182             while (c > 0) {
1183                 c--;
1184                 if (content.charAt(c) == '\n') {
1185                     break;
1186                 }
1187             }
1188             if (c > 0) {
1189                 // inject leading chunk of first fragment line
1190                 fragment = content.substring(c + 1, pos) + fragment;
1191             }
699e71 1192
12c31e 1193             if (SearchObjectType.blob  == result.type) {
JM 1194                 // count lines as offset into the content for this fragment
c2833a 1195                 int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));
699e71 1196
12c31e 1197                 // create fragment tag with line number and language
JM 1198                 String lang = "";
1199                 String ext = StringUtils.getFileExtension(result.path).toLowerCase();
1200                 if (!StringUtils.isEmpty(ext)) {
1201                     // maintain leading space!
1202                     lang = " lang-" + ext;
1203                 }
1204                 tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);
699e71 1205
12c31e 1206             }
699e71 1207
12c31e 1208             sb.append(tag);
JM 1209
d896e6 1210             // replace the artificial delimiter with html tags
9f6ef3 1211             String html = StringUtils.escapeForHtml(fragment, false);
JM 1212             html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
d896e6 1213             sb.append(html);
JM 1214             sb.append("</pre>");
1215             if (i < len - 1) {
1216                 sb.append("<span class=\"ellipses\">...</span><br/>");
1217             }
1218         }
1219         return sb.toString();
699e71 1220     }
JM 1221
d896e6 1222     /**
699e71 1223      * Simple class to track the results of an index update.
d896e6 1224      */
JM 1225     private class IndexResult {
1226         long startTime = System.currentTimeMillis();
1227         long endTime = startTime;
1228         boolean success;
1229         int branchCount;
1230         int commitCount;
1231         int blobCount;
699e71 1232
d896e6 1233         void add(IndexResult result) {
JM 1234             this.branchCount += result.branchCount;
1235             this.commitCount += result.commitCount;
1236             this.blobCount += result.blobCount;
1237         }
699e71 1238
d896e6 1239         void success() {
JM 1240             success = true;
1241             endTime = System.currentTimeMillis();
1242         }
699e71 1243
d896e6 1244         float duration() {
JM 1245             return (endTime - startTime)/1000f;
1246         }
b938ae 1247     }
699e71 1248
905d31 1249     /**
JM 1250      * Custom subclass of MultiReader to identify the source index for a given
1251      * doc id.  This would not be necessary of there was a public method to
1252      * obtain this information.
699e71 1253      *
905d31 1254      */
JM 1255     private class MultiSourceReader extends MultiReader {
699e71 1256
60110f 1257         MultiSourceReader(IndexReader [] readers) {
db9832 1258             super(readers, false);
905d31 1259         }
699e71 1260
905d31 1261         int getSourceIndex(int docId) {
JM 1262             int index = -1;
1263             try {
db9832 1264                 index = super.readerIndex(docId);
905d31 1265             } catch (Exception e) {
JM 1266                 logger.error("Error getting source index", e);
1267             }
1268             return index;
1269         }
1270     }
e31da0 1271 }