James Moger
2015-11-22 ed552ba47c02779c270ffd62841d6d1048dade70
commit | author | age
e31da0 1 /*
JM 2  * Copyright 2012 gitblit.com.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
7bf6e1 16 package com.gitblit.service;
e31da0 17
d896e6 18 import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;
e31da0 19
d896e6 20 import java.io.ByteArrayOutputStream;
JM 21 import java.io.File;
22 import java.io.IOException;
23 import java.io.InputStream;
24 import java.text.MessageFormat;
25 import java.text.ParseException;
26 import java.util.ArrayList;
27 import java.util.Collections;
28 import java.util.Comparator;
29 import java.util.HashMap;
30 import java.util.LinkedHashSet;
31 import java.util.List;
32 import java.util.Map;
33 import java.util.Set;
34 import java.util.TreeMap;
35 import java.util.TreeSet;
36 import java.util.concurrent.ConcurrentHashMap;
37
38 import org.apache.lucene.analysis.Analyzer;
39 import org.apache.lucene.analysis.standard.StandardAnalyzer;
40 import org.apache.lucene.document.DateTools;
41 import org.apache.lucene.document.DateTools.Resolution;
42 import org.apache.lucene.document.Document;
43 import org.apache.lucene.document.Field;
db9832 44 import org.apache.lucene.document.StringField;
JM 45 import org.apache.lucene.document.TextField;
46 import org.apache.lucene.index.DirectoryReader;
d896e6 47 import org.apache.lucene.index.IndexReader;
JM 48 import org.apache.lucene.index.IndexWriter;
49 import org.apache.lucene.index.IndexWriterConfig;
50 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
51 import org.apache.lucene.index.MultiReader;
52 import org.apache.lucene.index.Term;
db9832 53 import org.apache.lucene.queryparser.classic.QueryParser;
d896e6 54 import org.apache.lucene.search.BooleanClause.Occur;
JM 55 import org.apache.lucene.search.BooleanQuery;
56 import org.apache.lucene.search.IndexSearcher;
57 import org.apache.lucene.search.Query;
58 import org.apache.lucene.search.ScoreDoc;
59 import org.apache.lucene.search.TopScoreDocCollector;
60 import org.apache.lucene.search.highlight.Fragmenter;
61 import org.apache.lucene.search.highlight.Highlighter;
62 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
63 import org.apache.lucene.search.highlight.QueryScorer;
64 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
65 import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
66 import org.apache.lucene.store.Directory;
67 import org.apache.lucene.store.FSDirectory;
68 import org.apache.lucene.util.Version;
69 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
70 import org.eclipse.jgit.lib.Constants;
a02998 71 import org.eclipse.jgit.lib.FileMode;
d896e6 72 import org.eclipse.jgit.lib.ObjectId;
JM 73 import org.eclipse.jgit.lib.ObjectLoader;
74 import org.eclipse.jgit.lib.ObjectReader;
e31da0 75 import org.eclipse.jgit.lib.Repository;
6ef2fc 76 import org.eclipse.jgit.lib.RepositoryCache.FileKey;
d896e6 77 import org.eclipse.jgit.revwalk.RevCommit;
JM 78 import org.eclipse.jgit.revwalk.RevTree;
79 import org.eclipse.jgit.revwalk.RevWalk;
80 import org.eclipse.jgit.storage.file.FileBasedConfig;
81 import org.eclipse.jgit.treewalk.EmptyTreeIterator;
82 import org.eclipse.jgit.treewalk.TreeWalk;
83 import org.eclipse.jgit.util.FS;
e31da0 84 import org.slf4j.Logger;
JM 85 import org.slf4j.LoggerFactory;
86
d896e6 87 import com.gitblit.Constants.SearchObjectType;
7bf6e1 88 import com.gitblit.IStoredSettings;
JM 89 import com.gitblit.Keys;
db4f6b 90 import com.gitblit.manager.IRepositoryManager;
d896e6 91 import com.gitblit.models.PathModel.PathChangeModel;
JM 92 import com.gitblit.models.RefModel;
40ca5c 93 import com.gitblit.models.RepositoryModel;
d896e6 94 import com.gitblit.models.SearchResult;
JM 95 import com.gitblit.utils.ArrayUtils;
e31da0 96 import com.gitblit.utils.JGitUtils;
d896e6 97 import com.gitblit.utils.StringUtils;
e31da0 98
JM 99 /**
7bf6e1 100  * The Lucene service handles indexing and searching repositories.
699e71 101  *
e31da0 102  * @author James Moger
699e71 103  *
e31da0 104  */
7bf6e1 105 public class LuceneService implements Runnable {
699e71 106
JM 107
3a4470 108     private static final int INDEX_VERSION = 6;
e31da0 109
d896e6 110     private static final String FIELD_OBJECT_TYPE = "type";
JM 111     private static final String FIELD_PATH = "path";
112     private static final String FIELD_COMMIT = "commit";
113     private static final String FIELD_BRANCH = "branch";
114     private static final String FIELD_SUMMARY = "summary";
115     private static final String FIELD_CONTENT = "content";
116     private static final String FIELD_AUTHOR = "author";
117     private static final String FIELD_COMMITTER = "committer";
118     private static final String FIELD_DATE = "date";
119     private static final String FIELD_TAG = "tag";
120
121     private static final String CONF_FILE = "lucene.conf";
122     private static final String LUCENE_DIR = "lucene";
123     private static final String CONF_INDEX = "index";
124     private static final String CONF_VERSION = "version";
125     private static final String CONF_ALIAS = "aliases";
126     private static final String CONF_BRANCH = "branches";
699e71 127
3a4470 128     private static final Version LUCENE_VERSION = Version.LUCENE_4_10_0;
699e71 129
7bf6e1 130     private final Logger logger = LoggerFactory.getLogger(LuceneService.class);
699e71 131
d896e6 132     private final IStoredSettings storedSettings;
cacf8b 133     private final IRepositoryManager repositoryManager;
d896e6 134     private final File repositoriesFolder;
699e71 135
d896e6 136     private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
JM 137     private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();
699e71 138
f1d2ad 139     private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip";
JM 140     private Set<String> excludedExtensions;
699e71 141
7bf6e1 142     public LuceneService(
cacf8b 143             IStoredSettings settings,
JM 144             IRepositoryManager repositoryManager) {
145
d896e6 146         this.storedSettings = settings;
cacf8b 147         this.repositoryManager = repositoryManager;
JM 148         this.repositoriesFolder = repositoryManager.getRepositoriesFolder();
462488 149         String exts = luceneIgnoreExtensions;
JM 150         if (settings != null) {
151             exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts);
152         }
153         excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
e31da0 154     }
JM 155
156     /**
699e71 157      * Run is executed by the Gitblit executor service.  Because this is called
273cb9 158      * by an executor service, calls will queue - i.e. there can never be
JM 159      * concurrent execution of repository index updates.
e31da0 160      */
JM 161     @Override
162     public void run() {
7db092 163         if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) {
JM 164             // Lucene indexing is disabled
165             return;
166         }
f1d2ad 167         // reload the excluded extensions
JM 168         String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions);
169         excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
170
db4f6b 171         if (repositoryManager.isCollectingGarbage()) {
dad8b4 172             // busy collecting garbage, try again later
JM 173             return;
174         }
699e71 175
db4f6b 176         for (String repositoryName: repositoryManager.getRepositoryList()) {
JM 177             RepositoryModel model = repositoryManager.getRepositoryModel(repositoryName);
40ca5c 178             if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
db4f6b 179                 Repository repository = repositoryManager.getRepository(model.name);
e92c6d 180                 if (repository == null) {
db4f6b 181                     if (repositoryManager.isCollectingGarbage(model.name)) {
e92c6d 182                         logger.info(MessageFormat.format("Skipping Lucene index of {0}, busy garbage collecting", repositoryName));
JM 183                     }
184                     continue;
185                 }
699e71 186                 index(model, repository);
40ca5c 187                 repository.close();
JM 188                 System.gc();
e31da0 189             }
JM 190         }
191     }
192
193     /**
194      * Synchronously indexes a repository. This may build a complete index of a
195      * repository or it may update an existing index.
699e71 196      *
3ad13e 197      * @param displayName
e31da0 198      *            the name of the repository
JM 199      * @param repository
200      *            the repository object
201      */
9f6ef3 202     private void index(RepositoryModel model, Repository repository) {
e31da0 203         try {
40ca5c 204             if (shouldReindex(repository)) {
JM 205                 // (re)build the entire index
206                 IndexResult result = reindex(model, repository);
207
208                 if (result.success) {
209                     if (result.commitCount > 0) {
210                         String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
211                         logger.info(MessageFormat.format(msg, model.name, result.commitCount,
212                                 result.blobCount, result.branchCount, result.duration()));
e31da0 213                     }
JM 214                 } else {
40ca5c 215                     String msg = "Could not build {0} Lucene index!";
JM 216                     logger.error(MessageFormat.format(msg, model.name));
e31da0 217                 }
JM 218             } else {
40ca5c 219                 // update the index with latest commits
JM 220                 IndexResult result = updateIndex(model, repository);
221                 if (result.success) {
222                     if (result.commitCount > 0) {
223                         String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
224                         logger.info(MessageFormat.format(msg, model.name, result.commitCount,
225                                 result.blobCount, result.branchCount, result.duration()));
226                     }
227                 } else {
228                     String msg = "Could not update {0} Lucene index!";
229                     logger.error(MessageFormat.format(msg, model.name));
230                 }
e31da0 231             }
JM 232         } catch (Throwable t) {
40ca5c 233             logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t);
e31da0 234         }
JM 235     }
699e71 236
e6637c 237     /**
JM 238      * Close the writer/searcher objects for a repository.
699e71 239      *
e6637c 240      * @param repositoryName
JM 241      */
8e9988 242     public synchronized void close(String repositoryName) {
JM 243         try {
244             IndexSearcher searcher = searchers.remove(repositoryName);
245             if (searcher != null) {
246                 searcher.getIndexReader().close();
247             }
248         } catch (Exception e) {
249             logger.error("Failed to close index searcher for " + repositoryName, e);
250         }
699e71 251
e6637c 252         try {
JM 253             IndexWriter writer = writers.remove(repositoryName);
254             if (writer != null) {
255                 writer.close();
256             }
257         } catch (Exception e) {
258             logger.error("Failed to close index writer for " + repositoryName, e);
699e71 259         }
e6637c 260     }
b938ae 261
JM 262     /**
263      * Close all Lucene indexers.
699e71 264      *
b938ae 265      */
8e9988 266     public synchronized void close() {
d896e6 267         // close all writers
JM 268         for (String writer : writers.keySet()) {
269             try {
60110f 270                 writers.get(writer).close(true);
d896e6 271             } catch (Throwable t) {
JM 272                 logger.error("Failed to close Lucene writer for " + writer, t);
273             }
274         }
275         writers.clear();
276
277         // close all searchers
278         for (String searcher : searchers.keySet()) {
279             try {
8e9988 280                 searchers.get(searcher).getIndexReader().close();
d896e6 281             } catch (Throwable t) {
JM 282                 logger.error("Failed to close Lucene searcher for " + searcher, t);
283             }
284         }
285         searchers.clear();
286     }
287
699e71 288
d896e6 289     /**
JM 290      * Deletes the Lucene index for the specified repository.
699e71 291      *
d896e6 292      * @param repositoryName
JM 293      * @return true, if successful
294      */
295     public boolean deleteIndex(String repositoryName) {
296         try {
8e9988 297             // close any open writer/searcher
JM 298             close(repositoryName);
299
d896e6 300             // delete the index folder
eb741a 301             File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repositoryName), FS.DETECTED);
d896e6 302             File luceneIndex = new File(repositoryFolder, LUCENE_DIR);
JM 303             if (luceneIndex.exists()) {
304                 org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
305                         org.eclipse.jgit.util.FileUtils.RECURSIVE);
306             }
307             // delete the config file
308             File luceneConfig = new File(repositoryFolder, CONF_FILE);
309             if (luceneConfig.exists()) {
310                 luceneConfig.delete();
311             }
312             return true;
313         } catch (IOException e) {
314             throw new RuntimeException(e);
315         }
316     }
699e71 317
d896e6 318     /**
JM 319      * Returns the author for the commit, if this information is available.
699e71 320      *
d896e6 321      * @param commit
JM 322      * @return an author or unknown
323      */
324     private String getAuthor(RevCommit commit) {
325         String name = "unknown";
326         try {
327             name = commit.getAuthorIdent().getName();
328             if (StringUtils.isEmpty(name)) {
329                 name = commit.getAuthorIdent().getEmailAddress();
330             }
699e71 331         } catch (NullPointerException n) {
d896e6 332         }
JM 333         return name;
334     }
699e71 335
d896e6 336     /**
JM 337      * Returns the committer for the commit, if this information is available.
699e71 338      *
d896e6 339      * @param commit
JM 340      * @return an committer or unknown
341      */
342     private String getCommitter(RevCommit commit) {
343         String name = "unknown";
344         try {
345             name = commit.getCommitterIdent().getName();
346             if (StringUtils.isEmpty(name)) {
347                 name = commit.getCommitterIdent().getEmailAddress();
348             }
699e71 349         } catch (NullPointerException n) {
d896e6 350         }
JM 351         return name;
352     }
699e71 353
905d31 354     /**
JM 355      * Get the tree associated with the given commit.
356      *
357      * @param walk
358      * @param commit
359      * @return tree
360      * @throws IOException
361      */
9f6ef3 362     private RevTree getTree(final RevWalk walk, final RevCommit commit)
905d31 363             throws IOException {
JM 364         final RevTree tree = commit.getTree();
365         if (tree != null) {
366             return tree;
367         }
368         walk.parseHeaders(commit);
369         return commit.getTree();
370     }
d896e6 371
JM 372     /**
373      * Construct a keyname from the branch.
699e71 374      *
d896e6 375      * @param branchName
JM 376      * @return a keyname appropriate for the Git config file format
377      */
378     private String getBranchKey(String branchName) {
379         return StringUtils.getSHA1(branchName);
380     }
381
382     /**
383      * Returns the Lucene configuration for the specified repository.
699e71 384      *
d896e6 385      * @param repository
JM 386      * @return a config object
387      */
388     private FileBasedConfig getConfig(Repository repository) {
389         File file = new File(repository.getDirectory(), CONF_FILE);
390         FileBasedConfig config = new FileBasedConfig(file, FS.detect());
391         return config;
392     }
393
394     /**
395      * Reads the Lucene config file for the repository to check the index
396      * version. If the index version is different, then rebuild the repository
397      * index.
699e71 398      *
d896e6 399      * @param repository
JM 400      * @return true of the on-disk index format is different than INDEX_VERSION
401      */
9f6ef3 402     private boolean shouldReindex(Repository repository) {
d896e6 403         try {
JM 404             FileBasedConfig config = getConfig(repository);
405             config.load();
406             int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
407             // reindex if versions do not match
408             return indexVersion != INDEX_VERSION;
409         } catch (Throwable t) {
410         }
411         return true;
412     }
413
414
415     /**
416      * This completely indexes the repository and will destroy any existing
417      * index.
699e71 418      *
d896e6 419      * @param repositoryName
JM 420      * @param repository
421      * @return IndexResult
422      */
40ca5c 423     public IndexResult reindex(RepositoryModel model, Repository repository) {
699e71 424         IndexResult result = new IndexResult();
40ca5c 425         if (!deleteIndex(model.name)) {
d896e6 426             return result;
JM 427         }
fa0afc 428         try {
JM 429             String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
d896e6 430             FileBasedConfig config = getConfig(repository);
JM 431             Set<String> indexedCommits = new TreeSet<String>();
40ca5c 432             IndexWriter writer = getIndexWriter(model.name);
d896e6 433             // build a quick lookup of tags
JM 434             Map<String, List<String>> tags = new HashMap<String, List<String>>();
435             for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
436                 if (!tag.isAnnotatedTag()) {
437                     // skip non-annotated tags
438                     continue;
439                 }
d0bb38 440                 if (!tags.containsKey(tag.getReferencedObjectId().getName())) {
d896e6 441                     tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
JM 442                 }
443                 tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
444             }
699e71 445
d896e6 446             ObjectReader reader = repository.newObjectReader();
JM 447
448             // get the local branches
449             List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
699e71 450
d896e6 451             // sort them by most recently updated
JM 452             Collections.sort(branches, new Comparator<RefModel>() {
453                 @Override
454                 public int compare(RefModel ref1, RefModel ref2) {
455                     return ref2.getDate().compareTo(ref1.getDate());
456                 }
457             });
699e71 458
d896e6 459             // reorder default branch to first position
JM 460             RefModel defaultBranch = null;
461             ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
462             for (RefModel branch :  branches) {
463                 if (branch.getObjectId().equals(defaultBranchId)) {
1aabf0 464                     defaultBranch = branch;
d896e6 465                     break;
JM 466                 }
467             }
468             branches.remove(defaultBranch);
469             branches.add(0, defaultBranch);
699e71 470
d896e6 471             // walk through each branch
JM 472             for (RefModel branch : branches) {
40ca5c 473
1aabf0 474                 boolean indexBranch = false;
JM 475                 if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
476                         && branch.equals(defaultBranch)) {
477                     // indexing "default" branch
478                     indexBranch = true;
c134a0 479                 } else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
JM 480                     // skip internal meta branches
1aabf0 481                     indexBranch = false;
JM 482                 } else {
483                     // normal explicit branch check
484                     indexBranch = model.indexedBranches.contains(branch.getName());
485                 }
699e71 486
40ca5c 487                 // if this branch is not specifically indexed then skip
1aabf0 488                 if (!indexBranch) {
d896e6 489                     continue;
JM 490                 }
491
492                 String branchName = branch.getName();
493                 RevWalk revWalk = new RevWalk(reader);
494                 RevCommit tip = revWalk.parseCommit(branch.getObjectId());
495                 String tipId = tip.getId().getName();
496
497                 String keyName = getBranchKey(branchName);
498                 config.setString(CONF_ALIAS, null, keyName, branchName);
499                 config.setString(CONF_BRANCH, null, keyName, tipId);
500
501                 // index the blob contents of the tree
502                 TreeWalk treeWalk = new TreeWalk(repository);
503                 treeWalk.addTree(tip.getTree());
699e71 504                 treeWalk.setRecursive(true);
JM 505
d896e6 506                 Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
JM 507                 while (treeWalk.next()) {
749110 508                     // ensure path is not in a submodule
a02998 509                     if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
PA 510                         paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
511                     }
699e71 512                 }
d896e6 513
JM 514                 ByteArrayOutputStream os = new ByteArrayOutputStream();
515                 byte[] tmp = new byte[32767];
516
517                 RevWalk commitWalk = new RevWalk(reader);
518                 commitWalk.markStart(tip);
699e71 519
d896e6 520                 RevCommit commit;
JM 521                 while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
522                     TreeWalk diffWalk = new TreeWalk(reader);
523                     int parentCount = commit.getParentCount();
524                     switch (parentCount) {
525                     case 0:
526                         diffWalk.addTree(new EmptyTreeIterator());
527                         break;
528                     case 1:
529                         diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
530                         break;
531                     default:
532                         // skip merge commits
533                         continue;
534                     }
535                     diffWalk.addTree(getTree(commitWalk, commit));
536                     diffWalk.setFilter(ANY_DIFF);
537                     diffWalk.setRecursive(true);
538                     while ((paths.size() > 0) && diffWalk.next()) {
539                         String path = diffWalk.getPathString();
540                         if (!paths.containsKey(path)) {
541                             continue;
542                         }
699e71 543
d896e6 544                         // remove path from set
JM 545                         ObjectId blobId = paths.remove(path);
546                         result.blobCount++;
699e71 547
d896e6 548                         // index the blob metadata
JM 549                         String blobAuthor = getAuthor(commit);
550                         String blobCommitter = getCommitter(commit);
551                         String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
552                                 Resolution.MINUTE);
699e71 553
d896e6 554                         Document doc = new Document();
db9832 555                         doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
JM 556                         doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
557                         doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
558                         doc.add(new Field(FIELD_PATH, path, TextField.TYPE_STORED));
559                         doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED));
560                         doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED));
561                         doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED));
d896e6 562
JM 563                         // determine extension to compare to the extension
564                         // blacklist
565                         String ext = null;
566                         String name = path.toLowerCase();
567                         if (name.indexOf('.') > -1) {
568                             ext = name.substring(name.lastIndexOf('.') + 1);
569                         }
570
571                         // index the blob content
699e71 572                         if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
d896e6 573                             ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
699e71 574                             InputStream in = ldr.openStream();
d896e6 575                             int n;
JM 576                             while ((n = in.read(tmp)) > 0) {
577                                 os.write(tmp, 0, n);
578                             }
579                             in.close();
580                             byte[] content = os.toByteArray();
699e71 581                             String str = StringUtils.decodeString(content, encodings);
db9832 582                             doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
d896e6 583                             os.reset();
699e71 584                         }
JM 585
d896e6 586                         // add the blob to the index
JM 587                         writer.addDocument(doc);
588                     }
589                 }
590
591                 os.close();
592
593                 // index the tip commit object
594                 if (indexedCommits.add(tipId)) {
595                     Document doc = createDocument(tip, tags.get(tipId));
db9832 596                     doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
d896e6 597                     writer.addDocument(doc);
JM 598                     result.commitCount += 1;
599                     result.branchCount += 1;
600                 }
601
602                 // traverse the log and index the previous commit objects
603                 RevWalk historyWalk = new RevWalk(reader);
604                 historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
605                 RevCommit rev;
606                 while ((rev = historyWalk.next()) != null) {
607                     String hash = rev.getId().getName();
608                     if (indexedCommits.add(hash)) {
609                         Document doc = createDocument(rev, tags.get(hash));
db9832 610                         doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
d896e6 611                         writer.addDocument(doc);
JM 612                         result.commitCount += 1;
613                     }
614                 }
615             }
616
617             // finished
a1cee6 618             reader.close();
699e71 619
d896e6 620             // commit all changes and reset the searcher
JM 621             config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
622             config.save();
623             writer.commit();
8e9988 624             resetIndexSearcher(model.name);
d896e6 625             result.success();
JM 626         } catch (Exception e) {
40ca5c 627             logger.error("Exception while reindexing " + model.name, e);
d896e6 628         }
JM 629         return result;
630     }
699e71 631
d896e6 632     /**
JM 633      * Incrementally update the index with the specified commit for the
634      * repository.
699e71 635      *
d896e6 636      * @param repositoryName
JM 637      * @param repository
638      * @param branch
639      *            the fully qualified branch name (e.g. refs/heads/master)
640      * @param commit
641      * @return true, if successful
642      */
699e71 643     private IndexResult index(String repositoryName, Repository repository,
d896e6 644             String branch, RevCommit commit) {
JM 645         IndexResult result = new IndexResult();
646         try {
ae9e15 647             String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
d896e6 648             List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
JM 649             String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
650                     Resolution.MINUTE);
651             IndexWriter writer = getIndexWriter(repositoryName);
652             for (PathChangeModel path : changedPaths) {
88fb67 653                 if (path.isSubmodule()) {
JM 654                     continue;
655                 }
d896e6 656                 // delete the indexed blob
856091 657                 deleteBlob(repositoryName, branch, path.name);
d896e6 658
JM 659                 // re-index the blob
660                 if (!ChangeType.DELETE.equals(path.changeType)) {
661                     result.blobCount++;
662                     Document doc = new Document();
db9832 663                     doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
JM 664                     doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
665                     doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
666                     doc.add(new Field(FIELD_PATH, path.path, TextField.TYPE_STORED));
667                     doc.add(new Field(FIELD_DATE, revDate, StringField.TYPE_STORED));
668                     doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
669                     doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
d896e6 670
JM 671                     // determine extension to compare to the extension
672                     // blacklist
673                     String ext = null;
674                     String name = path.name.toLowerCase();
675                     if (name.indexOf('.') > -1) {
676                         ext = name.substring(name.lastIndexOf('.') + 1);
677                     }
678
679                     if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
680                         // read the blob content
681                         String str = JGitUtils.getStringContent(repository, commit.getTree(),
ae9e15 682                                 path.path, encodings);
749110 683                         if (str != null) {
db9832 684                             doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
749110 685                             writer.addDocument(doc);
JM 686                         }
d896e6 687                     }
JM 688                 }
689             }
690             writer.commit();
699e71 691
261024 692             // get any annotated commit tags
JM 693             List<String> commitTags = new ArrayList<String>();
33ceba 694             for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
261024 695                 if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
JM 696                     commitTags.add(ref.displayName);
697                 }
698             }
699e71 699
261024 700             // create and write the Lucene document
JM 701             Document doc = createDocument(commit, commitTags);
db9832 702             doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
d896e6 703             result.commitCount++;
JM 704             result.success = index(repositoryName, doc);
705         } catch (Exception e) {
706             logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e);
707         }
708         return result;
709     }
710
711     /**
712      * Delete a blob from the specified branch of the repository index.
699e71 713      *
d896e6 714      * @param repositoryName
JM 715      * @param branch
716      * @param path
717      * @throws Exception
87ee94 718      * @return true, if deleted, false if no record was deleted
d896e6 719      */
87ee94 720     public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception {
JM 721         String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH);
722         String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path);
699e71 723
87ee94 724         BooleanQuery query = new BooleanQuery();
60110f 725         StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
JM 726         QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
87ee94 727         query.add(qp.parse(q), Occur.MUST);
JM 728
d896e6 729         IndexWriter writer = getIndexWriter(repositoryName);
87ee94 730         int numDocsBefore = writer.numDocs();
699e71 731         writer.deleteDocuments(query);
d896e6 732         writer.commit();
87ee94 733         int numDocsAfter = writer.numDocs();
JM 734         if (numDocsBefore == numDocsAfter) {
735             logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
736             return false;
737         } else {
738             logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
739             return true;
740         }
d896e6 741     }
JM 742
743     /**
744      * Updates a repository index incrementally from the last indexed commits.
699e71 745      *
40ca5c 746      * @param model
d896e6 747      * @param repository
JM 748      * @return IndexResult
749      */
9f6ef3 750     private IndexResult updateIndex(RepositoryModel model, Repository repository) {
d896e6 751         IndexResult result = new IndexResult();
JM 752         try {
753             FileBasedConfig config = getConfig(repository);
754             config.load();
755
756             // build a quick lookup of annotated tags
757             Map<String, List<String>> tags = new HashMap<String, List<String>>();
758             for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
759                 if (!tag.isAnnotatedTag()) {
760                     // skip non-annotated tags
761                     continue;
762                 }
b1d77a 763                 if (!tags.containsKey(tag.getObjectId().getName())) {
d896e6 764                     tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
JM 765                 }
766                 tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
767             }
768
769             // detect branch deletion
770             // first assume all branches are deleted and then remove each
771             // existing branch from deletedBranches during indexing
772             Set<String> deletedBranches = new TreeSet<String>();
773             for (String alias : config.getNames(CONF_ALIAS)) {
774                 String branch = config.getString(CONF_ALIAS, null, alias);
775                 deletedBranches.add(branch);
776             }
777
1aabf0 778             // get the local branches
d896e6 779             List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
699e71 780
1aabf0 781             // sort them by most recently updated
JM 782             Collections.sort(branches, new Comparator<RefModel>() {
783                 @Override
784                 public int compare(RefModel ref1, RefModel ref2) {
785                     return ref2.getDate().compareTo(ref1.getDate());
786                 }
787             });
699e71 788
1aabf0 789             // reorder default branch to first position
JM 790             RefModel defaultBranch = null;
791             ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
792             for (RefModel branch :  branches) {
793                 if (branch.getObjectId().equals(defaultBranchId)) {
794                     defaultBranch = branch;
795                     break;
796                 }
797             }
798             branches.remove(defaultBranch);
799             branches.add(0, defaultBranch);
699e71 800
1aabf0 801             // walk through each branches
d896e6 802             for (RefModel branch : branches) {
JM 803                 String branchName = branch.getName();
804
1aabf0 805                 boolean indexBranch = false;
JM 806                 if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
807                         && branch.equals(defaultBranch)) {
808                     // indexing "default" branch
809                     indexBranch = true;
c134a0 810                 } else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
JM 811                     // ignore internal meta branches
a04808 812                     indexBranch = false;
1aabf0 813                 } else {
JM 814                     // normal explicit branch check
815                     indexBranch = model.indexedBranches.contains(branch.getName());
816                 }
699e71 817
1aabf0 818                 // if this branch is not specifically indexed then skip
JM 819                 if (!indexBranch) {
40ca5c 820                     continue;
JM 821                 }
699e71 822
d896e6 823                 // remove this branch from the deletedBranches set
JM 824                 deletedBranches.remove(branchName);
699e71 825
d896e6 826                 // determine last commit
JM 827                 String keyName = getBranchKey(branchName);
828                 String lastCommit = config.getString(CONF_BRANCH, null, keyName);
829
830                 List<RevCommit> revs;
831                 if (StringUtils.isEmpty(lastCommit)) {
832                     // new branch/unindexed branch, get all commits on branch
833                     revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
834                 } else {
835                     // pre-existing branch, get changes since last commit
836                     revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
837                 }
838
839                 if (revs.size() > 0) {
840                     result.branchCount += 1;
841                 }
699e71 842
JM 843                 // reverse the list of commits so we start with the first commit
d896e6 844                 Collections.reverse(revs);
699e71 845                 for (RevCommit commit : revs) {
a04808 846                     // index a commit
JM 847                     result.add(index(model.name, repository, branchName, commit));
d896e6 848                 }
JM 849
850                 // update the config
851                 config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
852                 config.setString(CONF_ALIAS, null, keyName, branchName);
853                 config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
854                 config.save();
855             }
856
857             // the deletedBranches set will normally be empty by this point
858             // unless a branch really was deleted and no longer exists
859             if (deletedBranches.size() > 0) {
860                 for (String branch : deletedBranches) {
40ca5c 861                     IndexWriter writer = getIndexWriter(model.name);
d896e6 862                     writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
JM 863                     writer.commit();
864                 }
865             }
866             result.success = true;
867         } catch (Throwable t) {
40ca5c 868             logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
d896e6 869         }
JM 870         return result;
871     }
699e71 872
d896e6 873     /**
JM 874      * Creates a Lucene document for a commit
699e71 875      *
d896e6 876      * @param commit
JM 877      * @param tags
878      * @return a Lucene document
879      */
880     private Document createDocument(RevCommit commit, List<String> tags) {
881         Document doc = new Document();
db9832 882         doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), StringField.TYPE_STORED));
JM 883         doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
d896e6 884         doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
db9832 885                 Resolution.MINUTE), StringField.TYPE_STORED));
JM 886         doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
887         doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
888         doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), TextField.TYPE_STORED));
889         doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), TextField.TYPE_STORED));
d896e6 890         if (!ArrayUtils.isEmpty(tags)) {
db9832 891             doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), TextField.TYPE_STORED));
d896e6 892         }
JM 893         return doc;
894     }
895
896     /**
897      * Incrementally index an object for the repository.
699e71 898      *
d896e6 899      * @param repositoryName
JM 900      * @param doc
901      * @return true, if successful
902      */
903     private boolean index(String repositoryName, Document doc) {
699e71 904         try {
d896e6 905             IndexWriter writer = getIndexWriter(repositoryName);
JM 906             writer.addDocument(doc);
907             writer.commit();
8e9988 908             resetIndexSearcher(repositoryName);
d896e6 909             return true;
JM 910         } catch (Exception e) {
911             logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
912         }
913         return false;
914     }
915
d04009 916     private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException {
d896e6 917         SearchResult result = new SearchResult();
d04009 918         result.hitId = hitId;
JM 919         result.totalHits = totalHits;
d896e6 920         result.score = score;
JM 921         result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
699e71 922         result.summary = doc.get(FIELD_SUMMARY);
d896e6 923         result.author = doc.get(FIELD_AUTHOR);
JM 924         result.committer = doc.get(FIELD_COMMITTER);
925         result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
926         result.branch = doc.get(FIELD_BRANCH);
927         result.commitId = doc.get(FIELD_COMMIT);
928         result.path = doc.get(FIELD_PATH);
929         if (doc.get(FIELD_TAG) != null) {
930             result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
931         }
932         return result;
933     }
934
935     private synchronized void resetIndexSearcher(String repository) throws IOException {
936         IndexSearcher searcher = searchers.remove(repository);
937         if (searcher != null) {
8e9988 938             searcher.getIndexReader().close();
d896e6 939         }
JM 940     }
941
942     /**
943      * Gets an index searcher for the repository.
699e71 944      *
d896e6 945      * @param repository
JM 946      * @return
947      * @throws IOException
948      */
949     private IndexSearcher getIndexSearcher(String repository) throws IOException {
950         IndexSearcher searcher = searchers.get(repository);
951         if (searcher == null) {
952             IndexWriter writer = getIndexWriter(repository);
db9832 953             searcher = new IndexSearcher(DirectoryReader.open(writer, true));
d896e6 954             searchers.put(repository, searcher);
JM 955         }
956         return searcher;
957     }
958
959     /**
960      * Gets an index writer for the repository. The index will be created if it
961      * does not already exist or if forceCreate is specified.
699e71 962      *
d896e6 963      * @param repository
JM 964      * @return an IndexWriter
965      * @throws IOException
966      */
967     private IndexWriter getIndexWriter(String repository) throws IOException {
699e71 968         IndexWriter indexWriter = writers.get(repository);
6ef2fc 969         File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED);
d896e6 970         File indexFolder = new File(repositoryFolder, LUCENE_DIR);
60110f 971         Directory directory = FSDirectory.open(indexFolder);
d896e6 972
JM 973         if (indexWriter == null) {
974             if (!indexFolder.exists()) {
975                 indexFolder.mkdirs();
976             }
60110f 977             StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
JM 978             IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
d896e6 979             config.setOpenMode(OpenMode.CREATE_OR_APPEND);
JM 980             indexWriter = new IndexWriter(directory, config);
981             writers.put(repository, indexWriter);
982         }
983         return indexWriter;
984     }
985
986     /**
987      * Searches the specified repositories for the given text or query
699e71 988      *
d896e6 989      * @param text
JM 990      *            if the text is null or empty, null is returned
d04009 991      * @param page
JM 992      *            the page number to retrieve. page is 1-indexed.
993      * @param pageSize
994      *            the number of elements to return for this page
d896e6 995      * @param repositories
JM 996      *            a list of repositories to search. if no repositories are
997      *            specified null is returned.
998      * @return a list of SearchResults in order from highest to the lowest score
699e71 999      *
d896e6 1000      */
d04009 1001     public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) {
d896e6 1002         if (ArrayUtils.isEmpty(repositories)) {
JM 1003             return null;
1004         }
d04009 1005         return search(text, page, pageSize, repositories.toArray(new String[0]));
d896e6 1006     }
699e71 1007
d896e6 1008     /**
JM 1009      * Searches the specified repositories for the given text or query
699e71 1010      *
d896e6 1011      * @param text
JM 1012      *            if the text is null or empty, null is returned
d04009 1013      * @param page
JM 1014      *            the page number to retrieve. page is 1-indexed.
1015      * @param pageSize
1016      *            the number of elements to return for this page
d896e6 1017      * @param repositories
JM 1018      *            a list of repositories to search. if no repositories are
1019      *            specified null is returned.
1020      * @return a list of SearchResults in order from highest to the lowest score
699e71 1021      *
d04009 1022      */
JM 1023     public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
d896e6 1024         if (StringUtils.isEmpty(text)) {
JM 1025             return null;
1026         }
1027         if (ArrayUtils.isEmpty(repositories)) {
1028             return null;
1029         }
1030         Set<SearchResult> results = new LinkedHashSet<SearchResult>();
60110f 1031         StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
d896e6 1032         try {
JM 1033             // default search checks summary and content
1034             BooleanQuery query = new BooleanQuery();
1035             QueryParser qp;
60110f 1036             qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
d896e6 1037             qp.setAllowLeadingWildcard(true);
JM 1038             query.add(qp.parse(text), Occur.SHOULD);
1039
60110f 1040             qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
d896e6 1041             qp.setAllowLeadingWildcard(true);
JM 1042             query.add(qp.parse(text), Occur.SHOULD);
699e71 1043
d896e6 1044             IndexSearcher searcher;
JM 1045             if (repositories.length == 1) {
1046                 // single repository search
1047                 searcher = getIndexSearcher(repositories[0]);
1048             } else {
1049                 // multiple repository search
1050                 List<IndexReader> readers = new ArrayList<IndexReader>();
1051                 for (String repository : repositories) {
1052                     IndexSearcher repositoryIndex = getIndexSearcher(repository);
1053                     readers.add(repositoryIndex.getIndexReader());
1054                 }
1055                 IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
905d31 1056                 MultiSourceReader reader = new MultiSourceReader(rdrs);
d896e6 1057                 searcher = new IndexSearcher(reader);
JM 1058             }
699e71 1059
d896e6 1060             Query rewrittenQuery = searcher.rewrite(query);
87ee94 1061             logger.debug(rewrittenQuery.toString());
JM 1062
60110f 1063             TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
d896e6 1064             searcher.search(rewrittenQuery, collector);
d04009 1065             int offset = Math.max(0, (page - 1) * pageSize);
JM 1066             ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
1067             int totalHits = collector.getTotalHits();
d896e6 1068             for (int i = 0; i < hits.length; i++) {
JM 1069                 int docId = hits[i].doc;
1070                 Document doc = searcher.doc(docId);
d04009 1071                 SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
905d31 1072                 if (repositories.length == 1) {
JM 1073                     // single repository search
1074                     result.repository = repositories[0];
1075                 } else {
1076                     // multi-repository search
1077                     MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
1078                     int index = reader.getSourceIndex(docId);
1079                     result.repository = repositories[index];
1080                 }
699e71 1081                 String content = doc.get(FIELD_CONTENT);
d896e6 1082                 result.fragment = getHighlightedFragment(analyzer, query, content, result);
JM 1083                 results.add(result);
1084             }
1085         } catch (Exception e) {
1086             logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
1087         }
1088         return new ArrayList<SearchResult>(results);
1089     }
699e71 1090
d896e6 1091     /**
699e71 1092      *
d896e6 1093      * @param analyzer
JM 1094      * @param query
1095      * @param content
1096      * @param result
1097      * @return
1098      * @throws IOException
1099      * @throws InvalidTokenOffsetsException
1100      */
1101     private String getHighlightedFragment(Analyzer analyzer, Query query,
1102             String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
12c31e 1103         if (content == null) {
JM 1104             content = "";
699e71 1105         }
12c31e 1106
310a80 1107         int tabLength = storedSettings.getInteger(Keys.web.tabLength, 4);
12c31e 1108         int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;
JM 1109
d896e6 1110         QueryScorer scorer = new QueryScorer(query, "content");
699e71 1111         Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);
d896e6 1112
JM 1113         // use an artificial delimiter for the token
9f6ef3 1114         String termTag = "!!--[";
JM 1115         String termTagEnd = "]--!!";
d896e6 1116         SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
699e71 1117         Highlighter highlighter = new Highlighter(formatter, scorer);
d896e6 1118         highlighter.setTextFragmenter(fragmenter);
12c31e 1119
73fba6 1120         String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
d896e6 1121         if (ArrayUtils.isEmpty(fragments)) {
JM 1122             if (SearchObjectType.blob  == result.type) {
1123                 return "";
1124             }
12c31e 1125             // clip commit message
JM 1126             String fragment = content;
1127             if (fragment.length() > fragmentLength) {
1128                 fragment = fragment.substring(0, fragmentLength) + "...";
1129             }
310a80 1130             return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true, tabLength) + "</pre>";
d896e6 1131         }
699e71 1132
2b67ec 1133         // make sure we have unique fragments
JM 1134         Set<String> uniqueFragments = new LinkedHashSet<String>();
1135         for (String fragment : fragments) {
1136             uniqueFragments.add(fragment);
1137         }
1138         fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);
699e71 1139
d896e6 1140         StringBuilder sb = new StringBuilder();
JM 1141         for (int i = 0, len = fragments.length; i < len; i++) {
1142             String fragment = fragments[i];
12c31e 1143             String tag = "<pre class=\"text\">";
JM 1144
d896e6 1145             // resurrect the raw fragment from removing the artificial delimiters
12c31e 1146             String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
JM 1147
1148             // determine position of the raw fragment in the content
2b67ec 1149             int pos = content.indexOf(raw);
699e71 1150
12c31e 1151             // restore complete first line of fragment
JM 1152             int c = pos;
1153             while (c > 0) {
1154                 c--;
1155                 if (content.charAt(c) == '\n') {
1156                     break;
1157                 }
1158             }
1159             if (c > 0) {
1160                 // inject leading chunk of first fragment line
1161                 fragment = content.substring(c + 1, pos) + fragment;
1162             }
699e71 1163
12c31e 1164             if (SearchObjectType.blob  == result.type) {
JM 1165                 // count lines as offset into the content for this fragment
c2833a 1166                 int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));
699e71 1167
12c31e 1168                 // create fragment tag with line number and language
JM 1169                 String lang = "";
1170                 String ext = StringUtils.getFileExtension(result.path).toLowerCase();
1171                 if (!StringUtils.isEmpty(ext)) {
1172                     // maintain leading space!
1173                     lang = " lang-" + ext;
1174                 }
1175                 tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);
699e71 1176
12c31e 1177             }
699e71 1178
12c31e 1179             sb.append(tag);
JM 1180
d896e6 1181             // replace the artificial delimiter with html tags
9f6ef3 1182             String html = StringUtils.escapeForHtml(fragment, false);
JM 1183             html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
d896e6 1184             sb.append(html);
JM 1185             sb.append("</pre>");
1186             if (i < len - 1) {
1187                 sb.append("<span class=\"ellipses\">...</span><br/>");
1188             }
1189         }
1190         return sb.toString();
699e71 1191     }
JM 1192
d896e6 1193     /**
699e71 1194      * Simple class to track the results of an index update.
d896e6 1195      */
JM 1196     private class IndexResult {
1197         long startTime = System.currentTimeMillis();
1198         long endTime = startTime;
1199         boolean success;
1200         int branchCount;
1201         int commitCount;
1202         int blobCount;
699e71 1203
d896e6 1204         void add(IndexResult result) {
JM 1205             this.branchCount += result.branchCount;
1206             this.commitCount += result.commitCount;
1207             this.blobCount += result.blobCount;
1208         }
699e71 1209
d896e6 1210         void success() {
JM 1211             success = true;
1212             endTime = System.currentTimeMillis();
1213         }
699e71 1214
d896e6 1215         float duration() {
JM 1216             return (endTime - startTime)/1000f;
1217         }
b938ae 1218     }
699e71 1219
905d31 1220     /**
JM 1221      * Custom subclass of MultiReader to identify the source index for a given
1222      * doc id.  This would not be necessary of there was a public method to
1223      * obtain this information.
699e71 1224      *
905d31 1225      */
JM 1226     private class MultiSourceReader extends MultiReader {
699e71 1227
60110f 1228         MultiSourceReader(IndexReader [] readers) {
db9832 1229             super(readers, false);
905d31 1230         }
699e71 1231
905d31 1232         int getSourceIndex(int docId) {
JM 1233             int index = -1;
1234             try {
db9832 1235                 index = super.readerIndex(docId);
905d31 1236             } catch (Exception e) {
JM 1237                 logger.error("Error getting source index", e);
1238             }
1239             return index;
1240         }
1241     }
e31da0 1242 }