Paul Martin
2016-04-16 eecaad8b8e2c447429c31a01d49260ddd6b4ee03
src/main/java/com/gitblit/service/LuceneService.java
@@ -19,9 +19,9 @@
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Method;
import java.text.MessageFormat;
import java.text.ParseException;
import java.util.ArrayList;
@@ -42,15 +42,16 @@
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
@@ -66,6 +67,11 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.pdf.PDFParser;
import org.apache.tika.sax.BodyContentHandler;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.FileMode;
@@ -85,8 +91,11 @@
import org.slf4j.LoggerFactory;
import com.gitblit.Constants.SearchObjectType;
import com.gitblit.GitBlit;
import com.gitblit.IStoredSettings;
import com.gitblit.Keys;
import com.gitblit.manager.FilestoreManager;
import com.gitblit.manager.IFilestoreManager;
import com.gitblit.manager.IRepositoryManager;
import com.gitblit.models.PathModel.PathChangeModel;
import com.gitblit.models.RefModel;
@@ -105,7 +114,7 @@
public class LuceneService implements Runnable {
   private static final int INDEX_VERSION = 5;
   private static final int INDEX_VERSION = 6;
   private static final String FIELD_OBJECT_TYPE = "type";
   private static final String FIELD_PATH = "path";
@@ -125,12 +134,14 @@
   private static final String CONF_ALIAS = "aliases";
   private static final String CONF_BRANCH = "branches";
   private static final Version LUCENE_VERSION = Version.LUCENE_35;
   private static final Version LUCENE_VERSION = Version.LUCENE_4_10_0;
   private final Logger logger = LoggerFactory.getLogger(LuceneService.class);
   private final IStoredSettings storedSettings;
   private final IRepositoryManager repositoryManager;
   private final IFilestoreManager filestoreManager;
   private final File repositoriesFolder;
   private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
@@ -141,10 +152,12 @@
   public LuceneService(
         IStoredSettings settings,
         IRepositoryManager repositoryManager) {
         IRepositoryManager repositoryManager,
         IFilestoreManager filestoreManager) {
      this.storedSettings = settings;
      this.repositoryManager = repositoryManager;
      this.filestoreManager = filestoreManager;
      this.repositoriesFolder = repositoryManager.getRepositoriesFolder();
      String exts = luceneIgnoreExtensions;
      if (settings != null) {
@@ -194,7 +207,7 @@
    * Synchronously indexes a repository. This may build a complete index of a
    * repository or it may update an existing index.
    *
    * @param name
    * @param displayName
    *            the name of the repository
    * @param repository
    *            the repository object
@@ -437,7 +450,7 @@
               // skip non-annotated tags
               continue;
            }
            if (!tags.containsKey(tag.getObjectId())) {
            if (!tags.containsKey(tag.getReferencedObjectId().getName())) {
               tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
            }
            tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
@@ -476,8 +489,8 @@
                  && branch.equals(defaultBranch)) {
               // indexing "default" branch
               indexBranch = true;
            } else if (branch.getName().startsWith(com.gitblit.Constants.R_GITBLIT)) {
               // skip Gitblit internal branches
            } else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
               // skip internal meta branches
               indexBranch = false;
            } else {
               // normal explicit branch check
@@ -540,7 +553,8 @@
                  if (!paths.containsKey(path)) {
                     continue;
                  }
//TODO: Figure out the filestore oid for the path - a bit more involved than updating the index
                  // remove path from set
                  ObjectId blobId = paths.remove(path);
                  result.blobCount++;
@@ -552,13 +566,13 @@
                        Resolution.MINUTE);
                  Document doc = new Document();
                  doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
                  doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                  doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
                  doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
                  doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));
                  doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));
                  doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED));
                  doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
                  doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
                  doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
                  doc.add(new Field(FIELD_PATH, path, TextField.TYPE_STORED));
                  doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED));
                  doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED));
                  doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED));
                  // determine extension to compare to the extension
                  // blacklist
@@ -579,7 +593,7 @@
                     in.close();
                     byte[] content = os.toByteArray();
                     String str = StringUtils.decodeString(content, encodings);
                     doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
                     doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
                     os.reset();
                  }
@@ -593,7 +607,7 @@
            // index the tip commit object
            if (indexedCommits.add(tipId)) {
               Document doc = createDocument(tip, tags.get(tipId));
               doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
               writer.addDocument(doc);
               result.commitCount += 1;
               result.branchCount += 1;
@@ -607,7 +621,7 @@
               String hash = rev.getId().getName();
               if (indexedCommits.add(hash)) {
                  Document doc = createDocument(rev, tags.get(hash));
                  doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
                  doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
                  writer.addDocument(doc);
                  result.commitCount += 1;
               }
@@ -615,7 +629,7 @@
         }
         // finished
         reader.release();
         reader.close();
         // commit all changes and reset the searcher
         config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
@@ -660,14 +674,13 @@
            if (!ChangeType.DELETE.equals(path.changeType)) {
               result.blobCount++;
               Document doc = new Document();
               doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
                     Index.NOT_ANALYZED));
               doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
               doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
               doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
               doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
               doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
               doc.add(new Field(FIELD_PATH, path.path, TextField.TYPE_STORED));
               doc.add(new Field(FIELD_DATE, revDate, StringField.TYPE_STORED));
               doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
               doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
               // determine extension to compare to the extension
               // blacklist
@@ -678,11 +691,26 @@
               }
               if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
                  String str = "";
                  // read the blob content
                  String str = JGitUtils.getStringContent(repository, commit.getTree(),
                  if (path.isFilestoreItem()) {
                     //Get file from filestore
                     BodyContentHandler handler = new BodyContentHandler();
                           Metadata metadata = new Metadata();
                           PDFParser parser = new PDFParser();
                           ParseContext parseContext = new ParseContext();
                           File lfsFile = filestoreManager.getStoragePath(path.getFilestoreOid());
                           FileInputStream inputstream = new FileInputStream(lfsFile);
                           parser.parse(inputstream, handler, metadata, parseContext);
                     str = handler.toString();
                  } else {
                     str = JGitUtils.getStringContent(repository, commit.getTree(),
                        path.path, encodings);
                  }
                  if (str != null) {
                     doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
                     doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
                     writer.addDocument(doc);
                  }
               }
@@ -700,7 +728,7 @@
         // create and write the Lucene document
         Document doc = createDocument(commit, commitTags);
         doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
         doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
         result.commitCount++;
         result.success = index(repositoryName, doc);
      } catch (Exception e) {
@@ -761,7 +789,7 @@
               // skip non-annotated tags
               continue;
            }
            if (!tags.containsKey(tag.getObjectId())) {
            if (!tags.containsKey(tag.getObjectId().getName())) {
               tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
            }
            tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
@@ -808,8 +836,8 @@
                  && branch.equals(defaultBranch)) {
               // indexing "default" branch
               indexBranch = true;
            } else if (branch.getName().startsWith(com.gitblit.Constants.R_GITBLIT)) {
               // ignore internal Gitblit branches
            } else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
               // ignore internal meta branches
               indexBranch = false;
            } else {
               // normal explicit branch check
@@ -880,17 +908,16 @@
    */
   private Document createDocument(RevCommit commit, List<String> tags) {
      // Builds the Lucene document for a commit. Fields added: object type
      // (SearchObjectType.commit), commit name, commit date, author, committer,
      // short message (summary), full message (content) and, when tags are
      // supplied, the flattened tag names.
      // NOTE(review): each field appears twice in this span - once through the
      // Field(name, value, Store, Index) constructor and once through the
      // Field(name, value, FieldType) constructor. This looks like the before and
      // after lines of an API migration shown together; confirm which set is
      // current before relying on either.
      Document doc = new Document();
      doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), Store.YES,
            Index.NOT_ANALYZED));
      doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
      doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), StringField.TYPE_STORED));
      doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
      // the commit time is scaled by 1000L (presumably seconds to milliseconds)
      // and formatted at minute resolution
      doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
            Resolution.MINUTE), Store.YES, Index.NO));
      doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
      doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
      doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), Store.YES, Index.ANALYZED));
      doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.YES, Index.ANALYZED));
            Resolution.MINUTE), StringField.TYPE_STORED));
      doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
      doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
      doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), TextField.TYPE_STORED));
      doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), TextField.TYPE_STORED));
      // the tag field is only added when ArrayUtils.isEmpty reports that tags has entries
      if (!ArrayUtils.isEmpty(tags)) {
         doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), Store.YES, Index.ANALYZED));
         doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), TextField.TYPE_STORED));
      }
      return doc;
   }
@@ -952,7 +979,7 @@
      IndexSearcher searcher = searchers.get(repository);
      if (searcher == null) {
         IndexWriter writer = getIndexWriter(repository);
         searcher = new IndexSearcher(IndexReader.open(writer, true));
         searcher = new IndexSearcher(DirectoryReader.open(writer, true));
         searchers.put(repository, searcher);
      }
      return searcher;
@@ -1106,6 +1133,7 @@
         content = "";
      }
      int tabLength = storedSettings.getInteger(Keys.web.tabLength, 4);
      int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;
      QueryScorer scorer = new QueryScorer(query, "content");
@@ -1128,7 +1156,7 @@
         if (fragment.length() > fragmentLength) {
            fragment = fragment.substring(0, fragmentLength) + "...";
         }
         return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true) + "</pre>";
         return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true, tabLength) + "</pre>";
      }
      // make sure we have unique fragments
@@ -1226,25 +1254,14 @@
    */
   private class MultiSourceReader extends MultiReader {
      final Method method;
      MultiSourceReader(IndexReader[] subReaders) {
         super(subReaders);
         Method m = null;
         try {
            m = MultiReader.class.getDeclaredMethod("readerIndex", int.class);
            m.setAccessible(true);
         } catch (Exception e) {
            logger.error("Error getting readerIndex method", e);
         }
         method = m;
      MultiSourceReader(IndexReader [] readers) {
         super(readers, false);
      }
      int getSourceIndex(int docId) {
         int index = -1;
         try {
            Object o = method.invoke(this, docId);
            index = (Integer) o;
            index = super.readerIndex(docId);
         } catch (Exception e) {
            logger.error("Error getting source index", e);
         }