githubFork/gitblit.git - Solinfo Gitblit

Paul Martin

2016-04-16 eecaad8b8e2c447429c31a01d49260ddd6b4ee03

commit \| author \| age
e31da0	1	/*
JM	2	* Copyright 2012 gitblit.com.
	3	*
	4	* Licensed under the Apache License, Version 2.0 (the "License");
	5	* you may not use this file except in compliance with the License.
	6	* You may obtain a copy of the License at
	7	*
	8	* http://www.apache.org/licenses/LICENSE-2.0
	9	*
	10	* Unless required by applicable law or agreed to in writing, software
	11	* distributed under the License is distributed on an "AS IS" BASIS,
	12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	13	* See the License for the specific language governing permissions and
	14	* limitations under the License.
	15	*/
7bf6e1	16	package com.gitblit.service;
e31da0	17
d896e6	18	import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;
e31da0	19
d896e6	20	import java.io.ByteArrayOutputStream;
JM	21	import java.io.File;
eecaad	22	import java.io.FileInputStream;
d896e6	23	import java.io.IOException;
JM	24	import java.io.InputStream;
	25	import java.text.MessageFormat;
	26	import java.text.ParseException;
	27	import java.util.ArrayList;
	28	import java.util.Collections;
	29	import java.util.Comparator;
	30	import java.util.HashMap;
	31	import java.util.LinkedHashSet;
	32	import java.util.List;
	33	import java.util.Map;
	34	import java.util.Set;
	35	import java.util.TreeMap;
	36	import java.util.TreeSet;
	37	import java.util.concurrent.ConcurrentHashMap;
	38
	39	import org.apache.lucene.analysis.Analyzer;
	40	import org.apache.lucene.analysis.standard.StandardAnalyzer;
	41	import org.apache.lucene.document.DateTools;
	42	import org.apache.lucene.document.DateTools.Resolution;
	43	import org.apache.lucene.document.Document;
	44	import org.apache.lucene.document.Field;
db9832	45	import org.apache.lucene.document.StringField;
JM	46	import org.apache.lucene.document.TextField;
	47	import org.apache.lucene.index.DirectoryReader;
d896e6	48	import org.apache.lucene.index.IndexReader;
JM	49	import org.apache.lucene.index.IndexWriter;
	50	import org.apache.lucene.index.IndexWriterConfig;
	51	import org.apache.lucene.index.IndexWriterConfig.OpenMode;
	52	import org.apache.lucene.index.MultiReader;
	53	import org.apache.lucene.index.Term;
db9832	54	import org.apache.lucene.queryparser.classic.QueryParser;
d896e6	55	import org.apache.lucene.search.BooleanClause.Occur;
JM	56	import org.apache.lucene.search.BooleanQuery;
	57	import org.apache.lucene.search.IndexSearcher;
	58	import org.apache.lucene.search.Query;
	59	import org.apache.lucene.search.ScoreDoc;
	60	import org.apache.lucene.search.TopScoreDocCollector;
	61	import org.apache.lucene.search.highlight.Fragmenter;
	62	import org.apache.lucene.search.highlight.Highlighter;
	63	import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
	64	import org.apache.lucene.search.highlight.QueryScorer;
	65	import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
	66	import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
	67	import org.apache.lucene.store.Directory;
	68	import org.apache.lucene.store.FSDirectory;
	69	import org.apache.lucene.util.Version;
eecaad	70	import org.apache.tika.metadata.Metadata;
PM	71	import org.apache.tika.parser.AutoDetectParser;
	72	import org.apache.tika.parser.ParseContext;
	73	import org.apache.tika.parser.pdf.PDFParser;
	74	import org.apache.tika.sax.BodyContentHandler;
d896e6	75	import org.eclipse.jgit.diff.DiffEntry.ChangeType;
JM	76	import org.eclipse.jgit.lib.Constants;
a02998	77	import org.eclipse.jgit.lib.FileMode;
d896e6	78	import org.eclipse.jgit.lib.ObjectId;
JM	79	import org.eclipse.jgit.lib.ObjectLoader;
	80	import org.eclipse.jgit.lib.ObjectReader;
e31da0	81	import org.eclipse.jgit.lib.Repository;
6ef2fc	82	import org.eclipse.jgit.lib.RepositoryCache.FileKey;
d896e6	83	import org.eclipse.jgit.revwalk.RevCommit;
JM	84	import org.eclipse.jgit.revwalk.RevTree;
	85	import org.eclipse.jgit.revwalk.RevWalk;
	86	import org.eclipse.jgit.storage.file.FileBasedConfig;
	87	import org.eclipse.jgit.treewalk.EmptyTreeIterator;
	88	import org.eclipse.jgit.treewalk.TreeWalk;
	89	import org.eclipse.jgit.util.FS;
e31da0	90	import org.slf4j.Logger;
JM	91	import org.slf4j.LoggerFactory;
	92
d896e6	93	import com.gitblit.Constants.SearchObjectType;
eecaad	94	import com.gitblit.GitBlit;
7bf6e1	95	import com.gitblit.IStoredSettings;
JM	96	import com.gitblit.Keys;
eecaad	97	import com.gitblit.manager.FilestoreManager;
PM	98	import com.gitblit.manager.IFilestoreManager;
db4f6b	99	import com.gitblit.manager.IRepositoryManager;
d896e6	100	import com.gitblit.models.PathModel.PathChangeModel;
JM	101	import com.gitblit.models.RefModel;
40ca5c	102	import com.gitblit.models.RepositoryModel;
d896e6	103	import com.gitblit.models.SearchResult;
JM	104	import com.gitblit.utils.ArrayUtils;
e31da0	105	import com.gitblit.utils.JGitUtils;
d896e6	106	import com.gitblit.utils.StringUtils;
e31da0	107
JM	108	/**
7bf6e1	109	* The Lucene service handles indexing and searching repositories.
699e71	110	*
e31da0	111	* @author James Moger
699e71	112	*
e31da0	113	*/
7bf6e1	114	public class LuceneService implements Runnable {
699e71	115
JM	116
3a4470	117	private static final int INDEX_VERSION = 6;
e31da0	118
d896e6	119	private static final String FIELD_OBJECT_TYPE = "type";
JM	120	private static final String FIELD_PATH = "path";
	121	private static final String FIELD_COMMIT = "commit";
	122	private static final String FIELD_BRANCH = "branch";
	123	private static final String FIELD_SUMMARY = "summary";
	124	private static final String FIELD_CONTENT = "content";
	125	private static final String FIELD_AUTHOR = "author";
	126	private static final String FIELD_COMMITTER = "committer";
	127	private static final String FIELD_DATE = "date";
	128	private static final String FIELD_TAG = "tag";
	129
	130	private static final String CONF_FILE = "lucene.conf";
	131	private static final String LUCENE_DIR = "lucene";
	132	private static final String CONF_INDEX = "index";
	133	private static final String CONF_VERSION = "version";
	134	private static final String CONF_ALIAS = "aliases";
	135	private static final String CONF_BRANCH = "branches";
699e71	136
3a4470	137	private static final Version LUCENE_VERSION = Version.LUCENE_4_10_0;
699e71	138
7bf6e1	139	private final Logger logger = LoggerFactory.getLogger(LuceneService.class);
699e71	140
d896e6	141	private final IStoredSettings storedSettings;
cacf8b	142	private final IRepositoryManager repositoryManager;
eecaad	143	private final IFilestoreManager filestoreManager;
PM	144
d896e6	145	private final File repositoriesFolder;
699e71	146
d896e6	147	private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
JM	148	private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();
699e71	149
f1d2ad	150	private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip";
JM	151	private Set<String> excludedExtensions;
699e71	152
7bf6e1	153	public LuceneService(
cacf8b	154	IStoredSettings settings,
eecaad	155	IRepositoryManager repositoryManager,
PM	156	IFilestoreManager filestoreManager) {
cacf8b	157
d896e6	158	this.storedSettings = settings;
cacf8b	159	this.repositoryManager = repositoryManager;
eecaad	160	this.filestoreManager = filestoreManager;
cacf8b	161	this.repositoriesFolder = repositoryManager.getRepositoriesFolder();
462488	162	String exts = luceneIgnoreExtensions;
JM	163	if (settings != null) {
	164	exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts);
	165	}
	166	excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
e31da0	167	}
JM	168
	169	/**
699e71	170	* Run is executed by the Gitblit executor service. Because this is called
273cb9	171	* by an executor service, calls will queue - i.e. there can never be
JM	172	* concurrent execution of repository index updates.
e31da0	173	*/
JM	174	@Override
	175	public void run() {
7db092	176	if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) {
JM	177	// Lucene indexing is disabled
	178	return;
	179	}
f1d2ad	180	// reload the excluded extensions
JM	181	String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions);
	182	excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
	183
db4f6b	184	if (repositoryManager.isCollectingGarbage()) {
dad8b4	185	// busy collecting garbage, try again later
JM	186	return;
	187	}
699e71	188
db4f6b	189	for (String repositoryName: repositoryManager.getRepositoryList()) {
JM	190	RepositoryModel model = repositoryManager.getRepositoryModel(repositoryName);
40ca5c	191	if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
db4f6b	192	Repository repository = repositoryManager.getRepository(model.name);
e92c6d	193	if (repository == null) {
db4f6b	194	if (repositoryManager.isCollectingGarbage(model.name)) {
e92c6d	195	logger.info(MessageFormat.format("Skipping Lucene index of {0}, busy garbage collecting", repositoryName));
JM	196	}
	197	continue;
	198	}
699e71	199	index(model, repository);
40ca5c	200	repository.close();
JM	201	System.gc();
e31da0	202	}
JM	203	}
	204	}
	205
	206	/**
	207	* Synchronously indexes a repository. This may build a complete index of a
	208	* repository or it may update an existing index.
699e71	209	*
3ad13e	210	* @param displayName
e31da0	211	* the name of the repository
JM	212	* @param repository
	213	* the repository object
	214	*/
9f6ef3	215	private void index(RepositoryModel model, Repository repository) {
e31da0	216	try {
40ca5c	217	if (shouldReindex(repository)) {
JM	218	// (re)build the entire index
	219	IndexResult result = reindex(model, repository);
	220
	221	if (result.success) {
	222	if (result.commitCount > 0) {
	223	String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
	224	logger.info(MessageFormat.format(msg, model.name, result.commitCount,
	225	result.blobCount, result.branchCount, result.duration()));
e31da0	226	}
JM	227	} else {
40ca5c	228	String msg = "Could not build {0} Lucene index!";
JM	229	logger.error(MessageFormat.format(msg, model.name));
e31da0	230	}
JM	231	} else {
40ca5c	232	// update the index with latest commits
JM	233	IndexResult result = updateIndex(model, repository);
	234	if (result.success) {
	235	if (result.commitCount > 0) {
	236	String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
	237	logger.info(MessageFormat.format(msg, model.name, result.commitCount,
	238	result.blobCount, result.branchCount, result.duration()));
	239	}
	240	} else {
	241	String msg = "Could not update {0} Lucene index!";
	242	logger.error(MessageFormat.format(msg, model.name));
	243	}
e31da0	244	}
JM	245	} catch (Throwable t) {
40ca5c	246	logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t);
e31da0	247	}
JM	248	}
699e71	249
e6637c	250	/**
JM	251	* Close the writer/searcher objects for a repository.
699e71	252	*
e6637c	253	* @param repositoryName
JM	254	*/
8e9988	255	public synchronized void close(String repositoryName) {
JM	256	try {
	257	IndexSearcher searcher = searchers.remove(repositoryName);
	258	if (searcher != null) {
	259	searcher.getIndexReader().close();
	260	}
	261	} catch (Exception e) {
	262	logger.error("Failed to close index searcher for " + repositoryName, e);
	263	}
699e71	264
e6637c	265	try {
JM	266	IndexWriter writer = writers.remove(repositoryName);
	267	if (writer != null) {
	268	writer.close();
	269	}
	270	} catch (Exception e) {
	271	logger.error("Failed to close index writer for " + repositoryName, e);
699e71	272	}
e6637c	273	}
b938ae	274
JM	275	/**
	276	* Close all Lucene indexers.
699e71	277	*
b938ae	278	*/
8e9988	279	public synchronized void close() {
d896e6	280	// close all writers
JM	281	for (String writer : writers.keySet()) {
	282	try {
60110f	283	writers.get(writer).close(true);
d896e6	284	} catch (Throwable t) {
JM	285	logger.error("Failed to close Lucene writer for " + writer, t);
	286	}
	287	}
	288	writers.clear();
	289
	290	// close all searchers
	291	for (String searcher : searchers.keySet()) {
	292	try {
8e9988	293	searchers.get(searcher).getIndexReader().close();
d896e6	294	} catch (Throwable t) {
JM	295	logger.error("Failed to close Lucene searcher for " + searcher, t);
	296	}
	297	}
	298	searchers.clear();
	299	}
	300
699e71	301
d896e6	302	/**
JM	303	* Deletes the Lucene index for the specified repository.
699e71	304	*
d896e6	305	* @param repositoryName
JM	306	* @return true, if successful
	307	*/
	308	public boolean deleteIndex(String repositoryName) {
	309	try {
8e9988	310	// close any open writer/searcher
JM	311	close(repositoryName);
	312
d896e6	313	// delete the index folder
eb741a	314	File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repositoryName), FS.DETECTED);
d896e6	315	File luceneIndex = new File(repositoryFolder, LUCENE_DIR);
JM	316	if (luceneIndex.exists()) {
	317	org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
	318	org.eclipse.jgit.util.FileUtils.RECURSIVE);
	319	}
	320	// delete the config file
	321	File luceneConfig = new File(repositoryFolder, CONF_FILE);
	322	if (luceneConfig.exists()) {
	323	luceneConfig.delete();
	324	}
	325	return true;
	326	} catch (IOException e) {
	327	throw new RuntimeException(e);
	328	}
	329	}
699e71	330
d896e6	331	/**
JM	332	* Returns the author for the commit, if this information is available.
699e71	333	*
d896e6	334	* @param commit
JM	335	* @return an author or unknown
	336	*/
	337	private String getAuthor(RevCommit commit) {
	338	String name = "unknown";
	339	try {
	340	name = commit.getAuthorIdent().getName();
	341	if (StringUtils.isEmpty(name)) {
	342	name = commit.getAuthorIdent().getEmailAddress();
	343	}
699e71	344	} catch (NullPointerException n) {
d896e6	345	}
JM	346	return name;
	347	}
699e71	348
d896e6	349	/**
JM	350	* Returns the committer for the commit, if this information is available.
699e71	351	*
d896e6	352	* @param commit
JM	353	* @return an committer or unknown
	354	*/
	355	private String getCommitter(RevCommit commit) {
	356	String name = "unknown";
	357	try {
	358	name = commit.getCommitterIdent().getName();
	359	if (StringUtils.isEmpty(name)) {
	360	name = commit.getCommitterIdent().getEmailAddress();
	361	}
699e71	362	} catch (NullPointerException n) {
d896e6	363	}
JM	364	return name;
	365	}
699e71	366
905d31	367	/**
JM	368	* Get the tree associated with the given commit.
	369	*
	370	* @param walk
	371	* @param commit
	372	* @return tree
	373	* @throws IOException
	374	*/
9f6ef3	375	private RevTree getTree(final RevWalk walk, final RevCommit commit)
905d31	376	throws IOException {
JM	377	final RevTree tree = commit.getTree();
	378	if (tree != null) {
	379	return tree;
	380	}
	381	walk.parseHeaders(commit);
	382	return commit.getTree();
	383	}
d896e6	384
JM	385	/**
	386	* Construct a keyname from the branch.
699e71	387	*
d896e6	388	* @param branchName
JM	389	* @return a keyname appropriate for the Git config file format
	390	*/
	391	private String getBranchKey(String branchName) {
	392	return StringUtils.getSHA1(branchName);
	393	}
	394
	395	/**
	396	* Returns the Lucene configuration for the specified repository.
699e71	397	*
d896e6	398	* @param repository
JM	399	* @return a config object
	400	*/
	401	private FileBasedConfig getConfig(Repository repository) {
	402	File file = new File(repository.getDirectory(), CONF_FILE);
	403	FileBasedConfig config = new FileBasedConfig(file, FS.detect());
	404	return config;
	405	}
	406
	407	/**
	408	* Reads the Lucene config file for the repository to check the index
	409	* version. If the index version is different, then rebuild the repository
	410	* index.
699e71	411	*
d896e6	412	* @param repository
JM	413	* @return true of the on-disk index format is different than INDEX_VERSION
	414	*/
9f6ef3	415	private boolean shouldReindex(Repository repository) {
d896e6	416	try {
JM	417	FileBasedConfig config = getConfig(repository);
	418	config.load();
	419	int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
	420	// reindex if versions do not match
	421	return indexVersion != INDEX_VERSION;
	422	} catch (Throwable t) {
	423	}
	424	return true;
	425	}
	426
	427
	428	/**
	429	* This completely indexes the repository and will destroy any existing
	430	* index.
699e71	431	*
d896e6	432	* @param repositoryName
JM	433	* @param repository
	434	* @return IndexResult
	435	*/
40ca5c	436	public IndexResult reindex(RepositoryModel model, Repository repository) {
699e71	437	IndexResult result = new IndexResult();
40ca5c	438	if (!deleteIndex(model.name)) {
d896e6	439	return result;
JM	440	}
fa0afc	441	try {
JM	442	String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
d896e6	443	FileBasedConfig config = getConfig(repository);
JM	444	Set<String> indexedCommits = new TreeSet<String>();
40ca5c	445	IndexWriter writer = getIndexWriter(model.name);
d896e6	446	// build a quick lookup of tags
JM	447	Map<String, List<String>> tags = new HashMap<String, List<String>>();
	448	for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
	449	if (!tag.isAnnotatedTag()) {
	450	// skip non-annotated tags
	451	continue;
	452	}
d0bb38	453	if (!tags.containsKey(tag.getReferencedObjectId().getName())) {
d896e6	454	tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
JM	455	}
	456	tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
	457	}
699e71	458
d896e6	459	ObjectReader reader = repository.newObjectReader();
JM	460
	461	// get the local branches
	462	List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
699e71	463
d896e6	464	// sort them by most recently updated
JM	465	Collections.sort(branches, new Comparator<RefModel>() {
	466	@Override
	467	public int compare(RefModel ref1, RefModel ref2) {
	468	return ref2.getDate().compareTo(ref1.getDate());
	469	}
	470	});
699e71	471
d896e6	472	// reorder default branch to first position
JM	473	RefModel defaultBranch = null;
	474	ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
	475	for (RefModel branch : branches) {
	476	if (branch.getObjectId().equals(defaultBranchId)) {
1aabf0	477	defaultBranch = branch;
d896e6	478	break;
JM	479	}
	480	}
	481	branches.remove(defaultBranch);
	482	branches.add(0, defaultBranch);
699e71	483
d896e6	484	// walk through each branch
JM	485	for (RefModel branch : branches) {
40ca5c	486
1aabf0	487	boolean indexBranch = false;
JM	488	if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
	489	&& branch.equals(defaultBranch)) {
	490	// indexing "default" branch
	491	indexBranch = true;
c134a0	492	} else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
JM	493	// skip internal meta branches
1aabf0	494	indexBranch = false;
JM	495	} else {
	496	// normal explicit branch check
	497	indexBranch = model.indexedBranches.contains(branch.getName());
	498	}
699e71	499
40ca5c	500	// if this branch is not specifically indexed then skip
1aabf0	501	if (!indexBranch) {
d896e6	502	continue;
JM	503	}
	504
	505	String branchName = branch.getName();
	506	RevWalk revWalk = new RevWalk(reader);
	507	RevCommit tip = revWalk.parseCommit(branch.getObjectId());
	508	String tipId = tip.getId().getName();
	509
	510	String keyName = getBranchKey(branchName);
	511	config.setString(CONF_ALIAS, null, keyName, branchName);
	512	config.setString(CONF_BRANCH, null, keyName, tipId);
	513
	514	// index the blob contents of the tree
	515	TreeWalk treeWalk = new TreeWalk(repository);
	516	treeWalk.addTree(tip.getTree());
699e71	517	treeWalk.setRecursive(true);
JM	518
d896e6	519	Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
JM	520	while (treeWalk.next()) {
749110	521	// ensure path is not in a submodule
a02998	522	if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
PA	523	paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
	524	}
699e71	525	}
d896e6	526
JM	527	ByteArrayOutputStream os = new ByteArrayOutputStream();
	528	byte[] tmp = new byte[32767];
	529
	530	RevWalk commitWalk = new RevWalk(reader);
	531	commitWalk.markStart(tip);
699e71	532
d896e6	533	RevCommit commit;
JM	534	while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
	535	TreeWalk diffWalk = new TreeWalk(reader);
	536	int parentCount = commit.getParentCount();
	537	switch (parentCount) {
	538	case 0:
	539	diffWalk.addTree(new EmptyTreeIterator());
	540	break;
	541	case 1:
	542	diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
	543	break;
	544	default:
	545	// skip merge commits
	546	continue;
	547	}
	548	diffWalk.addTree(getTree(commitWalk, commit));
	549	diffWalk.setFilter(ANY_DIFF);
	550	diffWalk.setRecursive(true);
	551	while ((paths.size() > 0) && diffWalk.next()) {
	552	String path = diffWalk.getPathString();
	553	if (!paths.containsKey(path)) {
	554	continue;
	555	}
eecaad	556	//TODO: Figure out filestore oid the path - bit more involved than updating the index
PM	557
d896e6	558	// remove path from set
JM	559	ObjectId blobId = paths.remove(path);
	560	result.blobCount++;
699e71	561
d896e6	562	// index the blob metadata
JM	563	String blobAuthor = getAuthor(commit);
	564	String blobCommitter = getCommitter(commit);
	565	String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
	566	Resolution.MINUTE);
699e71	567
d896e6	568	Document doc = new Document();
db9832	569	doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
JM	570	doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
	571	doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
	572	doc.add(new Field(FIELD_PATH, path, TextField.TYPE_STORED));
	573	doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED));
	574	doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED));
	575	doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED));
d896e6	576
JM	577	// determine extension to compare to the extension
	578	// blacklist
	579	String ext = null;
	580	String name = path.toLowerCase();
	581	if (name.indexOf('.') > -1) {
	582	ext = name.substring(name.lastIndexOf('.') + 1);
	583	}
	584
	585	// index the blob content
699e71	586	if (StringUtils.isEmpty(ext) \|\| !excludedExtensions.contains(ext)) {
d896e6	587	ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
699e71	588	InputStream in = ldr.openStream();
d896e6	589	int n;
JM	590	while ((n = in.read(tmp)) > 0) {
	591	os.write(tmp, 0, n);
	592	}
	593	in.close();
	594	byte[] content = os.toByteArray();
699e71	595	String str = StringUtils.decodeString(content, encodings);
db9832	596	doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
d896e6	597	os.reset();
699e71	598	}
JM	599
d896e6	600	// add the blob to the index
JM	601	writer.addDocument(doc);
	602	}
	603	}
	604
	605	os.close();
	606
	607	// index the tip commit object
	608	if (indexedCommits.add(tipId)) {
	609	Document doc = createDocument(tip, tags.get(tipId));
db9832	610	doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
d896e6	611	writer.addDocument(doc);
JM	612	result.commitCount += 1;
	613	result.branchCount += 1;
	614	}
	615
	616	// traverse the log and index the previous commit objects
	617	RevWalk historyWalk = new RevWalk(reader);
	618	historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
	619	RevCommit rev;
	620	while ((rev = historyWalk.next()) != null) {
	621	String hash = rev.getId().getName();
	622	if (indexedCommits.add(hash)) {
	623	Document doc = createDocument(rev, tags.get(hash));
db9832	624	doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
d896e6	625	writer.addDocument(doc);
JM	626	result.commitCount += 1;
	627	}
	628	}
	629	}
	630
	631	// finished
a1cee6	632	reader.close();
699e71	633
d896e6	634	// commit all changes and reset the searcher
JM	635	config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
	636	config.save();
	637	writer.commit();
8e9988	638	resetIndexSearcher(model.name);
d896e6	639	result.success();
JM	640	} catch (Exception e) {
40ca5c	641	logger.error("Exception while reindexing " + model.name, e);
d896e6	642	}
JM	643	return result;
	644	}
699e71	645
d896e6	646	/**
JM	647	* Incrementally update the index with the specified commit for the
	648	* repository.
699e71	649	*
d896e6	650	* @param repositoryName
JM	651	* @param repository
	652	* @param branch
	653	* the fully qualified branch name (e.g. refs/heads/master)
	654	* @param commit
	655	* @return true, if successful
	656	*/
699e71	657	private IndexResult index(String repositoryName, Repository repository,
d896e6	658	String branch, RevCommit commit) {
JM	659	IndexResult result = new IndexResult();
	660	try {
ae9e15	661	String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
d896e6	662	List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
JM	663	String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
	664	Resolution.MINUTE);
	665	IndexWriter writer = getIndexWriter(repositoryName);
	666	for (PathChangeModel path : changedPaths) {
88fb67	667	if (path.isSubmodule()) {
JM	668	continue;
	669	}
d896e6	670	// delete the indexed blob
856091	671	deleteBlob(repositoryName, branch, path.name);
d896e6	672
JM	673	// re-index the blob
	674	if (!ChangeType.DELETE.equals(path.changeType)) {
	675	result.blobCount++;
	676	Document doc = new Document();
db9832	677	doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
JM	678	doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
	679	doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
	680	doc.add(new Field(FIELD_PATH, path.path, TextField.TYPE_STORED));
	681	doc.add(new Field(FIELD_DATE, revDate, StringField.TYPE_STORED));
	682	doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
	683	doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
d896e6	684
JM	685	// determine extension to compare to the extension
	686	// blacklist
	687	String ext = null;
	688	String name = path.name.toLowerCase();
	689	if (name.indexOf('.') > -1) {
	690	ext = name.substring(name.lastIndexOf('.') + 1);
	691	}
	692
	693	if (StringUtils.isEmpty(ext) \|\| !excludedExtensions.contains(ext)) {
eecaad	694	String str = "";
d896e6	695	// read the blob content
eecaad	696	if (path.isFilestoreItem()) {
PM	697	//Get file from filestore
	698	BodyContentHandler handler = new BodyContentHandler();
	699	Metadata metadata = new Metadata();
	700	PDFParser parser = new PDFParser();
	701
	702	ParseContext parseContext = new ParseContext();
	703	File lfsFile = filestoreManager.getStoragePath(path.getFilestoreOid());
	704	FileInputStream inputstream = new FileInputStream(lfsFile);
	705	parser.parse(inputstream, handler, metadata, parseContext);
	706	str = handler.toString();
	707	} else {
	708	str = JGitUtils.getStringContent(repository, commit.getTree(),
ae9e15	709	path.path, encodings);
eecaad	710	}
PM	711
749110	712	if (str != null) {
db9832	713	doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
749110	714	writer.addDocument(doc);
JM	715	}
d896e6	716	}
JM	717	}
	718	}
	719	writer.commit();
699e71	720
261024	721	// get any annotated commit tags
JM	722	List<String> commitTags = new ArrayList<String>();
33ceba	723	for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
261024	724	if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
JM	725	commitTags.add(ref.displayName);
	726	}
	727	}
699e71	728
261024	729	// create and write the Lucene document
JM	730	Document doc = createDocument(commit, commitTags);
db9832	731	doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
d896e6	732	result.commitCount++;
JM	733	result.success = index(repositoryName, doc);
	734	} catch (Exception e) {
	735	logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e);
	736	}
	737	return result;
	738	}
	739
	740	/**
	741	* Delete a blob from the specified branch of the repository index.
699e71	742	*
d896e6	743	* @param repositoryName
JM	744	* @param branch
	745	* @param path
	746	* @throws Exception
87ee94	747	* @return true, if deleted, false if no record was deleted
d896e6	748	*/
87ee94	749	public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception {
JM	750	String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH);
	751	String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path);
699e71	752
87ee94	753	BooleanQuery query = new BooleanQuery();
60110f	754	StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
JM	755	QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
87ee94	756	query.add(qp.parse(q), Occur.MUST);
JM	757
d896e6	758	IndexWriter writer = getIndexWriter(repositoryName);
87ee94	759	int numDocsBefore = writer.numDocs();
699e71	760	writer.deleteDocuments(query);
d896e6	761	writer.commit();
87ee94	762	int numDocsAfter = writer.numDocs();
JM	763	if (numDocsBefore == numDocsAfter) {
	764	logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
	765	return false;
	766	} else {
	767	logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
	768	return true;
	769	}
d896e6	770	}
JM	771
	772	/**
	773	* Updates a repository index incrementally from the last indexed commits.
699e71	774	*
40ca5c	775	* @param model
d896e6	776	* @param repository
JM	777	* @return IndexResult
	778	*/
9f6ef3	779	private IndexResult updateIndex(RepositoryModel model, Repository repository) {
d896e6	780	IndexResult result = new IndexResult();
JM	781	try {
	782	FileBasedConfig config = getConfig(repository);
	783	config.load();
	784
	785	// build a quick lookup of annotated tags
	786	Map<String, List<String>> tags = new HashMap<String, List<String>>();
	787	for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
	788	if (!tag.isAnnotatedTag()) {
	789	// skip non-annotated tags
	790	continue;
	791	}
b1d77a	792	if (!tags.containsKey(tag.getObjectId().getName())) {
d896e6	793	tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
JM	794	}
	795	tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
	796	}
	797
	798	// detect branch deletion
	799	// first assume all branches are deleted and then remove each
	800	// existing branch from deletedBranches during indexing
	801	Set<String> deletedBranches = new TreeSet<String>();
	802	for (String alias : config.getNames(CONF_ALIAS)) {
	803	String branch = config.getString(CONF_ALIAS, null, alias);
	804	deletedBranches.add(branch);
	805	}
	806
1aabf0	807	// get the local branches
d896e6	808	List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
699e71	809
1aabf0	810	// sort them by most recently updated
JM	811	Collections.sort(branches, new Comparator<RefModel>() {
	812	@Override
	813	public int compare(RefModel ref1, RefModel ref2) {
	814	return ref2.getDate().compareTo(ref1.getDate());
	815	}
	816	});
699e71	817
1aabf0	818	// reorder default branch to first position
JM	819	RefModel defaultBranch = null;
	820	ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
	821	for (RefModel branch : branches) {
	822	if (branch.getObjectId().equals(defaultBranchId)) {
	823	defaultBranch = branch;
	824	break;
	825	}
	826	}
	827	branches.remove(defaultBranch);
	828	branches.add(0, defaultBranch);
699e71	829
1aabf0	830	// walk through each branches
d896e6	831	for (RefModel branch : branches) {
JM	832	String branchName = branch.getName();
	833
1aabf0	834	boolean indexBranch = false;
JM	835	if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
	836	&& branch.equals(defaultBranch)) {
	837	// indexing "default" branch
	838	indexBranch = true;
c134a0	839	} else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
JM	840	// ignore internal meta branches
a04808	841	indexBranch = false;
1aabf0	842	} else {
JM	843	// normal explicit branch check
	844	indexBranch = model.indexedBranches.contains(branch.getName());
	845	}
699e71	846
1aabf0	847	// if this branch is not specifically indexed then skip
JM	848	if (!indexBranch) {
40ca5c	849	continue;
JM	850	}
699e71	851
d896e6	852	// remove this branch from the deletedBranches set
JM	853	deletedBranches.remove(branchName);
699e71	854
d896e6	855	// determine last commit
JM	856	String keyName = getBranchKey(branchName);
	857	String lastCommit = config.getString(CONF_BRANCH, null, keyName);
	858
	859	List<RevCommit> revs;
	860	if (StringUtils.isEmpty(lastCommit)) {
	861	// new branch/unindexed branch, get all commits on branch
	862	revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
	863	} else {
	864	// pre-existing branch, get changes since last commit
	865	revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
	866	}
	867
	868	if (revs.size() > 0) {
	869	result.branchCount += 1;
	870	}
699e71	871
JM	872	// reverse the list of commits so we start with the first commit
d896e6	873	Collections.reverse(revs);
699e71	874	for (RevCommit commit : revs) {
a04808	875	// index a commit
JM	876	result.add(index(model.name, repository, branchName, commit));
d896e6	877	}
JM	878
	879	// update the config
	880	config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
	881	config.setString(CONF_ALIAS, null, keyName, branchName);
	882	config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
	883	config.save();
	884	}
	885
	886	// the deletedBranches set will normally be empty by this point
	887	// unless a branch really was deleted and no longer exists
	888	if (deletedBranches.size() > 0) {
	889	for (String branch : deletedBranches) {
40ca5c	890	IndexWriter writer = getIndexWriter(model.name);
d896e6	891	writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
JM	892	writer.commit();
	893	}
	894	}
	895	result.success = true;
	896	} catch (Throwable t) {
40ca5c	897	logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
d896e6	898	}
JM	899	return result;
	900	}
699e71	901
d896e6	902	/**
JM	903	* Creates a Lucene document for a commit
699e71	904	*
d896e6	905	* @param commit
JM	906	* @param tags
	907	* @return a Lucene document
	908	*/
	909	private Document createDocument(RevCommit commit, List<String> tags) {
	910	Document doc = new Document();
db9832	911	doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), StringField.TYPE_STORED));
JM	912	doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
d896e6	913	doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
db9832	914	Resolution.MINUTE), StringField.TYPE_STORED));
JM	915	doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
	916	doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
	917	doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), TextField.TYPE_STORED));
	918	doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), TextField.TYPE_STORED));
d896e6	919	if (!ArrayUtils.isEmpty(tags)) {
db9832	920	doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), TextField.TYPE_STORED));
d896e6	921	}
JM	922	return doc;
	923	}
	924
	925	/**
	926	* Incrementally index an object for the repository.
699e71	927	*
d896e6	928	* @param repositoryName
JM	929	* @param doc
	930	* @return true, if successful
	931	*/
	932	private boolean index(String repositoryName, Document doc) {
699e71	933	try {
d896e6	934	IndexWriter writer = getIndexWriter(repositoryName);
JM	935	writer.addDocument(doc);
	936	writer.commit();
8e9988	937	resetIndexSearcher(repositoryName);
d896e6	938	return true;
JM	939	} catch (Exception e) {
	940	logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
	941	}
	942	return false;
	943	}
	944
d04009	945	private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException {
d896e6	946	SearchResult result = new SearchResult();
d04009	947	result.hitId = hitId;
JM	948	result.totalHits = totalHits;
d896e6	949	result.score = score;
JM	950	result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
699e71	951	result.summary = doc.get(FIELD_SUMMARY);
d896e6	952	result.author = doc.get(FIELD_AUTHOR);
JM	953	result.committer = doc.get(FIELD_COMMITTER);
	954	result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
	955	result.branch = doc.get(FIELD_BRANCH);
	956	result.commitId = doc.get(FIELD_COMMIT);
	957	result.path = doc.get(FIELD_PATH);
	958	if (doc.get(FIELD_TAG) != null) {
	959	result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
	960	}
	961	return result;
	962	}
	963
	964	private synchronized void resetIndexSearcher(String repository) throws IOException {
	965	IndexSearcher searcher = searchers.remove(repository);
	966	if (searcher != null) {
8e9988	967	searcher.getIndexReader().close();
d896e6	968	}
JM	969	}
	970
	971	/**
	972	* Gets an index searcher for the repository.
699e71	973	*
d896e6	974	* @param repository
JM	975	* @return
	976	* @throws IOException
	977	*/
	978	private IndexSearcher getIndexSearcher(String repository) throws IOException {
	979	IndexSearcher searcher = searchers.get(repository);
	980	if (searcher == null) {
	981	IndexWriter writer = getIndexWriter(repository);
db9832	982	searcher = new IndexSearcher(DirectoryReader.open(writer, true));
d896e6	983	searchers.put(repository, searcher);
JM	984	}
	985	return searcher;
	986	}
	987
	988	/**
	989	* Gets an index writer for the repository. The index will be created if it
	990	* does not already exist or if forceCreate is specified.
699e71	991	*
d896e6	992	* @param repository
JM	993	* @return an IndexWriter
	994	* @throws IOException
	995	*/
	996	private IndexWriter getIndexWriter(String repository) throws IOException {
699e71	997	IndexWriter indexWriter = writers.get(repository);
6ef2fc	998	File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED);
d896e6	999	File indexFolder = new File(repositoryFolder, LUCENE_DIR);
60110f	1000	Directory directory = FSDirectory.open(indexFolder);
d896e6	1001
JM	1002	if (indexWriter == null) {
	1003	if (!indexFolder.exists()) {
	1004	indexFolder.mkdirs();
	1005	}
60110f	1006	StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
JM	1007	IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
d896e6	1008	config.setOpenMode(OpenMode.CREATE_OR_APPEND);
JM	1009	indexWriter = new IndexWriter(directory, config);
	1010	writers.put(repository, indexWriter);
	1011	}
	1012	return indexWriter;
	1013	}
	1014
	1015	/**
	1016	* Searches the specified repositories for the given text or query
699e71	1017	*
d896e6	1018	* @param text
JM	1019	* if the text is null or empty, null is returned
d04009	1020	* @param page
JM	1021	* the page number to retrieve. page is 1-indexed.
	1022	* @param pageSize
	1023	* the number of elements to return for this page
d896e6	1024	* @param repositories
JM	1025	* a list of repositories to search. if no repositories are
	1026	* specified null is returned.
	1027	* @return a list of SearchResults in order from highest to the lowest score
699e71	1028	*
d896e6	1029	*/
d04009	1030	public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) {
d896e6	1031	if (ArrayUtils.isEmpty(repositories)) {
JM	1032	return null;
	1033	}
d04009	1034	return search(text, page, pageSize, repositories.toArray(new String[0]));
d896e6	1035	}
699e71	1036
d896e6	1037	/**
JM	1038	* Searches the specified repositories for the given text or query
699e71	1039	*
d896e6	1040	* @param text
JM	1041	* if the text is null or empty, null is returned
d04009	1042	* @param page
JM	1043	* the page number to retrieve. page is 1-indexed.
	1044	* @param pageSize
	1045	* the number of elements to return for this page
d896e6	1046	* @param repositories
JM	1047	* a list of repositories to search. if no repositories are
	1048	* specified null is returned.
	1049	* @return a list of SearchResults in order from highest to the lowest score
699e71	1050	*
d04009	1051	*/
JM	1052	public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
d896e6	1053	if (StringUtils.isEmpty(text)) {
JM	1054	return null;
	1055	}
	1056	if (ArrayUtils.isEmpty(repositories)) {
	1057	return null;
	1058	}
	1059	Set<SearchResult> results = new LinkedHashSet<SearchResult>();
60110f	1060	StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
d896e6	1061	try {
JM	1062	// default search checks summary and content
	1063	BooleanQuery query = new BooleanQuery();
	1064	QueryParser qp;
60110f	1065	qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
d896e6	1066	qp.setAllowLeadingWildcard(true);
JM	1067	query.add(qp.parse(text), Occur.SHOULD);
	1068
60110f	1069	qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
d896e6	1070	qp.setAllowLeadingWildcard(true);
JM	1071	query.add(qp.parse(text), Occur.SHOULD);
699e71	1072
d896e6	1073	IndexSearcher searcher;
JM	1074	if (repositories.length == 1) {
	1075	// single repository search
	1076	searcher = getIndexSearcher(repositories[0]);
	1077	} else {
	1078	// multiple repository search
	1079	List<IndexReader> readers = new ArrayList<IndexReader>();
	1080	for (String repository : repositories) {
	1081	IndexSearcher repositoryIndex = getIndexSearcher(repository);
	1082	readers.add(repositoryIndex.getIndexReader());
	1083	}
	1084	IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
905d31	1085	MultiSourceReader reader = new MultiSourceReader(rdrs);
d896e6	1086	searcher = new IndexSearcher(reader);
JM	1087	}
699e71	1088
d896e6	1089	Query rewrittenQuery = searcher.rewrite(query);
87ee94	1090	logger.debug(rewrittenQuery.toString());
JM	1091
60110f	1092	TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
d896e6	1093	searcher.search(rewrittenQuery, collector);
d04009	1094	int offset = Math.max(0, (page - 1) * pageSize);
JM	1095	ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
	1096	int totalHits = collector.getTotalHits();
d896e6	1097	for (int i = 0; i < hits.length; i++) {
JM	1098	int docId = hits[i].doc;
	1099	Document doc = searcher.doc(docId);
d04009	1100	SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
905d31	1101	if (repositories.length == 1) {
JM	1102	// single repository search
	1103	result.repository = repositories[0];
	1104	} else {
	1105	// multi-repository search
	1106	MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
	1107	int index = reader.getSourceIndex(docId);
	1108	result.repository = repositories[index];
	1109	}
699e71	1110	String content = doc.get(FIELD_CONTENT);
d896e6	1111	result.fragment = getHighlightedFragment(analyzer, query, content, result);
JM	1112	results.add(result);
	1113	}
	1114	} catch (Exception e) {
	1115	logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
	1116	}
	1117	return new ArrayList<SearchResult>(results);
	1118	}
699e71	1119
d896e6	1120	/**
699e71	1121	*
d896e6	1122	* @param analyzer
JM	1123	* @param query
	1124	* @param content
	1125	* @param result
	1126	* @return
	1127	* @throws IOException
	1128	* @throws InvalidTokenOffsetsException
	1129	*/
	1130	private String getHighlightedFragment(Analyzer analyzer, Query query,
	1131	String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
12c31e	1132	if (content == null) {
JM	1133	content = "";
699e71	1134	}
12c31e	1135
310a80	1136	int tabLength = storedSettings.getInteger(Keys.web.tabLength, 4);
12c31e	1137	int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;
JM	1138
d896e6	1139	QueryScorer scorer = new QueryScorer(query, "content");
699e71	1140	Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);
d896e6	1141
JM	1142	// use an artificial delimiter for the token
9f6ef3	1143	String termTag = "!!--[";
JM	1144	String termTagEnd = "]--!!";
d896e6	1145	SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
699e71	1146	Highlighter highlighter = new Highlighter(formatter, scorer);
d896e6	1147	highlighter.setTextFragmenter(fragmenter);
12c31e	1148
73fba6	1149	String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
d896e6	1150	if (ArrayUtils.isEmpty(fragments)) {
JM	1151	if (SearchObjectType.blob == result.type) {
	1152	return "";
	1153	}
12c31e	1154	// clip commit message
JM	1155	String fragment = content;
	1156	if (fragment.length() > fragmentLength) {
	1157	fragment = fragment.substring(0, fragmentLength) + "...";
	1158	}
310a80	1159	return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true, tabLength) + "</pre>";
d896e6	1160	}
699e71	1161
2b67ec	1162	// make sure we have unique fragments
JM	1163	Set<String> uniqueFragments = new LinkedHashSet<String>();
	1164	for (String fragment : fragments) {
	1165	uniqueFragments.add(fragment);
	1166	}
	1167	fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);
699e71	1168
d896e6	1169	StringBuilder sb = new StringBuilder();
JM	1170	for (int i = 0, len = fragments.length; i < len; i++) {
	1171	String fragment = fragments[i];
12c31e	1172	String tag = "<pre class=\"text\">";
JM	1173
d896e6	1174	// resurrect the raw fragment from removing the artificial delimiters
12c31e	1175	String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
JM	1176
	1177	// determine position of the raw fragment in the content
2b67ec	1178	int pos = content.indexOf(raw);
699e71	1179
12c31e	1180	// restore complete first line of fragment
JM	1181	int c = pos;
	1182	while (c > 0) {
	1183	c--;
	1184	if (content.charAt(c) == '\n') {
	1185	break;
	1186	}
	1187	}
	1188	if (c > 0) {
	1189	// inject leading chunk of first fragment line
	1190	fragment = content.substring(c + 1, pos) + fragment;
	1191	}
699e71	1192
12c31e	1193	if (SearchObjectType.blob == result.type) {
JM	1194	// count lines as offset into the content for this fragment
c2833a	1195	int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));
699e71	1196
12c31e	1197	// create fragment tag with line number and language
JM	1198	String lang = "";
	1199	String ext = StringUtils.getFileExtension(result.path).toLowerCase();
	1200	if (!StringUtils.isEmpty(ext)) {
	1201	// maintain leading space!
	1202	lang = " lang-" + ext;
	1203	}
	1204	tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);
699e71	1205
12c31e	1206	}
699e71	1207
12c31e	1208	sb.append(tag);
JM	1209
d896e6	1210	// replace the artificial delimiter with html tags
9f6ef3	1211	String html = StringUtils.escapeForHtml(fragment, false);
JM	1212	html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
d896e6	1213	sb.append(html);
JM	1214	sb.append("</pre>");
	1215	if (i < len - 1) {
	1216	sb.append("<span class=\"ellipses\">...</span><br/>");
	1217	}
	1218	}
	1219	return sb.toString();
699e71	1220	}
JM	1221
d896e6	1222	/**
699e71	1223	* Simple class to track the results of an index update.
d896e6	1224	*/
JM	1225	private class IndexResult {
	1226	long startTime = System.currentTimeMillis();
	1227	long endTime = startTime;
	1228	boolean success;
	1229	int branchCount;
	1230	int commitCount;
	1231	int blobCount;
699e71	1232
d896e6	1233	void add(IndexResult result) {
JM	1234	this.branchCount += result.branchCount;
	1235	this.commitCount += result.commitCount;
	1236	this.blobCount += result.blobCount;
	1237	}
699e71	1238
d896e6	1239	void success() {
JM	1240	success = true;
	1241	endTime = System.currentTimeMillis();
	1242	}
699e71	1243
d896e6	1244	float duration() {
JM	1245	return (endTime - startTime)/1000f;
	1246	}
b938ae	1247	}
699e71	1248
905d31	1249	/**
JM	1250	* Custom subclass of MultiReader to identify the source index for a given
	1251	* doc id. This would not be necessary of there was a public method to
	1252	* obtain this information.
699e71	1253	*
905d31	1254	*/
JM	1255	private class MultiSourceReader extends MultiReader {
699e71	1256
60110f	1257	MultiSourceReader(IndexReader [] readers) {
db9832	1258	super(readers, false);
905d31	1259	}
699e71	1260
905d31	1261	int getSourceIndex(int docId) {
JM	1262	int index = -1;
	1263	try {
db9832	1264	index = super.readerIndex(docId);
905d31	1265	} catch (Exception e) {
JM	1266	logger.error("Error getting source index", e);
	1267	}
	1268	return index;
	1269	}
	1270	}
e31da0	1271	}