githubFork/gitblit.git - Solinfo Gitblit

James Moger

2012-03-27 6ef2fcaa22f4295c3c1185883e9e891009140cb4

commit \| author \| age
e31da0	1	/*
JM	2	* Copyright 2012 gitblit.com.
	3	*
	4	* Licensed under the Apache License, Version 2.0 (the "License");
	5	* you may not use this file except in compliance with the License.
	6	* You may obtain a copy of the License at
	7	*
	8	* http://www.apache.org/licenses/LICENSE-2.0
	9	*
	10	* Unless required by applicable law or agreed to in writing, software
	11	* distributed under the License is distributed on an "AS IS" BASIS,
	12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	13	* See the License for the specific language governing permissions and
	14	* limitations under the License.
	15	*/
	16	package com.gitblit;
	17
d896e6	18	import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;
e31da0	19
d896e6	20	import java.io.ByteArrayOutputStream;
JM	21	import java.io.File;
	22	import java.io.IOException;
	23	import java.io.InputStream;
905d31	24	import java.lang.reflect.Method;
d896e6	25	import java.text.MessageFormat;
JM	26	import java.text.ParseException;
	27	import java.util.ArrayList;
	28	import java.util.Collections;
	29	import java.util.Comparator;
	30	import java.util.HashMap;
	31	import java.util.LinkedHashSet;
	32	import java.util.List;
	33	import java.util.Map;
	34	import java.util.Set;
	35	import java.util.TreeMap;
	36	import java.util.TreeSet;
	37	import java.util.concurrent.ConcurrentHashMap;
	38
	39	import org.apache.lucene.analysis.Analyzer;
	40	import org.apache.lucene.analysis.standard.StandardAnalyzer;
	41	import org.apache.lucene.document.DateTools;
	42	import org.apache.lucene.document.DateTools.Resolution;
	43	import org.apache.lucene.document.Document;
	44	import org.apache.lucene.document.Field;
	45	import org.apache.lucene.document.Field.Index;
	46	import org.apache.lucene.document.Field.Store;
	47	import org.apache.lucene.index.IndexReader;
	48	import org.apache.lucene.index.IndexWriter;
	49	import org.apache.lucene.index.IndexWriterConfig;
	50	import org.apache.lucene.index.IndexWriterConfig.OpenMode;
	51	import org.apache.lucene.index.MultiReader;
	52	import org.apache.lucene.index.Term;
	53	import org.apache.lucene.queryParser.QueryParser;
	54	import org.apache.lucene.search.BooleanClause.Occur;
	55	import org.apache.lucene.search.BooleanQuery;
	56	import org.apache.lucene.search.IndexSearcher;
	57	import org.apache.lucene.search.Query;
	58	import org.apache.lucene.search.ScoreDoc;
	59	import org.apache.lucene.search.TermQuery;
	60	import org.apache.lucene.search.TopScoreDocCollector;
	61	import org.apache.lucene.search.highlight.Fragmenter;
	62	import org.apache.lucene.search.highlight.Highlighter;
	63	import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
	64	import org.apache.lucene.search.highlight.QueryScorer;
	65	import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
	66	import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
	67	import org.apache.lucene.store.Directory;
	68	import org.apache.lucene.store.FSDirectory;
	69	import org.apache.lucene.util.Version;
	70	import org.eclipse.jgit.diff.DiffEntry.ChangeType;
	71	import org.eclipse.jgit.lib.Constants;
	72	import org.eclipse.jgit.lib.ObjectId;
	73	import org.eclipse.jgit.lib.ObjectLoader;
	74	import org.eclipse.jgit.lib.ObjectReader;
e31da0	75	import org.eclipse.jgit.lib.Repository;
6ef2fc	76	import org.eclipse.jgit.lib.RepositoryCache.FileKey;
d896e6	77	import org.eclipse.jgit.revwalk.RevCommit;
JM	78	import org.eclipse.jgit.revwalk.RevTree;
	79	import org.eclipse.jgit.revwalk.RevWalk;
	80	import org.eclipse.jgit.storage.file.FileBasedConfig;
	81	import org.eclipse.jgit.treewalk.EmptyTreeIterator;
	82	import org.eclipse.jgit.treewalk.TreeWalk;
	83	import org.eclipse.jgit.util.FS;
e31da0	84	import org.slf4j.Logger;
JM	85	import org.slf4j.LoggerFactory;
	86
d896e6	87	import com.gitblit.Constants.SearchObjectType;
JM	88	import com.gitblit.models.IssueModel;
	89	import com.gitblit.models.IssueModel.Attachment;
	90	import com.gitblit.models.PathModel.PathChangeModel;
	91	import com.gitblit.models.RefModel;
40ca5c	92	import com.gitblit.models.RepositoryModel;
d896e6	93	import com.gitblit.models.SearchResult;
JM	94	import com.gitblit.utils.ArrayUtils;
	95	import com.gitblit.utils.IssueUtils;
e31da0	96	import com.gitblit.utils.JGitUtils;
d896e6	97	import com.gitblit.utils.StringUtils;
e31da0	98
JM	99	/**
d896e6	100	* The Lucene executor handles indexing and searching repositories.
e31da0	101	*
JM	102	* @author James Moger
	103	*
	104	*/
	105	public class LuceneExecutor implements Runnable {
d896e6	106
JM	107
905d31	108	private static final int INDEX_VERSION = 2;
e31da0	109
d896e6	110	private static final String FIELD_OBJECT_TYPE = "type";
JM	111	private static final String FIELD_ISSUE = "issue";
	112	private static final String FIELD_PATH = "path";
	113	private static final String FIELD_COMMIT = "commit";
	114	private static final String FIELD_BRANCH = "branch";
	115	private static final String FIELD_SUMMARY = "summary";
	116	private static final String FIELD_CONTENT = "content";
	117	private static final String FIELD_AUTHOR = "author";
	118	private static final String FIELD_COMMITTER = "committer";
	119	private static final String FIELD_DATE = "date";
	120	private static final String FIELD_TAG = "tag";
	121	private static final String FIELD_LABEL = "label";
	122	private static final String FIELD_ATTACHMENT = "attachment";
	123
	124	private static final String CONF_FILE = "lucene.conf";
	125	private static final String LUCENE_DIR = "lucene";
	126	private static final String CONF_INDEX = "index";
	127	private static final String CONF_VERSION = "version";
	128	private static final String CONF_ALIAS = "aliases";
	129	private static final String CONF_BRANCH = "branches";
	130
	131	private static final Version LUCENE_VERSION = Version.LUCENE_35;
	132
e31da0	133	private final Logger logger = LoggerFactory.getLogger(LuceneExecutor.class);
d896e6	134
JM	135	private final IStoredSettings storedSettings;
	136	private final File repositoriesFolder;
	137
	138	private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
	139	private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();
	140
f1d2ad	141	private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip";
JM	142	private Set<String> excludedExtensions;
	143
d896e6	144	public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) {
JM	145	this.storedSettings = settings;
	146	this.repositoriesFolder = repositoriesFolder;
462488	147	String exts = luceneIgnoreExtensions;
JM	148	if (settings != null) {
	149	exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts);
	150	}
	151	excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
e31da0	152	}
JM	153
	154	/**
273cb9	155	* Run is executed by the Gitblit executor service. Because this is called
JM	156	* by an executor service, calls will queue - i.e. there can never be
	157	* concurrent execution of repository index updates.
e31da0	158	*/
JM	159	@Override
	160	public void run() {
7db092	161	if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) {
JM	162	// Lucene indexing is disabled
	163	return;
	164	}
f1d2ad	165	// reload the excluded extensions
JM	166	String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions);
	167	excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
	168
40ca5c	169	for (String repositoryName: GitBlit.self().getRepositoryList()) {
JM	170	RepositoryModel model = GitBlit.self().getRepositoryModel(repositoryName);
	171	if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
	172	Repository repository = GitBlit.self().getRepository(model.name);
	173	index(model, repository);
	174	repository.close();
	175	System.gc();
e31da0	176	}
JM	177	}
	178	}
	179
	180	/**
	181	* Synchronously indexes a repository. This may build a complete index of a
	182	* repository or it may update an existing index.
	183	*
3d0494	184	* @param name
e31da0	185	* the name of the repository
JM	186	* @param repository
	187	* the repository object
	188	*/
9f6ef3	189	private void index(RepositoryModel model, Repository repository) {
e31da0	190	try {
40ca5c	191	if (shouldReindex(repository)) {
JM	192	// (re)build the entire index
	193	IndexResult result = reindex(model, repository);
	194
	195	if (result.success) {
	196	if (result.commitCount > 0) {
	197	String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
	198	logger.info(MessageFormat.format(msg, model.name, result.commitCount,
	199	result.blobCount, result.branchCount, result.duration()));
e31da0	200	}
JM	201	} else {
40ca5c	202	String msg = "Could not build {0} Lucene index!";
JM	203	logger.error(MessageFormat.format(msg, model.name));
e31da0	204	}
JM	205	} else {
40ca5c	206	// update the index with latest commits
JM	207	IndexResult result = updateIndex(model, repository);
	208	if (result.success) {
	209	if (result.commitCount > 0) {
	210	String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
	211	logger.info(MessageFormat.format(msg, model.name, result.commitCount,
	212	result.blobCount, result.branchCount, result.duration()));
	213	}
	214	} else {
	215	String msg = "Could not update {0} Lucene index!";
	216	logger.error(MessageFormat.format(msg, model.name));
	217	}
e31da0	218	}
JM	219	} catch (Throwable t) {
40ca5c	220	logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t);
e31da0	221	}
JM	222	}
e6637c	223
JM	224	/**
	225	* Close the writer/searcher objects for a repository.
	226	*
	227	* @param repositoryName
	228	*/
8e9988	229	public synchronized void close(String repositoryName) {
JM	230	try {
	231	IndexSearcher searcher = searchers.remove(repositoryName);
	232	if (searcher != null) {
	233	searcher.getIndexReader().close();
	234	}
	235	} catch (Exception e) {
	236	logger.error("Failed to close index searcher for " + repositoryName, e);
	237	}
	238
e6637c	239	try {
JM	240	IndexWriter writer = writers.remove(repositoryName);
	241	if (writer != null) {
	242	writer.close();
	243	}
	244	} catch (Exception e) {
	245	logger.error("Failed to close index writer for " + repositoryName, e);
8e9988	246	}
e6637c	247	}
b938ae	248
JM	249	/**
	250	* Close all Lucene indexers.
	251	*
	252	*/
8e9988	253	public synchronized void close() {
d896e6	254	// close all writers
JM	255	for (String writer : writers.keySet()) {
	256	try {
	257	writers.get(writer).close(true);
	258	} catch (Throwable t) {
	259	logger.error("Failed to close Lucene writer for " + writer, t);
	260	}
	261	}
	262	writers.clear();
	263
	264	// close all searchers
	265	for (String searcher : searchers.keySet()) {
	266	try {
8e9988	267	searchers.get(searcher).getIndexReader().close();
d896e6	268	} catch (Throwable t) {
JM	269	logger.error("Failed to close Lucene searcher for " + searcher, t);
	270	}
	271	}
	272	searchers.clear();
	273	}
	274
	275
	276	/**
	277	* Deletes the Lucene index for the specified repository.
	278	*
	279	* @param repositoryName
	280	* @return true, if successful
	281	*/
	282	public boolean deleteIndex(String repositoryName) {
	283	try {
8e9988	284	// close any open writer/searcher
JM	285	close(repositoryName);
	286
d896e6	287	// delete the index folder
JM	288	File repositoryFolder = new File(repositoriesFolder, repositoryName);
	289	File luceneIndex = new File(repositoryFolder, LUCENE_DIR);
	290	if (luceneIndex.exists()) {
	291	org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
	292	org.eclipse.jgit.util.FileUtils.RECURSIVE);
	293	}
	294	// delete the config file
	295	File luceneConfig = new File(repositoryFolder, CONF_FILE);
	296	if (luceneConfig.exists()) {
	297	luceneConfig.delete();
	298	}
	299	return true;
	300	} catch (IOException e) {
	301	throw new RuntimeException(e);
	302	}
	303	}
	304
	305
	306	/**
	307	* Returns the author for the commit, if this information is available.
	308	*
	309	* @param commit
	310	* @return an author or unknown
	311	*/
	312	private String getAuthor(RevCommit commit) {
	313	String name = "unknown";
	314	try {
	315	name = commit.getAuthorIdent().getName();
	316	if (StringUtils.isEmpty(name)) {
	317	name = commit.getAuthorIdent().getEmailAddress();
	318	}
	319	} catch (NullPointerException n) {
	320	}
	321	return name;
	322	}
	323
	324	/**
	325	* Returns the committer for the commit, if this information is available.
	326	*
	327	* @param commit
	328	* @return an committer or unknown
	329	*/
	330	private String getCommitter(RevCommit commit) {
	331	String name = "unknown";
	332	try {
	333	name = commit.getCommitterIdent().getName();
	334	if (StringUtils.isEmpty(name)) {
	335	name = commit.getCommitterIdent().getEmailAddress();
	336	}
	337	} catch (NullPointerException n) {
	338	}
	339	return name;
	340	}
905d31	341
JM	342	/**
	343	* Get the tree associated with the given commit.
	344	*
	345	* @param walk
	346	* @param commit
	347	* @return tree
	348	* @throws IOException
	349	*/
9f6ef3	350	private RevTree getTree(final RevWalk walk, final RevCommit commit)
905d31	351	throws IOException {
JM	352	final RevTree tree = commit.getTree();
	353	if (tree != null) {
	354	return tree;
	355	}
	356	walk.parseHeaders(commit);
	357	return commit.getTree();
	358	}
d896e6	359
JM	360	/**
	361	* Construct a keyname from the branch.
	362	*
	363	* @param branchName
	364	* @return a keyname appropriate for the Git config file format
	365	*/
	366	private String getBranchKey(String branchName) {
	367	return StringUtils.getSHA1(branchName);
	368	}
	369
	370	/**
	371	* Returns the Lucene configuration for the specified repository.
	372	*
	373	* @param repository
	374	* @return a config object
	375	*/
	376	private FileBasedConfig getConfig(Repository repository) {
	377	File file = new File(repository.getDirectory(), CONF_FILE);
	378	FileBasedConfig config = new FileBasedConfig(file, FS.detect());
	379	return config;
	380	}
	381
	382	/**
	383	* Reads the Lucene config file for the repository to check the index
	384	* version. If the index version is different, then rebuild the repository
	385	* index.
	386	*
	387	* @param repository
	388	* @return true of the on-disk index format is different than INDEX_VERSION
	389	*/
9f6ef3	390	private boolean shouldReindex(Repository repository) {
d896e6	391	try {
JM	392	FileBasedConfig config = getConfig(repository);
	393	config.load();
	394	int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
	395	// reindex if versions do not match
	396	return indexVersion != INDEX_VERSION;
	397	} catch (Throwable t) {
	398	}
	399	return true;
	400	}
	401
	402
	403	/**
	404	* This completely indexes the repository and will destroy any existing
	405	* index.
	406	*
	407	* @param repositoryName
	408	* @param repository
	409	* @return IndexResult
	410	*/
40ca5c	411	public IndexResult reindex(RepositoryModel model, Repository repository) {
8e9988	412	IndexResult result = new IndexResult();
40ca5c	413	if (!deleteIndex(model.name)) {
d896e6	414	return result;
JM	415	}
	416	try {
	417	FileBasedConfig config = getConfig(repository);
	418	Set<String> indexedCommits = new TreeSet<String>();
40ca5c	419	IndexWriter writer = getIndexWriter(model.name);
d896e6	420	// build a quick lookup of tags
JM	421	Map<String, List<String>> tags = new HashMap<String, List<String>>();
	422	for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
	423	if (!tag.isAnnotatedTag()) {
	424	// skip non-annotated tags
	425	continue;
	426	}
	427	if (!tags.containsKey(tag.getObjectId())) {
	428	tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
	429	}
	430	tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
	431	}
	432
	433	ObjectReader reader = repository.newObjectReader();
	434
	435	// get the local branches
	436	List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
	437
	438	// sort them by most recently updated
	439	Collections.sort(branches, new Comparator<RefModel>() {
	440	@Override
	441	public int compare(RefModel ref1, RefModel ref2) {
	442	return ref2.getDate().compareTo(ref1.getDate());
	443	}
	444	});
	445
	446	// reorder default branch to first position
	447	RefModel defaultBranch = null;
	448	ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
	449	for (RefModel branch : branches) {
	450	if (branch.getObjectId().equals(defaultBranchId)) {
1aabf0	451	defaultBranch = branch;
d896e6	452	break;
JM	453	}
	454	}
	455	branches.remove(defaultBranch);
	456	branches.add(0, defaultBranch);
	457
	458	// walk through each branch
	459	for (RefModel branch : branches) {
40ca5c	460
1aabf0	461	boolean indexBranch = false;
JM	462	if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
	463	&& branch.equals(defaultBranch)) {
	464	// indexing "default" branch
	465	indexBranch = true;
	466	} else if (IssueUtils.GB_ISSUES.equals(branch)) {
	467	// skip the GB_ISSUES branch because it is indexed later
	468	// note: this is different than updateIndex
	469	indexBranch = false;
	470	} else {
	471	// normal explicit branch check
	472	indexBranch = model.indexedBranches.contains(branch.getName());
	473	}
	474
40ca5c	475	// if this branch is not specifically indexed then skip
1aabf0	476	if (!indexBranch) {
d896e6	477	continue;
JM	478	}
	479
	480	String branchName = branch.getName();
	481	RevWalk revWalk = new RevWalk(reader);
	482	RevCommit tip = revWalk.parseCommit(branch.getObjectId());
	483	String tipId = tip.getId().getName();
	484
	485	String keyName = getBranchKey(branchName);
	486	config.setString(CONF_ALIAS, null, keyName, branchName);
	487	config.setString(CONF_BRANCH, null, keyName, tipId);
	488
	489	// index the blob contents of the tree
	490	TreeWalk treeWalk = new TreeWalk(repository);
	491	treeWalk.addTree(tip.getTree());
	492	treeWalk.setRecursive(true);
	493
	494	Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
	495	while (treeWalk.next()) {
	496	paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
	497	}
	498
	499	ByteArrayOutputStream os = new ByteArrayOutputStream();
	500	byte[] tmp = new byte[32767];
	501
	502	RevWalk commitWalk = new RevWalk(reader);
	503	commitWalk.markStart(tip);
	504
	505	RevCommit commit;
	506	while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
	507	TreeWalk diffWalk = new TreeWalk(reader);
	508	int parentCount = commit.getParentCount();
	509	switch (parentCount) {
	510	case 0:
	511	diffWalk.addTree(new EmptyTreeIterator());
	512	break;
	513	case 1:
	514	diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
	515	break;
	516	default:
	517	// skip merge commits
	518	continue;
	519	}
	520	diffWalk.addTree(getTree(commitWalk, commit));
	521	diffWalk.setFilter(ANY_DIFF);
	522	diffWalk.setRecursive(true);
	523	while ((paths.size() > 0) && diffWalk.next()) {
	524	String path = diffWalk.getPathString();
	525	if (!paths.containsKey(path)) {
	526	continue;
	527	}
	528
	529	// remove path from set
	530	ObjectId blobId = paths.remove(path);
	531	result.blobCount++;
	532
	533	// index the blob metadata
	534	String blobAuthor = getAuthor(commit);
	535	String blobCommitter = getCommitter(commit);
	536	String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
	537	Resolution.MINUTE);
	538
	539	Document doc = new Document();
	540	doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
	541	doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
	542	doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
	543	doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
	544	doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));
	545	doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));
	546	doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED));
	547
	548	// determine extension to compare to the extension
	549	// blacklist
	550	String ext = null;
	551	String name = path.toLowerCase();
	552	if (name.indexOf('.') > -1) {
	553	ext = name.substring(name.lastIndexOf('.') + 1);
	554	}
	555
	556	// index the blob content
	557	if (StringUtils.isEmpty(ext) \|\| !excludedExtensions.contains(ext)) {
	558	ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
462488	559	InputStream in = ldr.openStream();
d896e6	560	int n;
JM	561	while ((n = in.read(tmp)) > 0) {
	562	os.write(tmp, 0, n);
	563	}
	564	in.close();
	565	byte[] content = os.toByteArray();
	566	String str = new String(content, Constants.CHARACTER_ENCODING);
	567	doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
	568	os.reset();
	569	}
	570
	571	// add the blob to the index
	572	writer.addDocument(doc);
	573	}
	574	}
	575
	576	os.close();
	577
	578	// index the tip commit object
	579	if (indexedCommits.add(tipId)) {
	580	Document doc = createDocument(tip, tags.get(tipId));
	581	doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
	582	writer.addDocument(doc);
	583	result.commitCount += 1;
	584	result.branchCount += 1;
	585	}
	586
	587	// traverse the log and index the previous commit objects
	588	RevWalk historyWalk = new RevWalk(reader);
	589	historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
	590	RevCommit rev;
	591	while ((rev = historyWalk.next()) != null) {
	592	String hash = rev.getId().getName();
	593	if (indexedCommits.add(hash)) {
	594	Document doc = createDocument(rev, tags.get(hash));
	595	doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
	596	writer.addDocument(doc);
	597	result.commitCount += 1;
	598	}
	599	}
	600	}
	601
	602	// finished
	603	reader.release();
	604
	605	// this repository has a gb-issues branch, index all issues
	606	if (IssueUtils.getIssuesBranch(repository) != null) {
	607	List<IssueModel> issues = IssueUtils.getIssues(repository, null);
	608	if (issues.size() > 0) {
	609	result.branchCount += 1;
	610	}
	611	for (IssueModel issue : issues) {
	612	result.issueCount++;
	613	Document doc = createDocument(issue);
	614	writer.addDocument(doc);
	615	}
	616	}
	617
	618	// commit all changes and reset the searcher
	619	config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
	620	config.save();
	621	writer.commit();
8e9988	622	resetIndexSearcher(model.name);
d896e6	623	result.success();
JM	624	} catch (Exception e) {
40ca5c	625	logger.error("Exception while reindexing " + model.name, e);
d896e6	626	}
JM	627	return result;
	628	}
	629
	630	/**
	631	* Incrementally update the index with the specified commit for the
	632	* repository.
	633	*
	634	* @param repositoryName
	635	* @param repository
	636	* @param branch
	637	* the fully qualified branch name (e.g. refs/heads/master)
	638	* @param commit
	639	* @return true, if successful
	640	*/
	641	private IndexResult index(String repositoryName, Repository repository,
	642	String branch, RevCommit commit) {
	643	IndexResult result = new IndexResult();
	644	try {
	645	List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
	646	String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
	647	Resolution.MINUTE);
	648	IndexWriter writer = getIndexWriter(repositoryName);
	649	for (PathChangeModel path : changedPaths) {
	650	// delete the indexed blob
856091	651	deleteBlob(repositoryName, branch, path.name);
d896e6	652
JM	653	// re-index the blob
	654	if (!ChangeType.DELETE.equals(path.changeType)) {
	655	result.blobCount++;
	656	Document doc = new Document();
	657	doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
	658	Index.NOT_ANALYZED));
	659	doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
	660	doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
	661	doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));
	662	doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
	663	doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
	664	doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
	665
	666	// determine extension to compare to the extension
	667	// blacklist
	668	String ext = null;
	669	String name = path.name.toLowerCase();
	670	if (name.indexOf('.') > -1) {
	671	ext = name.substring(name.lastIndexOf('.') + 1);
	672	}
	673
	674	if (StringUtils.isEmpty(ext) \|\| !excludedExtensions.contains(ext)) {
	675	// read the blob content
	676	String str = JGitUtils.getStringContent(repository, commit.getTree(),
	677	path.path);
	678	doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
	679	writer.addDocument(doc);
	680	}
	681	}
	682	}
	683	writer.commit();
261024	684
JM	685	// get any annotated commit tags
	686	List<String> commitTags = new ArrayList<String>();
33ceba	687	for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
261024	688	if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
JM	689	commitTags.add(ref.displayName);
	690	}
	691	}
	692
	693	// create and write the Lucene document
	694	Document doc = createDocument(commit, commitTags);
cdbbda	695	doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
d896e6	696	result.commitCount++;
JM	697	result.success = index(repositoryName, doc);
	698	} catch (Exception e) {
	699	logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e);
	700	}
	701	return result;
	702	}
	703
	704	/**
	705	* Incrementally update the index with the specified issue for the
	706	* repository.
	707	*
	708	* @param repositoryName
	709	* @param issue
	710	* @return true, if successful
	711	*/
	712	public boolean index(String repositoryName, IssueModel issue) {
	713	try {
	714	// delete the old issue from the index, if exists
	715	deleteIssue(repositoryName, issue.id);
	716	Document doc = createDocument(issue);
	717	return index(repositoryName, doc);
	718	} catch (Exception e) {
	719	logger.error(MessageFormat.format("Error while indexing issue {0} in {1}", issue.id, repositoryName), e);
	720	}
	721	return false;
	722	}
	723
	724	/**
	725	* Delete an issue from the repository index.
	726	*
	727	* @param repositoryName
	728	* @param issueId
	729	* @throws Exception
	730	*/
	731	private void deleteIssue(String repositoryName, String issueId) throws Exception {
	732	BooleanQuery query = new BooleanQuery();
	733	Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.issue.name());
	734	query.add(new TermQuery(objectTerm), Occur.MUST);
	735	Term issueidTerm = new Term(FIELD_ISSUE, issueId);
	736	query.add(new TermQuery(issueidTerm), Occur.MUST);
	737
	738	IndexWriter writer = getIndexWriter(repositoryName);
	739	writer.deleteDocuments(query);
	740	writer.commit();
	741	}
	742
	743	/**
	744	* Delete a blob from the specified branch of the repository index.
	745	*
	746	* @param repositoryName
	747	* @param branch
	748	* @param path
	749	* @throws Exception
	750	*/
	751	private void deleteBlob(String repositoryName, String branch, String path) throws Exception {
	752	BooleanQuery query = new BooleanQuery();
	753	Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.blob.name());
	754	query.add(new TermQuery(objectTerm), Occur.MUST);
	755	Term branchTerm = new Term(FIELD_BRANCH, branch);
	756	query.add(new TermQuery(branchTerm), Occur.MUST);
	757	Term pathTerm = new Term(FIELD_PATH, path);
	758	query.add(new TermQuery(pathTerm), Occur.MUST);
	759
	760	IndexWriter writer = getIndexWriter(repositoryName);
	761	writer.deleteDocuments(query);
	762	writer.commit();
	763	}
	764
	765	/**
	766	* Updates a repository index incrementally from the last indexed commits.
	767	*
40ca5c	768	* @param model
d896e6	769	* @param repository
JM	770	* @return IndexResult
	771	*/
9f6ef3	772	private IndexResult updateIndex(RepositoryModel model, Repository repository) {
d896e6	773	IndexResult result = new IndexResult();
JM	774	try {
	775	FileBasedConfig config = getConfig(repository);
	776	config.load();
	777
	778	// build a quick lookup of annotated tags
	779	Map<String, List<String>> tags = new HashMap<String, List<String>>();
	780	for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
	781	if (!tag.isAnnotatedTag()) {
	782	// skip non-annotated tags
	783	continue;
	784	}
	785	if (!tags.containsKey(tag.getObjectId())) {
	786	tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
	787	}
	788	tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
	789	}
	790
	791	// detect branch deletion
	792	// first assume all branches are deleted and then remove each
	793	// existing branch from deletedBranches during indexing
	794	Set<String> deletedBranches = new TreeSet<String>();
	795	for (String alias : config.getNames(CONF_ALIAS)) {
	796	String branch = config.getString(CONF_ALIAS, null, alias);
	797	deletedBranches.add(branch);
	798	}
	799
1aabf0	800	// get the local branches
d896e6	801	List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
1aabf0	802
JM	803	// sort them by most recently updated
	804	Collections.sort(branches, new Comparator<RefModel>() {
	805	@Override
	806	public int compare(RefModel ref1, RefModel ref2) {
	807	return ref2.getDate().compareTo(ref1.getDate());
	808	}
	809	});
	810
	811	// reorder default branch to first position
	812	RefModel defaultBranch = null;
	813	ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
	814	for (RefModel branch : branches) {
	815	if (branch.getObjectId().equals(defaultBranchId)) {
	816	defaultBranch = branch;
	817	break;
	818	}
	819	}
	820	branches.remove(defaultBranch);
	821	branches.add(0, defaultBranch);
	822
	823	// walk through each branches
d896e6	824	for (RefModel branch : branches) {
JM	825	String branchName = branch.getName();
	826
1aabf0	827	boolean indexBranch = false;
JM	828	if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
	829	&& branch.equals(defaultBranch)) {
	830	// indexing "default" branch
	831	indexBranch = true;
	832	} else if (IssueUtils.GB_ISSUES.equals(branch)) {
	833	// update issues modified on the GB_ISSUES branch
	834	// note: this is different than reindex
	835	indexBranch = true;
	836	} else {
	837	// normal explicit branch check
	838	indexBranch = model.indexedBranches.contains(branch.getName());
	839	}
	840
	841	// if this branch is not specifically indexed then skip
	842	if (!indexBranch) {
40ca5c	843	continue;
JM	844	}
	845
d896e6	846	// remove this branch from the deletedBranches set
JM	847	deletedBranches.remove(branchName);
1aabf0	848
d896e6	849	// determine last commit
JM	850	String keyName = getBranchKey(branchName);
	851	String lastCommit = config.getString(CONF_BRANCH, null, keyName);
	852
	853	List<RevCommit> revs;
	854	if (StringUtils.isEmpty(lastCommit)) {
	855	// new branch/unindexed branch, get all commits on branch
	856	revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
	857	} else {
	858	// pre-existing branch, get changes since last commit
	859	revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
	860	}
	861
	862	if (revs.size() > 0) {
	863	result.branchCount += 1;
	864	}
	865
40ca5c	866	// track the issue ids that we have already indexed
JM	867	Set<String> indexedIssues = new TreeSet<String>();
	868
d896e6	869	// reverse the list of commits so we start with the first commit
JM	870	Collections.reverse(revs);
40ca5c	871	for (RevCommit commit : revs) {
JM	872	if (IssueUtils.GB_ISSUES.equals(branch)) {
	873	// only index an issue once during updateIndex
	874	String issueId = commit.getShortMessage().substring(2).trim();
	875	if (indexedIssues.contains(issueId)) {
	876	continue;
	877	}
	878	indexedIssues.add(issueId);
	879
	880	IssueModel issue = IssueUtils.getIssue(repository, issueId);
	881	if (issue == null) {
	882	// issue was deleted, remove from index
	883	deleteIssue(model.name, issueId);
	884	} else {
	885	// issue was updated
	886	index(model.name, issue);
	887	result.issueCount++;
	888	}
	889	} else {
	890	// index a commit
	891	result.add(index(model.name, repository, branchName, commit));
	892	}
d896e6	893	}
JM	894
	895	// update the config
	896	config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
	897	config.setString(CONF_ALIAS, null, keyName, branchName);
	898	config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
	899	config.save();
	900	}
	901
	902	// the deletedBranches set will normally be empty by this point
	903	// unless a branch really was deleted and no longer exists
	904	if (deletedBranches.size() > 0) {
	905	for (String branch : deletedBranches) {
40ca5c	906	IndexWriter writer = getIndexWriter(model.name);
d896e6	907	writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
JM	908	writer.commit();
	909	}
	910	}
	911	result.success = true;
	912	} catch (Throwable t) {
40ca5c	913	logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
d896e6	914	}
JM	915	return result;
	916	}
905d31	917
d896e6	918	/**
JM	919	* Creates a Lucene document from an issue.
	920	*
	921	* @param issue
	922	* @return a Lucene document
	923	*/
	924	private Document createDocument(IssueModel issue) {
	925	Document doc = new Document();
	926	doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.issue.name(), Store.YES,
	927	Field.Index.NOT_ANALYZED));
	928	doc.add(new Field(FIELD_ISSUE, issue.id, Store.YES, Index.ANALYZED));
	929	doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.ANALYZED));
	930	doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE),
	931	Store.YES, Field.Index.NO));
	932	doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.ANALYZED));
	933	List<String> attachments = new ArrayList<String>();
	934	for (Attachment attachment : issue.getAttachments()) {
	935	attachments.add(attachment.name.toLowerCase());
	936	}
	937	doc.add(new Field(FIELD_ATTACHMENT, StringUtils.flattenStrings(attachments), Store.YES,
	938	Index.ANALYZED));
	939	doc.add(new Field(FIELD_SUMMARY, issue.summary, Store.YES, Index.ANALYZED));
	940	doc.add(new Field(FIELD_CONTENT, issue.toString(), Store.YES, Index.ANALYZED));
	941	doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(issue.getLabels()), Store.YES,
	942	Index.ANALYZED));
	943	return doc;
	944	}
	945
	946	/**
	947	* Creates a Lucene document for a commit
	948	*
	949	* @param commit
	950	* @param tags
	951	* @return a Lucene document
	952	*/
	953	private Document createDocument(RevCommit commit, List<String> tags) {
	954	Document doc = new Document();
	955	doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), Store.YES,
	956	Index.NOT_ANALYZED));
	957	doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
	958	doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
	959	Resolution.MINUTE), Store.YES, Index.NO));
	960	doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
	961	doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
	962	doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), Store.YES, Index.ANALYZED));
	963	doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.YES, Index.ANALYZED));
	964	if (!ArrayUtils.isEmpty(tags)) {
	965	doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), Store.YES, Index.ANALYZED));
	966	}
	967	return doc;
	968	}
	969
	970	/**
	971	* Incrementally index an object for the repository.
	972	*
	973	* @param repositoryName
	974	* @param doc
	975	* @return true, if successful
	976	*/
	977	private boolean index(String repositoryName, Document doc) {
	978	try {
	979	IndexWriter writer = getIndexWriter(repositoryName);
	980	writer.addDocument(doc);
	981	writer.commit();
8e9988	982	resetIndexSearcher(repositoryName);
d896e6	983	return true;
JM	984	} catch (Exception e) {
	985	logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
	986	}
	987	return false;
	988	}
	989
d04009	990	private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException {
d896e6	991	SearchResult result = new SearchResult();
d04009	992	result.hitId = hitId;
JM	993	result.totalHits = totalHits;
d896e6	994	result.score = score;
JM	995	result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
	996	result.summary = doc.get(FIELD_SUMMARY);
	997	result.author = doc.get(FIELD_AUTHOR);
	998	result.committer = doc.get(FIELD_COMMITTER);
	999	result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
	1000	result.branch = doc.get(FIELD_BRANCH);
	1001	result.commitId = doc.get(FIELD_COMMIT);
	1002	result.issueId = doc.get(FIELD_ISSUE);
	1003	result.path = doc.get(FIELD_PATH);
	1004	if (doc.get(FIELD_TAG) != null) {
	1005	result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
	1006	}
	1007	if (doc.get(FIELD_LABEL) != null) {
	1008	result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL));
	1009	}
	1010	return result;
	1011	}
	1012
	1013	private synchronized void resetIndexSearcher(String repository) throws IOException {
	1014	IndexSearcher searcher = searchers.remove(repository);
	1015	if (searcher != null) {
8e9988	1016	searcher.getIndexReader().close();
d896e6	1017	}
JM	1018	}
	1019
	1020	/**
	1021	* Gets an index searcher for the repository.
	1022	*
	1023	* @param repository
	1024	* @return
	1025	* @throws IOException
	1026	*/
	1027	private IndexSearcher getIndexSearcher(String repository) throws IOException {
	1028	IndexSearcher searcher = searchers.get(repository);
	1029	if (searcher == null) {
	1030	IndexWriter writer = getIndexWriter(repository);
	1031	searcher = new IndexSearcher(IndexReader.open(writer, true));
	1032	searchers.put(repository, searcher);
	1033	}
	1034	return searcher;
	1035	}
	1036
	1037	/**
	1038	* Gets an index writer for the repository. The index will be created if it
	1039	* does not already exist or if forceCreate is specified.
	1040	*
	1041	* @param repository
	1042	* @return an IndexWriter
	1043	* @throws IOException
	1044	*/
	1045	private IndexWriter getIndexWriter(String repository) throws IOException {
6ef2fc	1046	IndexWriter indexWriter = writers.get(repository);
JM	1047	File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED);
d896e6	1048	File indexFolder = new File(repositoryFolder, LUCENE_DIR);
JM	1049	Directory directory = FSDirectory.open(indexFolder);
	1050
	1051	if (indexWriter == null) {
	1052	if (!indexFolder.exists()) {
	1053	indexFolder.mkdirs();
	1054	}
	1055	StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
	1056	IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
	1057	config.setOpenMode(OpenMode.CREATE_OR_APPEND);
	1058	indexWriter = new IndexWriter(directory, config);
	1059	writers.put(repository, indexWriter);
	1060	}
	1061	return indexWriter;
	1062	}
	1063
	1064	/**
	1065	* Searches the specified repositories for the given text or query
	1066	*
	1067	* @param text
	1068	* if the text is null or empty, null is returned
d04009	1069	* @param page
JM	1070	* the page number to retrieve. page is 1-indexed.
	1071	* @param pageSize
	1072	* the number of elements to return for this page
d896e6	1073	* @param repositories
JM	1074	* a list of repositories to search. if no repositories are
	1075	* specified null is returned.
	1076	* @return a list of SearchResults in order from highest to the lowest score
	1077	*
	1078	*/
d04009	1079	public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) {
d896e6	1080	if (ArrayUtils.isEmpty(repositories)) {
JM	1081	return null;
	1082	}
d04009	1083	return search(text, page, pageSize, repositories.toArray(new String[0]));
d896e6	1084	}
JM	1085
	1086	/**
	1087	* Searches the specified repositories for the given text or query
	1088	*
	1089	* @param text
	1090	* if the text is null or empty, null is returned
d04009	1091	* @param page
JM	1092	* the page number to retrieve. page is 1-indexed.
	1093	* @param pageSize
	1094	* the number of elements to return for this page
d896e6	1095	* @param repositories
JM	1096	* a list of repositories to search. if no repositories are
	1097	* specified null is returned.
	1098	* @return a list of SearchResults in order from highest to the lowest score
	1099	*
d04009	1100	*/
JM	1101	public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
d896e6	1102	if (StringUtils.isEmpty(text)) {
JM	1103	return null;
	1104	}
	1105	if (ArrayUtils.isEmpty(repositories)) {
	1106	return null;
	1107	}
	1108	Set<SearchResult> results = new LinkedHashSet<SearchResult>();
	1109	StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
	1110	try {
	1111	// default search checks summary and content
	1112	BooleanQuery query = new BooleanQuery();
	1113	QueryParser qp;
	1114	qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
	1115	qp.setAllowLeadingWildcard(true);
	1116	query.add(qp.parse(text), Occur.SHOULD);
	1117
	1118	qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
	1119	qp.setAllowLeadingWildcard(true);
	1120	query.add(qp.parse(text), Occur.SHOULD);
	1121
	1122	IndexSearcher searcher;
	1123	if (repositories.length == 1) {
	1124	// single repository search
	1125	searcher = getIndexSearcher(repositories[0]);
	1126	} else {
	1127	// multiple repository search
	1128	List<IndexReader> readers = new ArrayList<IndexReader>();
	1129	for (String repository : repositories) {
	1130	IndexSearcher repositoryIndex = getIndexSearcher(repository);
	1131	readers.add(repositoryIndex.getIndexReader());
	1132	}
	1133	IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
905d31	1134	MultiSourceReader reader = new MultiSourceReader(rdrs);
d896e6	1135	searcher = new IndexSearcher(reader);
JM	1136	}
	1137	Query rewrittenQuery = searcher.rewrite(query);
d04009	1138	TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
d896e6	1139	searcher.search(rewrittenQuery, collector);
d04009	1140	int offset = Math.max(0, (page - 1) * pageSize);
JM	1141	ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
	1142	int totalHits = collector.getTotalHits();
d896e6	1143	for (int i = 0; i < hits.length; i++) {
JM	1144	int docId = hits[i].doc;
	1145	Document doc = searcher.doc(docId);
d04009	1146	SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
905d31	1147	if (repositories.length == 1) {
JM	1148	// single repository search
	1149	result.repository = repositories[0];
	1150	} else {
	1151	// multi-repository search
	1152	MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
	1153	int index = reader.getSourceIndex(docId);
	1154	result.repository = repositories[index];
	1155	}
d896e6	1156	String content = doc.get(FIELD_CONTENT);
JM	1157	result.fragment = getHighlightedFragment(analyzer, query, content, result);
	1158	results.add(result);
	1159	}
	1160	} catch (Exception e) {
	1161	logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
	1162	}
	1163	return new ArrayList<SearchResult>(results);
	1164	}
	1165
	1166	/**
	1167	*
	1168	* @param analyzer
	1169	* @param query
	1170	* @param content
	1171	* @param result
	1172	* @return
	1173	* @throws IOException
	1174	* @throws InvalidTokenOffsetsException
	1175	*/
	1176	private String getHighlightedFragment(Analyzer analyzer, Query query,
	1177	String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
12c31e	1178	if (content == null) {
JM	1179	content = "";
	1180	}
	1181
	1182	int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;
	1183
d896e6	1184	QueryScorer scorer = new QueryScorer(query, "content");
12c31e	1185	Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);
d896e6	1186
JM	1187	// use an artificial delimiter for the token
9f6ef3	1188	String termTag = "!!--[";
JM	1189	String termTagEnd = "]--!!";
d896e6	1190	SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
JM	1191	Highlighter highlighter = new Highlighter(formatter, scorer);
	1192	highlighter.setTextFragmenter(fragmenter);
12c31e	1193
73fba6	1194	String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
d896e6	1195	if (ArrayUtils.isEmpty(fragments)) {
JM	1196	if (SearchObjectType.blob == result.type) {
	1197	return "";
	1198	}
12c31e	1199	// clip commit message
JM	1200	String fragment = content;
	1201	if (fragment.length() > fragmentLength) {
	1202	fragment = fragment.substring(0, fragmentLength) + "...";
	1203	}
	1204	return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true) + "</pre>";
d896e6	1205	}
12c31e	1206
JM	1207	int contentPos = 0;
d896e6	1208	StringBuilder sb = new StringBuilder();
JM	1209	for (int i = 0, len = fragments.length; i < len; i++) {
	1210	String fragment = fragments[i];
12c31e	1211	String tag = "<pre class=\"text\">";
JM	1212
d896e6	1213	// resurrect the raw fragment from removing the artificial delimiters
12c31e	1214	String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
JM	1215
	1216	// determine position of the raw fragment in the content
	1217	int pos = content.indexOf(raw, contentPos);
	1218
	1219	// restore complete first line of fragment
	1220	int c = pos;
	1221	while (c > 0) {
	1222	c--;
	1223	if (content.charAt(c) == '\n') {
	1224	break;
	1225	}
	1226	}
	1227	if (c > 0) {
	1228	// inject leading chunk of first fragment line
	1229	fragment = content.substring(c + 1, pos) + fragment;
	1230	}
	1231
	1232	if (SearchObjectType.blob == result.type) {
	1233	// count lines as offset into the content for this fragment
c2833a	1234	int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));
12c31e	1235
JM	1236	// create fragment tag with line number and language
	1237	String lang = "";
	1238	String ext = StringUtils.getFileExtension(result.path).toLowerCase();
	1239	if (!StringUtils.isEmpty(ext)) {
	1240	// maintain leading space!
	1241	lang = " lang-" + ext;
	1242	}
	1243	tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);
	1244
	1245	// update offset into content
	1246	contentPos = pos + raw.length() + 1;
	1247	}
d896e6	1248
12c31e	1249	sb.append(tag);
JM	1250
d896e6	1251	// replace the artificial delimiter with html tags
9f6ef3	1252	String html = StringUtils.escapeForHtml(fragment, false);
JM	1253	html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
d896e6	1254	sb.append(html);
JM	1255	sb.append("</pre>");
	1256	if (i < len - 1) {
	1257	sb.append("<span class=\"ellipses\">...</span><br/>");
	1258	}
	1259	}
	1260	return sb.toString();
12c31e	1261	}
d896e6	1262
JM	1263	/**
	1264	* Simple class to track the results of an index update.
	1265	*/
	1266	private class IndexResult {
	1267	long startTime = System.currentTimeMillis();
	1268	long endTime = startTime;
	1269	boolean success;
	1270	int branchCount;
	1271	int commitCount;
	1272	int blobCount;
	1273	int issueCount;
	1274
	1275	void add(IndexResult result) {
	1276	this.branchCount += result.branchCount;
	1277	this.commitCount += result.commitCount;
	1278	this.blobCount += result.blobCount;
	1279	this.issueCount += result.issueCount;
	1280	}
	1281
	1282	void success() {
	1283	success = true;
	1284	endTime = System.currentTimeMillis();
	1285	}
	1286
	1287	float duration() {
	1288	return (endTime - startTime)/1000f;
	1289	}
b938ae	1290	}
905d31	1291
JM	1292	/**
	1293	* Custom subclass of MultiReader to identify the source index for a given
	1294	* doc id. This would not be necessary of there was a public method to
	1295	* obtain this information.
	1296	*
	1297	*/
	1298	private class MultiSourceReader extends MultiReader {
	1299
	1300	final Method method;
	1301
	1302	MultiSourceReader(IndexReader[] subReaders) {
	1303	super(subReaders);
	1304	Method m = null;
	1305	try {
	1306	m = MultiReader.class.getDeclaredMethod("readerIndex", int.class);
	1307	m.setAccessible(true);
	1308	} catch (Exception e) {
	1309	logger.error("Error getting readerIndex method", e);
	1310	}
	1311	method = m;
	1312	}
	1313
	1314	int getSourceIndex(int docId) {
	1315	int index = -1;
	1316	try {
	1317	Object o = method.invoke(this, docId);
	1318	index = (Integer) o;
	1319	} catch (Exception e) {
	1320	logger.error("Error getting source index", e);
	1321	}
	1322	return index;
	1323	}
	1324	}
e31da0	1325	}