githubFork/gitblit.git - Solinfo Gitblit

James Moger

2015-11-22 ed552ba47c02779c270ffd62841d6d1048dade70

commit \| author \| age
e31da0	1	/*
JM	2	* Copyright 2012 gitblit.com.
	3	*
	4	* Licensed under the Apache License, Version 2.0 (the "License");
	5	* you may not use this file except in compliance with the License.
	6	* You may obtain a copy of the License at
	7	*
	8	* http://www.apache.org/licenses/LICENSE-2.0
	9	*
	10	* Unless required by applicable law or agreed to in writing, software
	11	* distributed under the License is distributed on an "AS IS" BASIS,
	12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	13	* See the License for the specific language governing permissions and
	14	* limitations under the License.
	15	*/
7bf6e1	16	package com.gitblit.service;
e31da0	17
d896e6	18	import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;
e31da0	19
d896e6	20	import java.io.ByteArrayOutputStream;
JM	21	import java.io.File;
	22	import java.io.IOException;
	23	import java.io.InputStream;
	24	import java.text.MessageFormat;
	25	import java.text.ParseException;
	26	import java.util.ArrayList;
	27	import java.util.Collections;
	28	import java.util.Comparator;
	29	import java.util.HashMap;
	30	import java.util.LinkedHashSet;
	31	import java.util.List;
	32	import java.util.Map;
	33	import java.util.Set;
	34	import java.util.TreeMap;
	35	import java.util.TreeSet;
	36	import java.util.concurrent.ConcurrentHashMap;
	37
	38	import org.apache.lucene.analysis.Analyzer;
	39	import org.apache.lucene.analysis.standard.StandardAnalyzer;
	40	import org.apache.lucene.document.DateTools;
	41	import org.apache.lucene.document.DateTools.Resolution;
	42	import org.apache.lucene.document.Document;
	43	import org.apache.lucene.document.Field;
db9832	44	import org.apache.lucene.document.StringField;
JM	45	import org.apache.lucene.document.TextField;
	46	import org.apache.lucene.index.DirectoryReader;
d896e6	47	import org.apache.lucene.index.IndexReader;
JM	48	import org.apache.lucene.index.IndexWriter;
	49	import org.apache.lucene.index.IndexWriterConfig;
	50	import org.apache.lucene.index.IndexWriterConfig.OpenMode;
	51	import org.apache.lucene.index.MultiReader;
	52	import org.apache.lucene.index.Term;
db9832	53	import org.apache.lucene.queryparser.classic.QueryParser;
d896e6	54	import org.apache.lucene.search.BooleanClause.Occur;
JM	55	import org.apache.lucene.search.BooleanQuery;
	56	import org.apache.lucene.search.IndexSearcher;
	57	import org.apache.lucene.search.Query;
	58	import org.apache.lucene.search.ScoreDoc;
	59	import org.apache.lucene.search.TopScoreDocCollector;
	60	import org.apache.lucene.search.highlight.Fragmenter;
	61	import org.apache.lucene.search.highlight.Highlighter;
	62	import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
	63	import org.apache.lucene.search.highlight.QueryScorer;
	64	import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
	65	import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
	66	import org.apache.lucene.store.Directory;
	67	import org.apache.lucene.store.FSDirectory;
	68	import org.apache.lucene.util.Version;
	69	import org.eclipse.jgit.diff.DiffEntry.ChangeType;
	70	import org.eclipse.jgit.lib.Constants;
a02998	71	import org.eclipse.jgit.lib.FileMode;
d896e6	72	import org.eclipse.jgit.lib.ObjectId;
JM	73	import org.eclipse.jgit.lib.ObjectLoader;
	74	import org.eclipse.jgit.lib.ObjectReader;
e31da0	75	import org.eclipse.jgit.lib.Repository;
6ef2fc	76	import org.eclipse.jgit.lib.RepositoryCache.FileKey;
d896e6	77	import org.eclipse.jgit.revwalk.RevCommit;
JM	78	import org.eclipse.jgit.revwalk.RevTree;
	79	import org.eclipse.jgit.revwalk.RevWalk;
	80	import org.eclipse.jgit.storage.file.FileBasedConfig;
	81	import org.eclipse.jgit.treewalk.EmptyTreeIterator;
	82	import org.eclipse.jgit.treewalk.TreeWalk;
	83	import org.eclipse.jgit.util.FS;
e31da0	84	import org.slf4j.Logger;
JM	85	import org.slf4j.LoggerFactory;
	86
d896e6	87	import com.gitblit.Constants.SearchObjectType;
7bf6e1	88	import com.gitblit.IStoredSettings;
JM	89	import com.gitblit.Keys;
db4f6b	90	import com.gitblit.manager.IRepositoryManager;
d896e6	91	import com.gitblit.models.PathModel.PathChangeModel;
JM	92	import com.gitblit.models.RefModel;
40ca5c	93	import com.gitblit.models.RepositoryModel;
d896e6	94	import com.gitblit.models.SearchResult;
JM	95	import com.gitblit.utils.ArrayUtils;
e31da0	96	import com.gitblit.utils.JGitUtils;
d896e6	97	import com.gitblit.utils.StringUtils;
e31da0	98
JM	99	/**
7bf6e1	100	* The Lucene service handles indexing and searching repositories.
699e71	101	*
e31da0	102	* @author James Moger
699e71	103	*
e31da0	104	*/
7bf6e1	105	public class LuceneService implements Runnable {
699e71	106
JM	107
// Version of the index layout written by this class. shouldReindex() compares
// it with the version stored in lucene.conf and forces a rebuild on mismatch.
3a4470	108	private static final int INDEX_VERSION = 6;
e31da0	109
// Names of the fields stored on each Lucene document.
d896e6	110	private static final String FIELD_OBJECT_TYPE = "type";
JM	111	private static final String FIELD_PATH = "path";
	112	private static final String FIELD_COMMIT = "commit";
	113	private static final String FIELD_BRANCH = "branch";
	114	private static final String FIELD_SUMMARY = "summary";
	115	private static final String FIELD_CONTENT = "content";
	116	private static final String FIELD_AUTHOR = "author";
	117	private static final String FIELD_COMMITTER = "committer";
	118	private static final String FIELD_DATE = "date";
	119	private static final String FIELD_TAG = "tag";
	120
// CONF_FILE is the per-repository config file and LUCENE_DIR the index folder,
// both located in the repository directory. CONF_INDEX/CONF_VERSION address the
// stored index version; CONF_ALIAS maps a branch key to the branch name and
// CONF_BRANCH maps the same key to the last indexed tip commit id.
	121	private static final String CONF_FILE = "lucene.conf";
	122	private static final String LUCENE_DIR = "lucene";
	123	private static final String CONF_INDEX = "index";
	124	private static final String CONF_VERSION = "version";
	125	private static final String CONF_ALIAS = "aliases";
	126	private static final String CONF_BRANCH = "branches";
699e71	127
// Lucene compatibility version handed to the analyzer and the query parser.
3a4470	128	private static final Version LUCENE_VERSION = Version.LUCENE_4_10_0;
699e71	129
7bf6e1	130	private final Logger logger = LoggerFactory.getLogger(LuceneService.class);
699e71	131
d896e6	132	private final IStoredSettings storedSettings;
cacf8b	133	private final IRepositoryManager repositoryManager;
d896e6	134	private final File repositoriesFolder;
699e71	135
// Open searchers and writers, keyed by repository name.
d896e6	136	private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
JM	137	private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();
699e71	138
// Default value for Keys.web.luceneIgnoreExtensions: file extensions whose
// blob content is not indexed.
f1d2ad	139	private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip";
// Effective set of excluded extensions; set in the constructor and reloaded
// from the settings at the start of every run().
JM	140	private Set<String> excludedExtensions;
699e71	141
7bf6e1	142	public LuceneService(
cacf8b	143	IStoredSettings settings,
JM	144	IRepositoryManager repositoryManager) {
	145
d896e6	146	this.storedSettings = settings;
cacf8b	147	this.repositoryManager = repositoryManager;
JM	148	this.repositoriesFolder = repositoryManager.getRepositoriesFolder();
462488	149	String exts = luceneIgnoreExtensions;
JM	150	if (settings != null) {
	151	exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts);
	152	}
	153	excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
e31da0	154	}
JM	155
	156	/**
699e71	157	* Run is executed by the Gitblit executor service. Because this is called
273cb9	158	* by an executor service, calls will queue - i.e. there can never be
JM	159	* concurrent execution of repository index updates.
e31da0	160	*/
JM	161	@Override
	162	public void run() {
7db092	163	if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) {
JM	164	// Lucene indexing is disabled
	165	return;
	166	}
f1d2ad	167	// reload the excluded extensions
JM	168	String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions);
	169	excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
	170
db4f6b	171	if (repositoryManager.isCollectingGarbage()) {
dad8b4	172	// busy collecting garbage, try again later
JM	173	return;
	174	}
699e71	175
db4f6b	176	for (String repositoryName: repositoryManager.getRepositoryList()) {
JM	177	RepositoryModel model = repositoryManager.getRepositoryModel(repositoryName);
40ca5c	178	if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
db4f6b	179	Repository repository = repositoryManager.getRepository(model.name);
e92c6d	180	if (repository == null) {
db4f6b	181	if (repositoryManager.isCollectingGarbage(model.name)) {
e92c6d	182	logger.info(MessageFormat.format("Skipping Lucene index of {0}, busy garbage collecting", repositoryName));
JM	183	}
	184	continue;
	185	}
699e71	186	index(model, repository);
40ca5c	187	repository.close();
JM	188	System.gc();
e31da0	189	}
JM	190	}
	191	}
	192
	193	/**
	194	* Synchronously indexes a repository. This may build a complete index of a
	195	* repository or it may update an existing index.
699e71	196	*
3ad13e	197	* @param displayName
e31da0	198	* the name of the repository
JM	199	* @param repository
	200	* the repository object
	201	*/
9f6ef3	202	private void index(RepositoryModel model, Repository repository) {
e31da0	203	try {
40ca5c	204	if (shouldReindex(repository)) {
JM	205	// (re)build the entire index
	206	IndexResult result = reindex(model, repository);
	207
	208	if (result.success) {
	209	if (result.commitCount > 0) {
	210	String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
	211	logger.info(MessageFormat.format(msg, model.name, result.commitCount,
	212	result.blobCount, result.branchCount, result.duration()));
e31da0	213	}
JM	214	} else {
40ca5c	215	String msg = "Could not build {0} Lucene index!";
JM	216	logger.error(MessageFormat.format(msg, model.name));
e31da0	217	}
JM	218	} else {
40ca5c	219	// update the index with latest commits
JM	220	IndexResult result = updateIndex(model, repository);
	221	if (result.success) {
	222	if (result.commitCount > 0) {
	223	String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
	224	logger.info(MessageFormat.format(msg, model.name, result.commitCount,
	225	result.blobCount, result.branchCount, result.duration()));
	226	}
	227	} else {
	228	String msg = "Could not update {0} Lucene index!";
	229	logger.error(MessageFormat.format(msg, model.name));
	230	}
e31da0	231	}
JM	232	} catch (Throwable t) {
40ca5c	233	logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t);
e31da0	234	}
JM	235	}
699e71	236
e6637c	237	/**
JM	238	* Close the writer/searcher objects for a repository.
699e71	239	*
e6637c	240	* @param repositoryName
JM	241	*/
8e9988	242	public synchronized void close(String repositoryName) {
JM	243	try {
	244	IndexSearcher searcher = searchers.remove(repositoryName);
	245	if (searcher != null) {
	246	searcher.getIndexReader().close();
	247	}
	248	} catch (Exception e) {
	249	logger.error("Failed to close index searcher for " + repositoryName, e);
	250	}
699e71	251
e6637c	252	try {
JM	253	IndexWriter writer = writers.remove(repositoryName);
	254	if (writer != null) {
	255	writer.close();
	256	}
	257	} catch (Exception e) {
	258	logger.error("Failed to close index writer for " + repositoryName, e);
699e71	259	}
e6637c	260	}
b938ae	261
JM	262	/**
	263	* Close all Lucene indexers.
699e71	264	*
b938ae	265	*/
8e9988	266	public synchronized void close() {
d896e6	267	// close all writers
JM	268	for (String writer : writers.keySet()) {
	269	try {
60110f	270	writers.get(writer).close(true);
d896e6	271	} catch (Throwable t) {
JM	272	logger.error("Failed to close Lucene writer for " + writer, t);
	273	}
	274	}
	275	writers.clear();
	276
	277	// close all searchers
	278	for (String searcher : searchers.keySet()) {
	279	try {
8e9988	280	searchers.get(searcher).getIndexReader().close();
d896e6	281	} catch (Throwable t) {
JM	282	logger.error("Failed to close Lucene searcher for " + searcher, t);
	283	}
	284	}
	285	searchers.clear();
	286	}
	287
699e71	288
d896e6	289	/**
JM	290	* Deletes the Lucene index for the specified repository.
699e71	291	*
d896e6	292	* @param repositoryName
JM	293	* @return true, if successful
	294	*/
	295	public boolean deleteIndex(String repositoryName) {
	296	try {
8e9988	297	// close any open writer/searcher
JM	298	close(repositoryName);
	299
d896e6	300	// delete the index folder
eb741a	301	File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repositoryName), FS.DETECTED);
d896e6	302	File luceneIndex = new File(repositoryFolder, LUCENE_DIR);
JM	303	if (luceneIndex.exists()) {
	304	org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
	305	org.eclipse.jgit.util.FileUtils.RECURSIVE);
	306	}
	307	// delete the config file
	308	File luceneConfig = new File(repositoryFolder, CONF_FILE);
	309	if (luceneConfig.exists()) {
	310	luceneConfig.delete();
	311	}
	312	return true;
	313	} catch (IOException e) {
	314	throw new RuntimeException(e);
	315	}
	316	}
699e71	317
d896e6	318	/**
JM	319	* Returns the author for the commit, if this information is available.
699e71	320	*
d896e6	321	* @param commit
JM	322	* @return an author or unknown
	323	*/
	324	private String getAuthor(RevCommit commit) {
	325	String name = "unknown";
	326	try {
	327	name = commit.getAuthorIdent().getName();
	328	if (StringUtils.isEmpty(name)) {
	329	name = commit.getAuthorIdent().getEmailAddress();
	330	}
699e71	331	} catch (NullPointerException n) {
d896e6	332	}
JM	333	return name;
	334	}
699e71	335
d896e6	336	/**
JM	337	* Returns the committer for the commit, if this information is available.
699e71	338	*
d896e6	339	* @param commit
JM	340	* @return an committer or unknown
	341	*/
	342	private String getCommitter(RevCommit commit) {
	343	String name = "unknown";
	344	try {
	345	name = commit.getCommitterIdent().getName();
	346	if (StringUtils.isEmpty(name)) {
	347	name = commit.getCommitterIdent().getEmailAddress();
	348	}
699e71	349	} catch (NullPointerException n) {
d896e6	350	}
JM	351	return name;
	352	}
699e71	353
905d31	354	/**
JM	355	* Get the tree associated with the given commit.
	356	*
	357	* @param walk
	358	* @param commit
	359	* @return tree
	360	* @throws IOException
	361	*/
9f6ef3	362	private RevTree getTree(final RevWalk walk, final RevCommit commit)
905d31	363	throws IOException {
JM	364	final RevTree tree = commit.getTree();
	365	if (tree != null) {
	366	return tree;
	367	}
	368	walk.parseHeaders(commit);
	369	return commit.getTree();
	370	}
d896e6	371
JM	372	/**
	373	* Construct a keyname from the branch.
699e71	374	*
d896e6	375	* @param branchName
JM	376	* @return a keyname appropriate for the Git config file format
	377	*/
	378	private String getBranchKey(String branchName) {
	379	return StringUtils.getSHA1(branchName);
	380	}
	381
	382	/**
	383	* Returns the Lucene configuration for the specified repository.
699e71	384	*
d896e6	385	* @param repository
JM	386	* @return a config object
	387	*/
	388	private FileBasedConfig getConfig(Repository repository) {
	389	File file = new File(repository.getDirectory(), CONF_FILE);
	390	FileBasedConfig config = new FileBasedConfig(file, FS.detect());
	391	return config;
	392	}
	393
	394	/**
	395	* Reads the Lucene config file for the repository to check the index
	396	* version. If the index version is different, then rebuild the repository
	397	* index.
699e71	398	*
d896e6	399	* @param repository
JM	400	* @return true of the on-disk index format is different than INDEX_VERSION
	401	*/
9f6ef3	402	private boolean shouldReindex(Repository repository) {
d896e6	403	try {
JM	404	FileBasedConfig config = getConfig(repository);
	405	config.load();
	406	int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
	407	// reindex if versions do not match
	408	return indexVersion != INDEX_VERSION;
	409	} catch (Throwable t) {
	410	}
	411	return true;
	412	}
	413
	414
	415	/**
	416	* This completely indexes the repository and will destroy any existing
	417	* index.
699e71	418	*
d896e6	419	* @param repositoryName
JM	420	* @param repository
	421	* @return IndexResult
	422	*/
40ca5c	423	public IndexResult reindex(RepositoryModel model, Repository repository) {
699e71	424	IndexResult result = new IndexResult();
40ca5c	425	if (!deleteIndex(model.name)) {
d896e6	426	return result;
JM	427	}
fa0afc	428	try {
JM	429	String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
d896e6	430	FileBasedConfig config = getConfig(repository);
JM	431	Set<String> indexedCommits = new TreeSet<String>();
40ca5c	432	IndexWriter writer = getIndexWriter(model.name);
d896e6	433	// build a quick lookup of tags
JM	434	Map<String, List<String>> tags = new HashMap<String, List<String>>();
	435	for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
	436	if (!tag.isAnnotatedTag()) {
	437	// skip non-annotated tags
	438	continue;
	439	}
d0bb38	440	if (!tags.containsKey(tag.getReferencedObjectId().getName())) {
d896e6	441	tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
JM	442	}
	443	tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
	444	}
699e71	445
d896e6	446	ObjectReader reader = repository.newObjectReader();
JM	447
	448	// get the local branches
	449	List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
699e71	450
d896e6	451	// sort them by most recently updated
JM	452	Collections.sort(branches, new Comparator<RefModel>() {
	453	@Override
	454	public int compare(RefModel ref1, RefModel ref2) {
	455	return ref2.getDate().compareTo(ref1.getDate());
	456	}
	457	});
699e71	458
d896e6	459	// reorder default branch to first position
JM	460	RefModel defaultBranch = null;
	461	ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
	462	for (RefModel branch : branches) {
	463	if (branch.getObjectId().equals(defaultBranchId)) {
1aabf0	464	defaultBranch = branch;
d896e6	465	break;
JM	466	}
	467	}
	468	branches.remove(defaultBranch);
	469	branches.add(0, defaultBranch);
699e71	470
d896e6	471	// walk through each branch
JM	472	for (RefModel branch : branches) {
40ca5c	473
1aabf0	474	boolean indexBranch = false;
JM	475	if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
	476	&& branch.equals(defaultBranch)) {
	477	// indexing "default" branch
	478	indexBranch = true;
c134a0	479	} else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
JM	480	// skip internal meta branches
1aabf0	481	indexBranch = false;
JM	482	} else {
	483	// normal explicit branch check
	484	indexBranch = model.indexedBranches.contains(branch.getName());
	485	}
699e71	486
40ca5c	487	// if this branch is not specifically indexed then skip
1aabf0	488	if (!indexBranch) {
d896e6	489	continue;
JM	490	}
	491
	492	String branchName = branch.getName();
	493	RevWalk revWalk = new RevWalk(reader);
	494	RevCommit tip = revWalk.parseCommit(branch.getObjectId());
	495	String tipId = tip.getId().getName();
	496
	497	String keyName = getBranchKey(branchName);
	498	config.setString(CONF_ALIAS, null, keyName, branchName);
	499	config.setString(CONF_BRANCH, null, keyName, tipId);
	500
	501	// index the blob contents of the tree
	502	TreeWalk treeWalk = new TreeWalk(repository);
	503	treeWalk.addTree(tip.getTree());
699e71	504	treeWalk.setRecursive(true);
JM	505
d896e6	506	Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
JM	507	while (treeWalk.next()) {
749110	508	// ensure path is not in a submodule
a02998	509	if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
PA	510	paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
	511	}
699e71	512	}
d896e6	513
JM	514	ByteArrayOutputStream os = new ByteArrayOutputStream();
	515	byte[] tmp = new byte[32767];
	516
	517	RevWalk commitWalk = new RevWalk(reader);
	518	commitWalk.markStart(tip);
699e71	519
d896e6	520	RevCommit commit;
JM	521	while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
	522	TreeWalk diffWalk = new TreeWalk(reader);
	523	int parentCount = commit.getParentCount();
	524	switch (parentCount) {
	525	case 0:
	526	diffWalk.addTree(new EmptyTreeIterator());
	527	break;
	528	case 1:
	529	diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
	530	break;
	531	default:
	532	// skip merge commits
	533	continue;
	534	}
	535	diffWalk.addTree(getTree(commitWalk, commit));
	536	diffWalk.setFilter(ANY_DIFF);
	537	diffWalk.setRecursive(true);
	538	while ((paths.size() > 0) && diffWalk.next()) {
	539	String path = diffWalk.getPathString();
	540	if (!paths.containsKey(path)) {
	541	continue;
	542	}
699e71	543
d896e6	544	// remove path from set
JM	545	ObjectId blobId = paths.remove(path);
	546	result.blobCount++;
699e71	547
d896e6	548	// index the blob metadata
JM	549	String blobAuthor = getAuthor(commit);
	550	String blobCommitter = getCommitter(commit);
	551	String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
	552	Resolution.MINUTE);
699e71	553
d896e6	554	Document doc = new Document();
db9832	555	doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
JM	556	doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
	557	doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
	558	doc.add(new Field(FIELD_PATH, path, TextField.TYPE_STORED));
	559	doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED));
	560	doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED));
	561	doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED));
d896e6	562
JM	563	// determine extension to compare to the extension
	564	// blacklist
	565	String ext = null;
	566	String name = path.toLowerCase();
	567	if (name.indexOf('.') > -1) {
	568	ext = name.substring(name.lastIndexOf('.') + 1);
	569	}
	570
	571	// index the blob content
699e71	572	if (StringUtils.isEmpty(ext) \|\| !excludedExtensions.contains(ext)) {
d896e6	573	ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
699e71	574	InputStream in = ldr.openStream();
d896e6	575	int n;
JM	576	while ((n = in.read(tmp)) > 0) {
	577	os.write(tmp, 0, n);
	578	}
	579	in.close();
	580	byte[] content = os.toByteArray();
699e71	581	String str = StringUtils.decodeString(content, encodings);
db9832	582	doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
d896e6	583	os.reset();
699e71	584	}
JM	585
d896e6	586	// add the blob to the index
JM	587	writer.addDocument(doc);
	588	}
	589	}
	590
	591	os.close();
	592
	593	// index the tip commit object
	594	if (indexedCommits.add(tipId)) {
	595	Document doc = createDocument(tip, tags.get(tipId));
db9832	596	doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
d896e6	597	writer.addDocument(doc);
JM	598	result.commitCount += 1;
	599	result.branchCount += 1;
	600	}
	601
	602	// traverse the log and index the previous commit objects
	603	RevWalk historyWalk = new RevWalk(reader);
	604	historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
	605	RevCommit rev;
	606	while ((rev = historyWalk.next()) != null) {
	607	String hash = rev.getId().getName();
	608	if (indexedCommits.add(hash)) {
	609	Document doc = createDocument(rev, tags.get(hash));
db9832	610	doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
d896e6	611	writer.addDocument(doc);
JM	612	result.commitCount += 1;
	613	}
	614	}
	615	}
	616
	617	// finished
a1cee6	618	reader.close();
699e71	619
d896e6	620	// commit all changes and reset the searcher
JM	621	config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
	622	config.save();
	623	writer.commit();
8e9988	624	resetIndexSearcher(model.name);
d896e6	625	result.success();
JM	626	} catch (Exception e) {
40ca5c	627	logger.error("Exception while reindexing " + model.name, e);
d896e6	628	}
JM	629	return result;
	630	}
699e71	631
d896e6	632	/**
JM	633	* Incrementally update the index with the specified commit for the
	634	* repository.
699e71	635	*
d896e6	636	* @param repositoryName
JM	637	* @param repository
	638	* @param branch
	639	* the fully qualified branch name (e.g. refs/heads/master)
	640	* @param commit
	641	* @return true, if successful
	642	*/
699e71	643	private IndexResult index(String repositoryName, Repository repository,
d896e6	644	String branch, RevCommit commit) {
JM	645	IndexResult result = new IndexResult();
	646	try {
ae9e15	647	String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
d896e6	648	List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
JM	649	String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
	650	Resolution.MINUTE);
	651	IndexWriter writer = getIndexWriter(repositoryName);
	652	for (PathChangeModel path : changedPaths) {
88fb67	653	if (path.isSubmodule()) {
JM	654	continue;
	655	}
d896e6	656	// delete the indexed blob
856091	657	deleteBlob(repositoryName, branch, path.name);
d896e6	658
JM	659	// re-index the blob
	660	if (!ChangeType.DELETE.equals(path.changeType)) {
	661	result.blobCount++;
	662	Document doc = new Document();
db9832	663	doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
JM	664	doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
	665	doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
	666	doc.add(new Field(FIELD_PATH, path.path, TextField.TYPE_STORED));
	667	doc.add(new Field(FIELD_DATE, revDate, StringField.TYPE_STORED));
	668	doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
	669	doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
d896e6	670
JM	671	// determine extension to compare to the extension
	672	// blacklist
	673	String ext = null;
	674	String name = path.name.toLowerCase();
	675	if (name.indexOf('.') > -1) {
	676	ext = name.substring(name.lastIndexOf('.') + 1);
	677	}
	678
	679	if (StringUtils.isEmpty(ext) \|\| !excludedExtensions.contains(ext)) {
	680	// read the blob content
	681	String str = JGitUtils.getStringContent(repository, commit.getTree(),
ae9e15	682	path.path, encodings);
749110	683	if (str != null) {
db9832	684	doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
749110	685	writer.addDocument(doc);
JM	686	}
d896e6	687	}
JM	688	}
	689	}
	690	writer.commit();
699e71	691
261024	692	// get any annotated commit tags
JM	693	List<String> commitTags = new ArrayList<String>();
33ceba	694	for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
261024	695	if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
JM	696	commitTags.add(ref.displayName);
	697	}
	698	}
699e71	699
261024	700	// create and write the Lucene document
JM	701	Document doc = createDocument(commit, commitTags);
db9832	702	doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
d896e6	703	result.commitCount++;
JM	704	result.success = index(repositoryName, doc);
	705	} catch (Exception e) {
	706	logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e);
	707	}
	708	return result;
	709	}
	710
	711	/**
	712	* Delete a blob from the specified branch of the repository index.
699e71	713	*
d896e6	714	* @param repositoryName
JM	715	* @param branch
	716	* @param path
	717	* @throws Exception
87ee94	718	* @return true, if deleted, false if no record was deleted
d896e6	719	*/
87ee94	720	public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception {
JM	721	String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH);
	722	String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path);
699e71	723
87ee94	724	BooleanQuery query = new BooleanQuery();
60110f	725	StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
JM	726	QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
87ee94	727	query.add(qp.parse(q), Occur.MUST);
JM	728
d896e6	729	IndexWriter writer = getIndexWriter(repositoryName);
87ee94	730	int numDocsBefore = writer.numDocs();
699e71	731	writer.deleteDocuments(query);
d896e6	732	writer.commit();
87ee94	733	int numDocsAfter = writer.numDocs();
JM	734	if (numDocsBefore == numDocsAfter) {
	735	logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
	736	return false;
	737	} else {
	738	logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
	739	return true;
	740	}
d896e6	741	}
JM	742
	743	/**
	744	* Updates a repository index incrementally from the last indexed commits.
699e71	745	*
40ca5c	746	* @param model
d896e6	747	* @param repository
JM	748	* @return IndexResult
	749	*/
9f6ef3	750	private IndexResult updateIndex(RepositoryModel model, Repository repository) {
d896e6	751	IndexResult result = new IndexResult();
JM	752	try {
	753	FileBasedConfig config = getConfig(repository);
	754	config.load();
	755
	756	// build a quick lookup of annotated tags
	757	Map<String, List<String>> tags = new HashMap<String, List<String>>();
	758	for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
	759	if (!tag.isAnnotatedTag()) {
	760	// skip non-annotated tags
	761	continue;
	762	}
b1d77a	763	if (!tags.containsKey(tag.getObjectId().getName())) {
d896e6	764	tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
JM	765	}
	766	tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
	767	}
	768
	769	// detect branch deletion
	770	// first assume all branches are deleted and then remove each
	771	// existing branch from deletedBranches during indexing
	772	Set<String> deletedBranches = new TreeSet<String>();
	773	for (String alias : config.getNames(CONF_ALIAS)) {
	774	String branch = config.getString(CONF_ALIAS, null, alias);
	775	deletedBranches.add(branch);
	776	}
	777
1aabf0	778	// get the local branches
d896e6	779	List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
699e71	780
1aabf0	781	// sort them by most recently updated
JM	782	Collections.sort(branches, new Comparator<RefModel>() {
	783	@Override
	784	public int compare(RefModel ref1, RefModel ref2) {
	785	return ref2.getDate().compareTo(ref1.getDate());
	786	}
	787	});
699e71	788
1aabf0	789	// reorder default branch to first position
JM	790	RefModel defaultBranch = null;
	791	ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
	792	for (RefModel branch : branches) {
	793	if (branch.getObjectId().equals(defaultBranchId)) {
	794	defaultBranch = branch;
	795	break;
	796	}
	797	}
	798	branches.remove(defaultBranch);
	799	branches.add(0, defaultBranch);
699e71	800
1aabf0	801	// walk through each branches
d896e6	802	for (RefModel branch : branches) {
JM	803	String branchName = branch.getName();
	804
1aabf0	805	boolean indexBranch = false;
JM	806	if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
	807	&& branch.equals(defaultBranch)) {
	808	// indexing "default" branch
	809	indexBranch = true;
c134a0	810	} else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
JM	811	// ignore internal meta branches
a04808	812	indexBranch = false;
1aabf0	813	} else {
JM	814	// normal explicit branch check
	815	indexBranch = model.indexedBranches.contains(branch.getName());
	816	}
699e71	817
1aabf0	818	// if this branch is not specifically indexed then skip
JM	819	if (!indexBranch) {
40ca5c	820	continue;
JM	821	}
699e71	822
d896e6	823	// remove this branch from the deletedBranches set
JM	824	deletedBranches.remove(branchName);
699e71	825
d896e6	826	// determine last commit
JM	827	String keyName = getBranchKey(branchName);
	828	String lastCommit = config.getString(CONF_BRANCH, null, keyName);
	829
	830	List<RevCommit> revs;
	831	if (StringUtils.isEmpty(lastCommit)) {
	832	// new branch/unindexed branch, get all commits on branch
	833	revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
	834	} else {
	835	// pre-existing branch, get changes since last commit
	836	revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
	837	}
	838
	839	if (revs.size() > 0) {
	840	result.branchCount += 1;
	841	}
699e71	842
JM	843	// reverse the list of commits so we start with the first commit
d896e6	844	Collections.reverse(revs);
699e71	845	for (RevCommit commit : revs) {
a04808	846	// index a commit
JM	847	result.add(index(model.name, repository, branchName, commit));
d896e6	848	}
JM	849
	850	// update the config
	851	config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
	852	config.setString(CONF_ALIAS, null, keyName, branchName);
	853	config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
	854	config.save();
	855	}
	856
	857	// the deletedBranches set will normally be empty by this point
	858	// unless a branch really was deleted and no longer exists
	859	if (deletedBranches.size() > 0) {
	860	for (String branch : deletedBranches) {
40ca5c	861	IndexWriter writer = getIndexWriter(model.name);
d896e6	862	writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
JM	863	writer.commit();
	864	}
	865	}
	866	result.success = true;
	867	} catch (Throwable t) {
40ca5c	868	logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
d896e6	869	}
JM	870	return result;
	871	}
699e71	872
d896e6	873	/**
JM	874	* Creates a Lucene document for a commit
699e71	875	*
d896e6	876	* @param commit
JM	877	* @param tags
	878	* @return a Lucene document
	879	*/
	880	private Document createDocument(RevCommit commit, List<String> tags) {
	881	Document doc = new Document();
db9832	882	doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), StringField.TYPE_STORED));
JM	883	doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
d896e6	884	doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
db9832	885	Resolution.MINUTE), StringField.TYPE_STORED));
JM	886	doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
	887	doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
	888	doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), TextField.TYPE_STORED));
	889	doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), TextField.TYPE_STORED));
d896e6	890	if (!ArrayUtils.isEmpty(tags)) {
db9832	891	doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), TextField.TYPE_STORED));
d896e6	892	}
JM	893	return doc;
	894	}
	895
	896	/**
	897	* Incrementally index an object for the repository.
699e71	898	*
d896e6	899	* @param repositoryName
JM	900	* @param doc
	901	* @return true, if successful
	902	*/
	903	private boolean index(String repositoryName, Document doc) {
699e71	904	try {
d896e6	905	IndexWriter writer = getIndexWriter(repositoryName);
JM	906	writer.addDocument(doc);
	907	writer.commit();
8e9988	908	resetIndexSearcher(repositoryName);
d896e6	909	return true;
JM	910	} catch (Exception e) {
	911	logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
	912	}
	913	return false;
	914	}
	915
d04009	916	private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException {
d896e6	917	SearchResult result = new SearchResult();
d04009	918	result.hitId = hitId;
JM	919	result.totalHits = totalHits;
d896e6	920	result.score = score;
JM	921	result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
699e71	922	result.summary = doc.get(FIELD_SUMMARY);
d896e6	923	result.author = doc.get(FIELD_AUTHOR);
JM	924	result.committer = doc.get(FIELD_COMMITTER);
	925	result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
	926	result.branch = doc.get(FIELD_BRANCH);
	927	result.commitId = doc.get(FIELD_COMMIT);
	928	result.path = doc.get(FIELD_PATH);
	929	if (doc.get(FIELD_TAG) != null) {
	930	result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
	931	}
	932	return result;
	933	}
	934
	935	private synchronized void resetIndexSearcher(String repository) throws IOException {
	936	IndexSearcher searcher = searchers.remove(repository);
	937	if (searcher != null) {
8e9988	938	searcher.getIndexReader().close();
d896e6	939	}
JM	940	}
	941
	942	/**
	943	* Gets an index searcher for the repository.
699e71	944	*
d896e6	945	* @param repository
JM	946	* @return
	947	* @throws IOException
	948	*/
	949	private IndexSearcher getIndexSearcher(String repository) throws IOException {
	950	IndexSearcher searcher = searchers.get(repository);
	951	if (searcher == null) {
	952	IndexWriter writer = getIndexWriter(repository);
db9832	953	searcher = new IndexSearcher(DirectoryReader.open(writer, true));
d896e6	954	searchers.put(repository, searcher);
JM	955	}
	956	return searcher;
	957	}
	958
	959	/**
	960	* Gets an index writer for the repository. The index will be created if it
	961	* does not already exist or if forceCreate is specified.
699e71	962	*
d896e6	963	* @param repository
JM	964	* @return an IndexWriter
	965	* @throws IOException
	966	*/
	967	private IndexWriter getIndexWriter(String repository) throws IOException {
699e71	968	IndexWriter indexWriter = writers.get(repository);
6ef2fc	969	File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED);
d896e6	970	File indexFolder = new File(repositoryFolder, LUCENE_DIR);
60110f	971	Directory directory = FSDirectory.open(indexFolder);
d896e6	972
JM	973	if (indexWriter == null) {
	974	if (!indexFolder.exists()) {
	975	indexFolder.mkdirs();
	976	}
60110f	977	StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
JM	978	IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
d896e6	979	config.setOpenMode(OpenMode.CREATE_OR_APPEND);
JM	980	indexWriter = new IndexWriter(directory, config);
	981	writers.put(repository, indexWriter);
	982	}
	983	return indexWriter;
	984	}
	985
	986	/**
	987	* Searches the specified repositories for the given text or query
699e71	988	*
d896e6	989	* @param text
JM	990	* if the text is null or empty, null is returned
d04009	991	* @param page
JM	992	* the page number to retrieve. page is 1-indexed.
	993	* @param pageSize
	994	* the number of elements to return for this page
d896e6	995	* @param repositories
JM	996	* a list of repositories to search. if no repositories are
	997	* specified null is returned.
	998	* @return a list of SearchResults in order from highest to the lowest score
699e71	999	*
d896e6	1000	*/
d04009	1001	public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) {
d896e6	1002	if (ArrayUtils.isEmpty(repositories)) {
JM	1003	return null;
	1004	}
d04009	1005	return search(text, page, pageSize, repositories.toArray(new String[0]));
d896e6	1006	}
699e71	1007
d896e6	1008	/**
JM	1009	* Searches the specified repositories for the given text or query
699e71	1010	*
d896e6	1011	* @param text
JM	1012	* if the text is null or empty, null is returned
d04009	1013	* @param page
JM	1014	* the page number to retrieve. page is 1-indexed.
	1015	* @param pageSize
	1016	* the number of elements to return for this page
d896e6	1017	* @param repositories
JM	1018	* a list of repositories to search. if no repositories are
	1019	* specified null is returned.
	1020	* @return a list of SearchResults in order from highest to the lowest score
699e71	1021	*
d04009	1022	*/
JM	1023	public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
d896e6	1024	if (StringUtils.isEmpty(text)) {
JM	1025	return null;
	1026	}
	1027	if (ArrayUtils.isEmpty(repositories)) {
	1028	return null;
	1029	}
	1030	Set<SearchResult> results = new LinkedHashSet<SearchResult>();
60110f	1031	StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
d896e6	1032	try {
JM	1033	// default search checks summary and content
	1034	BooleanQuery query = new BooleanQuery();
	1035	QueryParser qp;
60110f	1036	qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
d896e6	1037	qp.setAllowLeadingWildcard(true);
JM	1038	query.add(qp.parse(text), Occur.SHOULD);
	1039
60110f	1040	qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
d896e6	1041	qp.setAllowLeadingWildcard(true);
JM	1042	query.add(qp.parse(text), Occur.SHOULD);
699e71	1043
d896e6	1044	IndexSearcher searcher;
JM	1045	if (repositories.length == 1) {
	1046	// single repository search
	1047	searcher = getIndexSearcher(repositories[0]);
	1048	} else {
	1049	// multiple repository search
	1050	List<IndexReader> readers = new ArrayList<IndexReader>();
	1051	for (String repository : repositories) {
	1052	IndexSearcher repositoryIndex = getIndexSearcher(repository);
	1053	readers.add(repositoryIndex.getIndexReader());
	1054	}
	1055	IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
905d31	1056	MultiSourceReader reader = new MultiSourceReader(rdrs);
d896e6	1057	searcher = new IndexSearcher(reader);
JM	1058	}
699e71	1059
d896e6	1060	Query rewrittenQuery = searcher.rewrite(query);
87ee94	1061	logger.debug(rewrittenQuery.toString());
JM	1062
60110f	1063	TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
d896e6	1064	searcher.search(rewrittenQuery, collector);
d04009	1065	int offset = Math.max(0, (page - 1) * pageSize);
JM	1066	ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
	1067	int totalHits = collector.getTotalHits();
d896e6	1068	for (int i = 0; i < hits.length; i++) {
JM	1069	int docId = hits[i].doc;
	1070	Document doc = searcher.doc(docId);
d04009	1071	SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
905d31	1072	if (repositories.length == 1) {
JM	1073	// single repository search
	1074	result.repository = repositories[0];
	1075	} else {
	1076	// multi-repository search
	1077	MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
	1078	int index = reader.getSourceIndex(docId);
	1079	result.repository = repositories[index];
	1080	}
699e71	1081	String content = doc.get(FIELD_CONTENT);
d896e6	1082	result.fragment = getHighlightedFragment(analyzer, query, content, result);
JM	1083	results.add(result);
	1084	}
	1085	} catch (Exception e) {
	1086	logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
	1087	}
	1088	return new ArrayList<SearchResult>(results);
	1089	}
699e71	1090
d896e6	1091	/**
699e71	1092	*
d896e6	1093	* @param analyzer
JM	1094	* @param query
	1095	* @param content
	1096	* @param result
	1097	* @return
	1098	* @throws IOException
	1099	* @throws InvalidTokenOffsetsException
	1100	*/
	1101	private String getHighlightedFragment(Analyzer analyzer, Query query,
	1102	String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
12c31e	1103	if (content == null) {
JM	1104	content = "";
699e71	1105	}
12c31e	1106
310a80	1107	int tabLength = storedSettings.getInteger(Keys.web.tabLength, 4);
12c31e	1108	int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;
JM	1109
d896e6	1110	QueryScorer scorer = new QueryScorer(query, "content");
699e71	1111	Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);
d896e6	1112
JM	1113	// use an artificial delimiter for the token
9f6ef3	1114	String termTag = "!!--[";
JM	1115	String termTagEnd = "]--!!";
d896e6	1116	SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
699e71	1117	Highlighter highlighter = new Highlighter(formatter, scorer);
d896e6	1118	highlighter.setTextFragmenter(fragmenter);
12c31e	1119
73fba6	1120	String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
d896e6	1121	if (ArrayUtils.isEmpty(fragments)) {
JM	1122	if (SearchObjectType.blob == result.type) {
	1123	return "";
	1124	}
12c31e	1125	// clip commit message
JM	1126	String fragment = content;
	1127	if (fragment.length() > fragmentLength) {
	1128	fragment = fragment.substring(0, fragmentLength) + "...";
	1129	}
310a80	1130	return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true, tabLength) + "</pre>";
d896e6	1131	}
699e71	1132
2b67ec	1133	// make sure we have unique fragments
JM	1134	Set<String> uniqueFragments = new LinkedHashSet<String>();
	1135	for (String fragment : fragments) {
	1136	uniqueFragments.add(fragment);
	1137	}
	1138	fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);
699e71	1139
d896e6	1140	StringBuilder sb = new StringBuilder();
JM	1141	for (int i = 0, len = fragments.length; i < len; i++) {
	1142	String fragment = fragments[i];
12c31e	1143	String tag = "<pre class=\"text\">";
JM	1144
d896e6	1145	// resurrect the raw fragment from removing the artificial delimiters
12c31e	1146	String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
JM	1147
	1148	// determine position of the raw fragment in the content
2b67ec	1149	int pos = content.indexOf(raw);
699e71	1150
12c31e	1151	// restore complete first line of fragment
JM	1152	int c = pos;
	1153	while (c > 0) {
	1154	c--;
	1155	if (content.charAt(c) == '\n') {
	1156	break;
	1157	}
	1158	}
	1159	if (c > 0) {
	1160	// inject leading chunk of first fragment line
	1161	fragment = content.substring(c + 1, pos) + fragment;
	1162	}
699e71	1163
12c31e	1164	if (SearchObjectType.blob == result.type) {
JM	1165	// count lines as offset into the content for this fragment
c2833a	1166	int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));
699e71	1167
12c31e	1168	// create fragment tag with line number and language
JM	1169	String lang = "";
	1170	String ext = StringUtils.getFileExtension(result.path).toLowerCase();
	1171	if (!StringUtils.isEmpty(ext)) {
	1172	// maintain leading space!
	1173	lang = " lang-" + ext;
	1174	}
	1175	tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);
699e71	1176
12c31e	1177	}
699e71	1178
12c31e	1179	sb.append(tag);
JM	1180
d896e6	1181	// replace the artificial delimiter with html tags
9f6ef3	1182	String html = StringUtils.escapeForHtml(fragment, false);
JM	1183	html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
d896e6	1184	sb.append(html);
JM	1185	sb.append("</pre>");
	1186	if (i < len - 1) {
	1187	sb.append("<span class=\"ellipses\">...</span><br/>");
	1188	}
	1189	}
	1190	return sb.toString();
699e71	1191	}
JM	1192
d896e6	1193	/**
699e71	1194	* Simple class to track the results of an index update.
d896e6	1195	*/
JM	1196	private class IndexResult {
	1197	long startTime = System.currentTimeMillis();
	1198	long endTime = startTime;
	1199	boolean success;
	1200	int branchCount;
	1201	int commitCount;
	1202	int blobCount;
699e71	1203
d896e6	1204	void add(IndexResult result) {
JM	1205	this.branchCount += result.branchCount;
	1206	this.commitCount += result.commitCount;
	1207	this.blobCount += result.blobCount;
	1208	}
699e71	1209
d896e6	1210	void success() {
JM	1211	success = true;
	1212	endTime = System.currentTimeMillis();
	1213	}
699e71	1214
d896e6	1215	float duration() {
JM	1216	return (endTime - startTime)/1000f;
	1217	}
b938ae	1218	}
699e71	1219
905d31	1220	/**
JM	1221	* Custom subclass of MultiReader to identify the source index for a given
	1222	* doc id. This would not be necessary of there was a public method to
	1223	* obtain this information.
699e71	1224	*
905d31	1225	*/
JM	1226	private class MultiSourceReader extends MultiReader {
699e71	1227
60110f	1228	MultiSourceReader(IndexReader [] readers) {
db9832	1229	super(readers, false);
905d31	1230	}
699e71	1231
905d31	1232	int getSourceIndex(int docId) {
JM	1233	int index = -1;
	1234	try {
db9832	1235	index = super.readerIndex(docId);
905d31	1236	} catch (Exception e) {
JM	1237	logger.error("Error getting source index", e);
	1238	}
	1239	return index;
	1240	}
	1241	}
e31da0	1242	}