From 27ae9095639bb228a1b7ff86a3ebe4264abf05be Mon Sep 17 00:00:00 2001
From: mschaefers <mschaefers@scoop-gmbh.de>
Date: Thu, 29 Nov 2012 12:33:09 -0500
Subject: [PATCH] feature: when using LdapUserService one can configure Gitblit to fetch all users from ldap that can possibly login. This allows to see newly generated LDAP users instantly in Gitblit. By now an LDAP user had to log in once to appear in GitBlit.

---
 src/com/gitblit/LuceneExecutor.java |  308 +++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 222 insertions(+), 86 deletions(-)

diff --git a/src/com/gitblit/LuceneExecutor.java b/src/com/gitblit/LuceneExecutor.java
index 29a8569..0e4baae 100644
--- a/src/com/gitblit/LuceneExecutor.java
+++ b/src/com/gitblit/LuceneExecutor.java
@@ -25,7 +25,6 @@
 import java.text.MessageFormat;
 import java.text.ParseException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -70,10 +69,12 @@
 import org.apache.lucene.util.Version;
 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
 import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.lib.FileMode;
 import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.ObjectLoader;
 import org.eclipse.jgit.lib.ObjectReader;
 import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.lib.RepositoryCache.FileKey;
 import org.eclipse.jgit.revwalk.RevCommit;
 import org.eclipse.jgit.revwalk.RevTree;
 import org.eclipse.jgit.revwalk.RevWalk;
@@ -105,7 +106,7 @@
 public class LuceneExecutor implements Runnable {
 	
 		
-	private static final int INDEX_VERSION = 2;
+	private static final int INDEX_VERSION = 5;
 
 	private static final String FIELD_OBJECT_TYPE = "type";
 	private static final String FIELD_ISSUE = "issue";
@@ -138,13 +139,17 @@
 	private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
 	private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();
 	
-	private final Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc",
-			"arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",
-			"lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));
-
+	private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip";
+	private Set<String> excludedExtensions;
+	
 	public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) {
 		this.storedSettings = settings;
 		this.repositoriesFolder = repositoriesFolder;
+		String exts = luceneIgnoreExtensions;
+		if (settings != null) {
+			exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts);
+		}
+		excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
 	}
 
 	/**
@@ -154,10 +159,29 @@
 	 */
 	@Override
 	public void run() {
+		if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) {
+			// Lucene indexing is disabled
+			return;
+		}
+		// reload the excluded extensions
+		String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions);
+		excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
+
+		if (GitBlit.self().isCollectingGarbage()) {
+			// busy collecting garbage, try again later
+			return;
+		}
+		
 		for (String repositoryName: GitBlit.self().getRepositoryList()) {
 			RepositoryModel model = GitBlit.self().getRepositoryModel(repositoryName);
 			if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
 				Repository repository = GitBlit.self().getRepository(model.name);
+				if (repository == null) {
+					if (GitBlit.self().isCollectingGarbage(model.name)) {
+						logger.info(MessageFormat.format("Skipping Lucene index of {0}, busy garbage collecting", repositoryName));
+					}
+					continue;
+				}
 				index(model, repository);				
 				repository.close();
 				System.gc();
@@ -174,7 +198,7 @@
 	 * @param repository
 	 *            the repository object
 	 */
-	protected void index(RepositoryModel model, Repository repository) {
+	private void index(RepositoryModel model, Repository repository) {
 		try {
 			if (shouldReindex(repository)) {
 				// (re)build the entire index
@@ -273,7 +297,7 @@
 			close(repositoryName);
 
 			// delete the index folder
-			File repositoryFolder = new File(repositoriesFolder, repositoryName);
+			File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repositoryName), FS.DETECTED);
 			File luceneIndex = new File(repositoryFolder, LUCENE_DIR);
 			if (luceneIndex.exists()) {
 				org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
@@ -289,7 +313,6 @@
 			throw new RuntimeException(e);
 		}
 	}
-
 	
 	/**
 	 * Returns the author for the commit, if this information is available.
@@ -335,7 +358,7 @@
 	 * @return tree
 	 * @throws IOException
 	 */
-	protected RevTree getTree(final RevWalk walk, final RevCommit commit)
+	private RevTree getTree(final RevWalk walk, final RevCommit commit)
 			throws IOException {
 		final RevTree tree = commit.getTree();
 		if (tree != null) {
@@ -375,7 +398,7 @@
 	 * @param repository
 	 * @return true of the on-disk index format is different than INDEX_VERSION
 	 */
-	protected boolean shouldReindex(Repository repository) {
+	private boolean shouldReindex(Repository repository) {
 		try {
 			FileBasedConfig config = getConfig(repository);
 			config.load();
@@ -401,7 +424,8 @@
 		if (!deleteIndex(model.name)) {
 			return result;
 		}
-		try {			
+		try {
+			String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
 			FileBasedConfig config = getConfig(repository);
 			Set<String> indexedCommits = new TreeSet<String>();
 			IndexWriter writer = getIndexWriter(model.name);
@@ -436,7 +460,7 @@
 			ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
 			for (RefModel branch :  branches) {
 				if (branch.getObjectId().equals(defaultBranchId)) {
-					defaultBranch = branch;					
+					defaultBranch = branch;
 					break;
 				}
 			}
@@ -446,8 +470,22 @@
 			// walk through each branch
 			for (RefModel branch : branches) {
 
+				boolean indexBranch = false;
+				if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
+						&& branch.equals(defaultBranch)) {
+					// indexing "default" branch
+					indexBranch = true;
+				} else if (IssueUtils.GB_ISSUES.equals(branch)) {
+					// skip the GB_ISSUES branch because it is indexed later
+					// note: this is different than updateIndex
+					indexBranch = false;
+				} else {
+					// normal explicit branch check
+					indexBranch = model.indexedBranches.contains(branch.getName());
+				}
+				
 				// if this branch is not specifically indexed then skip
-				if (!model.indexedBranches.contains(branch.getName())) {
+				if (!indexBranch) {
 					continue;
 				}
 
@@ -467,7 +505,10 @@
 				
 				Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
 				while (treeWalk.next()) {
-					paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
+					// ensure path is not in a submodule
+					if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
+						paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
+					}
 				}				
 
 				ByteArrayOutputStream os = new ByteArrayOutputStream();
@@ -530,14 +571,14 @@
 						// index the blob content
 						if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {							
 							ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
-							InputStream in = ldr.openStream();							
+							InputStream in = ldr.openStream();						
 							int n;
 							while ((n = in.read(tmp)) > 0) {
 								os.write(tmp, 0, n);
 							}
 							in.close();
 							byte[] content = os.toByteArray();
-							String str = new String(content, Constants.CHARACTER_ENCODING);
+							String str = StringUtils.decodeString(content, encodings);							
 							doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
 							os.reset();
 						}							
@@ -616,13 +657,17 @@
 			String branch, RevCommit commit) {
 		IndexResult result = new IndexResult();
 		try {
+			String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
 			List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
 			String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
 					Resolution.MINUTE);
 			IndexWriter writer = getIndexWriter(repositoryName);
 			for (PathChangeModel path : changedPaths) {
+				if (path.isSubmodule()) {
+					continue;
+				}
 				// delete the indexed blob
-				deleteBlob(repositoryName, branch, path.path);
+				deleteBlob(repositoryName, branch, path.name);
 
 				// re-index the blob
 				if (!ChangeType.DELETE.equals(path.changeType)) {
@@ -648,15 +693,26 @@
 					if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
 						// read the blob content
 						String str = JGitUtils.getStringContent(repository, commit.getTree(),
-								path.path);
-						doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
-						writer.addDocument(doc);
+								path.path, encodings);
+						if (str != null) {
+							doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
+							writer.addDocument(doc);
+						}
 					}
 				}
 			}
 			writer.commit();
-
-			Document doc = createDocument(commit, null);
+			
+			// get any annotated commit tags
+			List<String> commitTags = new ArrayList<String>();
+			for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
+				if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
+					commitTags.add(ref.displayName);
+				}
+			}
+			
+			// create and write the Lucene document
+			Document doc = createDocument(commit, commitTags);
 			doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
 			result.commitCount++;
 			result.success = index(repositoryName, doc);
@@ -692,8 +748,9 @@
 	 * @param repositoryName
 	 * @param issueId
 	 * @throws Exception
+	 * @return true, if deleted, false if no record was deleted
 	 */
-	private void deleteIssue(String repositoryName, String issueId) throws Exception {
+	private boolean deleteIssue(String repositoryName, String issueId) throws Exception {
 		BooleanQuery query = new BooleanQuery();
 		Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.issue.name());
 		query.add(new TermQuery(objectTerm), Occur.MUST);
@@ -701,8 +758,17 @@
 		query.add(new TermQuery(issueidTerm), Occur.MUST);
 		
 		IndexWriter writer = getIndexWriter(repositoryName);
+		int numDocsBefore = writer.numDocs();
 		writer.deleteDocuments(query);
 		writer.commit();
+		int numDocsAfter = writer.numDocs();
+		if (numDocsBefore == numDocsAfter) {
+			logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
+			return false;
+		} else {
+			logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
+			return true;
+		}
 	}
 	
 	/**
@@ -712,19 +778,29 @@
 	 * @param branch
 	 * @param path
 	 * @throws Exception
+	 * @return true, if deleted, false if no record was deleted
 	 */
-	private void deleteBlob(String repositoryName, String branch, String path) throws Exception {
-		BooleanQuery query = new BooleanQuery();
-		Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.blob.name());
-		query.add(new TermQuery(objectTerm), Occur.MUST);
-		Term branchTerm = new Term(FIELD_BRANCH, branch);
-		query.add(new TermQuery(branchTerm), Occur.MUST);
-		Term pathTerm = new Term(FIELD_PATH, path);
-		query.add(new TermQuery(pathTerm), Occur.MUST);
+	public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception {
+		String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH);
+		String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path);
 		
+		BooleanQuery query = new BooleanQuery();
+		StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
+		QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
+		query.add(qp.parse(q), Occur.MUST);
+
 		IndexWriter writer = getIndexWriter(repositoryName);
-		writer.deleteDocuments(query);
+		int numDocsBefore = writer.numDocs();
+		writer.deleteDocuments(query);		
 		writer.commit();
+		int numDocsAfter = writer.numDocs();
+		if (numDocsBefore == numDocsAfter) {
+			logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
+			return false;
+		} else {
+			logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
+			return true;
+		}
 	}
 
 	/**
@@ -734,7 +810,7 @@
 	 * @param repository
 	 * @return IndexResult
 	 */
-	protected IndexResult updateIndex(RepositoryModel model, Repository repository) {
+	private IndexResult updateIndex(RepositoryModel model, Repository repository) {
 		IndexResult result = new IndexResult();
 		try {
 			FileBasedConfig config = getConfig(repository);
@@ -762,20 +838,55 @@
 				deletedBranches.add(branch);
 			}
 
-			// walk through each branches
+			// get the local branches
 			List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
+			
+			// sort them by most recently updated
+			Collections.sort(branches, new Comparator<RefModel>() {
+				@Override
+				public int compare(RefModel ref1, RefModel ref2) {
+					return ref2.getDate().compareTo(ref1.getDate());
+				}
+			});
+						
+			// reorder default branch to first position
+			RefModel defaultBranch = null;
+			ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
+			for (RefModel branch :  branches) {
+				if (branch.getObjectId().equals(defaultBranchId)) {
+					defaultBranch = branch;
+					break;
+				}
+			}
+			branches.remove(defaultBranch);
+			branches.add(0, defaultBranch);
+			
+			// walk through each branches
 			for (RefModel branch : branches) {
 				String branchName = branch.getName();
 
-				// determine if we should skip this branch
-				if (!IssueUtils.GB_ISSUES.equals(branch)
-						&& !model.indexedBranches.contains(branch.getName())) {
+				boolean indexBranch = false;
+				if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
+						&& branch.equals(defaultBranch)) {
+					// indexing "default" branch
+					indexBranch = true;
+				} else if (IssueUtils.GB_ISSUES.equals(branch)) {
+					// update issues modified on the GB_ISSUES branch
+					// note: this is different than reindex
+					indexBranch = true;
+				} else {
+					// normal explicit branch check
+					indexBranch = model.indexedBranches.contains(branch.getName());
+				}
+				
+				// if this branch is not specifically indexed then skip
+				if (!indexBranch) {
 					continue;
 				}
 				
 				// remove this branch from the deletedBranches set
 				deletedBranches.remove(branchName);
-
+				
 				// determine last commit
 				String keyName = getBranchKey(branchName);
 				String lastCommit = config.getString(CONF_BRANCH, null, keyName);
@@ -810,7 +921,9 @@
 						IssueModel issue = IssueUtils.getIssue(repository, issueId);
 						if (issue == null) {
 							// issue was deleted, remove from index
-							deleteIssue(model.name, issueId);
+							if (!deleteIssue(model.name, issueId)) {
+								logger.error(MessageFormat.format("Failed to delete issue {0} from Lucene index!", issueId));
+							}
 						} else {
 							// issue was updated
 							index(model.name, issue);
@@ -973,8 +1086,8 @@
 	 * @throws IOException
 	 */
 	private IndexWriter getIndexWriter(String repository) throws IOException {
-		IndexWriter indexWriter = writers.get(repository);		
-		File repositoryFolder = new File(repositoriesFolder, repository);
+		IndexWriter indexWriter = writers.get(repository);				
+		File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED);
 		File indexFolder = new File(repositoryFolder, LUCENE_DIR);
 		Directory directory = FSDirectory.open(indexFolder);		
 
@@ -1048,7 +1161,7 @@
 			qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
 			qp.setAllowLeadingWildcard(true);
 			query.add(qp.parse(text), Occur.SHOULD);
-
+			
 			IndexSearcher searcher;
 			if (repositories.length == 1) {
 				// single repository search
@@ -1064,7 +1177,10 @@
 				MultiSourceReader reader = new MultiSourceReader(rdrs);
 				searcher = new IndexSearcher(reader);
 			}
+			
 			Query rewrittenQuery = searcher.rewrite(query);
+			logger.debug(rewrittenQuery.toString());
+
 			TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
 			searcher.search(rewrittenQuery, collector);
 			int offset = Math.max(0, (page - 1) * pageSize);
@@ -1105,42 +1221,86 @@
 	 */
 	private String getHighlightedFragment(Analyzer analyzer, Query query,
 			String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
-		content = content == null ? "":StringUtils.escapeForHtml(content, false);
-		
+		if (content == null) {
+			content = "";
+		}		
+
+		int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;
+
 		QueryScorer scorer = new QueryScorer(query, "content");
-		Fragmenter fragmenter;
-		
-		// TODO improve the fragmenter - hopefully on line breaks
-		if (SearchObjectType.commit == result.type) {
-			fragmenter = new SimpleSpanFragmenter(scorer, 1024); 
-		} else {
-			fragmenter = new SimpleSpanFragmenter(scorer, 150);
-		}
+		Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength); 
 
 		// use an artificial delimiter for the token
-		String termTag = "<!--[";
-		String termTagEnd = "]-->";
+		String termTag = "!!--[";
+		String termTagEnd = "]--!!";
 		SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
 		Highlighter highlighter = new Highlighter(formatter, scorer);		
 		highlighter.setTextFragmenter(fragmenter);
-		
+
 		String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
 		if (ArrayUtils.isEmpty(fragments)) {
 			if (SearchObjectType.blob  == result.type) {
 				return "";
 			}
-			return "<pre class=\"text\">" + content + "</pre>";
+			// clip commit message
+			String fragment = content;
+			if (fragment.length() > fragmentLength) {
+				fragment = fragment.substring(0, fragmentLength) + "...";
+			}
+			return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true) + "</pre>";
 		}
+		
+		// make sure we have unique fragments
+		Set<String> uniqueFragments = new LinkedHashSet<String>();
+		for (String fragment : fragments) {
+			uniqueFragments.add(fragment);
+		}
+		fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);
+		
 		StringBuilder sb = new StringBuilder();
 		for (int i = 0, len = fragments.length; i < len; i++) {
 			String fragment = fragments[i];
-			
+			String tag = "<pre class=\"text\">";
+
 			// resurrect the raw fragment from removing the artificial delimiters
-			String raw = fragment.replace(termTag, "").replace(termTagEnd, "");			
-			sb.append(getPreTag(result, raw, content));
+			String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
+
+			// determine position of the raw fragment in the content
+			int pos = content.indexOf(raw);
+				
+			// restore complete first line of fragment
+			int c = pos;
+			while (c > 0) {
+				c--;
+				if (content.charAt(c) == '\n') {
+					break;
+				}
+			}
+			if (c > 0) {
+				// inject leading chunk of first fragment line
+				fragment = content.substring(c + 1, pos) + fragment;
+			}
+				
+			if (SearchObjectType.blob  == result.type) {
+				// count lines as offset into the content for this fragment
+				int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));
+				
+				// create fragment tag with line number and language
+				String lang = "";
+				String ext = StringUtils.getFileExtension(result.path).toLowerCase();
+				if (!StringUtils.isEmpty(ext)) {
+					// maintain leading space!
+					lang = " lang-" + ext;
+				}
+				tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);
+								
+			}
 			
+			sb.append(tag);
+
 			// replace the artificial delimiter with html tags
-			String html = fragment.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
+			String html = StringUtils.escapeForHtml(fragment, false);
+			html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
 			sb.append(html);
 			sb.append("</pre>");
 			if (i < len - 1) {
@@ -1148,31 +1308,7 @@
 			}
 		}
 		return sb.toString();
-	}
-	
-	/**
-	 * Returns the appropriate tag for a fragment. Commit messages are visually
-	 * differentiated from blob fragments.
-	 * 
-	 * @param result
-	 * @param fragment
-	 * @param content
-	 * @return an html tag appropriate for the fragment
-	 */
-	private String getPreTag(SearchResult result, String fragment, String content) {
-		String pre = "<pre class=\"text\">";
-		if (SearchObjectType.blob  == result.type) {
-			int line = StringUtils.countLines(content.substring(0, content.indexOf(fragment)));			
-			int lastDot = result.path.lastIndexOf('.');
-			if (lastDot > -1) {
-				String ext = result.path.substring(lastDot + 1).toLowerCase();
-				pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0} lang-{1}\">", line, ext);	
-			} else {
-				pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}\">", line);
-			}
-		}
-		return pre;
-	}
+	}	
 	
 	/**
 	 * Simple class to track the results of an index update. 

--
Gitblit v1.9.1