From 27ae9095639bb228a1b7ff86a3ebe4264abf05be Mon Sep 17 00:00:00 2001
From: mschaefers <mschaefers@scoop-gmbh.de>
Date: Thu, 29 Nov 2012 12:33:09 -0500
Subject: [PATCH] feature: when using LdapUserService one can configure Gitblit to fetch all users from LDAP that can possibly log in. This allows newly created LDAP users to be seen instantly in Gitblit. Until now an LDAP user had to log in once to appear in Gitblit.

---
 src/com/gitblit/LuceneExecutor.java |  198 ++++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 154 insertions(+), 44 deletions(-)

diff --git a/src/com/gitblit/LuceneExecutor.java b/src/com/gitblit/LuceneExecutor.java
index b6df254..0e4baae 100644
--- a/src/com/gitblit/LuceneExecutor.java
+++ b/src/com/gitblit/LuceneExecutor.java
@@ -69,10 +69,12 @@
 import org.apache.lucene.util.Version;
 import org.eclipse.jgit.diff.DiffEntry.ChangeType;
 import org.eclipse.jgit.lib.Constants;
+import org.eclipse.jgit.lib.FileMode;
 import org.eclipse.jgit.lib.ObjectId;
 import org.eclipse.jgit.lib.ObjectLoader;
 import org.eclipse.jgit.lib.ObjectReader;
 import org.eclipse.jgit.lib.Repository;
+import org.eclipse.jgit.lib.RepositoryCache.FileKey;
 import org.eclipse.jgit.revwalk.RevCommit;
 import org.eclipse.jgit.revwalk.RevTree;
 import org.eclipse.jgit.revwalk.RevWalk;
@@ -104,7 +106,7 @@
 public class LuceneExecutor implements Runnable {
 	
 		
-	private static final int INDEX_VERSION = 2;
+	private static final int INDEX_VERSION = 5;
 
 	private static final String FIELD_OBJECT_TYPE = "type";
 	private static final String FIELD_ISSUE = "issue";
@@ -143,6 +145,11 @@
 	public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) {
 		this.storedSettings = settings;
 		this.repositoriesFolder = repositoriesFolder;
+		String exts = luceneIgnoreExtensions;
+		if (settings != null) {
+			exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts);
+		}
+		excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
 	}
 
 	/**
@@ -152,14 +159,29 @@
 	 */
 	@Override
 	public void run() {
+		if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) {
+			// Lucene indexing is disabled
+			return;
+		}
 		// reload the excluded extensions
 		String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions);
 		excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
 
+		if (GitBlit.self().isCollectingGarbage()) {
+			// busy collecting garbage, try again later
+			return;
+		}
+		
 		for (String repositoryName: GitBlit.self().getRepositoryList()) {
 			RepositoryModel model = GitBlit.self().getRepositoryModel(repositoryName);
 			if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
 				Repository repository = GitBlit.self().getRepository(model.name);
+				if (repository == null) {
+					if (GitBlit.self().isCollectingGarbage(model.name)) {
+						logger.info(MessageFormat.format("Skipping Lucene index of {0}, busy garbage collecting", repositoryName));
+					}
+					continue;
+				}
 				index(model, repository);				
 				repository.close();
 				System.gc();
@@ -176,7 +198,7 @@
 	 * @param repository
 	 *            the repository object
 	 */
-	protected void index(RepositoryModel model, Repository repository) {
+	private void index(RepositoryModel model, Repository repository) {
 		try {
 			if (shouldReindex(repository)) {
 				// (re)build the entire index
@@ -275,7 +297,7 @@
 			close(repositoryName);
 
 			// delete the index folder
-			File repositoryFolder = new File(repositoriesFolder, repositoryName);
+			File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repositoryName), FS.DETECTED);
 			File luceneIndex = new File(repositoryFolder, LUCENE_DIR);
 			if (luceneIndex.exists()) {
 				org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
@@ -291,7 +313,6 @@
 			throw new RuntimeException(e);
 		}
 	}
-
 	
 	/**
 	 * Returns the author for the commit, if this information is available.
@@ -337,7 +358,7 @@
 	 * @return tree
 	 * @throws IOException
 	 */
-	protected RevTree getTree(final RevWalk walk, final RevCommit commit)
+	private RevTree getTree(final RevWalk walk, final RevCommit commit)
 			throws IOException {
 		final RevTree tree = commit.getTree();
 		if (tree != null) {
@@ -377,7 +398,7 @@
 	 * @param repository
 	 * @return true of the on-disk index format is different than INDEX_VERSION
 	 */
-	protected boolean shouldReindex(Repository repository) {
+	private boolean shouldReindex(Repository repository) {
 		try {
 			FileBasedConfig config = getConfig(repository);
 			config.load();
@@ -403,7 +424,8 @@
 		if (!deleteIndex(model.name)) {
 			return result;
 		}
-		try {			
+		try {
+			String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
 			FileBasedConfig config = getConfig(repository);
 			Set<String> indexedCommits = new TreeSet<String>();
 			IndexWriter writer = getIndexWriter(model.name);
@@ -438,7 +460,7 @@
 			ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
 			for (RefModel branch :  branches) {
 				if (branch.getObjectId().equals(defaultBranchId)) {
-					defaultBranch = branch;					
+					defaultBranch = branch;
 					break;
 				}
 			}
@@ -448,8 +470,22 @@
 			// walk through each branch
 			for (RefModel branch : branches) {
 
+				boolean indexBranch = false;
+				if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
+						&& branch.equals(defaultBranch)) {
+					// indexing "default" branch
+					indexBranch = true;
+				} else if (IssueUtils.GB_ISSUES.equals(branch)) {
+					// skip the GB_ISSUES branch because it is indexed later
+					// note: this is different than updateIndex
+					indexBranch = false;
+				} else {
+					// normal explicit branch check
+					indexBranch = model.indexedBranches.contains(branch.getName());
+				}
+				
 				// if this branch is not specifically indexed then skip
-				if (!model.indexedBranches.contains(branch.getName())) {
+				if (!indexBranch) {
 					continue;
 				}
 
@@ -469,7 +505,10 @@
 				
 				Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
 				while (treeWalk.next()) {
-					paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
+					// ensure path is not in a submodule
+					if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
+						paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
+					}
 				}				
 
 				ByteArrayOutputStream os = new ByteArrayOutputStream();
@@ -532,14 +571,14 @@
 						// index the blob content
 						if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {							
 							ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
-							InputStream in = ldr.openStream();							
+							InputStream in = ldr.openStream();						
 							int n;
 							while ((n = in.read(tmp)) > 0) {
 								os.write(tmp, 0, n);
 							}
 							in.close();
 							byte[] content = os.toByteArray();
-							String str = new String(content, Constants.CHARACTER_ENCODING);
+							String str = StringUtils.decodeString(content, encodings);							
 							doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
 							os.reset();
 						}							
@@ -618,11 +657,15 @@
 			String branch, RevCommit commit) {
 		IndexResult result = new IndexResult();
 		try {
+			String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
 			List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
 			String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
 					Resolution.MINUTE);
 			IndexWriter writer = getIndexWriter(repositoryName);
 			for (PathChangeModel path : changedPaths) {
+				if (path.isSubmodule()) {
+					continue;
+				}
 				// delete the indexed blob
 				deleteBlob(repositoryName, branch, path.name);
 
@@ -650,9 +693,11 @@
 					if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
 						// read the blob content
 						String str = JGitUtils.getStringContent(repository, commit.getTree(),
-								path.path);
-						doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
-						writer.addDocument(doc);
+								path.path, encodings);
+						if (str != null) {
+							doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
+							writer.addDocument(doc);
+						}
 					}
 				}
 			}
@@ -660,7 +705,7 @@
 			
 			// get any annotated commit tags
 			List<String> commitTags = new ArrayList<String>();
-			for (RefModel ref : JGitUtils.getTags(repository, true, -1)) {
+			for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
 				if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
 					commitTags.add(ref.displayName);
 				}
@@ -703,8 +748,9 @@
 	 * @param repositoryName
 	 * @param issueId
 	 * @throws Exception
+	 * @return true, if deleted, false if no record was deleted
 	 */
-	private void deleteIssue(String repositoryName, String issueId) throws Exception {
+	private boolean deleteIssue(String repositoryName, String issueId) throws Exception {
 		BooleanQuery query = new BooleanQuery();
 		Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.issue.name());
 		query.add(new TermQuery(objectTerm), Occur.MUST);
@@ -712,8 +758,17 @@
 		query.add(new TermQuery(issueidTerm), Occur.MUST);
 		
 		IndexWriter writer = getIndexWriter(repositoryName);
+		int numDocsBefore = writer.numDocs();
 		writer.deleteDocuments(query);
 		writer.commit();
+		int numDocsAfter = writer.numDocs();
+		if (numDocsBefore == numDocsAfter) {
+			logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
+			return false;
+		} else {
+			logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
+			return true;
+		}
 	}
 	
 	/**
@@ -723,19 +778,29 @@
 	 * @param branch
 	 * @param path
 	 * @throws Exception
+	 * @return true, if deleted, false if no record was deleted
 	 */
-	private void deleteBlob(String repositoryName, String branch, String path) throws Exception {
-		BooleanQuery query = new BooleanQuery();
-		Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.blob.name());
-		query.add(new TermQuery(objectTerm), Occur.MUST);
-		Term branchTerm = new Term(FIELD_BRANCH, branch);
-		query.add(new TermQuery(branchTerm), Occur.MUST);
-		Term pathTerm = new Term(FIELD_PATH, path);
-		query.add(new TermQuery(pathTerm), Occur.MUST);
+	public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception {
+		String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH);
+		String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path);
 		
+		BooleanQuery query = new BooleanQuery();
+		StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
+		QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
+		query.add(qp.parse(q), Occur.MUST);
+
 		IndexWriter writer = getIndexWriter(repositoryName);
-		writer.deleteDocuments(query);
+		int numDocsBefore = writer.numDocs();
+		writer.deleteDocuments(query);		
 		writer.commit();
+		int numDocsAfter = writer.numDocs();
+		if (numDocsBefore == numDocsAfter) {
+			logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
+			return false;
+		} else {
+			logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
+			return true;
+		}
 	}
 
 	/**
@@ -745,7 +810,7 @@
 	 * @param repository
 	 * @return IndexResult
 	 */
-	protected IndexResult updateIndex(RepositoryModel model, Repository repository) {
+	private IndexResult updateIndex(RepositoryModel model, Repository repository) {
 		IndexResult result = new IndexResult();
 		try {
 			FileBasedConfig config = getConfig(repository);
@@ -773,20 +838,55 @@
 				deletedBranches.add(branch);
 			}
 
-			// walk through each branches
+			// get the local branches
 			List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
+			
+			// sort them by most recently updated
+			Collections.sort(branches, new Comparator<RefModel>() {
+				@Override
+				public int compare(RefModel ref1, RefModel ref2) {
+					return ref2.getDate().compareTo(ref1.getDate());
+				}
+			});
+						
+			// reorder default branch to first position
+			RefModel defaultBranch = null;
+			ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
+			for (RefModel branch :  branches) {
+				if (branch.getObjectId().equals(defaultBranchId)) {
+					defaultBranch = branch;
+					break;
+				}
+			}
+			branches.remove(defaultBranch);
+			branches.add(0, defaultBranch);
+			
+			// walk through each branch
 			for (RefModel branch : branches) {
 				String branchName = branch.getName();
 
-				// determine if we should skip this branch
-				if (!IssueUtils.GB_ISSUES.equals(branch)
-						&& !model.indexedBranches.contains(branch.getName())) {
+				boolean indexBranch = false;
+				if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
+						&& branch.equals(defaultBranch)) {
+					// indexing "default" branch
+					indexBranch = true;
+				} else if (IssueUtils.GB_ISSUES.equals(branch)) {
+					// update issues modified on the GB_ISSUES branch
+					// note: this is different than reindex
+					indexBranch = true;
+				} else {
+					// normal explicit branch check
+					indexBranch = model.indexedBranches.contains(branch.getName());
+				}
+				
+				// if this branch is not specifically indexed then skip
+				if (!indexBranch) {
 					continue;
 				}
 				
 				// remove this branch from the deletedBranches set
 				deletedBranches.remove(branchName);
-
+				
 				// determine last commit
 				String keyName = getBranchKey(branchName);
 				String lastCommit = config.getString(CONF_BRANCH, null, keyName);
@@ -821,7 +921,9 @@
 						IssueModel issue = IssueUtils.getIssue(repository, issueId);
 						if (issue == null) {
 							// issue was deleted, remove from index
-							deleteIssue(model.name, issueId);
+							if (!deleteIssue(model.name, issueId)) {
+								logger.error(MessageFormat.format("Failed to delete issue {0} from Lucene index!", issueId));
+							}
 						} else {
 							// issue was updated
 							index(model.name, issue);
@@ -984,8 +1086,8 @@
 	 * @throws IOException
 	 */
 	private IndexWriter getIndexWriter(String repository) throws IOException {
-		IndexWriter indexWriter = writers.get(repository);		
-		File repositoryFolder = new File(repositoriesFolder, repository);
+		IndexWriter indexWriter = writers.get(repository);				
+		File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED);
 		File indexFolder = new File(repositoryFolder, LUCENE_DIR);
 		Directory directory = FSDirectory.open(indexFolder);		
 
@@ -1059,7 +1161,7 @@
 			qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
 			qp.setAllowLeadingWildcard(true);
 			query.add(qp.parse(text), Occur.SHOULD);
-
+			
 			IndexSearcher searcher;
 			if (repositories.length == 1) {
 				// single repository search
@@ -1075,7 +1177,10 @@
 				MultiSourceReader reader = new MultiSourceReader(rdrs);
 				searcher = new IndexSearcher(reader);
 			}
+			
 			Query rewrittenQuery = searcher.rewrite(query);
+			logger.debug(rewrittenQuery.toString());
+
 			TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
 			searcher.search(rewrittenQuery, collector);
 			int offset = Math.max(0, (page - 1) * pageSize);
@@ -1126,8 +1231,8 @@
 		Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength); 
 
 		// use an artificial delimiter for the token
-		String termTag = "<!--[";
-		String termTagEnd = "]-->";
+		String termTag = "!!--[";
+		String termTagEnd = "]--!!";
 		SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
 		Highlighter highlighter = new Highlighter(formatter, scorer);		
 		highlighter.setTextFragmenter(fragmenter);
@@ -1145,7 +1250,13 @@
 			return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true) + "</pre>";
 		}
 		
-		int contentPos = 0;
+		// make sure we have unique fragments
+		Set<String> uniqueFragments = new LinkedHashSet<String>();
+		for (String fragment : fragments) {
+			uniqueFragments.add(fragment);
+		}
+		fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);
+		
 		StringBuilder sb = new StringBuilder();
 		for (int i = 0, len = fragments.length; i < len; i++) {
 			String fragment = fragments[i];
@@ -1155,7 +1266,7 @@
 			String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
 
 			// determine position of the raw fragment in the content
-			int pos = content.indexOf(raw, contentPos);
+			int pos = content.indexOf(raw);
 				
 			// restore complete first line of fragment
 			int c = pos;
@@ -1172,7 +1283,7 @@
 				
 			if (SearchObjectType.blob  == result.type) {
 				// count lines as offset into the content for this fragment
-				int line = StringUtils.countLines(content.substring(0, pos));
+				int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));
 				
 				// create fragment tag with line number and language
 				String lang = "";
@@ -1183,14 +1294,13 @@
 				}
 				tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);
 								
-				// update offset into content				
-				contentPos = pos + raw.length() + 1;
 			}
 			
 			sb.append(tag);
 
 			// replace the artificial delimiter with html tags
-			String html = fragment.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
+			String html = StringUtils.escapeForHtml(fragment, false);
+			html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
 			sb.append(html);
 			sb.append("</pre>");
 			if (i < len - 1) {

--
Gitblit v1.9.1