commit | author | age
|
e31da0
|
1 |
/*
|
JM |
2 |
* Copyright 2012 gitblit.com.
|
|
3 |
*
|
|
4 |
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5 |
* you may not use this file except in compliance with the License.
|
|
6 |
* You may obtain a copy of the License at
|
|
7 |
*
|
|
8 |
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9 |
*
|
|
10 |
* Unless required by applicable law or agreed to in writing, software
|
|
11 |
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12 |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13 |
* See the License for the specific language governing permissions and
|
|
14 |
* limitations under the License.
|
|
15 |
*/
|
|
16 |
package com.gitblit;
|
|
17 |
|
d896e6
|
18 |
import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;
|
e31da0
|
19 |
|
d896e6
|
20 |
import java.io.ByteArrayOutputStream;
|
JM |
21 |
import java.io.File;
|
|
22 |
import java.io.IOException;
|
|
23 |
import java.io.InputStream;
|
905d31
|
24 |
import java.lang.reflect.Method;
|
d896e6
|
25 |
import java.text.MessageFormat;
|
JM |
26 |
import java.text.ParseException;
|
|
27 |
import java.util.ArrayList;
|
|
28 |
import java.util.Arrays;
|
|
29 |
import java.util.Collections;
|
|
30 |
import java.util.Comparator;
|
|
31 |
import java.util.HashMap;
|
|
32 |
import java.util.LinkedHashSet;
|
|
33 |
import java.util.List;
|
|
34 |
import java.util.Map;
|
|
35 |
import java.util.Set;
|
|
36 |
import java.util.TreeMap;
|
|
37 |
import java.util.TreeSet;
|
|
38 |
import java.util.concurrent.ConcurrentHashMap;
|
|
39 |
|
|
40 |
import org.apache.lucene.analysis.Analyzer;
|
|
41 |
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|
42 |
import org.apache.lucene.document.DateTools;
|
|
43 |
import org.apache.lucene.document.DateTools.Resolution;
|
|
44 |
import org.apache.lucene.document.Document;
|
|
45 |
import org.apache.lucene.document.Field;
|
|
46 |
import org.apache.lucene.document.Field.Index;
|
|
47 |
import org.apache.lucene.document.Field.Store;
|
|
48 |
import org.apache.lucene.index.IndexReader;
|
|
49 |
import org.apache.lucene.index.IndexWriter;
|
|
50 |
import org.apache.lucene.index.IndexWriterConfig;
|
|
51 |
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
|
52 |
import org.apache.lucene.index.MultiReader;
|
|
53 |
import org.apache.lucene.index.Term;
|
|
54 |
import org.apache.lucene.queryParser.QueryParser;
|
|
55 |
import org.apache.lucene.search.BooleanClause.Occur;
|
|
56 |
import org.apache.lucene.search.BooleanQuery;
|
|
57 |
import org.apache.lucene.search.IndexSearcher;
|
|
58 |
import org.apache.lucene.search.Query;
|
|
59 |
import org.apache.lucene.search.ScoreDoc;
|
|
60 |
import org.apache.lucene.search.TermQuery;
|
|
61 |
import org.apache.lucene.search.TopScoreDocCollector;
|
|
62 |
import org.apache.lucene.search.highlight.Fragmenter;
|
|
63 |
import org.apache.lucene.search.highlight.Highlighter;
|
|
64 |
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
|
|
65 |
import org.apache.lucene.search.highlight.QueryScorer;
|
|
66 |
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
|
|
67 |
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
|
|
68 |
import org.apache.lucene.store.Directory;
|
|
69 |
import org.apache.lucene.store.FSDirectory;
|
|
70 |
import org.apache.lucene.util.Version;
|
|
71 |
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
|
|
72 |
import org.eclipse.jgit.lib.Constants;
|
|
73 |
import org.eclipse.jgit.lib.ObjectId;
|
|
74 |
import org.eclipse.jgit.lib.ObjectLoader;
|
|
75 |
import org.eclipse.jgit.lib.ObjectReader;
|
e31da0
|
76 |
import org.eclipse.jgit.lib.Repository;
|
d896e6
|
77 |
import org.eclipse.jgit.revwalk.RevCommit;
|
JM |
78 |
import org.eclipse.jgit.revwalk.RevTree;
|
|
79 |
import org.eclipse.jgit.revwalk.RevWalk;
|
|
80 |
import org.eclipse.jgit.storage.file.FileBasedConfig;
|
|
81 |
import org.eclipse.jgit.treewalk.EmptyTreeIterator;
|
|
82 |
import org.eclipse.jgit.treewalk.TreeWalk;
|
|
83 |
import org.eclipse.jgit.util.FS;
|
e31da0
|
84 |
import org.slf4j.Logger;
|
JM |
85 |
import org.slf4j.LoggerFactory;
|
|
86 |
|
d896e6
|
87 |
import com.gitblit.Constants.SearchObjectType;
|
JM |
88 |
import com.gitblit.models.IssueModel;
|
|
89 |
import com.gitblit.models.IssueModel.Attachment;
|
|
90 |
import com.gitblit.models.PathModel.PathChangeModel;
|
|
91 |
import com.gitblit.models.RefModel;
|
40ca5c
|
92 |
import com.gitblit.models.RepositoryModel;
|
d896e6
|
93 |
import com.gitblit.models.SearchResult;
|
JM |
94 |
import com.gitblit.utils.ArrayUtils;
|
|
95 |
import com.gitblit.utils.IssueUtils;
|
e31da0
|
96 |
import com.gitblit.utils.JGitUtils;
|
d896e6
|
97 |
import com.gitblit.utils.StringUtils;
|
e31da0
|
98 |
|
JM |
99 |
/**
|
d896e6
|
100 |
* The Lucene executor handles indexing and searching repositories.
|
e31da0
|
101 |
*
|
JM |
102 |
* @author James Moger
|
|
103 |
*
|
|
104 |
*/
|
|
105 |
public class LuceneExecutor implements Runnable {
|
d896e6
|
106 |
|
JM |
107 |
|
905d31
|
108 |
private static final int INDEX_VERSION = 2;
|
e31da0
|
109 |
|
d896e6
|
110 |
private static final String FIELD_OBJECT_TYPE = "type";
|
JM |
111 |
private static final String FIELD_ISSUE = "issue";
|
|
112 |
private static final String FIELD_PATH = "path";
|
|
113 |
private static final String FIELD_COMMIT = "commit";
|
|
114 |
private static final String FIELD_BRANCH = "branch";
|
|
115 |
private static final String FIELD_SUMMARY = "summary";
|
|
116 |
private static final String FIELD_CONTENT = "content";
|
|
117 |
private static final String FIELD_AUTHOR = "author";
|
|
118 |
private static final String FIELD_COMMITTER = "committer";
|
|
119 |
private static final String FIELD_DATE = "date";
|
|
120 |
private static final String FIELD_TAG = "tag";
|
|
121 |
private static final String FIELD_LABEL = "label";
|
|
122 |
private static final String FIELD_ATTACHMENT = "attachment";
|
|
123 |
|
|
124 |
private static final String CONF_FILE = "lucene.conf";
|
|
125 |
private static final String LUCENE_DIR = "lucene";
|
|
126 |
private static final String CONF_INDEX = "index";
|
|
127 |
private static final String CONF_VERSION = "version";
|
|
128 |
private static final String CONF_ALIAS = "aliases";
|
|
129 |
private static final String CONF_BRANCH = "branches";
|
|
130 |
|
|
131 |
private static final Version LUCENE_VERSION = Version.LUCENE_35;
|
|
132 |
|
e31da0
|
133 |
private final Logger logger = LoggerFactory.getLogger(LuceneExecutor.class);
|
d896e6
|
134 |
|
JM |
135 |
private final IStoredSettings storedSettings;
|
|
136 |
private final File repositoriesFolder;
|
|
137 |
|
|
138 |
private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
|
|
139 |
private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();
|
|
140 |
|
|
141 |
private final Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc",
|
|
142 |
"arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",
|
|
143 |
"lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));
|
e31da0
|
144 |
|
d896e6
|
145 |
/**
 * Creates a Lucene executor.
 *
 * @param settings
 *            the stored settings retained by this executor
 * @param repositoriesFolder
 *            the folder against which repository names are resolved
 */
public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) {
	this.repositoriesFolder = repositoriesFolder;
	this.storedSettings = settings;
}
|
JM |
149 |
|
|
150 |
/**
|
273cb9
|
151 |
* Run is executed by the Gitblit executor service. Because this is called
|
JM |
152 |
* by an executor service, calls will queue - i.e. there can never be
|
|
153 |
* concurrent execution of repository index updates.
|
e31da0
|
154 |
*/
|
JM |
155 |
@Override
|
|
156 |
public void run() {
|
40ca5c
|
157 |
for (String repositoryName: GitBlit.self().getRepositoryList()) {
|
JM |
158 |
RepositoryModel model = GitBlit.self().getRepositoryModel(repositoryName);
|
|
159 |
if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
|
|
160 |
Repository repository = GitBlit.self().getRepository(model.name);
|
|
161 |
index(model, repository);
|
|
162 |
repository.close();
|
|
163 |
System.gc();
|
e31da0
|
164 |
}
|
JM |
165 |
}
|
|
166 |
}
|
|
167 |
|
|
168 |
/**
|
|
169 |
* Synchronously indexes a repository. This may build a complete index of a
|
|
170 |
* repository or it may update an existing index.
|
|
171 |
*
|
3d0494
|
172 |
* @param name
|
e31da0
|
173 |
* the name of the repository
|
JM |
174 |
* @param repository
|
|
175 |
* the repository object
|
|
176 |
*/
|
40ca5c
|
177 |
protected void index(RepositoryModel model, Repository repository) {
|
e31da0
|
178 |
try {
|
40ca5c
|
179 |
if (shouldReindex(repository)) {
|
JM |
180 |
// (re)build the entire index
|
|
181 |
IndexResult result = reindex(model, repository);
|
|
182 |
|
|
183 |
if (result.success) {
|
|
184 |
if (result.commitCount > 0) {
|
|
185 |
String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
|
|
186 |
logger.info(MessageFormat.format(msg, model.name, result.commitCount,
|
|
187 |
result.blobCount, result.branchCount, result.duration()));
|
e31da0
|
188 |
}
|
JM |
189 |
} else {
|
40ca5c
|
190 |
String msg = "Could not build {0} Lucene index!";
|
JM |
191 |
logger.error(MessageFormat.format(msg, model.name));
|
e31da0
|
192 |
}
|
JM |
193 |
} else {
|
40ca5c
|
194 |
// update the index with latest commits
|
JM |
195 |
IndexResult result = updateIndex(model, repository);
|
|
196 |
if (result.success) {
|
|
197 |
if (result.commitCount > 0) {
|
|
198 |
String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
|
|
199 |
logger.info(MessageFormat.format(msg, model.name, result.commitCount,
|
|
200 |
result.blobCount, result.branchCount, result.duration()));
|
|
201 |
}
|
|
202 |
} else {
|
|
203 |
String msg = "Could not update {0} Lucene index!";
|
|
204 |
logger.error(MessageFormat.format(msg, model.name));
|
|
205 |
}
|
e31da0
|
206 |
}
|
JM |
207 |
} catch (Throwable t) {
|
40ca5c
|
208 |
logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t);
|
e31da0
|
209 |
}
|
JM |
210 |
}
|
e6637c
|
211 |
|
JM |
212 |
/**
|
|
213 |
* Close the writer/searcher objects for a repository.
|
|
214 |
*
|
|
215 |
* @param repositoryName
|
|
216 |
*/
|
8e9988
|
217 |
public synchronized void close(String repositoryName) {
|
JM |
218 |
try {
|
|
219 |
IndexSearcher searcher = searchers.remove(repositoryName);
|
|
220 |
if (searcher != null) {
|
|
221 |
searcher.getIndexReader().close();
|
|
222 |
}
|
|
223 |
} catch (Exception e) {
|
|
224 |
logger.error("Failed to close index searcher for " + repositoryName, e);
|
|
225 |
}
|
|
226 |
|
e6637c
|
227 |
try {
|
JM |
228 |
IndexWriter writer = writers.remove(repositoryName);
|
|
229 |
if (writer != null) {
|
|
230 |
writer.close();
|
|
231 |
}
|
|
232 |
} catch (Exception e) {
|
|
233 |
logger.error("Failed to close index writer for " + repositoryName, e);
|
8e9988
|
234 |
}
|
e6637c
|
235 |
}
|
b938ae
|
236 |
|
JM |
237 |
/**
|
|
238 |
* Close all Lucene indexers.
|
|
239 |
*
|
|
240 |
*/
|
8e9988
|
241 |
public synchronized void close() {
|
d896e6
|
242 |
// close all writers
|
JM |
243 |
for (String writer : writers.keySet()) {
|
|
244 |
try {
|
|
245 |
writers.get(writer).close(true);
|
|
246 |
} catch (Throwable t) {
|
|
247 |
logger.error("Failed to close Lucene writer for " + writer, t);
|
|
248 |
}
|
|
249 |
}
|
|
250 |
writers.clear();
|
|
251 |
|
|
252 |
// close all searchers
|
|
253 |
for (String searcher : searchers.keySet()) {
|
|
254 |
try {
|
8e9988
|
255 |
searchers.get(searcher).getIndexReader().close();
|
d896e6
|
256 |
} catch (Throwable t) {
|
JM |
257 |
logger.error("Failed to close Lucene searcher for " + searcher, t);
|
|
258 |
}
|
|
259 |
}
|
|
260 |
searchers.clear();
|
|
261 |
}
|
|
262 |
|
|
263 |
|
|
264 |
/**
|
|
265 |
* Deletes the Lucene index for the specified repository.
|
|
266 |
*
|
|
267 |
* @param repositoryName
|
|
268 |
* @return true, if successful
|
|
269 |
*/
|
|
270 |
public boolean deleteIndex(String repositoryName) {
|
|
271 |
try {
|
8e9988
|
272 |
// close any open writer/searcher
|
JM |
273 |
close(repositoryName);
|
|
274 |
|
d896e6
|
275 |
// delete the index folder
|
JM |
276 |
File repositoryFolder = new File(repositoriesFolder, repositoryName);
|
|
277 |
File luceneIndex = new File(repositoryFolder, LUCENE_DIR);
|
|
278 |
if (luceneIndex.exists()) {
|
|
279 |
org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
|
|
280 |
org.eclipse.jgit.util.FileUtils.RECURSIVE);
|
|
281 |
}
|
|
282 |
// delete the config file
|
|
283 |
File luceneConfig = new File(repositoryFolder, CONF_FILE);
|
|
284 |
if (luceneConfig.exists()) {
|
|
285 |
luceneConfig.delete();
|
|
286 |
}
|
|
287 |
return true;
|
|
288 |
} catch (IOException e) {
|
|
289 |
throw new RuntimeException(e);
|
|
290 |
}
|
|
291 |
}
|
|
292 |
|
|
293 |
|
|
294 |
/**
|
|
295 |
* Returns the author for the commit, if this information is available.
|
|
296 |
*
|
|
297 |
* @param commit
|
|
298 |
* @return an author or unknown
|
|
299 |
*/
|
|
300 |
private String getAuthor(RevCommit commit) {
|
|
301 |
String name = "unknown";
|
|
302 |
try {
|
|
303 |
name = commit.getAuthorIdent().getName();
|
|
304 |
if (StringUtils.isEmpty(name)) {
|
|
305 |
name = commit.getAuthorIdent().getEmailAddress();
|
|
306 |
}
|
|
307 |
} catch (NullPointerException n) {
|
|
308 |
}
|
|
309 |
return name;
|
|
310 |
}
|
|
311 |
|
|
312 |
/**
|
|
313 |
* Returns the committer for the commit, if this information is available.
|
|
314 |
*
|
|
315 |
* @param commit
|
|
316 |
* @return an committer or unknown
|
|
317 |
*/
|
|
318 |
private String getCommitter(RevCommit commit) {
|
|
319 |
String name = "unknown";
|
|
320 |
try {
|
|
321 |
name = commit.getCommitterIdent().getName();
|
|
322 |
if (StringUtils.isEmpty(name)) {
|
|
323 |
name = commit.getCommitterIdent().getEmailAddress();
|
|
324 |
}
|
|
325 |
} catch (NullPointerException n) {
|
|
326 |
}
|
|
327 |
return name;
|
|
328 |
}
|
905d31
|
329 |
|
JM |
330 |
/**
|
|
331 |
* Get the tree associated with the given commit.
|
|
332 |
*
|
|
333 |
* @param walk
|
|
334 |
* @param commit
|
|
335 |
* @return tree
|
|
336 |
* @throws IOException
|
|
337 |
*/
|
|
338 |
protected RevTree getTree(final RevWalk walk, final RevCommit commit)
|
|
339 |
throws IOException {
|
|
340 |
final RevTree tree = commit.getTree();
|
|
341 |
if (tree != null) {
|
|
342 |
return tree;
|
|
343 |
}
|
|
344 |
walk.parseHeaders(commit);
|
|
345 |
return commit.getTree();
|
|
346 |
}
|
d896e6
|
347 |
|
JM |
348 |
/**
|
|
349 |
* Construct a keyname from the branch.
|
|
350 |
*
|
|
351 |
* @param branchName
|
|
352 |
* @return a keyname appropriate for the Git config file format
|
|
353 |
*/
|
|
354 |
private String getBranchKey(String branchName) {
|
|
355 |
return StringUtils.getSHA1(branchName);
|
|
356 |
}
|
|
357 |
|
|
358 |
/**
|
|
359 |
* Returns the Lucene configuration for the specified repository.
|
|
360 |
*
|
|
361 |
* @param repository
|
|
362 |
* @return a config object
|
|
363 |
*/
|
|
364 |
private FileBasedConfig getConfig(Repository repository) {
|
|
365 |
File file = new File(repository.getDirectory(), CONF_FILE);
|
|
366 |
FileBasedConfig config = new FileBasedConfig(file, FS.detect());
|
|
367 |
return config;
|
|
368 |
}
|
|
369 |
|
|
370 |
/**
|
|
371 |
* Reads the Lucene config file for the repository to check the index
|
|
372 |
* version. If the index version is different, then rebuild the repository
|
|
373 |
* index.
|
|
374 |
*
|
|
375 |
* @param repository
|
|
376 |
* @return true of the on-disk index format is different than INDEX_VERSION
|
|
377 |
*/
|
|
378 |
protected boolean shouldReindex(Repository repository) {
|
|
379 |
try {
|
|
380 |
FileBasedConfig config = getConfig(repository);
|
|
381 |
config.load();
|
|
382 |
int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
|
|
383 |
// reindex if versions do not match
|
|
384 |
return indexVersion != INDEX_VERSION;
|
|
385 |
} catch (Throwable t) {
|
|
386 |
}
|
|
387 |
return true;
|
|
388 |
}
|
|
389 |
|
|
390 |
|
|
391 |
/**
|
|
392 |
* This completely indexes the repository and will destroy any existing
|
|
393 |
* index.
|
|
394 |
*
|
|
395 |
* @param repositoryName
|
|
396 |
* @param repository
|
|
397 |
* @return IndexResult
|
|
398 |
*/
|
40ca5c
|
399 |
public IndexResult reindex(RepositoryModel model, Repository repository) {
|
8e9988
|
400 |
IndexResult result = new IndexResult();
|
40ca5c
|
401 |
if (!deleteIndex(model.name)) {
|
d896e6
|
402 |
return result;
|
JM |
403 |
}
|
|
404 |
try {
|
|
405 |
FileBasedConfig config = getConfig(repository);
|
|
406 |
Set<String> indexedCommits = new TreeSet<String>();
|
40ca5c
|
407 |
IndexWriter writer = getIndexWriter(model.name);
|
d896e6
|
408 |
// build a quick lookup of tags
|
JM |
409 |
Map<String, List<String>> tags = new HashMap<String, List<String>>();
|
|
410 |
for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
|
|
411 |
if (!tag.isAnnotatedTag()) {
|
|
412 |
// skip non-annotated tags
|
|
413 |
continue;
|
|
414 |
}
|
|
415 |
if (!tags.containsKey(tag.getObjectId())) {
|
|
416 |
tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
|
|
417 |
}
|
|
418 |
tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
|
|
419 |
}
|
|
420 |
|
|
421 |
ObjectReader reader = repository.newObjectReader();
|
|
422 |
|
|
423 |
// get the local branches
|
|
424 |
List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
|
|
425 |
|
|
426 |
// sort them by most recently updated
|
|
427 |
Collections.sort(branches, new Comparator<RefModel>() {
|
|
428 |
@Override
|
|
429 |
public int compare(RefModel ref1, RefModel ref2) {
|
|
430 |
return ref2.getDate().compareTo(ref1.getDate());
|
|
431 |
}
|
|
432 |
});
|
|
433 |
|
|
434 |
// reorder default branch to first position
|
|
435 |
RefModel defaultBranch = null;
|
|
436 |
ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
|
|
437 |
for (RefModel branch : branches) {
|
|
438 |
if (branch.getObjectId().equals(defaultBranchId)) {
|
|
439 |
defaultBranch = branch;
|
|
440 |
break;
|
|
441 |
}
|
|
442 |
}
|
|
443 |
branches.remove(defaultBranch);
|
|
444 |
branches.add(0, defaultBranch);
|
|
445 |
|
|
446 |
// walk through each branch
|
|
447 |
for (RefModel branch : branches) {
|
40ca5c
|
448 |
|
JM |
449 |
// if this branch is not specifically indexed then skip
|
|
450 |
if (!model.indexedBranches.contains(branch.getName())) {
|
d896e6
|
451 |
continue;
|
JM |
452 |
}
|
|
453 |
|
|
454 |
String branchName = branch.getName();
|
|
455 |
RevWalk revWalk = new RevWalk(reader);
|
|
456 |
RevCommit tip = revWalk.parseCommit(branch.getObjectId());
|
|
457 |
String tipId = tip.getId().getName();
|
|
458 |
|
|
459 |
String keyName = getBranchKey(branchName);
|
|
460 |
config.setString(CONF_ALIAS, null, keyName, branchName);
|
|
461 |
config.setString(CONF_BRANCH, null, keyName, tipId);
|
|
462 |
|
|
463 |
// index the blob contents of the tree
|
|
464 |
TreeWalk treeWalk = new TreeWalk(repository);
|
|
465 |
treeWalk.addTree(tip.getTree());
|
|
466 |
treeWalk.setRecursive(true);
|
|
467 |
|
|
468 |
Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
|
|
469 |
while (treeWalk.next()) {
|
|
470 |
paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
|
|
471 |
}
|
|
472 |
|
|
473 |
ByteArrayOutputStream os = new ByteArrayOutputStream();
|
|
474 |
byte[] tmp = new byte[32767];
|
|
475 |
|
|
476 |
RevWalk commitWalk = new RevWalk(reader);
|
|
477 |
commitWalk.markStart(tip);
|
|
478 |
|
|
479 |
RevCommit commit;
|
|
480 |
while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
|
|
481 |
TreeWalk diffWalk = new TreeWalk(reader);
|
|
482 |
int parentCount = commit.getParentCount();
|
|
483 |
switch (parentCount) {
|
|
484 |
case 0:
|
|
485 |
diffWalk.addTree(new EmptyTreeIterator());
|
|
486 |
break;
|
|
487 |
case 1:
|
|
488 |
diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
|
|
489 |
break;
|
|
490 |
default:
|
|
491 |
// skip merge commits
|
|
492 |
continue;
|
|
493 |
}
|
|
494 |
diffWalk.addTree(getTree(commitWalk, commit));
|
|
495 |
diffWalk.setFilter(ANY_DIFF);
|
|
496 |
diffWalk.setRecursive(true);
|
|
497 |
while ((paths.size() > 0) && diffWalk.next()) {
|
|
498 |
String path = diffWalk.getPathString();
|
|
499 |
if (!paths.containsKey(path)) {
|
|
500 |
continue;
|
|
501 |
}
|
|
502 |
|
|
503 |
// remove path from set
|
|
504 |
ObjectId blobId = paths.remove(path);
|
|
505 |
result.blobCount++;
|
|
506 |
|
|
507 |
// index the blob metadata
|
|
508 |
String blobAuthor = getAuthor(commit);
|
|
509 |
String blobCommitter = getCommitter(commit);
|
|
510 |
String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
|
|
511 |
Resolution.MINUTE);
|
|
512 |
|
|
513 |
Document doc = new Document();
|
|
514 |
doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES, Index.NOT_ANALYZED_NO_NORMS));
|
|
515 |
doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
|
|
516 |
doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
|
|
517 |
doc.add(new Field(FIELD_PATH, path, Store.YES, Index.ANALYZED));
|
|
518 |
doc.add(new Field(FIELD_DATE, blobDate, Store.YES, Index.NO));
|
|
519 |
doc.add(new Field(FIELD_AUTHOR, blobAuthor, Store.YES, Index.ANALYZED));
|
|
520 |
doc.add(new Field(FIELD_COMMITTER, blobCommitter, Store.YES, Index.ANALYZED));
|
|
521 |
|
|
522 |
// determine extension to compare to the extension
|
|
523 |
// blacklist
|
|
524 |
String ext = null;
|
|
525 |
String name = path.toLowerCase();
|
|
526 |
if (name.indexOf('.') > -1) {
|
|
527 |
ext = name.substring(name.lastIndexOf('.') + 1);
|
|
528 |
}
|
|
529 |
|
|
530 |
// index the blob content
|
|
531 |
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
|
|
532 |
ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
|
|
533 |
InputStream in = ldr.openStream();
|
|
534 |
int n;
|
|
535 |
while ((n = in.read(tmp)) > 0) {
|
|
536 |
os.write(tmp, 0, n);
|
|
537 |
}
|
|
538 |
in.close();
|
|
539 |
byte[] content = os.toByteArray();
|
|
540 |
String str = new String(content, Constants.CHARACTER_ENCODING);
|
|
541 |
doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
|
|
542 |
os.reset();
|
|
543 |
}
|
|
544 |
|
|
545 |
// add the blob to the index
|
|
546 |
writer.addDocument(doc);
|
|
547 |
}
|
|
548 |
}
|
|
549 |
|
|
550 |
os.close();
|
|
551 |
|
|
552 |
// index the tip commit object
|
|
553 |
if (indexedCommits.add(tipId)) {
|
|
554 |
Document doc = createDocument(tip, tags.get(tipId));
|
|
555 |
doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
|
|
556 |
writer.addDocument(doc);
|
|
557 |
result.commitCount += 1;
|
|
558 |
result.branchCount += 1;
|
|
559 |
}
|
|
560 |
|
|
561 |
// traverse the log and index the previous commit objects
|
|
562 |
RevWalk historyWalk = new RevWalk(reader);
|
|
563 |
historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
|
|
564 |
RevCommit rev;
|
|
565 |
while ((rev = historyWalk.next()) != null) {
|
|
566 |
String hash = rev.getId().getName();
|
|
567 |
if (indexedCommits.add(hash)) {
|
|
568 |
Document doc = createDocument(rev, tags.get(hash));
|
|
569 |
doc.add(new Field(FIELD_BRANCH, branchName, Store.YES, Index.ANALYZED));
|
|
570 |
writer.addDocument(doc);
|
|
571 |
result.commitCount += 1;
|
|
572 |
}
|
|
573 |
}
|
|
574 |
}
|
|
575 |
|
|
576 |
// finished
|
|
577 |
reader.release();
|
|
578 |
|
|
579 |
// this repository has a gb-issues branch, index all issues
|
|
580 |
if (IssueUtils.getIssuesBranch(repository) != null) {
|
|
581 |
List<IssueModel> issues = IssueUtils.getIssues(repository, null);
|
|
582 |
if (issues.size() > 0) {
|
|
583 |
result.branchCount += 1;
|
|
584 |
}
|
|
585 |
for (IssueModel issue : issues) {
|
|
586 |
result.issueCount++;
|
|
587 |
Document doc = createDocument(issue);
|
|
588 |
writer.addDocument(doc);
|
|
589 |
}
|
|
590 |
}
|
|
591 |
|
|
592 |
// commit all changes and reset the searcher
|
|
593 |
config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
|
|
594 |
config.save();
|
|
595 |
writer.commit();
|
8e9988
|
596 |
resetIndexSearcher(model.name);
|
d896e6
|
597 |
result.success();
|
JM |
598 |
} catch (Exception e) {
|
40ca5c
|
599 |
logger.error("Exception while reindexing " + model.name, e);
|
d896e6
|
600 |
}
|
JM |
601 |
return result;
|
|
602 |
}
|
|
603 |
|
|
604 |
/**
|
|
605 |
* Incrementally update the index with the specified commit for the
|
|
606 |
* repository.
|
|
607 |
*
|
|
608 |
* @param repositoryName
|
|
609 |
* @param repository
|
|
610 |
* @param branch
|
|
611 |
* the fully qualified branch name (e.g. refs/heads/master)
|
|
612 |
* @param commit
|
|
613 |
* @return true, if successful
|
|
614 |
*/
|
|
615 |
private IndexResult index(String repositoryName, Repository repository,
|
|
616 |
String branch, RevCommit commit) {
|
|
617 |
IndexResult result = new IndexResult();
|
|
618 |
try {
|
|
619 |
List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
|
|
620 |
String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
|
|
621 |
Resolution.MINUTE);
|
|
622 |
IndexWriter writer = getIndexWriter(repositoryName);
|
|
623 |
for (PathChangeModel path : changedPaths) {
|
|
624 |
// delete the indexed blob
|
856091
|
625 |
deleteBlob(repositoryName, branch, path.name);
|
d896e6
|
626 |
|
JM |
627 |
// re-index the blob
|
|
628 |
if (!ChangeType.DELETE.equals(path.changeType)) {
|
|
629 |
result.blobCount++;
|
|
630 |
Document doc = new Document();
|
|
631 |
doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), Store.YES,
|
|
632 |
Index.NOT_ANALYZED));
|
|
633 |
doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
|
|
634 |
doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
|
|
635 |
doc.add(new Field(FIELD_PATH, path.path, Store.YES, Index.ANALYZED));
|
|
636 |
doc.add(new Field(FIELD_DATE, revDate, Store.YES, Index.NO));
|
|
637 |
doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
|
|
638 |
doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
|
|
639 |
|
|
640 |
// determine extension to compare to the extension
|
|
641 |
// blacklist
|
|
642 |
String ext = null;
|
|
643 |
String name = path.name.toLowerCase();
|
|
644 |
if (name.indexOf('.') > -1) {
|
|
645 |
ext = name.substring(name.lastIndexOf('.') + 1);
|
|
646 |
}
|
|
647 |
|
|
648 |
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
|
|
649 |
// read the blob content
|
|
650 |
String str = JGitUtils.getStringContent(repository, commit.getTree(),
|
|
651 |
path.path);
|
|
652 |
doc.add(new Field(FIELD_CONTENT, str, Store.YES, Index.ANALYZED));
|
|
653 |
writer.addDocument(doc);
|
|
654 |
}
|
|
655 |
}
|
|
656 |
}
|
|
657 |
writer.commit();
|
|
658 |
|
|
659 |
Document doc = createDocument(commit, null);
|
cdbbda
|
660 |
doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED));
|
d896e6
|
661 |
result.commitCount++;
|
JM |
662 |
result.success = index(repositoryName, doc);
|
|
663 |
} catch (Exception e) {
|
|
664 |
logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e);
|
|
665 |
}
|
|
666 |
return result;
|
|
667 |
}
|
|
668 |
|
|
669 |
/**
|
|
670 |
* Incrementally update the index with the specified issue for the
|
|
671 |
* repository.
|
|
672 |
*
|
|
673 |
* @param repositoryName
|
|
674 |
* @param issue
|
|
675 |
* @return true, if successful
|
|
676 |
*/
|
|
677 |
public boolean index(String repositoryName, IssueModel issue) {
|
|
678 |
try {
|
|
679 |
// delete the old issue from the index, if exists
|
|
680 |
deleteIssue(repositoryName, issue.id);
|
|
681 |
Document doc = createDocument(issue);
|
|
682 |
return index(repositoryName, doc);
|
|
683 |
} catch (Exception e) {
|
|
684 |
logger.error(MessageFormat.format("Error while indexing issue {0} in {1}", issue.id, repositoryName), e);
|
|
685 |
}
|
|
686 |
return false;
|
|
687 |
}
|
|
688 |
|
|
689 |
/**
|
|
690 |
* Delete an issue from the repository index.
|
|
691 |
*
|
|
692 |
* @param repositoryName
|
|
693 |
* @param issueId
|
|
694 |
* @throws Exception
|
|
695 |
*/
|
|
696 |
private void deleteIssue(String repositoryName, String issueId) throws Exception {
|
|
697 |
BooleanQuery query = new BooleanQuery();
|
|
698 |
Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.issue.name());
|
|
699 |
query.add(new TermQuery(objectTerm), Occur.MUST);
|
|
700 |
Term issueidTerm = new Term(FIELD_ISSUE, issueId);
|
|
701 |
query.add(new TermQuery(issueidTerm), Occur.MUST);
|
|
702 |
|
|
703 |
IndexWriter writer = getIndexWriter(repositoryName);
|
|
704 |
writer.deleteDocuments(query);
|
|
705 |
writer.commit();
|
|
706 |
}
|
|
707 |
|
|
708 |
/**
|
|
709 |
* Delete a blob from the specified branch of the repository index.
|
|
710 |
*
|
|
711 |
* @param repositoryName
|
|
712 |
* @param branch
|
|
713 |
* @param path
|
|
714 |
* @throws Exception
|
|
715 |
*/
|
|
716 |
private void deleteBlob(String repositoryName, String branch, String path) throws Exception {
|
|
717 |
BooleanQuery query = new BooleanQuery();
|
|
718 |
Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.blob.name());
|
|
719 |
query.add(new TermQuery(objectTerm), Occur.MUST);
|
|
720 |
Term branchTerm = new Term(FIELD_BRANCH, branch);
|
|
721 |
query.add(new TermQuery(branchTerm), Occur.MUST);
|
|
722 |
Term pathTerm = new Term(FIELD_PATH, path);
|
|
723 |
query.add(new TermQuery(pathTerm), Occur.MUST);
|
|
724 |
|
|
725 |
IndexWriter writer = getIndexWriter(repositoryName);
|
|
726 |
writer.deleteDocuments(query);
|
|
727 |
writer.commit();
|
|
728 |
}
|
|
729 |
|
|
730 |
/**
|
|
731 |
* Updates a repository index incrementally from the last indexed commits.
|
|
732 |
*
|
40ca5c
|
733 |
* @param model
|
d896e6
|
734 |
* @param repository
|
JM |
735 |
* @return IndexResult
|
|
736 |
*/
|
40ca5c
|
737 |
protected IndexResult updateIndex(RepositoryModel model, Repository repository) {
|
d896e6
|
738 |
IndexResult result = new IndexResult();
|
JM |
739 |
try {
|
|
740 |
FileBasedConfig config = getConfig(repository);
|
|
741 |
config.load();
|
|
742 |
|
|
743 |
// build a quick lookup of annotated tags
|
|
744 |
Map<String, List<String>> tags = new HashMap<String, List<String>>();
|
|
745 |
for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
|
|
746 |
if (!tag.isAnnotatedTag()) {
|
|
747 |
// skip non-annotated tags
|
|
748 |
continue;
|
|
749 |
}
|
|
750 |
if (!tags.containsKey(tag.getObjectId())) {
|
|
751 |
tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
|
|
752 |
}
|
|
753 |
tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
|
|
754 |
}
|
|
755 |
|
|
756 |
// detect branch deletion
|
|
757 |
// first assume all branches are deleted and then remove each
|
|
758 |
// existing branch from deletedBranches during indexing
|
|
759 |
Set<String> deletedBranches = new TreeSet<String>();
|
|
760 |
for (String alias : config.getNames(CONF_ALIAS)) {
|
|
761 |
String branch = config.getString(CONF_ALIAS, null, alias);
|
|
762 |
deletedBranches.add(branch);
|
|
763 |
}
|
|
764 |
|
|
765 |
// walk through each branches
|
|
766 |
List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
|
|
767 |
for (RefModel branch : branches) {
|
|
768 |
String branchName = branch.getName();
|
|
769 |
|
40ca5c
|
770 |
// determine if we should skip this branch
|
JM |
771 |
if (!IssueUtils.GB_ISSUES.equals(branch)
|
|
772 |
&& !model.indexedBranches.contains(branch.getName())) {
|
|
773 |
continue;
|
|
774 |
}
|
|
775 |
|
d896e6
|
776 |
// remove this branch from the deletedBranches set
|
JM |
777 |
deletedBranches.remove(branchName);
|
|
778 |
|
|
779 |
// determine last commit
|
|
780 |
String keyName = getBranchKey(branchName);
|
|
781 |
String lastCommit = config.getString(CONF_BRANCH, null, keyName);
|
|
782 |
|
|
783 |
List<RevCommit> revs;
|
|
784 |
if (StringUtils.isEmpty(lastCommit)) {
|
|
785 |
// new branch/unindexed branch, get all commits on branch
|
|
786 |
revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
|
|
787 |
} else {
|
|
788 |
// pre-existing branch, get changes since last commit
|
|
789 |
revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
|
|
790 |
}
|
|
791 |
|
|
792 |
if (revs.size() > 0) {
|
|
793 |
result.branchCount += 1;
|
|
794 |
}
|
|
795 |
|
40ca5c
|
796 |
// track the issue ids that we have already indexed
|
JM |
797 |
Set<String> indexedIssues = new TreeSet<String>();
|
|
798 |
|
d896e6
|
799 |
// reverse the list of commits so we start with the first commit
|
JM |
800 |
Collections.reverse(revs);
|
40ca5c
|
801 |
for (RevCommit commit : revs) {
|
JM |
802 |
if (IssueUtils.GB_ISSUES.equals(branch)) {
|
|
803 |
// only index an issue once during updateIndex
|
|
804 |
String issueId = commit.getShortMessage().substring(2).trim();
|
|
805 |
if (indexedIssues.contains(issueId)) {
|
|
806 |
continue;
|
|
807 |
}
|
|
808 |
indexedIssues.add(issueId);
|
|
809 |
|
|
810 |
IssueModel issue = IssueUtils.getIssue(repository, issueId);
|
|
811 |
if (issue == null) {
|
|
812 |
// issue was deleted, remove from index
|
|
813 |
deleteIssue(model.name, issueId);
|
|
814 |
} else {
|
|
815 |
// issue was updated
|
|
816 |
index(model.name, issue);
|
|
817 |
result.issueCount++;
|
|
818 |
}
|
|
819 |
} else {
|
|
820 |
// index a commit
|
|
821 |
result.add(index(model.name, repository, branchName, commit));
|
|
822 |
}
|
d896e6
|
823 |
}
|
JM |
824 |
|
|
825 |
// update the config
|
|
826 |
config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
|
|
827 |
config.setString(CONF_ALIAS, null, keyName, branchName);
|
|
828 |
config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
|
|
829 |
config.save();
|
|
830 |
}
|
|
831 |
|
|
832 |
// the deletedBranches set will normally be empty by this point
|
|
833 |
// unless a branch really was deleted and no longer exists
|
|
834 |
if (deletedBranches.size() > 0) {
|
|
835 |
for (String branch : deletedBranches) {
|
40ca5c
|
836 |
IndexWriter writer = getIndexWriter(model.name);
|
d896e6
|
837 |
writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
|
JM |
838 |
writer.commit();
|
|
839 |
}
|
|
840 |
}
|
|
841 |
result.success = true;
|
|
842 |
} catch (Throwable t) {
|
40ca5c
|
843 |
logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
|
d896e6
|
844 |
}
|
JM |
845 |
return result;
|
|
846 |
}
|
905d31
|
847 |
|
d896e6
|
848 |
/**
|
JM |
849 |
* Creates a Lucene document from an issue.
|
|
850 |
*
|
|
851 |
* @param issue
|
|
852 |
* @return a Lucene document
|
|
853 |
*/
|
|
854 |
private Document createDocument(IssueModel issue) {
|
|
855 |
Document doc = new Document();
|
|
856 |
doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.issue.name(), Store.YES,
|
|
857 |
Field.Index.NOT_ANALYZED));
|
|
858 |
doc.add(new Field(FIELD_ISSUE, issue.id, Store.YES, Index.ANALYZED));
|
|
859 |
doc.add(new Field(FIELD_BRANCH, IssueUtils.GB_ISSUES, Store.YES, Index.ANALYZED));
|
|
860 |
doc.add(new Field(FIELD_DATE, DateTools.dateToString(issue.created, Resolution.MINUTE),
|
|
861 |
Store.YES, Field.Index.NO));
|
|
862 |
doc.add(new Field(FIELD_AUTHOR, issue.reporter, Store.YES, Index.ANALYZED));
|
|
863 |
List<String> attachments = new ArrayList<String>();
|
|
864 |
for (Attachment attachment : issue.getAttachments()) {
|
|
865 |
attachments.add(attachment.name.toLowerCase());
|
|
866 |
}
|
|
867 |
doc.add(new Field(FIELD_ATTACHMENT, StringUtils.flattenStrings(attachments), Store.YES,
|
|
868 |
Index.ANALYZED));
|
|
869 |
doc.add(new Field(FIELD_SUMMARY, issue.summary, Store.YES, Index.ANALYZED));
|
|
870 |
doc.add(new Field(FIELD_CONTENT, issue.toString(), Store.YES, Index.ANALYZED));
|
|
871 |
doc.add(new Field(FIELD_LABEL, StringUtils.flattenStrings(issue.getLabels()), Store.YES,
|
|
872 |
Index.ANALYZED));
|
|
873 |
return doc;
|
|
874 |
}
|
|
875 |
|
|
876 |
/**
|
|
877 |
* Creates a Lucene document for a commit
|
|
878 |
*
|
|
879 |
* @param commit
|
|
880 |
* @param tags
|
|
881 |
* @return a Lucene document
|
|
882 |
*/
|
|
883 |
private Document createDocument(RevCommit commit, List<String> tags) {
|
|
884 |
Document doc = new Document();
|
|
885 |
doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), Store.YES,
|
|
886 |
Index.NOT_ANALYZED));
|
|
887 |
doc.add(new Field(FIELD_COMMIT, commit.getName(), Store.YES, Index.ANALYZED));
|
|
888 |
doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
|
|
889 |
Resolution.MINUTE), Store.YES, Index.NO));
|
|
890 |
doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), Store.YES, Index.ANALYZED));
|
|
891 |
doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), Store.YES, Index.ANALYZED));
|
|
892 |
doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), Store.YES, Index.ANALYZED));
|
|
893 |
doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), Store.YES, Index.ANALYZED));
|
|
894 |
if (!ArrayUtils.isEmpty(tags)) {
|
|
895 |
doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), Store.YES, Index.ANALYZED));
|
|
896 |
}
|
|
897 |
return doc;
|
|
898 |
}
|
|
899 |
|
|
900 |
/**
|
|
901 |
* Incrementally index an object for the repository.
|
|
902 |
*
|
|
903 |
* @param repositoryName
|
|
904 |
* @param doc
|
|
905 |
* @return true, if successful
|
|
906 |
*/
|
|
907 |
private boolean index(String repositoryName, Document doc) {
|
|
908 |
try {
|
|
909 |
IndexWriter writer = getIndexWriter(repositoryName);
|
|
910 |
writer.addDocument(doc);
|
|
911 |
writer.commit();
|
8e9988
|
912 |
resetIndexSearcher(repositoryName);
|
d896e6
|
913 |
return true;
|
JM |
914 |
} catch (Exception e) {
|
|
915 |
logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
|
|
916 |
}
|
|
917 |
return false;
|
|
918 |
}
|
|
919 |
|
d04009
|
920 |
private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException {
|
d896e6
|
921 |
SearchResult result = new SearchResult();
|
d04009
|
922 |
result.hitId = hitId;
|
JM |
923 |
result.totalHits = totalHits;
|
d896e6
|
924 |
result.score = score;
|
JM |
925 |
result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
|
|
926 |
result.summary = doc.get(FIELD_SUMMARY);
|
|
927 |
result.author = doc.get(FIELD_AUTHOR);
|
|
928 |
result.committer = doc.get(FIELD_COMMITTER);
|
|
929 |
result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
|
|
930 |
result.branch = doc.get(FIELD_BRANCH);
|
|
931 |
result.commitId = doc.get(FIELD_COMMIT);
|
|
932 |
result.issueId = doc.get(FIELD_ISSUE);
|
|
933 |
result.path = doc.get(FIELD_PATH);
|
|
934 |
if (doc.get(FIELD_TAG) != null) {
|
|
935 |
result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
|
|
936 |
}
|
|
937 |
if (doc.get(FIELD_LABEL) != null) {
|
|
938 |
result.labels = StringUtils.getStringsFromValue(doc.get(FIELD_LABEL));
|
|
939 |
}
|
|
940 |
return result;
|
|
941 |
}
|
|
942 |
|
|
943 |
private synchronized void resetIndexSearcher(String repository) throws IOException {
|
|
944 |
IndexSearcher searcher = searchers.remove(repository);
|
|
945 |
if (searcher != null) {
|
8e9988
|
946 |
searcher.getIndexReader().close();
|
d896e6
|
947 |
}
|
JM |
948 |
}
|
|
949 |
|
|
950 |
/**
|
|
951 |
* Gets an index searcher for the repository.
|
|
952 |
*
|
|
953 |
* @param repository
|
|
954 |
* @return
|
|
955 |
* @throws IOException
|
|
956 |
*/
|
|
957 |
private IndexSearcher getIndexSearcher(String repository) throws IOException {
|
|
958 |
IndexSearcher searcher = searchers.get(repository);
|
|
959 |
if (searcher == null) {
|
|
960 |
IndexWriter writer = getIndexWriter(repository);
|
|
961 |
searcher = new IndexSearcher(IndexReader.open(writer, true));
|
|
962 |
searchers.put(repository, searcher);
|
|
963 |
}
|
|
964 |
return searcher;
|
|
965 |
}
|
|
966 |
|
|
967 |
/**
|
|
968 |
* Gets an index writer for the repository. The index will be created if it
|
|
969 |
* does not already exist or if forceCreate is specified.
|
|
970 |
*
|
|
971 |
* @param repository
|
|
972 |
* @return an IndexWriter
|
|
973 |
* @throws IOException
|
|
974 |
*/
|
|
975 |
private IndexWriter getIndexWriter(String repository) throws IOException {
|
|
976 |
IndexWriter indexWriter = writers.get(repository);
|
|
977 |
File repositoryFolder = new File(repositoriesFolder, repository);
|
|
978 |
File indexFolder = new File(repositoryFolder, LUCENE_DIR);
|
|
979 |
Directory directory = FSDirectory.open(indexFolder);
|
|
980 |
|
|
981 |
if (indexWriter == null) {
|
|
982 |
if (!indexFolder.exists()) {
|
|
983 |
indexFolder.mkdirs();
|
|
984 |
}
|
|
985 |
StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
|
|
986 |
IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
|
|
987 |
config.setOpenMode(OpenMode.CREATE_OR_APPEND);
|
|
988 |
indexWriter = new IndexWriter(directory, config);
|
|
989 |
writers.put(repository, indexWriter);
|
|
990 |
}
|
|
991 |
return indexWriter;
|
|
992 |
}
|
|
993 |
|
|
994 |
/**
|
|
995 |
* Searches the specified repositories for the given text or query
|
|
996 |
*
|
|
997 |
* @param text
|
|
998 |
* if the text is null or empty, null is returned
|
d04009
|
999 |
* @param page
|
JM |
1000 |
* the page number to retrieve. page is 1-indexed.
|
|
1001 |
* @param pageSize
|
|
1002 |
* the number of elements to return for this page
|
d896e6
|
1003 |
* @param repositories
|
JM |
1004 |
* a list of repositories to search. if no repositories are
|
|
1005 |
* specified null is returned.
|
|
1006 |
* @return a list of SearchResults in order from highest to the lowest score
|
|
1007 |
*
|
|
1008 |
*/
|
d04009
|
1009 |
public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) {
|
d896e6
|
1010 |
if (ArrayUtils.isEmpty(repositories)) {
|
JM |
1011 |
return null;
|
|
1012 |
}
|
d04009
|
1013 |
return search(text, page, pageSize, repositories.toArray(new String[0]));
|
d896e6
|
1014 |
}
|
JM |
1015 |
|
|
1016 |
/**
|
|
1017 |
* Searches the specified repositories for the given text or query
|
|
1018 |
*
|
|
1019 |
* @param text
|
|
1020 |
* if the text is null or empty, null is returned
|
d04009
|
1021 |
* @param page
|
JM |
1022 |
* the page number to retrieve. page is 1-indexed.
|
|
1023 |
* @param pageSize
|
|
1024 |
* the number of elements to return for this page
|
d896e6
|
1025 |
* @param repositories
|
JM |
1026 |
* a list of repositories to search. if no repositories are
|
|
1027 |
* specified null is returned.
|
|
1028 |
* @return a list of SearchResults in order from highest to the lowest score
|
|
1029 |
*
|
d04009
|
1030 |
*/
|
JM |
1031 |
public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
|
d896e6
|
1032 |
if (StringUtils.isEmpty(text)) {
|
JM |
1033 |
return null;
|
|
1034 |
}
|
|
1035 |
if (ArrayUtils.isEmpty(repositories)) {
|
|
1036 |
return null;
|
|
1037 |
}
|
|
1038 |
Set<SearchResult> results = new LinkedHashSet<SearchResult>();
|
|
1039 |
StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
|
|
1040 |
try {
|
|
1041 |
// default search checks summary and content
|
|
1042 |
BooleanQuery query = new BooleanQuery();
|
|
1043 |
QueryParser qp;
|
|
1044 |
qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
|
|
1045 |
qp.setAllowLeadingWildcard(true);
|
|
1046 |
query.add(qp.parse(text), Occur.SHOULD);
|
|
1047 |
|
|
1048 |
qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
|
|
1049 |
qp.setAllowLeadingWildcard(true);
|
|
1050 |
query.add(qp.parse(text), Occur.SHOULD);
|
|
1051 |
|
|
1052 |
IndexSearcher searcher;
|
|
1053 |
if (repositories.length == 1) {
|
|
1054 |
// single repository search
|
|
1055 |
searcher = getIndexSearcher(repositories[0]);
|
|
1056 |
} else {
|
|
1057 |
// multiple repository search
|
|
1058 |
List<IndexReader> readers = new ArrayList<IndexReader>();
|
|
1059 |
for (String repository : repositories) {
|
|
1060 |
IndexSearcher repositoryIndex = getIndexSearcher(repository);
|
|
1061 |
readers.add(repositoryIndex.getIndexReader());
|
|
1062 |
}
|
|
1063 |
IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
|
905d31
|
1064 |
MultiSourceReader reader = new MultiSourceReader(rdrs);
|
d896e6
|
1065 |
searcher = new IndexSearcher(reader);
|
JM |
1066 |
}
|
|
1067 |
Query rewrittenQuery = searcher.rewrite(query);
|
d04009
|
1068 |
TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
|
d896e6
|
1069 |
searcher.search(rewrittenQuery, collector);
|
d04009
|
1070 |
int offset = Math.max(0, (page - 1) * pageSize);
|
JM |
1071 |
ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
|
|
1072 |
int totalHits = collector.getTotalHits();
|
d896e6
|
1073 |
for (int i = 0; i < hits.length; i++) {
|
JM |
1074 |
int docId = hits[i].doc;
|
|
1075 |
Document doc = searcher.doc(docId);
|
d04009
|
1076 |
SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
|
905d31
|
1077 |
if (repositories.length == 1) {
|
JM |
1078 |
// single repository search
|
|
1079 |
result.repository = repositories[0];
|
|
1080 |
} else {
|
|
1081 |
// multi-repository search
|
|
1082 |
MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
|
|
1083 |
int index = reader.getSourceIndex(docId);
|
|
1084 |
result.repository = repositories[index];
|
|
1085 |
}
|
d896e6
|
1086 |
String content = doc.get(FIELD_CONTENT);
|
JM |
1087 |
result.fragment = getHighlightedFragment(analyzer, query, content, result);
|
|
1088 |
results.add(result);
|
|
1089 |
}
|
|
1090 |
} catch (Exception e) {
|
|
1091 |
logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
|
|
1092 |
}
|
|
1093 |
return new ArrayList<SearchResult>(results);
|
|
1094 |
}
|
|
1095 |
|
|
1096 |
/**
|
|
1097 |
*
|
|
1098 |
* @param analyzer
|
|
1099 |
* @param query
|
|
1100 |
* @param content
|
|
1101 |
* @param result
|
|
1102 |
* @return
|
|
1103 |
* @throws IOException
|
|
1104 |
* @throws InvalidTokenOffsetsException
|
|
1105 |
*/
|
|
1106 |
private String getHighlightedFragment(Analyzer analyzer, Query query,
|
|
1107 |
String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
|
|
1108 |
content = content == null ? "":StringUtils.escapeForHtml(content, false);
|
|
1109 |
|
|
1110 |
QueryScorer scorer = new QueryScorer(query, "content");
|
|
1111 |
Fragmenter fragmenter;
|
|
1112 |
|
|
1113 |
// TODO improve the fragmenter - hopefully on line breaks
|
|
1114 |
if (SearchObjectType.commit == result.type) {
|
|
1115 |
fragmenter = new SimpleSpanFragmenter(scorer, 1024);
|
|
1116 |
} else {
|
|
1117 |
fragmenter = new SimpleSpanFragmenter(scorer, 150);
|
|
1118 |
}
|
|
1119 |
|
|
1120 |
// use an artificial delimiter for the token
|
|
1121 |
String termTag = "<!--[";
|
|
1122 |
String termTagEnd = "]-->";
|
|
1123 |
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
|
|
1124 |
Highlighter highlighter = new Highlighter(formatter, scorer);
|
|
1125 |
highlighter.setTextFragmenter(fragmenter);
|
|
1126 |
|
73fba6
|
1127 |
String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
|
d896e6
|
1128 |
if (ArrayUtils.isEmpty(fragments)) {
|
JM |
1129 |
if (SearchObjectType.blob == result.type) {
|
|
1130 |
return "";
|
|
1131 |
}
|
|
1132 |
return "<pre class=\"text\">" + content + "</pre>";
|
|
1133 |
}
|
|
1134 |
StringBuilder sb = new StringBuilder();
|
|
1135 |
for (int i = 0, len = fragments.length; i < len; i++) {
|
|
1136 |
String fragment = fragments[i];
|
|
1137 |
|
|
1138 |
// resurrect the raw fragment from removing the artificial delimiters
|
|
1139 |
String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
|
|
1140 |
sb.append(getPreTag(result, raw, content));
|
|
1141 |
|
|
1142 |
// replace the artificial delimiter with html tags
|
|
1143 |
String html = fragment.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
|
|
1144 |
sb.append(html);
|
|
1145 |
sb.append("</pre>");
|
|
1146 |
if (i < len - 1) {
|
|
1147 |
sb.append("<span class=\"ellipses\">...</span><br/>");
|
|
1148 |
}
|
|
1149 |
}
|
|
1150 |
return sb.toString();
|
|
1151 |
}
|
|
1152 |
|
|
1153 |
/**
|
|
1154 |
* Returns the appropriate tag for a fragment. Commit messages are visually
|
|
1155 |
* differentiated from blob fragments.
|
|
1156 |
*
|
|
1157 |
* @param result
|
|
1158 |
* @param fragment
|
|
1159 |
* @param content
|
|
1160 |
* @return an html tag appropriate for the fragment
|
|
1161 |
*/
|
|
1162 |
private String getPreTag(SearchResult result, String fragment, String content) {
|
|
1163 |
String pre = "<pre class=\"text\">";
|
|
1164 |
if (SearchObjectType.blob == result.type) {
|
|
1165 |
int line = StringUtils.countLines(content.substring(0, content.indexOf(fragment)));
|
|
1166 |
int lastDot = result.path.lastIndexOf('.');
|
|
1167 |
if (lastDot > -1) {
|
|
1168 |
String ext = result.path.substring(lastDot + 1).toLowerCase();
|
|
1169 |
pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0} lang-{1}\">", line, ext);
|
|
1170 |
} else {
|
|
1171 |
pre = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}\">", line);
|
|
1172 |
}
|
|
1173 |
}
|
|
1174 |
return pre;
|
|
1175 |
}
|
|
1176 |
|
|
1177 |
/**
|
|
1178 |
* Simple class to track the results of an index update.
|
|
1179 |
*/
|
|
1180 |
private class IndexResult {
|
|
1181 |
long startTime = System.currentTimeMillis();
|
|
1182 |
long endTime = startTime;
|
|
1183 |
boolean success;
|
|
1184 |
int branchCount;
|
|
1185 |
int commitCount;
|
|
1186 |
int blobCount;
|
|
1187 |
int issueCount;
|
|
1188 |
|
|
1189 |
void add(IndexResult result) {
|
|
1190 |
this.branchCount += result.branchCount;
|
|
1191 |
this.commitCount += result.commitCount;
|
|
1192 |
this.blobCount += result.blobCount;
|
|
1193 |
this.issueCount += result.issueCount;
|
|
1194 |
}
|
|
1195 |
|
|
1196 |
void success() {
|
|
1197 |
success = true;
|
|
1198 |
endTime = System.currentTimeMillis();
|
|
1199 |
}
|
|
1200 |
|
|
1201 |
float duration() {
|
|
1202 |
return (endTime - startTime)/1000f;
|
|
1203 |
}
|
b938ae
|
1204 |
}
|
905d31
|
1205 |
|
JM |
1206 |
/**
|
|
1207 |
* Custom subclass of MultiReader to identify the source index for a given
|
|
1208 |
* doc id. This would not be necessary of there was a public method to
|
|
1209 |
* obtain this information.
|
|
1210 |
*
|
|
1211 |
*/
|
|
1212 |
private class MultiSourceReader extends MultiReader {
|
|
1213 |
|
|
1214 |
final Method method;
|
|
1215 |
|
|
1216 |
MultiSourceReader(IndexReader[] subReaders) {
|
|
1217 |
super(subReaders);
|
|
1218 |
Method m = null;
|
|
1219 |
try {
|
|
1220 |
m = MultiReader.class.getDeclaredMethod("readerIndex", int.class);
|
|
1221 |
m.setAccessible(true);
|
|
1222 |
} catch (Exception e) {
|
|
1223 |
logger.error("Error getting readerIndex method", e);
|
|
1224 |
}
|
|
1225 |
method = m;
|
|
1226 |
}
|
|
1227 |
|
|
1228 |
int getSourceIndex(int docId) {
|
|
1229 |
int index = -1;
|
|
1230 |
try {
|
|
1231 |
Object o = method.invoke(this, docId);
|
|
1232 |
index = (Integer) o;
|
|
1233 |
} catch (Exception e) {
|
|
1234 |
logger.error("Error getting source index", e);
|
|
1235 |
}
|
|
1236 |
return index;
|
|
1237 |
}
|
|
1238 |
}
|
e31da0
|
1239 |
}
|