commit | author | age
|
e31da0
|
1 |
/*
|
JM |
2 |
* Copyright 2012 gitblit.com.
|
|
3 |
*
|
|
4 |
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5 |
* you may not use this file except in compliance with the License.
|
|
6 |
* You may obtain a copy of the License at
|
|
7 |
*
|
|
8 |
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9 |
*
|
|
10 |
* Unless required by applicable law or agreed to in writing, software
|
|
11 |
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12 |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13 |
* See the License for the specific language governing permissions and
|
|
14 |
* limitations under the License.
|
|
15 |
*/
|
7bf6e1
|
16 |
package com.gitblit.service;
|
e31da0
|
17 |
|
d896e6
|
18 |
import static org.eclipse.jgit.treewalk.filter.TreeFilter.ANY_DIFF;
|
e31da0
|
19 |
|
d896e6
|
20 |
import java.io.ByteArrayOutputStream;
|
JM |
21 |
import java.io.File;
|
eecaad
|
22 |
import java.io.FileInputStream;
|
d896e6
|
23 |
import java.io.IOException;
|
JM |
24 |
import java.io.InputStream;
|
|
25 |
import java.text.MessageFormat;
|
|
26 |
import java.text.ParseException;
|
|
27 |
import java.util.ArrayList;
|
|
28 |
import java.util.Collections;
|
|
29 |
import java.util.Comparator;
|
|
30 |
import java.util.HashMap;
|
|
31 |
import java.util.LinkedHashSet;
|
|
32 |
import java.util.List;
|
|
33 |
import java.util.Map;
|
|
34 |
import java.util.Set;
|
|
35 |
import java.util.TreeMap;
|
|
36 |
import java.util.TreeSet;
|
|
37 |
import java.util.concurrent.ConcurrentHashMap;
|
|
38 |
|
|
39 |
import org.apache.lucene.analysis.Analyzer;
|
|
40 |
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|
41 |
import org.apache.lucene.document.DateTools;
|
|
42 |
import org.apache.lucene.document.DateTools.Resolution;
|
|
43 |
import org.apache.lucene.document.Document;
|
|
44 |
import org.apache.lucene.document.Field;
|
db9832
|
45 |
import org.apache.lucene.document.StringField;
|
JM |
46 |
import org.apache.lucene.document.TextField;
|
|
47 |
import org.apache.lucene.index.DirectoryReader;
|
d896e6
|
48 |
import org.apache.lucene.index.IndexReader;
|
JM |
49 |
import org.apache.lucene.index.IndexWriter;
|
|
50 |
import org.apache.lucene.index.IndexWriterConfig;
|
|
51 |
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
|
52 |
import org.apache.lucene.index.MultiReader;
|
|
53 |
import org.apache.lucene.index.Term;
|
db9832
|
54 |
import org.apache.lucene.queryparser.classic.QueryParser;
|
d896e6
|
55 |
import org.apache.lucene.search.BooleanClause.Occur;
|
JM |
56 |
import org.apache.lucene.search.BooleanQuery;
|
|
57 |
import org.apache.lucene.search.IndexSearcher;
|
|
58 |
import org.apache.lucene.search.Query;
|
|
59 |
import org.apache.lucene.search.ScoreDoc;
|
|
60 |
import org.apache.lucene.search.TopScoreDocCollector;
|
|
61 |
import org.apache.lucene.search.highlight.Fragmenter;
|
|
62 |
import org.apache.lucene.search.highlight.Highlighter;
|
|
63 |
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
|
|
64 |
import org.apache.lucene.search.highlight.QueryScorer;
|
|
65 |
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
|
|
66 |
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
|
|
67 |
import org.apache.lucene.store.Directory;
|
|
68 |
import org.apache.lucene.store.FSDirectory;
|
|
69 |
import org.apache.lucene.util.Version;
|
eecaad
|
70 |
import org.apache.tika.metadata.Metadata;
|
PM |
71 |
import org.apache.tika.parser.AutoDetectParser;
|
|
72 |
import org.apache.tika.parser.ParseContext;
|
|
73 |
import org.apache.tika.parser.pdf.PDFParser;
|
|
74 |
import org.apache.tika.sax.BodyContentHandler;
|
d896e6
|
75 |
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
|
JM |
76 |
import org.eclipse.jgit.lib.Constants;
|
a02998
|
77 |
import org.eclipse.jgit.lib.FileMode;
|
d896e6
|
78 |
import org.eclipse.jgit.lib.ObjectId;
|
JM |
79 |
import org.eclipse.jgit.lib.ObjectLoader;
|
|
80 |
import org.eclipse.jgit.lib.ObjectReader;
|
e31da0
|
81 |
import org.eclipse.jgit.lib.Repository;
|
6ef2fc
|
82 |
import org.eclipse.jgit.lib.RepositoryCache.FileKey;
|
d896e6
|
83 |
import org.eclipse.jgit.revwalk.RevCommit;
|
JM |
84 |
import org.eclipse.jgit.revwalk.RevTree;
|
|
85 |
import org.eclipse.jgit.revwalk.RevWalk;
|
|
86 |
import org.eclipse.jgit.storage.file.FileBasedConfig;
|
|
87 |
import org.eclipse.jgit.treewalk.EmptyTreeIterator;
|
|
88 |
import org.eclipse.jgit.treewalk.TreeWalk;
|
|
89 |
import org.eclipse.jgit.util.FS;
|
e31da0
|
90 |
import org.slf4j.Logger;
|
JM |
91 |
import org.slf4j.LoggerFactory;
|
|
92 |
|
d896e6
|
93 |
import com.gitblit.Constants.SearchObjectType;
|
eecaad
|
94 |
import com.gitblit.GitBlit;
|
7bf6e1
|
95 |
import com.gitblit.IStoredSettings;
|
JM |
96 |
import com.gitblit.Keys;
|
eecaad
|
97 |
import com.gitblit.manager.FilestoreManager;
|
PM |
98 |
import com.gitblit.manager.IFilestoreManager;
|
db4f6b
|
99 |
import com.gitblit.manager.IRepositoryManager;
|
d896e6
|
100 |
import com.gitblit.models.PathModel.PathChangeModel;
|
JM |
101 |
import com.gitblit.models.RefModel;
|
40ca5c
|
102 |
import com.gitblit.models.RepositoryModel;
|
d896e6
|
103 |
import com.gitblit.models.SearchResult;
|
JM |
104 |
import com.gitblit.utils.ArrayUtils;
|
e31da0
|
105 |
import com.gitblit.utils.JGitUtils;
|
d896e6
|
106 |
import com.gitblit.utils.StringUtils;
|
e31da0
|
107 |
|
JM |
108 |
/**
|
7bf6e1
|
109 |
* The Lucene service handles indexing and searching repositories.
|
699e71
|
110 |
*
|
e31da0
|
111 |
* @author James Moger
|
699e71
|
112 |
*
|
e31da0
|
113 |
*/
|
7bf6e1
|
114 |
public class LuceneService implements Runnable {
|
699e71
|
115 |
|
JM |
116 |
|
3a4470
|
117 |
private static final int INDEX_VERSION = 6;
|
e31da0
|
118 |
|
d896e6
|
119 |
private static final String FIELD_OBJECT_TYPE = "type";
|
JM |
120 |
private static final String FIELD_PATH = "path";
|
|
121 |
private static final String FIELD_COMMIT = "commit";
|
|
122 |
private static final String FIELD_BRANCH = "branch";
|
|
123 |
private static final String FIELD_SUMMARY = "summary";
|
|
124 |
private static final String FIELD_CONTENT = "content";
|
|
125 |
private static final String FIELD_AUTHOR = "author";
|
|
126 |
private static final String FIELD_COMMITTER = "committer";
|
|
127 |
private static final String FIELD_DATE = "date";
|
|
128 |
private static final String FIELD_TAG = "tag";
|
|
129 |
|
|
130 |
private static final String CONF_FILE = "lucene.conf";
|
|
131 |
private static final String LUCENE_DIR = "lucene";
|
|
132 |
private static final String CONF_INDEX = "index";
|
|
133 |
private static final String CONF_VERSION = "version";
|
|
134 |
private static final String CONF_ALIAS = "aliases";
|
|
135 |
private static final String CONF_BRANCH = "branches";
|
699e71
|
136 |
|
3a4470
|
137 |
private static final Version LUCENE_VERSION = Version.LUCENE_4_10_0;
|
699e71
|
138 |
|
7bf6e1
|
139 |
private final Logger logger = LoggerFactory.getLogger(LuceneService.class);
|
699e71
|
140 |
|
d896e6
|
141 |
private final IStoredSettings storedSettings;
|
cacf8b
|
142 |
private final IRepositoryManager repositoryManager;
|
eecaad
|
143 |
private final IFilestoreManager filestoreManager;
|
PM |
144 |
|
d896e6
|
145 |
private final File repositoriesFolder;
|
699e71
|
146 |
|
d896e6
|
147 |
private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
|
JM |
148 |
private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();
|
699e71
|
149 |
|
f1d2ad
|
150 |
private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip";
|
JM |
151 |
private Set<String> excludedExtensions;
|
699e71
|
152 |
|
7bf6e1
|
153 |
public LuceneService(
|
cacf8b
|
154 |
IStoredSettings settings,
|
eecaad
|
155 |
IRepositoryManager repositoryManager,
|
PM |
156 |
IFilestoreManager filestoreManager) {
|
cacf8b
|
157 |
|
d896e6
|
158 |
this.storedSettings = settings;
|
cacf8b
|
159 |
this.repositoryManager = repositoryManager;
|
eecaad
|
160 |
this.filestoreManager = filestoreManager;
|
cacf8b
|
161 |
this.repositoriesFolder = repositoryManager.getRepositoriesFolder();
|
462488
|
162 |
String exts = luceneIgnoreExtensions;
|
JM |
163 |
if (settings != null) {
|
|
164 |
exts = settings.getString(Keys.web.luceneIgnoreExtensions, exts);
|
|
165 |
}
|
|
166 |
excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
|
e31da0
|
167 |
}
|
JM |
168 |
|
|
169 |
/**
|
699e71
|
170 |
* Run is executed by the Gitblit executor service. Because this is called
|
273cb9
|
171 |
* by an executor service, calls will queue - i.e. there can never be
|
JM |
172 |
* concurrent execution of repository index updates.
|
e31da0
|
173 |
*/
|
JM |
174 |
@Override
|
|
175 |
public void run() {
|
7db092
|
176 |
if (!storedSettings.getBoolean(Keys.web.allowLuceneIndexing, true)) {
|
JM |
177 |
// Lucene indexing is disabled
|
|
178 |
return;
|
|
179 |
}
|
f1d2ad
|
180 |
// reload the excluded extensions
|
JM |
181 |
String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions);
|
|
182 |
excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
|
|
183 |
|
db4f6b
|
184 |
if (repositoryManager.isCollectingGarbage()) {
|
dad8b4
|
185 |
// busy collecting garbage, try again later
|
JM |
186 |
return;
|
|
187 |
}
|
699e71
|
188 |
|
db4f6b
|
189 |
for (String repositoryName: repositoryManager.getRepositoryList()) {
|
JM |
190 |
RepositoryModel model = repositoryManager.getRepositoryModel(repositoryName);
|
40ca5c
|
191 |
if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {
|
db4f6b
|
192 |
Repository repository = repositoryManager.getRepository(model.name);
|
e92c6d
|
193 |
if (repository == null) {
|
db4f6b
|
194 |
if (repositoryManager.isCollectingGarbage(model.name)) {
|
e92c6d
|
195 |
logger.info(MessageFormat.format("Skipping Lucene index of {0}, busy garbage collecting", repositoryName));
|
JM |
196 |
}
|
|
197 |
continue;
|
|
198 |
}
|
699e71
|
199 |
index(model, repository);
|
40ca5c
|
200 |
repository.close();
|
JM |
201 |
System.gc();
|
e31da0
|
202 |
}
|
JM |
203 |
}
|
|
204 |
}
|
|
205 |
|
|
206 |
/**
|
|
207 |
* Synchronously indexes a repository. This may build a complete index of a
|
|
208 |
* repository or it may update an existing index.
|
699e71
|
209 |
*
|
3ad13e
|
210 |
* @param displayName
|
e31da0
|
211 |
* the name of the repository
|
JM |
212 |
* @param repository
|
|
213 |
* the repository object
|
|
214 |
*/
|
9f6ef3
|
215 |
private void index(RepositoryModel model, Repository repository) {
|
e31da0
|
216 |
try {
|
40ca5c
|
217 |
if (shouldReindex(repository)) {
|
JM |
218 |
// (re)build the entire index
|
|
219 |
IndexResult result = reindex(model, repository);
|
|
220 |
|
|
221 |
if (result.success) {
|
|
222 |
if (result.commitCount > 0) {
|
|
223 |
String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs";
|
|
224 |
logger.info(MessageFormat.format(msg, model.name, result.commitCount,
|
|
225 |
result.blobCount, result.branchCount, result.duration()));
|
e31da0
|
226 |
}
|
JM |
227 |
} else {
|
40ca5c
|
228 |
String msg = "Could not build {0} Lucene index!";
|
JM |
229 |
logger.error(MessageFormat.format(msg, model.name));
|
e31da0
|
230 |
}
|
JM |
231 |
} else {
|
40ca5c
|
232 |
// update the index with latest commits
|
JM |
233 |
IndexResult result = updateIndex(model, repository);
|
|
234 |
if (result.success) {
|
|
235 |
if (result.commitCount > 0) {
|
|
236 |
String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs";
|
|
237 |
logger.info(MessageFormat.format(msg, model.name, result.commitCount,
|
|
238 |
result.blobCount, result.branchCount, result.duration()));
|
|
239 |
}
|
|
240 |
} else {
|
|
241 |
String msg = "Could not update {0} Lucene index!";
|
|
242 |
logger.error(MessageFormat.format(msg, model.name));
|
|
243 |
}
|
e31da0
|
244 |
}
|
JM |
245 |
} catch (Throwable t) {
|
40ca5c
|
246 |
logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t);
|
e31da0
|
247 |
}
|
JM |
248 |
}
|
699e71
|
249 |
|
e6637c
|
250 |
/**
|
JM |
251 |
* Close the writer/searcher objects for a repository.
|
699e71
|
252 |
*
|
e6637c
|
253 |
* @param repositoryName
|
JM |
254 |
*/
|
8e9988
|
255 |
public synchronized void close(String repositoryName) {
|
JM |
256 |
try {
|
|
257 |
IndexSearcher searcher = searchers.remove(repositoryName);
|
|
258 |
if (searcher != null) {
|
|
259 |
searcher.getIndexReader().close();
|
|
260 |
}
|
|
261 |
} catch (Exception e) {
|
|
262 |
logger.error("Failed to close index searcher for " + repositoryName, e);
|
|
263 |
}
|
699e71
|
264 |
|
e6637c
|
265 |
try {
|
JM |
266 |
IndexWriter writer = writers.remove(repositoryName);
|
|
267 |
if (writer != null) {
|
|
268 |
writer.close();
|
|
269 |
}
|
|
270 |
} catch (Exception e) {
|
|
271 |
logger.error("Failed to close index writer for " + repositoryName, e);
|
699e71
|
272 |
}
|
e6637c
|
273 |
}
|
b938ae
|
274 |
|
JM |
275 |
/**
|
|
276 |
* Close all Lucene indexers.
|
699e71
|
277 |
*
|
b938ae
|
278 |
*/
|
8e9988
|
279 |
public synchronized void close() {
|
d896e6
|
280 |
// close all writers
|
JM |
281 |
for (String writer : writers.keySet()) {
|
|
282 |
try {
|
60110f
|
283 |
writers.get(writer).close(true);
|
d896e6
|
284 |
} catch (Throwable t) {
|
JM |
285 |
logger.error("Failed to close Lucene writer for " + writer, t);
|
|
286 |
}
|
|
287 |
}
|
|
288 |
writers.clear();
|
|
289 |
|
|
290 |
// close all searchers
|
|
291 |
for (String searcher : searchers.keySet()) {
|
|
292 |
try {
|
8e9988
|
293 |
searchers.get(searcher).getIndexReader().close();
|
d896e6
|
294 |
} catch (Throwable t) {
|
JM |
295 |
logger.error("Failed to close Lucene searcher for " + searcher, t);
|
|
296 |
}
|
|
297 |
}
|
|
298 |
searchers.clear();
|
|
299 |
}
|
|
300 |
|
699e71
|
301 |
|
d896e6
|
302 |
/**
|
JM |
303 |
* Deletes the Lucene index for the specified repository.
|
699e71
|
304 |
*
|
d896e6
|
305 |
* @param repositoryName
|
JM |
306 |
* @return true, if successful
|
|
307 |
*/
|
|
308 |
public boolean deleteIndex(String repositoryName) {
|
|
309 |
try {
|
8e9988
|
310 |
// close any open writer/searcher
|
JM |
311 |
close(repositoryName);
|
|
312 |
|
d896e6
|
313 |
// delete the index folder
|
eb741a
|
314 |
File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repositoryName), FS.DETECTED);
|
d896e6
|
315 |
File luceneIndex = new File(repositoryFolder, LUCENE_DIR);
|
JM |
316 |
if (luceneIndex.exists()) {
|
|
317 |
org.eclipse.jgit.util.FileUtils.delete(luceneIndex,
|
|
318 |
org.eclipse.jgit.util.FileUtils.RECURSIVE);
|
|
319 |
}
|
|
320 |
// delete the config file
|
|
321 |
File luceneConfig = new File(repositoryFolder, CONF_FILE);
|
|
322 |
if (luceneConfig.exists()) {
|
|
323 |
luceneConfig.delete();
|
|
324 |
}
|
|
325 |
return true;
|
|
326 |
} catch (IOException e) {
|
|
327 |
throw new RuntimeException(e);
|
|
328 |
}
|
|
329 |
}
|
699e71
|
330 |
|
d896e6
|
331 |
/**
|
JM |
332 |
* Returns the author for the commit, if this information is available.
|
699e71
|
333 |
*
|
d896e6
|
334 |
* @param commit
|
JM |
335 |
* @return an author or unknown
|
|
336 |
*/
|
|
337 |
private String getAuthor(RevCommit commit) {
|
|
338 |
String name = "unknown";
|
|
339 |
try {
|
|
340 |
name = commit.getAuthorIdent().getName();
|
|
341 |
if (StringUtils.isEmpty(name)) {
|
|
342 |
name = commit.getAuthorIdent().getEmailAddress();
|
|
343 |
}
|
699e71
|
344 |
} catch (NullPointerException n) {
|
d896e6
|
345 |
}
|
JM |
346 |
return name;
|
|
347 |
}
|
699e71
|
348 |
|
d896e6
|
349 |
/**
|
JM |
350 |
* Returns the committer for the commit, if this information is available.
|
699e71
|
351 |
*
|
d896e6
|
352 |
* @param commit
|
JM |
353 |
* @return an committer or unknown
|
|
354 |
*/
|
|
355 |
private String getCommitter(RevCommit commit) {
|
|
356 |
String name = "unknown";
|
|
357 |
try {
|
|
358 |
name = commit.getCommitterIdent().getName();
|
|
359 |
if (StringUtils.isEmpty(name)) {
|
|
360 |
name = commit.getCommitterIdent().getEmailAddress();
|
|
361 |
}
|
699e71
|
362 |
} catch (NullPointerException n) {
|
d896e6
|
363 |
}
|
JM |
364 |
return name;
|
|
365 |
}
|
699e71
|
366 |
|
905d31
|
367 |
/**
|
JM |
368 |
* Get the tree associated with the given commit.
|
|
369 |
*
|
|
370 |
* @param walk
|
|
371 |
* @param commit
|
|
372 |
* @return tree
|
|
373 |
* @throws IOException
|
|
374 |
*/
|
9f6ef3
|
375 |
private RevTree getTree(final RevWalk walk, final RevCommit commit)
|
905d31
|
376 |
throws IOException {
|
JM |
377 |
final RevTree tree = commit.getTree();
|
|
378 |
if (tree != null) {
|
|
379 |
return tree;
|
|
380 |
}
|
|
381 |
walk.parseHeaders(commit);
|
|
382 |
return commit.getTree();
|
|
383 |
}
|
d896e6
|
384 |
|
JM |
385 |
/**
|
|
386 |
* Construct a keyname from the branch.
|
699e71
|
387 |
*
|
d896e6
|
388 |
* @param branchName
|
JM |
389 |
* @return a keyname appropriate for the Git config file format
|
|
390 |
*/
|
|
391 |
private String getBranchKey(String branchName) {
|
|
392 |
return StringUtils.getSHA1(branchName);
|
|
393 |
}
|
|
394 |
|
|
395 |
/**
|
|
396 |
* Returns the Lucene configuration for the specified repository.
|
699e71
|
397 |
*
|
d896e6
|
398 |
* @param repository
|
JM |
399 |
* @return a config object
|
|
400 |
*/
|
|
401 |
private FileBasedConfig getConfig(Repository repository) {
|
|
402 |
File file = new File(repository.getDirectory(), CONF_FILE);
|
|
403 |
FileBasedConfig config = new FileBasedConfig(file, FS.detect());
|
|
404 |
return config;
|
|
405 |
}
|
|
406 |
|
|
407 |
/**
|
|
408 |
* Reads the Lucene config file for the repository to check the index
|
|
409 |
* version. If the index version is different, then rebuild the repository
|
|
410 |
* index.
|
699e71
|
411 |
*
|
d896e6
|
412 |
* @param repository
|
JM |
413 |
* @return true of the on-disk index format is different than INDEX_VERSION
|
|
414 |
*/
|
9f6ef3
|
415 |
private boolean shouldReindex(Repository repository) {
|
d896e6
|
416 |
try {
|
JM |
417 |
FileBasedConfig config = getConfig(repository);
|
|
418 |
config.load();
|
|
419 |
int indexVersion = config.getInt(CONF_INDEX, CONF_VERSION, 0);
|
|
420 |
// reindex if versions do not match
|
|
421 |
return indexVersion != INDEX_VERSION;
|
|
422 |
} catch (Throwable t) {
|
|
423 |
}
|
|
424 |
return true;
|
|
425 |
}
|
|
426 |
|
|
427 |
|
|
428 |
/**
|
|
429 |
* This completely indexes the repository and will destroy any existing
|
|
430 |
* index.
|
699e71
|
431 |
*
|
d896e6
|
432 |
* @param repositoryName
|
JM |
433 |
* @param repository
|
|
434 |
* @return IndexResult
|
|
435 |
*/
|
40ca5c
|
436 |
public IndexResult reindex(RepositoryModel model, Repository repository) {
|
699e71
|
437 |
IndexResult result = new IndexResult();
|
40ca5c
|
438 |
if (!deleteIndex(model.name)) {
|
d896e6
|
439 |
return result;
|
JM |
440 |
}
|
fa0afc
|
441 |
try {
|
JM |
442 |
String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
|
d896e6
|
443 |
FileBasedConfig config = getConfig(repository);
|
JM |
444 |
Set<String> indexedCommits = new TreeSet<String>();
|
40ca5c
|
445 |
IndexWriter writer = getIndexWriter(model.name);
|
d896e6
|
446 |
// build a quick lookup of tags
|
JM |
447 |
Map<String, List<String>> tags = new HashMap<String, List<String>>();
|
|
448 |
for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
|
|
449 |
if (!tag.isAnnotatedTag()) {
|
|
450 |
// skip non-annotated tags
|
|
451 |
continue;
|
|
452 |
}
|
d0bb38
|
453 |
if (!tags.containsKey(tag.getReferencedObjectId().getName())) {
|
d896e6
|
454 |
tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
|
JM |
455 |
}
|
|
456 |
tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
|
|
457 |
}
|
699e71
|
458 |
|
d896e6
|
459 |
ObjectReader reader = repository.newObjectReader();
|
JM |
460 |
|
|
461 |
// get the local branches
|
|
462 |
List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
|
699e71
|
463 |
|
d896e6
|
464 |
// sort them by most recently updated
|
JM |
465 |
Collections.sort(branches, new Comparator<RefModel>() {
|
|
466 |
@Override
|
|
467 |
public int compare(RefModel ref1, RefModel ref2) {
|
|
468 |
return ref2.getDate().compareTo(ref1.getDate());
|
|
469 |
}
|
|
470 |
});
|
699e71
|
471 |
|
d896e6
|
472 |
// reorder default branch to first position
|
JM |
473 |
RefModel defaultBranch = null;
|
|
474 |
ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
|
|
475 |
for (RefModel branch : branches) {
|
|
476 |
if (branch.getObjectId().equals(defaultBranchId)) {
|
1aabf0
|
477 |
defaultBranch = branch;
|
d896e6
|
478 |
break;
|
JM |
479 |
}
|
|
480 |
}
|
|
481 |
branches.remove(defaultBranch);
|
|
482 |
branches.add(0, defaultBranch);
|
699e71
|
483 |
|
d896e6
|
484 |
// walk through each branch
|
JM |
485 |
for (RefModel branch : branches) {
|
40ca5c
|
486 |
|
1aabf0
|
487 |
boolean indexBranch = false;
|
JM |
488 |
if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
|
|
489 |
&& branch.equals(defaultBranch)) {
|
|
490 |
// indexing "default" branch
|
|
491 |
indexBranch = true;
|
c134a0
|
492 |
} else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
|
JM |
493 |
// skip internal meta branches
|
1aabf0
|
494 |
indexBranch = false;
|
JM |
495 |
} else {
|
|
496 |
// normal explicit branch check
|
|
497 |
indexBranch = model.indexedBranches.contains(branch.getName());
|
|
498 |
}
|
699e71
|
499 |
|
40ca5c
|
500 |
// if this branch is not specifically indexed then skip
|
1aabf0
|
501 |
if (!indexBranch) {
|
d896e6
|
502 |
continue;
|
JM |
503 |
}
|
|
504 |
|
|
505 |
String branchName = branch.getName();
|
|
506 |
RevWalk revWalk = new RevWalk(reader);
|
|
507 |
RevCommit tip = revWalk.parseCommit(branch.getObjectId());
|
|
508 |
String tipId = tip.getId().getName();
|
|
509 |
|
|
510 |
String keyName = getBranchKey(branchName);
|
|
511 |
config.setString(CONF_ALIAS, null, keyName, branchName);
|
|
512 |
config.setString(CONF_BRANCH, null, keyName, tipId);
|
|
513 |
|
|
514 |
// index the blob contents of the tree
|
|
515 |
TreeWalk treeWalk = new TreeWalk(repository);
|
|
516 |
treeWalk.addTree(tip.getTree());
|
699e71
|
517 |
treeWalk.setRecursive(true);
|
JM |
518 |
|
d896e6
|
519 |
Map<String, ObjectId> paths = new TreeMap<String, ObjectId>();
|
JM |
520 |
while (treeWalk.next()) {
|
749110
|
521 |
// ensure path is not in a submodule
|
a02998
|
522 |
if (treeWalk.getFileMode(0) != FileMode.GITLINK) {
|
PA |
523 |
paths.put(treeWalk.getPathString(), treeWalk.getObjectId(0));
|
|
524 |
}
|
699e71
|
525 |
}
|
d896e6
|
526 |
|
JM |
527 |
ByteArrayOutputStream os = new ByteArrayOutputStream();
|
|
528 |
byte[] tmp = new byte[32767];
|
|
529 |
|
|
530 |
RevWalk commitWalk = new RevWalk(reader);
|
|
531 |
commitWalk.markStart(tip);
|
699e71
|
532 |
|
d896e6
|
533 |
RevCommit commit;
|
JM |
534 |
while ((paths.size() > 0) && (commit = commitWalk.next()) != null) {
|
|
535 |
TreeWalk diffWalk = new TreeWalk(reader);
|
|
536 |
int parentCount = commit.getParentCount();
|
|
537 |
switch (parentCount) {
|
|
538 |
case 0:
|
|
539 |
diffWalk.addTree(new EmptyTreeIterator());
|
|
540 |
break;
|
|
541 |
case 1:
|
|
542 |
diffWalk.addTree(getTree(commitWalk, commit.getParent(0)));
|
|
543 |
break;
|
|
544 |
default:
|
|
545 |
// skip merge commits
|
|
546 |
continue;
|
|
547 |
}
|
|
548 |
diffWalk.addTree(getTree(commitWalk, commit));
|
|
549 |
diffWalk.setFilter(ANY_DIFF);
|
|
550 |
diffWalk.setRecursive(true);
|
|
551 |
while ((paths.size() > 0) && diffWalk.next()) {
|
|
552 |
String path = diffWalk.getPathString();
|
|
553 |
if (!paths.containsKey(path)) {
|
|
554 |
continue;
|
|
555 |
}
|
eecaad
|
556 |
//TODO: Figure out filestore oid the path - bit more involved than updating the index
|
PM |
557 |
|
d896e6
|
558 |
// remove path from set
|
JM |
559 |
ObjectId blobId = paths.remove(path);
|
|
560 |
result.blobCount++;
|
699e71
|
561 |
|
d896e6
|
562 |
// index the blob metadata
|
JM |
563 |
String blobAuthor = getAuthor(commit);
|
|
564 |
String blobCommitter = getCommitter(commit);
|
|
565 |
String blobDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
|
|
566 |
Resolution.MINUTE);
|
699e71
|
567 |
|
d896e6
|
568 |
Document doc = new Document();
|
db9832
|
569 |
doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
|
JM |
570 |
doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
|
|
571 |
doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
|
|
572 |
doc.add(new Field(FIELD_PATH, path, TextField.TYPE_STORED));
|
|
573 |
doc.add(new Field(FIELD_DATE, blobDate, StringField.TYPE_STORED));
|
|
574 |
doc.add(new Field(FIELD_AUTHOR, blobAuthor, TextField.TYPE_STORED));
|
|
575 |
doc.add(new Field(FIELD_COMMITTER, blobCommitter, TextField.TYPE_STORED));
|
d896e6
|
576 |
|
JM |
577 |
// determine extension to compare to the extension
|
|
578 |
// blacklist
|
|
579 |
String ext = null;
|
|
580 |
String name = path.toLowerCase();
|
|
581 |
if (name.indexOf('.') > -1) {
|
|
582 |
ext = name.substring(name.lastIndexOf('.') + 1);
|
|
583 |
}
|
|
584 |
|
|
585 |
// index the blob content
|
699e71
|
586 |
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
|
d896e6
|
587 |
ObjectLoader ldr = repository.open(blobId, Constants.OBJ_BLOB);
|
699e71
|
588 |
InputStream in = ldr.openStream();
|
d896e6
|
589 |
int n;
|
JM |
590 |
while ((n = in.read(tmp)) > 0) {
|
|
591 |
os.write(tmp, 0, n);
|
|
592 |
}
|
|
593 |
in.close();
|
|
594 |
byte[] content = os.toByteArray();
|
699e71
|
595 |
String str = StringUtils.decodeString(content, encodings);
|
db9832
|
596 |
doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
|
d896e6
|
597 |
os.reset();
|
699e71
|
598 |
}
|
JM |
599 |
|
d896e6
|
600 |
// add the blob to the index
|
JM |
601 |
writer.addDocument(doc);
|
|
602 |
}
|
|
603 |
}
|
|
604 |
|
|
605 |
os.close();
|
|
606 |
|
|
607 |
// index the tip commit object
|
|
608 |
if (indexedCommits.add(tipId)) {
|
|
609 |
Document doc = createDocument(tip, tags.get(tipId));
|
db9832
|
610 |
doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
|
d896e6
|
611 |
writer.addDocument(doc);
|
JM |
612 |
result.commitCount += 1;
|
|
613 |
result.branchCount += 1;
|
|
614 |
}
|
|
615 |
|
|
616 |
// traverse the log and index the previous commit objects
|
|
617 |
RevWalk historyWalk = new RevWalk(reader);
|
|
618 |
historyWalk.markStart(historyWalk.parseCommit(tip.getId()));
|
|
619 |
RevCommit rev;
|
|
620 |
while ((rev = historyWalk.next()) != null) {
|
|
621 |
String hash = rev.getId().getName();
|
|
622 |
if (indexedCommits.add(hash)) {
|
|
623 |
Document doc = createDocument(rev, tags.get(hash));
|
db9832
|
624 |
doc.add(new Field(FIELD_BRANCH, branchName, TextField.TYPE_STORED));
|
d896e6
|
625 |
writer.addDocument(doc);
|
JM |
626 |
result.commitCount += 1;
|
|
627 |
}
|
|
628 |
}
|
|
629 |
}
|
|
630 |
|
|
631 |
// finished
|
a1cee6
|
632 |
reader.close();
|
699e71
|
633 |
|
d896e6
|
634 |
// commit all changes and reset the searcher
|
JM |
635 |
config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
|
|
636 |
config.save();
|
|
637 |
writer.commit();
|
8e9988
|
638 |
resetIndexSearcher(model.name);
|
d896e6
|
639 |
result.success();
|
JM |
640 |
} catch (Exception e) {
|
40ca5c
|
641 |
logger.error("Exception while reindexing " + model.name, e);
|
d896e6
|
642 |
}
|
JM |
643 |
return result;
|
|
644 |
}
|
699e71
|
645 |
|
d896e6
|
646 |
/**
|
JM |
647 |
* Incrementally update the index with the specified commit for the
|
|
648 |
* repository.
|
699e71
|
649 |
*
|
d896e6
|
650 |
* @param repositoryName
|
JM |
651 |
* @param repository
|
|
652 |
* @param branch
|
|
653 |
* the fully qualified branch name (e.g. refs/heads/master)
|
|
654 |
* @param commit
|
|
655 |
* @return true, if successful
|
|
656 |
*/
|
699e71
|
657 |
private IndexResult index(String repositoryName, Repository repository,
|
d896e6
|
658 |
String branch, RevCommit commit) {
|
JM |
659 |
IndexResult result = new IndexResult();
|
|
660 |
try {
|
ae9e15
|
661 |
String [] encodings = storedSettings.getStrings(Keys.web.blobEncodings).toArray(new String[0]);
|
d896e6
|
662 |
List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit);
|
JM |
663 |
String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L,
|
|
664 |
Resolution.MINUTE);
|
|
665 |
IndexWriter writer = getIndexWriter(repositoryName);
|
|
666 |
for (PathChangeModel path : changedPaths) {
|
88fb67
|
667 |
if (path.isSubmodule()) {
|
JM |
668 |
continue;
|
|
669 |
}
|
d896e6
|
670 |
// delete the indexed blob
|
856091
|
671 |
deleteBlob(repositoryName, branch, path.name);
|
d896e6
|
672 |
|
JM |
673 |
// re-index the blob
|
|
674 |
if (!ChangeType.DELETE.equals(path.changeType)) {
|
|
675 |
result.blobCount++;
|
|
676 |
Document doc = new Document();
|
db9832
|
677 |
doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.blob.name(), StringField.TYPE_STORED));
|
JM |
678 |
doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
|
|
679 |
doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
|
|
680 |
doc.add(new Field(FIELD_PATH, path.path, TextField.TYPE_STORED));
|
|
681 |
doc.add(new Field(FIELD_DATE, revDate, StringField.TYPE_STORED));
|
|
682 |
doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
|
|
683 |
doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
|
d896e6
|
684 |
|
JM |
685 |
// determine extension to compare to the extension
|
|
686 |
// blacklist
|
|
687 |
String ext = null;
|
|
688 |
String name = path.name.toLowerCase();
|
|
689 |
if (name.indexOf('.') > -1) {
|
|
690 |
ext = name.substring(name.lastIndexOf('.') + 1);
|
|
691 |
}
|
|
692 |
|
|
693 |
if (StringUtils.isEmpty(ext) || !excludedExtensions.contains(ext)) {
|
eecaad
|
694 |
String str = "";
|
d896e6
|
695 |
// read the blob content
|
eecaad
|
696 |
if (path.isFilestoreItem()) {
|
PM |
697 |
//Get file from filestore
|
|
698 |
BodyContentHandler handler = new BodyContentHandler();
|
|
699 |
Metadata metadata = new Metadata();
|
|
700 |
PDFParser parser = new PDFParser();
|
|
701 |
|
|
702 |
ParseContext parseContext = new ParseContext();
|
|
703 |
File lfsFile = filestoreManager.getStoragePath(path.getFilestoreOid());
|
|
704 |
FileInputStream inputstream = new FileInputStream(lfsFile);
|
|
705 |
parser.parse(inputstream, handler, metadata, parseContext);
|
|
706 |
str = handler.toString();
|
|
707 |
} else {
|
|
708 |
str = JGitUtils.getStringContent(repository, commit.getTree(),
|
ae9e15
|
709 |
path.path, encodings);
|
eecaad
|
710 |
}
|
PM |
711 |
|
749110
|
712 |
if (str != null) {
|
db9832
|
713 |
doc.add(new Field(FIELD_CONTENT, str, TextField.TYPE_STORED));
|
749110
|
714 |
writer.addDocument(doc);
|
JM |
715 |
}
|
d896e6
|
716 |
}
|
JM |
717 |
}
|
|
718 |
}
|
|
719 |
writer.commit();
|
699e71
|
720 |
|
261024
|
721 |
// get any annotated commit tags
|
JM |
722 |
List<String> commitTags = new ArrayList<String>();
|
33ceba
|
723 |
for (RefModel ref : JGitUtils.getTags(repository, false, -1)) {
|
261024
|
724 |
if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) {
|
JM |
725 |
commitTags.add(ref.displayName);
|
|
726 |
}
|
|
727 |
}
|
699e71
|
728 |
|
261024
|
729 |
// create and write the Lucene document
|
JM |
730 |
Document doc = createDocument(commit, commitTags);
|
db9832
|
731 |
doc.add(new Field(FIELD_BRANCH, branch, TextField.TYPE_STORED));
|
d896e6
|
732 |
result.commitCount++;
|
JM |
733 |
result.success = index(repositoryName, doc);
|
|
734 |
} catch (Exception e) {
|
|
735 |
logger.error(MessageFormat.format("Exception while indexing commit {0} in {1}", commit.getId().getName(), repositoryName), e);
|
|
736 |
}
|
|
737 |
return result;
|
|
738 |
}
|
|
739 |
|
|
740 |
/**
|
|
741 |
* Delete a blob from the specified branch of the repository index.
|
699e71
|
742 |
*
|
d896e6
|
743 |
* @param repositoryName
|
JM |
744 |
* @param branch
|
|
745 |
* @param path
|
|
746 |
* @throws Exception
|
87ee94
|
747 |
* @return true, if deleted, false if no record was deleted
|
d896e6
|
748 |
*/
|
87ee94
|
749 |
public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception {
|
JM |
750 |
String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH);
|
|
751 |
String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path);
|
699e71
|
752 |
|
87ee94
|
753 |
BooleanQuery query = new BooleanQuery();
|
60110f
|
754 |
StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
|
JM |
755 |
QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
|
87ee94
|
756 |
query.add(qp.parse(q), Occur.MUST);
|
JM |
757 |
|
d896e6
|
758 |
IndexWriter writer = getIndexWriter(repositoryName);
|
87ee94
|
759 |
int numDocsBefore = writer.numDocs();
|
699e71
|
760 |
writer.deleteDocuments(query);
|
d896e6
|
761 |
writer.commit();
|
87ee94
|
762 |
int numDocsAfter = writer.numDocs();
|
JM |
763 |
if (numDocsBefore == numDocsAfter) {
|
|
764 |
logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
|
|
765 |
return false;
|
|
766 |
} else {
|
|
767 |
logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
|
|
768 |
return true;
|
|
769 |
}
|
d896e6
|
770 |
}
|
JM |
771 |
|
|
772 |
/**
|
|
773 |
* Updates a repository index incrementally from the last indexed commits.
|
699e71
|
774 |
*
|
40ca5c
|
775 |
* @param model
|
d896e6
|
776 |
* @param repository
|
JM |
777 |
* @return IndexResult
|
|
778 |
*/
|
9f6ef3
|
779 |
private IndexResult updateIndex(RepositoryModel model, Repository repository) {
|
d896e6
|
780 |
IndexResult result = new IndexResult();
|
JM |
781 |
try {
|
|
782 |
FileBasedConfig config = getConfig(repository);
|
|
783 |
config.load();
|
|
784 |
|
|
785 |
// build a quick lookup of annotated tags
|
|
786 |
Map<String, List<String>> tags = new HashMap<String, List<String>>();
|
|
787 |
for (RefModel tag : JGitUtils.getTags(repository, false, -1)) {
|
|
788 |
if (!tag.isAnnotatedTag()) {
|
|
789 |
// skip non-annotated tags
|
|
790 |
continue;
|
|
791 |
}
|
b1d77a
|
792 |
if (!tags.containsKey(tag.getObjectId().getName())) {
|
d896e6
|
793 |
tags.put(tag.getReferencedObjectId().getName(), new ArrayList<String>());
|
JM |
794 |
}
|
|
795 |
tags.get(tag.getReferencedObjectId().getName()).add(tag.displayName);
|
|
796 |
}
|
|
797 |
|
|
798 |
// detect branch deletion
|
|
799 |
// first assume all branches are deleted and then remove each
|
|
800 |
// existing branch from deletedBranches during indexing
|
|
801 |
Set<String> deletedBranches = new TreeSet<String>();
|
|
802 |
for (String alias : config.getNames(CONF_ALIAS)) {
|
|
803 |
String branch = config.getString(CONF_ALIAS, null, alias);
|
|
804 |
deletedBranches.add(branch);
|
|
805 |
}
|
|
806 |
|
1aabf0
|
807 |
// get the local branches
|
d896e6
|
808 |
List<RefModel> branches = JGitUtils.getLocalBranches(repository, true, -1);
|
699e71
|
809 |
|
1aabf0
|
810 |
// sort them by most recently updated
|
JM |
811 |
Collections.sort(branches, new Comparator<RefModel>() {
|
|
812 |
@Override
|
|
813 |
public int compare(RefModel ref1, RefModel ref2) {
|
|
814 |
return ref2.getDate().compareTo(ref1.getDate());
|
|
815 |
}
|
|
816 |
});
|
699e71
|
817 |
|
1aabf0
|
818 |
// reorder default branch to first position
|
JM |
819 |
RefModel defaultBranch = null;
|
|
820 |
ObjectId defaultBranchId = JGitUtils.getDefaultBranch(repository);
|
|
821 |
for (RefModel branch : branches) {
|
|
822 |
if (branch.getObjectId().equals(defaultBranchId)) {
|
|
823 |
defaultBranch = branch;
|
|
824 |
break;
|
|
825 |
}
|
|
826 |
}
|
|
827 |
branches.remove(defaultBranch);
|
|
828 |
branches.add(0, defaultBranch);
|
699e71
|
829 |
|
1aabf0
|
830 |
// walk through each branches
|
d896e6
|
831 |
for (RefModel branch : branches) {
|
JM |
832 |
String branchName = branch.getName();
|
|
833 |
|
1aabf0
|
834 |
boolean indexBranch = false;
|
JM |
835 |
if (model.indexedBranches.contains(com.gitblit.Constants.DEFAULT_BRANCH)
|
|
836 |
&& branch.equals(defaultBranch)) {
|
|
837 |
// indexing "default" branch
|
|
838 |
indexBranch = true;
|
c134a0
|
839 |
} else if (branch.getName().startsWith(com.gitblit.Constants.R_META)) {
|
JM |
840 |
// ignore internal meta branches
|
a04808
|
841 |
indexBranch = false;
|
1aabf0
|
842 |
} else {
|
JM |
843 |
// normal explicit branch check
|
|
844 |
indexBranch = model.indexedBranches.contains(branch.getName());
|
|
845 |
}
|
699e71
|
846 |
|
1aabf0
|
847 |
// if this branch is not specifically indexed then skip
|
JM |
848 |
if (!indexBranch) {
|
40ca5c
|
849 |
continue;
|
JM |
850 |
}
|
699e71
|
851 |
|
d896e6
|
852 |
// remove this branch from the deletedBranches set
|
JM |
853 |
deletedBranches.remove(branchName);
|
699e71
|
854 |
|
d896e6
|
855 |
// determine last commit
|
JM |
856 |
String keyName = getBranchKey(branchName);
|
|
857 |
String lastCommit = config.getString(CONF_BRANCH, null, keyName);
|
|
858 |
|
|
859 |
List<RevCommit> revs;
|
|
860 |
if (StringUtils.isEmpty(lastCommit)) {
|
|
861 |
// new branch/unindexed branch, get all commits on branch
|
|
862 |
revs = JGitUtils.getRevLog(repository, branchName, 0, -1);
|
|
863 |
} else {
|
|
864 |
// pre-existing branch, get changes since last commit
|
|
865 |
revs = JGitUtils.getRevLog(repository, lastCommit, branchName);
|
|
866 |
}
|
|
867 |
|
|
868 |
if (revs.size() > 0) {
|
|
869 |
result.branchCount += 1;
|
|
870 |
}
|
699e71
|
871 |
|
JM |
872 |
// reverse the list of commits so we start with the first commit
|
d896e6
|
873 |
Collections.reverse(revs);
|
699e71
|
874 |
for (RevCommit commit : revs) {
|
a04808
|
875 |
// index a commit
|
JM |
876 |
result.add(index(model.name, repository, branchName, commit));
|
d896e6
|
877 |
}
|
JM |
878 |
|
|
879 |
// update the config
|
|
880 |
config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION);
|
|
881 |
config.setString(CONF_ALIAS, null, keyName, branchName);
|
|
882 |
config.setString(CONF_BRANCH, null, keyName, branch.getObjectId().getName());
|
|
883 |
config.save();
|
|
884 |
}
|
|
885 |
|
|
886 |
// the deletedBranches set will normally be empty by this point
|
|
887 |
// unless a branch really was deleted and no longer exists
|
|
888 |
if (deletedBranches.size() > 0) {
|
|
889 |
for (String branch : deletedBranches) {
|
40ca5c
|
890 |
IndexWriter writer = getIndexWriter(model.name);
|
d896e6
|
891 |
writer.deleteDocuments(new Term(FIELD_BRANCH, branch));
|
JM |
892 |
writer.commit();
|
|
893 |
}
|
|
894 |
}
|
|
895 |
result.success = true;
|
|
896 |
} catch (Throwable t) {
|
40ca5c
|
897 |
logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t);
|
d896e6
|
898 |
}
|
JM |
899 |
return result;
|
|
900 |
}
|
699e71
|
901 |
|
d896e6
|
902 |
/**
|
JM |
903 |
* Creates a Lucene document for a commit
|
699e71
|
904 |
*
|
d896e6
|
905 |
* @param commit
|
JM |
906 |
* @param tags
|
|
907 |
* @return a Lucene document
|
|
908 |
*/
|
|
909 |
private Document createDocument(RevCommit commit, List<String> tags) {
|
|
910 |
Document doc = new Document();
|
db9832
|
911 |
doc.add(new Field(FIELD_OBJECT_TYPE, SearchObjectType.commit.name(), StringField.TYPE_STORED));
|
JM |
912 |
doc.add(new Field(FIELD_COMMIT, commit.getName(), TextField.TYPE_STORED));
|
d896e6
|
913 |
doc.add(new Field(FIELD_DATE, DateTools.timeToString(commit.getCommitTime() * 1000L,
|
db9832
|
914 |
Resolution.MINUTE), StringField.TYPE_STORED));
|
JM |
915 |
doc.add(new Field(FIELD_AUTHOR, getAuthor(commit), TextField.TYPE_STORED));
|
|
916 |
doc.add(new Field(FIELD_COMMITTER, getCommitter(commit), TextField.TYPE_STORED));
|
|
917 |
doc.add(new Field(FIELD_SUMMARY, commit.getShortMessage(), TextField.TYPE_STORED));
|
|
918 |
doc.add(new Field(FIELD_CONTENT, commit.getFullMessage(), TextField.TYPE_STORED));
|
d896e6
|
919 |
if (!ArrayUtils.isEmpty(tags)) {
|
db9832
|
920 |
doc.add(new Field(FIELD_TAG, StringUtils.flattenStrings(tags), TextField.TYPE_STORED));
|
d896e6
|
921 |
}
|
JM |
922 |
return doc;
|
|
923 |
}
|
|
924 |
|
|
925 |
/**
|
|
926 |
* Incrementally index an object for the repository.
|
699e71
|
927 |
*
|
d896e6
|
928 |
* @param repositoryName
|
JM |
929 |
* @param doc
|
|
930 |
* @return true, if successful
|
|
931 |
*/
|
|
932 |
private boolean index(String repositoryName, Document doc) {
|
699e71
|
933 |
try {
|
d896e6
|
934 |
IndexWriter writer = getIndexWriter(repositoryName);
|
JM |
935 |
writer.addDocument(doc);
|
|
936 |
writer.commit();
|
8e9988
|
937 |
resetIndexSearcher(repositoryName);
|
d896e6
|
938 |
return true;
|
JM |
939 |
} catch (Exception e) {
|
|
940 |
logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e);
|
|
941 |
}
|
|
942 |
return false;
|
|
943 |
}
|
|
944 |
|
d04009
|
945 |
private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException {
|
d896e6
|
946 |
SearchResult result = new SearchResult();
|
d04009
|
947 |
result.hitId = hitId;
|
JM |
948 |
result.totalHits = totalHits;
|
d896e6
|
949 |
result.score = score;
|
JM |
950 |
result.date = DateTools.stringToDate(doc.get(FIELD_DATE));
|
699e71
|
951 |
result.summary = doc.get(FIELD_SUMMARY);
|
d896e6
|
952 |
result.author = doc.get(FIELD_AUTHOR);
|
JM |
953 |
result.committer = doc.get(FIELD_COMMITTER);
|
|
954 |
result.type = SearchObjectType.fromName(doc.get(FIELD_OBJECT_TYPE));
|
|
955 |
result.branch = doc.get(FIELD_BRANCH);
|
|
956 |
result.commitId = doc.get(FIELD_COMMIT);
|
|
957 |
result.path = doc.get(FIELD_PATH);
|
|
958 |
if (doc.get(FIELD_TAG) != null) {
|
|
959 |
result.tags = StringUtils.getStringsFromValue(doc.get(FIELD_TAG));
|
|
960 |
}
|
|
961 |
return result;
|
|
962 |
}
|
|
963 |
|
|
964 |
private synchronized void resetIndexSearcher(String repository) throws IOException {
|
|
965 |
IndexSearcher searcher = searchers.remove(repository);
|
|
966 |
if (searcher != null) {
|
8e9988
|
967 |
searcher.getIndexReader().close();
|
d896e6
|
968 |
}
|
JM |
969 |
}
|
|
970 |
|
|
971 |
/**
|
|
972 |
* Gets an index searcher for the repository.
|
699e71
|
973 |
*
|
d896e6
|
974 |
* @param repository
|
JM |
975 |
* @return
|
|
976 |
* @throws IOException
|
|
977 |
*/
|
|
978 |
private IndexSearcher getIndexSearcher(String repository) throws IOException {
|
|
979 |
IndexSearcher searcher = searchers.get(repository);
|
|
980 |
if (searcher == null) {
|
|
981 |
IndexWriter writer = getIndexWriter(repository);
|
db9832
|
982 |
searcher = new IndexSearcher(DirectoryReader.open(writer, true));
|
d896e6
|
983 |
searchers.put(repository, searcher);
|
JM |
984 |
}
|
|
985 |
return searcher;
|
|
986 |
}
|
|
987 |
|
|
988 |
/**
|
|
989 |
* Gets an index writer for the repository. The index will be created if it
|
|
990 |
* does not already exist or if forceCreate is specified.
|
699e71
|
991 |
*
|
d896e6
|
992 |
* @param repository
|
JM |
993 |
* @return an IndexWriter
|
|
994 |
* @throws IOException
|
|
995 |
*/
|
|
996 |
private IndexWriter getIndexWriter(String repository) throws IOException {
|
699e71
|
997 |
IndexWriter indexWriter = writers.get(repository);
|
6ef2fc
|
998 |
File repositoryFolder = FileKey.resolve(new File(repositoriesFolder, repository), FS.DETECTED);
|
d896e6
|
999 |
File indexFolder = new File(repositoryFolder, LUCENE_DIR);
|
60110f
|
1000 |
Directory directory = FSDirectory.open(indexFolder);
|
d896e6
|
1001 |
|
JM |
1002 |
if (indexWriter == null) {
|
|
1003 |
if (!indexFolder.exists()) {
|
|
1004 |
indexFolder.mkdirs();
|
|
1005 |
}
|
60110f
|
1006 |
StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
|
JM |
1007 |
IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
|
d896e6
|
1008 |
config.setOpenMode(OpenMode.CREATE_OR_APPEND);
|
JM |
1009 |
indexWriter = new IndexWriter(directory, config);
|
|
1010 |
writers.put(repository, indexWriter);
|
|
1011 |
}
|
|
1012 |
return indexWriter;
|
|
1013 |
}
|
|
1014 |
|
|
1015 |
/**
|
|
1016 |
* Searches the specified repositories for the given text or query
|
699e71
|
1017 |
*
|
d896e6
|
1018 |
* @param text
|
JM |
1019 |
* if the text is null or empty, null is returned
|
d04009
|
1020 |
* @param page
|
JM |
1021 |
* the page number to retrieve. page is 1-indexed.
|
|
1022 |
* @param pageSize
|
|
1023 |
* the number of elements to return for this page
|
d896e6
|
1024 |
* @param repositories
|
JM |
1025 |
* a list of repositories to search. if no repositories are
|
|
1026 |
* specified null is returned.
|
|
1027 |
* @return a list of SearchResults in order from highest to the lowest score
|
699e71
|
1028 |
*
|
d896e6
|
1029 |
*/
|
d04009
|
1030 |
public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) {
|
d896e6
|
1031 |
if (ArrayUtils.isEmpty(repositories)) {
|
JM |
1032 |
return null;
|
|
1033 |
}
|
d04009
|
1034 |
return search(text, page, pageSize, repositories.toArray(new String[0]));
|
d896e6
|
1035 |
}
|
699e71
|
1036 |
|
d896e6
|
1037 |
/**
|
JM |
1038 |
* Searches the specified repositories for the given text or query
|
699e71
|
1039 |
*
|
d896e6
|
1040 |
* @param text
|
JM |
1041 |
* if the text is null or empty, null is returned
|
d04009
|
1042 |
* @param page
|
JM |
1043 |
* the page number to retrieve. page is 1-indexed.
|
|
1044 |
* @param pageSize
|
|
1045 |
* the number of elements to return for this page
|
d896e6
|
1046 |
* @param repositories
|
JM |
1047 |
* a list of repositories to search. if no repositories are
|
|
1048 |
* specified null is returned.
|
|
1049 |
* @return a list of SearchResults in order from highest to the lowest score
|
699e71
|
1050 |
*
|
d04009
|
1051 |
*/
|
JM |
1052 |
public List<SearchResult> search(String text, int page, int pageSize, String... repositories) {
|
d896e6
|
1053 |
if (StringUtils.isEmpty(text)) {
|
JM |
1054 |
return null;
|
|
1055 |
}
|
|
1056 |
if (ArrayUtils.isEmpty(repositories)) {
|
|
1057 |
return null;
|
|
1058 |
}
|
|
1059 |
Set<SearchResult> results = new LinkedHashSet<SearchResult>();
|
60110f
|
1060 |
StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
|
d896e6
|
1061 |
try {
|
JM |
1062 |
// default search checks summary and content
|
|
1063 |
BooleanQuery query = new BooleanQuery();
|
|
1064 |
QueryParser qp;
|
60110f
|
1065 |
qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
|
d896e6
|
1066 |
qp.setAllowLeadingWildcard(true);
|
JM |
1067 |
query.add(qp.parse(text), Occur.SHOULD);
|
|
1068 |
|
60110f
|
1069 |
qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
|
d896e6
|
1070 |
qp.setAllowLeadingWildcard(true);
|
JM |
1071 |
query.add(qp.parse(text), Occur.SHOULD);
|
699e71
|
1072 |
|
d896e6
|
1073 |
IndexSearcher searcher;
|
JM |
1074 |
if (repositories.length == 1) {
|
|
1075 |
// single repository search
|
|
1076 |
searcher = getIndexSearcher(repositories[0]);
|
|
1077 |
} else {
|
|
1078 |
// multiple repository search
|
|
1079 |
List<IndexReader> readers = new ArrayList<IndexReader>();
|
|
1080 |
for (String repository : repositories) {
|
|
1081 |
IndexSearcher repositoryIndex = getIndexSearcher(repository);
|
|
1082 |
readers.add(repositoryIndex.getIndexReader());
|
|
1083 |
}
|
|
1084 |
IndexReader[] rdrs = readers.toArray(new IndexReader[readers.size()]);
|
905d31
|
1085 |
MultiSourceReader reader = new MultiSourceReader(rdrs);
|
d896e6
|
1086 |
searcher = new IndexSearcher(reader);
|
JM |
1087 |
}
|
699e71
|
1088 |
|
d896e6
|
1089 |
Query rewrittenQuery = searcher.rewrite(query);
|
87ee94
|
1090 |
logger.debug(rewrittenQuery.toString());
|
JM |
1091 |
|
60110f
|
1092 |
TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
|
d896e6
|
1093 |
searcher.search(rewrittenQuery, collector);
|
d04009
|
1094 |
int offset = Math.max(0, (page - 1) * pageSize);
|
JM |
1095 |
ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs;
|
|
1096 |
int totalHits = collector.getTotalHits();
|
d896e6
|
1097 |
for (int i = 0; i < hits.length; i++) {
|
JM |
1098 |
int docId = hits[i].doc;
|
|
1099 |
Document doc = searcher.doc(docId);
|
d04009
|
1100 |
SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits);
|
905d31
|
1101 |
if (repositories.length == 1) {
|
JM |
1102 |
// single repository search
|
|
1103 |
result.repository = repositories[0];
|
|
1104 |
} else {
|
|
1105 |
// multi-repository search
|
|
1106 |
MultiSourceReader reader = (MultiSourceReader) searcher.getIndexReader();
|
|
1107 |
int index = reader.getSourceIndex(docId);
|
|
1108 |
result.repository = repositories[index];
|
|
1109 |
}
|
699e71
|
1110 |
String content = doc.get(FIELD_CONTENT);
|
d896e6
|
1111 |
result.fragment = getHighlightedFragment(analyzer, query, content, result);
|
JM |
1112 |
results.add(result);
|
|
1113 |
}
|
|
1114 |
} catch (Exception e) {
|
|
1115 |
logger.error(MessageFormat.format("Exception while searching for {0}", text), e);
|
|
1116 |
}
|
|
1117 |
return new ArrayList<SearchResult>(results);
|
|
1118 |
}
|
699e71
|
1119 |
|
d896e6
|
1120 |
/**
|
699e71
|
1121 |
*
|
d896e6
|
1122 |
* @param analyzer
|
JM |
1123 |
* @param query
|
|
1124 |
* @param content
|
|
1125 |
* @param result
|
|
1126 |
* @return
|
|
1127 |
* @throws IOException
|
|
1128 |
* @throws InvalidTokenOffsetsException
|
|
1129 |
*/
|
|
1130 |
private String getHighlightedFragment(Analyzer analyzer, Query query,
|
|
1131 |
String content, SearchResult result) throws IOException, InvalidTokenOffsetsException {
|
12c31e
|
1132 |
if (content == null) {
|
JM |
1133 |
content = "";
|
699e71
|
1134 |
}
|
12c31e
|
1135 |
|
310a80
|
1136 |
int tabLength = storedSettings.getInteger(Keys.web.tabLength, 4);
|
12c31e
|
1137 |
int fragmentLength = SearchObjectType.commit == result.type ? 512 : 150;
|
JM |
1138 |
|
d896e6
|
1139 |
QueryScorer scorer = new QueryScorer(query, "content");
|
699e71
|
1140 |
Fragmenter fragmenter = new SimpleSpanFragmenter(scorer, fragmentLength);
|
d896e6
|
1141 |
|
JM |
1142 |
// use an artificial delimiter for the token
|
9f6ef3
|
1143 |
String termTag = "!!--[";
|
JM |
1144 |
String termTagEnd = "]--!!";
|
d896e6
|
1145 |
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(termTag, termTagEnd);
|
699e71
|
1146 |
Highlighter highlighter = new Highlighter(formatter, scorer);
|
d896e6
|
1147 |
highlighter.setTextFragmenter(fragmenter);
|
12c31e
|
1148 |
|
73fba6
|
1149 |
String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3);
|
d896e6
|
1150 |
if (ArrayUtils.isEmpty(fragments)) {
|
JM |
1151 |
if (SearchObjectType.blob == result.type) {
|
|
1152 |
return "";
|
|
1153 |
}
|
12c31e
|
1154 |
// clip commit message
|
JM |
1155 |
String fragment = content;
|
|
1156 |
if (fragment.length() > fragmentLength) {
|
|
1157 |
fragment = fragment.substring(0, fragmentLength) + "...";
|
|
1158 |
}
|
310a80
|
1159 |
return "<pre class=\"text\">" + StringUtils.escapeForHtml(fragment, true, tabLength) + "</pre>";
|
d896e6
|
1160 |
}
|
699e71
|
1161 |
|
2b67ec
|
1162 |
// make sure we have unique fragments
|
JM |
1163 |
Set<String> uniqueFragments = new LinkedHashSet<String>();
|
|
1164 |
for (String fragment : fragments) {
|
|
1165 |
uniqueFragments.add(fragment);
|
|
1166 |
}
|
|
1167 |
fragments = uniqueFragments.toArray(new String[uniqueFragments.size()]);
|
699e71
|
1168 |
|
d896e6
|
1169 |
StringBuilder sb = new StringBuilder();
|
JM |
1170 |
for (int i = 0, len = fragments.length; i < len; i++) {
|
|
1171 |
String fragment = fragments[i];
|
12c31e
|
1172 |
String tag = "<pre class=\"text\">";
|
JM |
1173 |
|
d896e6
|
1174 |
// resurrect the raw fragment from removing the artificial delimiters
|
12c31e
|
1175 |
String raw = fragment.replace(termTag, "").replace(termTagEnd, "");
|
JM |
1176 |
|
|
1177 |
// determine position of the raw fragment in the content
|
2b67ec
|
1178 |
int pos = content.indexOf(raw);
|
699e71
|
1179 |
|
12c31e
|
1180 |
// restore complete first line of fragment
|
JM |
1181 |
int c = pos;
|
|
1182 |
while (c > 0) {
|
|
1183 |
c--;
|
|
1184 |
if (content.charAt(c) == '\n') {
|
|
1185 |
break;
|
|
1186 |
}
|
|
1187 |
}
|
|
1188 |
if (c > 0) {
|
|
1189 |
// inject leading chunk of first fragment line
|
|
1190 |
fragment = content.substring(c + 1, pos) + fragment;
|
|
1191 |
}
|
699e71
|
1192 |
|
12c31e
|
1193 |
if (SearchObjectType.blob == result.type) {
|
JM |
1194 |
// count lines as offset into the content for this fragment
|
c2833a
|
1195 |
int line = Math.max(1, StringUtils.countLines(content.substring(0, pos)));
|
699e71
|
1196 |
|
12c31e
|
1197 |
// create fragment tag with line number and language
|
JM |
1198 |
String lang = "";
|
|
1199 |
String ext = StringUtils.getFileExtension(result.path).toLowerCase();
|
|
1200 |
if (!StringUtils.isEmpty(ext)) {
|
|
1201 |
// maintain leading space!
|
|
1202 |
lang = " lang-" + ext;
|
|
1203 |
}
|
|
1204 |
tag = MessageFormat.format("<pre class=\"prettyprint linenums:{0,number,0}{1}\">", line, lang);
|
699e71
|
1205 |
|
12c31e
|
1206 |
}
|
699e71
|
1207 |
|
12c31e
|
1208 |
sb.append(tag);
|
JM |
1209 |
|
d896e6
|
1210 |
// replace the artificial delimiter with html tags
|
9f6ef3
|
1211 |
String html = StringUtils.escapeForHtml(fragment, false);
|
JM |
1212 |
html = html.replace(termTag, "<span class=\"highlight\">").replace(termTagEnd, "</span>");
|
d896e6
|
1213 |
sb.append(html);
|
JM |
1214 |
sb.append("</pre>");
|
|
1215 |
if (i < len - 1) {
|
|
1216 |
sb.append("<span class=\"ellipses\">...</span><br/>");
|
|
1217 |
}
|
|
1218 |
}
|
|
1219 |
return sb.toString();
|
699e71
|
1220 |
}
|
JM |
1221 |
|
d896e6
|
1222 |
/**
|
699e71
|
1223 |
* Simple class to track the results of an index update.
|
d896e6
|
1224 |
*/
|
JM |
1225 |
private class IndexResult {
|
|
1226 |
long startTime = System.currentTimeMillis();
|
|
1227 |
long endTime = startTime;
|
|
1228 |
boolean success;
|
|
1229 |
int branchCount;
|
|
1230 |
int commitCount;
|
|
1231 |
int blobCount;
|
699e71
|
1232 |
|
d896e6
|
1233 |
void add(IndexResult result) {
|
JM |
1234 |
this.branchCount += result.branchCount;
|
|
1235 |
this.commitCount += result.commitCount;
|
|
1236 |
this.blobCount += result.blobCount;
|
|
1237 |
}
|
699e71
|
1238 |
|
d896e6
|
1239 |
void success() {
|
JM |
1240 |
success = true;
|
|
1241 |
endTime = System.currentTimeMillis();
|
|
1242 |
}
|
699e71
|
1243 |
|
d896e6
|
1244 |
float duration() {
|
JM |
1245 |
return (endTime - startTime)/1000f;
|
|
1246 |
}
|
b938ae
|
1247 |
}
|
699e71
|
1248 |
|
905d31
|
1249 |
/**
|
JM |
1250 |
* Custom subclass of MultiReader to identify the source index for a given
|
|
1251 |
* doc id. This would not be necessary of there was a public method to
|
|
1252 |
* obtain this information.
|
699e71
|
1253 |
*
|
905d31
|
1254 |
*/
|
JM |
1255 |
private class MultiSourceReader extends MultiReader {
|
699e71
|
1256 |
|
60110f
|
1257 |
MultiSourceReader(IndexReader [] readers) {
|
db9832
|
1258 |
super(readers, false);
|
905d31
|
1259 |
}
|
699e71
|
1260 |
|
905d31
|
1261 |
int getSourceIndex(int docId) {
|
JM |
1262 |
int index = -1;
|
|
1263 |
try {
|
db9832
|
1264 |
index = super.readerIndex(docId);
|
905d31
|
1265 |
} catch (Exception e) {
|
JM |
1266 |
logger.error("Error getting source index", e);
|
|
1267 |
}
|
|
1268 |
return index;
|
|
1269 |
}
|
|
1270 |
}
|
e31da0
|
1271 |
}
|