From a004081b913c1bcf50184ee76df00bf889965d98 Mon Sep 17 00:00:00 2001 From: James Moger <james.moger@gitblit.com> Date: Wed, 21 Mar 2012 18:04:34 -0400 Subject: [PATCH] Documentation --- src/com/gitblit/LuceneExecutor.java | 268 ++++++++++++++++++++++++++--------------------------- 1 files changed, 132 insertions(+), 136 deletions(-) diff --git a/src/com/gitblit/LuceneExecutor.java b/src/com/gitblit/LuceneExecutor.java index 0b90b74..65e1b2b 100644 --- a/src/com/gitblit/LuceneExecutor.java +++ b/src/com/gitblit/LuceneExecutor.java @@ -89,6 +89,7 @@ import com.gitblit.models.IssueModel.Attachment; import com.gitblit.models.PathModel.PathChangeModel; import com.gitblit.models.RefModel; +import com.gitblit.models.RepositoryModel; import com.gitblit.models.SearchResult; import com.gitblit.utils.ArrayUtils; import com.gitblit.utils.IssueUtils; @@ -141,47 +142,26 @@ "arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib", "lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip")); - private final Set<String> excludedBranches = new TreeSet<String>( - Arrays.asList("/refs/heads/gb-issues")); - public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) { this.storedSettings = settings; this.repositoriesFolder = repositoriesFolder; } /** - * Indicates if the Lucene executor can index repositories. - * - * @return true if the Lucene executor is ready to index repositories - */ - public boolean isReady() { - return storedSettings.getBoolean(Keys.lucene.enable, false); - } - - /** - * Run is executed by the gitblit executor service at whatever frequency - * is specified in the settings. Because this is called by an executor - * service, calls will queue - i.e. there can never be concurrent execution - * of repository index updates. + * Run is executed by the Gitblit executor service. Because this is called + * by an executor service, calls will queue - i.e. there can never be + * concurrent execution of repository index updates. */ @Override public void run() { - if (!isReady()) { - return; - } - - for (String repositoryName : GitBlit.self().getRepositoryList()) { - Repository repository = GitBlit.self().getRepository(repositoryName); - if (repository == null) { - logger.warn(MessageFormat.format( - "Lucene executor could not find repository {0}. Skipping.", - repositoryName)); - continue; + for (String repositoryName: GitBlit.self().getRepositoryList()) { + RepositoryModel model = GitBlit.self().getRepositoryModel(repositoryName); + if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) { + Repository repository = GitBlit.self().getRepository(model.name); + index(model, repository); + repository.close(); + System.gc(); } - // TODO allow repository to bypass Lucene indexing - index(repositoryName, repository); - repository.close(); - System.gc(); } } @@ -194,43 +174,38 @@ * @param repository * the repository object */ - protected void index(String name, Repository repository) { + protected void index(RepositoryModel model, Repository repository) { try { - if (JGitUtils.hasCommits(repository)) { - if (shouldReindex(repository)) { - // (re)build the entire index - IndexResult result = reindex(name, repository); - - if (result.success) { - if (result.commitCount > 0) { - String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs"; - logger.info(MessageFormat.format(msg, name, - result.commitCount, result.blobCount, result.branchCount, result.duration())); - } - } else { - String msg = "Could not build {0} Lucene index!"; - logger.error(MessageFormat.format(msg, name)); + if (shouldReindex(repository)) { + // (re)build the entire index + IndexResult result = reindex(model, repository); + + if (result.success) { + if (result.commitCount > 0) { + String msg = "Built {0} Lucene index from {1} commits and {2} files across {3} branches in {4} secs"; + logger.info(MessageFormat.format(msg, model.name, result.commitCount, + result.blobCount, result.branchCount, result.duration())); } } else { - // update the index with latest commits - IndexResult result = updateIndex(name, repository); - if (result.success) { - if (result.commitCount > 0) { - String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs"; - logger.info(MessageFormat.format(msg, name, - result.commitCount, result.blobCount, result.branchCount, result.duration())); - } - } else { - String msg = "Could not update {0} Lucene index!"; - logger.error(MessageFormat.format(msg, name)); - } + String msg = "Could not build {0} Lucene index!"; + logger.error(MessageFormat.format(msg, model.name)); } } else { - logger.info(MessageFormat.format("Skipped Lucene index of empty repository {0}", - name)); + // update the index with latest commits + IndexResult result = updateIndex(model, repository); + if (result.success) { + if (result.commitCount > 0) { + String msg = "Updated {0} Lucene index with {1} commits and {2} files across {3} branches in {4} secs"; + logger.info(MessageFormat.format(msg, model.name, result.commitCount, + result.blobCount, result.branchCount, result.duration())); + } + } else { + String msg = "Could not update {0} Lucene index!"; + logger.error(MessageFormat.format(msg, model.name)); + } } } catch (Throwable t) { - logger.error(MessageFormat.format("Lucene indexing failure for {0}", name), t); + logger.error(MessageFormat.format("Lucene indexing failure for {0}", model.name), t); } } @@ -239,7 +214,16 @@ * * @param repositoryName */ - public void close(String repositoryName) { + public synchronized void close(String repositoryName) { + try { + IndexSearcher searcher = searchers.remove(repositoryName); + if (searcher != null) { + searcher.getIndexReader().close(); + } + } catch (Exception e) { + logger.error("Failed to close index searcher for " + repositoryName, e); + } + try { IndexWriter writer = writers.remove(repositoryName); if (writer != null) { @@ -247,23 +231,14 @@ } } catch (Exception e) { logger.error("Failed to close index writer for " + repositoryName, e); - } - - try { - IndexSearcher searcher = searchers.remove(repositoryName); - if (searcher != null) { - searcher.close(); - } - } catch (Exception e) { - logger.error("Failed to close index searcher for " + repositoryName, e); - } + } } /** * Close all Lucene indexers. * */ - public void close() { + public synchronized void close() { // close all writers for (String writer : writers.keySet()) { try { @@ -277,7 +252,7 @@ // close all searchers for (String searcher : searchers.keySet()) { try { - searchers.get(searcher).close(); + searchers.get(searcher).getIndexReader().close(); } catch (Throwable t) { logger.error("Failed to close Lucene searcher for " + searcher, t); } @@ -294,18 +269,9 @@ */ public boolean deleteIndex(String repositoryName) { try { - // remove the repository index writer from the cache and close it - IndexWriter writer = writers.remove(repositoryName); - if (writer != null) { - writer.close(); - writer = null; - } - // remove the repository index searcher from the cache and close it - IndexSearcher searcher = searchers.remove(repositoryName); - if (searcher != null) { - searcher.close(); - searcher = null; - } + // close any open writer/searcher + close(repositoryName); + // delete the index folder File repositoryFolder = new File(repositoriesFolder, repositoryName); File luceneIndex = new File(repositoryFolder, LUCENE_DIR); @@ -430,15 +396,15 @@ * @param repository * @return IndexResult */ - public IndexResult reindex(String repositoryName, Repository repository) { - IndexResult result = new IndexResult(); - if (!deleteIndex(repositoryName)) { + public IndexResult reindex(RepositoryModel model, Repository repository) { + IndexResult result = new IndexResult(); + if (!deleteIndex(model.name)) { return result; } try { FileBasedConfig config = getConfig(repository); Set<String> indexedCommits = new TreeSet<String>(); - IndexWriter writer = getIndexWriter(repositoryName); + IndexWriter writer = getIndexWriter(model.name); // build a quick lookup of tags Map<String, List<String>> tags = new HashMap<String, List<String>>(); for (RefModel tag : JGitUtils.getTags(repository, false, -1)) { @@ -479,7 +445,9 @@ // walk through each branch for (RefModel branch : branches) { - if (excludedBranches.contains(branch.getName())) { + + // if this branch is not specifically indexed then skip + if (!model.indexedBranches.contains(branch.getName())) { continue; } @@ -624,11 +592,11 @@ // commit all changes and reset the searcher config.setInt(CONF_INDEX, null, CONF_VERSION, INDEX_VERSION); config.save(); - resetIndexSearcher(repositoryName); writer.commit(); + resetIndexSearcher(model.name); result.success(); } catch (Exception e) { - logger.error("Exception while reindexing " + repositoryName, e); + logger.error("Exception while reindexing " + model.name, e); } return result; } @@ -648,31 +616,13 @@ String branch, RevCommit commit) { IndexResult result = new IndexResult(); try { - if (excludedBranches.contains(branch)) { - if (IssueUtils.GB_ISSUES.equals(branch)) { - // index an issue - String issueId = commit.getShortMessage().substring(2).trim(); - IssueModel issue = IssueUtils.getIssue(repository, issueId); - if (issue == null) { - // issue was deleted, remove from index - deleteIssue(repositoryName, issueId); - result.success = true; - return result; - } - result.success = index(repositoryName, issue); - result.issueCount++; - return result; - - } - return result; - } List<PathChangeModel> changedPaths = JGitUtils.getFilesInCommit(repository, commit); String revDate = DateTools.timeToString(commit.getCommitTime() * 1000L, Resolution.MINUTE); IndexWriter writer = getIndexWriter(repositoryName); for (PathChangeModel path : changedPaths) { // delete the indexed blob - deleteBlob(repositoryName, branch, path.path); + deleteBlob(repositoryName, branch, path.name); // re-index the blob if (!ChangeType.DELETE.equals(path.changeType)) { @@ -705,8 +655,18 @@ } } writer.commit(); - - Document doc = createDocument(commit, null); + + // get any annotated commit tags + List<String> commitTags = new ArrayList<String>(); + for (RefModel ref : JGitUtils.getTags(repository, true, -1)) { + if (ref.isAnnotatedTag() && ref.getReferencedObjectId().equals(commit.getId())) { + commitTags.add(ref.displayName); + } + } + + // create and write the Lucene document + Document doc = createDocument(commit, commitTags); + doc.add(new Field(FIELD_BRANCH, branch, Store.YES, Index.ANALYZED)); result.commitCount++; result.success = index(repositoryName, doc); } catch (Exception e) { @@ -779,11 +739,11 @@ /** * Updates a repository index incrementally from the last indexed commits. * - * @param repositoryName + * @param model * @param repository * @return IndexResult */ - protected IndexResult updateIndex(String repositoryName, Repository repository) { + protected IndexResult updateIndex(RepositoryModel model, Repository repository) { IndexResult result = new IndexResult(); try { FileBasedConfig config = getConfig(repository); @@ -816,6 +776,12 @@ for (RefModel branch : branches) { String branchName = branch.getName(); + // determine if we should skip this branch + if (!IssueUtils.GB_ISSUES.equals(branch) + && !model.indexedBranches.contains(branch.getName())) { + continue; + } + // remove this branch from the deletedBranches set deletedBranches.remove(branchName); @@ -836,10 +802,33 @@ result.branchCount += 1; } + // track the issue ids that we have already indexed + Set<String> indexedIssues = new TreeSet<String>(); + // reverse the list of commits so we start with the first commit Collections.reverse(revs); - for (RevCommit commit : revs) { - result.add(index(repositoryName, repository, branchName, commit)); + for (RevCommit commit : revs) { + if (IssueUtils.GB_ISSUES.equals(branch)) { + // only index an issue once during updateIndex + String issueId = commit.getShortMessage().substring(2).trim(); + if (indexedIssues.contains(issueId)) { + continue; + } + indexedIssues.add(issueId); + + IssueModel issue = IssueUtils.getIssue(repository, issueId); + if (issue == null) { + // issue was deleted, remove from index + deleteIssue(model.name, issueId); + } else { + // issue was updated + index(model.name, issue); + result.issueCount++; + } + } else { + // index a commit + result.add(index(model.name, repository, branchName, commit)); + } } // update the config @@ -853,14 +842,14 @@ // unless a branch really was deleted and no longer exists if (deletedBranches.size() > 0) { for (String branch : deletedBranches) { - IndexWriter writer = getIndexWriter(repositoryName); + IndexWriter writer = getIndexWriter(model.name); writer.deleteDocuments(new Term(FIELD_BRANCH, branch)); writer.commit(); } } result.success = true; } catch (Throwable t) { - logger.error(MessageFormat.format("Exception while updating {0} Lucene index", repositoryName), t); + logger.error(MessageFormat.format("Exception while updating {0} Lucene index", model.name), t); } return result; } @@ -928,8 +917,8 @@ try { IndexWriter writer = getIndexWriter(repositoryName); writer.addDocument(doc); - resetIndexSearcher(repositoryName); writer.commit(); + resetIndexSearcher(repositoryName); return true; } catch (Exception e) { logger.error(MessageFormat.format("Exception while incrementally updating {0} Lucene index", repositoryName), e); @@ -937,8 +926,10 @@ return false; } - private SearchResult createSearchResult(Document doc, float score) throws ParseException { + private SearchResult createSearchResult(Document doc, float score, int hitId, int totalHits) throws ParseException { SearchResult result = new SearchResult(); + result.hitId = hitId; + result.totalHits = totalHits; result.score = score; result.date = DateTools.stringToDate(doc.get(FIELD_DATE)); result.summary = doc.get(FIELD_SUMMARY); @@ -961,7 +952,7 @@ private synchronized void resetIndexSearcher(String repository) throws IOException { IndexSearcher searcher = searchers.remove(repository); if (searcher != null) { - searcher.close(); + searcher.getIndexReader().close(); } } @@ -1014,19 +1005,21 @@ * * @param text * if the text is null or empty, null is returned - * @param maximumHits - * the maximum number of hits to collect + * @param page + * the page number to retrieve. page is 1-indexed. + * @param pageSize + * the number of elements to return for this page * @param repositories * a list of repositories to search. if no repositories are * specified null is returned. * @return a list of SearchResults in order from highest to the lowest score * */ - public List<SearchResult> search(String text, int maximumHits, List<String> repositories) { + public List<SearchResult> search(String text, int page, int pageSize, List<String> repositories) { if (ArrayUtils.isEmpty(repositories)) { return null; } - return search(text, maximumHits, repositories.toArray(new String[0])); + return search(text, page, pageSize, repositories.toArray(new String[0])); } /** @@ -1034,15 +1027,17 @@ * * @param text * if the text is null or empty, null is returned - * @param maximumHits - * the maximum number of hits to collect + * @param page + * the page number to retrieve. page is 1-indexed. + * @param pageSize + * the number of elements to return for this page * @param repositories * a list of repositories to search. if no repositories are * specified null is returned. * @return a list of SearchResults in order from highest to the lowest score * - */ - public List<SearchResult> search(String text, int maximumHits, String... repositories) { + */ + public List<SearchResult> search(String text, int page, int pageSize, String... repositories) { if (StringUtils.isEmpty(text)) { return null; } @@ -1079,14 +1074,15 @@ searcher = new IndexSearcher(reader); } Query rewrittenQuery = searcher.rewrite(query); - TopScoreDocCollector collector = TopScoreDocCollector.create(maximumHits, true); + TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true); searcher.search(rewrittenQuery, collector); - ScoreDoc[] hits = collector.topDocs().scoreDocs; + int offset = Math.max(0, (page - 1) * pageSize); + ScoreDoc[] hits = collector.topDocs(offset, pageSize).scoreDocs; + int totalHits = collector.getTotalHits(); for (int i = 0; i < hits.length; i++) { int docId = hits[i].doc; Document doc = searcher.doc(docId); - // TODO identify the source index for the doc, then eliminate FIELD_REPOSITORY - SearchResult result = createSearchResult(doc, hits[i].score); + SearchResult result = createSearchResult(doc, hits[i].score, offset + i + 1, totalHits); if (repositories.length == 1) { // single repository search result.repository = repositories[0]; @@ -1137,7 +1133,7 @@ Highlighter highlighter = new Highlighter(formatter, scorer); highlighter.setTextFragmenter(fragmenter); - String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 5); + String [] fragments = highlighter.getBestFragments(analyzer, "content", content, 3); if (ArrayUtils.isEmpty(fragments)) { if (SearchObjectType.blob == result.type) { return ""; -- Gitblit v1.9.1