From 87ee944282f3c7191eed900e498cca20fc34fe98 Mon Sep 17 00:00:00 2001
From: James Moger <james.moger@gitblit.com>
Date: Thu, 12 Jul 2012 18:21:47 -0400
Subject: [PATCH] Fixed broken Lucene blob delete

---
 docs/04_releases.mkd                            |    3 +
 tests/com/gitblit/tests/IssuesTest.java         |    6 ++
 tests/com/gitblit/tests/LuceneExecutorTest.java |   17 +++++++-
 src/com/gitblit/LuceneExecutor.java             |   52 +++++++++++++++++++-------
 groovy/.gitignore                               |    1 
 5 files changed, 61 insertions(+), 18 deletions(-)

diff --git a/docs/04_releases.mkd b/docs/04_releases.mkd
index bed2328..43a2db0 100644
--- a/docs/04_releases.mkd
+++ b/docs/04_releases.mkd
@@ -6,8 +6,9 @@
 
 #### fixes
 
+- Fixed bug in Lucene search where old/stale blobs were never properly deleted prior to reindexing.  This resulted in duplicate blob entries in the index.
 - Fixed intermittent bug in identifying line numbers in Lucene search (issue 105)
-- Adjust repository search to handle foo.git and foo/bar.git (issue 104)
+- Adjust repository identification to handle foo.git and foo/bar.git (issue 104)
 - Fixed bug where a repository set as authenticated push did not have anonymous clone access (issue 96)
 - Fixed bug in Basic authentication if passwords had a colon (Github/peterloron)
 - Fixed bug where the Gitblit Manager could not update a setting that was not referenced in reference.properties (issue 85)
diff --git a/groovy/.gitignore b/groovy/.gitignore
new file mode 100644
index 0000000..e58dc47
--- /dev/null
+++ b/groovy/.gitignore
@@ -0,0 +1 @@
+/grape
diff --git a/src/com/gitblit/LuceneExecutor.java b/src/com/gitblit/LuceneExecutor.java
index c702dcc..f7a7390 100644
--- a/src/com/gitblit/LuceneExecutor.java
+++ b/src/com/gitblit/LuceneExecutor.java
@@ -105,7 +105,7 @@
 public class LuceneExecutor implements Runnable {
 	
 		
-	private static final int INDEX_VERSION = 3;
+	private static final int INDEX_VERSION = 4;
 
 	private static final String FIELD_OBJECT_TYPE = "type";
 	private static final String FIELD_ISSUE = "issue";
@@ -301,7 +301,6 @@
 			throw new RuntimeException(e);
 		}
 	}
-
 	
 	/**
 	 * Returns the author for the commit, if this information is available.
@@ -728,8 +727,9 @@
 	 * @param repositoryName
 	 * @param issueId
 	 * @throws Exception
+	 * @return true, if deleted, false if no record was deleted
 	 */
-	private void deleteIssue(String repositoryName, String issueId) throws Exception {
+	private boolean deleteIssue(String repositoryName, String issueId) throws Exception {
 		BooleanQuery query = new BooleanQuery();
 		Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.issue.name());
 		query.add(new TermQuery(objectTerm), Occur.MUST);
@@ -737,8 +737,17 @@
 		query.add(new TermQuery(issueidTerm), Occur.MUST);
 		
 		IndexWriter writer = getIndexWriter(repositoryName);
+		int numDocsBefore = writer.numDocs();
 		writer.deleteDocuments(query);
 		writer.commit();
+		int numDocsAfter = writer.numDocs();
+		if (numDocsBefore == numDocsAfter) {
+			logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
+			return false;
+		} else {
+			logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
+			return true;
+		}
 	}
 	
 	/**
@@ -748,19 +757,29 @@
 	 * @param branch
 	 * @param path
 	 * @throws Exception
+	 * @return true, if deleted, false if no record was deleted
 	 */
-	private void deleteBlob(String repositoryName, String branch, String path) throws Exception {
-		BooleanQuery query = new BooleanQuery();
-		Term objectTerm = new Term(FIELD_OBJECT_TYPE, SearchObjectType.blob.name());
-		query.add(new TermQuery(objectTerm), Occur.MUST);
-		Term branchTerm = new Term(FIELD_BRANCH, branch);
-		query.add(new TermQuery(branchTerm), Occur.MUST);
-		Term pathTerm = new Term(FIELD_PATH, path);
-		query.add(new TermQuery(pathTerm), Occur.MUST);
+	public boolean deleteBlob(String repositoryName, String branch, String path) throws Exception {
+		String pattern = MessageFormat.format("{0}:'{'0} AND {1}:\"'{'1'}'\" AND {2}:\"'{'2'}'\"", FIELD_OBJECT_TYPE, FIELD_BRANCH, FIELD_PATH);
+		String q = MessageFormat.format(pattern, SearchObjectType.blob.name(), branch, path);
 		
+		BooleanQuery query = new BooleanQuery();
+		StandardAnalyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);
+		QueryParser qp = new QueryParser(LUCENE_VERSION, FIELD_SUMMARY, analyzer);
+		query.add(qp.parse(q), Occur.MUST);
+
 		IndexWriter writer = getIndexWriter(repositoryName);
-		writer.deleteDocuments(query);
+		int numDocsBefore = writer.numDocs();
+		writer.deleteDocuments(query);		
 		writer.commit();
+		int numDocsAfter = writer.numDocs();
+		if (numDocsBefore == numDocsAfter) {
+			logger.debug(MessageFormat.format("no records found to delete {0}", query.toString()));
+			return false;
+		} else {
+			logger.debug(MessageFormat.format("deleted {0} records with {1}", numDocsBefore - numDocsAfter, query.toString()));
+			return true;
+		}
 	}
 
 	/**
@@ -881,7 +900,9 @@
 						IssueModel issue = IssueUtils.getIssue(repository, issueId);
 						if (issue == null) {
 							// issue was deleted, remove from index
-							deleteIssue(model.name, issueId);
+							if (!deleteIssue(model.name, issueId)) {
+								logger.error(MessageFormat.format("Failed to delete issue {0} from Lucene index!", issueId));
+							}
 						} else {
 							// issue was updated
 							index(model.name, issue);
@@ -1119,7 +1140,7 @@
 			qp = new QueryParser(LUCENE_VERSION, FIELD_CONTENT, analyzer);
 			qp.setAllowLeadingWildcard(true);
 			query.add(qp.parse(text), Occur.SHOULD);
-
+			
 			IndexSearcher searcher;
 			if (repositories.length == 1) {
 				// single repository search
@@ -1135,7 +1156,10 @@
 				MultiSourceReader reader = new MultiSourceReader(rdrs);
 				searcher = new IndexSearcher(reader);
 			}
+			
 			Query rewrittenQuery = searcher.rewrite(query);
+			logger.debug(rewrittenQuery.toString());
+
 			TopScoreDocCollector collector = TopScoreDocCollector.create(5000, true);
 			searcher.search(rewrittenQuery, collector);
 			int offset = Math.max(0, (page - 1) * pageSize);
diff --git a/tests/com/gitblit/tests/IssuesTest.java b/tests/com/gitblit/tests/IssuesTest.java
index 11f9551..54cac33 100644
--- a/tests/com/gitblit/tests/IssuesTest.java
+++ b/tests/com/gitblit/tests/IssuesTest.java
@@ -134,7 +134,7 @@
 			lucene.index(name, anIssue);
 		}
 		List<SearchResult> hits = lucene.search("working", 1, 10, name);
-		assertTrue(hits.size() > 0);
+		assertTrue(hits.size() == 1);
 		
 		// reindex an issue
 		issue = allIssues.get(0);
@@ -144,6 +144,10 @@
 		issue = IssueUtils.getIssue(repository, issue.id);
 		lucene.index(name, issue);
 
+		hits = lucene.search("working", 1, 10, name);
+		assertTrue(hits.size() == 1);
+
+		
 		// delete all issues
 		for (IssueModel anIssue : allIssues) {
 			assertTrue(IssueUtils.deleteIssue(repository, anIssue.id, "D"));
diff --git a/tests/com/gitblit/tests/LuceneExecutorTest.java b/tests/com/gitblit/tests/LuceneExecutorTest.java
index ec81fd8..21454fe 100644
--- a/tests/com/gitblit/tests/LuceneExecutorTest.java
+++ b/tests/com/gitblit/tests/LuceneExecutorTest.java
@@ -16,6 +16,8 @@
 package com.gitblit.tests;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -23,14 +25,12 @@
 import org.eclipse.jgit.lib.Repository;
 import org.junit.Test;
 
-import com.gitblit.GitBlit;
 import com.gitblit.LuceneExecutor;
 import com.gitblit.models.RefModel;
 import com.gitblit.models.RepositoryModel;
 import com.gitblit.models.SearchResult;
 import com.gitblit.utils.FileUtils;
 import com.gitblit.utils.JGitUtils;
-import com.gitblit.utils.StringUtils;
 
 /**
  * Tests Lucene indexing and querying.
@@ -160,4 +160,17 @@
 		lucene.close();
 		assertEquals(10, results.size());
 	}
+	
+	@Test
+	public void testDeleteBlobFromIndex() throws Exception {
+		// start with a fresh reindex of entire repository
+		LuceneExecutor lucene = new LuceneExecutor(null, GitBlitSuite.REPOSITORIES);
+		Repository repository = GitBlitSuite.getHelloworldRepository();
+		RepositoryModel model = newRepositoryModel(repository);
+		lucene.reindex(model, repository);
+		
+		// now delete a blob
+		assertTrue(lucene.deleteBlob(model.name, "refs/heads/master", "java.java"));
+		assertFalse(lucene.deleteBlob(model.name, "refs/heads/master", "java.java"));
+	}
 }
\ No newline at end of file

--
Gitblit v1.9.1