From f1d2ada42c5e3640656d805155e1bcadb95fd126 Mon Sep 17 00:00:00 2001
From: James Moger <james.moger@gitblit.com>
Date: Wed, 21 Mar 2012 21:02:46 -0400
Subject: [PATCH] Externalized the Lucene ignore extensions

---
 docs/04_releases.mkd                |    6 ++++--
 distrib/gitblit.properties          |    6 ++++++
 src/com/gitblit/LuceneExecutor.java |   12 +++++++-----
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/distrib/gitblit.properties b/distrib/gitblit.properties
index 2ac6598..ad82280 100644
--- a/distrib/gitblit.properties
+++ b/distrib/gitblit.properties
@@ -373,6 +373,12 @@
 # SINCE 0.5.0
 web.itemsPerPage = 50
 
+# Registered file extensions to ignore during Lucene indexing
+#
+# SPACE-DELIMITED
+# SINCE 0.9.0
+web.luceneIgnoreExtensions = 7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip
+
 # Registered extensions for google-code-prettify
 #
 # SPACE-DELIMITED
diff --git a/docs/04_releases.mkd b/docs/04_releases.mkd
index 126ee23..a6170e6 100644
--- a/docs/04_releases.mkd
+++ b/docs/04_releases.mkd
@@ -17,8 +17,10 @@
 #### additions
 
 - Added optional Lucene branch indexing (issue 16)  
-Repository branches may be optionally indexed by Lucene for improved searching.  To use this feature you have to specify which branches to index within the *Edit Repository* page.  Indexes are automatically built and incrementally updated on a 2 minute cycle. (i.e you will have to wait 2-3 minutes after specifying a branch to be indexed before the index will be built.)<br/><br/>
-If a repository has Lucene-indexed branches the *search* form on the repository pages will redirect to the root-level Lucene search page.  If the repository does not specify any indexed branches then the traditional repository commit search is used.
+    **New:** *web.luceneIgnoreExtensions = 7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip*  
+Repository branches may be optionally indexed by Lucene for improved searching.  To use this feature you must specify which branches to index within the *Edit Repository* page; _no repositories are automatically indexed_.  Gitblit will build or incrementally update enrolled repositories on a 2 minute cycle. (i.e. you will have to wait 2-3 minutes after respecifying indexed branches or pushing new commits before Gitblit will build/update the repository's Lucene index.)  
+If a repository has Lucene-indexed branches the *search* form on the repository pages will redirect to the root-level Lucene search page and only the content of those branches can be searched.  
+If the repository does not specify any indexed branches then repository commit-traversal search is used.
 - Allow specifying timezone to use for Gitblit which is independent of both the JVM and the system timezone (issue 54)  
     **New:** *web.timezone =*  
 - Added a built-in AJP connector for integrating Gitblit GO into an Apache mod_proxy setup (issue 59)  
diff --git a/src/com/gitblit/LuceneExecutor.java b/src/com/gitblit/LuceneExecutor.java
index 12f2305..b6df254 100644
--- a/src/com/gitblit/LuceneExecutor.java
+++ b/src/com/gitblit/LuceneExecutor.java
@@ -25,7 +25,6 @@
 import java.text.MessageFormat;
 import java.text.ParseException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
@@ -138,10 +137,9 @@
 	private final Map<String, IndexSearcher> searchers = new ConcurrentHashMap<String, IndexSearcher>();
 	private final Map<String, IndexWriter> writers = new ConcurrentHashMap<String, IndexWriter>();
 	
-	private final Set<String> excludedExtensions = new TreeSet<String>(Arrays.asList("7z", "arc",
-			"arj", "bin", "bmp", "dll", "doc", "docx", "exe", "gif", "gz", "jar", "jpg", "lib",
-			"lzh", "odg", "pdf", "ppt", "png", "so", "swf", "xcf", "xls", "xlsx", "zip"));
-
+	private final String luceneIgnoreExtensions = "7z arc arj bin bmp dll doc docx exe gif gz jar jpg lib lzh odg odf odt pdf ppt png so swf xcf xls xlsx zip";
+	private Set<String> excludedExtensions;
+	
 	public LuceneExecutor(IStoredSettings settings, File repositoriesFolder) {
 		this.storedSettings = settings;
 		this.repositoriesFolder = repositoriesFolder;
@@ -154,6 +152,10 @@
 	 */
 	@Override
 	public void run() {
+		// reload the excluded extensions
+		String exts = storedSettings.getString(Keys.web.luceneIgnoreExtensions, luceneIgnoreExtensions);
+		excludedExtensions = new TreeSet<String>(StringUtils.getStringsFromValue(exts));
+
 		for (String repositoryName: GitBlit.self().getRepositoryList()) {
 			RepositoryModel model = GitBlit.self().getRepositoryModel(repositoryName);
 			if (model.hasCommits && !ArrayUtils.isEmpty(model.indexedBranches)) {

--
Gitblit v1.9.1