Hi all,
We are using jackrabbit with features like indexing and versioning. Not
surprisingly, our repository folder is very big when only considering
the size of the content inside it. We are using Jackrabbit 1.4.1 under
linux.
Is there by chance some way to reduce the disk space of the repository?
If we switch to a newer version, will this make things better?
I attached to this file our repository.xml file.
Thanks.
Maxime Bégnis
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE Repository PUBLIC "-//The Apache Software Foundation//DTD Jackrabbit 1.4//EN"
"http://jackrabbit.apache.org/dtd/repository-1.4.dtd">
<!-- Example Repository Configuration File -->
<Repository>
<!--
virtual file system where the repository stores global state
(e.g. registered namespaces, custom node types, etc.)
-->
<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${rep.home}/repository"/>
</FileSystem>
<!--
security configuration
-->
<Security appName="Jackrabbit">
<!--
access manager:
class: FQN of class implementing the AccessManager interface
-->
<AccessManager class="org.apache.jackrabbit.core.security.SimpleAccessManager">
<!-- <param name="config" value="${rep.home}/access.xml"/> -->
</AccessManager>
<LoginModule class="org.apache.jackrabbit.core.security.SimpleLoginModule">
<!-- anonymous user name ('anonymous' is the default value) -->
<param name="anonymousId" value="anonymous"/>
<!--
default user name to be used instead of the anonymous user
when no login credentials are provided (unset by default)
-->
<!-- <param name="defaultUserId" value="superuser"/> -->
</LoginModule>
</Security>
<!--
location of workspaces root directory and name of default workspace
-->
<Workspaces rootPath="${rep.home}/workspaces" defaultWorkspace="default"/>
<!--
workspace configuration template:
used to create the initial workspace if there's no workspace yet
-->
<Workspace name="${wsp.name}">
<!--
virtual file system of the workspace:
class: FQN of class implementing the FileSystem interface
-->
<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${wsp.home}"/>
</FileSystem>
<!--
persistence manager of the workspace:
class: FQN of class implementing the PersistenceManager interface
-->
<PersistenceManager class="org.apache.jackrabbit.core.persistence.bundle.DerbyPersistenceManager">
<param name="url" value="jdbc:derby:${wsp.home}/db;create=true"/>
<param name="schemaObjectPrefix" value="${wsp.name}_"/>
</PersistenceManager>
<!-- MySQL RDBMS on localhost example
<PersistenceManager class="org.apache.jackrabbit.core.state.db.SimpleDbPersistenceManager">
<param name="driver" value="com.mysql.jdbc.Driver" />
<param name="url" value="jdbc:mysql://localhost/calenco_test" />
<param name="user" value="calencodevel" />
<param name="password" value="C4|3nc0"/>
<param name="schema" value="mysql"/>
<param name="schemaObjectPrefix" value="wksp_${wsp.name}_"/>
<param name="externalBLOBs" value="false"/>
</PersistenceManager>
-->
<!--
Search index and the file system it uses.
class: FQN of class implementing the QueryHandler interface
-->
<SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
<param name="path" value="${wsp.home}/index"/>
<param name="textFilterClasses" value="org.apache.jackrabbit.extractor.MsWordTextExtractor,org.apache.jackrabbit.extractor.MsExcelTextExtractor,org.apache.jackrabbit.extractor.MsPowerPointTextExtractor,org.apache.jackrabbit.extractor.PdfTextExtractor,org.apache.jackrabbit.extractor.OpenOfficeTextExtractor,org.apache.jackrabbit.extractor.RTFTextExtractor,org.apache.jackrabbit.extractor.HTMLTextExtractor,org.apache.jackrabbit.extractor.XMLTextExtractor"/>
<param name="extractorPoolSize " value="2"/>
<param name="supportHighlighting" value="true"/>
</SearchIndex>
</Workspace>
<!--
Configures the versioning
-->
<Versioning rootPath="${rep.home}/version">
<!--
Configures the filesystem to use for versioning for the respective
persistence manager
-->
<FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
<param name="path" value="${rep.home}/version" />
</FileSystem>
<!--
Configures the persistence manager to be used for persisting version state.
Please note that the current versioning implementation is based on
a 'normal' persistence manager, but this could change in future
implementations.
-->
<PersistenceManager class="org.apache.jackrabbit.core.persistence.bundle.DerbyPersistenceManager">
<param name="url" value="jdbc:derby:${rep.home}/version/db;create=true"/>
<param name="schemaObjectPrefix" value="version_"/>
</PersistenceManager>
<!-- MySQL RDBMS on localhost example
<PersistenceManager class="org.apache.jackrabbit.core.state.db.SimpleDbPersistenceManager">
<param name="driver" value="com.mysql.jdbc.Driver" />
<param name="url" value="jdbc:mysql://localhost/calenco_test" />
<param name="user" value="calencodevel" />
<param name="password" value="C4|3nc0"/>
<param name="schema" value="mysql"/>
<param name="schemaObjectPrefix" value="wksp_${wsp.name}_"/>
<param name="externalBLOBs" value="false"/>
</PersistenceManager>
-->
</Versioning>
<!--
Search index for content that is shared repository wide
(/jcr:system tree, contains mainly versions)
-->
<SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
<param name="path" value="${rep.home}/repository/index"/>
<param name="textFilterClasses" value="org.apache.jackrabbit.extractor.MsWordTextExtractor,org.apache.jackrabbit.extractor.MsExcelTextExtractor,org.apache.jackrabbit.extractor.MsPowerPointTextExtractor,org.apache.jackrabbit.extractor.PdfTextExtractor,org.apache.jackrabbit.extractor.OpenOfficeTextExtractor,org.apache.jackrabbit.extractor.RTFTextExtractor,org.apache.jackrabbit.extractor.HTMLTextExtractor,org.apache.jackrabbit.extractor.XMLTextExtractor"/>
<param name="extractorPoolSize " value="2"/>
<param name="supportHighlighting" value="true"/>
</SearchIndex>
<DataStore class="org.apache.jackrabbit.core.data.FileDataStore"/>
</Repository>