hi jeff
On 7/12/05, Jeff Foster <[EMAIL PROTECTED]> wrote:
> Hi
>
> I have been giving JackRabbit a go and was interested in the performance.
> I have seen some claims about performance (1 million nodes per hour) but have
> not been able to get anything like this.
>
> So I have written a little util to upload files and folders and then loaded
> up about 4000 files and folders out of a java project (mostly java files).
>
> So on my somewhat out of date laptop (Dell Latitude C840 2G 1800 MHz Windows
> XP)
> I get the following
>
> With the file system:
> 1 785 s
> 2 968 s
>
> Again:
> 1 899 s
> 2 1075 s
>
> And with all those files ... and the time to delete them I gave up..
>
> CQFS
> 1 295.4 s
> 2 378.6 s
> 3 409.9 s
> 4 453.4 s
> 5 544.0 s
> 6 10637 s
> 7 1542.2 s
>
> I accept the windows file system version is limited and my laptop is limited
> (especially waiting for all those files to be deleted!)
>
> But CQFS seems to only give me 13 files/folders per second at best and this
> performance seems to tail off quite quickly. Am I doing something silly in
> the configuration? How many nodes would be created per file and per folder?
a folder translates to 1 node and 2 properties. a file translates to 2
nodes and 8 properties.
the poor performance you're experiencing is caused by the windows
filesystem, i am afraid (even when using CQFS). the content of every
file is represented as a BINARY property, which in turn is internally
backed by a file in the local filesystem. if you run your test on a
linux box you'll see much better results with both the local
filesystem and CQFS.
on the other hand, if you run a test that is generating about the same
amount of items but doesn't use BINARY properties you'll see a
dramatic difference.
to improve the performance you could either
- minimize/avoid use of BINARY properties
- use an OS with a decent filesystem
- write a custom persistence manager that 'inlines' BINARY properties
(e.g. using a size threshold) rather than storing them in the local filesystem
cheers
stefan
>
> It also took me a while to realise I needed to be in the correct place to
> download the dependencies via Maven.
>
> Cheers
>
> Jeff
>
>
>
>
>
>
>
>
>
>
>
> import java.io.BufferedInputStream;
> import java.io.File;
> import java.io.FileFilter;
> import java.io.FileInputStream;
> import java.io.FileNotFoundException;
> import java.io.Serializable;
> import java.util.Calendar;
> import java.util.Date;
> import java.util.GregorianCalendar;
> import java.util.HashMap;
> import java.util.Hashtable;
> import java.util.Map;
> import javax.jcr.*;
> import javax.jcr.lock.LockException;
> import javax.jcr.nodetype.ConstraintViolationException;
> import javax.jcr.nodetype.NoSuchNodeTypeException;
> import javax.jcr.version.VersionException;
> import javax.naming.Context;
> import javax.naming.InitialContext;
> import org.apache.jackrabbit.core.SearchManager;
> import org.apache.jackrabbit.core.jndi.RegistryHelper;
> import org.apache.jackrabbit.value.DateValue;
> import org.apache.jackrabbit.value.StringValue;
> public class JackRabbitFileLoader
> {
> public JackRabbitFileLoader()
> {
> super();
> }
>
> private Node create(Node container, File file, FileFilter filter, boolean
> recurse) throws Exception
> {
> if (file.isDirectory())
> {
> Node directoryNodeRef = createDirectory(container, file);
>
> if(recurse)
> {
> File[] files = ((filter == null) ? file.listFiles() :
> file.listFiles(filter));
> for(int i = 0; i < files.length; i++)
> {
> create(directoryNodeRef, files[i], filter, recurse);
> }
> }
>
> return directoryNodeRef;
> }
> else
> {
> return createFile(container, file);
> }
> }
> private Node createFile(Node parentNode, File file) throws Exception
> {
> Calendar cal = new GregorianCalendar();
> Node nodeRef = parentNode.addNode(file.getName(), "nt:file");
> // create properties for content type
>
> Node resource = nodeRef.addNode("jcr:content", "nt:resource");
>
> resource.setProperty("jcr:encoding", "UTF-8");
> resource.setProperty("jcr:mimeType", "text/plain");
> resource.setProperty("jcr:lastModified", new DateValue(cal));
> // apply the titled aspect - title and description
>
> resource.setProperty("jcr:data", new BufferedInputStream(new
> FileInputStream(file)));
> return nodeRef;
> }
> private Node createDirectory(Node parentNode, File file) throws Exception
> {
> Node nodeRef = parentNode.addNode(file.getName(), "nt:folder");
>
> return nodeRef;
> }
> public static void main(String[] args) throws Exception
> {
> try
> {
>
> JackRabbitFileLoader fileLoader = new JackRabbitFileLoader();
>
> //String configFile = "repotest/repository-file.xml";
> String configFile = "repotest/repository.xml";
>
> String repHomeDir = "repotest";
> Hashtable env = new Hashtable();
> env.put(Context.INITIAL_CONTEXT_FACTORY,
> "org.apache.jackrabbit.core.jndi.provider.DummyInitialContextFactory");
> env.put(Context.PROVIDER_URL, "localhost");
> InitialContext ctx = new InitialContext(env);
> RegistryHelper.registerRepository(ctx, "repo", configFile,
> repHomeDir, true);
> Repository r = (Repository) ctx.lookup("repo");
> Session session = r.login(new SimpleCredentials("userid",
> "".toCharArray()), null);
> Node rn = session.getRootNode();
>
> // for(NodeIterator it = rn.getNodes(); it.hasNext(); /**/)
> // {
> // Node node = it.nextNode();
> // deleteNode(node);
> // }
>
> System.out.println(rn.getNodes().getSize());
>
> long start = System.nanoTime();
> fileLoader.create(rn, new File(args[0]), null, true);
> session.save();
> long end = System.nanoTime();
> System.out.println("Done in " +((end-start)/1000000.0f));
>
> System.out.println(rn.getNodes().getSize());
>
> //printNodes(rn, 0);
> }
> catch (Exception e)
> {
> e.printStackTrace();
> }
> }
>
>
> private static void deleteNode(Node container) throws Exception
> {
> if(container.getDefinition().isProtected())
> {
> return;
> }
> for(NodeIterator it = container.getNodes(); it.hasNext(); /**/)
> {
> Node node = it.nextNode();
> deleteNode(node);
> }
> container.remove();
> }
>
> private static void printNodes(Node container, int indent) throws
> Exception
> {
> System.out.println(container.getPath());
> for(NodeIterator it = container.getNodes(); it.hasNext(); /**/)
> {
> Node node = it.nextNode();
> printNodes(node, indent+2);
> }
> }
> }
>
>
>
>
>
>
>
>
> <?xml version="1.0" encoding="ISO-8859-1"?>
>
> <Repository>
>
> <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
>
> <param name="path" value="w:/cqfs/repository"/>
>
> </FileSystem>
>
> <Security appName="Jackrabbit">
>
> <AccessManager
> class="org.apache.jackrabbit.core.security.SimpleAccessManager">
>
> <!-- <param name="config" value="w:/cqfs/access.xml"/> -->
>
> </AccessManager>
>
> </Security>
>
> <!--
>
> location of workspaces root directory and name of default workspace
>
> -->
>
> <Workspaces rootPath="w:/cqfs/workspaces" defaultWorkspace="default"/>
>
> <Workspace name="${wsp.name}">
>
> <FileSystem class="com.day.jackrabbit.fs.cq.CQFileSystem">
>
> <param name="path" value="${wsp.home}/wspStore.dat"/>
>
> <param name="autoRepair" value="false"/>
>
> <param name="blockSize" value="128"/>
>
> <param name="autoSync" value="false"/>
>
> </FileSystem>
>
> <!--
>
> <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
>
> <param name="path" value="${wsp.home}"/>
>
> </FileSystem>
>
> -->
>
> <!--
>
> persistence manager of the workspace:
>
> class: FQN of class implementing the PersistenceManager interface
>
> -->
>
> <!--
>
> <PersistenceManager
> class="org.apache.jackrabbit.core.state.xml.XMLPersistenceManager"/>
>
> -->
>
> <!--
>
> <PersistenceManager
> class="org.apache.jackrabbit.core.state.mem.InMemPersistenceManager">
>
> <param name="initialCapacity" value="100000"/>
>
> <param name="loadFactor" value="0.3"/>
>
> <param name="persistent" value="true"/>
>
> </PersistenceManager>
>
> -->
>
> <PersistenceManager
> class="org.apache.jackrabbit.core.state.obj.ObjectPersistenceManager"/>
>
> <!--
>
> Search index and the file system it uses.
>
> -->
>
> <SearchIndex class="org.apache.jackrabbit.core.query.lucene.SearchIndex">
>
> <param name="useCompoundFile" value="true"/>
>
> <param name="minMergeDocs" value="1000"/>
>
> <param name="maxMergeDocs" value="10000"/>
>
> <param name="mergeFactor" value="10"/>
>
> <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
>
> <param name="path" value="${wsp.home}/index"/>
>
> </FileSystem>
>
> </SearchIndex>
>
> </Workspace>
>
> <!--
>
> Configures the versioning
>
> -->
>
> <Versioning rootPath="w:/cqfs/version">
>
> <!--
>
> Configures the filesystem to use for versioning for the respective
>
> persistence manager
>
> -->
>
> <FileSystem class="com.day.jackrabbit.fs.cq.CQFileSystem">
>
> <param name="path" value="w:/cqfs/version/version.dat"/>
>
> <param name="autoRepair" value="false"/>
>
> <param name="blockSize" value="128"/>
>
> <param name="autoSync" value="false"/>
>
> </FileSystem>
>
>
>
> <!--
>
> <FileSystem class="org.apache.jackrabbit.core.fs.local.LocalFileSystem">
>
> <param name="path" value="w:/cqfs/version"/>
>
> </FileSystem>
>
> -->
>
>
>
> <!--
>
> Configures the persistence manager to be used for persisting version state.
>
> Please note that the current versioning implementation is based on
>
> a 'normal' persistence manager, but this could change in future
>
> implementations.
>
> -->
>
> <PersistenceManager
> class="org.apache.jackrabbit.core.state.obj.ObjectPersistenceManager"/>
>
> </Versioning>
>
> </Repository>
>
>
>
>
>
> ---------------------------------
> Yahoo! Messenger NEW - crystal clear PC to PC calling worldwide with voicemail
>