Revision: 17249
http://sourceforge.net/p/gate/code/17249
Author: valyt
Date: 2014-01-27 19:13:19 +0000 (Mon, 27 Jan 2014)
Log Message:
-----------
Refactored the writing functionality into its own class.
Modified Paths:
--------------
mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/zipcollection/DocumentCollection.java
Modified:
mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/zipcollection/DocumentCollection.java
===================================================================
--- mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/zipcollection/DocumentCollection.java 2014-01-27 17:17:30 UTC (rev 17248)
+++ mimir/branches/5.0/mimir-core/src/gate/mimir/index/mg4j/zipcollection/DocumentCollection.java 2014-01-27 19:13:19 UTC (rev 17249)
@@ -205,13 +205,100 @@
}
}
+ /**
+ * Class that handles the creation of collection files.
+ */
+ protected static class CollectionFileWriter {
+
+ /**
+ * The output stream used to write to the zip file managed by this writer.
+ */
+ protected ZipOutputStream zipOuputStream;
+
+ /**
+ * The zip file to which we are currently writing.
+ */
+ protected File zipFile;
+
+ /**
+ * The number of entries written so far to the current zip file.
+ */
+ protected int currentEntries;
+
+ /**
+ * The amount of bytes written so far to the current zip file.
+ */
+ protected long currentLength;
+
+ /**
+ * A {@link ByteArrayOutputStream} used to temporarily store serialised
+ * document data objects.
+ */
+ protected ByteArrayOutputStream byteArrayOS;
+
+ public CollectionFileWriter(File file) throws IndexException {
+ this.zipFile = file;
+ if(zipFile.exists()) throw new IndexException("Collection zip file (" +
+ file.getAbsolutePath() + ") already exists!");
+ byteArrayOS = new ByteArrayOutputStream();
+
+ try {
+ zipOuputStream = new ZipOutputStream(new BufferedOutputStream(
+ new FileOutputStream(zipFile)));
+ } catch(FileNotFoundException e) {
+ throw new IndexException("Cannot write to collection zip file (" +
+ zipFile.getAbsolutePath() + ")", e);
+ }
+ currentEntries = 0;
+ currentLength = 0;
+ }
+
+ /**
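+ * Serialises a document and writes it as a new entry in this zip file.
+ * @param entryName the name of the zip entry to create for the document
+ * @param document the document data to serialise into the new entry
+ * @return true if the document was written successfully, false if this
+ * collection file is full and cannot take the extra content.
+ *
+ * @throws IOException if serialising the document or writing the entry fails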
+ */
+ public boolean writeDocumentData(String entryName, DocumentData document) throws IOException {
+ //write the new document to the byte array
+ ObjectOutputStream objectOutStream = new ObjectOutputStream(byteArrayOS);
+ objectOutStream.writeObject(document);
+ objectOutStream.close();
+
+ // check if this will take us over size
+ if(currentLength + byteArrayOS.size() > ZIP_FILE_MAX_SIZE ||
+ currentEntries >= ZIP_FILE_MAX_ENTRIES) return false;
+
+ // create a new entry in the current zip file
+ ZipEntry entry = new ZipEntry(entryName);
+ zipOuputStream.putNextEntry(entry);
+ //write the data
+ byteArrayOS.writeTo(zipOuputStream);
+ zipOuputStream.closeEntry();
+ currentLength += entry.getCompressedSize();
+
+ //clean up the byte array for next time
+ byteArrayOS.reset();
+ currentEntries++;
+ return true;
+ }
+
+ public void close() throws IOException {
+ if(zipOuputStream != null) zipOuputStream.close();
+ }
+ }
-
/**
* The zip files containing the document collection.
*/
protected List<CollectionFile> collectionFiles = null;
+ protected CollectionFileWriter collectionFileWriter;
+
+
private static Logger logger = Logger.getLogger(DocumentCollection.class);
/**
@@ -252,43 +339,15 @@
*/
public static final int ZIP_FILE_MAX_ENTRIES = 65530;
- /**
- * The zip file managed by this collection.
- */
- protected ZipOutputStream zipOuputStream;
+
/**
- * The zip file to which we are currently writing.
+ * The ID for the next document to be written in this collection. This value
+ * is initialised to 0 and then is automatically incremented whenever a new
+ * document is written.
*/
- protected File zipFile;
-
- /**
- * The number of entries written so far to the current zip file.
- */
- protected int currentEntries;
-
- /**
- * The amount of bytes written so far to the current zip file.
- */
- protected long currentLength;
-
- /**
- * A {@link ByteArrayOutputStream} used to temporarily store serialised
- * document data objects.
- */
- protected ByteArrayOutputStream byteArrayOS;
-
- /**
- * The ID for the next document to be written. This value is initialised to 0
- * and then is automatically incremented whenever a new document is written.
- */
protected long nextDocumentId;
-
- /**
- * The unique ID of the current zip file (the file open for writing).
- */
- protected int zipFileId;
/**
* Opens a zip file and creates a DocumentCollection object for accessing the
@@ -322,10 +381,8 @@
documentCache = new Long2ObjectLinkedOpenHashMap<DocumentData>();
// prepare for writing
- byteArrayOS = new ByteArrayOutputStream();
nextDocumentId = collectionFiles.isEmpty() ? 0 :
(collectionFiles.get(collectionFiles.size() - 1).lastEntry + 1);
- zipFileId = collectionFiles.size();
inputBuffer = new Long2ObjectLinkedOpenHashMap<DocumentData>();
}
@@ -378,41 +435,31 @@
* collection file(s).
*/
public void writeDocument(DocumentData document) throws IndexException{
- if(zipFile == null) openZipFile();
+ if(collectionFileWriter == null) openCollectionWriter();
+
try{
- //write the new document to the byte array
- ObjectOutputStream objectOutStream = new ObjectOutputStream(byteArrayOS);
- objectOutStream.writeObject(document);
- objectOutStream.close();
-
- //see if we're about to go over the limits
- if(currentEntries >= ZIP_FILE_MAX_ENTRIES ||
- currentLength + byteArrayOS.size() >= ZIP_FILE_MAX_SIZE ||
- inputBuffer.size() >= INPUT_BUFFER_SIZE) {
- //move to the next zip file
- closeZipFile();
- // open the newly-closed zip file in read mode
- collectionFiles.add(new CollectionFile(zipFile));
- zipFileId++;
- openZipFile();
+ boolean success = false;
+ while(!success) {
+ success = collectionFileWriter.writeDocumentData(
+ Long.toString(nextDocumentId), document);
+ if(!success) {
+ // the current collection file is full: close it
+ collectionFileWriter.close();
+ synchronized(collectionFiles) {
+ // open the newly saved zip file
+ collectionFiles.add(new CollectionFile(collectionFileWriter.zipFile));
+ inputBuffer.clear();
+ }
+ // open a new one and try again
+ openCollectionWriter();
+ }
}
-
- // create a new entry in the current zip file
- ZipEntry entry = new ZipEntry(Long.toString(nextDocumentId++));
- zipOuputStream.putNextEntry(entry);
- //write the data
- byteArrayOS.writeTo(zipOuputStream);
- zipOuputStream.closeEntry();
- currentLength += entry.getCompressedSize();
-
- //clean up the byte array for next time
- byteArrayOS.reset();
- currentEntries++;
} catch(IOException e){
throw new IndexException("Problem while accessing the collection file", e);
} finally {
// save the document data to the input buffer
inputBuffer.put(nextDocumentId, document);
+ nextDocumentId++;
}
}
@@ -422,33 +469,19 @@
* @throws IndexException if the collection zip file already exists, or cannot
* be opened for writing.
*/
- protected void openZipFile() throws IndexException{
- zipFile = new File(indexDirectory,
- CollectionFile.getCollectionFileName(Integer.toString(zipFileId)));
- if(zipFile.exists()) throw new IndexException("Collection zip file (" +
- zipFile.getAbsolutePath() + ") already exists!");
-
- try {
- zipOuputStream = new ZipOutputStream(new BufferedOutputStream(
- new FileOutputStream(zipFile)));
- } catch(FileNotFoundException e) {
- throw new IndexException("Cannot write to collection zip file (" +
- zipFile.getAbsolutePath() + ")", e);
+ protected void openCollectionWriter() throws IndexException{
+ int zipFileNumber = 0;
+ synchronized(collectionFiles) {
+ zipFileNumber = collectionFiles.isEmpty() ? 0 :
+ collectionFiles.get(collectionFiles.size() - 1).collectionFileNumber + 1;
}
- currentEntries = 0;
- currentLength = 0;
- inputBuffer.clear();
+ collectionFileWriter = new CollectionFileWriter(
+ new File(indexDirectory,
+ CollectionFile.getCollectionFileName(
+ Integer.toString(zipFileNumber))));
}
/**
- * Closes the current zip file.
- * @throws IOException
- */
- protected void closeZipFile() throws IOException{
- if(zipOuputStream != null) zipOuputStream.close();
- }
-
- /**
* Close this document collection and release all allocated resources (such
* as open file handles).
* @throws IOException
@@ -456,7 +489,7 @@
*/
public void close() throws IOException {
// close the writer
- closeZipFile();
+ collectionFileWriter.close();
// close the reader
closed = true;
if(collectionFiles != null){
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
CenturyLink Cloud: The Leader in Enterprise Cloud Services.
Learn Why More Businesses Are Choosing CenturyLink Cloud For
Critical Workloads, Development Environments & Everything In Between.
Get a Quote or Start a Free Trial Today.
http://pubads.g.doubleclick.net/gampad/clk?id=119420431&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs