Github user arunmahadevan commented on a diff in the pull request: https://github.com/apache/storm/pull/1044#discussion_r55965933 --- Diff: external/storm-hdfs/src/main/java/org/apache/storm/hdfs/bolt/AbstractHdfsBolt.java --- @@ -223,29 +259,57 @@ public final void execute(Tuple tuple) { public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) { } - /** - * writes a tuple to the underlying filesystem but makes no guarantees about syncing data. - * - * this.offset is also updated to reflect additional data written - * - * @param tuple - * @throws IOException - */ - abstract void writeTuple(Tuple tuple) throws IOException; + private void syncAllWriters() throws IOException { + for (AbstractHDFSWriter writer : writers.values()) { + writer.sync(); + } + } - /** - * Make the best effort to sync written data to the underlying file system. Concrete classes should very clearly - * state the file state that sync guarantees. For example, HdfsBolt can make a much stronger guarantee than - * SequenceFileBolt. - * - * @throws IOException - */ - abstract void syncTuples() throws IOException; + private String getKeyToOldestWriter() + { + String oldestKey = null; + long oldestTime = Long.MAX_VALUE; + for (final Map.Entry<String, AbstractHDFSWriter> entry : writers.entrySet()) { + if (entry.getValue().getLastUsedTime() < oldestTime) { + oldestKey = entry.getKey(); + oldestTime = entry.getValue().getLastUsedTime(); + } + } - abstract void closeOutputFile() throws IOException; + return oldestKey; + } - abstract Path createOutputFile() throws IOException; + private void startTimedRotationPolicy() { + long interval = ((TimedRotationPolicy)this.rotationPolicy).getInterval(); + this.rotationTimer = new Timer(true); + TimerTask task = new TimerTask() { + @Override + public void run() { + for (final AbstractHDFSWriter writer : writers.values()) { + try { + rotateOutputFile(writer); + } catch (IOException e) { + LOG.warn("IOException during scheduled file rotation.", e); + } + } + writers.clear(); + } + }; + this.rotationTimer.scheduleAtFixedRate(task, interval, interval); + } + + protected Path getBasePathForNextFile(Tuple tuple) { + + Path fullPathToFile = new Path(this.fsUrl + this.fileNameFormat.getPath() + this.partitioner.getPartitionPath(tuple), + this.fileNameFormat.getName(this.rotation, System.currentTimeMillis())); + + return fullPathToFile; + } abstract void doPrepare(Map conf, TopologyContext topologyContext, OutputCollector collector) throws IOException; + abstract String getWriterKey(Tuple tuple); --- End diff -- this method is not required if partition path is used as the key.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---