Repository: incubator-joshua
Updated Branches:
  refs/heads/master 76bb8fdcf -> 6da3961be


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/util/file_piece.hh
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/util/file_piece.hh b/ext/kenlm/util/file_piece.hh
deleted file mode 100644
index d3d8305..0000000
--- a/ext/kenlm/util/file_piece.hh
+++ /dev/null
@@ -1,175 +0,0 @@
-#ifndef UTIL_FILE_PIECE_H
-#define UTIL_FILE_PIECE_H
-
-#include "util/ersatz_progress.hh"
-#include "util/exception.hh"
-#include "util/file.hh"
-#include "util/mmap.hh"
-#include "util/read_compressed.hh"
-#include "util/string_piece.hh"
-
-#include <cstddef>
-#include <iosfwd>
-#include <string>
-#include <cassert>
-#include <stdint.h>
-
-namespace util {
-
-class ParseNumberException : public Exception {
-  public:
-    explicit ParseNumberException(StringPiece value) throw();
-    ~ParseNumberException() throw() {}
-};
-
-extern const bool kSpaces[256];
-
-// Memory backing the returned StringPiece may vanish on the next call.
-class FilePiece {
-  public:
-    // 1 MB default.
-    explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
-    // Takes ownership of fd.  name is used for messages.
-    explicit FilePiece(int fd, const char *name = NULL, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576);
-
-    /* Read from an istream.  Don't use this if you can avoid it.  Raw fd IO is
-     * much faster.  But sometimes you just have an istream like Boost's HTTP
-     * server and want to parse it the same way.
-     * name is just used for messages and FileName().
-     */
-    explicit FilePiece(std::istream &stream, const char *name = NULL, std::size_t min_buffer = 1048576);
-
-    ~FilePiece();
-
-    char get() {
-      if (position_ == position_end_) {
-        Shift();
-        if (at_end_) throw EndOfFileException();
-      }
-      return *(position_++);
-    }
-
-    // Leaves the delimiter, if any, to be returned by get().  Delimiters defined by isspace().
-    StringPiece ReadDelimited(const bool *delim = kSpaces) {
-      SkipSpaces(delim);
-      return Consume(FindDelimiterOrEOF(delim));
-    }
-
-    /// Read word until the line or file ends.
-    bool ReadWordSameLine(StringPiece &to, const bool *delim = kSpaces) {
-      assert(delim[static_cast<unsigned char>('\n')]);
-      // Skip non-enter spaces.
-      for (; ; ++position_) {
-        if (position_ == position_end_) {
-          try {
-            Shift();
-          } catch (const util::EndOfFileException &e) { return false; }
-          // And break out at end of file.
-          if (position_ == position_end_) return false;
-        }
-        if (!delim[static_cast<unsigned char>(*position_)]) break;
-        if (*position_ == '\n') return false;
-      }
-      // We can't be at the end of file because there's at least one character open.
-      to = Consume(FindDelimiterOrEOF(delim));
-      return true;
-    }
-
-    /** Read a line of text from the file.
-     *
-     * Unlike ReadDelimited, this includes leading spaces and consumes the
-     * delimiter.   It is similar to getline in that way.
-     *
-     * If strip_cr is true, any trailing carriate return (as would be found on
-     * a file written on Windows) will be left out of the returned line.
-     *
-     * Throws EndOfFileException if the end of the file is encountered.  If the
-     * file does not end in a newline, this could mean that the last line is
-     * never read.
-     */
-    StringPiece ReadLine(char delim = '\n', bool strip_cr = true);
-
-    /** Read a line of text from the file, or return false on EOF.
-     *
-     * This is like ReadLine, except it returns false where ReadLine throws
-     * EndOfFileException.  Like ReadLine it may not read the last line in the
-     * file if the file does not end in a newline.
-     *
-     * If strip_cr is true, any trailing carriate return (as would be found on
-     * a file written on Windows) will be left out of the returned line.
-     */
-    bool ReadLineOrEOF(StringPiece &to, char delim = '\n', bool strip_cr = true);
-
-    float ReadFloat();
-    double ReadDouble();
-    long int ReadLong();
-    unsigned long int ReadULong();
-
-    // Skip spaces defined by isspace.
-    void SkipSpaces(const bool *delim = kSpaces) {
-      assert(position_ <= position_end_);
-      for (; ; ++position_) {
-        if (position_ == position_end_) {
-          Shift();
-          // And break out at end of file.
-          if (position_ == position_end_) return;
-        }
-        assert(position_ < position_end_);
-        if (!delim[static_cast<unsigned char>(*position_)]) return;
-      }
-    }
-
-    uint64_t Offset() const {
-      return position_ - data_.begin() + mapped_offset_;
-    }
-
-    const std::string &FileName() const { return file_name_; }
-
-  private:
-    void InitializeNoRead(const char *name, std::size_t min_buffer);
-    // Calls InitializeNoRead, so don't call both.
-    void Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer);
-
-    template <class T> T ReadNumber();
-
-    StringPiece Consume(const char *to) {
-      assert(to >= position_);
-      StringPiece ret(position_, to - position_);
-      position_ = to;
-      return ret;
-    }
-
-    const char *FindDelimiterOrEOF(const bool *delim = kSpaces);
-
-    void Shift();
-    // Backends to Shift().
-    void MMapShift(uint64_t desired_begin);
-
-    void TransitionToRead();
-    void ReadShift();
-
-    const char *position_, *last_space_, *position_end_;
-
-    scoped_fd file_;
-    const uint64_t total_size_;
-    const uint64_t page_;
-
-    std::size_t default_map_size_;
-    uint64_t mapped_offset_;
-
-    // Order matters: file_ should always be destroyed after this.
-    scoped_memory data_;
-
-    bool at_end_;
-    bool fallback_to_read_;
-
-    ErsatzProgress progress_;
-
-    std::string file_name_;
-
-    ReadCompressed fell_back_;
-};
-
-} // namespace util
-
-#endif // UTIL_FILE_PIECE_H

Reply via email to