diff options
Diffstat (limited to 'src/include/fst/extensions/far/far.h')
-rw-r--r-- | src/include/fst/extensions/far/far.h | 184 |
1 files changed, 178 insertions, 6 deletions
diff --git a/src/include/fst/extensions/far/far.h b/src/include/fst/extensions/far/far.h index 82b9e5c..acce76e 100644 --- a/src/include/fst/extensions/far/far.h +++ b/src/include/fst/extensions/far/far.h @@ -32,6 +32,13 @@ namespace fst { enum FarEntryType { FET_LINE, FET_FILE }; enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 }; +inline bool IsFst(const string &filename) { + ifstream strm(filename.c_str()); + if (!strm) + return false; + return IsFstHeader(strm, filename); +} + // FST archive header class class FarHeader { public: @@ -40,8 +47,11 @@ class FarHeader { bool Read(const string &filename) { FstHeader fsthdr; - if (filename.empty()) { // Header reading unsupported on stdin. - return false; + if (filename.empty()) { + // Header reading unsupported on stdin. Assumes STList and StdArc. + fartype_ = "stlist"; + arctype_ = "standard"; + return true; } else if (IsSTTable(filename)) { // Check if STTable ReadSTTableHeader(filename, &fsthdr); fartype_ = "sttable"; @@ -52,6 +62,12 @@ class FarHeader { fartype_ = "sttable"; arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); return true; + } else if (IsFst(filename)) { // Check if Fst + ifstream istrm(filename.c_str()); + fsthdr.Read(istrm, filename); + fartype_ = "fst"; + arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); + return true; } return false; } @@ -61,8 +77,12 @@ class FarHeader { string arctype_; }; -enum FarType { FAR_DEFAULT = 0, FAR_STTABLE = 1, FAR_STLIST = 2, - FAR_SSTABLE = 3 }; +enum FarType { + FAR_DEFAULT = 0, + FAR_STTABLE = 1, + FAR_STLIST = 2, + FAR_FST = 3, +}; // This class creates an archive of FSTs. template <class A> @@ -153,7 +173,7 @@ class STTableFarWriter : public FarWriter<A> { public: typedef A Arc; - static STTableFarWriter *Create(const string filename) { + static STTableFarWriter *Create(const string &filename) { STTableWriter<Fst<A>, FstWriter<A> > *writer = STTableWriter<Fst<A>, FstWriter<A> >::Create(filename); return new STTableFarWriter(writer); @@ -183,7 +203,7 @@ class STListFarWriter : public FarWriter<A> { public: typedef A Arc; - static STListFarWriter *Create(const string filename) { + static STListFarWriter *Create(const string &filename) { STListWriter<Fst<A>, FstWriter<A> > *writer = STListWriter<Fst<A>, FstWriter<A> >::Create(filename); return new STListFarWriter(writer); @@ -209,6 +229,43 @@ class STListFarWriter : public FarWriter<A> { template <class A> +class FstFarWriter : public FarWriter<A> { + public: + typedef A Arc; + + explicit FstFarWriter(const string &filename) + : filename_(filename), error_(false), written_(false) {} + + static FstFarWriter *Create(const string &filename) { + return new FstFarWriter(filename); + } + + void Add(const string &key, const Fst<A> &fst) { + if (written_) { + LOG(WARNING) << "FstFarWriter::Add: only one Fst supported," + << " subsequent entries discarded."; + } else { + error_ = !fst.Write(filename_); + written_ = true; + } + } + + FarType Type() const { return FAR_FST; } + + bool Error() const { return error_; } + + ~FstFarWriter() {} + + private: + string filename_; + bool error_; + bool written_; + + DISALLOW_COPY_AND_ASSIGN(FstFarWriter); +}; + + +template <class A> FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) { switch(type) { case FAR_DEFAULT: @@ -220,6 +277,9 @@ FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) { case FAR_STLIST: return STListFarWriter<A>::Create(filename); break; + case FAR_FST: + return FstFarWriter<A>::Create(filename); + break; default: LOG(ERROR) << "FarWriter::Create: unknown far type"; return 0; @@ -331,6 +391,114 @@ class STListFarReader : public FarReader<A> { DISALLOW_COPY_AND_ASSIGN(STListFarReader); }; +template <class A> +class FstFarReader : public FarReader<A> { + public: + typedef A Arc; + + static FstFarReader *Open(const string &filename) { + vector<string> filenames; + filenames.push_back(filename); + return new FstFarReader<A>(filenames); + } + + static FstFarReader *Open(const vector<string> &filenames) { + return new FstFarReader<A>(filenames); + } + + FstFarReader(const vector<string> &filenames) + : keys_(filenames), has_stdin_(false), pos_(0), fst_(0), error_(false) { + sort(keys_.begin(), keys_.end()); + streams_.resize(keys_.size(), 0); + for (size_t i = 0; i < keys_.size(); ++i) { + if (keys_[i].empty()) { + if (!has_stdin_) { + streams_[i] = &cin; + //sources_[i] = "stdin"; + has_stdin_ = true; + } else { + FSTERROR() << "FstFarReader::FstFarReader: stdin should only " + << "appear once in the input file list."; + error_ = true; + return; + } + } else { + streams_[i] = new ifstream( + keys_[i].c_str(), ifstream::in | ifstream::binary); + } + } + if (pos_ >= keys_.size()) return; + ReadFst(); + } + + void Reset() { + if (has_stdin_) { + FSTERROR() << "FstFarReader::Reset: operation not supported on stdin"; + error_ = true; + return; + } + pos_ = 0; + ReadFst(); + } + + bool Find(const string &key) { + if (has_stdin_) { + FSTERROR() << "FstFarReader::Find: operation not supported on stdin"; + error_ = true; + return false; + } + pos_ = 0;//TODO + ReadFst(); + return true; + } + + bool Done() const { return error_ || pos_ >= keys_.size(); } + + void Next() { + ++pos_; + ReadFst(); + } + + const string &GetKey() const { + return keys_[pos_]; + } + + const Fst<A> &GetFst() const { + return *fst_; + } + + FarType Type() const { return FAR_FST; } + + bool Error() const { return error_; } + + ~FstFarReader() { + if (fst_) delete fst_; + for (size_t i = 0; i < keys_.size(); ++i) + delete streams_[i]; + } + + private: + void ReadFst() { + if (fst_) delete fst_; + if (pos_ >= keys_.size()) return; + streams_[pos_]->seekg(0); + fst_ = Fst<A>::Read(*streams_[pos_], FstReadOptions()); + if (!fst_) { + FSTERROR() << "FstFarReader: error reading Fst from: " << keys_[pos_]; + error_ = true; + } + } + + private: + vector<string> keys_; + vector<istream*> streams_; + bool has_stdin_; + size_t pos_; + mutable Fst<A> *fst_; + mutable bool error_; + + DISALLOW_COPY_AND_ASSIGN(FstFarReader); +}; template <class A> FarReader<A> *FarReader<A>::Open(const string &filename) { @@ -340,6 +508,8 @@ FarReader<A> *FarReader<A>::Open(const string &filename) { return STTableFarReader<A>::Open(filename); else if (IsSTList(filename)) return STListFarReader<A>::Open(filename); + else if (IsFst(filename)) + return FstFarReader<A>::Open(filename); return 0; } @@ -352,6 +522,8 @@ FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) { return STTableFarReader<A>::Open(filenames); else if (!filenames.empty() && IsSTList(filenames[0])) return STListFarReader<A>::Open(filenames); + else if (!filenames.empty() && IsFst(filenames[0])) + return FstFarReader<A>::Open(filenames); return 0; } |