aboutsummaryrefslogtreecommitdiff
path: root/src/include/fst/extensions/far/far.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/fst/extensions/far/far.h')
-rw-r--r--src/include/fst/extensions/far/far.h184
1 files changed, 178 insertions, 6 deletions
diff --git a/src/include/fst/extensions/far/far.h b/src/include/fst/extensions/far/far.h
index 82b9e5c..acce76e 100644
--- a/src/include/fst/extensions/far/far.h
+++ b/src/include/fst/extensions/far/far.h
@@ -32,6 +32,13 @@ namespace fst {
enum FarEntryType { FET_LINE, FET_FILE };
enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 };
+inline bool IsFst(const string &filename) {
+ ifstream strm(filename.c_str());
+ if (!strm)
+ return false;
+ return IsFstHeader(strm, filename);
+}
+
// FST archive header class
class FarHeader {
public:
@@ -40,8 +47,11 @@ class FarHeader {
bool Read(const string &filename) {
FstHeader fsthdr;
- if (filename.empty()) { // Header reading unsupported on stdin.
- return false;
+ if (filename.empty()) {
+ // Header reading unsupported on stdin. Assumes STList and StdArc.
+ fartype_ = "stlist";
+ arctype_ = "standard";
+ return true;
} else if (IsSTTable(filename)) { // Check if STTable
ReadSTTableHeader(filename, &fsthdr);
fartype_ = "sttable";
@@ -52,6 +62,12 @@ class FarHeader {
fartype_ = "sttable";
arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
return true;
+ } else if (IsFst(filename)) { // Check if Fst
+ ifstream istrm(filename.c_str());
+ fsthdr.Read(istrm, filename);
+ fartype_ = "fst";
+ arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
+ return true;
}
return false;
}
@@ -61,8 +77,12 @@ class FarHeader {
string arctype_;
};
-enum FarType { FAR_DEFAULT = 0, FAR_STTABLE = 1, FAR_STLIST = 2,
- FAR_SSTABLE = 3 };
+enum FarType {
+ FAR_DEFAULT = 0,
+ FAR_STTABLE = 1,
+ FAR_STLIST = 2,
+ FAR_FST = 3,
+};
// This class creates an archive of FSTs.
template <class A>
@@ -153,7 +173,7 @@ class STTableFarWriter : public FarWriter<A> {
public:
typedef A Arc;
- static STTableFarWriter *Create(const string filename) {
+ static STTableFarWriter *Create(const string &filename) {
STTableWriter<Fst<A>, FstWriter<A> > *writer =
STTableWriter<Fst<A>, FstWriter<A> >::Create(filename);
return new STTableFarWriter(writer);
@@ -183,7 +203,7 @@ class STListFarWriter : public FarWriter<A> {
public:
typedef A Arc;
- static STListFarWriter *Create(const string filename) {
+ static STListFarWriter *Create(const string &filename) {
STListWriter<Fst<A>, FstWriter<A> > *writer =
STListWriter<Fst<A>, FstWriter<A> >::Create(filename);
return new STListFarWriter(writer);
@@ -209,6 +229,43 @@ class STListFarWriter : public FarWriter<A> {
template <class A>
+class FstFarWriter : public FarWriter<A> {
+ public:
+ typedef A Arc;
+
+ explicit FstFarWriter(const string &filename)
+ : filename_(filename), error_(false), written_(false) {}
+
+ static FstFarWriter *Create(const string &filename) {
+ return new FstFarWriter(filename);
+ }
+
+ void Add(const string &key, const Fst<A> &fst) {
+ if (written_) {
+ LOG(WARNING) << "FstFarWriter::Add: only one Fst supported,"
+ << " subsequent entries discarded.";
+ } else {
+ error_ = !fst.Write(filename_);
+ written_ = true;
+ }
+ }
+
+ FarType Type() const { return FAR_FST; }
+
+ bool Error() const { return error_; }
+
+ ~FstFarWriter() {}
+
+ private:
+ string filename_;
+ bool error_;
+ bool written_;
+
+ DISALLOW_COPY_AND_ASSIGN(FstFarWriter);
+};
+
+
+template <class A>
FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
switch(type) {
case FAR_DEFAULT:
@@ -220,6 +277,9 @@ FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
case FAR_STLIST:
return STListFarWriter<A>::Create(filename);
break;
+ case FAR_FST:
+ return FstFarWriter<A>::Create(filename);
+ break;
default:
LOG(ERROR) << "FarWriter::Create: unknown far type";
return 0;
@@ -331,6 +391,114 @@ class STListFarReader : public FarReader<A> {
DISALLOW_COPY_AND_ASSIGN(STListFarReader);
};
+template <class A>
+class FstFarReader : public FarReader<A> {
+ public:
+ typedef A Arc;
+
+ static FstFarReader *Open(const string &filename) {
+ vector<string> filenames;
+ filenames.push_back(filename);
+ return new FstFarReader<A>(filenames);
+ }
+
+ static FstFarReader *Open(const vector<string> &filenames) {
+ return new FstFarReader<A>(filenames);
+ }
+
+ FstFarReader(const vector<string> &filenames)
+ : keys_(filenames), has_stdin_(false), pos_(0), fst_(0), error_(false) {
+ sort(keys_.begin(), keys_.end());
+ streams_.resize(keys_.size(), 0);
+ for (size_t i = 0; i < keys_.size(); ++i) {
+ if (keys_[i].empty()) {
+ if (!has_stdin_) {
+ streams_[i] = &cin;
+ //sources_[i] = "stdin";
+ has_stdin_ = true;
+ } else {
+ FSTERROR() << "FstFarReader::FstFarReader: stdin should only "
+ << "appear once in the input file list.";
+ error_ = true;
+ return;
+ }
+ } else {
+ streams_[i] = new ifstream(
+ keys_[i].c_str(), ifstream::in | ifstream::binary);
+ }
+ }
+ if (pos_ >= keys_.size()) return;
+ ReadFst();
+ }
+
+ void Reset() {
+ if (has_stdin_) {
+ FSTERROR() << "FstFarReader::Reset: operation not supported on stdin";
+ error_ = true;
+ return;
+ }
+ pos_ = 0;
+ ReadFst();
+ }
+
+ bool Find(const string &key) {
+ if (has_stdin_) {
+ FSTERROR() << "FstFarReader::Find: operation not supported on stdin";
+ error_ = true;
+ return false;
+ }
+ pos_ = 0;//TODO
+ ReadFst();
+ return true;
+ }
+
+ bool Done() const { return error_ || pos_ >= keys_.size(); }
+
+ void Next() {
+ ++pos_;
+ ReadFst();
+ }
+
+ const string &GetKey() const {
+ return keys_[pos_];
+ }
+
+ const Fst<A> &GetFst() const {
+ return *fst_;
+ }
+
+ FarType Type() const { return FAR_FST; }
+
+ bool Error() const { return error_; }
+
+ ~FstFarReader() {
+ if (fst_) delete fst_;
+ for (size_t i = 0; i < keys_.size(); ++i)
+ delete streams_[i];
+ }
+
+ private:
+ void ReadFst() {
+ if (fst_) delete fst_;
+ if (pos_ >= keys_.size()) return;
+ streams_[pos_]->seekg(0);
+ fst_ = Fst<A>::Read(*streams_[pos_], FstReadOptions());
+ if (!fst_) {
+ FSTERROR() << "FstFarReader: error reading Fst from: " << keys_[pos_];
+ error_ = true;
+ }
+ }
+
+ private:
+ vector<string> keys_;
+ vector<istream*> streams_;
+ bool has_stdin_;
+ size_t pos_;
+ mutable Fst<A> *fst_;
+ mutable bool error_;
+
+ DISALLOW_COPY_AND_ASSIGN(FstFarReader);
+};
template <class A>
FarReader<A> *FarReader<A>::Open(const string &filename) {
@@ -340,6 +508,8 @@ FarReader<A> *FarReader<A>::Open(const string &filename) {
return STTableFarReader<A>::Open(filename);
else if (IsSTList(filename))
return STListFarReader<A>::Open(filename);
+ else if (IsFst(filename))
+ return FstFarReader<A>::Open(filename);
return 0;
}
@@ -352,6 +522,8 @@ FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) {
return STTableFarReader<A>::Open(filenames);
else if (!filenames.empty() && IsSTList(filenames[0]))
return STListFarReader<A>::Open(filenames);
+ else if (!filenames.empty() && IsFst(filenames[0]))
+ return FstFarReader<A>::Open(filenames);
return 0;
}