| // string-weight.h |
| |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Copyright 2005-2010 Google, Inc. |
| // Author: riley@google.com (Michael Riley) |
| // |
| // \file |
| // String weight set and associated semiring operation definitions. |
| |
| #ifndef FST_LIB_STRING_WEIGHT_H__ |
| #define FST_LIB_STRING_WEIGHT_H__ |
| |
| #include <list> |
| #include <string> |
| |
| #include <fst/product-weight.h> |
| #include <fst/weight.h> |
| |
| namespace fst { |
| |
| const int kStringInfinity = -1; // Label for the infinite string |
| const int kStringBad = -2; // Label for a non-string |
| const char kStringSeparator = '_'; // Label separator in strings |
| |
| // Determines whether to use left or right string semiring. Includes |
| // restricted versions that signal an error if proper prefixes |
| // (suffixes) would otherwise be returned by Plus, useful with various |
| // algorithms that require functional transducer input with the |
| // string semirings. |
| enum StringType { STRING_LEFT = 0, STRING_RIGHT = 1 , |
| STRING_LEFT_RESTRICT = 2, STRING_RIGHT_RESTRICT }; |
| |
| #define REVERSE_STRING_TYPE(S) \ |
| ((S) == STRING_LEFT ? STRING_RIGHT : \ |
| ((S) == STRING_RIGHT ? STRING_LEFT : \ |
| ((S) == STRING_LEFT_RESTRICT ? STRING_RIGHT_RESTRICT : \ |
| STRING_LEFT_RESTRICT))) |
| |
| template <typename L, StringType S = STRING_LEFT> |
| class StringWeight; |
| |
| template <typename L, StringType S = STRING_LEFT> |
| class StringWeightIterator; |
| |
| template <typename L, StringType S = STRING_LEFT> |
| class StringWeightReverseIterator; |
| |
| template <typename L, StringType S> |
| bool operator==(const StringWeight<L, S> &, const StringWeight<L, S> &); |
| |
| |
| // String semiring: (longest_common_prefix/suffix, ., Infinity, Epsilon) |
| template <typename L, StringType S> |
| class StringWeight { |
| public: |
| typedef L Label; |
| typedef StringWeight<L, REVERSE_STRING_TYPE(S)> ReverseWeight; |
| |
| friend class StringWeightIterator<L, S>; |
| friend class StringWeightReverseIterator<L, S>; |
| friend bool operator==<>(const StringWeight<L, S> &, |
| const StringWeight<L, S> &); |
| |
| StringWeight() { Init(); } |
| |
| template <typename Iter> |
| StringWeight(const Iter &begin, const Iter &end) { |
| Init(); |
| for (Iter iter = begin; iter != end; ++iter) |
| PushBack(*iter); |
| } |
| |
| explicit StringWeight(L l) { Init(); PushBack(l); } |
| |
| static const StringWeight<L, S> &Zero() { |
| static const StringWeight<L, S> zero(kStringInfinity); |
| return zero; |
| } |
| |
| static const StringWeight<L, S> &One() { |
| static const StringWeight<L, S> one; |
| return one; |
| } |
| |
| static const StringWeight<L, S> &NoWeight() { |
| static const StringWeight<L, S> no_weight(kStringBad); |
| return no_weight; |
| } |
| |
| static const string &Type() { |
| static const string type = |
| S == STRING_LEFT ? "string" : |
| (S == STRING_RIGHT ? "right_string" : |
| (S == STRING_LEFT_RESTRICT ? "restricted_string" : |
| "right_restricted_string")); |
| return type; |
| } |
| |
| bool Member() const; |
| |
| istream &Read(istream &strm); |
| |
| ostream &Write(ostream &strm) const; |
| |
| size_t Hash() const; |
| |
| StringWeight<L, S> Quantize(float delta = kDelta) const { |
| return *this; |
| } |
| |
| ReverseWeight Reverse() const; |
| |
| static uint64 Properties() { |
| return (S == STRING_LEFT || S == STRING_LEFT_RESTRICT ? |
| kLeftSemiring : kRightSemiring) | kIdempotent; |
| } |
| |
| // NB: This needs to be uncommented only if default fails for this impl. |
| // StringWeight<L, S> &operator=(const StringWeight<L, S> &w); |
| |
| // These operations combined with the StringWeightIterator and |
| // StringWeightReverseIterator provide the access and mutation of |
| // the string internal elements. |
| |
| // Common initializer among constructors. |
| void Init() { first_ = 0; } |
| |
| // Clear existing StringWeight. |
| void Clear() { first_ = 0; rest_.clear(); } |
| |
| size_t Size() const { return first_ ? rest_.size() + 1 : 0; } |
| |
| void PushFront(L l) { |
| if (first_) |
| rest_.push_front(first_); |
| first_ = l; |
| } |
| |
| void PushBack(L l) { |
| if (!first_) |
| first_ = l; |
| else |
| rest_.push_back(l); |
| } |
| |
| private: |
| L first_; // first label in string (0 if empty) |
| list<L> rest_; // remaining labels in string |
| }; |
| |
| |
| // Traverses string in forward direction. |
| template <typename L, StringType S> |
| class StringWeightIterator { |
| public: |
| explicit StringWeightIterator(const StringWeight<L, S>& w) |
| : first_(w.first_), rest_(w.rest_), init_(true), |
| iter_(rest_.begin()) {} |
| |
| bool Done() const { |
| if (init_) return first_ == 0; |
| else return iter_ == rest_.end(); |
| } |
| |
| const L& Value() const { return init_ ? first_ : *iter_; } |
| |
| void Next() { |
| if (init_) init_ = false; |
| else ++iter_; |
| } |
| |
| void Reset() { |
| init_ = true; |
| iter_ = rest_.begin(); |
| } |
| |
| private: |
| const L &first_; |
| const list<L> &rest_; |
| bool init_; // in the initialized state? |
| typename list<L>::const_iterator iter_; |
| |
| DISALLOW_COPY_AND_ASSIGN(StringWeightIterator); |
| }; |
| |
| |
| // Traverses string in backward direction. |
| template <typename L, StringType S> |
| class StringWeightReverseIterator { |
| public: |
| explicit StringWeightReverseIterator(const StringWeight<L, S>& w) |
| : first_(w.first_), rest_(w.rest_), fin_(first_ == 0), |
| iter_(rest_.rbegin()) {} |
| |
| bool Done() const { return fin_; } |
| |
| const L& Value() const { return iter_ == rest_.rend() ? first_ : *iter_; } |
| |
| void Next() { |
| if (iter_ == rest_.rend()) fin_ = true; |
| else ++iter_; |
| } |
| |
| void Reset() { |
| fin_ = false; |
| iter_ = rest_.rbegin(); |
| } |
| |
| private: |
| const L &first_; |
| const list<L> &rest_; |
| bool fin_; // in the final state? |
| typename list<L>::const_reverse_iterator iter_; |
| |
| DISALLOW_COPY_AND_ASSIGN(StringWeightReverseIterator); |
| }; |
| |
| |
| // StringWeight member functions follow that require |
| // StringWeightIterator or StringWeightReverseIterator. |
| |
| template <typename L, StringType S> |
| inline istream &StringWeight<L, S>::Read(istream &strm) { |
| Clear(); |
| int32 size; |
| ReadType(strm, &size); |
| for (int i = 0; i < size; ++i) { |
| L label; |
| ReadType(strm, &label); |
| PushBack(label); |
| } |
| return strm; |
| } |
| |
| template <typename L, StringType S> |
| inline ostream &StringWeight<L, S>::Write(ostream &strm) const { |
| int32 size = Size(); |
| WriteType(strm, size); |
| for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next()) { |
| L label = iter.Value(); |
| WriteType(strm, label); |
| } |
| return strm; |
| } |
| |
| template <typename L, StringType S> |
| inline bool StringWeight<L, S>::Member() const { |
| if (Size() != 1) |
| return true; |
| StringWeightIterator<L, S> iter(*this); |
| return iter.Value() != kStringBad; |
| } |
| |
| template <typename L, StringType S> |
| inline typename StringWeight<L, S>::ReverseWeight |
| StringWeight<L, S>::Reverse() const { |
| ReverseWeight rw; |
| for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next()) |
| rw.PushFront(iter.Value()); |
| return rw; |
| } |
| |
| template <typename L, StringType S> |
| inline size_t StringWeight<L, S>::Hash() const { |
| size_t h = 0; |
| for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next()) |
| h ^= h<<1 ^ iter.Value(); |
| return h; |
| } |
| |
| // NB: This needs to be uncommented only if default fails for this the impl. |
| // |
| // template <typename L, StringType S> |
| // inline StringWeight<L, S> |
| // &StringWeight<L, S>::operator=(const StringWeight<L, S> &w) { |
| // if (this != &w) { |
| // Clear(); |
| // for (StringWeightIterator<L, S> iter(w); !iter.Done(); iter.Next()) |
| // PushBack(iter.Value()); |
| // } |
| // return *this; |
| // } |
| |
| template <typename L, StringType S> |
| inline bool operator==(const StringWeight<L, S> &w1, |
| const StringWeight<L, S> &w2) { |
| if (w1.Size() != w2.Size()) |
| return false; |
| |
| StringWeightIterator<L, S> iter1(w1); |
| StringWeightIterator<L, S> iter2(w2); |
| |
| for (; !iter1.Done() ; iter1.Next(), iter2.Next()) |
| if (iter1.Value() != iter2.Value()) |
| return false; |
| |
| return true; |
| } |
| |
| template <typename L, StringType S> |
| inline bool operator!=(const StringWeight<L, S> &w1, |
| const StringWeight<L, S> &w2) { |
| return !(w1 == w2); |
| } |
| |
| template <typename L, StringType S> |
| inline bool ApproxEqual(const StringWeight<L, S> &w1, |
| const StringWeight<L, S> &w2, |
| float delta = kDelta) { |
| return w1 == w2; |
| } |
| |
| template <typename L, StringType S> |
| inline ostream &operator<<(ostream &strm, const StringWeight<L, S> &w) { |
| StringWeightIterator<L, S> iter(w); |
| if (iter.Done()) |
| return strm << "Epsilon"; |
| else if (iter.Value() == kStringInfinity) |
| return strm << "Infinity"; |
| else if (iter.Value() == kStringBad) |
| return strm << "BadString"; |
| else |
| for (size_t i = 0; !iter.Done(); ++i, iter.Next()) { |
| if (i > 0) |
| strm << kStringSeparator; |
| strm << iter.Value(); |
| } |
| return strm; |
| } |
| |
| template <typename L, StringType S> |
| inline istream &operator>>(istream &strm, StringWeight<L, S> &w) { |
| string s; |
| strm >> s; |
| if (s == "Infinity") { |
| w = StringWeight<L, S>::Zero(); |
| } else if (s == "Epsilon") { |
| w = StringWeight<L, S>::One(); |
| } else { |
| w.Clear(); |
| char *p = 0; |
| for (const char *cs = s.c_str(); !p || *p != '\0'; cs = p + 1) { |
| int l = strtoll(cs, &p, 10); |
| if (p == cs || (*p != 0 && *p != kStringSeparator)) { |
| strm.clear(std::ios::badbit); |
| break; |
| } |
| w.PushBack(l); |
| } |
| } |
| return strm; |
| } |
| |
| |
| // Default is for the restricted left and right semirings. String |
| // equality is required (for non-Zero() input. This restriction |
| // is used in e.g. Determinize to ensure functional input. |
| template <typename L, StringType S> inline StringWeight<L, S> |
| Plus(const StringWeight<L, S> &w1, |
| const StringWeight<L, S> &w2) { |
| if (!w1.Member() || !w2.Member()) |
| return StringWeight<L, S>::NoWeight(); |
| if (w1 == StringWeight<L, S>::Zero()) |
| return w2; |
| if (w2 == StringWeight<L, S>::Zero()) |
| return w1; |
| |
| if (w1 != w2) { |
| FSTERROR() << "StringWeight::Plus: unequal arguments " |
| << "(non-functional FST?)" |
| << " w1 = " << w1 |
| << " w2 = " << w2; |
| return StringWeight<L, S>::NoWeight(); |
| } |
| |
| return w1; |
| } |
| |
| |
| // Longest common prefix for left string semiring. |
| template <typename L> inline StringWeight<L, STRING_LEFT> |
| Plus(const StringWeight<L, STRING_LEFT> &w1, |
| const StringWeight<L, STRING_LEFT> &w2) { |
| if (!w1.Member() || !w2.Member()) |
| return StringWeight<L, STRING_LEFT>::NoWeight(); |
| if (w1 == StringWeight<L, STRING_LEFT>::Zero()) |
| return w2; |
| if (w2 == StringWeight<L, STRING_LEFT>::Zero()) |
| return w1; |
| |
| StringWeight<L, STRING_LEFT> sum; |
| StringWeightIterator<L, STRING_LEFT> iter1(w1); |
| StringWeightIterator<L, STRING_LEFT> iter2(w2); |
| for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value(); |
| iter1.Next(), iter2.Next()) |
| sum.PushBack(iter1.Value()); |
| return sum; |
| } |
| |
| |
| // Longest common suffix for right string semiring. |
| template <typename L> inline StringWeight<L, STRING_RIGHT> |
| Plus(const StringWeight<L, STRING_RIGHT> &w1, |
| const StringWeight<L, STRING_RIGHT> &w2) { |
| if (!w1.Member() || !w2.Member()) |
| return StringWeight<L, STRING_RIGHT>::NoWeight(); |
| if (w1 == StringWeight<L, STRING_RIGHT>::Zero()) |
| return w2; |
| if (w2 == StringWeight<L, STRING_RIGHT>::Zero()) |
| return w1; |
| |
| StringWeight<L, STRING_RIGHT> sum; |
| StringWeightReverseIterator<L, STRING_RIGHT> iter1(w1); |
| StringWeightReverseIterator<L, STRING_RIGHT> iter2(w2); |
| for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value(); |
| iter1.Next(), iter2.Next()) |
| sum.PushFront(iter1.Value()); |
| return sum; |
| } |
| |
| |
| template <typename L, StringType S> |
| inline StringWeight<L, S> Times(const StringWeight<L, S> &w1, |
| const StringWeight<L, S> &w2) { |
| if (!w1.Member() || !w2.Member()) |
| return StringWeight<L, S>::NoWeight(); |
| if (w1 == StringWeight<L, S>::Zero() || w2 == StringWeight<L, S>::Zero()) |
| return StringWeight<L, S>::Zero(); |
| |
| StringWeight<L, S> prod(w1); |
| for (StringWeightIterator<L, S> iter(w2); !iter.Done(); iter.Next()) |
| prod.PushBack(iter.Value()); |
| |
| return prod; |
| } |
| |
| |
| // Default is for left division in the left string and the |
| // left restricted string semirings. |
| template <typename L, StringType S> inline StringWeight<L, S> |
| Divide(const StringWeight<L, S> &w1, |
| const StringWeight<L, S> &w2, |
| DivideType typ) { |
| |
| if (typ != DIVIDE_LEFT) { |
| FSTERROR() << "StringWeight::Divide: only left division is defined " |
| << "for the " << StringWeight<L, S>::Type() << " semiring"; |
| return StringWeight<L, S>::NoWeight(); |
| } |
| |
| if (!w1.Member() || !w2.Member()) |
| return StringWeight<L, S>::NoWeight(); |
| |
| if (w2 == StringWeight<L, S>::Zero()) |
| return StringWeight<L, S>(kStringBad); |
| else if (w1 == StringWeight<L, S>::Zero()) |
| return StringWeight<L, S>::Zero(); |
| |
| StringWeight<L, S> div; |
| StringWeightIterator<L, S> iter(w1); |
| for (int i = 0; !iter.Done(); iter.Next(), ++i) { |
| if (i >= w2.Size()) |
| div.PushBack(iter.Value()); |
| } |
| return div; |
| } |
| |
| |
| // Right division in the right string semiring. |
| template <typename L> inline StringWeight<L, STRING_RIGHT> |
| Divide(const StringWeight<L, STRING_RIGHT> &w1, |
| const StringWeight<L, STRING_RIGHT> &w2, |
| DivideType typ) { |
| |
| if (typ != DIVIDE_RIGHT) { |
| FSTERROR() << "StringWeight::Divide: only right division is defined " |
| << "for the right string semiring"; |
| return StringWeight<L, STRING_RIGHT>::NoWeight(); |
| } |
| |
| if (!w1.Member() || !w2.Member()) |
| return StringWeight<L, STRING_RIGHT>::NoWeight(); |
| |
| if (w2 == StringWeight<L, STRING_RIGHT>::Zero()) |
| return StringWeight<L, STRING_RIGHT>(kStringBad); |
| else if (w1 == StringWeight<L, STRING_RIGHT>::Zero()) |
| return StringWeight<L, STRING_RIGHT>::Zero(); |
| |
| StringWeight<L, STRING_RIGHT> div; |
| StringWeightReverseIterator<L, STRING_RIGHT> iter(w1); |
| for (int i = 0; !iter.Done(); iter.Next(), ++i) { |
| if (i >= w2.Size()) |
| div.PushFront(iter.Value()); |
| } |
| return div; |
| } |
| |
| |
| // Right division in the right restricted string semiring. |
| template <typename L> inline StringWeight<L, STRING_RIGHT_RESTRICT> |
| Divide(const StringWeight<L, STRING_RIGHT_RESTRICT> &w1, |
| const StringWeight<L, STRING_RIGHT_RESTRICT> &w2, |
| DivideType typ) { |
| |
| if (typ != DIVIDE_RIGHT) { |
| FSTERROR() << "StringWeight::Divide: only right division is defined " |
| << "for the right restricted string semiring"; |
| return StringWeight<L, STRING_RIGHT_RESTRICT>::NoWeight(); |
| } |
| |
| if (!w1.Member() || !w2.Member()) |
| return StringWeight<L, STRING_RIGHT_RESTRICT>::NoWeight(); |
| |
| if (w2 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero()) |
| return StringWeight<L, STRING_RIGHT_RESTRICT>(kStringBad); |
| else if (w1 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero()) |
| return StringWeight<L, STRING_RIGHT_RESTRICT>::Zero(); |
| |
| StringWeight<L, STRING_RIGHT_RESTRICT> div; |
| StringWeightReverseIterator<L, STRING_RIGHT_RESTRICT> iter(w1); |
| for (int i = 0; !iter.Done(); iter.Next(), ++i) { |
| if (i >= w2.Size()) |
| div.PushFront(iter.Value()); |
| } |
| return div; |
| } |
| |
| |
| // Product of string weight and an arbitray weight. |
| template <class L, class W, StringType S = STRING_LEFT> |
| struct GallicWeight : public ProductWeight<StringWeight<L, S>, W> { |
| typedef GallicWeight<L, typename W::ReverseWeight, REVERSE_STRING_TYPE(S)> |
| ReverseWeight; |
| |
| GallicWeight() {} |
| |
| GallicWeight(StringWeight<L, S> w1, W w2) |
| : ProductWeight<StringWeight<L, S>, W>(w1, w2) {} |
| |
| explicit GallicWeight(const string &s, int *nread = 0) |
| : ProductWeight<StringWeight<L, S>, W>(s, nread) {} |
| |
| GallicWeight(const ProductWeight<StringWeight<L, S>, W> &w) |
| : ProductWeight<StringWeight<L, S>, W>(w) {} |
| }; |
| |
| } // namespace fst |
| |
| #endif // FST_LIB_STRING_WEIGHT_H__ |