diff -crB openfst-1.3.4.orig/src/include/fst/interval-set.h openfst-1.3.4/src/include/fst/interval-set.h *** openfst-1.3.4.orig/src/include/fst/interval-set.h 2013-01-24 06:36:23.000000000 +0400 --- openfst-1.3.4/src/include/fst/interval-set.h 2013-11-21 15:59:08.541136087 +0400 *************** *** 37,74 **** class IntervalSet { public: struct Interval { ! T begin; ! T end; ! Interval() : begin(-1), end(-1) {} ! Interval(T b, T e) : begin(b), end(e) {} bool operator<(const Interval &i) const { ! return begin < i.begin || (begin == i.begin && end > i.end); } bool operator==(const Interval &i) const { ! return begin == i.begin && end == i.end; } bool operator!=(const Interval &i) const { ! return begin != i.begin || end != i.end; } istream &Read(istream &strm) { T n; ReadType(strm, &n); ! begin = n; ReadType(strm, &n); ! end = n; return strm; } ostream &Write(ostream &strm) const { ! T n = begin; WriteType(strm, n); ! n = end; WriteType(strm, n); return strm; } --- 37,74 ---- class IntervalSet { public: struct Interval { ! T begin_; ! T end_; ! Interval() : begin_(-1), end_(-1) {} ! Interval(T b, T e) : begin_(b), end_(e) {} bool operator<(const Interval &i) const { ! return begin_ < i.begin_ || (begin_ == i.begin_ && end_ > i.end_); } bool operator==(const Interval &i) const { ! return begin_ == i.begin_ && end_ == i.end_; } bool operator!=(const Interval &i) const { ! return begin_ != i.begin_ || end_ != i.end_; } istream &Read(istream &strm) { T n; ReadType(strm, &n); ! begin_ = n; ReadType(strm, &n); ! end_ = n; return strm; } ostream &Write(ostream &strm) const { ! T n = begin_; WriteType(strm, n); ! n = end_; WriteType(strm, n); return strm; } *************** *** 108,114 **** lower_bound(intervals_.begin(), intervals_.end(), interval); if (lb == intervals_.begin()) return false; ! return (--lb)->end > value; } // Requires intervals be normalized. 
--- 108,114 ---- lower_bound(intervals_.begin(), intervals_.end(), interval); if (lb == intervals_.begin()) return false; ! return (--lb)->end_ > value; } // Requires intervals be normalized. *************** *** 123,129 **** bool Singleton() const { return intervals_.size() == 1 && ! intervals_[0].begin + 1 == intervals_[0].end; } --- 123,129 ---- bool Singleton() const { return intervals_.size() == 1 && ! intervals_[0].begin_ + 1 == intervals_[0].end_; } *************** *** 178,194 **** T size = 0; for (T i = 0; i < intervals_.size(); ++i) { Interval &inti = intervals_[i]; ! if (inti.begin == inti.end) continue; for (T j = i + 1; j < intervals_.size(); ++j) { Interval &intj = intervals_[j]; ! if (intj.begin > inti.end) break; ! if (intj.end > inti.end) ! inti.end = intj.end; ++i; } ! count_ += inti.end - inti.begin; intervals_[size++] = inti; } intervals_.resize(size); --- 178,194 ---- T size = 0; for (T i = 0; i < intervals_.size(); ++i) { Interval &inti = intervals_[i]; ! if (inti.begin_ == inti.end_) continue; for (T j = i + 1; j < intervals_.size(); ++j) { Interval &intj = intervals_[j]; ! if (intj.begin_ > inti.end_) break; ! if (intj.end_ > inti.end_) ! inti.end_ = intj.end_; ++i; } ! count_ += inti.end_ - inti.begin_; intervals_[size++] = inti; } intervals_.resize(size); *************** *** 208,224 **** oset->count_ = 0; while (it1 != intervals_.end() && it2 != iintervals->end()) { ! if (it1->end <= it2->begin) { ++it1; ! } else if (it2->end <= it1->begin) { ++it2; } else { Interval interval; ! interval.begin = max(it1->begin, it2->begin); ! interval.end = min(it1->end, it2->end); ointervals->push_back(interval); ! oset->count_ += interval.end - interval.begin; ! if (it1->end < it2->end) ++it1; else ++it2; --- 208,224 ---- oset->count_ = 0; while (it1 != intervals_.end() && it2 != iintervals->end()) { ! if (it1->end_ <= it2->begin_) { ++it1; ! } else if (it2->end_ <= it1->begin_) { ++it2; } else { Interval interval; ! 
interval.begin_ = max(it1->begin_, it2->begin_); ! interval.end_ = min(it1->end_, it2->end_); ointervals->push_back(interval); ! oset->count_ += interval.end_ - interval.begin_; ! if (it1->end_ < it2->end_) ++it1; else ++it2; *************** *** 235,255 **** oset->count_ = 0; Interval interval; ! interval.begin = 0; for (typename vector::const_iterator it = intervals_.begin(); it != intervals_.end(); ++it) { ! interval.end = min(it->begin, maxval); ! if (interval.begin < interval.end) { ointervals->push_back(interval); ! oset->count_ += interval.end - interval.begin; } ! interval.begin = it->end; } ! interval.end = maxval; ! if (interval.begin < interval.end) { ointervals->push_back(interval); ! oset->count_ += interval.end - interval.begin; } } --- 235,255 ---- oset->count_ = 0; Interval interval; ! interval.begin_ = 0; for (typename vector::const_iterator it = intervals_.begin(); it != intervals_.end(); ++it) { ! interval.end_ = min(it->begin_, maxval); ! if (interval.begin_ < interval.end_) { ointervals->push_back(interval); ! oset->count_ += interval.end_ - interval.begin_; } ! interval.begin_ = it->end_; } ! interval.end_ = maxval; ! if (interval.begin_ < interval.end_) { ointervals->push_back(interval); ! oset->count_ += interval.end_ - interval.begin_; } } *************** *** 263,269 **** oset->count_ = 0; } else { IntervalSet cset; ! iset.Complement(intervals_.back().end, &cset); Intersect(cset, oset); } } --- 263,269 ---- oset->count_ = 0; } else { IntervalSet cset; ! iset.Complement(intervals_.back().end_, &cset); Intersect(cset, oset); } } *************** *** 277,285 **** typename vector::const_iterator it2 = intervals->begin(); while (it1 != intervals_.end() && it2 != intervals->end()) { ! if (it1->end <= it2->begin) { ++it1; ! } else if (it2->end <= it1->begin) { ++it2; } else { return true; --- 277,285 ---- typename vector::const_iterator it2 = intervals->begin(); while (it1 != intervals_.end() && it2 != intervals->end()) { ! 
if (it1->end_ <= it2->begin_) { ++it1; ! } else if (it2->end_ <= it1->begin_) { ++it2; } else { return true; *************** *** 300,320 **** bool overlap = false; // point in both intervals_ and intervals while (it1 != intervals_.end() && it2 != intervals->end()) { ! if (it1->end <= it2->begin) { // no overlap - it1 first only1 = true; ++it1; ! } else if (it2->end <= it1->begin) { // no overlap - it2 first only2 = true; ++it2; ! } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals overlap = true; ++it1; ++it2; ! } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2 only2 = true; overlap = true; ++it1; ! } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1 only1 = true; overlap = true; ++it2; --- 300,320 ---- bool overlap = false; // point in both intervals_ and intervals while (it1 != intervals_.end() && it2 != intervals->end()) { ! if (it1->end_ <= it2->begin_) { // no overlap - it1 first only1 = true; ++it1; ! } else if (it2->end_ <= it1->begin_) { // no overlap - it2 first only2 = true; ++it2; ! } else if (it2->begin_ == it1->begin_ && it2->end_ == it1->end_) { // equals overlap = true; ++it1; ++it2; ! } else if (it2->begin_ <= it1->begin_ && it2->end_ >= it1->end_) { // 1 c 2 only2 = true; overlap = true; ++it1; ! } else if (it1->begin_ <= it2->begin_ && it1->end_ >= it2->end_) { // 2 c 1 only1 = true; overlap = true; ++it2; *************** *** 346,356 **** typename vector::const_iterator it2 = intervals->begin(); while (it1 != intervals_.end() && it2 != intervals->end()) { ! if (it1->end <= it2->begin) { // no overlap - it1 first ++it1; ! } else if (it2->begin < it1->begin || it2->end > it1->end) { // no C return false; ! } else if (it2->end == it1->end) { ++it1; ++it2; } else { --- 346,356 ---- typename vector::const_iterator it2 = intervals->begin(); while (it1 != intervals_.end() && it2 != intervals->end()) { ! if (it1->end_ <= it2->begin_) { // no overlap - it1 first ++it1; ! 
} else if (it2->begin_ < it1->begin_ || it2->end_ > it1->end_) { // no C return false; ! } else if (it2->end_ == it1->end_) { ++it1; ++it2; } else { *************** *** 370,376 **** ++it) { if (it != intervals->begin()) strm << ","; ! strm << "[" << it->begin << "," << it->end << ")"; } strm << "}"; return strm; --- 370,376 ---- ++it) { if (it != intervals->begin()) strm << ","; ! strm << "[" << it->begin_ << "," << it->end_ << ")"; } strm << "}"; return strm; diff -crB openfst-1.3.4.orig/src/include/fst/minimize.h openfst-1.3.4/src/include/fst/minimize.h *** openfst-1.3.4.orig/src/include/fst/minimize.h 2013-01-24 06:36:23.000000000 +0400 --- openfst-1.3.4/src/include/fst/minimize.h 2013-11-21 15:59:08.539136087 +0400 *************** *** 134,140 **** typedef typename A::Weight Weight; typedef ReverseArc RevA; ! CyclicMinimizer(const ExpandedFst& fst) { Initialize(fst); Compute(fst); } --- 134,147 ---- typedef typename A::Weight Weight; typedef ReverseArc RevA; ! CyclicMinimizer(const ExpandedFst& fst): ! // tell the Partition data-member to expect multiple repeated ! // calls to SplitOn with the same element if we are non-deterministic. ! P_(fst.Properties(kIDeterministic, true) == 0) { ! if(fst.Properties(kIDeterministic, true) == 0) ! CHECK(Weight::Properties() & kIdempotent); // this minimization ! // algorithm for non-deterministic FSTs can only work with idempotent ! // semirings. Initialize(fst); Compute(fst); } *************** *** 315,321 **** typedef typename A::StateId ClassId; typedef typename A::Weight Weight; ! AcyclicMinimizer(const ExpandedFst& fst) { Initialize(fst); Refine(fst); } --- 322,334 ---- typedef typename A::StateId ClassId; typedef typename A::Weight Weight; ! AcyclicMinimizer(const ExpandedFst& fst): ! // tell the Partition data-member to expect multiple repeated ! // calls to SplitOn with the same element if we are non-deterministic. ! partition_(fst.Properties(kIDeterministic, true) == 0) { ! 
if(fst.Properties(kIDeterministic, true) == 0) ! CHECK(Weight::Properties() & kIdempotent); // minimization for ! // non-deterministic FSTs can only work with idempotent semirings. Initialize(fst); Refine(fst); } *************** *** 531,543 **** void Minimize(MutableFst* fst, MutableFst* sfst = 0, float delta = kDelta) { ! uint64 props = fst->Properties(kAcceptor | kIDeterministic| ! kWeighted | kUnweighted, true); ! if (!(props & kIDeterministic)) { ! FSTERROR() << "FST is not deterministic"; ! fst->SetProperties(kError, kError); ! return; ! } if (!(props & kAcceptor)) { // weighted transducer VectorFst< GallicArc > gfst; --- 544,550 ---- void Minimize(MutableFst* fst, MutableFst* sfst = 0, float delta = kDelta) { ! uint64 props = fst->Properties(kAcceptor | kWeighted | kUnweighted, true); if (!(props & kAcceptor)) { // weighted transducer VectorFst< GallicArc > gfst; diff -crB openfst-1.3.4.orig/src/include/fst/partition.h openfst-1.3.4/src/include/fst/partition.h *** openfst-1.3.4.orig/src/include/fst/partition.h 2013-01-24 06:36:23.000000000 +0400 --- openfst-1.3.4/src/include/fst/partition.h 2013-11-21 15:59:08.539136087 +0400 *************** *** 43,50 **** friend class PartitionIterator; struct Element { ! Element() : value(0), next(0), prev(0) {} ! Element(T v) : value(v), next(0), prev(0) {} T value; Element* next; --- 43,50 ---- friend class PartitionIterator; struct Element { ! Element() : value(0), next(0), prev(0) {} ! Element(T v) : value(v), next(0), prev(0) {} T value; Element* next; *************** *** 52,60 **** }; public: ! Partition() {} ! Partition(T num_states) { Initialize(num_states); } --- 52,62 ---- }; public: ! Partition(bool allow_repeated_split): ! allow_repeated_split_(allow_repeated_split) {} ! Partition(bool allow_repeated_split, T num_states): ! allow_repeated_split_(allow_repeated_split) { Initialize(num_states); } *************** *** 137,152 **** if (class_size_[class_id] == 1) return; // first time class is split ! 
if (split_size_[class_id] == 0) visited_classes_.push_back(class_id); ! // increment size of split (set of element at head of chain) split_size_[class_id]++; ! // update split point ! if (class_split_[class_id] == 0) ! class_split_[class_id] = classes_[class_id]; ! if (class_split_[class_id] == elements_[element_id]) class_split_[class_id] = elements_[element_id]->next; // move to head of chain in same class --- 139,154 ---- if (class_size_[class_id] == 1) return; // first time class is split ! if (split_size_[class_id] == 0) { visited_classes_.push_back(class_id); ! class_split_[class_id] = classes_[class_id]; ! } // increment size of split (set of element at head of chain) split_size_[class_id]++; ! // update split point ! if (class_split_[class_id] != 0 ! && class_split_[class_id] == elements_[element_id]) class_split_[class_id] = elements_[element_id]->next; // move to head of chain in same class *************** *** 157,165 **** // class indices of the newly created class. Returns the new_class id // or -1 if no new class was created. T SplitRefine(T class_id) { // only split if necessary ! if (class_size_[class_id] == split_size_[class_id]) { ! class_split_[class_id] = 0; split_size_[class_id] = 0; return -1; } else { --- 159,169 ---- // class indices of the newly created class. Returns the new_class id // or -1 if no new class was created. T SplitRefine(T class_id) { + + Element* split_el = class_split_[class_id]; // only split if necessary ! //if (class_size_[class_id] == split_size_[class_id]) { ! if(split_el == NULL) { // we split on everything... 
split_size_[class_id] = 0; return -1; } else { *************** *** 163,180 **** split_size_[class_id] = 0; return -1; } else { - T new_class = AddClass(); size_t remainder = class_size_[class_id] - split_size_[class_id]; if (remainder < split_size_[class_id]) { // add smaller - Element* split_el = class_split_[class_id]; classes_[new_class] = split_el; - class_size_[class_id] = split_size_[class_id]; - class_size_[new_class] = remainder; split_el->prev->next = 0; split_el->prev = 0; } else { - Element* split_el = class_split_[class_id]; classes_[new_class] = classes_[class_id]; class_size_[class_id] = remainder; class_size_[new_class] = split_size_[class_id]; --- 167,189 ---- split_size_[class_id] = 0; return -1; } else { T new_class = AddClass(); + + if(allow_repeated_split_) { // split_size_ is possibly + // inaccurate, so work it out exactly. + size_t split_count; Element *e; + for(split_count=0,e=classes_[class_id]; + e != split_el; split_count++, e=e->next); + split_size_[class_id] = split_count; + } size_t remainder = class_size_[class_id] - split_size_[class_id]; if (remainder < split_size_[class_id]) { // add smaller classes_[new_class] = split_el; split_el->prev->next = 0; split_el->prev = 0; + class_size_[class_id] = split_size_[class_id]; + class_size_[new_class] = remainder; } else { classes_[new_class] = classes_[class_id]; class_size_[class_id] = remainder; class_size_[new_class] = split_size_[class_id]; *************** *** 245,254 **** --- 254,269 ---- vector class_size_; // size of split for each class + // in the nondeterministic case, split_size_ is actually an upper + // bound on the size of split for each class. vector split_size_; // set of visited classes to be used in split refine vector visited_classes_; + + // true if the input fst was non-deterministic: SplitOn may then be called + // repeatedly with the same element, so SplitRefine recomputes split_size_. + bool allow_repeated_split_; };