diff options
Diffstat (limited to 'sci-misc')
-rw-r--r-- | sci-misc/openfst/ChangeLog | 6 | ||||
-rw-r--r-- | sci-misc/openfst/Manifest | 2 | ||||
-rw-r--r-- | sci-misc/openfst/files/kaldi.patch | 599 | ||||
-rw-r--r-- | sci-misc/openfst/openfst-1.3.4.ebuild (renamed from sci-misc/openfst/openfst-1.3.2.ebuild) | 14 |
4 files changed, 610 insertions, 11 deletions
diff --git a/sci-misc/openfst/ChangeLog b/sci-misc/openfst/ChangeLog index b84b7dbd2..3d9cf2cd8 100644 --- a/sci-misc/openfst/ChangeLog +++ b/sci-misc/openfst/ChangeLog @@ -2,6 +2,12 @@ # Copyright 1999-2013 Gentoo Foundation; Distributed under the GPL v2 # $Header: $ +*openfst-1.3.4 (21 Nov 2013) + + 21 Nov 2013; Pavel Denisov <pavel.a.denisov@gmail.com> +files/kaldi.patch, + +openfst-1.3.4.ebuild, -openfst-1.3.2.ebuild: + sci-misc/openfst: version bump, patch from Kaldi + 03 Mar 2013; Justin Lecher <jlec@gentoo.org> openfst-1.2.7.ebuild, openfst-1.3.2.ebuild, metadata.xml: Add ~amd64 or ~x86 keywords for revdeps diff --git a/sci-misc/openfst/Manifest b/sci-misc/openfst/Manifest index 6c0467fd1..effcbf472 100644 --- a/sci-misc/openfst/Manifest +++ b/sci-misc/openfst/Manifest @@ -1,2 +1,2 @@ DIST openfst-1.2.7.tar.gz 668134 SHA256 a38b10a7a1b6c054fb15035f3197f566ac6b23bc2acd5958052165ae7ce5c11c SHA512 899297a12b51acef51a82148a04160acd4bb4e1925da18e20a810814401542aaa582928ae3b80b96f49890420796cae7acd7cbf1d21fb6bc6c755254f1ae2c67 WHIRLPOOL b9c1d9a7b592a43cd77aa6778451c9de8bbf135f39f1415d467b25279c6a360c9537dcdb8b9795023e482462668e61ad66de8e843f211b72a7986b448d6e200a -DIST openfst-1.3.2.tar.gz 723259 SHA256 4e1c2e4f6c54aef15cf13f4f01611ac72122a97214dfa7dab0b7d1a7e1214602 SHA512 5cbaebc3e99c2156379bdaeddef6972b37019cbdce7e2744cf5020f300e959410dce8d1fb1b354c3691975a2d1c0f40123feab46ded59919e4b13a010bbb78f0 WHIRLPOOL a0ca8e814ff1f7c09fd64687dacadf66da3945560e3b16f9afc797df391dc85b7d124314354ddd823ee142131b3d7bbd7914f5659e8c6fc49e8b0abac74be0b2 +DIST openfst-1.3.4.tar.gz 769158 SHA256 e95fa96674e1c9d6866bbf79b9cd755c5121ad165b76224c7c6bbfa139399a61 SHA512 6ed87cbb949776b8702545a51d129e03f33428369b20255d637191e054ce04e529969aef49549b098b144cfde45cd9b741e6cba6680f815c04ad5717fdb0cbd9 WHIRLPOOL 593fd22668044f9ec8df70c274ee910f9da85a48fcd33c73993e54afd21298208d69804342b4e3a5d36e59aa92e853e84d95d1623c3708e7e1fc70e42916949f diff --git a/sci-misc/openfst/files/kaldi.patch 
b/sci-misc/openfst/files/kaldi.patch new file mode 100644 index 000000000..3a3854eff --- /dev/null +++ b/sci-misc/openfst/files/kaldi.patch @@ -0,0 +1,599 @@ +diff -crB openfst-1.3.4.orig/src/include/fst/interval-set.h openfst-1.3.4/src/include/fst/interval-set.h +*** openfst-1.3.4.orig/src/include/fst/interval-set.h 2013-01-24 06:36:23.000000000 +0400 +--- openfst-1.3.4/src/include/fst/interval-set.h 2013-11-21 15:59:08.541136087 +0400 +*************** +*** 37,74 **** + class IntervalSet { + public: + struct Interval { +! T begin; +! T end; + +! Interval() : begin(-1), end(-1) {} + +! Interval(T b, T e) : begin(b), end(e) {} + + bool operator<(const Interval &i) const { +! return begin < i.begin || (begin == i.begin && end > i.end); + } + + bool operator==(const Interval &i) const { +! return begin == i.begin && end == i.end; + } + + bool operator!=(const Interval &i) const { +! return begin != i.begin || end != i.end; + } + + istream &Read(istream &strm) { + T n; + ReadType(strm, &n); +! begin = n; + ReadType(strm, &n); +! end = n; + return strm; + } + + ostream &Write(ostream &strm) const { +! T n = begin; + WriteType(strm, n); +! n = end; + WriteType(strm, n); + return strm; + } +--- 37,74 ---- + class IntervalSet { + public: + struct Interval { +! T begin_; +! T end_; + +! Interval() : begin_(-1), end_(-1) {} + +! Interval(T b, T e) : begin_(b), end_(e) {} + + bool operator<(const Interval &i) const { +! return begin_ < i.begin_ || (begin_ == i.begin_ && end_ > i.end_); + } + + bool operator==(const Interval &i) const { +! return begin_ == i.begin_ && end_ == i.end_; + } + + bool operator!=(const Interval &i) const { +! return begin_ != i.begin_ || end_ != i.end_; + } + + istream &Read(istream &strm) { + T n; + ReadType(strm, &n); +! begin_ = n; + ReadType(strm, &n); +! end_ = n; + return strm; + } + + ostream &Write(ostream &strm) const { +! T n = begin_; + WriteType(strm, n); +! 
n = end_; + WriteType(strm, n); + return strm; + } +*************** +*** 108,114 **** + lower_bound(intervals_.begin(), intervals_.end(), interval); + if (lb == intervals_.begin()) + return false; +! return (--lb)->end > value; + } + + // Requires intervals be normalized. +--- 108,114 ---- + lower_bound(intervals_.begin(), intervals_.end(), interval); + if (lb == intervals_.begin()) + return false; +! return (--lb)->end_ > value; + } + + // Requires intervals be normalized. +*************** +*** 123,129 **** + + bool Singleton() const { + return intervals_.size() == 1 && +! intervals_[0].begin + 1 == intervals_[0].end; + } + + +--- 123,129 ---- + + bool Singleton() const { + return intervals_.size() == 1 && +! intervals_[0].begin_ + 1 == intervals_[0].end_; + } + + +*************** +*** 178,194 **** + T size = 0; + for (T i = 0; i < intervals_.size(); ++i) { + Interval &inti = intervals_[i]; +! if (inti.begin == inti.end) + continue; + for (T j = i + 1; j < intervals_.size(); ++j) { + Interval &intj = intervals_[j]; +! if (intj.begin > inti.end) + break; +! if (intj.end > inti.end) +! inti.end = intj.end; + ++i; + } +! count_ += inti.end - inti.begin; + intervals_[size++] = inti; + } + intervals_.resize(size); +--- 178,194 ---- + T size = 0; + for (T i = 0; i < intervals_.size(); ++i) { + Interval &inti = intervals_[i]; +! if (inti.begin_ == inti.end_) + continue; + for (T j = i + 1; j < intervals_.size(); ++j) { + Interval &intj = intervals_[j]; +! if (intj.begin_ > inti.end_) + break; +! if (intj.end_ > inti.end_) +! inti.end_ = intj.end_; + ++i; + } +! count_ += inti.end_ - inti.begin_; + intervals_[size++] = inti; + } + intervals_.resize(size); +*************** +*** 208,224 **** + oset->count_ = 0; + + while (it1 != intervals_.end() && it2 != iintervals->end()) { +! if (it1->end <= it2->begin) { + ++it1; +! } else if (it2->end <= it1->begin) { + ++it2; + } else { + Interval interval; +! interval.begin = max(it1->begin, it2->begin); +! 
interval.end = min(it1->end, it2->end); + ointervals->push_back(interval); +! oset->count_ += interval.end - interval.begin; +! if (it1->end < it2->end) + ++it1; + else + ++it2; +--- 208,224 ---- + oset->count_ = 0; + + while (it1 != intervals_.end() && it2 != iintervals->end()) { +! if (it1->end_ <= it2->begin_) { + ++it1; +! } else if (it2->end_ <= it1->begin_) { + ++it2; + } else { + Interval interval; +! interval.begin_ = max(it1->begin_, it2->begin_); +! interval.end_ = min(it1->end_, it2->end_); + ointervals->push_back(interval); +! oset->count_ += interval.end_ - interval.begin_; +! if (it1->end_ < it2->end_) + ++it1; + else + ++it2; +*************** +*** 235,255 **** + oset->count_ = 0; + + Interval interval; +! interval.begin = 0; + for (typename vector<Interval>::const_iterator it = intervals_.begin(); + it != intervals_.end(); + ++it) { +! interval.end = min(it->begin, maxval); +! if (interval.begin < interval.end) { + ointervals->push_back(interval); +! oset->count_ += interval.end - interval.begin; + } +! interval.begin = it->end; + } +! interval.end = maxval; +! if (interval.begin < interval.end) { + ointervals->push_back(interval); +! oset->count_ += interval.end - interval.begin; + } + } + +--- 235,255 ---- + oset->count_ = 0; + + Interval interval; +! interval.begin_ = 0; + for (typename vector<Interval>::const_iterator it = intervals_.begin(); + it != intervals_.end(); + ++it) { +! interval.end_ = min(it->begin_, maxval); +! if (interval.begin_ < interval.end_) { + ointervals->push_back(interval); +! oset->count_ += interval.end_ - interval.begin_; + } +! interval.begin_ = it->end_; + } +! interval.end_ = maxval; +! if (interval.begin_ < interval.end_) { + ointervals->push_back(interval); +! oset->count_ += interval.end_ - interval.begin_; + } + } + +*************** +*** 263,269 **** + oset->count_ = 0; + } else { + IntervalSet<T> cset; +! 
iset.Complement(intervals_.back().end, &cset); + Intersect(cset, oset); + } + } +--- 263,269 ---- + oset->count_ = 0; + } else { + IntervalSet<T> cset; +! iset.Complement(intervals_.back().end_, &cset); + Intersect(cset, oset); + } + } +*************** +*** 277,285 **** + typename vector<Interval>::const_iterator it2 = intervals->begin(); + + while (it1 != intervals_.end() && it2 != intervals->end()) { +! if (it1->end <= it2->begin) { + ++it1; +! } else if (it2->end <= it1->begin) { + ++it2; + } else { + return true; +--- 277,285 ---- + typename vector<Interval>::const_iterator it2 = intervals->begin(); + + while (it1 != intervals_.end() && it2 != intervals->end()) { +! if (it1->end_ <= it2->begin_) { + ++it1; +! } else if (it2->end_ <= it1->begin_) { + ++it2; + } else { + return true; +*************** +*** 300,320 **** + bool overlap = false; // point in both intervals_ and intervals + + while (it1 != intervals_.end() && it2 != intervals->end()) { +! if (it1->end <= it2->begin) { // no overlap - it1 first + only1 = true; + ++it1; +! } else if (it2->end <= it1->begin) { // no overlap - it2 first + only2 = true; + ++it2; +! } else if (it2->begin == it1->begin && it2->end == it1->end) { // equals + overlap = true; + ++it1; + ++it2; +! } else if (it2->begin <= it1->begin && it2->end >= it1->end) { // 1 c 2 + only2 = true; + overlap = true; + ++it1; +! } else if (it1->begin <= it2->begin && it1->end >= it2->end) { // 2 c 1 + only1 = true; + overlap = true; + ++it2; +--- 300,320 ---- + bool overlap = false; // point in both intervals_ and intervals + + while (it1 != intervals_.end() && it2 != intervals->end()) { +! if (it1->end_ <= it2->begin_) { // no overlap - it1 first + only1 = true; + ++it1; +! } else if (it2->end_ <= it1->begin_) { // no overlap - it2 first + only2 = true; + ++it2; +! } else if (it2->begin_ == it1->begin_ && it2->end_ == it1->end_) { // equals + overlap = true; + ++it1; + ++it2; +! 
} else if (it2->begin_ <= it1->begin_ && it2->end_ >= it1->end_) { // 1 c 2 + only2 = true; + overlap = true; + ++it1; +! } else if (it1->begin_ <= it2->begin_ && it1->end_ >= it2->end_) { // 2 c 1 + only1 = true; + overlap = true; + ++it2; +*************** +*** 346,356 **** + typename vector<Interval>::const_iterator it2 = intervals->begin(); + + while (it1 != intervals_.end() && it2 != intervals->end()) { +! if (it1->end <= it2->begin) { // no overlap - it1 first + ++it1; +! } else if (it2->begin < it1->begin || it2->end > it1->end) { // no C + return false; +! } else if (it2->end == it1->end) { + ++it1; + ++it2; + } else { +--- 346,356 ---- + typename vector<Interval>::const_iterator it2 = intervals->begin(); + + while (it1 != intervals_.end() && it2 != intervals->end()) { +! if (it1->end_ <= it2->begin_) { // no overlap - it1 first + ++it1; +! } else if (it2->begin_ < it1->begin_ || it2->end_ > it1->end_) { // no C + return false; +! } else if (it2->end_ == it1->end_) { + ++it1; + ++it2; + } else { +*************** +*** 370,376 **** + ++it) { + if (it != intervals->begin()) + strm << ","; +! strm << "[" << it->begin << "," << it->end << ")"; + } + strm << "}"; + return strm; +--- 370,376 ---- + ++it) { + if (it != intervals->begin()) + strm << ","; +! strm << "[" << it->begin_ << "," << it->end_ << ")"; + } + strm << "}"; + return strm; +diff -crB openfst-1.3.4.orig/src/include/fst/minimize.h openfst-1.3.4/src/include/fst/minimize.h +*** openfst-1.3.4.orig/src/include/fst/minimize.h 2013-01-24 06:36:23.000000000 +0400 +--- openfst-1.3.4/src/include/fst/minimize.h 2013-11-21 15:59:08.539136087 +0400 +*************** +*** 134,140 **** + typedef typename A::Weight Weight; + typedef ReverseArc<A> RevA; + +! CyclicMinimizer(const ExpandedFst<A>& fst) { + Initialize(fst); + Compute(fst); + } +--- 134,147 ---- + typedef typename A::Weight Weight; + typedef ReverseArc<A> RevA; + +! CyclicMinimizer(const ExpandedFst<A>& fst): +! 
// tell the Partition data-member to expect multiple repeated +! // calls to SplitOn with the same element if we are non-deterministic. +! P_(fst.Properties(kIDeterministic, true) == 0) { +! if(fst.Properties(kIDeterministic, true) == 0) +! CHECK(Weight::Properties() & kIdempotent); // this minimization +! // algorithm for non-deterministic FSTs can only work with idempotent +! // semirings. + Initialize(fst); + Compute(fst); + } +*************** +*** 315,321 **** + typedef typename A::StateId ClassId; + typedef typename A::Weight Weight; + +! AcyclicMinimizer(const ExpandedFst<A>& fst) { + Initialize(fst); + Refine(fst); + } +--- 322,334 ---- + typedef typename A::StateId ClassId; + typedef typename A::Weight Weight; + +! AcyclicMinimizer(const ExpandedFst<A>& fst): +! // tell the Partition data-member to expect multiple repeated +! // calls to SplitOn with the same element if we are non-deterministic. +! partition_(fst.Properties(kIDeterministic, true) == 0) { +! if(fst.Properties(kIDeterministic, true) == 0) +! CHECK(Weight::Properties() & kIdempotent); // minimization for +! // non-deterministic FSTs can only work with idempotent semirings. + Initialize(fst); + Refine(fst); + } +*************** +*** 531,543 **** + void Minimize(MutableFst<A>* fst, + MutableFst<A>* sfst = 0, + float delta = kDelta) { +! uint64 props = fst->Properties(kAcceptor | kIDeterministic| +! kWeighted | kUnweighted, true); +! if (!(props & kIDeterministic)) { +! FSTERROR() << "FST is not deterministic"; +! fst->SetProperties(kError, kError); +! return; +! } + + if (!(props & kAcceptor)) { // weighted transducer + VectorFst< GallicArc<A, STRING_LEFT> > gfst; +--- 544,550 ---- + void Minimize(MutableFst<A>* fst, + MutableFst<A>* sfst = 0, + float delta = kDelta) { +! 
uint64 props = fst->Properties(kAcceptor | kWeighted | kUnweighted, true); + + if (!(props & kAcceptor)) { // weighted transducer + VectorFst< GallicArc<A, STRING_LEFT> > gfst; +diff -crB openfst-1.3.4.orig/src/include/fst/partition.h openfst-1.3.4/src/include/fst/partition.h +*** openfst-1.3.4.orig/src/include/fst/partition.h 2013-01-24 06:36:23.000000000 +0400 +--- openfst-1.3.4/src/include/fst/partition.h 2013-11-21 15:59:08.539136087 +0400 +*************** +*** 43,50 **** + friend class PartitionIterator<T>; + + struct Element { +! Element() : value(0), next(0), prev(0) {} +! Element(T v) : value(v), next(0), prev(0) {} + + T value; + Element* next; +--- 43,50 ---- + friend class PartitionIterator<T>; + + struct Element { +! Element() : value(0), next(0), prev(0) {} +! Element(T v) : value(v), next(0), prev(0) {} + + T value; + Element* next; +*************** +*** 52,60 **** + }; + + public: +! Partition() {} + +! Partition(T num_states) { + Initialize(num_states); + } + +--- 52,62 ---- + }; + + public: +! Partition(bool allow_repeated_split): +! allow_repeated_split_(allow_repeated_split) {} + +! Partition(bool allow_repeated_split, T num_states): +! allow_repeated_split_(allow_repeated_split) { + Initialize(num_states); + } + +*************** +*** 137,152 **** + if (class_size_[class_id] == 1) return; + + // first time class is split +! if (split_size_[class_id] == 0) + visited_classes_.push_back(class_id); +! + // increment size of split (set of element at head of chain) + split_size_[class_id]++; +! + // update split point +! if (class_split_[class_id] == 0) +! class_split_[class_id] = classes_[class_id]; +! if (class_split_[class_id] == elements_[element_id]) + class_split_[class_id] = elements_[element_id]->next; + + // move to head of chain in same class +--- 139,154 ---- + if (class_size_[class_id] == 1) return; + + // first time class is split +! if (split_size_[class_id] == 0) { + visited_classes_.push_back(class_id); +! 
class_split_[class_id] = classes_[class_id]; +! } + // increment size of split (set of element at head of chain) + split_size_[class_id]++; +! + // update split point +! if (class_split_[class_id] != 0 +! && class_split_[class_id] == elements_[element_id]) + class_split_[class_id] = elements_[element_id]->next; + + // move to head of chain in same class +*************** +*** 157,165 **** + // class indices of the newly created class. Returns the new_class id + // or -1 if no new class was created. + T SplitRefine(T class_id) { + // only split if necessary +! if (class_size_[class_id] == split_size_[class_id]) { +! class_split_[class_id] = 0; + split_size_[class_id] = 0; + return -1; + } else { +--- 159,169 ---- + // class indices of the newly created class. Returns the new_class id + // or -1 if no new class was created. + T SplitRefine(T class_id) { ++ ++ Element* split_el = class_split_[class_id]; + // only split if necessary +! //if (class_size_[class_id] == split_size_[class_id]) { +! if(split_el == NULL) { // we split on everything... + split_size_[class_id] = 0; + return -1; + } else { +*************** +*** 163,180 **** + split_size_[class_id] = 0; + return -1; + } else { +- + T new_class = AddClass(); + size_t remainder = class_size_[class_id] - split_size_[class_id]; + if (remainder < split_size_[class_id]) { // add smaller +- Element* split_el = class_split_[class_id]; + classes_[new_class] = split_el; +- class_size_[class_id] = split_size_[class_id]; +- class_size_[new_class] = remainder; + split_el->prev->next = 0; + split_el->prev = 0; + } else { +- Element* split_el = class_split_[class_id]; + classes_[new_class] = classes_[class_id]; + class_size_[class_id] = remainder; + class_size_[new_class] = split_size_[class_id]; +--- 167,189 ---- + split_size_[class_id] = 0; + return -1; + } else { + T new_class = AddClass(); ++ ++ if(allow_repeated_split_) { // split_size_ is possibly ++ // inaccurate, so work it out exactly. 
++ size_t split_count; Element *e; ++ for(split_count=0,e=classes_[class_id]; ++ e != split_el; split_count++, e=e->next); ++ split_size_[class_id] = split_count; ++ } + size_t remainder = class_size_[class_id] - split_size_[class_id]; + if (remainder < split_size_[class_id]) { // add smaller + classes_[new_class] = split_el; + split_el->prev->next = 0; + split_el->prev = 0; ++ class_size_[class_id] = split_size_[class_id]; ++ class_size_[new_class] = remainder; + } else { + classes_[new_class] = classes_[class_id]; + class_size_[class_id] = remainder; + class_size_[new_class] = split_size_[class_id]; +*************** +*** 245,254 **** +--- 254,269 ---- + vector<T> class_size_; + + // size of split for each class ++ // in the nondeterministic case, split_size_ is actually an upper ++ // bound on the size of split for each class. + vector<T> split_size_; + + // set of visited classes to be used in split refine + vector<T> visited_classes_; ++ ++ // true if input fst was non-deterministic; when false (deterministic ++ // input) we can make certain assumptions that speed up the algorithm. ++ bool allow_repeated_split_; + }; + + diff --git a/sci-misc/openfst/openfst-1.3.2.ebuild b/sci-misc/openfst/openfst-1.3.4.ebuild index 7f92b1cc3..70e39d14d 100644 --- a/sci-misc/openfst/openfst-1.3.2.ebuild +++ b/sci-misc/openfst/openfst-1.3.4.ebuild @@ -2,6 +2,8 @@ # Distributed under the terms of the GNU General Public License v2 # $Header: $ +EAPI="5" + inherit eutils flag-o-matic multilib DESCRIPTION="Finite State Transducer tools by Google et al." @@ -9,19 +11,11 @@ HOMEPAGE="http://www.openfst.org" SRC_URI="http://www.openfst.org/twiki/pub/FST/FstDownload/${P}.tar.gz" LICENSE="Apache-2.0" - SLOT="0" - KEYWORDS="~amd64 ~x86" -IUSE="" - -DEPEND="" -RDEPEND="${DEPEND}" - -src_install() { - emake DESTDIR="${D}" install || die "install failed" - dodoc AUTHORS NEWS README || die "docs missing" +src_prepare() { + epatch "${FILESDIR}/kaldi.patch" } src_test() { |