[packages/sphinxtrain] - new - added update patch (adjust to recent openfst+opengrm_ngram, add missing file)
qboosh
qboosh at pld-linux.org
Mon Mar 19 19:25:24 CET 2018
commit baf4a513bb446991237df6d5514f5d626b6803e5
Author: Jakub Bogusz <qboosh at pld-linux.org>
Date: Mon Mar 19 19:28:05 2018 +0100
- new
- added update patch (adjust to recent openfst+opengrm_ngram, add missing file)
sphinxtrain-update.patch | 162 +++++++++++++++++++++++++++++++++++++++++
sphinxtrain.spec | 183 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 345 insertions(+)
---
diff --git a/sphinxtrain.spec b/sphinxtrain.spec
new file mode 100644
index 0000000..d62a55d
--- /dev/null
+++ b/sphinxtrain.spec
@@ -0,0 +1,183 @@
+#
+# Conditional build:
+%bcond_without python # Python extension
+
+Summary: CMU SphinxTrain - open source acoustic model trainer
+Summary(pl.UTF-8): CMU SphinxTrain - mający otwarte źródła trener modeli akustycznych
+Name: sphinxtrain
+Version: 1.0.8
+Release: 1
+License: BSD
+Group: Applications/Sound
+Source0: http://downloads.sourceforge.net/cmusphinx/%{name}-%{version}.tar.gz
+# Source0-md5: 0f7155ba92fbdec169c92c1759303106
+Patch0: %{name}-update.patch
+URL: https://cmusphinx.github.io/
+BuildRequires: autoconf
+BuildRequires: automake
+# C++11 required because of openfst
+BuildRequires: libstdc++-devel >= 6:4.7
+BuildRequires: libtool >= 2:1.5
+BuildRequires: openfst-devel
+BuildRequires: opengrm-ngram-devel
+BuildRequires: perl-base
+BuildRequires: pkgconfig
+BuildRequires: sed >= 4.0
+%if %{with python}
+BuildRequires: python-Cython
+BuildRequires: python-devel >= 2.0
+%endif
+BuildRequires: sphinxbase-devel >= 0.8
+Requires: python-numpy
+Requires: python-pyopenfst
+Requires: python-scipy
+Requires: python-sphinxbase >= 0.8
+Requires: sphinxbase >= 0.8
+BuildRoot: %{tmpdir}/%{name}-%{version}-root-%(id -u -n)
+
+%description
+This is SphinxTrain, Carnegie Mellon University's open source acoustic
+model trainer. This directory contains the scripts and instructions
+necessary for building models for the CMU Sphinx Recognizer.
+
+%description -l pl.UTF-8
+SphinxTrain to pochodzący z Carnegie Mellon University, mający otwarte
+źródła trener modeli akustycznych. Pakiet zawiera skrypty i instrukcje
+potrzebne do budowania modeli dla systemu rozpoznawania mowy CMU Sphinx.
+
+%package devel
+Summary: Header files for CMU SphinxTrain
+Summary(pl.UTF-8): Pliki nagłówkowe CMU SphinxTrain
+Group: Development/Libraries
+Requires: sphinxbase-devel >= 0.8
+# doesn't require base currently
+
+%description devel
+Header files for CMU SphinxTrain.
+
+%description devel -l pl.UTF-8
+Pliki nagłówkowe CMU SphinxTrain.
+
+%prep
+%setup -q
+%patch0 -p1
+# rewrite the hardcoded "/libexec/" path component to match the configured libexecdir
+%{__sed} -i -e "s,/libexec/,/$(basename %{_libexecdir})/," scripts/sphinxtrain
+
+%build
+# rebuild ac/am/lt for as-needed to work
+%{__libtoolize}
+%{__aclocal} -I m4
+%{__autoconf}
+%{__automake}
+CXXFLAGS="%{rpmcxxflags} -std=c++11"
+%configure \
+ --enable-g2p-decoder \
+ %{!?with_static_libs:--disable-static} \
+ %{!?with_python:--without-python}
+%{__make}
+
+%install
+rm -rf $RPM_BUILD_ROOT
+
+%{__make} install \
+ DESTDIR=$RPM_BUILD_ROOT
+
+# not needed
+%{__rm} -r $RPM_BUILD_ROOT%{_libdir}/sphinxtrain/python/setup.py \
+ $RPM_BUILD_ROOT%{_libdir}/sphinxtrain/python/cmusphinx/{test,test_*.py} \
+ $RPM_BUILD_ROOT%{_libdir}/sphinxtrain/scripts/lib/test_*
+
+# not really executable
+sed -i -e '1s,.*/usr/bin/env python.*,,' $RPM_BUILD_ROOT%{_libdir}/sphinxtrain/python/cmusphinx/lattice.py
+# invoke python directly
+sed -i -e '1s,/usr/bin/env python,%{__python},' $RPM_BUILD_ROOT%{_libdir}/sphinxtrain/python/cmusphinx/*.py
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%post -p /sbin/ldconfig
+%postun -p /sbin/ldconfig
+
+%files
+%defattr(644,root,root,755)
+%doc COPYING NEWS README
+%attr(755,root,root) %{_bindir}/sphinxtrain
+%if "%{_libexecdir}" != "%{_libdir}"
+%dir %{_libexecdir}/sphinxtrain
+%endif
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/agg_seg
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/bldtree
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/bw
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/cdcn_norm
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/cdcn_train
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/cp_parm
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/delint
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/g2p_train
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/inc_comp
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/init_gau
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/init_mixw
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/kdtree
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/kmeans_init
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/make_quests
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/map_adapt
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mixw_interp
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mk_flat
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mk_mdef_gen
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mk_mllr_class
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mk_s2sendump
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mk_ts2cb
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mllr_solve
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mllr_transform
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/norm
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/param_cnt
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/phonetisaurus-g2p
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/printp
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/prunetree
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/tiestate
+%dir %{_libdir}/sphinxtrain
+%{_libdir}/sphinxtrain/etc
+%dir %{_libdir}/sphinxtrain/python
+%dir %{_libdir}/sphinxtrain/python/cmusphinx
+%{_libdir}/sphinxtrain/python/cmusphinx/feat
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/classlm2fst.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/cluster_mixw.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/dict_spd.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/fstutils.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lat2dot.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lat2fsg.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lat_rescore.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lat_rescore_fst.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lattice_conv.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lattice_error.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lattice_error_fst.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lattice_prune.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lda.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/mllr.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/mllt.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/prune_mixw.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/quantize_mixw.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/sendump.py
+%{_libdir}/sphinxtrain/python/cmusphinx/__init__.py
+%{_libdir}/sphinxtrain/python/cmusphinx/arpalm.py
+%{_libdir}/sphinxtrain/python/cmusphinx/corpus.py
+%{_libdir}/sphinxtrain/python/cmusphinx/divergence.py
+%{_libdir}/sphinxtrain/python/cmusphinx/evaluation.py
+%{_libdir}/sphinxtrain/python/cmusphinx/gmm.py
+%{_libdir}/sphinxtrain/python/cmusphinx/hmm.py
+%{_libdir}/sphinxtrain/python/cmusphinx/htkmfc.py
+%{_libdir}/sphinxtrain/python/cmusphinx/hypseg.py
+%{_libdir}/sphinxtrain/python/cmusphinx/lattice.py
+%{_libdir}/sphinxtrain/python/cmusphinx/mfcc.py
+%{_libdir}/sphinxtrain/python/cmusphinx/qmwx.pyx
+%{_libdir}/sphinxtrain/python/cmusphinx/s2mfc.py
+%{_libdir}/sphinxtrain/python/cmusphinx/s3*.py
+%dir %{_libdir}/sphinxtrain/scripts
+%attr(755,root,root) %{_libdir}/sphinxtrain/scripts/[0-9]*
+%attr(755,root,root) %{_libdir}/sphinxtrain/scripts/decode
+%attr(755,root,root) %{_libdir}/sphinxtrain/scripts/prepare
+%{_libdir}/sphinxtrain/scripts/lib
+
+%files devel
+%defattr(644,root,root,755)
+%{_includedir}/sphinxtrain
diff --git a/sphinxtrain-update.patch b/sphinxtrain-update.patch
new file mode 100644
index 0000000..8262dc4
--- /dev/null
+++ b/sphinxtrain-update.patch
@@ -0,0 +1,162 @@
+--- sphinxtrain-1.0.8/src/programs/g2p_train/g2p_train.cpp.orig 2012-09-28 23:26:18.000000000 +0200
++++ sphinxtrain-1.0.8/src/programs/g2p_train/g2p_train.cpp 2018-03-10 19:49:09.407123669 +0100
+@@ -20,18 +20,18 @@
+ #include <fst/script/fst-class.h>
+ #include <fst/script/determinize.h>
+ #include <fst/script/minimize.h>
+-#include <fst/extensions/far/main.h>
++#include <fst/extensions/far/getters.h>
+ #include <fst/script/print.h>
+ #include <ngram/ngram-shrink.h>
+ #include <ngram/ngram-relentropy.h>
+-#include <ngram/ngram-seymoreshrink.h>
+-#include <ngram/ngram-countprune.h>
++#include <ngram/ngram-seymore-shrink.h>
++#include <ngram/ngram-count-prune.h>
+ #include <ngram/ngram-input.h>
+ #include <ngram/ngram-make.h>
+-#include <ngram/ngram-kneserney.h>
++#include <ngram/ngram-kneser-ney.h>
+ #include <ngram/ngram-absolute.h>
+ #include <ngram/ngram-katz.h>
+-#include <ngram/ngram-wittenbell.h>
++#include <ngram/ngram-witten-bell.h>
+ #include <ngram/ngram-unsmoothed.h>
+ #include <sphinxbase/err.h>
+ #include "M2MFstAligner.hpp"
+@@ -47,7 +47,6 @@
+ #define keep_symbols true
+ #define initial_symbols true
+ #define allow_negative_labels false
+-#define file_list_input false
+ #define key_prefix ""
+ #define key_suffix ""
+ #define backoff false
+@@ -194,12 +193,11 @@
+ osyms->WriteText(prefix+".output.syms");
+
+ string dest = prefix+".fst.txt";
+- ostream *ostrm = new fst::ofstream(dest.c_str());
+- ostrm->precision(9);
+- s::FstClass *fstc = new s::FstClass(out);
+- s::PrintFst(*fstc, *ostrm, dest, isyms, osyms, NULL, acceptor, show_weight_one);
+- ostrm->flush();
+- delete ostrm;
++ fst::ofstream ostrm(dest.c_str());
++ ostrm.precision(9);
++ s::FstClass fstc(*out);
++ s::PrintFst(fstc, ostrm, dest, isyms, osyms, NULL, acceptor, show_weight_one);
++ ostrm.flush();
+ }
+
+ void train_model(string eps, string s1s2_sep, string skip, int order, string smooth, string prefix, string seq_sep, string prune, double theta, string count_pattern) {
+@@ -218,9 +216,13 @@
+
+ // compile strings into a far archive
+ cout << "Compiling symbols into FAR archive..." << endl;
+- fst::FarEntryType fet = fst::StringToFarEntryType(entry_type);
+- fst::FarTokenType ftt = fst::StringToFarTokenType(token_type);
+- fst::FarType fartype = fst::FarTypeFromString(far_type);
++ fst::FarEntryType fet;
++ if (!fst::script::GetFarEntryType(entry_type, &fet))
++ E_FATAL("Unknown or unsupported FAR entry type");
++ fst::FarTokenType ftt;
++ if (!fst::script::GetFarTokenType(token_type, &ftt))
++ E_FATAL("Unknown or unsupported FAR token type");
++ fst::FarType fartype = fst::script::GetFarType(far_type);
+
+ delete ingram;
+
+@@ -232,7 +234,7 @@
+ prefix+".corpus.syms", unknown_symbol,
+ keep_symbols, initial_symbols,
+ allow_negative_labels,
+- file_list_input, key_prefix,
++ key_prefix,
+ key_suffix);
+
+ //count n-grams
+@@ -247,7 +249,7 @@
+ while (!far_reader->Done()) {
+ if (ifst)
+ delete ifst;
+- ifst = far_reader->GetFst().Copy();
++ ifst = far_reader->GetFst()->Copy();
+
+ if (!ifst) {
+ E_FATAL("ngramcount: unable to read fst #%d\n", fstnumber);
+@@ -309,7 +311,7 @@
+ ngram.MakeNGramModel();
+ fst = ngram.GetMutableFst();
+ } else if (smooth == "katz") {
+- NGramKatz ngram(fst, backoff, backoff_label,
++ NGramKatz<StdArc> ngram(fst, backoff, backoff_label,
+ norm_eps, check_consistency,
+ bins);
+ ngram.MakeNGramModel();
+@@ -355,7 +357,7 @@
+ }
+
+ cout << "Minimizing model..." << endl;
+- MutableFstClass *minimized = new s::MutableFstClass(fst);
++ MutableFstClass *minimized = new s::MutableFstClass(*fst);
+ Minimize(minimized, 0, fst::kDelta);
+ fst = minimized->GetMutableFst<StdArc>();
+
+--- sphinxtrain-1.0.8/src/programs/g2p_train/g2p_train.hpp.orig 1970-01-01 01:00:00.000000000 +0100
++++ sphinxtrain-1.0.8/src/programs/g2p_train/g2p_train.hpp 2018-03-10 20:04:49.127112944 +0100
+@@ -0,0 +1,54 @@
++/* ====================================================================
++ * Copyright (c) 1995-2012 Carnegie Mellon University. All rights
++ * reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * 2. Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in
++ * the documentation and/or other materials provided with the
++ * distribution.
++ *
++ * This work was supported in part by funding from the Defense Advanced
++ * Research Projects Agency and the National Science Foundation of the
++ * United States of America, and the CMU Sphinx Speech Consortium.
++ *
++ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
++ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
++ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
++ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ * ====================================================================
++ *
++ */
++
++#ifndef G2P_TRAIN_H_
++#define G2P_TRAIN_H_
++#include <string>
++
++using namespace std;
++
++void split(string input_file, string prefix, int ratio);
++
++void align(string input_file, string prefix, bool seq1_del, bool seq2_del,
++ int seq1_max, int seq2_max, string seq_sep, string s1s2_sep,
++ string eps, string skip, string seq1in_sep, string seq2in_sep,
++ string s1s2_delim, int iter);
++
++void train_model(string eps, string s1s2_sep, string skip, int order,
++ string smooth, string prefix, string seq_sep, string prune,
++ double theta, string count_pattern);
++
++#endif /* G2P_TRAIN_H_ */
================================================================
---- gitweb:
http://git.pld-linux.org/gitweb.cgi/packages/sphinxtrain.git/commitdiff/baf4a513bb446991237df6d5514f5d626b6803e5
More information about the pld-cvs-commit
mailing list