[packages/sphinxtrain] - new - added update patch (adjust to recent openfst+opengrm_ngram, add missing file)

qboosh qboosh at pld-linux.org
Mon Mar 19 19:25:24 CET 2018


commit baf4a513bb446991237df6d5514f5d626b6803e5
Author: Jakub Bogusz <qboosh at pld-linux.org>
Date:   Mon Mar 19 19:28:05 2018 +0100

    - new
    - added update patch (adjust to recent openfst+opengrm_ngram, add missing file)

 sphinxtrain-update.patch | 162 +++++++++++++++++++++++++++++++++++++++++
 sphinxtrain.spec         | 183 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 345 insertions(+)
---
diff --git a/sphinxtrain.spec b/sphinxtrain.spec
new file mode 100644
index 0000000..d62a55d
--- /dev/null
+++ b/sphinxtrain.spec
@@ -0,0 +1,183 @@
+#
+# Conditional build:
+%bcond_without	python		# Python extension
+
+Summary:	CMU SphinxTrain - open source acoustic model trainer
+Summary(pl.UTF-8):	CMU SphinxTrain - mający otwarte źródła trener modeli akustycznych
+Name:		sphinxtrain
+Version:	1.0.8
+Release:	1
+License:	BSD
+Group:		Applications/Sound
+Source0:	http://downloads.sourceforge.net/cmusphinx/%{name}-%{version}.tar.gz
+# Source0-md5:	0f7155ba92fbdec169c92c1759303106
+Patch0:		%{name}-update.patch
+URL:		https://cmusphinx.github.io/
+BuildRequires:	autoconf
+BuildRequires:	automake
+# C++11 required because of openfst
+BuildRequires:	libstdc++-devel >= 6:4.7
+BuildRequires:	libtool >= 2:1.5
+BuildRequires:	openfst-devel
+BuildRequires:	opengrm-ngram-devel
+BuildRequires:	perl-base
+BuildRequires:	pkgconfig
+BuildRequires:	sed >= 4.0
+%if %{with python}
+BuildRequires:	python-Cython
+BuildRequires:	python-devel >= 2.0
+%endif
+BuildRequires:	sphinxbase-devel >= 0.8
+Requires:	python-numpy
+Requires:	python-pyopenfst
+Requires:	python-scipy
+Requires:	python-sphinxbase >= 0.8
+Requires:	sphinxbase >= 0.8
+BuildRoot:	%{tmpdir}/%{name}-%{version}-root-%(id -u -n)
+
+%description
+This is SphinxTrain, Carnegie Mellon University's open source acoustic
+model trainer. This directory contains the scripts and instructions
+necessary for building models for the CMU Sphinx Recognizer.
+
+%description -l pl.UTF-8
+SphinxTrain to pochodzący z Carnegie Mellon University, mający otwarte
+źródła trener modeli akustycznych. Pakiet zawiera skrypty i instrukcje
+potrzebne do budowania modeli dla silnika rozpoznawania mowy CMU Sphinx.
+
+%package devel
+Summary:	Header files for CMU SphinxTrain
+Summary(pl.UTF-8):	Pliki nagłówkowe CMU SphinxTrain
+Group:		Development/Libraries
+Requires:	sphinxbase-devel >= 0.8
+# doesn't require base currently
+
+%description devel
+Header files for CMU SphinxTrain.
+
+%description devel -l pl.UTF-8
+Pliki nagłówkowe CMU SphinxTrain.
+
+%prep
+%setup -q
+%patch0 -p1
+
+%{__sed} -i -e "s,/libexec/,/$(basename %{_libexecdir})/," scripts/sphinxtrain
+
+%build
+# rebuild ac/am/lt for as-needed to work
+%{__libtoolize}
+%{__aclocal} -I m4
+%{__autoconf}
+%{__automake}
+CXXFLAGS="%{rpmcxxflags} -std=c++11"
+%configure \
+	--enable-g2p-decoder \
+	%{!?with_static_libs:--disable-static} \
+	%{!?with_python:--without-python}
+%{__make}
+
+%install
+rm -rf $RPM_BUILD_ROOT
+
+%{__make} install \
+	DESTDIR=$RPM_BUILD_ROOT
+
+# not needed
+%{__rm} -r $RPM_BUILD_ROOT%{_libdir}/sphinxtrain/python/setup.py \
+	$RPM_BUILD_ROOT%{_libdir}/sphinxtrain/python/cmusphinx/{test,test_*.py} \
+	$RPM_BUILD_ROOT%{_libdir}/sphinxtrain/scripts/lib/test_*
+
+# not really executable
+sed -i -e '1s,.*/usr/bin/env python.*,,' $RPM_BUILD_ROOT%{_libdir}/sphinxtrain/python/cmusphinx/lattice.py
+# invoke python directly
+sed -i -e '1s,/usr/bin/env python,%{__python},' $RPM_BUILD_ROOT%{_libdir}/sphinxtrain/python/cmusphinx/*.py
+
+%clean
+rm -rf $RPM_BUILD_ROOT
+
+%post	-p /sbin/ldconfig
+%postun	-p /sbin/ldconfig
+
+%files
+%defattr(644,root,root,755)
+%doc COPYING NEWS README
+%attr(755,root,root) %{_bindir}/sphinxtrain
+%if "%{_libexecdir}" != "%{_libdir}"
+%dir %{_libexecdir}/sphinxtrain
+%endif
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/agg_seg
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/bldtree
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/bw
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/cdcn_norm
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/cdcn_train
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/cp_parm
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/delint
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/g2p_train
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/inc_comp
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/init_gau
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/init_mixw
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/kdtree
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/kmeans_init
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/make_quests
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/map_adapt
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mixw_interp
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mk_flat
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mk_mdef_gen
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mk_mllr_class
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mk_s2sendump
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mk_ts2cb
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mllr_solve
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/mllr_transform
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/norm
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/param_cnt
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/phonetisaurus-g2p
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/printp
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/prunetree
+%attr(755,root,root) %{_libexecdir}/sphinxtrain/tiestate
+%dir %{_libdir}/sphinxtrain
+%{_libdir}/sphinxtrain/etc
+%dir %{_libdir}/sphinxtrain/python
+%dir %{_libdir}/sphinxtrain/python/cmusphinx
+%{_libdir}/sphinxtrain/python/cmusphinx/feat
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/classlm2fst.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/cluster_mixw.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/dict_spd.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/fstutils.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lat2dot.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lat2fsg.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lat_rescore.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lat_rescore_fst.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lattice_conv.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lattice_error.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lattice_error_fst.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lattice_prune.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/lda.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/mllr.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/mllt.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/prune_mixw.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/quantize_mixw.py
+%attr(755,root,root) %{_libdir}/sphinxtrain/python/cmusphinx/sendump.py
+%{_libdir}/sphinxtrain/python/cmusphinx/__init__.py
+%{_libdir}/sphinxtrain/python/cmusphinx/arpalm.py
+%{_libdir}/sphinxtrain/python/cmusphinx/corpus.py
+%{_libdir}/sphinxtrain/python/cmusphinx/divergence.py
+%{_libdir}/sphinxtrain/python/cmusphinx/evaluation.py
+%{_libdir}/sphinxtrain/python/cmusphinx/gmm.py
+%{_libdir}/sphinxtrain/python/cmusphinx/hmm.py
+%{_libdir}/sphinxtrain/python/cmusphinx/htkmfc.py
+%{_libdir}/sphinxtrain/python/cmusphinx/hypseg.py
+%{_libdir}/sphinxtrain/python/cmusphinx/lattice.py
+%{_libdir}/sphinxtrain/python/cmusphinx/mfcc.py
+%{_libdir}/sphinxtrain/python/cmusphinx/qmwx.pyx
+%{_libdir}/sphinxtrain/python/cmusphinx/s2mfc.py
+%{_libdir}/sphinxtrain/python/cmusphinx/s3*.py
+%dir %{_libdir}/sphinxtrain/scripts
+%attr(755,root,root) %{_libdir}/sphinxtrain/scripts/[0-9]*
+%attr(755,root,root) %{_libdir}/sphinxtrain/scripts/decode
+%attr(755,root,root) %{_libdir}/sphinxtrain/scripts/prepare
+%{_libdir}/sphinxtrain/scripts/lib
+
+%files devel
+%defattr(644,root,root,755)
+%{_includedir}/sphinxtrain
diff --git a/sphinxtrain-update.patch b/sphinxtrain-update.patch
new file mode 100644
index 0000000..8262dc4
--- /dev/null
+++ b/sphinxtrain-update.patch
@@ -0,0 +1,162 @@
+--- sphinxtrain-1.0.8/src/programs/g2p_train/g2p_train.cpp.orig	2012-09-28 23:26:18.000000000 +0200
++++ sphinxtrain-1.0.8/src/programs/g2p_train/g2p_train.cpp	2018-03-10 19:49:09.407123669 +0100
+@@ -20,18 +20,18 @@
+ #include <fst/script/fst-class.h>
+ #include <fst/script/determinize.h>
+ #include <fst/script/minimize.h>
+-#include <fst/extensions/far/main.h>
++#include <fst/extensions/far/getters.h>
+ #include <fst/script/print.h>
+ #include <ngram/ngram-shrink.h>
+ #include <ngram/ngram-relentropy.h>
+-#include <ngram/ngram-seymoreshrink.h>
+-#include <ngram/ngram-countprune.h>
++#include <ngram/ngram-seymore-shrink.h>
++#include <ngram/ngram-count-prune.h>
+ #include <ngram/ngram-input.h>
+ #include <ngram/ngram-make.h>
+-#include <ngram/ngram-kneserney.h>
++#include <ngram/ngram-kneser-ney.h>
+ #include <ngram/ngram-absolute.h>
+ #include <ngram/ngram-katz.h>
+-#include <ngram/ngram-wittenbell.h>
++#include <ngram/ngram-witten-bell.h>
+ #include <ngram/ngram-unsmoothed.h>
+ #include <sphinxbase/err.h>
+ #include "M2MFstAligner.hpp"
+@@ -47,7 +47,6 @@
+ #define keep_symbols true
+ #define initial_symbols true
+ #define allow_negative_labels false
+-#define file_list_input false
+ #define key_prefix ""
+ #define key_suffix ""
+ #define backoff false
+@@ -194,12 +193,11 @@
+ 	osyms->WriteText(prefix+".output.syms");
+ 
+ 	string dest = prefix+".fst.txt";
+-	ostream *ostrm = new fst::ofstream(dest.c_str());
+-	ostrm->precision(9);
+-	s::FstClass *fstc = new s::FstClass(out);
+-	s::PrintFst(*fstc, *ostrm, dest, isyms, osyms, NULL, acceptor, show_weight_one);
+-	ostrm->flush();
+-	delete ostrm;
++	fst::ofstream ostrm(dest.c_str());
++	ostrm.precision(9);
++	s::FstClass fstc(*out);
++	s::PrintFst(fstc, ostrm, dest, isyms, osyms, NULL, acceptor, show_weight_one);
++	ostrm.flush();
+ }
+ 
+ void train_model(string eps, string s1s2_sep, string skip, int order, string smooth, string prefix, string seq_sep, string prune, double theta, string count_pattern) {
+@@ -218,9 +216,13 @@
+ 
+ 	// compile strings into a far archive
+ 	cout << "Compiling symbols into FAR archive..." << endl;
+-	fst::FarEntryType fet = fst::StringToFarEntryType(entry_type);
+-	fst::FarTokenType ftt = fst::StringToFarTokenType(token_type);
+-    fst::FarType fartype = fst::FarTypeFromString(far_type);
++	fst::FarEntryType fet;
++	if (!fst::script::GetFarEntryType(entry_type, &fet))
++		E_FATAL("Unknown or unsupported FAR entry type");
++	fst::FarTokenType ftt;
++	if (!fst::script::GetFarTokenType(token_type, &ftt))
++		E_FATAL("Unknown or unsupported FAR token type");
++    fst::FarType fartype = fst::script::GetFarType(far_type);
+ 
+     delete ingram;
+ 
+@@ -232,7 +234,7 @@
+ 	                       prefix+".corpus.syms", unknown_symbol,
+ 	                       keep_symbols, initial_symbols,
+ 	                       allow_negative_labels,
+-	                       file_list_input, key_prefix,
++	                       key_prefix,
+ 	                       key_suffix);
+ 
+ 	//count n-grams
+@@ -247,7 +249,7 @@
+ 	while (!far_reader->Done()) {
+ 		if (ifst)
+ 			delete ifst;
+-		ifst = far_reader->GetFst().Copy();
++		ifst = far_reader->GetFst()->Copy();
+ 
+ 		if (!ifst) {
+ 			E_FATAL("ngramcount: unable to read fst #%d\n", fstnumber);
+@@ -309,7 +311,7 @@
+ 		ngram.MakeNGramModel();
+ 		fst = ngram.GetMutableFst();
+ 	} else if (smooth == "katz") {
+-		NGramKatz ngram(fst, backoff, backoff_label,
++		NGramKatz<StdArc> ngram(fst, backoff, backoff_label,
+ 				norm_eps, check_consistency,
+ 				bins);
+ 		ngram.MakeNGramModel();
+@@ -355,7 +357,7 @@
+ 	}
+ 
+ 	cout << "Minimizing model..." << endl;
+-	MutableFstClass *minimized = new s::MutableFstClass(fst);
++	MutableFstClass *minimized = new s::MutableFstClass(*fst);
+ 	Minimize(minimized, 0, fst::kDelta);
+ 	fst = minimized->GetMutableFst<StdArc>();
+ 
+--- sphinxtrain-1.0.8/src/programs/g2p_train/g2p_train.hpp.orig	1970-01-01 01:00:00.000000000 +0100
++++ sphinxtrain-1.0.8/src/programs/g2p_train/g2p_train.hpp	2018-03-10 20:04:49.127112944 +0100
+@@ -0,0 +1,54 @@
++/* ====================================================================
++ * Copyright (c) 1995-2012 Carnegie Mellon University.  All rights 
++ * reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ * 1. Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer. 
++ *
++ * 2. Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * This work was supported in part by funding from the Defense Advanced 
++ * Research Projects Agency and the National Science Foundation of the 
++ * United States of America, and the CMU Sphinx Speech Consortium.
++ *
++ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
++ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
++ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
++ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
++ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
++ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
++ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
++ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
++ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ *
++ * ====================================================================
++ *
++ */
++
++#ifndef G2P_TRAIN_H_
++#define G2P_TRAIN_H_
++#include <string>
++
++using namespace std;
++
++void split(string input_file, string prefix, int ratio);
++
++void align(string input_file, string prefix, bool seq1_del, bool seq2_del, 
++		int seq1_max, int seq2_max, string seq_sep, string s1s2_sep,
++		string eps, string skip, string seq1in_sep, string seq2in_sep,
++		string s1s2_delim, int iter);
++		
++void train_model(string eps, string s1s2_sep, string skip, int order, 
++		string smooth, string prefix, string seq_sep, string prune, 
++		double theta, string count_pattern);
++
++#endif /* G2P_TRAIN_H_ */
================================================================

---- gitweb:

http://git.pld-linux.org/gitweb.cgi/packages/sphinxtrain.git/commitdiff/baf4a513bb446991237df6d5514f5d626b6803e5



More information about the pld-cvs-commit mailing list