[packages/lttoolbox] - updated to 3.3.1 (note: soname changed) - removed outdated svn and soname patches
qboosh
qboosh at pld-linux.org
Fri Dec 25 09:33:22 CET 2015
commit 78ddaac8fc4a3cd5335057d9c391f686cfcf68c7
Author: Jakub Bogusz <qboosh at pld-linux.org>
Date: Fri Dec 25 09:34:54 2015 +0100
- updated to 3.3.1 (note: soname changed)
- removed outdated svn and soname patches
lttoolbox-soname.patch | 11 -
lttoolbox-svn20130412.patch | 3112 -------------------------------------------
lttoolbox.spec | 26 +-
3 files changed, 11 insertions(+), 3138 deletions(-)
---
diff --git a/lttoolbox.spec b/lttoolbox.spec
index 72b4a69..0a7eae4 100644
--- a/lttoolbox.spec
+++ b/lttoolbox.spec
@@ -1,17 +1,13 @@
Summary: Augmented letter transducer tools for natural language processing
Summary(pl.UTF-8): Narzędzia do przetwarzania słów w językach naturalnych
Name: lttoolbox
-Version: 3.2.0
-%define subver svn20130412
-%define rel 1
-Release: 2.%{subver}.1
+Version: 3.3.1
+Release: 1
License: GPL v2+
Group: Applications/Text
Source0: http://downloads.sourceforge.net/apertium/%{name}-%{version}.tar.gz
-# Source0-md5: 708e7de837ed363f7103035ef2849fe4
-Patch0: %{name}-svn20130412.patch
-Patch1: %{name}-soname.patch
-Patch2: %{name}-opt.patch
+# Source0-md5: d50479b2376a4839b7acac352505623e
+Patch0: %{name}-opt.patch
URL: http://wiki.apertium.org/wiki/Lttoolbox
BuildRequires: autoconf >= 2.52
BuildRequires: automake
@@ -62,9 +58,7 @@ Statyczna biblioteka lttoolbox.
%prep
%setup -q
-%patch0 -p0
-%patch1 -p1
-%patch2 -p1
+%patch0 -p1
%build
%{__libtoolize}
@@ -97,8 +91,9 @@ rm -rf $RPM_BUILD_ROOT
%attr(755,root,root) %{_bindir}/lt-proc
%attr(755,root,root) %{_bindir}/lt-tmxcomp
%attr(755,root,root) %{_bindir}/lt-tmxproc
-%attr(755,root,root) %{_libdir}/liblttoolbox3-3.2.so.*.*.*
-%attr(755,root,root) %ghost %{_libdir}/liblttoolbox3-3.2.so.1
+%attr(755,root,root) %{_bindir}/lt-trim
+%attr(755,root,root) %{_libdir}/liblttoolbox3-3.3.so.*.*.*
+%attr(755,root,root) %ghost %{_libdir}/liblttoolbox3-3.3.so.0
%{_datadir}/lttoolbox
%{_mandir}/man1/lt-comp.1*
%{_mandir}/man1/lt-expand.1*
@@ -106,13 +101,14 @@ rm -rf $RPM_BUILD_ROOT
%{_mandir}/man1/lt-proc.1*
%{_mandir}/man1/lt-tmxcomp.1*
%{_mandir}/man1/lt-tmxproc.1*
+%{_mandir}/man1/lt-trim.1*
%files devel
%defattr(644,root,root,755)
%attr(755,root,root) %{_libdir}/liblttoolbox3.so
%{_libdir}/liblttoolbox3.la
-%{_includedir}/lttoolbox-3.2
-%{_pkgconfigdir}/lttoolbox-3.2.pc
+%{_includedir}/lttoolbox-3.3
+%{_pkgconfigdir}/lttoolbox.pc
%files static
%defattr(644,root,root,755)
diff --git a/lttoolbox-soname.patch b/lttoolbox-soname.patch
deleted file mode 100644
index 973caac..0000000
--- a/lttoolbox-soname.patch
+++ /dev/null
@@ -1,11 +0,0 @@
---- lttoolbox-3.2.0/configure.ac.orig 2013-06-26 16:15:39.881717927 +0200
-+++ lttoolbox-3.2.0/configure.ac 2013-06-26 16:23:06.398365855 +0200
-@@ -23,7 +23,7 @@
- AC_SUBST(GENERIC_MAJOR_VERSION)
-
- # Shared library versioning
--GENERIC_LIBRARY_VERSION=0:0:0
-+GENERIC_LIBRARY_VERSION=1:0:0
- # | | |
- # +------+ | +---+
- # | | |
diff --git a/lttoolbox-svn20130412.patch b/lttoolbox-svn20130412.patch
deleted file mode 100644
index 71dc646..0000000
--- a/lttoolbox-svn20130412.patch
+++ /dev/null
@@ -1,3112 +0,0 @@
-Index: lttoolbox/lt-proc.1
-===================================================================
---- lttoolbox/lt-proc.1 (revision 21745)
-+++ lttoolbox/lt-proc.1 (working copy)
-@@ -12,7 +12,9 @@
- [
- .B \-a \fR|
- .B \-b \fR|
-+.B \-o \fR|
- .B \-c \fR|
-+.B \-d \fR|
- .B \-e \fR|
- .B \-g \fR|
- .B \-n \fR|
-@@ -29,7 +31,10 @@
- [
- .B \-\-analysis \fR|
- .B \-\-bilingual \fR|
-+.B \-\-surf-bilingual \fR|
- .B \-\-case-sensitive \fR|
-+.B \-\-debugged-gen \fR|
-+.B \-\-decompose-nouns \fR|
- .B \-\-generation \fR|
- .B \-\-non-marked-gen \fR|
- .B \-\-tagged-gen \fR|
-@@ -98,9 +103,18 @@
- form in the source language. Works tipically with the output of
- apertium-pretransfer.
- .TP
-+.B \-o, \-\-surf-bilingual
-+As with \-b, but takes input from apertium\-tagger \-p , with
-+surface forms, and if the lexical form is not found in the bilingual
-+dictionary, it outputs the surface form of the word.
-+.TP
-+
- .B \-c, \-\-case-sensitive
- Use the literal case of the incoming characters
- .TP
-+.B \-d, \-\-debugged-gen
-+Morph. generation with all the stuff
-+.TP
- .B \-e, \-\-decompose-compounds
- Try to treat unknown words as compounds, and decompose them.
- .TP
-@@ -154,5 +168,4 @@
- .SH BUGS
- Lots of...lurking in the dark and waiting for you!
- .SH AUTHOR
--(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
--reserved.
-+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante.
-Index: lttoolbox/fst_processor.cc
-===================================================================
---- lttoolbox/fst_processor.cc (revision 21745)
-+++ lttoolbox/fst_processor.cc (working copy)
-@@ -44,14 +44,17 @@
-
- caseSensitive = false;
- dictionaryCase = false;
-- compoundDecomposition = false;
-+ do_decomposition = false;
- nullFlush = false;
- nullFlushGeneration = false;
-+ showControlSymbols = false;
-+ biltransSurfaceForms = false;
-+ compoundOnlyLSymbol = 0;
-+ compoundRSymbol = 0;
-+ compound_max_elements = 4;
-
-- pool = new Pool<vector<int> >(4, vector<int>(50));
--
-- initial_state = new State(pool);
-- current_state = new State(pool);
-+ initial_state = new State();
-+ current_state = new State();
- }
-
- FSTProcessor::~FSTProcessor()
-@@ -58,7 +61,6 @@
- {
- delete current_state;
- delete initial_state;
-- delete pool;
- }
-
- void
-@@ -408,6 +410,100 @@
- return 0x7fffffff;
- }
-
-+pair<wstring, int>
-+FSTProcessor::readBilingual(FILE *input, FILE *output)
-+{
-+ wint_t val = fgetwc_unlocked(input);
-+ wstring symbol = L"";
-+
-+ if(feof(input))
-+ {
-+ return pair<wstring, int>(symbol, 0x7fffffff);
-+ }
-+
-+ if(outOfWord)
-+ {
-+ if(val == L'^')
-+ {
-+ val = fgetwc_unlocked(input);
-+ if(feof(input))
-+ {
-+ return pair<wstring, int>(symbol, 0x7fffffff);
-+ }
-+ }
-+ else if(val == L'\\')
-+ {
-+ fputwc_unlocked(val, output);
-+ val = fgetwc_unlocked(input);
-+ if(feof(input))
-+ {
-+ return pair<wstring, int>(symbol, 0x7fffffff);
-+ }
-+ fputwc_unlocked(val,output);
-+ skipUntil(input, output, L'^');
-+ val = fgetwc_unlocked(input);
-+ if(feof(input))
-+ {
-+ return pair<wstring, int>(symbol, 0x7fffffff);
-+ }
-+ }
-+ else
-+ {
-+ fputwc_unlocked(val, output);
-+ skipUntil(input, output, L'^');
-+ val = fgetwc_unlocked(input);
-+ if(feof(input))
-+ {
-+ return pair<wstring, int>(symbol, 0x7fffffff);
-+ }
-+ }
-+ outOfWord = false;
-+ }
-+
-+ if(val == L'\\')
-+ {
-+ val = fgetwc_unlocked(input);
-+ return pair<wstring, int>(symbol, val);
-+ }
-+ else if(val == L'$')
-+ {
-+ outOfWord = true;
-+ return pair<wstring, int>(symbol, static_cast<int>(L'$'));
-+ }
-+ else if(val == L'<')
-+ {
-+ wstring cad = L"";
-+ cad += static_cast<wchar_t>(val);
-+ while((val = fgetwc_unlocked(input)) != L'>')
-+ {
-+ if(feof(input))
-+ {
-+ streamError();
-+ }
-+ cad += static_cast<wchar_t>(val);
-+ }
-+ cad += static_cast<wchar_t>(val);
-+
-+ int res = alphabet(cad);
-+
-+ if (res == 0) {
-+ symbol = cad;
-+ }
-+ return pair<wstring, int>(symbol, res);
-+ }
-+ else if(val == L'[')
-+ {
-+ fputws_unlocked(readFullBlock(input, L'[', L']').c_str(), output);
-+ return readBilingual(input, output);
-+ }
-+ else
-+ {
-+ return pair<wstring, int>(symbol, val);
-+ }
-+
-+ return pair<wstring, int>(symbol, 0x7fffffff);
-+}
-+
- void
- FSTProcessor::flushBlanks(FILE *output)
- {
-@@ -494,6 +590,27 @@
- }
-
- void
-+FSTProcessor::writeEscapedWithTags(wstring const &str, FILE *output)
-+{
-+ for(unsigned int i = 0, limit = str.size(); i < limit; i++)
-+ {
-+ if(str[i] == L'<' && i >=1 && str[i-1] != L'\\')
-+ {
-+ fputws_unlocked(str.substr(i).c_str(), output);
-+ return;
-+ }
-+
-+ if(escaped_chars.find(str[i]) != escaped_chars.end())
-+ {
-+ fputwc_unlocked(L'\\', output);
-+ }
-+ fputwc_unlocked(str[i], output);
-+ }
-+}
-+
-+
-+
-+void
- FSTProcessor::printWord(wstring const &sf, wstring const &lf, FILE *output)
- {
- fputwc_unlocked(L'^', output);
-@@ -642,7 +759,86 @@
- initGeneration();
- }
-
-+
- wstring
-+FSTProcessor::compoundAnalysis(wstring input_word, bool uppercase, bool firstupper) {
-+ const int MAX_COMBINATIONS = 500;
-+ //wcerr << L"compoundAnalysis(input_word = " << input_word << L")" << endl;
-+
-+ State current_state = *initial_state;
-+
-+ for(unsigned int i=0; i<input_word.size(); i++) {
-+ wchar_t val=input_word.at(i);
-+
-+ //wcerr << val << L" før step " << i << L" current_state = " << current_state.getReadableString(alphabet) << endl;
-+ current_state.step_case(val, caseSensitive);
-+
-+ if(current_state.size() > MAX_COMBINATIONS) {
-+ wcerr << L"Warning: compoundAnalysis's MAX_COMBINATIONS exceeded for '" << input_word << L"'" << endl;
-+ wcerr << L" gave up at char " << i << L" '" << val << L"'." << endl;
-+
-+ wstring nullString = L"";
-+ return nullString;
-+ }
-+
-+ //wcerr << val << L" eft step " << i << L" current_state = " << current_state.getReadableString(alphabet) << endl;
-+
-+ if(i < input_word.size()-1)
-+ current_state.restartFinals(all_finals, compoundOnlyLSymbol, initial_state, '+');
-+
-+ //wcerr << val << " eft rest " << i << " current_state = " << current_state.getReadableString(alphabet) << endl;
-+ //wcerr << i << " result = " << current_state.filterFinals(all_finals, alphabet, escaped_chars, uppercase, firstupper) << endl;
-+ //wcerr << i << " -- size = " << current_state.size() << endl;
-+
-+ if(current_state.size()==0) {
-+ wstring nullString = L"";
-+ return nullString;
-+ }
-+ }
-+
-+ current_state.pruneCompounds(compoundRSymbol, '+', compound_max_elements);
-+ wstring result = current_state.filterFinals(all_finals, alphabet, escaped_chars, uppercase, firstupper);
-+ //wcerr << L"rrresult = " << result << endl;
-+
-+ return result;
-+}
-+
-+
-+
-+void
-+FSTProcessor::initDecompositionSymbols() {
-+ if ((compoundOnlyLSymbol=alphabet(L"<:co:only-L>")) == 0
-+ && (compoundOnlyLSymbol=alphabet(L"<:compound:only-L>")) == 0
-+ && (compoundOnlyLSymbol=alphabet(L"<@co:only-L>")) == 0
-+ && (compoundOnlyLSymbol=alphabet(L"<@compound:only-L>")) == 0
-+ && (compoundOnlyLSymbol=alphabet(L"<compound-only-L>")) == 0)
-+ {
-+ wcerr << L"Warning: Decomposition symbol <:compound:only-L> not found" << endl;
-+ }
-+ else if (!showControlSymbols)
-+ alphabet.setSymbol(compoundOnlyLSymbol, L"");
-+
-+ if ((compoundRSymbol=alphabet(L"<:co:R>")) == 0
-+ && (compoundRSymbol=alphabet(L"<:compound:R>")) == 0
-+ && (compoundRSymbol=alphabet(L"<@co:R>")) == 0
-+ && (compoundRSymbol=alphabet(L"<@compound:R>")) == 0
-+ && (compoundRSymbol=alphabet(L"<compound-R>")) == 0)
-+ {
-+ wcerr << L"Warning: Decomposition symbol <:compound:R> not found" << endl;
-+ }
-+ else if (!showControlSymbols)
-+ alphabet.setSymbol(compoundRSymbol, L"");
-+}
-+
-+
-+void
-+FSTProcessor::initDecomposition() {
-+ do_decomposition = true;
-+ initAnalysis();
-+ initDecompositionSymbols();
-+}
-+
-+/*wstring
- FSTProcessor::decompose(wstring w)
- {
- State current_state = *initial_state;
-@@ -807,7 +1003,7 @@
- }
- //wcerr << L"+ decompose: " << lf << endl;
- return lf;
--}
-+}*/
-
- void
- FSTProcessor::analysis(FILE *input, FILE *output)
-@@ -839,6 +1035,10 @@
- uppercase = firstupper && iswupper(sf[sf.size()-1]);
- }
-
-+ if(do_decomposition && compoundOnlyLSymbol != 0)
-+ {
-+ current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
-+ }
- lf = current_state.filterFinals(all_finals, alphabet,
- escaped_chars,
- uppercase, firstupper);
-@@ -853,6 +1053,10 @@
- uppercase = firstupper && iswupper(sf[sf.size()-1]);
- }
-
-+ if(do_decomposition && compoundOnlyLSymbol != 0)
-+ {
-+ current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
-+ }
- lf = current_state.filterFinals(all_finals, alphabet,
- escaped_chars,
- uppercase, firstupper);
-@@ -867,6 +1071,10 @@
- uppercase = firstupper && iswupper(sf[sf.size()-1]);
- }
-
-+ if(do_decomposition && compoundOnlyLSymbol != 0)
-+ {
-+ current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
-+ }
- lf = current_state.filterFinals(all_finals, alphabet,
- escaped_chars,
- uppercase, firstupper);
-@@ -881,6 +1089,10 @@
- uppercase = firstupper && iswupper(sf[sf.size()-1]);
- }
-
-+ if(do_decomposition && compoundOnlyLSymbol != 0)
-+ {
-+ current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
-+ }
- lf = current_state.filterFinals(all_finals, alphabet,
- escaped_chars,
- uppercase, firstupper);
-@@ -969,16 +1181,22 @@
- if(limit == 0)
- {
- input_buffer.back(sf.size());
-- fputwc_unlocked(sf[0], output);
-+ writeEscaped(sf.substr(0,1), output);
- }
- else
- {
- input_buffer.back(1+(size-limit));
- wstring unknown_word = sf.substr(0, limit);
-- if(compoundDecomposition)
-+ if(do_decomposition)
- {
-+ if(!dictionaryCase)
-+ {
-+ firstupper = iswupper(sf[0]);
-+ uppercase = firstupper && iswupper(sf[sf.size()-1]);
-+ }
-+
- wstring compound = L"";
-- compound = decompose(unknown_word);
-+ compound = compoundAnalysis(unknown_word, uppercase, firstupper);
- if(compound != L"")
- {
- printWord(unknown_word, compound, output);
-@@ -1002,16 +1220,22 @@
- if(limit == 0)
- {
- input_buffer.back(sf.size());
-- fputwc_unlocked(sf[0], output);
-+ writeEscaped(sf.substr(0,1), output);
- }
- else
- {
- input_buffer.back(1+(size-limit));
- wstring unknown_word = sf.substr(0, limit);
-- if(compoundDecomposition)
-+ if(do_decomposition)
- {
-+ if(!dictionaryCase)
-+ {
-+ firstupper = iswupper(sf[0]);
-+ uppercase = firstupper && iswupper(sf[sf.size()-1]);
-+ }
-+
- wstring compound = L"";
-- compound = decompose(unknown_word);
-+ compound = compoundAnalysis(unknown_word, uppercase, firstupper);
- if(compound != L"")
- {
- printWord(unknown_word, compound, output);
-@@ -1296,19 +1520,27 @@
- fputwc(L'=', output);
- val = readGeneration(input, output);
- }
--
-+
- if(val == L'$' && outOfWord)
- {
- if(sf[0] == L'*' || sf[0] == L'%')
- {
-- if(mode != gm_clean)
-+ if(mode != gm_clean && mode != gm_tagged_nm)
- {
- writeEscaped(sf, output);
- }
-- else
-+ else if (mode == gm_clean)
- {
- writeEscaped(sf.substr(1), output);
- }
-+ else if(mode == gm_tagged_nm)
-+ {
-+ fputwc_unlocked(L'^', output);
-+ writeEscaped(removeTags(sf.substr(1)), output);
-+ fputwc_unlocked(L'/', output);
-+ writeEscapedWithTags(sf, output);
-+ fputwc_unlocked(L'$', output);
-+ }
- }
- else if(sf[0] == L'@')
- {
-@@ -1324,6 +1556,18 @@
- {
- writeEscaped(removeTags(sf), output);
- }
-+ else if(mode == gm_tagged)
-+ {
-+ writeEscaped(removeTags(sf), output);
-+ }
-+ else if(mode == gm_tagged_nm)
-+ {
-+ fputwc_unlocked(L'^', output);
-+ writeEscaped(removeTags(sf.substr(1)), output);
-+ fputwc_unlocked(L'/', output);
-+ writeEscapedWithTags(sf, output);
-+ fputwc_unlocked(L'$', output);
-+ }
- }
- else if(current_state.isFinal(all_finals))
- {
-@@ -1330,7 +1574,7 @@
- bool uppercase = sf.size() > 1 && iswupper(sf[1]);
- bool firstupper= iswupper(sf[0]);
-
-- if(mode == gm_tagged)
-+ if(mode == gm_tagged || mode == gm_tagged_nm)
- {
- fputwc_unlocked(L'^', output);
- }
-@@ -1339,10 +1583,10 @@
- escaped_chars,
- uppercase, firstupper).substr(1).c_str(),
- output);
-- if(mode == gm_tagged)
-+ if(mode == gm_tagged || mode == gm_tagged_nm)
- {
- fputwc_unlocked(L'/', output);
-- fputws_unlocked(sf.c_str(), output);
-+ writeEscapedWithTags(sf, output);
- fputwc_unlocked(L'$', output);
- }
-
-@@ -1360,9 +1604,26 @@
- }
- else if(mode == gm_unknown)
- {
-+ if(sf != L"")
-+ {
-+ fputwc_unlocked(L'#', output);
-+ writeEscaped(removeTags(sf), output);
-+ }
-+ }
-+ else if(mode == gm_tagged)
-+ {
- fputwc_unlocked(L'#', output);
- writeEscaped(removeTags(sf), output);
- }
-+ else if(mode == gm_tagged_nm)
-+ {
-+ fputwc_unlocked(L'^', output);
-+ writeEscaped(removeTags(sf), output);
-+ fputwc_unlocked(L'/', output);
-+ fputwc_unlocked(L'#', output);
-+ writeEscapedWithTags(sf, output);
-+ fputwc_unlocked(L'$', output);
-+ }
- }
-
- current_state = *initial_state;
-@@ -2033,19 +2294,62 @@
- }
-
- State current_state = *initial_state;
-- wstring sf = L"";
-- wstring queue = L"";
-- wstring result = L"";
-+ wstring sf = L""; // source language analysis
-+ wstring queue = L""; // symbols to be added to each target
-+ wstring result = L""; // result of looking up analysis in bidix
-
- outOfWord = false;
-
- skipUntil(input, output, L'^');
-- int val;
-+ pair<wstring,int> tr; // readBilingual return value, containing:
-+ int val; // the alphabet value of current symbol, and
-+ wstring symbol = L""; // the current symbol as a string
-+ bool seentags = false; // have we seen any tags at all in the analysis?
-
-- while((val = readGeneration(input, output)) != 0x7fffffff)
-+ bool seensurface = false;
-+ wstring surface = L"";
-+
-+ while(true) // ie. while(val != 0x7fffffff)
- {
-+ tr = readBilingual(input, output);
-+ symbol = tr.first;
-+ val = tr.second;
-+
-+ //fwprintf(stderr, L"> %S : %C : %d\n", tr.first.c_str(), tr.second, tr.second);
-+ if(biltransSurfaceForms && !seensurface && !outOfWord)
-+ {
-+ while(val != L'/' && val != 0x7fffffff)
-+ {
-+ surface = surface + symbol;
-+ alphabet.getSymbol(surface, val);
-+ tr = readBilingual(input, output);
-+ symbol = tr.first;
-+ val = tr.second;
-+ //fwprintf(stderr, L" == %S : %C : %d => %S\n", symbol.c_str(), val, val, surface.c_str());
-+ }
-+ seensurface = true;
-+ tr = readBilingual(input, output);
-+ symbol = tr.first;
-+ val = tr.second;
-+ }
-+
-+ if (val == 0x7fffffff)
-+ {
-+ break;
-+ }
-+
- if(val == L'$' && outOfWord)
- {
-+ if(!seentags) // if no tags: only return complete matches
-+ {
-+ bool uppercase = sf.size() > 1 && iswupper(sf[1]);
-+ bool firstupper= iswupper(sf[0]);
-+
-+ result = current_state.filterFinals(all_finals, alphabet,
-+ escaped_chars,
-+ uppercase, firstupper, 0);
-+ }
-+
- if(sf[0] == L'*')
- {
- printWordBilingual(sf, L"/"+sf, output);
-@@ -2055,14 +2359,23 @@
- printWordBilingual(sf, compose(result, queue), output);
- }
- else
-- {
-- printWordBilingual(sf, L"/@"+sf, output);
-+ { //xxx
-+ if(biltransSurfaceForms)
-+ {
-+ printWordBilingual(surface, L"/@"+surface, output);
-+ }
-+ else
-+ {
-+ printWordBilingual(sf, L"/@"+sf, output);
-+ }
- }
--
-+ seensurface = false;
-+ surface = L"";
- queue = L"";
- result = L"";
- current_state = *initial_state;
- sf = L"";
-+ seentags = false;
- }
- else if(iswspace(val) && sf.size() == 0)
- {
-@@ -2074,7 +2387,11 @@
- {
- sf += L'\\';
- }
-- alphabet.getSymbol(sf, val);
-+ alphabet.getSymbol(sf, val); // add symbol to sf iff alphabetic
-+ if(val == 0) // non-alphabetic, possibly unknown tag; add to sf
-+ {
-+ sf += symbol;
-+ }
- }
- else
- {
-@@ -2082,7 +2399,15 @@
- {
- sf += L'\\';
- }
-- alphabet.getSymbol(sf,val);
-+ alphabet.getSymbol(sf, val); // add symbol to sf iff alphabetic
-+ if(val == 0) // non-alphabetic, possibly unknown tag; add to sf
-+ {
-+ sf += symbol;
-+ }
-+ if(alphabet.isTag(val) || val == 0)
-+ {
-+ seentags = true;
-+ }
- if(current_state.size() != 0)
- {
- if(!alphabet.isTag(val) && iswupper(val) && !caseSensitive)
-@@ -2105,12 +2430,21 @@
- }
- if(current_state.size() == 0 && result != L"")
- {
-- if(alphabet.isTag(val))
-+ // We already have a result, but there is still more to read
-+ // of the analysis; following tags are not consumed, but
-+ // output as target language tags (added to result on
-+ // end-of-word)
-+ if(alphabet.isTag(val)) // known tag
- {
- alphabet.getSymbol(queue, val);
- }
-+ else if (val == 0) // non-alphabetic, possibly unknown tag
-+ {
-+ queue += symbol;
-+ }
- else
- {
-+ // There are no more alive transductions and the current symbol is not a tag -- unknown word!
- result = L"";
- }
- }
-@@ -2127,6 +2461,7 @@
- unsigned int end_point = input_word.size()-2;
- wstring queue = L"";
- bool mark = false;
-+ bool seentags = false; // have we seen any tags at all in the analysis?
-
- if(with_delim == false)
- {
-@@ -2160,6 +2495,7 @@
- }
- else if(input_word[i] == L'<')
- {
-+ seentags = true;
- symbol = L'<';
- for(unsigned int j = i + 1; j <= end_point; j++)
- {
-@@ -2217,7 +2553,7 @@
- }
-
- if(current_state.size() == 0)
-- {
-+ {
- if(symbol != L"" && result != L"")
- {
- queue.append(symbol);
-@@ -2224,20 +2560,39 @@
- }
- else
- {
-- // word is not present
-+ // word is not present
- if(with_delim)
-- {
-+ {
- result = L"^@" + input_word.substr(1);
-- }
-+ }
- else
-- {
-+ {
- result = L"@" + input_word;
-- }
-+ }
- return pair<wstring, int>(result, 0);
- }
- }
- }
-
-+ if (!seentags
-+ && L"" == current_state.filterFinals(all_finals, alphabet,
-+ escaped_chars,
-+ uppercase, firstupper, 0))
-+ {
-+ // word is not present
-+ if(with_delim)
-+ {
-+ result = L"^@" + input_word.substr(1);
-+ }
-+ else
-+ {
-+ result = L"@" + input_word;
-+ }
-+ return pair<wstring, int>(result, 0);
-+ }
-+
-+
-+
- // attach unmatched queue automatically
-
- if(queue != L"")
-@@ -2661,10 +3016,11 @@
- return str;
- }
-
-+
- void
--FSTProcessor::setDecompoundingMode(bool const value)
-+FSTProcessor::setBiltransSurfaceForms(bool const value)
- {
-- compoundDecomposition = value;
-+ biltransSurfaceForms = value;
- }
-
- void
-@@ -2688,7 +3044,7 @@
- bool
- FSTProcessor::getDecompoundingMode()
- {
-- return compoundDecomposition;
-+ return do_decomposition;
- }
-
- bool
-Index: lttoolbox/lt_comp.cc
-===================================================================
---- lttoolbox/lt_comp.cc (revision 21745)
-+++ lttoolbox/lt_comp.cc (working copy)
-@@ -23,6 +23,7 @@
- #include <iostream>
- #include <libgen.h>
- #include <string>
-+#include <getopt.h>
-
- using namespace std;
-
-@@ -31,7 +32,11 @@
- if(name != NULL)
- {
- cout << basename(name) << " v" << PACKAGE_VERSION <<": build a letter transducer from a dictionary" << endl;
-- cout << "USAGE: " << basename(name) << " lr | rl dictionary_file output_file [acx_file]" << endl;
-+ cout << "USAGE: " << basename(name) << " [-avh] lr | rl dictionary_file output_file [acx_file]" << endl;
-+ cout << " -v: set language variant" << endl;
-+ cout << " -a: set alternative (monodix)" << endl;
-+ cout << " -l: set left language variant (bidix)" << endl;
-+ cout << " -r: set right language variant (bidix)" << endl;
- cout << "Modes:" << endl;
- cout << " lr: left-to-right compilation" << endl;
- cout << " rl: right-to-left compilation" << endl;
-@@ -42,27 +47,113 @@
-
- int main(int argc, char *argv[])
- {
-- if(argc != 4 && argc != 5)
-+ Compiler c;
-+ c.setVerbose(false);
-+
-+#if HAVE_GETOPT_LONG
-+ int option_index=0;
-+#endif
-+
-+ string vl;
-+ string vr;
-+
-+ while (true) {
-+#if HAVE_GETOPT_LONG
-+ static struct option long_options[] =
-+ {
-+ {"alt", required_argument, 0, 'a'},
-+ {"var", required_argument, 0, 'v'},
-+ {"var-left", required_argument, 0, 'l'},
-+ {"var-right", required_argument, 0, 'r'},
-+ {"help", no_argument, 0, 'h'},
-+ {"verbose", no_argument, 0, 'V'},
-+ {0, 0, 0, 0}
-+ };
-+
-+ int cnt=getopt_long(argc, argv, "a:v:l:r:hV", long_options, &option_index);
-+#else
-+ int cnt=getopt(argc, argv, "a:v:l:r:hV");
-+#endif
-+ if (cnt==-1)
-+ break;
-+
-+ switch (cnt)
-+ {
-+ case 'a':
-+ c.setAltValue(optarg);
-+ break;
-+
-+ case 'v':
-+ c.setVariantValue(optarg);
-+ break;
-+
-+ case 'l':
-+ vl = optarg;
-+ c.setVariantLeftValue(vl);
-+ break;
-+
-+ case 'r':
-+ vr = optarg;
-+ c.setVariantRightValue(vr);
-+ break;
-+
-+ case 'V':
-+ c.setVerbose(true);
-+ break;
-+
-+ case 'h':
-+ default:
-+ endProgram(argv[0]);
-+ break;
-+ }
-+ }
-+
-+ string opc;
-+ string infile;
-+ string outfile;
-+ string acxfile;
-+
-+ switch(argc - optind + 1)
- {
-- endProgram(argv[0]);
-+ case 5:
-+ opc = argv[argc-4];
-+ infile = argv[argc-3];
-+ outfile = argv[argc-2];
-+ acxfile = argv[argc-1];
-+ break;
-+
-+ case 4:
-+ opc = argv[argc-3];
-+ infile = argv[argc-2];
-+ outfile = argv[argc-1];
-+ break;
-+
-+ default:
-+ endProgram(argv[0]);
-+ break;
- }
-
-- string opc = argv[1];
--
-- Compiler c;
--
--
- if(opc == "lr")
- {
-- if(argc == 5)
-+ if(vr == "" && vl != "")
- {
-- c.parseACX(argv[4], Compiler::COMPILER_RESTRICTION_LR_VAL);
-+ cout << "Error: -l specified, but mode is lr" << endl;
-+ endProgram(argv[0]);
- }
-- c.parse(argv[2], Compiler::COMPILER_RESTRICTION_LR_VAL);
-+ if(acxfile != "")
-+ {
-+ c.parseACX(acxfile, Compiler::COMPILER_RESTRICTION_LR_VAL);
-+ }
-+ c.parse(infile, Compiler::COMPILER_RESTRICTION_LR_VAL);
- }
- else if(opc == "rl")
- {
-- c.parse(argv[2], Compiler::COMPILER_RESTRICTION_RL_VAL);
-+ if(vl == "" && vr != "")
-+ {
-+ cout << "Error: -r specified, but mode is rl" << endl;
-+ endProgram(argv[0]);
-+ }
-+ c.parse(infile, Compiler::COMPILER_RESTRICTION_RL_VAL);
- }
- else
- {
-@@ -69,10 +160,10 @@
- endProgram(argv[0]);
- }
-
-- FILE *output = fopen(argv[3], "wb");
-+ FILE *output = fopen(outfile.c_str(), "wb");
- if(!output)
- {
-- cerr << "Error: Cannot open file '" << argv[2] << "'." << endl;
-+ cerr << "Error: Cannot open file '" << outfile << "'." << endl;
- exit(EXIT_FAILURE);
- }
- c.write(output);
-Index: lttoolbox/fst_processor.h
-===================================================================
---- lttoolbox/fst_processor.h (revision 21745)
-+++ lttoolbox/fst_processor.h (working copy)
-@@ -43,7 +43,8 @@
- gm_clean, // clear all
- gm_unknown, // display unknown words, clear transfer and generation tags
- gm_all, // display all
-- gm_tagged // tagged generation
-+ gm_tagged, // tagged generation
-+ gm_tagged_nm // clean tagged generation
- };
-
- /**
-@@ -57,8 +58,6 @@
- */
- map<wstring, TransExe, Ltstr> transducers;
-
-- Pool<vector<int> > *pool;
--
- /**
- * Current state of lexical analysis
- */
-@@ -130,6 +129,12 @@
- bool outOfWord;
-
- /**
-+ * true if we're automatically removing surface forms.
-+ */
-+ bool biltransSurfaceForms;
-+
-+
-+ /**
- * if true, makes always difference between uppercase and lowercase
- * characters
- */
-@@ -154,9 +159,30 @@
- /**
- * try analysing unknown words as compounds
- */
-- bool compoundDecomposition;
-+ bool do_decomposition;
-
- /**
-+ * Symbol of CompoundOnlyL
-+ */
-+ int compoundOnlyLSymbol;
-+
-+ /**
-+ * Symbol of CompoundR
-+ */
-+ int compoundRSymbol;
-+
-+ /**
-+ * Show or not the controls symbols (as compoundRSymbol)
-+ */
-+ bool showControlSymbols;
-+
-+ /**
-+ * Max compound elements
-+ * Hard coded for now, but there might come a switch one day
-+ */
-+ int compound_max_elements;
-+
-+ /**
- * Prints an error of input stream and exits
- */
- void streamError();
-@@ -219,6 +245,13 @@
- int readGeneration(FILE *input, FILE *output);
-
- /**
-+ * Read text from stream (biltrans version)
-+ * @param input the stream to read
-+ * @return the queue of 0-symbols, and the next symbol in the stream
-+ */
-+ pair<wstring, int> readBilingual(FILE *input, FILE *output);
-+
-+ /**
- * Read text from stream (SAO version)
- * @param input the stream to read
- * @return the next symbol in the stream
-@@ -248,7 +281,17 @@
- */
- void writeEscaped(wstring const &str, FILE *output);
-
-+
- /**
-+ * Write a string to an output stream, escaping all escapable characters
-+ * but keeping symbols without escaping
-+ * @param str the string to write, escaping characters
-+ * @param output the stream to write in
-+ */
-+ void writeEscapedWithTags(wstring const &str, FILE *output);
-+
-+
-+ /**
- * Checks if an string ends with a particular suffix
- * @param str the string to test
- * @param the searched suffix
-@@ -287,6 +330,8 @@
- */
- void printUnknownWord(wstring const &sf, FILE *output);
-
-+ void initDecompositionSymbols();
-+
- vector<wstring> numbers;
- int readTMAnalysis(FILE *input);
-
-@@ -294,7 +339,7 @@
- void printSpace(wchar_t const val, FILE *output);
- void skipUntil(FILE *input, FILE *output, wint_t const character);
- static wstring removeTags(wstring const &str);
-- wstring decompose(wstring str);
-+ wstring compoundAnalysis(wstring str, bool uppercase, bool firstupper);
- size_t firstNotAlpha(wstring const &sf);
-
- void analysis_wrapper_null_flush(FILE *input, FILE *output);
-@@ -338,9 +383,9 @@
-
- void setCaseSensitiveMode(bool const value);
- void setDictionaryCaseMode(bool const value);
-+ void setBiltransSurfaceForms(bool const value);
- void setNullFlush(bool const value);
- bool getNullFlush();
-- void setDecompoundingMode(bool const value);
- bool getDecompoundingMode();
- };
-
-Index: lttoolbox/lt_proc.cc
-===================================================================
---- lttoolbox/lt_proc.cc (revision 21745)
-+++ lttoolbox/lt_proc.cc (working copy)
-@@ -36,35 +36,42 @@
- void endProgram(char *name)
- {
- cout << basename(name) << ": process a stream with a letter transducer" << endl;
-- cout << "USAGE: " << basename(name) << " [-c] [-a|-g|-n|-d|-p|-s|-t|-b] fst_file [input_file [output_file]]" << endl;
-+ cout << "USAGE: " << basename(name) << " [ -a | -b | -c | -d | -e | -g | -n | -p | -s | -t | -v | -h -z -w ] fst_file [input_file [output_file]]" << endl;
- cout << "Options:" << endl;
- #if HAVE_GETOPT_LONG
- cout << " -a, --analysis: morphological analysis (default behavior)" << endl;
-- cout << " -b, --bilingual: lexical transference" << endl;
-+ cout << " -b, --bilingual: lexical transfer" << endl;
- cout << " -c, --case-sensitive: use the literal case of the incoming characters" << endl;
-+ cout << " -d, --debugged-gen morph. generation with all the stuff" <<endl;
-+ cout << " -e, --decompose-nouns: Try to decompound unknown words" << endl;
- cout << " -g, --generation: morphological generation" << endl;
-+ cout << " -l, --tagged-gen: morphological generation keeping lexical forms" << endl;
-+ cout << " -m, --tagged-nm-gen: same as -l but without unknown word marks" << endl;
- cout << " -n, --non-marked-gen morph. generation without unknown word marks" << endl;
-- cout << " -d, --debugged-gen morph. generation with all the stuff" <<endl;
-+ cout << " -o, --surf-bilingual: lexical transfer with surface forms" << endl;
- cout << " -p, --post-generation: post-generation" << endl;
-- cout << " -e, --decompose-compounds: try to decompose unknown word as compounds" << endl;
- cout << " -s, --sao: SAO annotation system input processing" << endl;
- cout << " -t, --transliteration: apply transliteration dictionary" << endl;
-+ cout << " -v, --version: version" << endl;
- cout << " -z, --null-flush: flush output on the null character " << endl;
- cout << " -w, --dictionary-case: use dictionary case instead of surface case" << endl;
-- cout << " -v, --version: version" << endl;
- cout << " -h, --help: show this help" << endl;
- #else
- cout << " -a: morphological analysis (default behavior)" << endl;
-+ cout << " -b: lexical transfer" << endl;
- cout << " -c: use the literal case of the incoming characters" << endl;
-+ cout << " -d: morph. generation with all the stuff" << endl;
-+ cout << " -e: try to decompose unknown words as compounds" << endl;
- cout << " -g: morphological generation" << endl;
-+ cout << " -l: morphological generation keeping lexical forms" << endl;
- cout << " -n: morph. generation without unknown word marks" << endl;
-+ cout << " -o: lexical transfer with surface forms" << endl;
- cout << " -p: post-generation" << endl;
-- cout << " -e: try to decompose unknown words as compounds" << endl;
- cout << " -s: SAO annotation system input processing" << endl;
- cout << " -t: apply transliteration dictionary" << endl;
-+ cout << " -v: version" << endl;
- cout << " -z: flush output on the null character " << endl;
- cout << " -w: use dictionary case instead of surface case" << endl;
-- cout << " -v: version" << endl;
- cout << " -h: show this help" << endl;
- #endif
- exit(EXIT_FAILURE);
-@@ -88,10 +95,12 @@
- {
- {"analysis", 0, 0, 'a'},
- {"bilingual", 0, 0, 'b'},
-+ {"surf-bilingual", 0, 0, 'o'},
- {"generation", 0, 0, 'g'},
- {"non-marked-gen", 0, 0, 'n'},
- {"debugged-gen", 0, 0, 'd'},
- {"tagged-gen", 0, 0, 'l'},
-+ {"tagged-nm-gen", 0, 0, 'm'},
- {"post-generation", 0, 0, 'p'},
- {"sao", 0, 0, 's'},
- {"transliteration", 0, 0, 't'},
-@@ -107,9 +116,9 @@
- {
- #if HAVE_GETOPT_LONG
- int option_index;
-- int c = getopt_long(argc, argv, "abceglndpstzwvh", long_options, &option_index);
-+ int c = getopt_long(argc, argv, "abceglmndopstzwvh", long_options, &option_index);
- #else
-- int c = getopt(argc, argv, "abceglndpstzwvh");
-+ int c = getopt(argc, argv, "abceglmndopstzwvh");
- #endif
-
- if(c == -1)
-@@ -123,13 +132,12 @@
- fstp.setCaseSensitiveMode(true);
- break;
-
-- case 'e':
-- fstp.setDecompoundingMode(true);
-- break;
--
-+ case 'e':
- case 'a':
- case 'b':
-+ case 'o':
- case 'l':
-+ case 'm':
- case 'g':
- case 'n':
- case 'd':
-@@ -248,11 +256,19 @@
- fstp.initGeneration();
- checkValidity(fstp);
- fstp.generation(input, output, gm_all);
-+ break;
-
- case 'l':
- fstp.initGeneration();
- checkValidity(fstp);
- fstp.generation(input, output, gm_tagged);
-+ break;
-+
-+ case 'm':
-+ fstp.initGeneration();
-+ checkValidity(fstp);
-+ fstp.generation(input, output, gm_tagged_nm);
-+ break;
-
- case 'p':
- fstp.initPostgeneration();
-@@ -272,11 +288,24 @@
- fstp.transliteration(input, output);
- break;
-
-+ case 'o':
-+ fstp.initBiltrans();
-+ checkValidity(fstp);
-+ fstp.setBiltransSurfaceForms(true);
-+ fstp.bilingual(input, output);
-+ break;
-+
- case 'b':
- fstp.initBiltrans();
- checkValidity(fstp);
- fstp.bilingual(input, output);
- break;
-+
-+ case 'e':
-+ fstp.initDecomposition();
-+ checkValidity(fstp);
-+ fstp.analysis(input, output);
-+ break;
-
- case 'a':
- default:
-Index: lttoolbox/expander.cc
-===================================================================
---- lttoolbox/expander.cc (revision 21745)
-+++ lttoolbox/expander.cc (working copy)
-@@ -295,9 +295,18 @@
- {
- wstring atributo=this->attrib(Compiler::COMPILER_RESTRICTION_ATTR);
- wstring entrname=this->attrib(Compiler::COMPILER_LEMMA_ATTR);
-+ wstring altval = this->attrib(Compiler::COMPILER_ALT_ATTR);
-+ wstring varval = this->attrib(Compiler::COMPILER_V_ATTR);
-+ wstring varl = this->attrib(Compiler::COMPILER_VL_ATTR);
-+ wstring varr = this->attrib(Compiler::COMPILER_VR_ATTR);
-
- wstring myname = L"";
-- if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == L"yes")
-+ if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == L"yes"
-+ || altval != L"" && altval != alt
-+ || (varval != L"" && varval != variant && atributo == Compiler::COMPILER_RESTRICTION_RL_VAL)
-+ || ((varl != L"" && varl != variant_left) && (varr != L"" && varr != variant_right))
-+ || (varl != L"" && varl != variant_left && atributo == Compiler::COMPILER_RESTRICTION_RL_VAL)
-+ || (varr != L"" && varr != variant_right && atributo == Compiler::COMPILER_RESTRICTION_LR_VAL))
- {
- do
- {
-@@ -316,11 +325,14 @@
- }
-
- EntList items, items_lr, items_rl;
-- if(atributo == Compiler::COMPILER_RESTRICTION_LR_VAL)
-+ if(atributo == Compiler::COMPILER_RESTRICTION_LR_VAL
-+ || (varval != L"" && varval != variant && atributo != Compiler::COMPILER_RESTRICTION_RL_VAL)
-+ || varl != L"" && varl != variant_left)
- {
- items_lr.push_back(pair<wstring, wstring>(L"", L""));
- }
-- else if(atributo == Compiler::COMPILER_RESTRICTION_RL_VAL)
-+ else if(atributo == Compiler::COMPILER_RESTRICTION_RL_VAL
-+ || (varr != L"" && varr != variant_right))
- {
- items_rl.push_back(pair<wstring, wstring>(L"", L""));
- }
-@@ -594,3 +606,27 @@
- it->second.append(endings.second);
- }
- }
-+
-+void
-+Expander::setAltValue(string const &a)
-+{
-+ alt = XMLParseUtil::stows(a);
-+}
-+
-+void
-+Expander::setVariantValue(string const &v)
-+{
-+ variant = XMLParseUtil::stows(v);
-+}
-+
-+void
-+Expander::setVariantLeftValue(string const &v)
-+{
-+ variant_left = XMLParseUtil::stows(v);
-+}
-+
-+void
-+Expander::setVariantRightValue(string const &v)
-+{
-+ variant_right = XMLParseUtil::stows(v);
-+}
-Index: lttoolbox/lt-expand.1
-===================================================================
---- lttoolbox/lt-expand.1 (revision 21745)
-+++ lttoolbox/lt-expand.1 (working copy)
-@@ -9,11 +9,28 @@
- architecture: \fBhttp://www.apertium.org\fR.
- .SH SYNOPSIS
- .B lt-expand
-+[
-+.B \-a \fR|
-+.B \-v \fR|
-+.B \-l \fR|
-+.B \-r \fR|
-+.B \-h
-+]
- dictionary_file [output_file]
- .PP
-+.B lt-expand
-+[
-+.B \-\-alt \fR|
-+.B \-\-var \fR|
-+.B \-\-var\-left \fR|
-+.B \-\-var\-right \fR|
-+.B \-\-help
-+]
-+dictionary_file [output_file]
-+.PP
- .SH DESCRIPTION
- .BR lt-expand
--Is the application responsible of expanding a dictionary into a
-+Is the application responsible for expanding a dictionary into a
- simple list of input string-output string pairs by eliminating
- paradigms through substitution and unfolding.
- .PP
-@@ -20,6 +37,23 @@
- The output goes to \fIoutput_file\fR if it is present or to standard
- output if it is missing.
- .PP
-+.SH OPTIONS
-+.TP
-+.B \-a, \-\-alt
-+Sets the value of the \fIalt\fR attribute to use in expansion
-+.TP
-+.B \-v, \-\-var
-+Sets the value of the \fIv\fR attribute to use in expansion of monodixes
-+.TP
-+.B \-l, \-\-var\-left
-+Sets the value of the \fIvl\fR attribute to use in expansion of bidixes
-+.TP
-+.B \-r, \-\-var\-right
-+Sets the value of the \fIvr\fR attribute to use in expansion of bidixes
-+.TP
-+.B \-h, \-\-help
-+Prints a short help message
-+.PP
- .SH FILES
- .B dictionary_file
- The input dictionary to expand.
-@@ -34,5 +68,4 @@
- .SH BUGS
- Lots of...lurking in the dark and waiting for you!
- .SH AUTHOR
--(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
--reserved.
-+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante.
-Index: lttoolbox/dix.dtd
-===================================================================
---- lttoolbox/dix.dtd (revision 21745)
-+++ lttoolbox/dix.dtd (working copy)
-@@ -1,4 +1,21 @@
- <!--
-+ Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
-+
-+ This program is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU General Public License as
-+ published by the Free Software Foundation; either version 2 of the
-+ License, or (at your option) any later version.
-+
-+ This program is distributed in the hope that it will be useful, but
-+ WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ General Public License for more details.
-+
-+ You should have received a copy of the GNU General Public License
-+ along with this program; if not, write to the Free Software
-+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-+ 02111-1307, USA.
-+
- DTD for the format of dictionaries
- -->
- <!ELEMENT dictionary (alphabet?, sdefs?,
-@@ -57,6 +74,10 @@
- i CDATA #IMPLIED
- slr CDATA #IMPLIED
- srl CDATA #IMPLIED
-+ alt CDATA #IMPLIED
-+ v CDATA #IMPLIED
-+ vl CDATA #IMPLIED
-+ vr CDATA #IMPLIED
- >
- <!-- r: restriction LR: left-to-right,
- RL: right-to-left -->
-@@ -66,6 +87,10 @@
- <!-- i: ignore ('yes') means ignore, otherwise it is not ignored) -->
- <!-- slr: translation sense when translating from left to right -->
- <!-- srl: translation sense when translating from right to left -->
-+ <!-- alt: alternative entries are omitted if not selected -->
-+ <!-- v: variant sets (monodix) direction restrictions based on language variant -->
-+ <!-- vl: variant left sets direction restrictions based on language variant for language on left of bidix -->
-+ <!-- vr: variant right sets direction restrictions based on language variant for language on right of bidix -->
- <!ELEMENT par EMPTY>
- <!-- reference to paradigm -->
- <!ATTLIST par
-Index: lttoolbox/compiler.cc
-===================================================================
---- lttoolbox/compiler.cc (revision 21745)
-+++ lttoolbox/compiler.cc (working copy)
-@@ -56,6 +56,10 @@
- wstring const Compiler::COMPILER_LEMMA_ATTR = L"lm";
- wstring const Compiler::COMPILER_IGNORE_ATTR = L"i";
- wstring const Compiler::COMPILER_IGNORE_YES_VAL = L"yes";
-+wstring const Compiler::COMPILER_ALT_ATTR = L"alt";
-+wstring const Compiler::COMPILER_V_ATTR = L"v";
-+wstring const Compiler::COMPILER_VL_ATTR = L"vl";
-+wstring const Compiler::COMPILER_VR_ATTR = L"vr";
-
- Compiler::Compiler()
- {
-@@ -417,6 +421,12 @@
- }
- }
-
-+ if(verbose && first_element && (both_sides.front() == (int)L' '))
-+ {
-+ wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-+ wcerr << L"): Entry begins with space." << endl;
-+ }
-+ first_element = false;
- EntryToken e;
- e.setSingleTransduction(both_sides, both_sides);
- return e;
-@@ -444,6 +454,13 @@
- readString(lhs, name);
- }
- }
-+
-+ if(verbose && first_element && (lhs.front() == (int)L' '))
-+ {
-+ wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-+ wcerr << L"): Entry begins with space." << endl;
-+ }
-+ first_element = false;
-
- skip(name, COMPILER_RIGHT_ELEM);
-
-@@ -480,7 +497,15 @@
- {
- EntryToken e;
- wstring nomparadigma = attrib(COMPILER_N_ATTR);
-+ first_element = false;
-
-+ if(current_paradigm != L"" && nomparadigma == current_paradigm)
-+ {
-+ wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-+ wcerr << L"): Paradigm refers to itself '" << nomparadigma << L"'." <<endl;
-+ exit(EXIT_FAILURE);
-+ }
-+
- if(paradigms.find(nomparadigma) == paradigms.end())
- {
- wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-@@ -632,9 +657,18 @@
- {
- wstring atributo=this->attrib(COMPILER_RESTRICTION_ATTR);
- wstring ignore = this->attrib(COMPILER_IGNORE_ATTR);
-+ wstring altval = this->attrib(COMPILER_ALT_ATTR);
-+ wstring varval = this->attrib(COMPILER_V_ATTR);
-+ wstring varl = this->attrib(COMPILER_VL_ATTR);
-+ wstring varr = this->attrib(COMPILER_VR_ATTR);
-
- //�if entry is masked by a restriction of direction or an ignore mark
-- if((atributo != L"" && atributo != direction) || ignore == COMPILER_IGNORE_YES_VAL)
-+ if((atributo != L"" && atributo != direction)
-+ || ignore == COMPILER_IGNORE_YES_VAL
-+ || (altval != L"" && altval != alt)
-+ || (direction == COMPILER_RESTRICTION_RL_VAL && varval != L"" && varval != variant)
-+ || (direction == COMPILER_RESTRICTION_RL_VAL && varl != L"" && varl != variant_left)
-+ || (direction == COMPILER_RESTRICTION_LR_VAL && varr != L"" && varr != variant_right))
- {
- // parse to the end of the entry
- wstring name = L"";
-@@ -662,6 +696,11 @@
- wstring name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
- skipBlanks(name);
-
-+ if(current_paradigm == L"" && verbose)
-+ {
-+ first_element = true;
-+ }
-+
- int tipo = xmlTextReaderNodeType(reader);
- if(name == COMPILER_PAIR_ELEM)
- {
-@@ -845,3 +884,33 @@
- it->second.write(output);
- }
- }
-+
-+void
-+Compiler::setAltValue(string const &a)
-+{
-+ alt = XMLParseUtil::stows(a);
-+}
-+
-+void
-+Compiler::setVariantValue(string const &v)
-+{
-+ variant = XMLParseUtil::stows(v);
-+}
-+
-+void
-+Compiler::setVariantLeftValue(string const &v)
-+{
-+ variant_left = XMLParseUtil::stows(v);
-+}
-+
-+void
-+Compiler::setVariantRightValue(string const &v)
-+{
-+ variant_right = XMLParseUtil::stows(v);
-+}
-+
-+void
-+Compiler::setVerbose(bool verbosity)
-+{
-+ verbose = verbosity;
-+}
-Index: lttoolbox/transducer.h
-===================================================================
---- lttoolbox/transducer.h (revision 21745)
-+++ lttoolbox/transducer.h (working copy)
-@@ -146,6 +146,13 @@
- bool isFinal(int const state) const;
-
- /**
-+ * Test if a pattern is recognised by the FST
-+ * @param a widestring of the pattern to be recognised
-+ * @return true if the pattern is recognised by the transducer
-+ */
-+ bool recognise(wstring patro, Alphabet &a, FILE *err = stderr);
-+
-+ /**
- * Set the state as a final or not, yes by default
- * @param state the state
- * @param value if true, the state is set as final state
-@@ -179,6 +186,12 @@
- void reverse(int const epsilon_tag = 0);
-
- /**
-+ * Print all the transductions of a transducer in ATT format
-+ * @param epsilon_tag the tag to take as epsilon
-+ */
-+ void show(Alphabet &a, FILE *output = stdout, int const epsilon_tag = 0);
-+
-+ /**
- * Determinize the transducer
- * @param epsilon_tag the tag to take as epsilon
- */
-@@ -242,6 +255,12 @@
- bool isEmpty(int const state) const;
-
- /**
-+ * Returns the number of transitions from a given state
-+ * @return the number of transitions
-+ */
-+ int getStateSize(int const state);
-+
-+ /**
- * Write method
- * @param output the stream to write to
- * @param decalage offset to sum to the tags
-Index: lttoolbox/lt_expand.cc
-===================================================================
---- lttoolbox/lt_expand.cc (revision 21745)
-+++ lttoolbox/lt_expand.cc (working copy)
-@@ -24,6 +24,7 @@
- #include <iostream>
- #include <libgen.h>
- #include <string>
-+#include <getopt.h>
-
- #ifdef _MSC_VER
- #include <io.h>
-@@ -37,7 +38,7 @@
- if(name != NULL)
- {
- cout << basename(name) << " v" << PACKAGE_VERSION <<": expand the contents of a dictionary file" << endl;
-- cout << "USAGE: " << basename(name) << " dictionary_file [output_file]" << endl;
-+ cout << "USAGE: " << basename(name) << " [-avlrh] dictionary_file [output_file]" << endl;
- }
- exit(EXIT_FAILURE);
- }
-@@ -45,14 +46,67 @@
- int main(int argc, char *argv[])
- {
- FILE *input = NULL, *output = NULL;
-+ Expander e;
-
-- switch(argc)
-+#if HAVE_GETOPT_LONG
-+ int option_index=0;
-+#endif
-+
-+ while (true) {
-+#if HAVE_GETOPT_LONG
-+ static struct option long_options[] =
-+ {
-+ {"alt", required_argument, 0, 'a'},
-+ {"var", required_argument, 0, 'v'},
-+ {"var-left", required_argument, 0, 'l'},
-+ {"var-right", required_argument, 0, 'r'},
-+ {"help", no_argument, 0, 'h'},
-+ {0, 0, 0, 0}
-+ };
-+
-+ int cnt=getopt_long(argc, argv, "a:v:l:r:h", long_options, &option_index);
-+#else
-+ int cnt=getopt(argc, argv, "a:v:l:r:h");
-+#endif
-+ if (cnt==-1)
-+ break;
-+
-+ switch (cnt)
-+ {
-+ case 'a':
-+ e.setAltValue(optarg);
-+ break;
-+
-+ case 'v':
-+ e.setVariantValue(optarg);
-+ break;
-+
-+ case 'l':
-+ e.setVariantLeftValue(optarg);
-+ break;
-+
-+ case 'r':
-+ e.setVariantRightValue(optarg);
-+ break;
-+
-+ case 'h':
-+ default:
-+ endProgram(argv[0]);
-+ break;
-+ }
-+ }
-+
-+ string infile;
-+ string outfile;
-+
-+ switch(argc - optind + 1)
- {
- case 2:
-- input = fopen(argv[1], "rb");
-+ infile = argv[argc-1];
-+ input = fopen(infile.c_str(), "rb");
- if(input == NULL)
- {
-- cerr << "Error: Cannot open file '" << argv[1] << "'." << endl;
-+ cerr << "Error: Cannot open file '" << infile << "'." << endl;
- exit(EXIT_FAILURE);
- }
- fclose(input);
-@@ -60,18 +114,20 @@
- break;
-
- case 3:
-- input = fopen(argv[1], "rb");
-+ infile = argv[argc-2];
-+ input = fopen(infile.c_str(), "rb");
- if(input == NULL)
- {
-- cerr << "Error: Cannot open file '" << argv[1] << "'." << endl;
-+ cerr << "Error: Cannot open file '" << infile << "'." << endl;
- exit(EXIT_FAILURE);
- }
- fclose(input);
-
-- output = fopen(argv[2], "wb");
-+ outfile = argv[argc-1];
-+ output = fopen(argv[argc-1], "wb");
- if(output == NULL)
- {
-- cerr << "Error: Cannot open file '" << argv[2] << "'." << endl;
-+ cerr << "Error: Cannot open file '" << outfile << "'." << endl;
- exit(EXIT_FAILURE);
- }
- break;
-@@ -85,8 +141,7 @@
- _setmode(_fileno(output), _O_U8TEXT);
- #endif
-
-- Expander e;
-- e.expand(argv[1], output);
-+ e.expand(infile, output);
- fclose(output);
-
- return EXIT_SUCCESS;
-Index: lttoolbox/state.cc
-===================================================================
---- lttoolbox/state.cc (revision 21745)
-+++ lttoolbox/state.cc (working copy)
-@@ -20,10 +20,15 @@
-
- #include <cstring>
- #include <cwctype>
-+#include <climits>
-
--State::State(Pool<vector<int> > *p)
-+//debug//
-+//#include <iostream>
-+//using namespace std;
-+//debug//
-+
-+State::State()
- {
-- pool = p;
- }
-
- State::~State()
-@@ -51,10 +56,9 @@
- void
- State::destroy()
- {
-- // release references
- for(size_t i = 0, limit = state.size(); i != limit; i++)
- {
-- pool->release(state[i].sequence);
-+ delete state[i].sequence;
- }
-
- state.clear();
-@@ -66,15 +70,14 @@
- // release references
- for(size_t i = 0, limit = state.size(); i != limit; i++)
- {
-- pool->release(state[i].sequence);
-+ delete state[i].sequence;
- }
-
- state = s.state;
-- pool = s.pool;
-
- for(size_t i = 0, limit = state.size(); i != limit; i++)
- {
-- vector<int> *tmp = pool->get();
-+ vector<int> *tmp = new vector<int>();
- *tmp = *(state[i].sequence);
- state[i].sequence = tmp;
- }
-@@ -90,7 +93,7 @@
- State::init(Node *initial)
- {
- state.clear();
-- state.push_back(TNodeState(initial,pool->get(),false));
-+ state.push_back(TNodeState(initial, new vector<int>(), false));
- state[0].sequence->clear();
- epsilonClosure();
- }
-@@ -113,7 +116,7 @@
- {
- for(int j = 0; j != it->second.size; j++)
- {
-- vector<int> *new_v = pool->get();
-+ vector<int> *new_v = new vector<int>();
- *new_v = *(state[i].sequence);
- if(it->first != 0)
- {
-@@ -122,7 +125,7 @@
- new_state.push_back(TNodeState(it->second.dest[j], new_v, state[i].dirty||false));
- }
- }
-- pool->release(state[i].sequence);
-+ delete state[i].sequence;
- }
-
- state = new_state;
-@@ -147,8 +150,8 @@
- {
- for(int j = 0; j != it->second.size; j++)
- {
-- vector<int> *new_v = pool->get();
-- *new_v = *(state[i].sequence);
-+ vector<int> *new_v = new vector<int>();
-+ *new_v = *(state[i].sequence);
- if(it->first != 0)
- {
- new_v->push_back(it->second.out_tag[j]);
-@@ -161,7 +164,7 @@
- {
- for(int j = 0; j != it->second.size; j++)
- {
-- vector<int> *new_v = pool->get();
-+ vector<int> *new_v = new vector<int>();
- *new_v = *(state[i].sequence);
- if(it->first != 0)
- {
-@@ -170,7 +173,7 @@
- new_state.push_back(TNodeState(it->second.dest[j], new_v, true));
- }
- }
-- pool->release(state[i].sequence);
-+ delete state[i].sequence;
- }
-
- state = new_state;
-@@ -187,7 +190,7 @@
- {
- for(int j = 0 ; j != it2->second.size; j++)
- {
-- vector<int> *tmp = pool->get();
-+ vector<int> *tmp = new vector<int>();
- *tmp = *(state[i].sequence);
- if(it2->second.out_tag[j] != 0)
- {
-@@ -199,6 +202,69 @@
- }
- }
-
-+void
-+State::apply(int const input, int const alt1, int const alt2)
-+{
-+ vector<TNodeState> new_state;
-+ if(input == 0 || alt1 == 0 || alt2 == 0)
-+ {
-+ state = new_state;
-+ return;
-+ }
-+
-+ for(size_t i = 0, limit = state.size(); i != limit; i++)
-+ {
-+ map<int, Dest>::const_iterator it;
-+ it = state[i].where->transitions.find(input);
-+ if(it != state[i].where->transitions.end())
-+ {
-+ for(int j = 0; j != it->second.size; j++)
-+ {
-+ vector<int> *new_v = new vector<int>();
-+ *new_v = *(state[i].sequence);
-+ if(it->first != 0)
-+ {
-+ new_v->push_back(it->second.out_tag[j]);
-+ }
-+ new_state.push_back(TNodeState(it->second.dest[j], new_v, state[i].dirty||false));
-+ }
-+ }
-+ it = state[i].where->transitions.find(alt1);
-+ if(it != state[i].where->transitions.end())
-+ {
-+ for(int j = 0; j != it->second.size; j++)
-+ {
-+ vector<int> *new_v = new vector<int>();
-+ *new_v = *(state[i].sequence);
-+ if(it->first != 0)
-+ {
-+ new_v->push_back(it->second.out_tag[j]);
-+ }
-+ new_state.push_back(TNodeState(it->second.dest[j], new_v, true));
-+ }
-+ }
-+ it = state[i].where->transitions.find(alt2);
-+ if(it != state[i].where->transitions.end())
-+ {
-+ for(int j = 0; j != it->second.size; j++)
-+ {
-+ vector<int> *new_v = new vector<int>();
-+ *new_v = *(state[i].sequence);
-+ if(it->first != 0)
-+ {
-+ new_v->push_back(it->second.out_tag[j]);
-+ }
-+ new_state.push_back(TNodeState(it->second.dest[j], new_v, true));
-+ }
-+ }
-+
-+ delete state[i].sequence;
-+ }
-+
-+ state = new_state;
-+}
-+
-+
- void
- State::step(int const input)
- {
-@@ -213,6 +279,37 @@
- epsilonClosure();
- }
-
-+void
-+State::step(int const input, int const alt1, int const alt2)
-+{
-+ apply(input, alt1, alt2);
-+ epsilonClosure();
-+}
-+
-+void
-+State::step_case(wchar_t val, wchar_t val2, bool caseSensitive)
-+{
-+ if (!iswupper(val) || caseSensitive) {
-+ step(val, val2);
-+ } else if(val != towlower(val)) {
-+ step(val, towlower(val), val2);
-+ } else {
-+ step(val, val2);
-+ }
-+}
-+
-+
-+void
-+State::step_case(wchar_t val, bool caseSensitive)
-+{
-+ if (!iswupper(val) || caseSensitive) {
-+ step(val);
-+ } else {
-+ step(val, towlower(val));
-+ }
-+}
-+
-+
- bool
- State::isFinal(set<Node *> const &finals) const
- {
-@@ -282,6 +379,60 @@
- return result;
- }
-
-+
-+set<pair<wstring, vector<wstring> > >
-+State::filterFinalsLRX(set<Node *> const &finals,
-+ Alphabet const &alphabet,
-+ set<wchar_t> const &escaped_chars,
-+ bool uppercase, bool firstupper, int firstchar) const
-+{
-+ set<pair<wstring, vector<wstring> > > results;
-+
-+ vector<wstring> current_result;
-+ wstring rule_id = L"";
-+
-+ // /<$><select>station<n><ANY_TAG><$><skip><6>/<$><select>station<n><ANY_TAG><$><skip><6>
-+
-+ // if <$> current_result.push_back(current_word)
-+ // if / results.insert(current_result)
-+
-+ for(size_t i = 0, limit = state.size(); i != limit; i++)
-+ {
-+ if(finals.find(state[i].where) != finals.end())
-+ {
-+ current_result.clear();
-+ rule_id = L"";
-+ wstring current_word = L"";
-+ for(size_t j = 0, limit2 = state[i].sequence->size(); j != limit2; j++)
-+ {
-+ if(escaped_chars.find((*(state[i].sequence))[j]) != escaped_chars.end())
-+ {
-+ current_word += L'\\';
-+ }
-+ wstring sym = L"";
-+ alphabet.getSymbol(sym, (*(state[i].sequence))[j], uppercase);
-+ if(sym == L"<$>")
-+ {
-+ if(current_word != L"")
-+ {
-+ current_result.push_back(current_word);
-+ }
-+ current_word = L"";
-+ }
-+ else
-+ {
-+ current_word += sym;
-+ }
-+ }
-+ rule_id = current_word;
-+ results.insert(make_pair(rule_id, current_result));
-+ }
-+ }
-+
-+ return results;
-+}
-+
-+
- wstring
- State::filterFinalsSAO(set<Node *> const &finals,
- Alphabet const &alphabet,
-@@ -438,3 +589,149 @@
-
- return result;
- }
-+
-+
-+
-+void
-+State::pruneCompounds(int requiredSymbol, int separationSymbol, int compound_max_elements)
-+{
-+ int minNoOfCompoundElements = compound_max_elements;
-+ int *noOfCompoundElements = new int[state.size()];
-+
-+ //wcerr << L"pruneCompounds..." << endl;
-+
-+ for (unsigned int i = 0; i<state.size(); i++) {
-+ vector<int> seq = *state.at(i).sequence;
-+
-+ if (lastPartHasRequiredSymbol(seq, requiredSymbol, separationSymbol)) {
-+ int this_noOfCompoundElements = 0;
-+ for (int j = seq.size()-2; j>0; j--) if (seq.at(j)==separationSymbol) this_noOfCompoundElements++;
-+ noOfCompoundElements[i] = this_noOfCompoundElements;
-+ minNoOfCompoundElements = (minNoOfCompoundElements < this_noOfCompoundElements) ?
-+ minNoOfCompoundElements : this_noOfCompoundElements;
-+ }
-+ else {
-+ noOfCompoundElements[i] = INT_MAX;
-+ //wcerr << L"Prune - No requiered symbol in state number " << i << endl;
-+ }
-+ }
-+
-+ // remove states with more than minimum number of compounds (or without the requiered symbol in the last part)
-+ vector<TNodeState>::iterator it = state.begin();
-+ int i=0;
-+ while(it != state.end()) {
-+ if (noOfCompoundElements[i] > minNoOfCompoundElements) {
-+ delete (*it).sequence;
-+ it = state.erase(it);
-+ //wcerr << L"Prune - State number " << i << L" removed!" << endl;
-+ }
-+ else it++;
-+ i++;
-+ }
-+
-+ delete[] noOfCompoundElements;
-+}
-+
-+
-+
-+void
-+State::pruneStatesWithForbiddenSymbol(int forbiddenSymbol)
-+{
-+ vector<TNodeState>::iterator it = state.begin();
-+ while(it != state.end()) {
-+ vector<int> *seq = (*it).sequence;
-+ bool found = false;
-+ for(int i = seq->size()-1; i>=0; i--) {
-+ if(seq->at(i) == forbiddenSymbol) {
-+ i=-1;
-+ delete (*it).sequence;
-+ it = state.erase(it);
-+ found = true;
-+ }
-+ }
-+ if (!found) it++;
-+ }
-+}
-+
-+
-+
-+bool
-+State::lastPartHasRequiredSymbol(const vector<int> &seq, int requiredSymbol, int separationSymbol)
-+{
-+ // state is final - it should be restarted it with all elements in stateset restart_state, with old symbols conserved
-+ bool restart=false;
-+ for (int n=seq.size()-1; n>=0; n--) {
-+ int symbol=seq.at(n);
-+ if (symbol==requiredSymbol) {
-+ restart=true;
-+ break;
-+ }
-+ if (symbol==separationSymbol) {
-+ break;
-+ }
-+ }
-+ return restart;
-+}
-+
-+
-+void
-+State::restartFinals(const set<Node *> &finals, int requiredSymbol, State *restart_state, int separationSymbol)
-+{
-+
-+ for (unsigned int i=0; i<state.size(); i++) {
-+ TNodeState state_i = state.at(i);
-+ // A state can be a possible final state and still have transitions
-+
-+ if (finals.count(state_i.where) > 0) {
-+ bool restart = lastPartHasRequiredSymbol(*(state_i.sequence), requiredSymbol, separationSymbol);
-+ if (restart) {
-+ if (restart_state != NULL) {
-+ for (unsigned int j=0; j<restart_state->state.size(); j++) {
-+ TNodeState initst = restart_state->state.at(j);
-+ vector<int> *tnvec = new vector<int>;
-+
-+ for(unsigned int k=0; k < state_i.sequence->size(); k++) tnvec->push_back(state_i.sequence->at(k));
-+ TNodeState tn(initst.where, tnvec, state_i.dirty);
-+ tn.sequence->push_back(separationSymbol);
-+ state.push_back(tn);
-+ }
-+ }
-+ }
-+ }
-+ }
-+}
-+
-+
-+
-+wstring
-+State::getReadableString(const Alphabet &a)
-+{
-+ wstring retval = L"[";
-+
-+ for(unsigned int i=0; i<state.size(); i++) {
-+ vector<int>* seq = state.at(i).sequence;
-+ if(seq != NULL) for (unsigned int j=0; j<seq->size(); j++) {
-+ wstring ws = L"";
-+ a.getSymbol(ws, seq->at(j));
-+ //if(ws == L"") ws = L"?";
-+ retval.append(ws);
-+ }
-+
-+ /*Node *where = state.at(i).where;
-+ if(where == NULL) retval.append(L"→@null");
-+ else {
-+ retval.append(L"→");
-+ map<int, Dest>::iterator it;
-+ wstring ws;
-+ for (it = where->transitions.begin(); it != where->transitions.end(); it++) {
-+ int symbol = (*it).first;
-+ a.getSymbol(ws, symbol);
-+ retval.append(ws);
-+ }
-+ }*/
-+ if (i+1 < state.size()) retval.append(L", ");
-+ }
-+ retval.append(L"]");
-+ return retval;
-+}
-+
-Index: lttoolbox/alphabet.cc
-===================================================================
---- lttoolbox/alphabet.cc (revision 21745)
-+++ lttoolbox/alphabet.cc (working copy)
-@@ -221,3 +221,9 @@
- {
- return spairinv[code];
- }
-+
-+
-+void Alphabet::setSymbol(int symbol, wstring newSymbolString) {
-+ //Should be a special character!
-+ if (symbol < 0) slexicinv[-symbol-1] = newSymbolString;
-+}
-Index: lttoolbox/lt-tmxproc.1
-===================================================================
---- lttoolbox/lt-tmxproc.1 (revision 21745)
-+++ lttoolbox/lt-tmxproc.1 (working copy)
-@@ -30,5 +30,4 @@
- .SH BUGS
- Lots of...lurking in the dark and waiting for you!
- .SH AUTHOR
--(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
--reserved.
-+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante.
-Index: lttoolbox/lt-comp.1
-===================================================================
---- lttoolbox/lt-comp.1 (revision 21745)
-+++ lttoolbox/lt-comp.1 (working copy)
-@@ -10,10 +10,30 @@
- .SH SYNOPSIS
- .B lt-comp
- [
-+.B \-a \fR|
-+.B \-v \fR|
-+.B \-l \fR|
-+.B \-r \fR|
-+.B \-h
-+]
-+[
- .B lr \fR|
- .B rl
- ] dictionary_file output_file
- .PP
-+.B lt-comp
-+[
-+.B \-\-alt \fR|
-+.B \-\-var \fR|
-+.B \-\-var\-left \fR|
-+.B \-\-var\-right \fR|
-+.B \-\-help
-+]
-+[
-+.B lr \fR|
-+.B rl
-+] dictionary_file output_file
-+.PP
- .SH DESCRIPTION
- .BR lt-comp
- Is the application responsible of compiling dictionaries used by
-@@ -23,6 +43,32 @@
- .PP
- .SH OPTIONS
- .TP
-+.B \-a, \-\-alt
-+Sets the value of the \fIalt\fR attribute to use in compilation.
-+
-+Note that if no value is set, all entries containing an \fIalt\fR
-+attribute are omitted.
-+.TP
-+.B \-v, \-\-var
-+Sets the value of the \fIv\fR attribute to use in compilation.
-+This should only be used with monodixes; for bidixes, see \-l and \-r.
-+
-+Note that if no value is set, all entries containing a \fIv\fR
-+attribute are considered to be \fIleft-to-right\fR.
-+.TP
-+.B \-l, \-\-var\-left
-+Sets the value of the \fIvl\fR attribute for use in compilation of bidixes.
-+"Left" here refers to the side of the dictionary, so this option is only valid
-+in \fIrl\fR mode.
-+.TP
-+.B \-r, \-\-var\-right
-+Sets the value of the \fIvr\fR attribute for use in compilation of bidixes.
-+"Right" here refers to the side of the dictionary, so this option is only valid
-+in \fIlr\fR mode.
-+.TP
-+.B \-h, \-\-help
-+Prints a short help message
-+.TP
- .B lr
- The resulting transducer will process dictionary entries
- \fIleft-to-right\fR.
-@@ -45,5 +91,4 @@
- .SH BUGS
- Lots of...lurking in the dark and waiting for you!
- .SH AUTHOR
--(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
--reserved.
-+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante.
-Index: lttoolbox/lt_locale.h
-===================================================================
---- lttoolbox/lt_locale.h (revision 21745)
-+++ lttoolbox/lt_locale.h (working copy)
-@@ -16,6 +16,7 @@
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- * 02111-1307, USA.
- */
-+
- #ifndef _MYLOCALE_
- #define _MYLOCALE_
-
-Index: lttoolbox/expander.h
-===================================================================
---- lttoolbox/expander.h (revision 21745)
-+++ lttoolbox/expander.h (working copy)
-@@ -42,6 +42,26 @@
- xmlTextReaderPtr reader;
-
- /**
-+ * The alt value
-+ */
-+ wstring alt;
-+
-+ /**
-+ * The variant value (monodix)
-+ */
-+ wstring variant;
-+
-+ /**
-+ * The variant value (left side of bidix)
-+ */
-+ wstring variant_left;
-+
-+ /**
-+ * The variant value (right side of bidix)
-+ */
-+ wstring variant_right;
-+
-+ /**
- * The paradigm being compiled
- */
- wstring current_paradigm;
-@@ -186,6 +206,29 @@
- * Compile dictionary to letter transducers
- */
- void expand(string const &fichero, FILE *output);
-+ /**
-+ * Set the alt value to use in compilation
-+ * @param a the value
-+ */
-+ void setAltValue(string const &a);
-+
-+ /**
-+ * Set the variant value to use in expansion
-+ * @param v the value
-+ */
-+ void setVariantValue(string const &v);
-+
-+ /**
-+ * Set the variant_left value to use in expansion
-+ * @param v the value
-+ */
-+ void setVariantLeftValue(string const &v);
-+
-+ /**
-+ * Set the variant_right value to use in expansion
-+ * @param v the value
-+ */
-+ void setVariantRightValue(string const &v);
- };
-
-
-Index: lttoolbox/transducer.cc
-===================================================================
---- lttoolbox/transducer.cc (revision 21745)
-+++ lttoolbox/transducer.cc (working copy)
-@@ -18,6 +18,7 @@
- */
- #include <lttoolbox/transducer.h>
- #include <lttoolbox/compression.h>
-+#include <lttoolbox/alphabet.h>
- #include <lttoolbox/lttoolbox_config.h>
- #include <lttoolbox/my_stdio.h>
-
-@@ -187,6 +188,13 @@
- void
- Transducer::setFinal(int const state, bool valor)
- {
-+ int initial_copy = getInitial();
-+/*
-+ if(state == initial_copy)
-+ {
-+ wcerr << L"Setting initial state to final" << endl;
-+ }
-+*/
- if(valor)
- {
- finals.insert(state);
-@@ -609,3 +617,119 @@
- finals.clear();
- finals.insert(tmp);
- }
-+
-+void
-+Transducer::show(Alphabet &alphabet, FILE *output, int const epsilon_tag)
-+{
-+ joinFinals(epsilon_tag);
-+
-+ map<int, multimap<int, int> > temporal;
-+
-+ for(map<int, multimap<int, int> >::iterator it = transitions.begin(); it != transitions.end(); it++)
-+ {
-+ multimap<int, int> aux = it->second;
-+
-+ for(multimap<int, int>::iterator it2 = aux.begin(); it2 != aux.end(); it2++)
-+ {
-+ pair<int, int> t = alphabet.decode(it2->first);
-+ fwprintf(output, L"%d\t", it->first);
-+ fwprintf(output, L"%d\t", it2->second);
-+ wstring l = L"";
-+ alphabet.getSymbol(l, t.first);
-+ if(l == L"") // If we find an epsilon
-+ {
-+ fwprintf(output, L"ε\t", l.c_str());
-+ }
-+ else
-+ {
-+ fwprintf(output, L"%S\t", l.c_str());
-+ }
-+ wstring r = L"";
-+ alphabet.getSymbol(r, t.second);
-+ if(r == L"") // If we find an epsilon
-+ {
-+ fwprintf(output, L"ε\t", r.c_str());
-+ }
-+ else
-+ {
-+ fwprintf(output, L"%S\t", r.c_str());
-+ }
-+ fwprintf(output, L"\n");
-+ }
-+ }
-+
-+ for(set<int>::iterator it3 = finals.begin(); it3 != finals.end(); it3++)
-+ {
-+ fwprintf(output, L"%d\n", *it3);
-+ }
-+}
-+
-+int
-+Transducer::getStateSize(int const state)
-+{
-+ set<int> states;
-+ set<int> myclosure1 = closure(state, 0);
-+ states.insert(myclosure1.begin(), myclosure1.end());
-+ int num_transitions = 0;
-+
-+ for(set<int>::iterator it2 = states.begin(); it2 != states.end(); it2++)
-+ {
-+ num_transitions += transitions[*it2].size();
-+ }
-+
-+ return num_transitions;
-+}
-+
-+bool
-+Transducer::recognise(wstring patro, Alphabet &a, FILE *err)
-+{
-+ bool accepted = false;
-+ set<int> states ;
-+
-+ set<int> myclosure1 = closure(getInitial(), 0);
-+ states.insert(myclosure1.begin(), myclosure1.end());
-+ // For each of the characters in the input string
-+ for(wstring::iterator it = patro.begin(); it != patro.end(); it++)
-+ {
-+ set<int> new_state; //Transducer::closure(int const state, int const epsilon_tag)
-+ int sym = *it;
-+ // For each of the current alive states
-+ //fwprintf(err, L"step: %S %C (%d)\n", patro.c_str(), *it, sym);
-+ for(set<int>::iterator it2 = states.begin(); it2 != states.end(); it2++)
-+ {
-+ multimap<int, int> p = transitions[*it2];
-+ // For each of the transitions in the state
-+
-+ for(multimap<int, int>::iterator it3 = p.begin(); it3 != p.end(); it3++)
-+ {
-+
-+ pair<int, int> t = a.decode(it3->first);
-+ wstring l = L"";
-+ a.getSymbol(l, t.first);
-+ //wstring r = L"";
-+ //a.getSymbol(r, t.second);
-+
-+ //fwprintf(err, L" -> state: %d, trans: %S:%S, targ: %d\n", *it2, (l == L"") ? L"ε" : l.c_str(), (r == L"") ? L"ε" : r.c_str(), it3->second);
-+ //if(l.find(*it) != wstring::npos || l == L"" )
-+ if(l.find(*it) != wstring::npos)
-+ {
-+ set<int> myclosure = closure(it3->second, 0);
-+ //wcerr << L"Before closure alives: " <<new_state.size() << endl;
-+ new_state.insert(myclosure.begin(), myclosure.end());
-+ //wcerr << L"After closure alives: " <<new_state.size() << endl;
-+ }
-+ }
-+ }
-+ states = new_state;
-+ }
-+ for(set<int>::iterator it4 = states.begin(); it4 != states.end(); it4++)
-+ {
-+ if(isFinal(*it4))
-+ {
-+ accepted = true;
-+ }
-+ }
-+
-+ return accepted;
-+}
-+
-Index: lttoolbox/pool.h
-===================================================================
---- lttoolbox/pool.h (revision 21745)
-+++ lttoolbox/pool.h (working copy)
-@@ -1,175 +0,0 @@
--/*
-- * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License as
-- * published by the Free Software Foundation; either version 2 of the
-- * License, or (at your option) any later version.
-- *
-- * This program is distributed in the hope that it will be useful, but
-- * WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-- * General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with this program; if not, write to the Free Software
-- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-- * 02111-1307, USA.
-- */
--#ifndef _GENERIC_POOL_
--#define _GENERIC_POOL_
--
--#include <list>
--
--using namespace std;
--
--/**
-- * Pool of T objects
-- */
--template <class T>
--class Pool
--{
--private:
-- /**
-- * Free pointers to objects
-- */
-- list<T *> free;
--
-- /**
-- * Currently created objects
-- */
-- list<T> created;
--
-- /**
-- * copy method
-- * @param other pool object
-- */
-- void copy(Pool const &p)
-- {
-- created = p.created;
--
-- // all new members are available
-- for(typename list<T>::iterator it = created.begin(), limit = created.end();
-- it != limit; it++)
-- {
-- free.push_back(&(*it));
-- }
-- }
--
-- /**
-- * destroy method
-- */
-- void destroy()
-- {
-- // do nothing
-- }
--
-- /**
-- * Allocate a pool of nelems size
-- * @param nelems initial size of the pool
-- */
-- void init(unsigned int const nelems)
-- {
-- created.clear();
-- free.clear();
-- T tmp;
-- for(unsigned int i = 0; i != nelems; i++)
-- {
-- created.push_front(tmp);
-- free.push_front(&(*(created.begin())));
-- }
-- }
--
-- /**
-- * Allocate a pool of nelems size with objects equal to 'object'
-- * @param nelems initial size of the pool
-- * @param object initial value of the objects in the pool
-- */
-- void init(unsigned int const nelems, T const &object)
-- {
-- created.clear();
-- free.clear();
-- for(unsigned int i = 0; i != nelems; i++)
-- {
-- created.push_front(object);
-- free.push_front(&(*(created.begin())));
-- }
-- }
--
--
--public:
--
-- /**
-- * Constructor
-- */
-- Pool()
-- {
-- init(1);
-- }
--
-- /**
-- * Parametrized constructor
-- * @param nelems initial size of the pool
-- * @param object initial value of the objects in the pool
-- */
-- Pool(unsigned int const nelems, T const &object)
-- {
-- init(nelems, object);
-- }
--
-- /**
-- * Parametrized constructor
-- * @param nelems initial size of the pool
-- */
-- Pool(unsigned int const nelems)
-- {
-- init(nelems);
-- }
--
-- /**
-- * Destructor
-- */
-- ~Pool()
-- {
-- destroy();
-- }
--
-- /**
-- * Copy constructor
-- */
-- Pool(Pool const &p)
-- {
-- copy(p);
-- }
--
-- /**
-- * Allocate a pointer to a free 'new' object.
-- * @return pointer to the object
-- */
-- T * get()
-- {
-- if(free.size() != 0)
-- {
-- T *result = *(free.begin());
-- free.erase(free.begin());
-- return result;
-- }
-- else
-- {
-- T tmp;
-- created.push_front(tmp);
-- return &(*(created.begin()));
-- }
-- }
--
-- /**
-- * Release a no more needed instance of a pooled object
-- * @param item the no more needed instance of the object
-- */
-- void release(T *item)
-- {
-- free.push_front(item);
-- }
--};
--
--#endif
-Index: lttoolbox/compiler.h
-===================================================================
---- lttoolbox/compiler.h (revision 21745)
-+++ lttoolbox/compiler.h (working copy)
-@@ -44,6 +44,26 @@
- xmlTextReaderPtr reader;
-
- /**
-+ * The alt value
-+ */
-+ wstring alt;
-+
-+ /**
-+ * The variant value (monodix)
-+ */
-+ wstring variant;
-+
-+ /**
-+ * The variant value (left side of bidix)
-+ */
-+ wstring variant_left;
-+
-+ /**
-+ * The variant value (right side of bidix)
-+ */
-+ wstring variant_right;
-+
-+ /**
- * The paradigm being compiled
- */
- wstring current_paradigm;
-@@ -65,6 +85,16 @@
- wstring letters;
-
- /**
-+ * Set verbose mode: warnings which may or may not be correct
-+ */
-+ bool verbose;
-+
-+ /**
-+ * First element (of an entry)
-+ */
-+ bool first_element;
-+
-+ /**
- * Identifier of all the symbols during the compilation
- */
- Alphabet alphabet;
-@@ -264,10 +294,14 @@
- static wstring const COMPILER_LEMMA_ATTR;
- static wstring const COMPILER_IGNORE_ATTR;
- static wstring const COMPILER_IGNORE_YES_VAL;
-+ static wstring const COMPILER_ALT_ATTR;
-+ static wstring const COMPILER_V_ATTR;
-+ static wstring const COMPILER_VL_ATTR;
-+ static wstring const COMPILER_VR_ATTR;
-
-
- /**
-- * Copnstructor
-+ * Constructor
- */
- Compiler();
-
-@@ -292,6 +326,35 @@
- * @param fd the stream where write the result
- */
- void write(FILE *fd);
-+
-+ /**
-+ * Set verbose output
-+ */
-+ void setVerbose(bool verbosity = false);
-+
-+ /**
-+ * Set the alt value to use in compilation
-+ * @param a the value
-+ */
-+ void setAltValue(string const &a);
-+
-+ /**
-+ * Set the variant value to use in compilation
-+ * @param v the value
-+ */
-+ void setVariantValue(string const &v);
-+
-+ /**
-+ * Set the variant_left value to use in compilation
-+ * @param v the value
-+ */
-+ void setVariantLeftValue(string const &v);
-+
-+ /**
-+ * Set the variant_right value to use in compilation
-+ * @param v the value
-+ */
-+ void setVariantRightValue(string const &v);
- };
-
-
-Index: lttoolbox/lt-tmxcomp.1
-===================================================================
---- lttoolbox/lt-tmxcomp.1 (revision 21745)
-+++ lttoolbox/lt-tmxcomp.1 (working copy)
-@@ -38,5 +38,4 @@
- .SH BUGS
- Lots of...lurking in the dark and waiting for you!
- .SH AUTHOR
--(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
--reserved.
-+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante.
-Index: lttoolbox/alphabet.h
-===================================================================
---- lttoolbox/alphabet.h (revision 21745)
-+++ lttoolbox/alphabet.h (working copy)
-@@ -145,6 +145,13 @@
- */
- bool isTag(int const symbol) const;
-
-+ /**
-+ * Sets an already existing symbol to represent a new value
-+ * @param symbol the code of the symbol to set
-+ * @param newSymbolString the new string for this symbol
-+ */
-+ void setSymbol(int symbol, wstring newSymbolString);
-+
- pair<int, int> const & decode(int const code) const;
-
- };
-Index: lttoolbox/state.h
-===================================================================
---- lttoolbox/state.h (revision 21745)
-+++ lttoolbox/state.h (working copy)
-@@ -19,6 +19,7 @@
- #ifndef _STATE_
- #define _STATE_
-
-+#include <map>
- #include <set>
- #include <string>
- #include <vector>
-@@ -26,7 +27,9 @@
-
- #include <lttoolbox/alphabet.h>
- #include <lttoolbox/node.h>
--#include <lttoolbox/pool.h>
-+#include <lttoolbox/match_exe.h>
-+#include <lttoolbox/match_state.h>
-+#include <lttoolbox/transducer.h>
-
- using namespace std;
-
-@@ -43,7 +46,7 @@
- {
- Node *where;
- vector<int> *sequence;
-- bool dirty;
-+ bool dirty; // What does "dirty" mean ?
-
- TNodeState(Node * const &w, vector<int> * const &s, bool const &d): where(w), sequence(s), dirty(d){}
- TNodeState & operator=(TNodeState const &other)
-@@ -58,17 +61,6 @@
- vector<TNodeState> state;
-
- /**
-- * Pool of wchar_t vectors, for efficience (static class)
-- */
-- Pool<vector<int> > *pool;
--
-- /**
-- * Copy function
-- * @param s the state to be copied
-- */
-- void copy(State const &s);
--
-- /**
- * Destroy function
- */
- void destroy();
-@@ -86,6 +78,8 @@
- */
- void apply(int const input, int const alt);
-
-+ void apply(int const input, int const alt1, int const alt2);
-+
- /**
- * Calculate the epsilon closure over the current state, replacing
- * its content.
-@@ -92,11 +86,21 @@
- */
- void epsilonClosure();
-
-+ bool lastPartHasRequiredSymbol(const vector<int> &seq, int requiredSymbol, int separationSymbol);
-+
- public:
-+
- /**
-+ * Copy function
-+ * @param s the state to be copied
-+ */
-+ void copy(State const &s);
-+
-+
-+ /**
- * Constructor
- */
-- State(Pool<vector<int> > *);
-+ State();
-
- /**
- * Destructor
-@@ -135,6 +139,13 @@
- */
- void step(int const input, int const alt);
-
-+ void step(int const input, int const alt1, int const alt2);
-+
-+ void step_case(wchar_t val, bool caseSensitive);
-+
-+ void step_case(wchar_t val, wchar_t val2, bool caseSensitive);
-+
-+
- /**
- * Init the state with the initial node and empty output
- * @param initial the initial node of the transducer
-@@ -142,6 +153,21 @@
- void init(Node *initial);
-
- /**
-+ * Remove states not containing a specific symbol in their last 'part', and states
-+ * with more than a number of 'parts'
-+ * @param requieredSymbol the symbol requiered in the last part
-+ * @param separationSymbol the symbol that represent the separation between two parts
-+ * @param compound_max_elements the maximum part number allowed
-+ */
-+ void pruneCompounds(int requiredSymbol, int separationSymbol, int compound_max_elements);
-+
-+ /**
-+ * Remove states containing a forbidden symbol
-+ * @param forbiddenSymbol the symbol forbidden
-+ */
-+ void pruneStatesWithForbiddenSymbol(int forbiddenSymbol);
-+
-+ /**
- * Print all outputs of current parsing, preceded by a bar '/',
- * from the final nodes of the state
- * @param finals the set of final nodes
-@@ -156,8 +182,8 @@
- wstring filterFinals(set<Node *> const &finals, Alphabet const &a,
- set<wchar_t> const &escaped_chars,
- bool uppercase = false,
-- bool firstupper = false,
-- int firstchar = 0) const;
-+ bool firstupper = false,
-+ int firstchar = 0) const;
-
- /**
- * Same as previous one, but the output is adapted to the SAO system
-@@ -173,11 +199,44 @@
- wstring filterFinalsSAO(set<Node *> const &finals, Alphabet const &a,
- set<wchar_t> const &escaped_chars,
- bool uppercase = false,
-- bool firstupper = false,
-- int firstchar = 0) const;
-+ bool firstupper = false,
-+ int firstchar = 0) const;
-
-
- /**
-+ * Same as previous one, but the output is adapted to the LRX system
-+ * @param finals the set of final nodes
-+ * @param a the alphabet to decode strings
-+ * @param escaped_chars the set of chars to be preceded with one
-+ * backslash
-+ * @param uppercase true if the word is uppercase
-+ * @param firstupper true if the first letter of a word is uppercase
-+ * @param firstchar first character of the word
-+ * @return the result of the transduction
-+ */
-+
-+ set<pair<wstring, vector<wstring> > > filterFinalsLRX(set<Node *> const &finals, Alphabet const &a,
-+ set<wchar_t> const &escaped_chars,
-+ bool uppercase = false,
-+ bool firstupper = false,
-+ int firstchar = 0) const;
-+
-+
-+
-+
-+
-+ /**
-+ * Find final states, remove those that not has a requiredSymbol and 'restart' each of them as the
-+ * set of initial states, but remembering the sequence and adding a separationSymbol
-+ * @param finals
-+ * @param requiredSymbol
-+ * @param restart_state
-+ * @param separationSymbol
-+ */
-+ void restartFinals(const set<Node *> &finals, int requiredSymbol, State *restart_state, int separationSymbol);
-+
-+
-+ /**
- * Returns true if at least one record of the state references a
- * final node of the set
- * @param finals set of final nodes @return
-@@ -185,6 +244,11 @@
- */
- bool isFinal(set<Node *> const &finals) const;
-
-+ /**
-+ * Return the full states string (to allow debuging...) using a Java ArrayList.toString style
-+ */
-+ wstring getReadableString(const Alphabet &a);
-+
- wstring filterFinalsTM(set<Node *> const &finals,
- Alphabet const &alphabet,
- set<wchar_t> const &escaped_chars,
-Index: lttoolbox/Makefile.am
-===================================================================
---- lttoolbox/Makefile.am (revision 21745)
-+++ lttoolbox/Makefile.am (working copy)
-@@ -2,7 +2,7 @@
- h_sources = alphabet.h buffer.h compiler.h compression.h \
- entry_token.h expander.h fst_processor.h lt_locale.h ltstr.h \
- match_exe.h match_node.h match_state.h my_stdio.h node.h \
-- pattern_list.h pool.h regexp_compiler.h sorted_vector.h state.h \
-+ pattern_list.h regexp_compiler.h sorted_vector.h state.h \
- transducer.h trans_exe.h xml_parse_util.h exception.h tmx_compiler.h
- cc_sources = alphabet.cc compiler.cc compression.cc entry_token.cc \
- expander.cc fst_processor.cc lt_locale.cc match_exe.cc \
-@@ -13,7 +13,7 @@
- library_includedir = $(includedir)/$(GENERIC_LIBRARY_NAME)-$(GENERIC_API_VERSION)/$(GENERIC_LIBRARY_NAME)
- library_include_HEADERS = $(h_sources)
-
--bin_PROGRAMS = lt-comp lt-proc lt-expand lt-tmxcomp lt-tmxproc
-+bin_PROGRAMS = lt-comp lt-proc lt-expand lt-tmxcomp lt-tmxproc lt-print
- instdir = lttoolbox
-
- lib_LTLIBRARIES= liblttoolbox3.la
-@@ -26,6 +26,10 @@
-
- lttoolbox_DATA = dix.dtd
-
-+lt_print_SOURCES = lt_print.cc
-+lt_print_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la
-+lt_print_LDFLAGS = -llttoolbox$(GENERIC_MAJOR_VERSION) $(LTTOOLBOX_LIBS)
-+
- lt_comp_SOURCES = lt_comp.cc
- lt_comp_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la
- lt_comp_LDFLAGS = -llttoolbox$(GENERIC_MAJOR_VERSION) $(LTTOOLBOX_LIBS)
-@@ -46,8 +50,18 @@
- lt_tmxproc_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la
- lt_tmxproc_LDFLAGS = -llttoolbox$(GENERIC_MAJOR_VERSION) $(LTTOOLBOX_LIBS)
-
--man_MANS = lt-comp.1 lt-expand.1 lt-proc.1 lt-tmxcomp.1 lt-tmxproc.1
-+#lt-validate-dictionary: Makefile.am validate-header.sh
-+# @echo "Creating lt-validate-dictionary script"
-+# @echo "#!$(BASH)" > $@
-+# @cat validate-header.sh >> $@
-+# @echo "$(XMLLINT) --dtdvalid $(apertiumdir)/dix.dtd --noout \$$FILE1 && exit 0;" >> $@
-+# @echo "exit 1;" >> $@
-+# @chmod a+x $@
-
-+
-+
-+man_MANS = lt-comp.1 lt-expand.1 lt-proc.1 lt-tmxcomp.1 lt-tmxproc.1 lt-print.1
-+
- INCLUDES = -I$(top_srcdir) $(LTTOOLBOX_CFLAGS)
- CLEANFILES = *~
-
-Index: lttoolbox/lt-print.1
-===================================================================
---- lttoolbox/lt-print.1 (revision 0)
-+++ lttoolbox/lt-print.1 (revision 44914)
-@@ -0,0 +1,34 @@
-+.TH lt-print 1 2006-03-08 "" ""
-+.SH NAME
-+lt-print \- This application is part of the lexical processing modules
-+and tools (
-+.B lttoolbox
-+)
-+.PP
-+This tool is part of the apertium machine translation
-+architecture: \fBhttp://www.apertium.org\fR.
-+.SH SYNOPSIS
-+.B lt-print
-+ bin_file
-+.PP
-+.SH DESCRIPTION
-+.BR lt-print
-+Is the application responsible for printing compiled dictionaries in
-+ATT format.
-+.PP
-+.B bin_file
-+The compiled input file .
-+.PP
-+.B output_file
-+The transducer in ATT format .
-+
-+.SH SEE ALSO
-+.I lt-comp\fR(1),
-+.I lt-proc\fR(1),
-+.I lt-expand\fR(1),
-+.I apertium-tagger\fR(1),
-+.I apertium\fR(1).
-+.SH BUGS
-+Lots of...lurking in the dark and waiting for you!
-+.SH AUTHOR
-+(c) 2005--2012 Universitat d'Alacant / Universidad de Alicante.
-Index: lttoolbox/lt_print.cc
-===================================================================
---- lttoolbox/lt_print.cc (revision 0)
-+++ lttoolbox/lt_print.cc (revision 44914)
-@@ -0,0 +1,106 @@
-+/*
-+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License as
-+ * published by the Free Software Foundation; either version 2 of the
-+ * License, or (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ * General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-+ * 02111-1307, USA.
-+ */
-+#include <lttoolbox/transducer.h>
-+#include <lttoolbox/compression.h>
-+#include <lttoolbox/lttoolbox_config.h>
-+
-+#include <lttoolbox/my_stdio.h>
-+#include <lttoolbox/lt_locale.h>
-+
-+#include <cstdlib>
-+#include <iostream>
-+#include <libgen.h>
-+#include <string>
-+
-+using namespace std;
-+
-+void endProgram(char *name)
-+{
-+ if(name != NULL)
-+ {
-+ cout << basename(name) << " v" << PACKAGE_VERSION <<": dump a transducer to text in ATT format" << endl;
-+ cout << "USAGE: " << basename(name) << " bin_file " << endl;
-+ }
-+ exit(EXIT_FAILURE);
-+}
-+
-+
-+int main(int argc, char *argv[])
-+{
-+ if(argc != 2)
-+ {
-+ endProgram(argv[0]);
-+ }
-+
-+ LtLocale::tryToSetLocale();
-+
-+
-+ FILE *input = fopen(argv[1], "r");
-+
-+ Alphabet new_alphabet;
-+ set<wchar_t> alphabetic_chars;
-+
-+ map<wstring, Transducer> transducers;
-+
-+ // letters
-+ int len = Compression::multibyte_read(input);
-+ while(len > 0)
-+ {
-+ alphabetic_chars.insert(static_cast<wchar_t>(Compression::multibyte_read(input)));
-+ len--;
-+ }
-+
-+ // symbols
-+ new_alphabet.read(input);
-+
-+ len = Compression::multibyte_read(input);
-+
-+ while(len > 0)
-+ {
-+ int len2 = Compression::multibyte_read(input);
-+ wstring name = L"";
-+ while(len2 > 0)
-+ {
-+ name += static_cast<wchar_t>(Compression::multibyte_read(input));
-+ len2--;
-+ }
-+ transducers[name].read(input);
-+
-+ len--;
-+ }
-+
-+ /////////////////////
-+
-+ FILE *output = stdout;
-+ map<wstring, Transducer>::iterator penum = transducers.end();
-+ penum--;
-+ for(map<wstring, Transducer>::iterator it = transducers.begin(); it != transducers.end(); it++)
-+ {
-+ //it->second.minimize();
-+ it->second.show(new_alphabet, output);
-+ if(it != penum)
-+ {
-+ fwprintf(output, L"--\n", it->first.c_str());
-+ }
-+ }
-+
-+ fclose(input);
-+
-+ return 0;
-+}
================================================================
---- gitweb:
http://git.pld-linux.org/gitweb.cgi/packages/lttoolbox.git/commitdiff/78ddaac8fc4a3cd5335057d9c391f686cfcf68c7
More information about the pld-cvs-commit
mailing list