[packages/lttoolbox] - added svn20130412 patch (some features needed by apertium-lex-tools) - added soname patch (bump soname because svn changes change ABI)
qboosh
qboosh at pld-linux.org
Wed Jun 26 17:33:42 CEST 2013
commit 4306e092da881c50b399a1865e0713d856e4a0ec
Author: Jakub Bogusz <qboosh at pld-linux.org>
Date: Wed Jun 26 17:33:24 2013 +0200
- added svn20130412 patch (some features needed by apertium-lex-tools)
- added soname patch (bump soname because svn changes change ABI)
- release 2.20130412.1
lttoolbox-soname.patch | 11 +
lttoolbox-svn20130412.patch | 3112 +++++++++++++++++++++++++++++++++++++++++++
lttoolbox.spec | 16 +-
3 files changed, 3135 insertions(+), 4 deletions(-)
---
diff --git a/lttoolbox.spec b/lttoolbox.spec
index e1535f7..72b4a69 100644
--- a/lttoolbox.spec
+++ b/lttoolbox.spec
@@ -2,12 +2,16 @@ Summary: Augmented letter transducer tools for natural language processing
Summary(pl.UTF-8): Narzędzia do przetwarzania słów w językach naturalnych
Name: lttoolbox
Version: 3.2.0
-Release: 1
+%define subver svn20130412
+%define rel 1
+Release: 2.%{subver}.1
License: GPL v2+
Group: Applications/Text
Source0: http://downloads.sourceforge.net/apertium/%{name}-%{version}.tar.gz
# Source0-md5: 708e7de837ed363f7103035ef2849fe4
-Patch0: %{name}-opt.patch
+Patch0: %{name}-svn20130412.patch
+Patch1: %{name}-soname.patch
+Patch2: %{name}-opt.patch
URL: http://wiki.apertium.org/wiki/Lttoolbox
BuildRequires: autoconf >= 2.52
BuildRequires: automake
@@ -58,7 +62,9 @@ Statyczna biblioteka lttoolbox.
%prep
%setup -q
-%patch0 -p1
+%patch0 -p0
+%patch1 -p1
+%patch2 -p1
%build
%{__libtoolize}
@@ -87,14 +93,16 @@ rm -rf $RPM_BUILD_ROOT
%doc AUTHORS ChangeLog NEWS README
%attr(755,root,root) %{_bindir}/lt-comp
%attr(755,root,root) %{_bindir}/lt-expand
+%attr(755,root,root) %{_bindir}/lt-print
%attr(755,root,root) %{_bindir}/lt-proc
%attr(755,root,root) %{_bindir}/lt-tmxcomp
%attr(755,root,root) %{_bindir}/lt-tmxproc
%attr(755,root,root) %{_libdir}/liblttoolbox3-3.2.so.*.*.*
-%attr(755,root,root) %ghost %{_libdir}/liblttoolbox3-3.2.so.0
+%attr(755,root,root) %ghost %{_libdir}/liblttoolbox3-3.2.so.1
%{_datadir}/lttoolbox
%{_mandir}/man1/lt-comp.1*
%{_mandir}/man1/lt-expand.1*
+%{_mandir}/man1/lt-print.1*
%{_mandir}/man1/lt-proc.1*
%{_mandir}/man1/lt-tmxcomp.1*
%{_mandir}/man1/lt-tmxproc.1*
diff --git a/lttoolbox-soname.patch b/lttoolbox-soname.patch
new file mode 100644
index 0000000..973caac
--- /dev/null
+++ b/lttoolbox-soname.patch
@@ -0,0 +1,11 @@
+--- lttoolbox-3.2.0/configure.ac.orig 2013-06-26 16:15:39.881717927 +0200
++++ lttoolbox-3.2.0/configure.ac 2013-06-26 16:23:06.398365855 +0200
+@@ -23,7 +23,7 @@
+ AC_SUBST(GENERIC_MAJOR_VERSION)
+
+ # Shared library versioning
+-GENERIC_LIBRARY_VERSION=0:0:0
++GENERIC_LIBRARY_VERSION=1:0:0
+ # | | |
+ # +------+ | +---+
+ # | | |
diff --git a/lttoolbox-svn20130412.patch b/lttoolbox-svn20130412.patch
new file mode 100644
index 0000000..71dc646
--- /dev/null
+++ b/lttoolbox-svn20130412.patch
@@ -0,0 +1,3112 @@
+Index: lttoolbox/lt-proc.1
+===================================================================
+--- lttoolbox/lt-proc.1 (revision 21745)
++++ lttoolbox/lt-proc.1 (working copy)
+@@ -12,7 +12,9 @@
+ [
+ .B \-a \fR|
+ .B \-b \fR|
++.B \-o \fR|
+ .B \-c \fR|
++.B \-d \fR|
+ .B \-e \fR|
+ .B \-g \fR|
+ .B \-n \fR|
+@@ -29,7 +31,10 @@
+ [
+ .B \-\-analysis \fR|
+ .B \-\-bilingual \fR|
++.B \-\-surf-bilingual \fR|
+ .B \-\-case-sensitive \fR|
++.B \-\-debugged-gen \fR|
++.B \-\-decompose-nouns \fR|
+ .B \-\-generation \fR|
+ .B \-\-non-marked-gen \fR|
+ .B \-\-tagged-gen \fR|
+@@ -98,9 +103,18 @@
+ form in the source language. Works tipically with the output of
+ apertium-pretransfer.
+ .TP
++.B \-o, \-\-surf-bilingual
++As with \-b, but takes input from apertium\-tagger \-p , with
++surface forms, and if the lexical form is not found in the bilingual
++dictionary, it outputs the surface form of the word.
++.TP
++
+ .B \-c, \-\-case-sensitive
+ Use the literal case of the incoming characters
+ .TP
++.B \-d, \-\-debugged-gen
++Morph. generation with all the stuff
++.TP
+ .B \-e, \-\-decompose-compounds
+ Try to treat unknown words as compounds, and decompose them.
+ .TP
+@@ -154,5 +168,4 @@
+ .SH BUGS
+ Lots of...lurking in the dark and waiting for you!
+ .SH AUTHOR
+-(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+-reserved.
++(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante.
+Index: lttoolbox/fst_processor.cc
+===================================================================
+--- lttoolbox/fst_processor.cc (revision 21745)
++++ lttoolbox/fst_processor.cc (working copy)
+@@ -44,14 +44,17 @@
+
+ caseSensitive = false;
+ dictionaryCase = false;
+- compoundDecomposition = false;
++ do_decomposition = false;
+ nullFlush = false;
+ nullFlushGeneration = false;
++ showControlSymbols = false;
++ biltransSurfaceForms = false;
++ compoundOnlyLSymbol = 0;
++ compoundRSymbol = 0;
++ compound_max_elements = 4;
+
+- pool = new Pool<vector<int> >(4, vector<int>(50));
+-
+- initial_state = new State(pool);
+- current_state = new State(pool);
++ initial_state = new State();
++ current_state = new State();
+ }
+
+ FSTProcessor::~FSTProcessor()
+@@ -58,7 +61,6 @@
+ {
+ delete current_state;
+ delete initial_state;
+- delete pool;
+ }
+
+ void
+@@ -408,6 +410,100 @@
+ return 0x7fffffff;
+ }
+
++pair<wstring, int>
++FSTProcessor::readBilingual(FILE *input, FILE *output)
++{
++ wint_t val = fgetwc_unlocked(input);
++ wstring symbol = L"";
++
++ if(feof(input))
++ {
++ return pair<wstring, int>(symbol, 0x7fffffff);
++ }
++
++ if(outOfWord)
++ {
++ if(val == L'^')
++ {
++ val = fgetwc_unlocked(input);
++ if(feof(input))
++ {
++ return pair<wstring, int>(symbol, 0x7fffffff);
++ }
++ }
++ else if(val == L'\\')
++ {
++ fputwc_unlocked(val, output);
++ val = fgetwc_unlocked(input);
++ if(feof(input))
++ {
++ return pair<wstring, int>(symbol, 0x7fffffff);
++ }
++ fputwc_unlocked(val,output);
++ skipUntil(input, output, L'^');
++ val = fgetwc_unlocked(input);
++ if(feof(input))
++ {
++ return pair<wstring, int>(symbol, 0x7fffffff);
++ }
++ }
++ else
++ {
++ fputwc_unlocked(val, output);
++ skipUntil(input, output, L'^');
++ val = fgetwc_unlocked(input);
++ if(feof(input))
++ {
++ return pair<wstring, int>(symbol, 0x7fffffff);
++ }
++ }
++ outOfWord = false;
++ }
++
++ if(val == L'\\')
++ {
++ val = fgetwc_unlocked(input);
++ return pair<wstring, int>(symbol, val);
++ }
++ else if(val == L'$')
++ {
++ outOfWord = true;
++ return pair<wstring, int>(symbol, static_cast<int>(L'$'));
++ }
++ else if(val == L'<')
++ {
++ wstring cad = L"";
++ cad += static_cast<wchar_t>(val);
++ while((val = fgetwc_unlocked(input)) != L'>')
++ {
++ if(feof(input))
++ {
++ streamError();
++ }
++ cad += static_cast<wchar_t>(val);
++ }
++ cad += static_cast<wchar_t>(val);
++
++ int res = alphabet(cad);
++
++ if (res == 0) {
++ symbol = cad;
++ }
++ return pair<wstring, int>(symbol, res);
++ }
++ else if(val == L'[')
++ {
++ fputws_unlocked(readFullBlock(input, L'[', L']').c_str(), output);
++ return readBilingual(input, output);
++ }
++ else
++ {
++ return pair<wstring, int>(symbol, val);
++ }
++
++ return pair<wstring, int>(symbol, 0x7fffffff);
++}
++
+ void
+ FSTProcessor::flushBlanks(FILE *output)
+ {
+@@ -494,6 +590,27 @@
+ }
+
+ void
++FSTProcessor::writeEscapedWithTags(wstring const &str, FILE *output)
++{
++ for(unsigned int i = 0, limit = str.size(); i < limit; i++)
++ {
++ if(str[i] == L'<' && i >=1 && str[i-1] != L'\\')
++ {
++ fputws_unlocked(str.substr(i).c_str(), output);
++ return;
++ }
++
++ if(escaped_chars.find(str[i]) != escaped_chars.end())
++ {
++ fputwc_unlocked(L'\\', output);
++ }
++ fputwc_unlocked(str[i], output);
++ }
++}
++
++
++
++void
+ FSTProcessor::printWord(wstring const &sf, wstring const &lf, FILE *output)
+ {
+ fputwc_unlocked(L'^', output);
+@@ -642,7 +759,86 @@
+ initGeneration();
+ }
+
++
+ wstring
++FSTProcessor::compoundAnalysis(wstring input_word, bool uppercase, bool firstupper) {
++ const int MAX_COMBINATIONS = 500;
++ //wcerr << L"compoundAnalysis(input_word = " << input_word << L")" << endl;
++
++ State current_state = *initial_state;
++
++ for(unsigned int i=0; i<input_word.size(); i++) {
++ wchar_t val=input_word.at(i);
++
++ //wcerr << val << L" før step " << i << L" current_state = " << current_state.getReadableString(alphabet) << endl;
++ current_state.step_case(val, caseSensitive);
++
++ if(current_state.size() > MAX_COMBINATIONS) {
++ wcerr << L"Warning: compoundAnalysis's MAX_COMBINATIONS exceeded for '" << input_word << L"'" << endl;
++ wcerr << L" gave up at char " << i << L" '" << val << L"'." << endl;
++
++ wstring nullString = L"";
++ return nullString;
++ }
++
++ //wcerr << val << L" eft step " << i << L" current_state = " << current_state.getReadableString(alphabet) << endl;
++
++ if(i < input_word.size()-1)
++ current_state.restartFinals(all_finals, compoundOnlyLSymbol, initial_state, '+');
++
++ //wcerr << val << " eft rest " << i << " current_state = " << current_state.getReadableString(alphabet) << endl;
++ //wcerr << i << " result = " << current_state.filterFinals(all_finals, alphabet, escaped_chars, uppercase, firstupper) << endl;
++ //wcerr << i << " -- size = " << current_state.size() << endl;
++
++ if(current_state.size()==0) {
++ wstring nullString = L"";
++ return nullString;
++ }
++ }
++
++ current_state.pruneCompounds(compoundRSymbol, '+', compound_max_elements);
++ wstring result = current_state.filterFinals(all_finals, alphabet, escaped_chars, uppercase, firstupper);
++ //wcerr << L"rrresult = " << result << endl;
++
++ return result;
++}
++
++
++
++void
++FSTProcessor::initDecompositionSymbols() {
++ if ((compoundOnlyLSymbol=alphabet(L"<:co:only-L>")) == 0
++ && (compoundOnlyLSymbol=alphabet(L"<:compound:only-L>")) == 0
++ && (compoundOnlyLSymbol=alphabet(L"<@co:only-L>")) == 0
++ && (compoundOnlyLSymbol=alphabet(L"<@compound:only-L>")) == 0
++ && (compoundOnlyLSymbol=alphabet(L"<compound-only-L>")) == 0)
++ {
++ wcerr << L"Warning: Decomposition symbol <:compound:only-L> not found" << endl;
++ }
++ else if (!showControlSymbols)
++ alphabet.setSymbol(compoundOnlyLSymbol, L"");
++
++ if ((compoundRSymbol=alphabet(L"<:co:R>")) == 0
++ && (compoundRSymbol=alphabet(L"<:compound:R>")) == 0
++ && (compoundRSymbol=alphabet(L"<@co:R>")) == 0
++ && (compoundRSymbol=alphabet(L"<@compound:R>")) == 0
++ && (compoundRSymbol=alphabet(L"<compound-R>")) == 0)
++ {
++ wcerr << L"Warning: Decomposition symbol <:compound:R> not found" << endl;
++ }
++ else if (!showControlSymbols)
++ alphabet.setSymbol(compoundRSymbol, L"");
++}
++
++
++void
++FSTProcessor::initDecomposition() {
++ do_decomposition = true;
++ initAnalysis();
++ initDecompositionSymbols();
++}
++
++/*wstring
+ FSTProcessor::decompose(wstring w)
+ {
+ State current_state = *initial_state;
+@@ -807,7 +1003,7 @@
+ }
+ //wcerr << L"+ decompose: " << lf << endl;
+ return lf;
+-}
++}*/
+
+ void
+ FSTProcessor::analysis(FILE *input, FILE *output)
+@@ -839,6 +1035,10 @@
+ uppercase = firstupper && iswupper(sf[sf.size()-1]);
+ }
+
++ if(do_decomposition && compoundOnlyLSymbol != 0)
++ {
++ current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
++ }
+ lf = current_state.filterFinals(all_finals, alphabet,
+ escaped_chars,
+ uppercase, firstupper);
+@@ -853,6 +1053,10 @@
+ uppercase = firstupper && iswupper(sf[sf.size()-1]);
+ }
+
++ if(do_decomposition && compoundOnlyLSymbol != 0)
++ {
++ current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
++ }
+ lf = current_state.filterFinals(all_finals, alphabet,
+ escaped_chars,
+ uppercase, firstupper);
+@@ -867,6 +1071,10 @@
+ uppercase = firstupper && iswupper(sf[sf.size()-1]);
+ }
+
++ if(do_decomposition && compoundOnlyLSymbol != 0)
++ {
++ current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
++ }
+ lf = current_state.filterFinals(all_finals, alphabet,
+ escaped_chars,
+ uppercase, firstupper);
+@@ -881,6 +1089,10 @@
+ uppercase = firstupper && iswupper(sf[sf.size()-1]);
+ }
+
++ if(do_decomposition && compoundOnlyLSymbol != 0)
++ {
++ current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
++ }
+ lf = current_state.filterFinals(all_finals, alphabet,
+ escaped_chars,
+ uppercase, firstupper);
+@@ -969,16 +1181,22 @@
+ if(limit == 0)
+ {
+ input_buffer.back(sf.size());
+- fputwc_unlocked(sf[0], output);
++ writeEscaped(sf.substr(0,1), output);
+ }
+ else
+ {
+ input_buffer.back(1+(size-limit));
+ wstring unknown_word = sf.substr(0, limit);
+- if(compoundDecomposition)
++ if(do_decomposition)
+ {
++ if(!dictionaryCase)
++ {
++ firstupper = iswupper(sf[0]);
++ uppercase = firstupper && iswupper(sf[sf.size()-1]);
++ }
++
+ wstring compound = L"";
+- compound = decompose(unknown_word);
++ compound = compoundAnalysis(unknown_word, uppercase, firstupper);
+ if(compound != L"")
+ {
+ printWord(unknown_word, compound, output);
+@@ -1002,16 +1220,22 @@
+ if(limit == 0)
+ {
+ input_buffer.back(sf.size());
+- fputwc_unlocked(sf[0], output);
++ writeEscaped(sf.substr(0,1), output);
+ }
+ else
+ {
+ input_buffer.back(1+(size-limit));
+ wstring unknown_word = sf.substr(0, limit);
+- if(compoundDecomposition)
++ if(do_decomposition)
+ {
++ if(!dictionaryCase)
++ {
++ firstupper = iswupper(sf[0]);
++ uppercase = firstupper && iswupper(sf[sf.size()-1]);
++ }
++
+ wstring compound = L"";
+- compound = decompose(unknown_word);
++ compound = compoundAnalysis(unknown_word, uppercase, firstupper);
+ if(compound != L"")
+ {
+ printWord(unknown_word, compound, output);
+@@ -1296,19 +1520,27 @@
+ fputwc(L'=', output);
+ val = readGeneration(input, output);
+ }
+-
++
+ if(val == L'$' && outOfWord)
+ {
+ if(sf[0] == L'*' || sf[0] == L'%')
+ {
+- if(mode != gm_clean)
++ if(mode != gm_clean && mode != gm_tagged_nm)
+ {
+ writeEscaped(sf, output);
+ }
+- else
++ else if (mode == gm_clean)
+ {
+ writeEscaped(sf.substr(1), output);
+ }
++ else if(mode == gm_tagged_nm)
++ {
++ fputwc_unlocked(L'^', output);
++ writeEscaped(removeTags(sf.substr(1)), output);
++ fputwc_unlocked(L'/', output);
++ writeEscapedWithTags(sf, output);
++ fputwc_unlocked(L'$', output);
++ }
+ }
+ else if(sf[0] == L'@')
+ {
+@@ -1324,6 +1556,18 @@
+ {
+ writeEscaped(removeTags(sf), output);
+ }
++ else if(mode == gm_tagged)
++ {
++ writeEscaped(removeTags(sf), output);
++ }
++ else if(mode == gm_tagged_nm)
++ {
++ fputwc_unlocked(L'^', output);
++ writeEscaped(removeTags(sf.substr(1)), output);
++ fputwc_unlocked(L'/', output);
++ writeEscapedWithTags(sf, output);
++ fputwc_unlocked(L'$', output);
++ }
+ }
+ else if(current_state.isFinal(all_finals))
+ {
+@@ -1330,7 +1574,7 @@
+ bool uppercase = sf.size() > 1 && iswupper(sf[1]);
+ bool firstupper= iswupper(sf[0]);
+
+- if(mode == gm_tagged)
++ if(mode == gm_tagged || mode == gm_tagged_nm)
+ {
+ fputwc_unlocked(L'^', output);
+ }
+@@ -1339,10 +1583,10 @@
+ escaped_chars,
+ uppercase, firstupper).substr(1).c_str(),
+ output);
+- if(mode == gm_tagged)
++ if(mode == gm_tagged || mode == gm_tagged_nm)
+ {
+ fputwc_unlocked(L'/', output);
+- fputws_unlocked(sf.c_str(), output);
++ writeEscapedWithTags(sf, output);
+ fputwc_unlocked(L'$', output);
+ }
+
+@@ -1360,9 +1604,26 @@
+ }
+ else if(mode == gm_unknown)
+ {
++ if(sf != L"")
++ {
++ fputwc_unlocked(L'#', output);
++ writeEscaped(removeTags(sf), output);
++ }
++ }
++ else if(mode == gm_tagged)
++ {
+ fputwc_unlocked(L'#', output);
+ writeEscaped(removeTags(sf), output);
+ }
++ else if(mode == gm_tagged_nm)
++ {
++ fputwc_unlocked(L'^', output);
++ writeEscaped(removeTags(sf), output);
++ fputwc_unlocked(L'/', output);
++ fputwc_unlocked(L'#', output);
++ writeEscapedWithTags(sf, output);
++ fputwc_unlocked(L'$', output);
++ }
+ }
+
+ current_state = *initial_state;
+@@ -2033,19 +2294,62 @@
+ }
+
+ State current_state = *initial_state;
+- wstring sf = L"";
+- wstring queue = L"";
+- wstring result = L"";
++ wstring sf = L""; // source language analysis
++ wstring queue = L""; // symbols to be added to each target
++ wstring result = L""; // result of looking up analysis in bidix
+
+ outOfWord = false;
+
+ skipUntil(input, output, L'^');
+- int val;
++ pair<wstring,int> tr; // readBilingual return value, containing:
++ int val; // the alphabet value of current symbol, and
++ wstring symbol = L""; // the current symbol as a string
++ bool seentags = false; // have we seen any tags at all in the analysis?
+
+- while((val = readGeneration(input, output)) != 0x7fffffff)
++ bool seensurface = false;
++ wstring surface = L"";
++
++ while(true) // ie. while(val != 0x7fffffff)
+ {
++ tr = readBilingual(input, output);
++ symbol = tr.first;
++ val = tr.second;
++
++ //fwprintf(stderr, L"> %S : %C : %d\n", tr.first.c_str(), tr.second, tr.second);
++ if(biltransSurfaceForms && !seensurface && !outOfWord)
++ {
++ while(val != L'/' && val != 0x7fffffff)
++ {
++ surface = surface + symbol;
++ alphabet.getSymbol(surface, val);
++ tr = readBilingual(input, output);
++ symbol = tr.first;
++ val = tr.second;
++ //fwprintf(stderr, L" == %S : %C : %d => %S\n", symbol.c_str(), val, val, surface.c_str());
++ }
++ seensurface = true;
++ tr = readBilingual(input, output);
++ symbol = tr.first;
++ val = tr.second;
++ }
++
++ if (val == 0x7fffffff)
++ {
++ break;
++ }
++
+ if(val == L'$' && outOfWord)
+ {
++ if(!seentags) // if no tags: only return complete matches
++ {
++ bool uppercase = sf.size() > 1 && iswupper(sf[1]);
++ bool firstupper= iswupper(sf[0]);
++
++ result = current_state.filterFinals(all_finals, alphabet,
++ escaped_chars,
++ uppercase, firstupper, 0);
++ }
++
+ if(sf[0] == L'*')
+ {
+ printWordBilingual(sf, L"/"+sf, output);
+@@ -2055,14 +2359,23 @@
+ printWordBilingual(sf, compose(result, queue), output);
+ }
+ else
+- {
+- printWordBilingual(sf, L"/@"+sf, output);
++ { //xxx
++ if(biltransSurfaceForms)
++ {
++ printWordBilingual(surface, L"/@"+surface, output);
++ }
++ else
++ {
++ printWordBilingual(sf, L"/@"+sf, output);
++ }
+ }
+-
++ seensurface = false;
++ surface = L"";
+ queue = L"";
+ result = L"";
+ current_state = *initial_state;
+ sf = L"";
++ seentags = false;
+ }
+ else if(iswspace(val) && sf.size() == 0)
+ {
+@@ -2074,7 +2387,11 @@
+ {
+ sf += L'\\';
+ }
+- alphabet.getSymbol(sf, val);
++ alphabet.getSymbol(sf, val); // add symbol to sf iff alphabetic
++ if(val == 0) // non-alphabetic, possibly unknown tag; add to sf
++ {
++ sf += symbol;
++ }
+ }
+ else
+ {
+@@ -2082,7 +2399,15 @@
+ {
+ sf += L'\\';
+ }
+- alphabet.getSymbol(sf,val);
++ alphabet.getSymbol(sf, val); // add symbol to sf iff alphabetic
++ if(val == 0) // non-alphabetic, possibly unknown tag; add to sf
++ {
++ sf += symbol;
++ }
++ if(alphabet.isTag(val) || val == 0)
++ {
++ seentags = true;
++ }
+ if(current_state.size() != 0)
+ {
+ if(!alphabet.isTag(val) && iswupper(val) && !caseSensitive)
+@@ -2105,12 +2430,21 @@
+ }
+ if(current_state.size() == 0 && result != L"")
+ {
+- if(alphabet.isTag(val))
++ // We already have a result, but there is still more to read
++ // of the analysis; following tags are not consumed, but
++ // output as target language tags (added to result on
++ // end-of-word)
++ if(alphabet.isTag(val)) // known tag
+ {
+ alphabet.getSymbol(queue, val);
+ }
++ else if (val == 0) // non-alphabetic, possibly unknown tag
++ {
++ queue += symbol;
++ }
+ else
+ {
++ // There are no more alive transductions and the current symbol is not a tag -- unknown word!
+ result = L"";
+ }
+ }
+@@ -2127,6 +2461,7 @@
+ unsigned int end_point = input_word.size()-2;
+ wstring queue = L"";
+ bool mark = false;
++ bool seentags = false; // have we seen any tags at all in the analysis?
+
+ if(with_delim == false)
+ {
+@@ -2160,6 +2495,7 @@
+ }
+ else if(input_word[i] == L'<')
+ {
++ seentags = true;
+ symbol = L'<';
+ for(unsigned int j = i + 1; j <= end_point; j++)
+ {
+@@ -2217,7 +2553,7 @@
+ }
+
+ if(current_state.size() == 0)
+- {
++ {
+ if(symbol != L"" && result != L"")
+ {
+ queue.append(symbol);
+@@ -2224,20 +2560,39 @@
+ }
+ else
+ {
+- // word is not present
++ // word is not present
+ if(with_delim)
+- {
++ {
+ result = L"^@" + input_word.substr(1);
+- }
++ }
+ else
+- {
++ {
+ result = L"@" + input_word;
+- }
++ }
+ return pair<wstring, int>(result, 0);
+ }
+ }
+ }
+
++ if (!seentags
++ && L"" == current_state.filterFinals(all_finals, alphabet,
++ escaped_chars,
++ uppercase, firstupper, 0))
++ {
++ // word is not present
++ if(with_delim)
++ {
++ result = L"^@" + input_word.substr(1);
++ }
++ else
++ {
++ result = L"@" + input_word;
++ }
++ return pair<wstring, int>(result, 0);
++ }
++
++
++
+ // attach unmatched queue automatically
+
+ if(queue != L"")
+@@ -2661,10 +3016,11 @@
+ return str;
+ }
+
++
+ void
+-FSTProcessor::setDecompoundingMode(bool const value)
++FSTProcessor::setBiltransSurfaceForms(bool const value)
+ {
+- compoundDecomposition = value;
++ biltransSurfaceForms = value;
+ }
+
+ void
+@@ -2688,7 +3044,7 @@
+ bool
+ FSTProcessor::getDecompoundingMode()
+ {
+- return compoundDecomposition;
++ return do_decomposition;
+ }
+
+ bool
+Index: lttoolbox/lt_comp.cc
+===================================================================
+--- lttoolbox/lt_comp.cc (revision 21745)
++++ lttoolbox/lt_comp.cc (working copy)
+@@ -23,6 +23,7 @@
+ #include <iostream>
+ #include <libgen.h>
+ #include <string>
++#include <getopt.h>
+
+ using namespace std;
+
+@@ -31,7 +32,11 @@
+ if(name != NULL)
+ {
+ cout << basename(name) << " v" << PACKAGE_VERSION <<": build a letter transducer from a dictionary" << endl;
+- cout << "USAGE: " << basename(name) << " lr | rl dictionary_file output_file [acx_file]" << endl;
++ cout << "USAGE: " << basename(name) << " [-avh] lr | rl dictionary_file output_file [acx_file]" << endl;
++ cout << " -v: set language variant" << endl;
++ cout << " -a: set alternative (monodix)" << endl;
++ cout << " -l: set left language variant (bidix)" << endl;
++ cout << " -r: set right language variant (bidix)" << endl;
+ cout << "Modes:" << endl;
+ cout << " lr: left-to-right compilation" << endl;
+ cout << " rl: right-to-left compilation" << endl;
+@@ -42,27 +47,113 @@
+
+ int main(int argc, char *argv[])
+ {
+- if(argc != 4 && argc != 5)
++ Compiler c;
++ c.setVerbose(false);
++
++#if HAVE_GETOPT_LONG
++ int option_index=0;
++#endif
++
++ string vl;
++ string vr;
++
++ while (true) {
++#if HAVE_GETOPT_LONG
++ static struct option long_options[] =
++ {
++ {"alt", required_argument, 0, 'a'},
++ {"var", required_argument, 0, 'v'},
++ {"var-left", required_argument, 0, 'l'},
++ {"var-right", required_argument, 0, 'r'},
++ {"help", no_argument, 0, 'h'},
++ {"verbose", no_argument, 0, 'V'},
++ {0, 0, 0, 0}
++ };
++
++ int cnt=getopt_long(argc, argv, "a:v:l:r:hV", long_options, &option_index);
++#else
++ int cnt=getopt(argc, argv, "a:v:l:r:hV");
++#endif
++ if (cnt==-1)
++ break;
++
++ switch (cnt)
++ {
++ case 'a':
++ c.setAltValue(optarg);
++ break;
++
++ case 'v':
++ c.setVariantValue(optarg);
++ break;
++
++ case 'l':
++ vl = optarg;
++ c.setVariantLeftValue(vl);
++ break;
++
++ case 'r':
++ vr = optarg;
++ c.setVariantRightValue(vr);
++ break;
++
++ case 'V':
++ c.setVerbose(true);
++ break;
++
++ case 'h':
++ default:
++ endProgram(argv[0]);
++ break;
++ }
++ }
++
++ string opc;
++ string infile;
++ string outfile;
++ string acxfile;
++
++ switch(argc - optind + 1)
+ {
+- endProgram(argv[0]);
++ case 5:
++ opc = argv[argc-4];
++ infile = argv[argc-3];
++ outfile = argv[argc-2];
++ acxfile = argv[argc-1];
++ break;
++
++ case 4:
++ opc = argv[argc-3];
++ infile = argv[argc-2];
++ outfile = argv[argc-1];
++ break;
++
++ default:
++ endProgram(argv[0]);
++ break;
+ }
+
+- string opc = argv[1];
+-
+- Compiler c;
+-
+-
+ if(opc == "lr")
+ {
+- if(argc == 5)
++ if(vr == "" && vl != "")
+ {
+- c.parseACX(argv[4], Compiler::COMPILER_RESTRICTION_LR_VAL);
++ cout << "Error: -l specified, but mode is lr" << endl;
++ endProgram(argv[0]);
+ }
+- c.parse(argv[2], Compiler::COMPILER_RESTRICTION_LR_VAL);
++ if(acxfile != "")
++ {
++ c.parseACX(acxfile, Compiler::COMPILER_RESTRICTION_LR_VAL);
++ }
++ c.parse(infile, Compiler::COMPILER_RESTRICTION_LR_VAL);
+ }
+ else if(opc == "rl")
+ {
+- c.parse(argv[2], Compiler::COMPILER_RESTRICTION_RL_VAL);
++ if(vl == "" && vr != "")
++ {
++ cout << "Error: -r specified, but mode is rl" << endl;
++ endProgram(argv[0]);
++ }
++ c.parse(infile, Compiler::COMPILER_RESTRICTION_RL_VAL);
+ }
+ else
+ {
+@@ -69,10 +160,10 @@
+ endProgram(argv[0]);
+ }
+
+- FILE *output = fopen(argv[3], "wb");
++ FILE *output = fopen(outfile.c_str(), "wb");
+ if(!output)
+ {
+- cerr << "Error: Cannot open file '" << argv[2] << "'." << endl;
++ cerr << "Error: Cannot open file '" << outfile << "'." << endl;
+ exit(EXIT_FAILURE);
+ }
+ c.write(output);
+Index: lttoolbox/fst_processor.h
+===================================================================
+--- lttoolbox/fst_processor.h (revision 21745)
++++ lttoolbox/fst_processor.h (working copy)
+@@ -43,7 +43,8 @@
+ gm_clean, // clear all
+ gm_unknown, // display unknown words, clear transfer and generation tags
+ gm_all, // display all
+- gm_tagged // tagged generation
++ gm_tagged, // tagged generation
++ gm_tagged_nm // clean tagged generation
+ };
+
+ /**
+@@ -57,8 +58,6 @@
+ */
+ map<wstring, TransExe, Ltstr> transducers;
+
+- Pool<vector<int> > *pool;
+-
+ /**
+ * Current state of lexical analysis
+ */
+@@ -130,6 +129,12 @@
+ bool outOfWord;
+
+ /**
++ * true if we're automatically removing surface forms.
++ */
++ bool biltransSurfaceForms;
++
++
++ /**
+ * if true, makes always difference between uppercase and lowercase
+ * characters
+ */
+@@ -154,9 +159,30 @@
+ /**
+ * try analysing unknown words as compounds
+ */
+- bool compoundDecomposition;
++ bool do_decomposition;
+
+ /**
++ * Symbol of CompoundOnlyL
++ */
++ int compoundOnlyLSymbol;
++
++ /**
++ * Symbol of CompoundR
++ */
++ int compoundRSymbol;
++
++ /**
++ * Show or not the controls symbols (as compoundRSymbol)
++ */
++ bool showControlSymbols;
++
++ /**
++ * Max compound elements
++ * Hard coded for now, but there might come a switch one day
++ */
++ int compound_max_elements;
++
++ /**
+ * Prints an error of input stream and exits
+ */
+ void streamError();
+@@ -219,6 +245,13 @@
+ int readGeneration(FILE *input, FILE *output);
+
+ /**
++ * Read text from stream (biltrans version)
++ * @param input the stream to read
++ * @return the queue of 0-symbols, and the next symbol in the stream
++ */
++ pair<wstring, int> readBilingual(FILE *input, FILE *output);
++
++ /**
+ * Read text from stream (SAO version)
+ * @param input the stream to read
+ * @return the next symbol in the stream
+@@ -248,7 +281,17 @@
+ */
+ void writeEscaped(wstring const &str, FILE *output);
+
++
+ /**
++ * Write a string to an output stream, escaping all escapable characters
++ * but keeping symbols without escaping
++ * @param str the string to write, escaping characters
++ * @param output the stream to write in
++ */
++ void writeEscapedWithTags(wstring const &str, FILE *output);
++
++
++ /**
+ * Checks if an string ends with a particular suffix
+ * @param str the string to test
+ * @param the searched suffix
+@@ -287,6 +330,8 @@
+ */
+ void printUnknownWord(wstring const &sf, FILE *output);
+
++ void initDecompositionSymbols();
++
+ vector<wstring> numbers;
+ int readTMAnalysis(FILE *input);
+
+@@ -294,7 +339,7 @@
+ void printSpace(wchar_t const val, FILE *output);
+ void skipUntil(FILE *input, FILE *output, wint_t const character);
+ static wstring removeTags(wstring const &str);
+- wstring decompose(wstring str);
++ wstring compoundAnalysis(wstring str, bool uppercase, bool firstupper);
+ size_t firstNotAlpha(wstring const &sf);
+
+ void analysis_wrapper_null_flush(FILE *input, FILE *output);
+@@ -338,9 +383,9 @@
+
+ void setCaseSensitiveMode(bool const value);
+ void setDictionaryCaseMode(bool const value);
++ void setBiltransSurfaceForms(bool const value);
+ void setNullFlush(bool const value);
+ bool getNullFlush();
+- void setDecompoundingMode(bool const value);
+ bool getDecompoundingMode();
+ };
+
+Index: lttoolbox/lt_proc.cc
+===================================================================
+--- lttoolbox/lt_proc.cc (revision 21745)
++++ lttoolbox/lt_proc.cc (working copy)
+@@ -36,35 +36,42 @@
+ void endProgram(char *name)
+ {
+ cout << basename(name) << ": process a stream with a letter transducer" << endl;
+- cout << "USAGE: " << basename(name) << " [-c] [-a|-g|-n|-d|-p|-s|-t|-b] fst_file [input_file [output_file]]" << endl;
++ cout << "USAGE: " << basename(name) << " [ -a | -b | -c | -d | -e | -g | -n | -p | -s | -t | -v | -h -z -w ] fst_file [input_file [output_file]]" << endl;
+ cout << "Options:" << endl;
+ #if HAVE_GETOPT_LONG
+ cout << " -a, --analysis: morphological analysis (default behavior)" << endl;
+- cout << " -b, --bilingual: lexical transference" << endl;
++ cout << " -b, --bilingual: lexical transfer" << endl;
+ cout << " -c, --case-sensitive: use the literal case of the incoming characters" << endl;
++ cout << " -d, --debugged-gen morph. generation with all the stuff" <<endl;
++ cout << " -e, --decompose-nouns: Try to decompound unknown words" << endl;
+ cout << " -g, --generation: morphological generation" << endl;
++ cout << " -l, --tagged-gen: morphological generation keeping lexical forms" << endl;
++ cout << " -m, --tagged-nm-gen: same as -l but without unknown word marks" << endl;
+ cout << " -n, --non-marked-gen morph. generation without unknown word marks" << endl;
+- cout << " -d, --debugged-gen morph. generation with all the stuff" <<endl;
++ cout << " -o, --surf-bilingual: lexical transfer with surface forms" << endl;
+ cout << " -p, --post-generation: post-generation" << endl;
+- cout << " -e, --decompose-compounds: try to decompose unknown word as compounds" << endl;
+ cout << " -s, --sao: SAO annotation system input processing" << endl;
+ cout << " -t, --transliteration: apply transliteration dictionary" << endl;
++ cout << " -v, --version: version" << endl;
+ cout << " -z, --null-flush: flush output on the null character " << endl;
+ cout << " -w, --dictionary-case: use dictionary case instead of surface case" << endl;
+- cout << " -v, --version: version" << endl;
+ cout << " -h, --help: show this help" << endl;
+ #else
+ cout << " -a: morphological analysis (default behavior)" << endl;
++ cout << " -b: lexical transfer" << endl;
+ cout << " -c: use the literal case of the incoming characters" << endl;
++ cout << " -d: morph. generation with all the stuff" << endl;
++ cout << " -e: try to decompose unknown words as compounds" << endl;
+ cout << " -g: morphological generation" << endl;
++ cout << " -l: morphological generation keeping lexical forms" << endl;
+ cout << " -n: morph. generation without unknown word marks" << endl;
++ cout << " -o: lexical transfer with surface forms" << endl;
+ cout << " -p: post-generation" << endl;
+- cout << " -e: try to decompose unknown words as compounds" << endl;
+ cout << " -s: SAO annotation system input processing" << endl;
+ cout << " -t: apply transliteration dictionary" << endl;
++ cout << " -v: version" << endl;
+ cout << " -z: flush output on the null character " << endl;
+ cout << " -w: use dictionary case instead of surface case" << endl;
+- cout << " -v: version" << endl;
+ cout << " -h: show this help" << endl;
+ #endif
+ exit(EXIT_FAILURE);
+@@ -88,10 +95,12 @@
+ {
+ {"analysis", 0, 0, 'a'},
+ {"bilingual", 0, 0, 'b'},
++ {"surf-bilingual", 0, 0, 'o'},
+ {"generation", 0, 0, 'g'},
+ {"non-marked-gen", 0, 0, 'n'},
+ {"debugged-gen", 0, 0, 'd'},
+ {"tagged-gen", 0, 0, 'l'},
++ {"tagged-nm-gen", 0, 0, 'm'},
+ {"post-generation", 0, 0, 'p'},
+ {"sao", 0, 0, 's'},
+ {"transliteration", 0, 0, 't'},
+@@ -107,9 +116,9 @@
+ {
+ #if HAVE_GETOPT_LONG
+ int option_index;
+- int c = getopt_long(argc, argv, "abceglndpstzwvh", long_options, &option_index);
++ int c = getopt_long(argc, argv, "abceglmndopstzwvh", long_options, &option_index);
+ #else
+- int c = getopt(argc, argv, "abceglndpstzwvh");
++ int c = getopt(argc, argv, "abceglmndopstzwvh");
+ #endif
+
+ if(c == -1)
+@@ -123,13 +132,12 @@
+ fstp.setCaseSensitiveMode(true);
+ break;
+
+- case 'e':
+- fstp.setDecompoundingMode(true);
+- break;
+-
++ case 'e':
+ case 'a':
+ case 'b':
++ case 'o':
+ case 'l':
++ case 'm':
+ case 'g':
+ case 'n':
+ case 'd':
+@@ -248,11 +256,19 @@
+ fstp.initGeneration();
+ checkValidity(fstp);
+ fstp.generation(input, output, gm_all);
++ break;
+
+ case 'l':
+ fstp.initGeneration();
+ checkValidity(fstp);
+ fstp.generation(input, output, gm_tagged);
++ break;
++
++ case 'm':
++ fstp.initGeneration();
++ checkValidity(fstp);
++ fstp.generation(input, output, gm_tagged_nm);
++ break;
+
+ case 'p':
+ fstp.initPostgeneration();
+@@ -272,11 +288,24 @@
+ fstp.transliteration(input, output);
+ break;
+
++ case 'o':
++ fstp.initBiltrans();
++ checkValidity(fstp);
++ fstp.setBiltransSurfaceForms(true);
++ fstp.bilingual(input, output);
++ break;
++
+ case 'b':
+ fstp.initBiltrans();
+ checkValidity(fstp);
+ fstp.bilingual(input, output);
+ break;
++
++ case 'e':
++ fstp.initDecomposition();
++ checkValidity(fstp);
++ fstp.analysis(input, output);
++ break;
+
+ case 'a':
+ default:
+Index: lttoolbox/expander.cc
+===================================================================
+--- lttoolbox/expander.cc (revision 21745)
++++ lttoolbox/expander.cc (working copy)
+@@ -295,9 +295,18 @@
+ {
+ wstring atributo=this->attrib(Compiler::COMPILER_RESTRICTION_ATTR);
+ wstring entrname=this->attrib(Compiler::COMPILER_LEMMA_ATTR);
++ wstring altval = this->attrib(Compiler::COMPILER_ALT_ATTR);
++ wstring varval = this->attrib(Compiler::COMPILER_V_ATTR);
++ wstring varl = this->attrib(Compiler::COMPILER_VL_ATTR);
++ wstring varr = this->attrib(Compiler::COMPILER_VR_ATTR);
+
+ wstring myname = L"";
+- if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == L"yes")
++ if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == L"yes"
++ || altval != L"" && altval != alt
++ || (varval != L"" && varval != variant && atributo == Compiler::COMPILER_RESTRICTION_RL_VAL)
++ || ((varl != L"" && varl != variant_left) && (varr != L"" && varr != variant_right))
++ || (varl != L"" && varl != variant_left && atributo == Compiler::COMPILER_RESTRICTION_RL_VAL)
++ || (varr != L"" && varr != variant_right && atributo == Compiler::COMPILER_RESTRICTION_LR_VAL))
+ {
+ do
+ {
+@@ -316,11 +325,14 @@
+ }
+
+ EntList items, items_lr, items_rl;
+- if(atributo == Compiler::COMPILER_RESTRICTION_LR_VAL)
++ if(atributo == Compiler::COMPILER_RESTRICTION_LR_VAL
++ || (varval != L"" && varval != variant && atributo != Compiler::COMPILER_RESTRICTION_RL_VAL)
++ || varl != L"" && varl != variant_left)
+ {
+ items_lr.push_back(pair<wstring, wstring>(L"", L""));
+ }
+- else if(atributo == Compiler::COMPILER_RESTRICTION_RL_VAL)
++ else if(atributo == Compiler::COMPILER_RESTRICTION_RL_VAL
++ || (varr != L"" && varr != variant_right))
+ {
+ items_rl.push_back(pair<wstring, wstring>(L"", L""));
+ }
+@@ -594,3 +606,27 @@
+ it->second.append(endings.second);
+ }
+ }
++
++void
++Expander::setAltValue(string const &a)
++{
++ alt = XMLParseUtil::stows(a);
++}
++
++void
++Expander::setVariantValue(string const &v)
++{
++ variant = XMLParseUtil::stows(v);
++}
++
++void
++Expander::setVariantLeftValue(string const &v)
++{
++ variant_left = XMLParseUtil::stows(v);
++}
++
++void
++Expander::setVariantRightValue(string const &v)
++{
++ variant_right = XMLParseUtil::stows(v);
++}
+Index: lttoolbox/lt-expand.1
+===================================================================
+--- lttoolbox/lt-expand.1 (revision 21745)
++++ lttoolbox/lt-expand.1 (working copy)
+@@ -9,11 +9,28 @@
+ architecture: \fBhttp://www.apertium.org\fR.
+ .SH SYNOPSIS
+ .B lt-expand
++[
++.B \-a \fR|
++.B \-v \fR|
++.B \-l \fR|
++.B \-r \fR|
++.B \-h
++]
+ dictionary_file [output_file]
+ .PP
++.B lt-expand
++[
++.B \-\-alt \fR|
++.B \-\-var \fR|
++.B \-\-var\-left \fR|
++.B \-\-var\-right \fR|
++.B \-\-help
++]
++dictionary_file [output_file]
++.PP
+ .SH DESCRIPTION
+ .BR lt-expand
+-Is the application responsible of expanding a dictionary into a
++Is the application responsible for expanding a dictionary into a
+ simple list of input string-output string pairs by eliminating
+ paradigms through substitution and unfolding.
+ .PP
+@@ -20,6 +37,23 @@
+ The output goes to \fIoutput_file\fR if it is present or to standard
+ output if it is missing.
+ .PP
++.SH OPTIONS
++.TP
++.B \-a, \-\-alt
++Sets the value of the \fIalt\fR attribute to use in expansion
++.TP
++.B \-v, \-\-var
++Sets the value of the \fIv\fR attribute to use in expansion of monodixes
++.TP
++.B \-l, \-\-var\-left
++Sets the value of the \fIvl\fR attribute to use in expansion of bidixes
++.TP
++.B \-r, \-\-var\-right
++Sets the value of the \fIvr\fR attribute to use in expansion of bidixes
++.TP
++.B \-h, \-\-help
++Prints a short help message
++.PP
+ .SH FILES
+ .B dictionary_file
+ The input dictionary to expand.
+@@ -34,5 +68,4 @@
+ .SH BUGS
+ Lots of...lurking in the dark and waiting for you!
+ .SH AUTHOR
+-(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+-reserved.
++(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante.
+Index: lttoolbox/dix.dtd
+===================================================================
+--- lttoolbox/dix.dtd (revision 21745)
++++ lttoolbox/dix.dtd (working copy)
+@@ -1,4 +1,21 @@
+ <!--
++ Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
++
++ This program is free software; you can redistribute it and/or
++ modify it under the terms of the GNU General Public License as
++ published by the Free Software Foundation; either version 2 of the
++ License, or (at your option) any later version.
++
++ This program is distributed in the hope that it will be useful, but
++ WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with this program; if not, write to the Free Software
++ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
++ 02111-1307, USA.
++
+ DTD for the format of dictionaries
+ -->
+ <!ELEMENT dictionary (alphabet?, sdefs?,
+@@ -57,6 +74,10 @@
+ i CDATA #IMPLIED
+ slr CDATA #IMPLIED
+ srl CDATA #IMPLIED
++ alt CDATA #IMPLIED
++ v CDATA #IMPLIED
++ vl CDATA #IMPLIED
++ vr CDATA #IMPLIED
+ >
+ <!-- r: restriction LR: left-to-right,
+ RL: right-to-left -->
+@@ -66,6 +87,10 @@
+ <!-- i: ignore ('yes') means ignore, otherwise it is not ignored) -->
+ <!-- slr: translation sense when translating from left to right -->
+ <!-- srl: translation sense when translating from right to left -->
++ <!-- alt: alternative entries are omitted if not selected -->
++ <!-- v: variant sets (monodix) direction restrictions based on language variant -->
++ <!-- vl: variant left sets direction restrictions based on language variant for language on left of bidix -->
++ <!-- vr: variant right sets direction restrictions based on language variant for language on right of bidix -->
+ <!ELEMENT par EMPTY>
+ <!-- reference to paradigm -->
+ <!ATTLIST par
+Index: lttoolbox/compiler.cc
+===================================================================
+--- lttoolbox/compiler.cc (revision 21745)
++++ lttoolbox/compiler.cc (working copy)
+@@ -56,6 +56,10 @@
+ wstring const Compiler::COMPILER_LEMMA_ATTR = L"lm";
+ wstring const Compiler::COMPILER_IGNORE_ATTR = L"i";
+ wstring const Compiler::COMPILER_IGNORE_YES_VAL = L"yes";
++wstring const Compiler::COMPILER_ALT_ATTR = L"alt";
++wstring const Compiler::COMPILER_V_ATTR = L"v";
++wstring const Compiler::COMPILER_VL_ATTR = L"vl";
++wstring const Compiler::COMPILER_VR_ATTR = L"vr";
+
+ Compiler::Compiler()
+ {
+@@ -417,6 +421,12 @@
+ }
+ }
+
++ if(verbose && first_element && (both_sides.front() == (int)L' '))
++ {
++ wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
++ wcerr << L"): Entry begins with space." << endl;
++ }
++ first_element = false;
+ EntryToken e;
+ e.setSingleTransduction(both_sides, both_sides);
+ return e;
+@@ -444,6 +454,13 @@
+ readString(lhs, name);
+ }
+ }
++
++ if(verbose && first_element && (lhs.front() == (int)L' '))
++ {
++ wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
++ wcerr << L"): Entry begins with space." << endl;
++ }
++ first_element = false;
+
+ skip(name, COMPILER_RIGHT_ELEM);
+
+@@ -480,7 +497,15 @@
+ {
+ EntryToken e;
+ wstring nomparadigma = attrib(COMPILER_N_ATTR);
++ first_element = false;
+
++ if(current_paradigm != L"" && nomparadigma == current_paradigm)
++ {
++ wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
++ wcerr << L"): Paradigm refers to itself '" << nomparadigma << L"'." <<endl;
++ exit(EXIT_FAILURE);
++ }
++
+ if(paradigms.find(nomparadigma) == paradigms.end())
+ {
+ wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
+@@ -632,9 +657,18 @@
+ {
+ wstring atributo=this->attrib(COMPILER_RESTRICTION_ATTR);
+ wstring ignore = this->attrib(COMPILER_IGNORE_ATTR);
++ wstring altval = this->attrib(COMPILER_ALT_ATTR);
++ wstring varval = this->attrib(COMPILER_V_ATTR);
++ wstring varl = this->attrib(COMPILER_VL_ATTR);
++ wstring varr = this->attrib(COMPILER_VR_ATTR);
+
+ //�if entry is masked by a restriction of direction or an ignore mark
+- if((atributo != L"" && atributo != direction) || ignore == COMPILER_IGNORE_YES_VAL)
++ if((atributo != L"" && atributo != direction)
++ || ignore == COMPILER_IGNORE_YES_VAL
++ || (altval != L"" && altval != alt)
++ || (direction == COMPILER_RESTRICTION_RL_VAL && varval != L"" && varval != variant)
++ || (direction == COMPILER_RESTRICTION_RL_VAL && varl != L"" && varl != variant_left)
++ || (direction == COMPILER_RESTRICTION_LR_VAL && varr != L"" && varr != variant_right))
+ {
+ // parse to the end of the entry
+ wstring name = L"";
+@@ -662,6 +696,11 @@
+ wstring name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+ skipBlanks(name);
+
++ if(current_paradigm == L"" && verbose)
++ {
++ first_element = true;
++ }
++
+ int tipo = xmlTextReaderNodeType(reader);
+ if(name == COMPILER_PAIR_ELEM)
+ {
+@@ -845,3 +884,33 @@
+ it->second.write(output);
+ }
+ }
++
++void
++Compiler::setAltValue(string const &a)
++{
++ alt = XMLParseUtil::stows(a);
++}
++
++void
++Compiler::setVariantValue(string const &v)
++{
++ variant = XMLParseUtil::stows(v);
++}
++
++void
++Compiler::setVariantLeftValue(string const &v)
++{
++ variant_left = XMLParseUtil::stows(v);
++}
++
++void
++Compiler::setVariantRightValue(string const &v)
++{
++ variant_right = XMLParseUtil::stows(v);
++}
++
++void
++Compiler::setVerbose(bool verbosity)
++{
++ verbose = verbosity;
++}
+Index: lttoolbox/transducer.h
+===================================================================
+--- lttoolbox/transducer.h (revision 21745)
++++ lttoolbox/transducer.h (working copy)
+@@ -146,6 +146,13 @@
+ bool isFinal(int const state) const;
+
+ /**
++ * Test if a pattern is recognised by the FST
++ * @param patro the pattern to be recognised, as a wide string
++ * @return true if the pattern is recognised by the transducer
++ */
++ bool recognise(wstring patro, Alphabet &a, FILE *err = stderr);
++
++ /**
+ * Set the state as a final or not, yes by default
+ * @param state the state
+ * @param value if true, the state is set as final state
+@@ -179,6 +186,12 @@
+ void reverse(int const epsilon_tag = 0);
+
+ /**
++ * Print all the transductions of a transducer in ATT format
++ * @param epsilon_tag the tag to take as epsilon
++ */
++ void show(Alphabet &a, FILE *output = stdout, int const epsilon_tag = 0);
++
++ /**
+ * Determinize the transducer
+ * @param epsilon_tag the tag to take as epsilon
+ */
+@@ -242,6 +255,12 @@
+ bool isEmpty(int const state) const;
+
+ /**
++ * Returns the number of transitions from a given state
++ * @return the number of transitions
++ */
++ int getStateSize(int const state);
++
++ /**
+ * Write method
+ * @param output the stream to write to
+ * @param decalage offset to sum to the tags
+Index: lttoolbox/lt_expand.cc
+===================================================================
+--- lttoolbox/lt_expand.cc (revision 21745)
++++ lttoolbox/lt_expand.cc (working copy)
+@@ -24,6 +24,7 @@
+ #include <iostream>
+ #include <libgen.h>
+ #include <string>
++#include <getopt.h>
+
+ #ifdef _MSC_VER
+ #include <io.h>
+@@ -37,7 +38,7 @@
+ if(name != NULL)
+ {
+ cout << basename(name) << " v" << PACKAGE_VERSION <<": expand the contents of a dictionary file" << endl;
+- cout << "USAGE: " << basename(name) << " dictionary_file [output_file]" << endl;
++ cout << "USAGE: " << basename(name) << " [-avlrh] dictionary_file [output_file]" << endl;
+ }
+ exit(EXIT_FAILURE);
+ }
+@@ -45,14 +46,67 @@
+ int main(int argc, char *argv[])
+ {
+ FILE *input = NULL, *output = NULL;
++ Expander e;
+
+- switch(argc)
++#if HAVE_GETOPT_LONG
++ int option_index=0;
++#endif
++
++ while (true) {
++#if HAVE_GETOPT_LONG
++ static struct option long_options[] =
++ {
++ {"alt", required_argument, 0, 'a'},
++ {"var", required_argument, 0, 'v'},
++ {"var-left", required_argument, 0, 'l'},
++ {"var-right", required_argument, 0, 'r'},
++ {"help", no_argument, 0, 'h'},
++ {0, 0, 0, 0}
++ };
++
++ int cnt=getopt_long(argc, argv, "a:v:l:r:h", long_options, &option_index);
++#else
++ int cnt=getopt(argc, argv, "a:v:l:r:h");
++#endif
++ if (cnt==-1)
++ break;
++
++ switch (cnt)
++ {
++ case 'a':
++ e.setAltValue(optarg);
++ break;
++
++ case 'v':
++ e.setVariantValue(optarg);
++ break;
++
++ case 'l':
++ e.setVariantLeftValue(optarg);
++ break;
++
++ case 'r':
++ e.setVariantRightValue(optarg);
++ break;
++
++ case 'h':
++ default:
++ endProgram(argv[0]);
++ break;
++ }
++ }
++
++ string infile;
++ string outfile;
++
++ switch(argc - optind + 1)
+ {
+ case 2:
+- input = fopen(argv[1], "rb");
++ infile = argv[argc-1];
++ input = fopen(infile.c_str(), "rb");
+ if(input == NULL)
+ {
+- cerr << "Error: Cannot open file '" << argv[1] << "'." << endl;
++ cerr << "Error: Cannot open file '" << infile << "'." << endl;
+ exit(EXIT_FAILURE);
+ }
+ fclose(input);
+@@ -60,18 +114,20 @@
+ break;
+
+ case 3:
+- input = fopen(argv[1], "rb");
++ infile = argv[argc-2];
++ input = fopen(infile.c_str(), "rb");
+ if(input == NULL)
+ {
+- cerr << "Error: Cannot open file '" << argv[1] << "'." << endl;
++ cerr << "Error: Cannot open file '" << infile << "'." << endl;
+ exit(EXIT_FAILURE);
+ }
+ fclose(input);
+
+- output = fopen(argv[2], "wb");
++ outfile = argv[argc-1];
++ output = fopen(argv[argc-1], "wb");
+ if(output == NULL)
+ {
+- cerr << "Error: Cannot open file '" << argv[2] << "'." << endl;
++ cerr << "Error: Cannot open file '" << outfile << "'." << endl;
+ exit(EXIT_FAILURE);
+ }
+ break;
+@@ -85,8 +141,7 @@
+ _setmode(_fileno(output), _O_U8TEXT);
+ #endif
+
+- Expander e;
+- e.expand(argv[1], output);
++ e.expand(infile, output);
+ fclose(output);
+
+ return EXIT_SUCCESS;
+Index: lttoolbox/state.cc
+===================================================================
+--- lttoolbox/state.cc (revision 21745)
++++ lttoolbox/state.cc (working copy)
+@@ -20,10 +20,15 @@
+
+ #include <cstring>
+ #include <cwctype>
++#include <climits>
+
+-State::State(Pool<vector<int> > *p)
++//debug//
++//#include <iostream>
++//using namespace std;
++//debug//
++
++State::State()
+ {
+- pool = p;
+ }
+
+ State::~State()
+@@ -51,10 +56,9 @@
+ void
+ State::destroy()
+ {
+- // release references
+ for(size_t i = 0, limit = state.size(); i != limit; i++)
+ {
+- pool->release(state[i].sequence);
++ delete state[i].sequence;
+ }
+
+ state.clear();
+@@ -66,15 +70,14 @@
+ // release references
+ for(size_t i = 0, limit = state.size(); i != limit; i++)
+ {
+- pool->release(state[i].sequence);
++ delete state[i].sequence;
+ }
+
+ state = s.state;
+- pool = s.pool;
+
+ for(size_t i = 0, limit = state.size(); i != limit; i++)
+ {
+- vector<int> *tmp = pool->get();
++ vector<int> *tmp = new vector<int>();
+ *tmp = *(state[i].sequence);
+ state[i].sequence = tmp;
+ }
+@@ -90,7 +93,7 @@
+ State::init(Node *initial)
+ {
+ state.clear();
+- state.push_back(TNodeState(initial,pool->get(),false));
++ state.push_back(TNodeState(initial, new vector<int>(), false));
+ state[0].sequence->clear();
+ epsilonClosure();
+ }
+@@ -113,7 +116,7 @@
+ {
+ for(int j = 0; j != it->second.size; j++)
+ {
+- vector<int> *new_v = pool->get();
++ vector<int> *new_v = new vector<int>();
+ *new_v = *(state[i].sequence);
+ if(it->first != 0)
+ {
+@@ -122,7 +125,7 @@
+ new_state.push_back(TNodeState(it->second.dest[j], new_v, state[i].dirty||false));
+ }
+ }
+- pool->release(state[i].sequence);
++ delete state[i].sequence;
+ }
+
+ state = new_state;
+@@ -147,8 +150,8 @@
+ {
+ for(int j = 0; j != it->second.size; j++)
+ {
+- vector<int> *new_v = pool->get();
+- *new_v = *(state[i].sequence);
++ vector<int> *new_v = new vector<int>();
++ *new_v = *(state[i].sequence);
+ if(it->first != 0)
+ {
+ new_v->push_back(it->second.out_tag[j]);
+@@ -161,7 +164,7 @@
+ {
+ for(int j = 0; j != it->second.size; j++)
+ {
+- vector<int> *new_v = pool->get();
++ vector<int> *new_v = new vector<int>();
+ *new_v = *(state[i].sequence);
+ if(it->first != 0)
+ {
+@@ -170,7 +173,7 @@
+ new_state.push_back(TNodeState(it->second.dest[j], new_v, true));
+ }
+ }
+- pool->release(state[i].sequence);
++ delete state[i].sequence;
+ }
+
+ state = new_state;
+@@ -187,7 +190,7 @@
+ {
+ for(int j = 0 ; j != it2->second.size; j++)
+ {
+- vector<int> *tmp = pool->get();
++ vector<int> *tmp = new vector<int>();
+ *tmp = *(state[i].sequence);
+ if(it2->second.out_tag[j] != 0)
+ {
+@@ -199,6 +202,69 @@
+ }
+ }
+
++/**
++ * Three-symbol variant of apply(): advances every live state over
++ * `input`, `alt1` and `alt2`, in that order, and replaces the state set
++ * with everything that was reached.
++ *
++ * States reached through `input` keep the dirty flag of the state they
++ * came from; states reached through `alt1` or `alt2` are marked dirty.
++ * If any of the three symbols is 0 the state set is simply emptied.
++ *
++ * @param input the symbol to follow
++ * @param alt1 first alternative symbol
++ * @param alt2 second alternative symbol
++ */
++void
++State::apply(int const input, int const alt1, int const alt2)
++{
++  if(input == 0 || alt1 == 0 || alt2 == 0)
++  {
++    // destroy() deletes every sequence before clearing `state`; merely
++    // overwriting `state` with an empty vector would leak them, because
++    // the sequences are plain new-allocated vectors.
++    destroy();
++    return;
++  }
++
++  int const symbols[3] = { input, alt1, alt2 };
++  vector<TNodeState> new_state;
++
++  for(size_t i = 0, limit = state.size(); i != limit; i++)
++  {
++    for(size_t k = 0; k != 3; k++)
++    {
++      map<int, Dest>::const_iterator it;
++      it = state[i].where->transitions.find(symbols[k]);
++      if(it == state[i].where->transitions.end())
++      {
++        continue;
++      }
++
++      // Only the primary symbol (k == 0) preserves the current dirty flag.
++      bool const dirty = (k == 0) ? state[i].dirty : true;
++
++      for(int j = 0; j != it->second.size; j++)
++      {
++        // Each destination gets its own copy of the output so far.
++        vector<int> *new_v = new vector<int>();
++        *new_v = *(state[i].sequence);
++        if(it->first != 0)
++        {
++          new_v->push_back(it->second.out_tag[j]);
++        }
++        new_state.push_back(TNodeState(it->second.dest[j], new_v, dirty));
++      }
++    }
++
++    // The old sequence has been copied wherever needed; release it.
++    delete state[i].sequence;
++  }
++
++  state = new_state;
++}
++
++
+ void
+ State::step(int const input)
+ {
+@@ -213,6 +279,37 @@
+ epsilonClosure();
+ }
+
++void
++State::step(int const input, int const alt1, int const alt2)
++{
++ apply(input, alt1, alt2);
++ epsilonClosure();
++}
++
++void
++State::step_case(wchar_t val, wchar_t val2, bool caseSensitive)
++{
++  // The lower-case form is tried only when it is needed and actually differs.
++  bool const add_lower = !caseSensitive && iswupper(val) && val != towlower(val);
++  if(add_lower) {
++    step(val, towlower(val), val2);
++  } else {
++    step(val, val2);
++  }
++}
++
++
++void
++State::step_case(wchar_t val, bool caseSensitive)
++{
++ if (!iswupper(val) || caseSensitive) {
++ step(val);
++ } else {
++ step(val, towlower(val));
++ }
++}
++
++
+ bool
+ State::isFinal(set<Node *> const &finals) const
+ {
+@@ -282,6 +379,60 @@
+ return result;
+ }
+
++
++set<pair<wstring, vector<wstring> > >
++State::filterFinalsLRX(set<Node *> const &finals,
++ Alphabet const &alphabet,
++ set<wchar_t> const &escaped_chars,
++ bool uppercase, bool firstupper, int firstchar) const
++{
++ set<pair<wstring, vector<wstring> > > results;
++
++ vector<wstring> current_result;
++ wstring rule_id = L"";
++
++ // /<$><select>station<n><ANY_TAG><$><skip><6>/<$><select>station<n><ANY_TAG><$><skip><6>
++ // For every state on a final node the sequence is cut at each <$> symbol:
++ // the non-empty pieces go into current_result and the text after the last
++ // <$> becomes rule_id. firstupper and firstchar are not used in this body.
++
++ for(size_t i = 0, limit = state.size(); i != limit; i++)
++ {
++ if(finals.find(state[i].where) != finals.end())
++ {
++ current_result.clear();
++ rule_id = L"";
++ wstring current_word = L"";
++ for(size_t j = 0, limit2 = state[i].sequence->size(); j != limit2; j++)
++ {
++ if(escaped_chars.find((*(state[i].sequence))[j]) != escaped_chars.end())
++ {
++ current_word += L'\\';
++ }
++ wstring sym = L"";
++ alphabet.getSymbol(sym, (*(state[i].sequence))[j], uppercase);
++ if(sym == L"<$>")
++ {
++ if(current_word != L"")
++ {
++ current_result.push_back(current_word);
++ }
++ current_word = L"";
++ }
++ else
++ {
++ current_word += sym;
++ }
++ }
++ rule_id = current_word;
++ results.insert(make_pair(rule_id, current_result));
++ }
++ }
++
++ return results;
++}
++
++
+ wstring
+ State::filterFinalsSAO(set<Node *> const &finals,
+ Alphabet const &alphabet,
+@@ -438,3 +589,149 @@
+
+ return result;
+ }
++
++
++
++void
++State::pruneCompounds(int requiredSymbol, int separationSymbol, int compound_max_elements)
++{
++ int minNoOfCompoundElements = compound_max_elements;
++ int *noOfCompoundElements = new int[state.size()];
++
++ // Discards every state whose separator count exceeds the smallest count seen; the minimum starts at compound_max_elements.
++
++ for (unsigned int i = 0; i<state.size(); i++) {
++ vector<int> seq = *state.at(i).sequence;
++
++ if (lastPartHasRequiredSymbol(seq, requiredSymbol, separationSymbol)) {
++ int this_noOfCompoundElements = 0;
++ for (int j = seq.size()-2; j>0; j--) if (seq.at(j)==separationSymbol) this_noOfCompoundElements++;
++ noOfCompoundElements[i] = this_noOfCompoundElements;
++ minNoOfCompoundElements = (minNoOfCompoundElements < this_noOfCompoundElements) ?
++ minNoOfCompoundElements : this_noOfCompoundElements;
++ }
++ else {
++ noOfCompoundElements[i] = INT_MAX;
++ // INT_MAX is the marker for "no required symbol in the last part".
++ }
++ }
++
++ // remove states with more than minimum number of compounds (or without the required symbol in the last part)
++ vector<TNodeState>::iterator it = state.begin();
++ int i=0;
++ while(it != state.end()) {
++ if (noOfCompoundElements[i] > minNoOfCompoundElements) {
++ delete (*it).sequence;
++ it = state.erase(it);
++ // erase() hands back the iterator to the following element, so no it++ here.
++ }
++ else it++;
++ i++;
++ }
++
++ delete[] noOfCompoundElements;
++}
++
++
++
++void
++State::pruneStatesWithForbiddenSymbol(int forbiddenSymbol)
++{
++  // Drop (and free) every state whose output sequence contains the symbol.
++  vector<TNodeState>::iterator cur = state.begin();
++  while(cur != state.end()) {
++    vector<int> const &symbols = *(cur->sequence);
++    bool forbidden = false;
++    for(size_t k = 0; k != symbols.size() && !forbidden; k++) {
++      forbidden = (symbols[k] == forbiddenSymbol);
++    }
++    if(forbidden) {
++      delete cur->sequence;
++      cur = state.erase(cur);
++    }
++    else cur++;
++  }
++}
++
++
++
++bool
++State::lastPartHasRequiredSymbol(const vector<int> &seq, int requiredSymbol, int separationSymbol)
++{
++  // Walk the sequence from its end: whichever of the two symbols is met
++  // first decides the answer, with the required symbol checked first.
++  for(size_t pos = seq.size(); pos != 0; pos--) {
++    int const symbol = seq[pos-1];
++    if(symbol == requiredSymbol) {
++      return true;
++    }
++    if(symbol == separationSymbol) {
++      return false;
++    }
++  }
++  // Neither symbol occurs in the sequence.
++  return false;
++}
++
++
++void
++State::restartFinals(const set<Node *> &finals, int requiredSymbol, State *restart_state, int separationSymbol)
++{
++ // Appends, for each qualifying final state, one new state per entry of restart_state (if it is not NULL).
++ for (unsigned int i=0; i<state.size(); i++) {
++ TNodeState state_i = state.at(i);
++ // A state can be a possible final state and still have transitions
++ // state_i is a copy, so the push_back() calls below cannot invalidate it.
++ if (finals.count(state_i.where) > 0) {
++ bool restart = lastPartHasRequiredSymbol(*(state_i.sequence), requiredSymbol, separationSymbol);
++ if (restart) {
++ if (restart_state != NULL) {
++ for (unsigned int j=0; j<restart_state->state.size(); j++) {
++ TNodeState initst = restart_state->state.at(j);
++ vector<int> *tnvec = new vector<int>;
++ // New sequence = copy of the old one followed by separationSymbol.
++ for(unsigned int k=0; k < state_i.sequence->size(); k++) tnvec->push_back(state_i.sequence->at(k));
++ TNodeState tn(initst.where, tnvec, state_i.dirty);
++ tn.sequence->push_back(separationSymbol);
++ state.push_back(tn);
++ }
++ }
++ }
++ }
++ }
++}
++
++
++
++wstring
++State::getReadableString(const Alphabet &a)
++{
++ wstring retval = L"[";
++ // Appends the symbols of each state's sequence; states are separated by ", " and the result ends with "]".
++ for(unsigned int i=0; i<state.size(); i++) {
++ vector<int>* seq = state.at(i).sequence;
++ if(seq != NULL) for (unsigned int j=0; j<seq->size(); j++) {
++ wstring ws = L"";
++ a.getSymbol(ws, seq->at(j));
++ //if(ws == L"") ws = L"?";
++ retval.append(ws);
++ }
++
++ /*Node *where = state.at(i).where;
++ if(where == NULL) retval.append(L"→@null");
++ else {
++ retval.append(L"→");
++ map<int, Dest>::iterator it;
++ wstring ws;
++ for (it = where->transitions.begin(); it != where->transitions.end(); it++) {
++ int symbol = (*it).first;
++ a.getSymbol(ws, symbol);
++ retval.append(ws);
++ }
++ }*/
++ if (i+1 < state.size()) retval.append(L", ");
++ }
++ retval.append(L"]");
++ return retval;
++}
++
+Index: lttoolbox/alphabet.cc
+===================================================================
+--- lttoolbox/alphabet.cc (revision 21745)
++++ lttoolbox/alphabet.cc (working copy)
+@@ -221,3 +221,9 @@
+ {
+ return spairinv[code];
+ }
++
++
++void Alphabet::setSymbol(int symbol, wstring newSymbolString) {
++ // Only negative codes are handled (stored at slexicinv[-symbol-1]); any other value is ignored.
++ if (symbol < 0) slexicinv[-symbol-1] = newSymbolString;
++}
+Index: lttoolbox/lt-tmxproc.1
+===================================================================
+--- lttoolbox/lt-tmxproc.1 (revision 21745)
++++ lttoolbox/lt-tmxproc.1 (working copy)
+@@ -30,5 +30,4 @@
+ .SH BUGS
+ Lots of...lurking in the dark and waiting for you!
+ .SH AUTHOR
+-(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+-reserved.
++(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante.
+Index: lttoolbox/lt-comp.1
+===================================================================
+--- lttoolbox/lt-comp.1 (revision 21745)
++++ lttoolbox/lt-comp.1 (working copy)
+@@ -10,10 +10,30 @@
+ .SH SYNOPSIS
+ .B lt-comp
+ [
++.B \-a \fR|
++.B \-v \fR|
++.B \-l \fR|
++.B \-r \fR|
++.B \-h
++]
++[
+ .B lr \fR|
+ .B rl
+ ] dictionary_file output_file
+ .PP
++.B lt-comp
++[
++.B \-\-alt \fR|
++.B \-\-var \fR|
++.B \-\-var\-left \fR|
++.B \-\-var\-right \fR|
++.B \-\-help
++]
++[
++.B lr \fR|
++.B rl
++] dictionary_file output_file
++.PP
+ .SH DESCRIPTION
+ .BR lt-comp
+ Is the application responsible of compiling dictionaries used by
+@@ -23,6 +43,32 @@
+ .PP
+ .SH OPTIONS
+ .TP
++.B \-a, \-\-alt
++Sets the value of the \fIalt\fR attribute to use in compilation.
++
++Note that if no value is set, all entries containing an \fIalt\fR
++attribute are omitted.
++.TP
++.B \-v, \-\-var
++Sets the value of the \fIv\fR attribute to use in compilation.
++This should only be used with monodixes; for bidixes, see \-l and \-r.
++
++Note that if no value is set, all entries containing a \fIv\fR
++attribute are considered to be \fIleft-to-right\fR.
++.TP
++.B \-l, \-\-var\-left
++Sets the value of the \fIvl\fR attribute for use in compilation of bidixes.
++"Left" here refers to the side of the dictionary, so this option is only valid
++in \fIrl\fR mode.
++.TP
++.B \-r, \-\-var\-right
++Sets the value of the \fIvr\fR attribute for use in compilation of bidixes.
++"Right" here refers to the side of the dictionary, so this option is only valid
++in \fIlr\fR mode.
++.TP
++.B \-h, \-\-help
++Prints a short help message
++.TP
+ .B lr
+ The resulting transducer will process dictionary entries
+ \fIleft-to-right\fR.
+@@ -45,5 +91,4 @@
+ .SH BUGS
+ Lots of...lurking in the dark and waiting for you!
+ .SH AUTHOR
+-(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+-reserved.
++(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante.
+Index: lttoolbox/lt_locale.h
+===================================================================
+--- lttoolbox/lt_locale.h (revision 21745)
++++ lttoolbox/lt_locale.h (working copy)
+@@ -16,6 +16,7 @@
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
++
+ #ifndef _MYLOCALE_
+ #define _MYLOCALE_
+
+Index: lttoolbox/expander.h
+===================================================================
+--- lttoolbox/expander.h (revision 21745)
++++ lttoolbox/expander.h (working copy)
+@@ -42,6 +42,26 @@
+ xmlTextReaderPtr reader;
+
+ /**
++ * The alt value
++ */
++ wstring alt;
++
++ /**
++ * The variant value (monodix)
++ */
++ wstring variant;
++
++ /**
++ * The variant value (left side of bidix)
++ */
++ wstring variant_left;
++
++ /**
++ * The variant value (right side of bidix)
++ */
++ wstring variant_right;
++
++ /**
+ * The paradigm being compiled
+ */
+ wstring current_paradigm;
+@@ -186,6 +206,29 @@
+ * Compile dictionary to letter transducers
+ */
+ void expand(string const &fichero, FILE *output);
++ /**
++ * Set the alt value to use in compilation
++ * @param a the value
++ */
++ void setAltValue(string const &a);
++
++ /**
++ * Set the variant value to use in expansion
++ * @param v the value
++ */
++ void setVariantValue(string const &v);
++
++ /**
++ * Set the variant_left value to use in expansion
++ * @param v the value
++ */
++ void setVariantLeftValue(string const &v);
++
++ /**
++ * Set the variant_right value to use in expansion
++ * @param v the value
++ */
++ void setVariantRightValue(string const &v);
+ };
+
+
+Index: lttoolbox/transducer.cc
+===================================================================
+--- lttoolbox/transducer.cc (revision 21745)
++++ lttoolbox/transducer.cc (working copy)
+@@ -18,6 +18,7 @@
+ */
+ #include <lttoolbox/transducer.h>
+ #include <lttoolbox/compression.h>
++#include <lttoolbox/alphabet.h>
+ #include <lttoolbox/lttoolbox_config.h>
+ #include <lttoolbox/my_stdio.h>
+
+@@ -187,6 +188,13 @@
+ void
+ Transducer::setFinal(int const state, bool valor)
+ {
++ int initial_copy = getInitial();
++/*
++ if(state == initial_copy)
++ {
++ wcerr << L"Setting initial state to final" << endl;
++ }
++*/
+ if(valor)
+ {
+ finals.insert(state);
+@@ -609,3 +617,119 @@
+ finals.clear();
+ finals.insert(tmp);
+ }
++
++void
++Transducer::show(Alphabet &alphabet, FILE *output, int const epsilon_tag)
++{
++  // Writes one tab-separated line per transition (source state, target state,
++  // input symbol, output symbol; "ε" for an empty symbol), then one line per
++  // final state. NOTE: joinFinals() is run first and modifies the transducer.
++  joinFinals(epsilon_tag);
++  for(map<int, multimap<int, int> >::iterator it = transitions.begin(); it != transitions.end(); it++)
++  {
++    multimap<int, int> &aux = it->second;
++
++    for(multimap<int, int>::iterator it2 = aux.begin(); it2 != aux.end(); it2++)
++    {
++      pair<int, int> t = alphabet.decode(it2->first);
++      fwprintf(output, L"%d\t", it->first);
++      fwprintf(output, L"%d\t", it2->second);
++      wstring l = L"";
++      alphabet.getSymbol(l, t.first);
++      if(l == L"") // If we find an epsilon
++      {
++        fwprintf(output, L"ε\t");
++      }
++      else
++      {
++        fwprintf(output, L"%ls\t", l.c_str());
++      }
++      wstring r = L"";
++      alphabet.getSymbol(r, t.second);
++      if(r == L"") // If we find an epsilon
++      {
++        fwprintf(output, L"ε\t");
++      }
++      else
++      {
++        fwprintf(output, L"%ls\t", r.c_str());
++      }
++      fwprintf(output, L"\n");
++    }
++  }
++
++  for(set<int>::iterator it3 = finals.begin(); it3 != finals.end(); it3++)
++  {
++    fwprintf(output, L"%d\n", *it3);
++  }
++}
++
++int
++Transducer::getStateSize(int const state)
++{
++  // Adds up the transition counts of every state returned by
++  // closure(state, 0).
++  set<int> const reachable = closure(state, 0);
++  int total = 0;
++
++  for(set<int>::const_iterator st = reachable.begin(); st != reachable.end(); ++st)
++  {
++    total += transitions[*st].size();
++  }
++
++  return total;
++}
++
++bool
++Transducer::recognise(wstring patro, Alphabet &a, FILE *err)
++{
++ bool accepted = false;
++ set<int> states ;
++ // Start from whatever closure() yields for the initial state (tag 0).
++ set<int> myclosure1 = closure(getInitial(), 0);
++ states.insert(myclosure1.begin(), myclosure1.end());
++ // For each of the characters in the input string
++ for(wstring::iterator it = patro.begin(); it != patro.end(); it++)
++ {
++ set<int> new_state; // states alive after consuming this character
++ int sym = *it;
++ // NOTE: sym and the err parameter are not used anywhere in this body.
++ // For each of the current alive states
++ for(set<int>::iterator it2 = states.begin(); it2 != states.end(); it2++)
++ {
++ multimap<int, int> p = transitions[*it2];
++ // For each of the transitions in the state
++
++ for(multimap<int, int>::iterator it3 = p.begin(); it3 != p.end(); it3++)
++ {
++
++ pair<int, int> t = a.decode(it3->first);
++ wstring l = L"";
++ a.getSymbol(l, t.first);
++ // Only the left-hand symbol of the pair is compared with the input;
++ // the right-hand one (t.second) is not looked at.
++
++ // The test is a substring search: a transition matches whenever its
++ // symbol text contains the current character anywhere.
++ if(l.find(*it) != wstring::npos)
++ {
++ set<int> myclosure = closure(it3->second, 0);
++ // Continue from whatever closure() yields for the target state.
++ new_state.insert(myclosure.begin(), myclosure.end());
++ // (duplicates collapse automatically because new_state is a set)
++ }
++ }
++ }
++ states = new_state;
++ }
++ for(set<int>::iterator it4 = states.begin(); it4 != states.end(); it4++)
++ {
++ if(isFinal(*it4))
++ {
++ accepted = true;
++ }
++ }
++
++ return accepted;
++}
++
+Index: lttoolbox/pool.h
+===================================================================
+--- lttoolbox/pool.h (revision 21745)
++++ lttoolbox/pool.h (working copy)
+@@ -1,175 +0,0 @@
+-/*
+- * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License as
+- * published by the Free Software Foundation; either version 2 of the
+- * License, or (at your option) any later version.
+- *
+- * This program is distributed in the hope that it will be useful, but
+- * WITHOUT ANY WARRANTY; without even the implied warranty of
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- * General Public License for more details.
+- *
+- * You should have received a copy of the GNU General Public License
+- * along with this program; if not, write to the Free Software
+- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+- * 02111-1307, USA.
+- */
+-#ifndef _GENERIC_POOL_
+-#define _GENERIC_POOL_
+-
+-#include <list>
+-
+-using namespace std;
+-
+-/**
+- * Pool of T objects
+- */
+-template <class T>
+-class Pool
+-{
+-private:
+- /**
+- * Free pointers to objects
+- */
+- list<T *> free;
+-
+- /**
+- * Currently created objects
+- */
+- list<T> created;
+-
+- /**
+- * copy method
+- * @param other pool object
+- */
+- void copy(Pool const &p)
+- {
+- created = p.created;
+-
+- // all new members are available
+- for(typename list<T>::iterator it = created.begin(), limit = created.end();
+- it != limit; it++)
+- {
+- free.push_back(&(*it));
+- }
+- }
+-
+- /**
+- * destroy method
+- */
+- void destroy()
+- {
+- // do nothing
+- }
+-
+- /**
+- * Allocate a pool of nelems size
+- * @param nelems initial size of the pool
+- */
+- void init(unsigned int const nelems)
+- {
+- created.clear();
+- free.clear();
+- T tmp;
+- for(unsigned int i = 0; i != nelems; i++)
+- {
+- created.push_front(tmp);
+- free.push_front(&(*(created.begin())));
+- }
+- }
+-
+- /**
+- * Allocate a pool of nelems size with objects equal to 'object'
+- * @param nelems initial size of the pool
+- * @param object initial value of the objects in the pool
+- */
+- void init(unsigned int const nelems, T const &object)
+- {
+- created.clear();
+- free.clear();
+- for(unsigned int i = 0; i != nelems; i++)
+- {
+- created.push_front(object);
+- free.push_front(&(*(created.begin())));
+- }
+- }
+-
+-
+-public:
+-
+- /**
+- * Constructor
+- */
+- Pool()
+- {
+- init(1);
+- }
+-
+- /**
+- * Parametrized constructor
+- * @param nelems initial size of the pool
+- * @param object initial value of the objects in the pool
+- */
+- Pool(unsigned int const nelems, T const &object)
+- {
+- init(nelems, object);
+- }
+-
+- /**
+- * Parametrized constructor
+- * @param nelems initial size of the pool
+- */
+- Pool(unsigned int const nelems)
+- {
+- init(nelems);
+- }
+-
+- /**
+- * Destructor
+- */
+- ~Pool()
+- {
+- destroy();
+- }
+-
+- /**
+- * Copy constructor
+- */
+- Pool(Pool const &p)
+- {
+- copy(p);
+- }
+-
+- /**
+- * Allocate a pointer to a free 'new' object.
+- * @return pointer to the object
+- */
+- T * get()
+- {
+- if(free.size() != 0)
+- {
+- T *result = *(free.begin());
+- free.erase(free.begin());
+- return result;
+- }
+- else
+- {
+- T tmp;
+- created.push_front(tmp);
+- return &(*(created.begin()));
+- }
+- }
+-
+- /**
+- * Release a no more needed instance of a pooled object
+- * @param item the no more needed instance of the object
+- */
+- void release(T *item)
+- {
+- free.push_front(item);
+- }
+-};
+-
+-#endif
+Index: lttoolbox/compiler.h
+===================================================================
+--- lttoolbox/compiler.h (revision 21745)
++++ lttoolbox/compiler.h (working copy)
+@@ -44,6 +44,26 @@
+ xmlTextReaderPtr reader;
+
+ /**
++ * The alt value
++ */
++ wstring alt;
++
++ /**
++ * The variant value (monodix)
++ */
++ wstring variant;
++
++ /**
++ * The variant value (left side of bidix)
++ */
++ wstring variant_left;
++
++ /**
++ * The variant value (right side of bidix)
++ */
++ wstring variant_right;
++
++ /**
+ * The paradigm being compiled
+ */
+ wstring current_paradigm;
+@@ -65,6 +85,16 @@
+ wstring letters;
+
+ /**
++ * Set verbose mode: warnings which may or may not be correct
++ */
++ bool verbose;
++
++ /**
++ * First element (of an entry)
++ */
++ bool first_element;
++
++ /**
+ * Identifier of all the symbols during the compilation
+ */
+ Alphabet alphabet;
+@@ -264,10 +294,14 @@
+ static wstring const COMPILER_LEMMA_ATTR;
+ static wstring const COMPILER_IGNORE_ATTR;
+ static wstring const COMPILER_IGNORE_YES_VAL;
++ static wstring const COMPILER_ALT_ATTR;
++ static wstring const COMPILER_V_ATTR;
++ static wstring const COMPILER_VL_ATTR;
++ static wstring const COMPILER_VR_ATTR;
+
+
+ /**
+- * Copnstructor
++ * Constructor
+ */
+ Compiler();
+
+@@ -292,6 +326,35 @@
+ * @param fd the stream where write the result
+ */
+ void write(FILE *fd);
++
++ /**
++ * Set verbose output
++ */
++ void setVerbose(bool verbosity = false);
++
++ /**
++ * Set the alt value to use in compilation
++ * @param a the value
++ */
++ void setAltValue(string const &a);
++
++ /**
++ * Set the variant value to use in compilation
++ * @param v the value
++ */
++ void setVariantValue(string const &v);
++
++ /**
++ * Set the variant_left value to use in compilation
++ * @param v the value
++ */
++ void setVariantLeftValue(string const &v);
++
++ /**
++ * Set the variant_right value to use in compilation
++ * @param v the value
++ */
++ void setVariantRightValue(string const &v);
+ };
+
+
+Index: lttoolbox/lt-tmxcomp.1
+===================================================================
+--- lttoolbox/lt-tmxcomp.1 (revision 21745)
++++ lttoolbox/lt-tmxcomp.1 (working copy)
+@@ -38,5 +38,4 @@
+ .SH BUGS
+ Lots of...lurking in the dark and waiting for you!
+ .SH AUTHOR
+-(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+-reserved.
++(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante.
+Index: lttoolbox/alphabet.h
+===================================================================
+--- lttoolbox/alphabet.h (revision 21745)
++++ lttoolbox/alphabet.h (working copy)
+@@ -145,6 +145,13 @@
+ */
+ bool isTag(int const symbol) const;
+
++ /**
++ * Sets an already existing symbol to represent a new value
++ * @param symbol the code of the symbol to set
++ * @param newSymbolString the new string for this symbol
++ */
++ void setSymbol(int symbol, wstring newSymbolString);
++
+ pair<int, int> const & decode(int const code) const;
+
+ };
+Index: lttoolbox/state.h
+===================================================================
+--- lttoolbox/state.h (revision 21745)
++++ lttoolbox/state.h (working copy)
+@@ -19,6 +19,7 @@
+ #ifndef _STATE_
+ #define _STATE_
+
++#include <map>
+ #include <set>
+ #include <string>
+ #include <vector>
+@@ -26,7 +27,9 @@
+
+ #include <lttoolbox/alphabet.h>
+ #include <lttoolbox/node.h>
+-#include <lttoolbox/pool.h>
++#include <lttoolbox/match_exe.h>
++#include <lttoolbox/match_state.h>
++#include <lttoolbox/transducer.h>
+
+ using namespace std;
+
+@@ -43,7 +46,7 @@
+ {
+ Node *where;
+ vector<int> *sequence;
+- bool dirty;
++ bool dirty; // What does "dirty" mean ?
+
+ TNodeState(Node * const &w, vector<int> * const &s, bool const &d): where(w), sequence(s), dirty(d){}
+ TNodeState & operator=(TNodeState const &other)
+@@ -58,17 +61,6 @@
+ vector<TNodeState> state;
+
+ /**
+- * Pool of wchar_t vectors, for efficience (static class)
+- */
+- Pool<vector<int> > *pool;
+-
+- /**
+- * Copy function
+- * @param s the state to be copied
+- */
+- void copy(State const &s);
+-
+- /**
+ * Destroy function
+ */
+ void destroy();
+@@ -86,6 +78,8 @@
+ */
+ void apply(int const input, int const alt);
+
++ void apply(int const input, int const alt1, int const alt2);
++
+ /**
+ * Calculate the epsilon closure over the current state, replacing
+ * its content.
+@@ -92,11 +86,21 @@
+ */
+ void epsilonClosure();
+
++ bool lastPartHasRequiredSymbol(const vector<int> &seq, int requiredSymbol, int separationSymbol);
++
+ public:
++
+ /**
++ * Copy function
++ * @param s the state to be copied
++ */
++ void copy(State const &s);
++
++
++ /**
+ * Constructor
+ */
+- State(Pool<vector<int> > *);
++ State();
+
+ /**
+ * Destructor
+@@ -135,6 +139,13 @@
+ */
+ void step(int const input, int const alt);
+
++ void step(int const input, int const alt1, int const alt2);
++
++ void step_case(wchar_t val, bool caseSensitive);
++
++ void step_case(wchar_t val, wchar_t val2, bool caseSensitive);
++
++
+ /**
+ * Init the state with the initial node and empty output
+ * @param initial the initial node of the transducer
+@@ -142,6 +153,21 @@
+ void init(Node *initial);
+
+ /**
++ * Remove states not containing a specific symbol in their last 'part', and states
++ * with more than a number of 'parts'
++ * @param requiredSymbol the symbol required in the last part
++ * @param separationSymbol the symbol that represent the separation between two parts
++ * @param compound_max_elements the maximum part number allowed
++ */
++ void pruneCompounds(int requiredSymbol, int separationSymbol, int compound_max_elements);
++
++ /**
++ * Remove states containing a forbidden symbol
++ * @param forbiddenSymbol the symbol forbidden
++ */
++ void pruneStatesWithForbiddenSymbol(int forbiddenSymbol);
++
++ /**
+ * Print all outputs of current parsing, preceded by a bar '/',
+ * from the final nodes of the state
+ * @param finals the set of final nodes
+@@ -156,8 +182,8 @@
+ wstring filterFinals(set<Node *> const &finals, Alphabet const &a,
+ set<wchar_t> const &escaped_chars,
+ bool uppercase = false,
+- bool firstupper = false,
+- int firstchar = 0) const;
++ bool firstupper = false,
++ int firstchar = 0) const;
+
+ /**
+ * Same as previous one, but the output is adapted to the SAO system
+@@ -173,11 +199,44 @@
+ wstring filterFinalsSAO(set<Node *> const &finals, Alphabet const &a,
+ set<wchar_t> const &escaped_chars,
+ bool uppercase = false,
+- bool firstupper = false,
+- int firstchar = 0) const;
++ bool firstupper = false,
++ int firstchar = 0) const;
+
+
+ /**
++ * Same as previous one, but the output is adapted to the LRX system
++ * @param finals the set of final nodes
++ * @param a the alphabet to decode strings
++ * @param escaped_chars the set of chars to be preceded with one
++ * backslash
++ * @param uppercase true if the word is uppercase
++ * @param firstupper true if the first letter of a word is uppercase
++ * @param firstchar first character of the word
++ * @return the result of the transduction
++ */
++
++ set<pair<wstring, vector<wstring> > > filterFinalsLRX(set<Node *> const &finals, Alphabet const &a,
++ set<wchar_t> const &escaped_chars,
++ bool uppercase = false,
++ bool firstupper = false,
++ int firstchar = 0) const;
++
++
++
++
++
++ /**
++ * Find final states, remove those that do not have a requiredSymbol and 'restart' each of them as the
++ * set of initial states, but remembering the sequence and adding a separationSymbol
++ * @param finals
++ * @param requiredSymbol
++ * @param restart_state
++ * @param separationSymbol
++ */
++ void restartFinals(const set<Node *> &finals, int requiredSymbol, State *restart_state, int separationSymbol);
++
++
++ /**
+ * Returns true if at least one record of the state references a
+ * final node of the set
+ * @param finals set of final nodes @return
+@@ -185,6 +244,11 @@
+ */
+ bool isFinal(set<Node *> const &finals) const;
+
++ /**
++ * Return the full states string (to allow debugging...) using a Java ArrayList.toString style
++ */
++ wstring getReadableString(const Alphabet &a);
++
+ wstring filterFinalsTM(set<Node *> const &finals,
+ Alphabet const &alphabet,
+ set<wchar_t> const &escaped_chars,
+Index: lttoolbox/Makefile.am
+===================================================================
+--- lttoolbox/Makefile.am (revision 21745)
++++ lttoolbox/Makefile.am (working copy)
+@@ -2,7 +2,7 @@
+ h_sources = alphabet.h buffer.h compiler.h compression.h \
+ entry_token.h expander.h fst_processor.h lt_locale.h ltstr.h \
+ match_exe.h match_node.h match_state.h my_stdio.h node.h \
+- pattern_list.h pool.h regexp_compiler.h sorted_vector.h state.h \
++ pattern_list.h regexp_compiler.h sorted_vector.h state.h \
+ transducer.h trans_exe.h xml_parse_util.h exception.h tmx_compiler.h
+ cc_sources = alphabet.cc compiler.cc compression.cc entry_token.cc \
+ expander.cc fst_processor.cc lt_locale.cc match_exe.cc \
+@@ -13,7 +13,7 @@
+ library_includedir = $(includedir)/$(GENERIC_LIBRARY_NAME)-$(GENERIC_API_VERSION)/$(GENERIC_LIBRARY_NAME)
+ library_include_HEADERS = $(h_sources)
+
+-bin_PROGRAMS = lt-comp lt-proc lt-expand lt-tmxcomp lt-tmxproc
++bin_PROGRAMS = lt-comp lt-proc lt-expand lt-tmxcomp lt-tmxproc lt-print
+ instdir = lttoolbox
+
+ lib_LTLIBRARIES= liblttoolbox3.la
+@@ -26,6 +26,10 @@
+
+ lttoolbox_DATA = dix.dtd
+
++lt_print_SOURCES = lt_print.cc
++lt_print_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la
++lt_print_LDFLAGS = -llttoolbox$(GENERIC_MAJOR_VERSION) $(LTTOOLBOX_LIBS)
++
+ lt_comp_SOURCES = lt_comp.cc
+ lt_comp_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la
+ lt_comp_LDFLAGS = -llttoolbox$(GENERIC_MAJOR_VERSION) $(LTTOOLBOX_LIBS)
+@@ -46,8 +50,18 @@
+ lt_tmxproc_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la
+ lt_tmxproc_LDFLAGS = -llttoolbox$(GENERIC_MAJOR_VERSION) $(LTTOOLBOX_LIBS)
+
+-man_MANS = lt-comp.1 lt-expand.1 lt-proc.1 lt-tmxcomp.1 lt-tmxproc.1
++#lt-validate-dictionary: Makefile.am validate-header.sh
++# @echo "Creating lt-validate-dictionary script"
++# @echo "#!$(BASH)" > $@
++# @cat validate-header.sh >> $@
++# @echo "$(XMLLINT) --dtdvalid $(apertiumdir)/dix.dtd --noout \$$FILE1 && exit 0;" >> $@
++# @echo "exit 1;" >> $@
++# @chmod a+x $@
+
++
++
++man_MANS = lt-comp.1 lt-expand.1 lt-proc.1 lt-tmxcomp.1 lt-tmxproc.1 lt-print.1
++
+ INCLUDES = -I$(top_srcdir) $(LTTOOLBOX_CFLAGS)
+ CLEANFILES = *~
+
+Index: lttoolbox/lt-print.1
+===================================================================
+--- lttoolbox/lt-print.1 (revision 0)
++++ lttoolbox/lt-print.1 (revision 44914)
+@@ -0,0 +1,34 @@
++.TH lt-print 1 2006-03-08 "" ""
++.SH NAME
++lt-print \- This application is part of the lexical processing modules
++and tools (
++.B lttoolbox
++)
++.PP
++This tool is part of the apertium machine translation
++architecture: \fBhttp://www.apertium.org\fR.
++.SH SYNOPSIS
++.B lt-print
++ bin_file
++.PP
++.SH DESCRIPTION
++.BR lt-print
++is the application responsible for printing compiled dictionaries in
++ATT format.
++.PP
++.B bin_file
++The compiled input file.
++.PP
++.B output_file
++The transducer in ATT format.
++
++.SH SEE ALSO
++.I lt-comp\fR(1),
++.I lt-proc\fR(1),
++.I lt-expand\fR(1),
++.I apertium-tagger\fR(1),
++.I apertium\fR(1).
++.SH BUGS
++Lots of...lurking in the dark and waiting for you!
++.SH AUTHOR
++(c) 2005--2012 Universitat d'Alacant / Universidad de Alicante.
+Index: lttoolbox/lt_print.cc
+===================================================================
+--- lttoolbox/lt_print.cc (revision 0)
++++ lttoolbox/lt_print.cc (revision 44914)
+@@ -0,0 +1,106 @@
++/*
++ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License as
++ * published by the Free Software Foundation; either version 2 of the
++ * License, or (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
++ * 02111-1307, USA.
++ */
++#include <lttoolbox/transducer.h>
++#include <lttoolbox/compression.h>
++#include <lttoolbox/lttoolbox_config.h>
++
++#include <lttoolbox/my_stdio.h>
++#include <lttoolbox/lt_locale.h>
++
++#include <cstdlib>
++#include <iostream>
++#include <libgen.h>
++#include <string>
++
++using namespace std;
++
++void endProgram(char *name)
++{
++ if(name != NULL)
++ {
++ cout << basename(name) << " v" << PACKAGE_VERSION <<": dump a transducer to text in ATT format" << endl;
++ cout << "USAGE: " << basename(name) << " bin_file " << endl;
++ }
++ exit(EXIT_FAILURE);
++}
++
++
++int main(int argc, char *argv[])
++{
++ if(argc != 2)
++ {
++ endProgram(argv[0]);
++ }
++
++ LtLocale::tryToSetLocale();
++
++
++ FILE *input = fopen(argv[1], "r");
++
++ Alphabet new_alphabet;
++ set<wchar_t> alphabetic_chars;
++
++ map<wstring, Transducer> transducers;
++
++ // letters
++ int len = Compression::multibyte_read(input);
++ while(len > 0)
++ {
++ alphabetic_chars.insert(static_cast<wchar_t>(Compression::multibyte_read(input)));
++ len--;
++ }
++
++ // symbols
++ new_alphabet.read(input);
++
++ len = Compression::multibyte_read(input);
++
++ while(len > 0)
++ {
++ int len2 = Compression::multibyte_read(input);
++ wstring name = L"";
++ while(len2 > 0)
++ {
++ name += static_cast<wchar_t>(Compression::multibyte_read(input));
++ len2--;
++ }
++ transducers[name].read(input);
++
++ len--;
++ }
++
++ /////////////////////
++
++ FILE *output = stdout;
++ map<wstring, Transducer>::iterator penum = transducers.end();
++ penum--;
++ for(map<wstring, Transducer>::iterator it = transducers.begin(); it != transducers.end(); it++)
++ {
++ //it->second.minimize();
++ it->second.show(new_alphabet, output);
++ if(it != penum)
++ {
++ fwprintf(output, L"--\n", it->first.c_str());
++ }
++ }
++
++ fclose(input);
++
++ return 0;
++}
================================================================
---- gitweb:
http://git.pld-linux.org/gitweb.cgi/packages/lttoolbox.git/commitdiff/4306e092da881c50b399a1865e0713d856e4a0ec
More information about the pld-cvs-commit
mailing list