[packages/lttoolbox] - updated to 3.3.1 (note: soname changed) - removed outdated svn and soname patches

qboosh qboosh at pld-linux.org
Fri Dec 25 09:33:22 CET 2015


commit 78ddaac8fc4a3cd5335057d9c391f686cfcf68c7
Author: Jakub Bogusz <qboosh at pld-linux.org>
Date:   Fri Dec 25 09:34:54 2015 +0100

    - updated to 3.3.1 (note: soname changed)
    - removed outdated svn and soname patches

 lttoolbox-soname.patch      |   11 -
 lttoolbox-svn20130412.patch | 3112 -------------------------------------------
 lttoolbox.spec              |   26 +-
 3 files changed, 11 insertions(+), 3138 deletions(-)
---
diff --git a/lttoolbox.spec b/lttoolbox.spec
index 72b4a69..0a7eae4 100644
--- a/lttoolbox.spec
+++ b/lttoolbox.spec
@@ -1,17 +1,13 @@
 Summary:	Augmented letter transducer tools for natural language processing
 Summary(pl.UTF-8):	Narzędzia do przetwarzania słów w językach naturalnych
 Name:		lttoolbox
-Version:	3.2.0
-%define	subver	svn20130412
-%define	rel	1
-Release:	2.%{subver}.1
+Version:	3.3.1
+Release:	1
 License:	GPL v2+
 Group:		Applications/Text
 Source0:	http://downloads.sourceforge.net/apertium/%{name}-%{version}.tar.gz
-# Source0-md5:	708e7de837ed363f7103035ef2849fe4
-Patch0:		%{name}-svn20130412.patch
-Patch1:		%{name}-soname.patch
-Patch2:		%{name}-opt.patch
+# Source0-md5:	d50479b2376a4839b7acac352505623e
+Patch0:		%{name}-opt.patch
 URL:		http://wiki.apertium.org/wiki/Lttoolbox
 BuildRequires:	autoconf >= 2.52
 BuildRequires:	automake
@@ -62,9 +58,7 @@ Statyczna biblioteka lttoolbox.
 
 %prep
 %setup -q
-%patch0 -p0
-%patch1 -p1
-%patch2 -p1
+%patch0 -p1
 
 %build
 %{__libtoolize}
@@ -97,8 +91,9 @@ rm -rf $RPM_BUILD_ROOT
 %attr(755,root,root) %{_bindir}/lt-proc
 %attr(755,root,root) %{_bindir}/lt-tmxcomp
 %attr(755,root,root) %{_bindir}/lt-tmxproc
-%attr(755,root,root) %{_libdir}/liblttoolbox3-3.2.so.*.*.*
-%attr(755,root,root) %ghost %{_libdir}/liblttoolbox3-3.2.so.1
+%attr(755,root,root) %{_bindir}/lt-trim
+%attr(755,root,root) %{_libdir}/liblttoolbox3-3.3.so.*.*.*
+%attr(755,root,root) %ghost %{_libdir}/liblttoolbox3-3.3.so.0
 %{_datadir}/lttoolbox
 %{_mandir}/man1/lt-comp.1*
 %{_mandir}/man1/lt-expand.1*
@@ -106,13 +101,14 @@ rm -rf $RPM_BUILD_ROOT
 %{_mandir}/man1/lt-proc.1*
 %{_mandir}/man1/lt-tmxcomp.1*
 %{_mandir}/man1/lt-tmxproc.1*
+%{_mandir}/man1/lt-trim.1*
 
 %files devel
 %defattr(644,root,root,755)
 %attr(755,root,root) %{_libdir}/liblttoolbox3.so
 %{_libdir}/liblttoolbox3.la
-%{_includedir}/lttoolbox-3.2
-%{_pkgconfigdir}/lttoolbox-3.2.pc
+%{_includedir}/lttoolbox-3.3
+%{_pkgconfigdir}/lttoolbox.pc
 
 %files static
 %defattr(644,root,root,755)
diff --git a/lttoolbox-soname.patch b/lttoolbox-soname.patch
deleted file mode 100644
index 973caac..0000000
--- a/lttoolbox-soname.patch
+++ /dev/null
@@ -1,11 +0,0 @@
---- lttoolbox-3.2.0/configure.ac.orig	2013-06-26 16:15:39.881717927 +0200
-+++ lttoolbox-3.2.0/configure.ac	2013-06-26 16:23:06.398365855 +0200
-@@ -23,7 +23,7 @@
- AC_SUBST(GENERIC_MAJOR_VERSION)
- 
- # Shared library versioning
--GENERIC_LIBRARY_VERSION=0:0:0
-+GENERIC_LIBRARY_VERSION=1:0:0
- #                       | | |
- #                +------+ | +---+
- #                |        |     |
diff --git a/lttoolbox-svn20130412.patch b/lttoolbox-svn20130412.patch
deleted file mode 100644
index 71dc646..0000000
--- a/lttoolbox-svn20130412.patch
+++ /dev/null
@@ -1,3112 +0,0 @@
-Index: lttoolbox/lt-proc.1
-===================================================================
---- lttoolbox/lt-proc.1	(revision 21745)
-+++ lttoolbox/lt-proc.1	(working copy)
-@@ -12,7 +12,9 @@
- [
- .B \-a \fR| 
- .B \-b \fR| 
-+.B \-o \fR| 
- .B \-c \fR| 
-+.B \-d \fR| 
- .B \-e \fR| 
- .B \-g \fR|
- .B \-n \fR|
-@@ -29,7 +31,10 @@
- [
- .B \-\-analysis \fR| 
- .B \-\-bilingual \fR|
-+.B \-\-surf-bilingual \fR|
- .B \-\-case-sensitive \fR|
-+.B \-\-debugged-gen \fR|
-+.B \-\-decompose-nouns \fR|
- .B \-\-generation \fR|
- .B \-\-non-marked-gen \fR|
- .B \-\-tagged-gen \fR|
-@@ -98,9 +103,18 @@
- form in the source language. Works tipically with the output of
- apertium-pretransfer.
- .TP
-+.B \-o, \-\-surf-bilingual
-+As with \-b, but takes input from apertium\-tagger \-p , with
-+surface forms, and if the lexical form is not found in the bilingual
-+dictionary, it outputs the surface form of the word. 
-+.TP
-+
- .B \-c, \-\-case-sensitive
- Use the literal case of the incoming characters
- .TP
-+.B \-d, \-\-debugged-gen
-+Morph. generation with all the stuff
-+.TP
- .B \-e, \-\-decompose-compounds
- Try to treat unknown words as compounds, and decompose them.
- .TP
-@@ -154,5 +168,4 @@
- .SH BUGS
- Lots of...lurking in the dark and waiting for you!
- .SH AUTHOR
--(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
--reserved.
-+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante.
-Index: lttoolbox/fst_processor.cc
-===================================================================
---- lttoolbox/fst_processor.cc	(revision 21745)
-+++ lttoolbox/fst_processor.cc	(working copy)
-@@ -44,14 +44,17 @@
-   
-   caseSensitive = false;
-   dictionaryCase = false;
--  compoundDecomposition = false;
-+  do_decomposition = false;
-   nullFlush = false;
-   nullFlushGeneration = false;
-+  showControlSymbols = false;
-+  biltransSurfaceForms = false;  
-+  compoundOnlyLSymbol = 0;
-+  compoundRSymbol = 0;
-+  compound_max_elements = 4;
- 
--  pool = new Pool<vector<int> >(4, vector<int>(50));
--
--  initial_state = new State(pool);
--  current_state = new State(pool);
-+  initial_state = new State();
-+  current_state = new State();
- }
- 
- FSTProcessor::~FSTProcessor()
-@@ -58,7 +61,6 @@
- {
-   delete current_state;
-   delete initial_state;
--  delete pool;
- }
- 
- void
-@@ -408,6 +410,100 @@
-   return 0x7fffffff;
- }
- 
-+pair<wstring, int>
-+FSTProcessor::readBilingual(FILE *input, FILE *output)
-+{
-+  wint_t val = fgetwc_unlocked(input);
-+  wstring symbol = L"";
-+
-+  if(feof(input))
-+  {
-+    return pair<wstring, int>(symbol, 0x7fffffff);
-+  }
-+  
-+  if(outOfWord)
-+  {
-+    if(val == L'^')
-+    {
-+      val = fgetwc_unlocked(input);
-+      if(feof(input))
-+      {
-+        return pair<wstring, int>(symbol, 0x7fffffff);
-+      }
-+    }
-+    else if(val == L'\\')
-+    {
-+      fputwc_unlocked(val, output);
-+      val = fgetwc_unlocked(input);
-+      if(feof(input))
-+      {
-+        return pair<wstring, int>(symbol, 0x7fffffff);
-+      }
-+      fputwc_unlocked(val,output);
-+      skipUntil(input, output, L'^');
-+      val = fgetwc_unlocked(input);
-+      if(feof(input))
-+      {
-+        return pair<wstring, int>(symbol, 0x7fffffff);
-+      }
-+    }
-+    else
-+    {
-+      fputwc_unlocked(val, output);
-+      skipUntil(input, output, L'^');
-+      val = fgetwc_unlocked(input);
-+      if(feof(input))
-+      {
-+        return pair<wstring, int>(symbol, 0x7fffffff);
-+      }
-+    }
-+    outOfWord = false;
-+  }
-+
-+  if(val == L'\\')
-+  {
-+    val = fgetwc_unlocked(input);
-+    return pair<wstring, int>(symbol, val);
-+  }
-+  else if(val == L'$')
-+  {
-+    outOfWord = true;
-+    return pair<wstring, int>(symbol, static_cast<int>(L'$'));
-+  }
-+  else if(val == L'<')
-+  {
-+    wstring cad = L"";
-+    cad += static_cast<wchar_t>(val);
-+    while((val = fgetwc_unlocked(input)) != L'>')
-+    {
-+      if(feof(input))
-+      {
-+	streamError();
-+      }
-+      cad += static_cast<wchar_t>(val);
-+    }
-+    cad += static_cast<wchar_t>(val);
-+
-+    int res = alphabet(cad);
-+    
-+    if (res == 0) {
-+	    symbol = cad;
-+    }
-+    return pair<wstring, int>(symbol, res);
-+  }
-+  else if(val == L'[')
-+  {
-+    fputws_unlocked(readFullBlock(input, L'[', L']').c_str(), output);
-+    return readBilingual(input, output);
-+  }
-+  else
-+  {
-+    return pair<wstring, int>(symbol, val);
-+  }
-+
-+  return pair<wstring, int>(symbol, 0x7fffffff);
-+}
-+
- void
- FSTProcessor::flushBlanks(FILE *output)
- {  
-@@ -494,6 +590,27 @@
- }
- 
- void
-+FSTProcessor::writeEscapedWithTags(wstring const &str, FILE *output)
-+{
-+  for(unsigned int i = 0, limit = str.size(); i < limit; i++)
-+  {    
-+    if(str[i] == L'<' && i >=1 && str[i-1] != L'\\')
-+    {
-+      fputws_unlocked(str.substr(i).c_str(), output);
-+      return;
-+    }
-+
-+    if(escaped_chars.find(str[i]) != escaped_chars.end())
-+    {
-+      fputwc_unlocked(L'\\', output);
-+    }
-+    fputwc_unlocked(str[i], output);
-+  } 
-+}
-+
-+
-+
-+void
- FSTProcessor::printWord(wstring const &sf, wstring const &lf, FILE *output)
- {
-   fputwc_unlocked(L'^', output);
-@@ -642,7 +759,86 @@
-   initGeneration();
- }
- 
-+
- wstring
-+FSTProcessor::compoundAnalysis(wstring input_word, bool uppercase, bool firstupper) {
-+    const int MAX_COMBINATIONS = 500;
-+    //wcerr << L"compoundAnalysis(input_word = " << input_word << L")" << endl;
-+
-+    State current_state = *initial_state;
-+
-+    for(unsigned int i=0; i<input_word.size(); i++) {
-+        wchar_t val=input_word.at(i);
-+
-+        //wcerr << val << L" før step " << i << L" current_state = " << current_state.getReadableString(alphabet) << endl;
-+        current_state.step_case(val, caseSensitive);
-+        
-+        if(current_state.size() > MAX_COMBINATIONS) {
-+            wcerr << L"Warning: compoundAnalysis's MAX_COMBINATIONS exceeded for '" << input_word << L"'" << endl;
-+            wcerr << L"         gave up at char " << i << L" '" << val << L"'." << endl;
-+
-+            wstring nullString = L"";
-+            return  nullString;
-+        }
-+
-+        //wcerr << val << L" eft step " << i << L" current_state = " << current_state.getReadableString(alphabet) << endl;
-+
-+        if(i < input_word.size()-1)
-+            current_state.restartFinals(all_finals, compoundOnlyLSymbol, initial_state, '+');
-+        
-+        //wcerr << val << " eft rest " << i << " current_state = " << current_state.getReadableString(alphabet) << endl;
-+        //wcerr << i << " result = "  << current_state.filterFinals(all_finals, alphabet, escaped_chars, uppercase, firstupper) << endl;
-+        //wcerr << i << " -- size = " << current_state.size() << endl;
-+
-+        if(current_state.size()==0) {
-+            wstring nullString = L"";
-+            return nullString;
-+        }
-+    }
-+
-+    current_state.pruneCompounds(compoundRSymbol, '+', compound_max_elements);
-+    wstring result = current_state.filterFinals(all_finals, alphabet, escaped_chars, uppercase, firstupper);
-+    //wcerr << L"rrresult = " << result << endl;
-+    
-+    return result;
-+}
-+
-+
-+
-+void 
-+FSTProcessor::initDecompositionSymbols() {
-+  if ((compoundOnlyLSymbol=alphabet(L"<:co:only-L>")) == 0
-+     && (compoundOnlyLSymbol=alphabet(L"<:compound:only-L>")) == 0
-+     && (compoundOnlyLSymbol=alphabet(L"<@co:only-L>")) == 0
-+     && (compoundOnlyLSymbol=alphabet(L"<@compound:only-L>")) == 0
-+     && (compoundOnlyLSymbol=alphabet(L"<compound-only-L>")) == 0)
-+  {
-+    wcerr << L"Warning: Decomposition symbol <:compound:only-L> not found" << endl;
-+  }
-+  else if (!showControlSymbols)
-+      alphabet.setSymbol(compoundOnlyLSymbol, L"");
-+
-+  if ((compoundRSymbol=alphabet(L"<:co:R>")) == 0
-+     && (compoundRSymbol=alphabet(L"<:compound:R>")) == 0
-+     && (compoundRSymbol=alphabet(L"<@co:R>")) == 0
-+     && (compoundRSymbol=alphabet(L"<@compound:R>")) == 0
-+     && (compoundRSymbol=alphabet(L"<compound-R>")) == 0) 
-+  {
-+    wcerr << L"Warning: Decomposition symbol <:compound:R> not found" << endl;
-+  }
-+  else if (!showControlSymbols)
-+      alphabet.setSymbol(compoundRSymbol, L"");
-+}
-+
-+
-+void 
-+FSTProcessor::initDecomposition() {
-+  do_decomposition = true;
-+  initAnalysis();
-+  initDecompositionSymbols();
-+}
-+
-+/*wstring
- FSTProcessor::decompose(wstring w) 
- {
-         State current_state = *initial_state;
-@@ -807,7 +1003,7 @@
-         }
-         //wcerr << L"+ decompose: " << lf << endl;
-         return lf;
--}
-+}*/
- 
- void
- FSTProcessor::analysis(FILE *input, FILE *output)
-@@ -839,6 +1035,10 @@
-           uppercase = firstupper && iswupper(sf[sf.size()-1]);
-         }
- 
-+        if(do_decomposition && compoundOnlyLSymbol != 0) 
-+        {
-+          current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
-+        }
-         lf = current_state.filterFinals(all_finals, alphabet,
-                                         escaped_chars,
-                                         uppercase, firstupper);
-@@ -853,6 +1053,10 @@
-           uppercase = firstupper && iswupper(sf[sf.size()-1]);
-         }
- 
-+        if(do_decomposition && compoundOnlyLSymbol != 0) 
-+        {
-+          current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
-+        }
-         lf = current_state.filterFinals(all_finals, alphabet,
-                                         escaped_chars,
-                                         uppercase, firstupper);
-@@ -867,6 +1071,10 @@
-           uppercase = firstupper && iswupper(sf[sf.size()-1]);
-         }
- 
-+        if(do_decomposition && compoundOnlyLSymbol != 0) 
-+        {
-+          current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
-+        }
-         lf = current_state.filterFinals(all_finals, alphabet,
-                                         escaped_chars,
-                                         uppercase, firstupper);
-@@ -881,6 +1089,10 @@
-           uppercase = firstupper && iswupper(sf[sf.size()-1]);
-         }
- 
-+        if(do_decomposition && compoundOnlyLSymbol != 0) 
-+        {
-+          current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
-+        }
-         lf = current_state.filterFinals(all_finals, alphabet, 
-                                         escaped_chars, 
-                                         uppercase, firstupper);
-@@ -969,16 +1181,22 @@
-         if(limit == 0)
-         {
-           input_buffer.back(sf.size());
--          fputwc_unlocked(sf[0], output);          
-+          writeEscaped(sf.substr(0,1), output);          
-         }
-         else
-         { 
-           input_buffer.back(1+(size-limit));
-           wstring unknown_word = sf.substr(0, limit);
--          if(compoundDecomposition) 
-+          if(do_decomposition) 
-           {
-+            if(!dictionaryCase)
-+            {
-+              firstupper = iswupper(sf[0]);
-+              uppercase = firstupper && iswupper(sf[sf.size()-1]);
-+            }
-+
-             wstring compound = L"";
--            compound = decompose(unknown_word);
-+            compound = compoundAnalysis(unknown_word, uppercase, firstupper);
-             if(compound != L"") 
-             {
-               printWord(unknown_word, compound, output);
-@@ -1002,16 +1220,22 @@
-         if(limit == 0)
-         {
-           input_buffer.back(sf.size());
--          fputwc_unlocked(sf[0], output);          
-+          writeEscaped(sf.substr(0,1), output);
-         }
-         else
-         { 
-           input_buffer.back(1+(size-limit));
-           wstring unknown_word = sf.substr(0, limit);
--          if(compoundDecomposition) 
-+          if(do_decomposition) 
-           {
-+            if(!dictionaryCase)
-+            {
-+              firstupper = iswupper(sf[0]);
-+              uppercase = firstupper && iswupper(sf[sf.size()-1]);
-+            }
-+
-             wstring compound = L"";
--            compound = decompose(unknown_word);
-+            compound = compoundAnalysis(unknown_word, uppercase, firstupper);
-             if(compound != L"") 
-             {
-               printWord(unknown_word, compound, output);
-@@ -1296,19 +1520,27 @@
-       fputwc(L'=', output);
-       val = readGeneration(input, output);
-     }
--    
-+
-     if(val == L'$' && outOfWord)
-     {
-       if(sf[0] == L'*' || sf[0] == L'%')
-       {
--	if(mode != gm_clean)
-+	if(mode != gm_clean && mode != gm_tagged_nm)
-         {
- 	  writeEscaped(sf, output);
- 	}
--	else
-+	else if (mode == gm_clean)
- 	{
- 	  writeEscaped(sf.substr(1), output);
- 	}
-+	else if(mode == gm_tagged_nm)
-+	{
-+	  fputwc_unlocked(L'^', output);        
-+	  writeEscaped(removeTags(sf.substr(1)), output);
-+	  fputwc_unlocked(L'/', output);          
-+          writeEscapedWithTags(sf, output);
-+	  fputwc_unlocked(L'$', output);
-+	}
-       }
-       else if(sf[0] == L'@')
-       {
-@@ -1324,6 +1556,18 @@
-         {
-           writeEscaped(removeTags(sf), output);
-         }
-+        else if(mode == gm_tagged)
-+        {
-+          writeEscaped(removeTags(sf), output);
-+        }
-+        else if(mode == gm_tagged_nm)
-+        { 
-+	  fputwc_unlocked(L'^', output);        
-+	  writeEscaped(removeTags(sf.substr(1)), output);
-+	  fputwc_unlocked(L'/', output);          
-+          writeEscapedWithTags(sf, output);
-+	  fputwc_unlocked(L'$', output);
-+        }
-       }
-       else if(current_state.isFinal(all_finals))
-       {
-@@ -1330,7 +1574,7 @@
-         bool uppercase = sf.size() > 1 && iswupper(sf[1]);
-         bool firstupper= iswupper(sf[0]);
- 
--        if(mode == gm_tagged)
-+        if(mode == gm_tagged || mode == gm_tagged_nm)
-         {
- 	  fputwc_unlocked(L'^', output);
-         }
-@@ -1339,10 +1583,10 @@
-                                                   escaped_chars,
-                                                   uppercase, firstupper).substr(1).c_str(),
- 						  output);
--        if(mode == gm_tagged)
-+        if(mode == gm_tagged || mode == gm_tagged_nm)
-         {
- 	  fputwc_unlocked(L'/', output);
--	  fputws_unlocked(sf.c_str(), output);
-+	  writeEscapedWithTags(sf, output);
- 	  fputwc_unlocked(L'$', output);
-         }
- 
-@@ -1360,9 +1604,26 @@
-         }
-         else if(mode == gm_unknown)
-         {
-+          if(sf != L"")
-+          {
-+            fputwc_unlocked(L'#', output);
-+            writeEscaped(removeTags(sf), output);
-+          }
-+        }
-+        else if(mode == gm_tagged)
-+        {
-           fputwc_unlocked(L'#', output);
-           writeEscaped(removeTags(sf), output);
-         }
-+        else if(mode == gm_tagged_nm)
-+        {
-+	  fputwc_unlocked(L'^', output);        
-+	  writeEscaped(removeTags(sf), output);
-+	  fputwc_unlocked(L'/', output);          
-+	  fputwc_unlocked(L'#', output);          
-+          writeEscapedWithTags(sf, output);
-+	  fputwc_unlocked(L'$', output);
-+        }
-       }
-   
-       current_state = *initial_state;
-@@ -2033,19 +2294,62 @@
-   }
- 
-   State current_state = *initial_state;
--  wstring sf = L"";
--  wstring queue = L"";
--  wstring result = L"";
-+  wstring sf = L"";		// source language analysis
-+  wstring queue = L"";		// symbols to be added to each target
-+  wstring result = L"";		// result of looking up analysis in bidix
-   
-   outOfWord = false;
-  
-   skipUntil(input, output, L'^');
--  int val;
-+  pair<wstring,int> tr;		// readBilingual return value, containing:
-+  int val;			// the alphabet value of current symbol, and
-+  wstring symbol = L"";		// the current symbol as a string
-+  bool seentags = false;  // have we seen any tags at all in the analysis?
- 
--  while((val = readGeneration(input, output)) != 0x7fffffff)
-+  bool seensurface = false;
-+  wstring surface = L"";
-+  
-+  while(true)			// ie. while(val != 0x7fffffff)
-   {
-+    tr = readBilingual(input, output);
-+    symbol = tr.first;
-+    val = tr.second;
-+
-+    //fwprintf(stderr, L"> %S : %C : %d\n", tr.first.c_str(), tr.second, tr.second);
-+    if(biltransSurfaceForms && !seensurface && !outOfWord) 
-+    {
-+      while(val != L'/' && val != 0x7fffffff) 
-+      {
-+        surface = surface + symbol; 
-+        alphabet.getSymbol(surface, val);
-+        tr = readBilingual(input, output);
-+        symbol = tr.first;
-+        val = tr.second;
-+        //fwprintf(stderr, L" == %S : %C : %d => %S\n", symbol.c_str(), val, val, surface.c_str());
-+      }
-+      seensurface = true;
-+      tr = readBilingual(input, output);
-+      symbol = tr.first;
-+      val = tr.second;
-+    }
-+
-+    if (val == 0x7fffffff) 
-+    {
-+      break;
-+    }
-+    
-     if(val == L'$' && outOfWord)
-     {
-+      if(!seentags)        // if no tags: only return complete matches
-+      {
-+        bool uppercase = sf.size() > 1 && iswupper(sf[1]);
-+        bool firstupper= iswupper(sf[0]);
-+
-+        result = current_state.filterFinals(all_finals, alphabet,
-+                                            escaped_chars,
-+                                            uppercase, firstupper, 0);
-+      }
-+      
-       if(sf[0] == L'*')
-       {
-         printWordBilingual(sf, L"/"+sf, output);
-@@ -2055,14 +2359,23 @@
-         printWordBilingual(sf, compose(result, queue), output);
-       }
-       else
--      {
--        printWordBilingual(sf, L"/@"+sf, output);
-+      { //xxx
-+        if(biltransSurfaceForms) 
-+        {
-+          printWordBilingual(surface, L"/@"+surface, output);
-+        }
-+        else
-+        { 
-+          printWordBilingual(sf, L"/@"+sf, output);
-+        }
-       }
--  
-+      seensurface = false;
-+      surface = L""; 
-       queue = L"";
-       result = L"";
-       current_state = *initial_state;
-       sf = L"";
-+      seentags = false;
-     }
-     else if(iswspace(val) && sf.size() == 0)
-     {
-@@ -2074,7 +2387,11 @@
-       {
-         sf += L'\\';
-       }
--      alphabet.getSymbol(sf, val);
-+      alphabet.getSymbol(sf, val); // add symbol to sf iff alphabetic
-+      if(val == 0)  // non-alphabetic, possibly unknown tag; add to sf
-+      {
-+	sf += symbol;
-+      }      
-     }
-     else
-     {
-@@ -2082,7 +2399,15 @@
-       {
-         sf += L'\\';
-       }
--      alphabet.getSymbol(sf,val);
-+      alphabet.getSymbol(sf, val); // add symbol to sf iff alphabetic
-+      if(val == 0)  // non-alphabetic, possibly unknown tag; add to sf
-+      {
-+	sf += symbol;
-+      }
-+      if(alphabet.isTag(val) || val == 0) 
-+      {
-+        seentags = true;
-+      }      
-       if(current_state.size() != 0)
-       {
- 	if(!alphabet.isTag(val) && iswupper(val) && !caseSensitive)
-@@ -2105,12 +2430,21 @@
-       }
-       if(current_state.size() == 0 && result != L"")
-       {
--        if(alphabet.isTag(val))
-+        // We already have a result, but there is still more to read
-+        // of the analysis; following tags are not consumed, but
-+        // output as target language tags (added to result on
-+        // end-of-word)
-+        if(alphabet.isTag(val)) // known tag
-         {
-           alphabet.getSymbol(queue, val);
-         }
-+        else if (val == 0) // non-alphabetic, possibly unknown tag
-+        {
-+          queue += symbol;
-+        }
-         else
-         {
-+          // There are no more alive transductions and the current symbol is not a tag -- unknown word!
-           result = L"";
-         }
-       }
-@@ -2127,6 +2461,7 @@
-   unsigned int end_point = input_word.size()-2;
-   wstring queue = L"";
-   bool mark = false;
-+  bool seentags = false;  // have we seen any tags at all in the analysis?
-   
-   if(with_delim == false)
-   {
-@@ -2160,6 +2495,7 @@
-     }
-     else if(input_word[i] == L'<')
-     {
-+      seentags = true;
-       symbol = L'<';
-       for(unsigned int j = i + 1; j <= end_point; j++)
-       {
-@@ -2217,7 +2553,7 @@
-     }
-     
-     if(current_state.size() == 0)
--    { 
-+    {
-       if(symbol != L"" && result != L"")
-       {
-         queue.append(symbol);
-@@ -2224,20 +2560,39 @@
-       }
-       else
-       {
--	// word is not present
-+        // word is not present
-         if(with_delim)
--	{
-+        {
-           result = L"^@" + input_word.substr(1);  
--	}
-+        }
-         else
--	{
-+        {
-           result = L"@" + input_word;
--	}
-+        }
-         return pair<wstring, int>(result, 0);  
-       }      
-     }
-   }
- 
-+  if (!seentags
-+      && L"" == current_state.filterFinals(all_finals, alphabet,
-+                                           escaped_chars,
-+                                           uppercase, firstupper, 0)) 
-+  {
-+    // word is not present
-+    if(with_delim)
-+    {
-+      result = L"^@" + input_word.substr(1);  
-+    }
-+    else
-+    {
-+      result = L"@" + input_word;
-+    }
-+    return pair<wstring, int>(result, 0);  
-+  }
-+        
-+
-+
-   // attach unmatched queue automatically
- 
-   if(queue != L"")
-@@ -2661,10 +3016,11 @@
-   return str;
- }
- 
-+
- void
--FSTProcessor::setDecompoundingMode(bool const value)
-+FSTProcessor::setBiltransSurfaceForms(bool const value)
- {
--  compoundDecomposition = value;
-+  biltransSurfaceForms = value;
- }
- 
- void
-@@ -2688,7 +3044,7 @@
- bool
- FSTProcessor::getDecompoundingMode()
- {
--  return compoundDecomposition;
-+  return do_decomposition;
- }
- 
- bool
-Index: lttoolbox/lt_comp.cc
-===================================================================
---- lttoolbox/lt_comp.cc	(revision 21745)
-+++ lttoolbox/lt_comp.cc	(working copy)
-@@ -23,6 +23,7 @@
- #include <iostream>
- #include <libgen.h>
- #include <string>
-+#include <getopt.h>
- 
- using namespace std;
- 
-@@ -31,7 +32,11 @@
-   if(name != NULL)
-   {
-     cout << basename(name) << " v" << PACKAGE_VERSION <<": build a letter transducer from a dictionary" << endl;
--    cout << "USAGE: " << basename(name) << " lr | rl dictionary_file output_file [acx_file]" << endl;
-+    cout << "USAGE: " << basename(name) << " [-avh] lr | rl dictionary_file output_file [acx_file]" << endl;
-+    cout << "  -v:     set language variant" << endl;
-+    cout << "  -a:     set alternative (monodix)" << endl;
-+    cout << "  -l:     set left language variant (bidix)" << endl;
-+    cout << "  -r:     set right language variant (bidix)" << endl;
-     cout << "Modes:" << endl;
-     cout << "  lr:     left-to-right compilation" << endl;
-     cout << "  rl:     right-to-left compilation" << endl;
-@@ -42,27 +47,113 @@
- 
- int main(int argc, char *argv[])
- {
--  if(argc != 4 && argc != 5)
-+  Compiler c;
-+  c.setVerbose(false);
-+  
-+#if HAVE_GETOPT_LONG
-+  int option_index=0;
-+#endif
-+
-+  string vl;
-+  string vr;
-+
-+  while (true) {
-+#if HAVE_GETOPT_LONG
-+    static struct option long_options[] =
-+    {
-+      {"alt",       required_argument, 0, 'a'},
-+      {"var",       required_argument, 0, 'v'},
-+      {"var-left",  required_argument, 0, 'l'},
-+      {"var-right", required_argument, 0, 'r'},
-+      {"help",      no_argument,       0, 'h'}, 
-+      {"verbose",   no_argument,       0, 'V'}, 
-+      {0, 0, 0, 0}
-+    };
-+
-+    int cnt=getopt_long(argc, argv, "a:v:l:r:hV", long_options, &option_index);
-+#else
-+    int cnt=getopt(argc, argv, "a:v:l:r:hV");
-+#endif
-+    if (cnt==-1)
-+      break;
-+
-+    switch (cnt)
-+    {
-+      case 'a':
-+        c.setAltValue(optarg);
-+        break;
-+
-+      case 'v':
-+        c.setVariantValue(optarg);
-+        break;
-+
-+      case 'l':
-+        vl = optarg;
-+        c.setVariantLeftValue(vl);
-+        break;
-+
-+      case 'r':
-+        vr = optarg;
-+        c.setVariantRightValue(vr);
-+        break;
-+
-+      case 'V':
-+        c.setVerbose(true);
-+        break;
-+
-+      case 'h':
-+      default:
-+        endProgram(argv[0]);
-+        break;
-+    }
-+  }
-+
-+  string opc;
-+  string infile;
-+  string outfile;
-+  string acxfile;
-+
-+  switch(argc - optind + 1)
-   {
--    endProgram(argv[0]);
-+    case 5:
-+      opc = argv[argc-4];
-+      infile = argv[argc-3];
-+      outfile = argv[argc-2];
-+      acxfile = argv[argc-1];
-+      break;
-+
-+    case 4:
-+      opc = argv[argc-3];
-+      infile = argv[argc-2];
-+      outfile = argv[argc-1];
-+      break;
-+
-+    default:
-+      endProgram(argv[0]);
-+      break;
-   }
- 
--  string opc = argv[1];
--
--  Compiler c;
--  
--  
-   if(opc == "lr")
-   {
--    if(argc == 5)
-+    if(vr == "" && vl != "")
-     {
--      c.parseACX(argv[4], Compiler::COMPILER_RESTRICTION_LR_VAL);
-+      cout << "Error: -l specified, but mode is lr" << endl;
-+      endProgram(argv[0]);
-     }
--    c.parse(argv[2], Compiler::COMPILER_RESTRICTION_LR_VAL);
-+    if(acxfile != "")
-+    {
-+      c.parseACX(acxfile, Compiler::COMPILER_RESTRICTION_LR_VAL);
-+    }
-+    c.parse(infile, Compiler::COMPILER_RESTRICTION_LR_VAL);
-   }
-   else if(opc == "rl")
-   {
--    c.parse(argv[2], Compiler::COMPILER_RESTRICTION_RL_VAL);
-+    if(vl == "" && vr != "")
-+    {
-+      cout << "Error: -r specified, but mode is rl" << endl;
-+      endProgram(argv[0]);
-+    }
-+    c.parse(infile, Compiler::COMPILER_RESTRICTION_RL_VAL);
-   }
-   else
-   {
-@@ -69,10 +160,10 @@
-     endProgram(argv[0]);
-   }
- 
--  FILE *output = fopen(argv[3], "wb");
-+  FILE *output = fopen(outfile.c_str(), "wb");
-   if(!output)
-   {
--    cerr << "Error: Cannot open file '" << argv[2] << "'." << endl;
-+    cerr << "Error: Cannot open file '" << outfile << "'." << endl;
-     exit(EXIT_FAILURE);
-   }
-   c.write(output);
-Index: lttoolbox/fst_processor.h
-===================================================================
---- lttoolbox/fst_processor.h	(revision 21745)
-+++ lttoolbox/fst_processor.h	(working copy)
-@@ -43,7 +43,8 @@
-   gm_clean,      // clear all
-   gm_unknown,    // display unknown words, clear transfer and generation tags
-   gm_all,        // display all
--  gm_tagged      // tagged generation
-+  gm_tagged,     // tagged generation
-+  gm_tagged_nm   // clean tagged generation
- };
- 
- /**
-@@ -57,8 +58,6 @@
-    */
-   map<wstring, TransExe, Ltstr> transducers;
- 
--  Pool<vector<int> > *pool;
--
-   /**
-    * Current state of lexical analysis
-    */
-@@ -130,6 +129,12 @@
-   bool outOfWord;
- 
-   /**
-+   * true if we're automatically removing surface forms.
-+   */
-+  bool biltransSurfaceForms;
-+
-+
-+  /**
-    * if true, makes always difference between uppercase and lowercase
-    * characters
-    */
-@@ -154,9 +159,30 @@
-   /**
-    * try analysing unknown words as compounds
-    */
--  bool compoundDecomposition;
-+  bool do_decomposition;
- 
-   /**
-+   * Symbol of CompoundOnlyL
-+   */
-+  int compoundOnlyLSymbol;
-+
-+  /**
-+   * Symbol of CompoundR
-+   */
-+  int compoundRSymbol;
-+
-+  /**
-+   * Show or not the controls symbols (as compoundRSymbol)
-+   */
-+   bool showControlSymbols;
-+
-+  /**
-+   * Max compound elements
-+   * Hard coded for now, but there might come a switch one day
-+   */
-+  int compound_max_elements;
-+
-+  /**
-    * Prints an error of input stream and exits
-    */
-   void streamError();
-@@ -219,6 +245,13 @@
-   int readGeneration(FILE *input, FILE *output);
- 
-   /**
-+   * Read text from stream (biltrans version)
-+   * @param input the stream to read
-+   * @return the queue of 0-symbols, and the next symbol in the stream
-+   */
-+  pair<wstring, int> readBilingual(FILE *input, FILE *output);
-+
-+  /**
-    * Read text from stream (SAO version)
-    * @param input the stream to read
-    * @return the next symbol in the stream
-@@ -248,7 +281,17 @@
-    */
-   void writeEscaped(wstring const &str, FILE *output);
- 
-+
-   /**
-+   * Write a string to an output stream, escaping all escapable characters
-+   * but keeping symbols without escaping
-+   * @param str the string to write, escaping characters
-+   * @param output the stream to write in
-+   */
-+  void writeEscapedWithTags(wstring const &str, FILE *output);
-+
-+
-+  /**
-    * Checks if an string ends with a particular suffix
-    * @param str the string to test
-    * @param the searched suffix
-@@ -287,6 +330,8 @@
-    */
-   void printUnknownWord(wstring const &sf, FILE *output);
- 
-+  void initDecompositionSymbols();
-+
-   vector<wstring> numbers;
-   int readTMAnalysis(FILE *input);
- 
-@@ -294,7 +339,7 @@
-   void printSpace(wchar_t const val, FILE *output);
-   void skipUntil(FILE *input, FILE *output, wint_t const character);
-   static wstring removeTags(wstring const &str);
--  wstring decompose(wstring str);
-+  wstring compoundAnalysis(wstring str, bool uppercase, bool firstupper);
-   size_t firstNotAlpha(wstring const &sf);
- 
-   void analysis_wrapper_null_flush(FILE *input, FILE *output);
-@@ -338,9 +383,9 @@
- 
-   void setCaseSensitiveMode(bool const value);
-   void setDictionaryCaseMode(bool const value);
-+  void setBiltransSurfaceForms(bool const value);
-   void setNullFlush(bool const value);
-   bool getNullFlush();
--  void setDecompoundingMode(bool const value);
-   bool getDecompoundingMode();
- };
- 
-Index: lttoolbox/lt_proc.cc
-===================================================================
---- lttoolbox/lt_proc.cc	(revision 21745)
-+++ lttoolbox/lt_proc.cc	(working copy)
-@@ -36,35 +36,42 @@
- void endProgram(char *name)
- {
-   cout << basename(name) << ": process a stream with a letter transducer" << endl;
--  cout << "USAGE: " << basename(name) << " [-c] [-a|-g|-n|-d|-p|-s|-t|-b] fst_file [input_file [output_file]]" << endl;
-+  cout << "USAGE: " << basename(name) << " [ -a | -b | -c | -d | -e | -g | -n | -p | -s | -t | -v | -h -z -w ] fst_file [input_file [output_file]]" << endl;
-   cout << "Options:" << endl;
- #if HAVE_GETOPT_LONG
-   cout << "  -a, --analysis:         morphological analysis (default behavior)" << endl;
--  cout << "  -b, --bilingual:        lexical transference" << endl;
-+  cout << "  -b, --bilingual:        lexical transfer" << endl;
-   cout << "  -c, --case-sensitive:   use the literal case of the incoming characters" << endl;
-+  cout << "  -d, --debugged-gen      morph. generation with all the stuff" <<endl;
-+  cout << "  -e, --decompose-nouns:  Try to decompound unknown words" << endl;
-   cout << "  -g, --generation:       morphological generation" << endl;
-+  cout << "  -l, --tagged-gen:       morphological generation keeping lexical forms" << endl;
-+  cout << "  -m, --tagged-nm-gen:    same as -l but without unknown word marks" << endl;                              
-   cout << "  -n, --non-marked-gen    morph. generation without unknown word marks" << endl;
--  cout << "  -d, --debugged-gen      morph. generation with all the stuff" <<endl;
-+  cout << "  -o, --surf-bilingual:   lexical transfer with surface forms" << endl;
-   cout << "  -p, --post-generation:  post-generation" << endl;
--  cout << "  -e, --decompose-compounds: try to decompose unknown word as compounds" << endl;
-   cout << "  -s, --sao:              SAO annotation system input processing" << endl;
-   cout << "  -t, --transliteration:  apply transliteration dictionary" << endl;
-+  cout << "  -v, --version:          version" << endl;
-   cout << "  -z, --null-flush:       flush output on the null character " << endl;
-   cout << "  -w, --dictionary-case:  use dictionary case instead of surface case" << endl;
--  cout << "  -v, --version:          version" << endl;
-   cout << "  -h, --help:             show this help" << endl;
- #else
-   cout << "  -a:   morphological analysis (default behavior)" << endl;
-+  cout << "  -b:   lexical transfer" << endl;
-   cout << "  -c:   use the literal case of the incoming characters" << endl;
-+  cout << "  -d:   morph. generation with all the stuff" << endl;
-+  cout << "  -e:   try to decompose unknown words as compounds" << endl;
-   cout << "  -g:   morphological generation" << endl;
-+  cout << "  -l:   morphological generation keeping lexical forms" << endl;
-   cout << "  -n:   morph. generation without unknown word marks" << endl;
-+  cout << "  -o:   lexical transfer with surface forms" << endl;
-   cout << "  -p:   post-generation" << endl;
--  cout << "  -e:   try to decompose unknown words as compounds" << endl;
-   cout << "  -s:   SAO annotation system input processing" << endl;
-   cout << "  -t:   apply transliteration dictionary" << endl;
-+  cout << "  -v:   version" << endl;
-   cout << "  -z:   flush output on the null character " << endl;
-   cout << "  -w:   use dictionary case instead of surface case" << endl;
--  cout << "  -v:   version" << endl;
-   cout << "  -h:   show this help" << endl;
- #endif
-   exit(EXIT_FAILURE);
-@@ -88,10 +95,12 @@
-     {
-       {"analysis",        0, 0, 'a'},
-       {"bilingual",       0, 0, 'b'},
-+      {"surf-bilingual",  0, 0, 'o'},
-       {"generation",      0, 0, 'g'},
-       {"non-marked-gen",  0, 0, 'n'},
-       {"debugged-gen",    0, 0, 'd'},
-       {"tagged-gen",      0, 0, 'l'},
-+      {"tagged-nm-gen",   0, 0, 'm'},
-       {"post-generation", 0, 0, 'p'},
-       {"sao",             0, 0, 's'},
-       {"transliteration", 0, 0, 't'},
-@@ -107,9 +116,9 @@
-   {
- #if HAVE_GETOPT_LONG
-     int option_index;
--    int c = getopt_long(argc, argv, "abceglndpstzwvh", long_options, &option_index);
-+    int c = getopt_long(argc, argv, "abceglmndopstzwvh", long_options, &option_index);
- #else
--    int c = getopt(argc, argv, "abceglndpstzwvh");
-+    int c = getopt(argc, argv, "abceglmndopstzwvh");
- #endif    
- 
-     if(c == -1)
-@@ -123,13 +132,12 @@
-       fstp.setCaseSensitiveMode(true);
-       break;
- 
--    case 'e':
--      fstp.setDecompoundingMode(true);
--      break;
--      
-+    case 'e':      
-     case 'a':
-     case 'b':
-+    case 'o':
-     case 'l':
-+    case 'm':
-     case 'g':
-     case 'n':
-     case 'd':
-@@ -248,11 +256,19 @@
-         fstp.initGeneration();
-         checkValidity(fstp);
-         fstp.generation(input, output, gm_all);
-+        break;
-       
-       case 'l':
-         fstp.initGeneration();
-         checkValidity(fstp);
-         fstp.generation(input, output, gm_tagged);
-+        break;
-+
-+      case 'm':
-+        fstp.initGeneration();
-+        checkValidity(fstp);
-+        fstp.generation(input, output, gm_tagged_nm);
-+        break;
-       
-       case 'p':
-         fstp.initPostgeneration();
-@@ -272,11 +288,24 @@
-         fstp.transliteration(input, output);
-         break;
-         
-+      case 'o':
-+        fstp.initBiltrans();
-+        checkValidity(fstp);
-+        fstp.setBiltransSurfaceForms(true);
-+        fstp.bilingual(input, output);
-+        break;
-+   
-       case 'b':
-         fstp.initBiltrans();
-         checkValidity(fstp);
-         fstp.bilingual(input, output);
-         break;
-+
-+      case 'e':
-+        fstp.initDecomposition();
-+        checkValidity(fstp);
-+        fstp.analysis(input, output);
-+        break;
-       
-       case 'a':
-       default:
-Index: lttoolbox/expander.cc
-===================================================================
---- lttoolbox/expander.cc	(revision 21745)
-+++ lttoolbox/expander.cc	(working copy)
-@@ -295,9 +295,18 @@
- {
-   wstring atributo=this->attrib(Compiler::COMPILER_RESTRICTION_ATTR);
-   wstring entrname=this->attrib(Compiler::COMPILER_LEMMA_ATTR);
-+  wstring altval = this->attrib(Compiler::COMPILER_ALT_ATTR);
-+  wstring varval = this->attrib(Compiler::COMPILER_V_ATTR);
-+  wstring varl   = this->attrib(Compiler::COMPILER_VL_ATTR);
-+  wstring varr   = this->attrib(Compiler::COMPILER_VR_ATTR);
-   
-   wstring myname = L"";
--  if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == L"yes")
-+  if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == L"yes"
-+   || altval != L"" && altval != alt
-+   || (varval != L"" && varval != variant && atributo == Compiler::COMPILER_RESTRICTION_RL_VAL)
-+   || ((varl != L"" && varl != variant_left) && (varr != L"" && varr != variant_right))
-+   || (varl != L"" && varl != variant_left && atributo == Compiler::COMPILER_RESTRICTION_RL_VAL)
-+   || (varr != L"" && varr != variant_right && atributo == Compiler::COMPILER_RESTRICTION_LR_VAL))
-   {    
-     do
-     {
-@@ -316,11 +325,14 @@
-   }
-   
-   EntList items, items_lr, items_rl;
--  if(atributo == Compiler::COMPILER_RESTRICTION_LR_VAL)
-+  if(atributo == Compiler::COMPILER_RESTRICTION_LR_VAL 
-+   || (varval != L"" && varval != variant && atributo != Compiler::COMPILER_RESTRICTION_RL_VAL)
-+   || varl != L"" && varl != variant_left)
-   {
-     items_lr.push_back(pair<wstring, wstring>(L"", L""));
-   }
--  else if(atributo == Compiler::COMPILER_RESTRICTION_RL_VAL)
-+  else if(atributo == Compiler::COMPILER_RESTRICTION_RL_VAL
-+        || (varr != L"" && varr != variant_right))
-   {
-     items_rl.push_back(pair<wstring, wstring>(L"", L""));
-   }
-@@ -594,3 +606,27 @@
-     it->second.append(endings.second);
-   }
- }
-+
-+void
-+Expander::setAltValue(string const &a)
-+{
-+  alt = XMLParseUtil::stows(a);
-+}
-+
-+void
-+Expander::setVariantValue(string const &v)
-+{
-+  variant = XMLParseUtil::stows(v);
-+}
-+
-+void
-+Expander::setVariantLeftValue(string const &v)
-+{
-+  variant_left = XMLParseUtil::stows(v);
-+}
-+
-+void
-+Expander::setVariantRightValue(string const &v)
-+{
-+  variant_right = XMLParseUtil::stows(v);
-+}
-Index: lttoolbox/lt-expand.1
-===================================================================
---- lttoolbox/lt-expand.1	(revision 21745)
-+++ lttoolbox/lt-expand.1	(working copy)
-@@ -9,11 +9,28 @@
- architecture: \fBhttp://www.apertium.org\fR.
- .SH SYNOPSIS
- .B lt-expand
-+[
-+.B \-a \fR| 
-+.B \-v \fR| 
-+.B \-l \fR| 
-+.B \-r \fR| 
-+.B \-h
-+]
- dictionary_file [output_file]
- .PP
-+.B lt-expand
-+[
-+.B \-\-alt \fR| 
-+.B \-\-var \fR| 
-+.B \-\-var\-left \fR| 
-+.B \-\-var\-right \fR| 
-+.B \-\-help
-+]
-+dictionary_file [output_file]
-+.PP
- .SH DESCRIPTION
- .BR lt-expand 
--Is the application responsible of expanding a dictionary into a
-+Is the application responsible for expanding a dictionary into a
- simple list of input string-output string pairs by eliminating
- paradigms through substitution and unfolding.
- .PP
-@@ -20,6 +37,23 @@
- The output goes to \fIoutput_file\fR if it is present or to standard
- output if it is missing.
- .PP
-+.SH OPTIONS
-+.TP
-+.B \-a, \-\-alt
-+Sets the value of the \fIalt\fR attribute to use in expansion
-+.TP
-+.B \-v, \-\-var
-+Sets the value of the \fIv\fR attribute to use in expansion of monodixes
-+.TP
-+.B \-l, \-\-var\-left
-+Sets the value of the \fIvl\fR attribute to use in expansion of bidixes
-+.TP
-+.B \-r, \-\-var\-right
-+Sets the value of the \fIvr\fR attribute to use in expansion of bidixes
-+.TP
-+.B \-h, \-\-help
-+Prints a short help message
-+.PP
- .SH FILES
- .B dictionary_file
- The input dictionary to expand.
-@@ -34,5 +68,4 @@
- .SH BUGS
- Lots of...lurking in the dark and waiting for you!
- .SH AUTHOR
--(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
--reserved.
-+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. 
-Index: lttoolbox/dix.dtd
-===================================================================
---- lttoolbox/dix.dtd	(revision 21745)
-+++ lttoolbox/dix.dtd	(working copy)
-@@ -1,4 +1,21 @@
- <!--
-+   Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
-+  
-+   This program is free software; you can redistribute it and/or
-+   modify it under the terms of the GNU General Public License as
-+   published by the Free Software Foundation; either version 2 of the
-+   License, or (at your option) any later version.
-+  
-+   This program is distributed in the hope that it will be useful, but
-+   WITHOUT ANY WARRANTY; without even the implied warranty of
-+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+   General Public License for more details.
-+  
-+   You should have received a copy of the GNU General Public License
-+   along with this program; if not, write to the Free Software
-+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-+   02111-1307, USA.
-+
-       DTD for the format of dictionaries
- -->
- <!ELEMENT dictionary (alphabet?, sdefs?,
-@@ -57,6 +74,10 @@
- 	i CDATA #IMPLIED
- 	slr CDATA #IMPLIED
- 	srl CDATA #IMPLIED
-+	alt CDATA #IMPLIED
-+	v CDATA #IMPLIED
-+	vl CDATA #IMPLIED
-+	vr CDATA #IMPLIED
- >
- 	<!-- r: restriction LR: left-to-right,
- 			    RL: right-to-left -->
-@@ -66,6 +87,10 @@
- 	<!-- i: ignore ('yes') means ignore, otherwise it is not ignored) -->
- 	<!-- slr: translation sense when translating from left to right -->
- 	<!-- srl: translation sense when translating from right to left --> 
-+	<!-- alt: alternative entries are omitted if not selected -->
-+	<!-- v: variant sets (monodix) direction restrictions based on language variant -->
-+	<!-- vl: variant left sets direction restrictions based on language variant for language on left of bidix -->
-+	<!-- vr: variant right sets direction restrictions based on language variant for language on right of bidix -->
- <!ELEMENT par EMPTY>
- 	<!-- reference to paradigm -->
- <!ATTLIST par
-Index: lttoolbox/compiler.cc
-===================================================================
---- lttoolbox/compiler.cc	(revision 21745)
-+++ lttoolbox/compiler.cc	(working copy)
-@@ -56,6 +56,10 @@
- wstring const Compiler::COMPILER_LEMMA_ATTR         = L"lm";
- wstring const Compiler::COMPILER_IGNORE_ATTR        = L"i";
- wstring const Compiler::COMPILER_IGNORE_YES_VAL     = L"yes";
-+wstring const Compiler::COMPILER_ALT_ATTR           = L"alt";
-+wstring const Compiler::COMPILER_V_ATTR             = L"v";
-+wstring const Compiler::COMPILER_VL_ATTR            = L"vl";
-+wstring const Compiler::COMPILER_VR_ATTR            = L"vr";
- 
- Compiler::Compiler()
- {
-@@ -417,6 +421,12 @@
-     }
-   }
-   
-+  if(verbose && first_element && (both_sides.front() == (int)L' ')) 
-+  {
-+    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-+    wcerr << L"): Entry begins with space." << endl; 
-+  }
-+  first_element = false; 
-   EntryToken e;
-   e.setSingleTransduction(both_sides, both_sides);
-   return e;
-@@ -444,6 +454,13 @@
-       readString(lhs, name);
-     }
-   }
-+
-+  if(verbose && first_element && (lhs.front() == (int)L' ')) 
-+  {
-+    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-+    wcerr << L"): Entry begins with space." << endl;
-+  }
-+  first_element = false;
-  
-   skip(name, COMPILER_RIGHT_ELEM);
- 
-@@ -480,7 +497,15 @@
- {
-   EntryToken e;
-   wstring nomparadigma = attrib(COMPILER_N_ATTR);
-+  first_element = false;
- 
-+  if(current_paradigm != L"" && nomparadigma == current_paradigm)
-+  {
-+    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-+    wcerr << L"): Paradigm refers to itself '" << nomparadigma << L"'." <<endl;
-+    exit(EXIT_FAILURE);
-+  }
-+
-   if(paradigms.find(nomparadigma) == paradigms.end())
-   {
-     wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-@@ -632,9 +657,18 @@
- {
-   wstring atributo=this->attrib(COMPILER_RESTRICTION_ATTR);
-   wstring ignore = this->attrib(COMPILER_IGNORE_ATTR);
-+  wstring altval = this->attrib(COMPILER_ALT_ATTR);
-+  wstring varval = this->attrib(COMPILER_V_ATTR);
-+  wstring varl   = this->attrib(COMPILER_VL_ATTR);
-+  wstring varr   = this->attrib(COMPILER_VR_ATTR);
- 
-   //�if entry is masked by a restriction of direction or an ignore mark
--  if((atributo != L"" && atributo != direction) || ignore == COMPILER_IGNORE_YES_VAL)
-+  if((atributo != L"" && atributo != direction) 
-+   || ignore == COMPILER_IGNORE_YES_VAL
-+   || (altval != L"" && altval != alt)
-+   || (direction == COMPILER_RESTRICTION_RL_VAL && varval != L"" && varval != variant)
-+   || (direction == COMPILER_RESTRICTION_RL_VAL && varl != L"" && varl != variant_left)
-+   || (direction == COMPILER_RESTRICTION_LR_VAL && varr != L"" && varr != variant_right))
-   {
-     // parse to the end of the entry
-     wstring name = L"";
-@@ -662,6 +696,11 @@
-     wstring name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
-     skipBlanks(name);
- 
-+    if(current_paradigm == L"" && verbose)
-+    {
-+      first_element = true;
-+    }
-+
-     int tipo = xmlTextReaderNodeType(reader);
-     if(name == COMPILER_PAIR_ELEM)
-     {      
-@@ -845,3 +884,33 @@
-     it->second.write(output);
-   }
- }
-+
-+void
-+Compiler::setAltValue(string const &a)
-+{
-+  alt = XMLParseUtil::stows(a);
-+}
-+
-+void
-+Compiler::setVariantValue(string const &v)
-+{
-+  variant = XMLParseUtil::stows(v);
-+}
-+
-+void
-+Compiler::setVariantLeftValue(string const &v)
-+{
-+  variant_left = XMLParseUtil::stows(v);
-+}
-+
-+void
-+Compiler::setVariantRightValue(string const &v)
-+{
-+  variant_right = XMLParseUtil::stows(v);
-+}
-+
-+void
-+Compiler::setVerbose(bool verbosity)
-+{
-+  verbose = verbosity;
-+}
-Index: lttoolbox/transducer.h
-===================================================================
---- lttoolbox/transducer.h	(revision 21745)
-+++ lttoolbox/transducer.h	(working copy)
-@@ -146,6 +146,13 @@
-   bool isFinal(int const state) const;
- 
-   /**
-+   * Test if a pattern is recognised by the FST
-+   * @param a widestring of the pattern to be recognised
-+   * @return true if the pattern is recognised by the transducer
-+   */
-+  bool recognise(wstring patro, Alphabet &a, FILE *err = stderr);
-+
-+  /**
-    * Set the state as a final or not, yes by default
-    * @param state the state
-    * @param value if true, the state is set as final state
-@@ -179,6 +186,12 @@
-   void reverse(int const epsilon_tag = 0);
- 
-   /**
-+   * Print all the transductions of a transducer in ATT format
-+   * @param epsilon_tag the tag to take as epsilon
-+   */
-+  void show(Alphabet &a, FILE *output = stdout, int const epsilon_tag = 0);
-+
-+  /**
-    * Determinize the transducer
-    * @param epsilon_tag the tag to take as epsilon
-    */
-@@ -242,6 +255,12 @@
-   bool isEmpty(int const state) const;
- 
-   /**
-+   * Returns the number of transitions from a given state
-+   * @return the number of transitions
-+   */
-+  int getStateSize(int const state);
-+
-+  /**
-    * Write method
-    * @param output the stream to write to
-    * @param decalage offset to sum to the tags
-Index: lttoolbox/lt_expand.cc
-===================================================================
---- lttoolbox/lt_expand.cc	(revision 21745)
-+++ lttoolbox/lt_expand.cc	(working copy)
-@@ -24,6 +24,7 @@
- #include <iostream>
- #include <libgen.h>
- #include <string>
-+#include <getopt.h>
- 
- #ifdef _MSC_VER
- #include <io.h>
-@@ -37,7 +38,7 @@
-   if(name != NULL)
-   {
-     cout << basename(name) << " v" << PACKAGE_VERSION <<": expand the contents of a dictionary file" << endl;
--    cout << "USAGE: " << basename(name) << " dictionary_file [output_file]" << endl;
-+    cout << "USAGE: " << basename(name) << " [-avlrh] dictionary_file [output_file]" << endl;
-   }
-   exit(EXIT_FAILURE);
- }
-@@ -45,14 +46,67 @@
- int main(int argc, char *argv[])
- {
-   FILE *input = NULL, *output = NULL;
-+  Expander e;
- 
--  switch(argc)
-+#if HAVE_GETOPT_LONG
-+  int option_index=0;
-+#endif
-+
-+  while (true) {
-+#if HAVE_GETOPT_LONG
-+    static struct option long_options[] =
-+    {
-+      {"alt",       required_argument, 0, 'a'},
-+      {"var",       required_argument, 0, 'v'},
-+      {"var-left",  required_argument, 0, 'l'},
-+      {"var-right", required_argument, 0, 'r'},
-+      {"help",      no_argument,       0, 'h'}, 
-+      {0, 0, 0, 0}
-+    };
-+
-+    int cnt=getopt_long(argc, argv, "a:v:l:r:h", long_options, &option_index);
-+#else
-+    int cnt=getopt(argc, argv, "a:v:l:r:h");
-+#endif
-+    if (cnt==-1)
-+      break;
-+
-+    switch (cnt)
-+    {
-+      case 'a':
-+        e.setAltValue(optarg);
-+        break;
-+
-+      case 'v':
-+        e.setVariantValue(optarg);
-+        break;
-+
-+      case 'l':
-+        e.setVariantLeftValue(optarg);
-+        break;
-+
-+      case 'r':
-+        e.setVariantRightValue(optarg);
-+        break;
-+
-+      case 'h':
-+      default:
-+        endProgram(argv[0]);
-+        break;
-+    }
-+  }
-+
-+  string infile;
-+  string outfile;
-+
-+  switch(argc - optind + 1)
-   {
-     case 2:
--      input = fopen(argv[1], "rb");
-+      infile = argv[argc-1];
-+      input = fopen(infile.c_str(), "rb");
-       if(input == NULL)
-       {
--        cerr << "Error: Cannot open file '" << argv[1] << "'." << endl;
-+        cerr << "Error: Cannot open file '" << infile << "'." << endl;
-         exit(EXIT_FAILURE);
-       }      
-       fclose(input);
-@@ -60,18 +114,20 @@
-       break;
-     
-     case 3:
--      input = fopen(argv[1], "rb");
-+      infile = argv[argc-2];
-+      input = fopen(infile.c_str(), "rb");
-       if(input == NULL)
-       {
--        cerr << "Error: Cannot open file '" << argv[1] << "'." << endl;
-+        cerr << "Error: Cannot open file '" << infile << "'." << endl;
-         exit(EXIT_FAILURE);
-       }
-       fclose(input);
- 
--      output = fopen(argv[2], "wb");
-+      outfile = argv[argc-1];
-+      output = fopen(argv[argc-1], "wb");
-       if(output == NULL)
-       {
--        cerr << "Error: Cannot open file '" << argv[2] << "'." << endl;
-+        cerr << "Error: Cannot open file '" << outfile << "'." << endl;
-         exit(EXIT_FAILURE);
-       }
-       break;
-@@ -85,8 +141,7 @@
-   _setmode(_fileno(output), _O_U8TEXT);
- #endif
- 
--  Expander e;
--  e.expand(argv[1], output);
-+  e.expand(infile, output);
-   fclose(output);
-   
-   return EXIT_SUCCESS;
-Index: lttoolbox/state.cc
-===================================================================
---- lttoolbox/state.cc	(revision 21745)
-+++ lttoolbox/state.cc	(working copy)
-@@ -20,10 +20,15 @@
- 
- #include <cstring>
- #include <cwctype>
-+#include <climits>
- 
--State::State(Pool<vector<int> > *p)
-+//debug//
-+//#include <iostream>
-+//using namespace std;
-+//debug//
-+
-+State::State()
- {
--  pool = p;
- }
-  
- State::~State()
-@@ -51,10 +56,9 @@
- void 
- State::destroy()
- {
--  // release references
-   for(size_t i = 0, limit = state.size(); i != limit; i++)
-   {
--    pool->release(state[i].sequence);
-+    delete state[i].sequence;
-   }
- 
-   state.clear();
-@@ -66,15 +70,14 @@
-   // release references
-   for(size_t i = 0, limit = state.size(); i != limit; i++)
-   {
--    pool->release(state[i].sequence);
-+    delete state[i].sequence;
-   }
- 
-   state = s.state;
--  pool = s.pool;
- 
-   for(size_t i = 0, limit = state.size(); i != limit; i++)
-   {
--    vector<int> *tmp = pool->get();
-+    vector<int> *tmp = new vector<int>();
-     *tmp = *(state[i].sequence);
-     state[i].sequence = tmp;
-   }
-@@ -90,7 +93,7 @@
- State::init(Node *initial)
- {
-   state.clear();
--  state.push_back(TNodeState(initial,pool->get(),false));
-+  state.push_back(TNodeState(initial, new vector<int>(), false));
-   state[0].sequence->clear();
-   epsilonClosure();  
- }  
-@@ -113,7 +116,7 @@
-     {
-       for(int j = 0; j != it->second.size; j++)
-       {
--        vector<int> *new_v = pool->get();
-+        vector<int> *new_v = new vector<int>();
-         *new_v = *(state[i].sequence);
-         if(it->first != 0)
-         {
-@@ -122,7 +125,7 @@
-         new_state.push_back(TNodeState(it->second.dest[j], new_v, state[i].dirty||false));
-       }
-     }
--    pool->release(state[i].sequence);
-+    delete state[i].sequence;
-   }
-   
-   state = new_state;
-@@ -147,8 +150,8 @@
-     {
-       for(int j = 0; j != it->second.size; j++)
-       {
--        vector<int> *new_v = pool->get();
--        *new_v = *(state[i].sequence);
-+        vector<int> *new_v = new vector<int>();
-+	*new_v = *(state[i].sequence);
-         if(it->first != 0)
-         {
-           new_v->push_back(it->second.out_tag[j]);
-@@ -161,7 +164,7 @@
-     {
-       for(int j = 0; j != it->second.size; j++)
-       {
--        vector<int> *new_v = pool->get();
-+        vector<int> *new_v = new vector<int>();
-         *new_v = *(state[i].sequence);
-         if(it->first != 0)
-         {
-@@ -170,7 +173,7 @@
-         new_state.push_back(TNodeState(it->second.dest[j], new_v, true));
-       }
-     }
--    pool->release(state[i].sequence);
-+    delete state[i].sequence;
-   }
- 
-   state = new_state;
-@@ -187,7 +190,7 @@
-     {
-       for(int j = 0 ; j != it2->second.size; j++)
-       {
--        vector<int> *tmp = pool->get();
-+        vector<int> *tmp = new vector<int>();
-         *tmp = *(state[i].sequence);
-         if(it2->second.out_tag[j] != 0)
-         {
-@@ -199,6 +202,69 @@
-   }
- }
- 
-+void 
-+State::apply(int const input, int const alt1, int const alt2)
-+{
-+  vector<TNodeState> new_state;
-+  if(input == 0 || alt1 == 0 || alt2 == 0)
-+  {
-+    state = new_state;
-+    return;
-+  }
-+  
-+  for(size_t i = 0, limit = state.size(); i != limit; i++)
-+  {
-+    map<int, Dest>::const_iterator it;
-+    it = state[i].where->transitions.find(input);
-+    if(it != state[i].where->transitions.end())
-+    {
-+      for(int j = 0; j != it->second.size; j++)
-+      {
-+        vector<int> *new_v = new vector<int>();
-+	*new_v = *(state[i].sequence);
-+        if(it->first != 0)
-+        {
-+          new_v->push_back(it->second.out_tag[j]);
-+        }
-+        new_state.push_back(TNodeState(it->second.dest[j], new_v, state[i].dirty||false));
-+      }
-+    }
-+    it = state[i].where->transitions.find(alt1);
-+    if(it != state[i].where->transitions.end())
-+    {
-+      for(int j = 0; j != it->second.size; j++)
-+      {
-+        vector<int> *new_v = new vector<int>();
-+        *new_v = *(state[i].sequence);
-+        if(it->first != 0)
-+        {
-+          new_v->push_back(it->second.out_tag[j]);
-+        }
-+        new_state.push_back(TNodeState(it->second.dest[j], new_v, true));
-+      }
-+    }
-+    it = state[i].where->transitions.find(alt2);
-+    if(it != state[i].where->transitions.end())
-+    {
-+      for(int j = 0; j != it->second.size; j++)
-+      {
-+        vector<int> *new_v = new vector<int>();
-+        *new_v = *(state[i].sequence);
-+        if(it->first != 0)
-+        {
-+          new_v->push_back(it->second.out_tag[j]);
-+        }
-+        new_state.push_back(TNodeState(it->second.dest[j], new_v, true));
-+      }
-+    }
-+
-+    delete state[i].sequence;
-+  }
-+
-+  state = new_state;
-+}
-+
-+
- void
- State::step(int const input)
- {
-@@ -213,6 +279,37 @@
-   epsilonClosure();
- }
- 
-+void
-+State::step(int const input, int const alt1, int const alt2)
-+{
-+  apply(input, alt1, alt2);
-+  epsilonClosure();
-+}
-+
-+void 
-+State::step_case(wchar_t val, wchar_t val2, bool caseSensitive) 
-+{
-+  if (!iswupper(val) || caseSensitive) {
-+    step(val, val2);
-+  } else if(val != towlower(val)) {
-+    step(val, towlower(val), val2);
-+  } else {
-+    step(val, val2);
-+  }
-+}
-+
-+
-+void 
-+State::step_case(wchar_t val, bool caseSensitive) 
-+{
-+  if (!iswupper(val) || caseSensitive) {
-+    step(val);
-+  } else {
-+    step(val, towlower(val));
-+  }
-+}
-+
-+
- bool
- State::isFinal(set<Node *> const &finals) const
- {
-@@ -282,6 +379,60 @@
-   return result;
- }
- 
-+
-+set<pair<wstring, vector<wstring> > >
-+State::filterFinalsLRX(set<Node *> const &finals, 
-+		    Alphabet const &alphabet,
-+		    set<wchar_t> const &escaped_chars,
-+		    bool uppercase, bool firstupper, int firstchar) const
-+{
-+  set<pair<wstring, vector<wstring> > > results;
-+
-+  vector<wstring> current_result;
-+  wstring rule_id = L""; 
-+
-+  // /<$><select>station<n><ANY_TAG><$><skip><6>/<$><select>station<n><ANY_TAG><$><skip><6>
-+
-+  // if <$> current_result.push_back(current_word)
-+  // if /   results.insert(current_result)
-+
-+  for(size_t i = 0, limit = state.size(); i != limit; i++)
-+  {
-+    if(finals.find(state[i].where) != finals.end())
-+    {
-+      current_result.clear();
-+      rule_id = L"";
-+      wstring current_word = L"";
-+      for(size_t j = 0, limit2 = state[i].sequence->size(); j != limit2; j++)
-+      {
-+        if(escaped_chars.find((*(state[i].sequence))[j]) != escaped_chars.end())
-+        {
-+          current_word += L'\\';
-+        }
-+        wstring sym = L"";
-+        alphabet.getSymbol(sym, (*(state[i].sequence))[j], uppercase);
-+        if(sym == L"<$>") 
-+        { 
-+          if(current_word != L"")  
-+          {
-+            current_result.push_back(current_word); 
-+          }
-+          current_word = L"";
-+        }
-+        else 
-+        {
-+          current_word += sym; 
-+        }
-+      }
-+      rule_id = current_word;
-+      results.insert(make_pair(rule_id, current_result)); 
-+    }
-+  }
-+    
-+  return results;
-+}
-+
-+
- wstring
- State::filterFinalsSAO(set<Node *> const &finals, 
- 		       Alphabet const &alphabet,
-@@ -438,3 +589,149 @@
-   
-   return result;
- }
-+
-+
-+
-+void
-+State::pruneCompounds(int requiredSymbol, int separationSymbol, int compound_max_elements) 
-+{
-+  int minNoOfCompoundElements = compound_max_elements;
-+  int *noOfCompoundElements = new int[state.size()];
-+
-+  //wcerr << L"pruneCompounds..." << endl;
-+
-+  for (unsigned int i = 0;  i<state.size(); i++) {
-+    vector<int> seq = *state.at(i).sequence;
-+
-+    if (lastPartHasRequiredSymbol(seq, requiredSymbol, separationSymbol)) {
-+      int this_noOfCompoundElements = 0;
-+      for (int j = seq.size()-2; j>0; j--) if (seq.at(j)==separationSymbol) this_noOfCompoundElements++;
-+      noOfCompoundElements[i] = this_noOfCompoundElements;
-+      minNoOfCompoundElements = (minNoOfCompoundElements < this_noOfCompoundElements) ? 
-+                        minNoOfCompoundElements : this_noOfCompoundElements;
-+    }
-+    else {
-+      noOfCompoundElements[i] = INT_MAX;
-+		  //wcerr << L"Prune - No requiered symbol in state number " << i << endl;
-+    }
-+  }
-+
-+  // remove states with more than minimum number of compounds (or without the requiered symbol in the last part)
-+  vector<TNodeState>::iterator it = state.begin();
-+  int i=0;
-+  while(it != state.end()) {
-+    if (noOfCompoundElements[i] > minNoOfCompoundElements) {
-+      delete (*it).sequence;
-+      it = state.erase(it);
-+      //wcerr << L"Prune - State number " << i << L" removed!" << endl;
-+    }
-+    else it++;
-+    i++;
-+  }
-+
-+ delete[] noOfCompoundElements;
-+}
-+
-+
-+
-+void
-+State::pruneStatesWithForbiddenSymbol(int forbiddenSymbol) 
-+{
-+  vector<TNodeState>::iterator it = state.begin();
-+  while(it != state.end()) {
-+    vector<int> *seq = (*it).sequence;
-+    bool found = false;
-+    for(int i = seq->size()-1; i>=0; i--) {
-+      if(seq->at(i) == forbiddenSymbol) {
-+        i=-1;
-+        delete (*it).sequence;
-+        it = state.erase(it);
-+        found = true;
-+      }
-+    }
-+    if (!found) it++;
-+  }
-+}
-+
-+
-+
-+bool
-+State::lastPartHasRequiredSymbol(const vector<int> &seq, int requiredSymbol, int separationSymbol) 
-+{
-+  // state is final - it should be restarted it with all elements in stateset restart_state, with old symbols conserved
-+  bool restart=false;
-+  for (int n=seq.size()-1; n>=0; n--) {
-+    int symbol=seq.at(n);
-+    if (symbol==requiredSymbol) {
-+      restart=true;
-+      break;
-+    }
-+    if (symbol==separationSymbol) {
-+      break;
-+    }
-+  }
-+  return restart;
-+}
-+
-+
-+void
-+State::restartFinals(const set<Node *> &finals, int requiredSymbol, State *restart_state, int separationSymbol) 
-+{
-+
-+  for (unsigned int i=0;  i<state.size(); i++) {
-+    TNodeState state_i = state.at(i);
-+    // A state can be a possible final state and still have transitions
-+
-+    if (finals.count(state_i.where) > 0) {
-+      bool restart = lastPartHasRequiredSymbol(*(state_i.sequence), requiredSymbol, separationSymbol);
-+      if (restart) {
-+        if (restart_state != NULL) {
-+          for (unsigned int j=0; j<restart_state->state.size(); j++) {
-+            TNodeState initst = restart_state->state.at(j);
-+            vector<int> *tnvec = new vector<int>;
-+
-+            for(unsigned int k=0; k < state_i.sequence->size(); k++) tnvec->push_back(state_i.sequence->at(k));
-+            TNodeState tn(initst.where, tnvec, state_i.dirty);
-+            tn.sequence->push_back(separationSymbol);
-+            state.push_back(tn);
-+            }
-+          }
-+        }
-+      }
-+    }
-+}
-+
-+
-+
-+wstring
-+State::getReadableString(const Alphabet &a) 
-+{
-+  wstring retval = L"[";
-+
-+  for(unsigned int i=0; i<state.size(); i++) {
-+    vector<int>* seq = state.at(i).sequence;
-+    if(seq != NULL) for (unsigned int j=0; j<seq->size(); j++) {
-+      wstring ws = L"";
-+      a.getSymbol(ws, seq->at(j));
-+      //if(ws == L"") ws = L"?";
-+      retval.append(ws);
-+    }
-+
-+    /*Node *where = state.at(i).where;
-+    if(where == NULL) retval.append(L"→@null");
-+    else {
-+      retval.append(L"→");
-+      map<int, Dest>::iterator it;
-+      wstring ws;
-+      for (it = where->transitions.begin(); it != where->transitions.end(); it++) {
-+        int symbol = (*it).first;
-+        a.getSymbol(ws, symbol);
-+        retval.append(ws);
-+      }
-+    }*/
-+    if (i+1 < state.size()) retval.append(L", ");
-+  }
-+  retval.append(L"]");
-+  return retval;
-+}
-+
-Index: lttoolbox/alphabet.cc
-===================================================================
---- lttoolbox/alphabet.cc	(revision 21745)
-+++ lttoolbox/alphabet.cc	(working copy)
-@@ -221,3 +221,9 @@
- {
-   return spairinv[code];
- }
-+
-+
-+void Alphabet::setSymbol(int symbol, wstring newSymbolString) {
-+  //Should be a special character!
-+  if (symbol < 0) slexicinv[-symbol-1] = newSymbolString;
-+}
-Index: lttoolbox/lt-tmxproc.1
-===================================================================
---- lttoolbox/lt-tmxproc.1	(revision 21745)
-+++ lttoolbox/lt-tmxproc.1	(working copy)
-@@ -30,5 +30,4 @@
- .SH BUGS
- Lots of...lurking in the dark and waiting for you!
- .SH AUTHOR
--(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
--reserved.
-+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. 
-Index: lttoolbox/lt-comp.1
-===================================================================
---- lttoolbox/lt-comp.1	(revision 21745)
-+++ lttoolbox/lt-comp.1	(working copy)
-@@ -10,10 +10,30 @@
- .SH SYNOPSIS
- .B lt-comp
- [
-+.B \-a \fR| 
-+.B \-v \fR| 
-+.B \-l \fR| 
-+.B \-r \fR| 
-+.B \-h
-+]
-+[
- .B lr \fR| 
- .B rl
- ] dictionary_file output_file
- .PP
-+.B lt-comp
-+[
-+.B \-\-alt \fR| 
-+.B \-\-var \fR| 
-+.B \-\-var\-left \fR| 
-+.B \-\-var\-right \fR| 
-+.B \-\-help
-+]
-+[
-+.B lr \fR| 
-+.B rl
-+] dictionary_file output_file
-+.PP
- .SH DESCRIPTION
- .BR lt-comp 
- Is the application responsible of compiling dictionaries used by
-@@ -23,6 +43,32 @@
- .PP
- .SH OPTIONS
- .TP
-+.B \-a, \-\-alt
-+Sets the value of the \fIalt\fR attribute to use in compilation.
-+
-+Note that if no value is set, all entries containing an \fIalt\fR
-+attribute are omitted.
-+.TP
-+.B \-v, \-\-var
-+Sets the value of the \fIv\fR attribute to use in compilation. 
-+This should only be used with monodixes; for bidixes, see \-l and \-r.
-+
-+Note that if no value is set, all entries containing a \fIv\fR
-+attribute are considered to be \fIleft-to-right\fR.
-+.TP
-+.B \-l, \-\-var\-left
-+Sets the value of the \fIvl\fR attribute for use in compilation of bidixes.
-+"Left" here refers to the side of the dictionary, so this option is only valid
-+in \fIrl\fR mode.
-+.TP
-+.B \-r, \-\-var\-right
-+Sets the value of the \fIvr\fR attribute for use in compilation of bidixes.
-+"Right" here refers to the side of the dictionary, so this option is only valid
-+in \fIlr\fR mode.
-+.TP
-+.B \-h, \-\-help
-+Prints a short help message
-+.TP
- .B lr
- The resulting transducer will process dictionary entries
- \fIleft-to-right\fR.
-@@ -45,5 +91,4 @@
- .SH BUGS
- Lots of...lurking in the dark and waiting for you!
- .SH AUTHOR
--(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
--reserved.
-+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. 
-Index: lttoolbox/lt_locale.h
-===================================================================
---- lttoolbox/lt_locale.h	(revision 21745)
-+++ lttoolbox/lt_locale.h	(working copy)
-@@ -16,6 +16,7 @@
-  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-  * 02111-1307, USA.
-  */
-+
- #ifndef _MYLOCALE_
- #define _MYLOCALE_
- 
-Index: lttoolbox/expander.h
-===================================================================
---- lttoolbox/expander.h	(revision 21745)
-+++ lttoolbox/expander.h	(working copy)
-@@ -42,6 +42,26 @@
-   xmlTextReaderPtr reader;
-   
-   /**
-+   * The alt value
-+   */
-+  wstring alt;
-+  
-+  /**
-+   * The variant value (monodix)
-+   */
-+  wstring variant;
-+  
-+  /**
-+   * The variant value (left side of bidix)
-+   */
-+  wstring variant_left;
-+  
-+  /**
-+   * The variant value (right side of bidix)
-+   */
-+  wstring variant_right;
-+  
-+  /**
-    * The paradigm being compiled
-    */
-   wstring current_paradigm;
-@@ -186,6 +206,29 @@
-    * Compile dictionary to letter transducers
-    */
-   void expand(string const &fichero, FILE *output);
-+  /**
-+   * Set the alt value to use in compilation
-+   * @param a the value
-+   */
-+   void setAltValue(string const &a);
-+
-+  /**
-+   * Set the variant value to use in expansion
-+   * @param v the value
-+   */
-+   void setVariantValue(string const &v);
-+
-+  /**
-+   * Set the variant_left value to use in expansion
-+   * @param v the value
-+   */
-+   void setVariantLeftValue(string const &v);
-+
-+  /**
-+   * Set the variant_right value to use in expansion
-+   * @param v the value
-+   */
-+   void setVariantRightValue(string const &v);
- };
- 
- 
-Index: lttoolbox/transducer.cc
-===================================================================
---- lttoolbox/transducer.cc	(revision 21745)
-+++ lttoolbox/transducer.cc	(working copy)
-@@ -18,6 +18,7 @@
-  */
- #include <lttoolbox/transducer.h>
- #include <lttoolbox/compression.h>
-+#include <lttoolbox/alphabet.h>
- #include <lttoolbox/lttoolbox_config.h>
- #include <lttoolbox/my_stdio.h>
- 
-@@ -187,6 +188,13 @@
- void
- Transducer::setFinal(int const state, bool valor)
- {
-+  int initial_copy = getInitial();
-+/*
-+  if(state == initial_copy)
-+  {
-+    wcerr << L"Setting initial state to final" << endl;
-+  }
-+*/
-   if(valor)
-   {
-     finals.insert(state);
-@@ -609,3 +617,119 @@
-   finals.clear();
-   finals.insert(tmp);
- }
-+
-+void
-+Transducer::show(Alphabet &alphabet, FILE *output, int const epsilon_tag)
-+{
-+  joinFinals(epsilon_tag);
-+
-+  map<int, multimap<int, int> > temporal;
-+
-+  for(map<int, multimap<int, int> >::iterator it = transitions.begin(); it != transitions.end(); it++)
-+  {
-+    multimap<int, int> aux = it->second;
-+  
-+    for(multimap<int, int>::iterator it2 = aux.begin(); it2 != aux.end(); it2++) 
-+    {
-+      pair<int, int> t = alphabet.decode(it2->first);
-+      fwprintf(output, L"%d\t", it->first);
-+      fwprintf(output, L"%d\t", it2->second);
-+      wstring l = L"";
-+      alphabet.getSymbol(l, t.first);
-+      if(l == L"")  // If we find an epsilon
-+      {
-+        fwprintf(output, L"ε\t", l.c_str());
-+      }
-+      else 
-+      {
-+        fwprintf(output, L"%S\t", l.c_str());
-+      }
-+      wstring r = L"";
-+      alphabet.getSymbol(r, t.second);
-+      if(r == L"")  // If we find an epsilon
-+      {
-+        fwprintf(output, L"ε\t", r.c_str());
-+      }
-+      else 
-+      {
-+        fwprintf(output, L"%S\t", r.c_str());
-+      }
-+      fwprintf(output, L"\n");
-+    } 
-+  } 
-+
-+  for(set<int>::iterator it3 = finals.begin(); it3 != finals.end(); it3++)
-+  {
-+    fwprintf(output, L"%d\n", *it3);
-+  }
-+}
-+
-+int 
-+Transducer::getStateSize(int const state)
-+{
-+ set<int> states;
-+ set<int> myclosure1 = closure(state, 0);
-+ states.insert(myclosure1.begin(), myclosure1.end());
-+ int num_transitions = 0;
-+
-+ for(set<int>::iterator it2 = states.begin(); it2 != states.end(); it2++)
-+ {
-+   num_transitions += transitions[*it2].size();
-+ }
-+
-+ return num_transitions;
-+}
-+
-+bool
-+Transducer::recognise(wstring patro, Alphabet &a, FILE *err)
-+{
-+  bool accepted = false;
-+  set<int> states ;
-+
-+  set<int> myclosure1 = closure(getInitial(), 0); 
-+  states.insert(myclosure1.begin(), myclosure1.end()); 
-+  // For each of the characters in the input string
-+  for(wstring::iterator it = patro.begin(); it != patro.end(); it++)  
-+  {
-+    set<int> new_state;        //Transducer::closure(int const state, int const epsilon_tag)
-+    int sym = *it;
-+    // For each of the current alive states
-+    //fwprintf(err, L"step: %S %C (%d)\n", patro.c_str(), *it, sym);
-+    for(set<int>::iterator it2 = states.begin(); it2 != states.end(); it2++)
-+    {
-+      multimap<int, int> p = transitions[*it2];
-+      // For each of the transitions in the state 
-+
-+      for(multimap<int, int>::iterator it3 = p.begin(); it3 != p.end(); it3++)
-+      { 
-+        
-+	pair<int, int> t = a.decode(it3->first);
-+        wstring l = L"";
-+        a.getSymbol(l, t.first);
-+        //wstring r = L"";
-+        //a.getSymbol(r, t.second);
-+
-+        //fwprintf(err, L"  -> state: %d, trans: %S:%S, targ: %d\n", *it2, (l == L"") ?  L"ε" : l.c_str(),  (r == L"") ?  L"ε" : r.c_str(), it3->second);
-+        //if(l.find(*it) != wstring::npos || l == L"" )
-+        if(l.find(*it) != wstring::npos)
-+        {
-+          set<int> myclosure = closure(it3->second, 0);
-+          //wcerr << L"Before closure alives: " <<new_state.size() << endl;  
-+          new_state.insert(myclosure.begin(), myclosure.end());
-+          //wcerr << L"After closure alives: " <<new_state.size() << endl;  
-+        }
-+      }
-+    }
-+    states = new_state;
-+  }
-+  for(set<int>::iterator it4 = states.begin(); it4 != states.end(); it4++)
-+  {
-+    if(isFinal(*it4)) 
-+    {
-+      accepted = true;
-+    }
-+  }
-+
-+  return accepted;
-+}
-+
-Index: lttoolbox/pool.h
-===================================================================
---- lttoolbox/pool.h	(revision 21745)
-+++ lttoolbox/pool.h	(working copy)
-@@ -1,175 +0,0 @@
--/*
-- * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
-- *
-- * This program is free software; you can redistribute it and/or
-- * modify it under the terms of the GNU General Public License as
-- * published by the Free Software Foundation; either version 2 of the
-- * License, or (at your option) any later version.
-- *
-- * This program is distributed in the hope that it will be useful, but
-- * WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-- * General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with this program; if not, write to the Free Software
-- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-- * 02111-1307, USA.
-- */
--#ifndef _GENERIC_POOL_
--#define _GENERIC_POOL_
--
--#include <list>
--
--using namespace std;
--
--/**
-- * Pool of T objects
-- */
--template <class T>
--class Pool
--{
--private:
--  /**
--   * Free pointers to objects
--   */
--  list<T *> free;
--  
--  /**
--   * Currently created objects
--   */
--  list<T> created;
--  
--  /**
--   * copy method
--   * @param other pool object
--   */
--  void copy(Pool const &p)
--  {
--    created = p.created;
--  
--    // all new members are available
--    for(typename list<T>::iterator it = created.begin(), limit = created.end();
--        it != limit; it++)
--    {
--      free.push_back(&(*it));
--    }
--  }
--  
--  /**
--   * destroy method
--   */
--  void destroy()
--  {
--    // do nothing
--  }
--  
--  /**
--   * Allocate a pool of nelems size
--   * @param nelems initial size of the pool
--   */
--  void init(unsigned int const nelems)
--  {
--    created.clear();
--    free.clear();
--    T tmp;
--    for(unsigned int i = 0; i != nelems; i++)
--    {
--      created.push_front(tmp);
--      free.push_front(&(*(created.begin())));
--    }
--  }
--
--  /**
--   * Allocate a pool of nelems size with objects equal to 'object'
--   * @param nelems initial size of the pool
--   * @param object initial value of the objects in the pool
--   */
--  void init(unsigned int const nelems, T const &object)
--  {
--    created.clear();
--    free.clear();
--    for(unsigned int i = 0; i != nelems; i++)
--    {
--      created.push_front(object);
--      free.push_front(&(*(created.begin())));
--    }
--  }
--
--  
--public:
--  
--  /**
--   * Constructor
--   */
--  Pool()
--  {
--    init(1);
--  }
--    
--  /**
--   * Parametrized constructor
--   * @param nelems initial size of the pool
--   * @param object initial value of the objects in the pool
--   */
--  Pool(unsigned int const nelems, T const &object)
--  {
--    init(nelems, object);
--  }
--  
--  /**
--   * Parametrized constructor
--   * @param nelems initial size of the pool
--   */
--  Pool(unsigned int const nelems)
--  {
--    init(nelems);
--  }
--  
--  /**
--   * Destructor
--   */
--  ~Pool()
--  {
--    destroy();
--  }
--  
--  /**
--   * Copy constructor
--   */
--  Pool(Pool const &p)
--  {
--    copy(p);
--  }
--   
--  /**
--   * Allocate a pointer to a free 'new' object.
--   * @return pointer to the object
--   */
--  T * get()
--  {
--    if(free.size() != 0)
--    {
--      T *result = *(free.begin());
--      free.erase(free.begin());
--      return result;
--    }
--    else
--    {
--      T tmp;
--      created.push_front(tmp);
--      return &(*(created.begin()));
--    }
--  }  
--  
--  /**
--   * Release a no more needed instance of a pooled object
--   * @param item the no more needed instance of the object
--   */ 
--  void release(T *item)
--  {
--    free.push_front(item);
--  }
--};
--
--#endif
-Index: lttoolbox/compiler.h
-===================================================================
---- lttoolbox/compiler.h	(revision 21745)
-+++ lttoolbox/compiler.h	(working copy)
-@@ -44,6 +44,26 @@
-   xmlTextReaderPtr reader;
-   
-   /**
-+   * The alt value
-+   */
-+  wstring alt;
-+
-+  /**
-+   * The variant value (monodix)
-+   */
-+  wstring variant;
-+  
-+  /**
-+   * The variant value (left side of bidix)
-+   */
-+  wstring variant_left;
-+  
-+  /**
-+   * The variant value (right side of bidix)
-+   */
-+  wstring variant_right;
-+    
-+  /**
-    * The paradigm being compiled
-    */
-   wstring current_paradigm;
-@@ -65,6 +85,16 @@
-   wstring letters;
-   
-   /**
-+   * Set verbose mode: warnings which may or may not be correct
-+   */
-+  bool verbose;
-+
-+  /**
-+   * First element (of an entry)
-+   */
-+  bool first_element;
-+
-+  /**
-    * Identifier of all the symbols during the compilation
-    */
-   Alphabet alphabet;  
-@@ -264,10 +294,14 @@
-   static wstring const COMPILER_LEMMA_ATTR;
-   static wstring const COMPILER_IGNORE_ATTR;
-   static wstring const COMPILER_IGNORE_YES_VAL;
-+  static wstring const COMPILER_ALT_ATTR;
-+  static wstring const COMPILER_V_ATTR;
-+  static wstring const COMPILER_VL_ATTR;
-+  static wstring const COMPILER_VR_ATTR;
- 
- 
-   /**
--   * Copnstructor
-+   * Constructor
-    */
-   Compiler();
- 
-@@ -292,6 +326,35 @@
-    * @param fd the stream where write the result
-    */
-   void write(FILE *fd);
-+
-+  /**
-+   * Set verbose output
-+   */
-+  void setVerbose(bool verbosity = false);
-+
-+  /**
-+   * Set the alt value to use in compilation
-+   * @param a the value
-+   */
-+  void setAltValue(string const &a);
-+
-+  /**
-+   * Set the variant value to use in compilation
-+   * @param v the value
-+   */
-+  void setVariantValue(string const &v);
-+
-+  /**
-+   * Set the variant_left value to use in compilation
-+   * @param v the value
-+   */
-+  void setVariantLeftValue(string const &v);
-+
-+  /**
-+   * Set the variant_right value to use in compilation
-+   * @param v the value
-+   */
-+  void setVariantRightValue(string const &v);
- };
- 
- 
-Index: lttoolbox/lt-tmxcomp.1
-===================================================================
---- lttoolbox/lt-tmxcomp.1	(revision 21745)
-+++ lttoolbox/lt-tmxcomp.1	(working copy)
-@@ -38,5 +38,4 @@
- .SH BUGS
- Lots of...lurking in the dark and waiting for you!
- .SH AUTHOR
--(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
--reserved.
-+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. 
-Index: lttoolbox/alphabet.h
-===================================================================
---- lttoolbox/alphabet.h	(revision 21745)
-+++ lttoolbox/alphabet.h	(working copy)
-@@ -145,6 +145,13 @@
-    */
-   bool isTag(int const symbol) const;
- 
-+  /**
-+   * Sets an already existing symbol to represent a new value
-+   * @param symbol the code of the symbol to set
-+   * @param newSymbolString the new string for this symbol
-+   */
-+  void setSymbol(int symbol, wstring newSymbolString);
-+
-   pair<int, int> const & decode(int const code) const;
-   
- };
-Index: lttoolbox/state.h
-===================================================================
---- lttoolbox/state.h	(revision 21745)
-+++ lttoolbox/state.h	(working copy)
-@@ -19,6 +19,7 @@
- #ifndef _STATE_
- #define _STATE_
- 
-+#include <map>
- #include <set>
- #include <string>
- #include <vector>
-@@ -26,7 +27,9 @@
- 
- #include <lttoolbox/alphabet.h>
- #include <lttoolbox/node.h>
--#include <lttoolbox/pool.h>
-+#include <lttoolbox/match_exe.h>
-+#include <lttoolbox/match_state.h>
-+#include <lttoolbox/transducer.h>
- 
- using namespace std;
- 
-@@ -43,7 +46,7 @@
-   {
-     Node *where;
-     vector<int> *sequence;
--    bool dirty;
-+    bool dirty; // What does "dirty" mean ? 
-     
-     TNodeState(Node * const &w, vector<int> * const &s, bool const &d): where(w), sequence(s), dirty(d){}
-     TNodeState & operator=(TNodeState const &other)
-@@ -58,17 +61,6 @@
-   vector<TNodeState> state;
- 
-   /**
--   * Pool of wchar_t vectors, for efficience (static class)
--   */
--  Pool<vector<int> > *pool;  
--
--  /**
--   * Copy function
--   * @param s the state to be copied
--   */
--  void copy(State const &s);
--
--  /**
-    * Destroy function
-    */
-   void destroy();
-@@ -86,6 +78,8 @@
-    */
-   void apply(int const input, int const alt);
- 
-+  void apply(int const input, int const alt1, int const alt2);
-+
-   /**
-    * Calculate the epsilon closure over the current state, replacing
-    * its content.
-@@ -92,11 +86,21 @@
-    */
-   void epsilonClosure();
- 
-+  bool lastPartHasRequiredSymbol(const vector<int> &seq, int requiredSymbol, int separationSymbol);
-+
- public:
-+
-   /**
-+   * Copy function
-+   * @param s the state to be copied
-+   */
-+  void copy(State const &s);
-+
-+
-+  /**
-    * Constructor
-    */
--  State(Pool<vector<int> > *);
-+  State();
- 
-   /**
-    * Destructor
-@@ -135,6 +139,13 @@
-    */
-   void step(int const input, int const alt);
- 
-+  void step(int const input, int const alt1, int const alt2);
-+
-+  void step_case(wchar_t val, bool caseSensitive);
-+
-+  void step_case(wchar_t val, wchar_t val2, bool caseSensitive);
-+
-+
-   /**
-    * Init the state with the initial node and empty output
-    * @param initial the initial node of the transducer
-@@ -142,6 +153,21 @@
-   void init(Node *initial);
- 
-   /**
-+    * Remove states not containing a specific symbol in their last 'part', and states 
-+    * with more than a number of 'parts'
-+    * @param requieredSymbol the symbol requiered in the last part
-+    * @param separationSymbol the symbol that represent the separation between two parts
-+    * @param compound_max_elements the maximum part number allowed
-+    */
-+  void pruneCompounds(int requiredSymbol, int separationSymbol, int compound_max_elements);
-+
-+  /**
-+    * Remove states containing a forbidden symbol
-+    * @param forbiddenSymbol the symbol forbidden
-+    */
-+  void pruneStatesWithForbiddenSymbol(int forbiddenSymbol);
-+
-+  /**
-    * Print all outputs of current parsing, preceded by a bar '/',
-    * from the final nodes of the state
-    * @param finals the set of final nodes
-@@ -156,8 +182,8 @@
-   wstring filterFinals(set<Node *> const &finals, Alphabet const &a,
-                       set<wchar_t> const &escaped_chars,
-                       bool uppercase = false,
--		      bool firstupper = false,
--		      int firstchar = 0) const;
-+                      bool firstupper = false,
-+                      int firstchar = 0) const;
- 
-   /**
-    * Same as previous one, but  the output is adapted to the SAO system
-@@ -173,11 +199,44 @@
-   wstring filterFinalsSAO(set<Node *> const &finals, Alphabet const &a,
-                       set<wchar_t> const &escaped_chars,
-                       bool uppercase = false,
--		      bool firstupper = false,
--		      int firstchar = 0) const;
-+                      bool firstupper = false,
-+                      int firstchar = 0) const;
- 
- 
-   /**
-+   * Same as previous one, but  the output is adapted to the LRX system
-+   * @param finals the set of final nodes
-+   * @param a the alphabet to decode strings
-+   * @param escaped_chars the set of chars to be preceded with one 
-+   *                      backslash
-+   * @param uppercase true if the word is uppercase
-+   * @param firstupper true if the first letter of a word is uppercase
-+   * @param firstchar first character of the word
-+   * @return the result of the transduction
-+   */
-+
-+  set<pair<wstring, vector<wstring> > > filterFinalsLRX(set<Node *> const &finals, Alphabet const &a,
-+                      set<wchar_t> const &escaped_chars,
-+                      bool uppercase = false,
-+                      bool firstupper = false,
-+                      int firstchar = 0) const;
-+
-+
-+
-+
-+
-+  /**
-+   * Find final states, remove those that not has a requiredSymbol and 'restart' each of them as the 
-+   * set of initial states, but remembering the sequence and adding a separationSymbol
-+   * @param finals
-+   * @param requiredSymbol
-+   * @param restart_state
-+   * @param separationSymbol
-+   */
-+    void restartFinals(const set<Node *> &finals, int requiredSymbol, State *restart_state, int separationSymbol);
-+
-+
-+  /**
-    * Returns true if at least one record of the state references a
-    * final node of the set
-    * @param finals set of final nodes @return
-@@ -185,6 +244,11 @@
-    */
-   bool isFinal(set<Node *> const &finals) const;
- 
-+  /**
-+   * Return the full states string (to allow debuging...) using a Java ArrayList.toString style
-+   */
-+  wstring getReadableString(const Alphabet &a);
-+
-   wstring filterFinalsTM(set<Node *> const &finals, 
- 			 Alphabet const &alphabet,
-                          set<wchar_t> const &escaped_chars,
-Index: lttoolbox/Makefile.am
-===================================================================
---- lttoolbox/Makefile.am	(revision 21745)
-+++ lttoolbox/Makefile.am	(working copy)
-@@ -2,7 +2,7 @@
- h_sources = alphabet.h buffer.h compiler.h compression.h  \
-             entry_token.h expander.h fst_processor.h lt_locale.h ltstr.h \
-             match_exe.h match_node.h match_state.h my_stdio.h node.h \
--            pattern_list.h pool.h regexp_compiler.h sorted_vector.h state.h \
-+            pattern_list.h regexp_compiler.h sorted_vector.h state.h \
-             transducer.h trans_exe.h xml_parse_util.h exception.h tmx_compiler.h
- cc_sources = alphabet.cc compiler.cc compression.cc entry_token.cc \
-              expander.cc fst_processor.cc lt_locale.cc match_exe.cc \
-@@ -13,7 +13,7 @@
- library_includedir = $(includedir)/$(GENERIC_LIBRARY_NAME)-$(GENERIC_API_VERSION)/$(GENERIC_LIBRARY_NAME)
- library_include_HEADERS = $(h_sources)
- 
--bin_PROGRAMS = lt-comp lt-proc lt-expand lt-tmxcomp lt-tmxproc
-+bin_PROGRAMS = lt-comp lt-proc lt-expand lt-tmxcomp lt-tmxproc lt-print
- instdir = lttoolbox
- 
- lib_LTLIBRARIES= liblttoolbox3.la
-@@ -26,6 +26,10 @@
- 
- lttoolbox_DATA = dix.dtd
- 
-+lt_print_SOURCES = lt_print.cc  
-+lt_print_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la
-+lt_print_LDFLAGS = -llttoolbox$(GENERIC_MAJOR_VERSION) $(LTTOOLBOX_LIBS)
-+
- lt_comp_SOURCES = lt_comp.cc  
- lt_comp_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la
- lt_comp_LDFLAGS = -llttoolbox$(GENERIC_MAJOR_VERSION) $(LTTOOLBOX_LIBS)
-@@ -46,8 +50,18 @@
- lt_tmxproc_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la
- lt_tmxproc_LDFLAGS = -llttoolbox$(GENERIC_MAJOR_VERSION) $(LTTOOLBOX_LIBS)
- 
--man_MANS = lt-comp.1 lt-expand.1 lt-proc.1 lt-tmxcomp.1 lt-tmxproc.1
-+#lt-validate-dictionary: Makefile.am validate-header.sh
-+#	@echo "Creating lt-validate-dictionary script"
-+#	@echo "#!$(BASH)" > $@
-+#	@cat validate-header.sh >> $@
-+#	@echo "$(XMLLINT) --dtdvalid $(apertiumdir)/dix.dtd --noout \$$FILE1 && exit 0;" >> $@
-+#	@echo "exit 1;" >> $@
-+#	@chmod a+x $@
- 
-+
-+
-+man_MANS = lt-comp.1 lt-expand.1 lt-proc.1 lt-tmxcomp.1 lt-tmxproc.1 lt-print.1
-+
- INCLUDES = -I$(top_srcdir) $(LTTOOLBOX_CFLAGS)
- CLEANFILES = *~
- 
-Index: lttoolbox/lt-print.1
-===================================================================
---- lttoolbox/lt-print.1	(revision 0)
-+++ lttoolbox/lt-print.1	(revision 44914)
-@@ -0,0 +1,34 @@
-+.TH lt-print 1 2006-03-08 "" ""
-+.SH NAME
-+lt-print \- This application is part of the lexical processing modules
-+and tools (
-+.B lttoolbox
-+)
-+.PP
-+This tool is part of the apertium machine translation
-+architecture: \fBhttp://www.apertium.org\fR.
-+.SH SYNOPSIS
-+.B lt-print
-+ bin_file
-+.PP
-+.SH DESCRIPTION
-+.BR lt-print
-+Is the application responsible for printing compiled dictionaries in
-+ATT format.
-+.PP
-+.B bin_file 
-+The compiled input file .
-+.PP
-+.B output_file
-+The transducer in ATT format .  
-+
-+.SH SEE ALSO
-+.I lt-comp\fR(1),
-+.I lt-proc\fR(1),
-+.I lt-expand\fR(1),
-+.I apertium-tagger\fR(1),
-+.I apertium\fR(1).
-+.SH BUGS
-+Lots of...lurking in the dark and waiting for you!
-+.SH AUTHOR
-+(c) 2005--2012 Universitat d'Alacant / Universidad de Alicante. 
-Index: lttoolbox/lt_print.cc
-===================================================================
---- lttoolbox/lt_print.cc	(revision 0)
-+++ lttoolbox/lt_print.cc	(revision 44914)
-@@ -0,0 +1,106 @@
-+/*
-+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License as
-+ * published by the Free Software Foundation; either version 2 of the
-+ * License, or (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful, but
-+ * WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-+ * General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-+ * 02111-1307, USA.
-+ */
-+#include <lttoolbox/transducer.h>
-+#include <lttoolbox/compression.h>
-+#include <lttoolbox/lttoolbox_config.h>
-+
-+#include <lttoolbox/my_stdio.h>
-+#include <lttoolbox/lt_locale.h>
-+
-+#include <cstdlib>
-+#include <iostream>
-+#include <libgen.h>
-+#include <string>
-+
-+using namespace std;
-+
-+void endProgram(char *name)
-+{
-+  if(name != NULL)
-+  {
-+    cout << basename(name) << " v" << PACKAGE_VERSION <<": dump a transducer to text in ATT format" << endl;
-+    cout << "USAGE: " << basename(name) << " bin_file " << endl;
-+  }
-+  exit(EXIT_FAILURE);
-+}
-+
-+
-+int main(int argc, char *argv[])
-+{
-+  if(argc != 2) 
-+  {
-+    endProgram(argv[0]);
-+  }
-+
-+  LtLocale::tryToSetLocale();
-+
-+
-+  FILE *input = fopen(argv[1], "r");
-+
-+  Alphabet new_alphabet;
-+  set<wchar_t> alphabetic_chars;
-+
-+  map<wstring, Transducer> transducers;
-+
-+  // letters
-+  int len = Compression::multibyte_read(input);
-+  while(len > 0)
-+  {
-+    alphabetic_chars.insert(static_cast<wchar_t>(Compression::multibyte_read(input)));
-+    len--;
-+  }  
-+
-+  // symbols  
-+  new_alphabet.read(input);
-+
-+  len = Compression::multibyte_read(input);
-+
-+  while(len > 0)
-+  {
-+    int len2 = Compression::multibyte_read(input);
-+    wstring name = L"";
-+    while(len2 > 0)
-+    {
-+      name += static_cast<wchar_t>(Compression::multibyte_read(input));
-+      len2--;
-+    }
-+    transducers[name].read(input);
-+
-+    len--;
-+  } 
-+
-+  /////////////////////
-+ 
-+  FILE *output = stdout;
-+  map<wstring, Transducer>::iterator penum = transducers.end();
-+  penum--;
-+  for(map<wstring, Transducer>::iterator it = transducers.begin(); it != transducers.end(); it++)
-+  {
-+    //it->second.minimize();
-+    it->second.show(new_alphabet, output);
-+    if(it != penum) 
-+    {
-+      fwprintf(output, L"--\n", it->first.c_str());
-+    }
-+  }
-+
-+  fclose(input);
-+  
-+  return 0;
-+}
================================================================

---- gitweb:

http://git.pld-linux.org/gitweb.cgi/packages/lttoolbox.git/commitdiff/78ddaac8fc4a3cd5335057d9c391f686cfcf68c7




More information about the pld-cvs-commit mailing list