[packages/lttoolbox] - added svn20130412 patch (some features needed by apertium-lex-tools) - added soname patch (bump so

qboosh qboosh at pld-linux.org
Wed Jun 26 17:33:42 CEST 2013


commit 4306e092da881c50b399a1865e0713d856e4a0ec
Author: Jakub Bogusz <qboosh at pld-linux.org>
Date:   Wed Jun 26 17:33:24 2013 +0200

    - added svn20130412 patch (some features needed by apertium-lex-tools)
    - added soname patch (bump soname because svn changes change ABI)
    - release 2.20130412.1

 lttoolbox-soname.patch      |   11 +
 lttoolbox-svn20130412.patch | 3112 +++++++++++++++++++++++++++++++++++++++++++
 lttoolbox.spec              |   16 +-
 3 files changed, 3135 insertions(+), 4 deletions(-)
---
diff --git a/lttoolbox.spec b/lttoolbox.spec
index e1535f7..72b4a69 100644
--- a/lttoolbox.spec
+++ b/lttoolbox.spec
@@ -2,12 +2,16 @@ Summary:	Augmented letter transducer tools for natural language processing
 Summary(pl.UTF-8):	Narzędzia do przetwarzania słów w językach naturalnych
 Name:		lttoolbox
 Version:	3.2.0
-Release:	1
+%define	subver	svn20130412
+%define	rel	1
+Release:	2.%{subver}.1
 License:	GPL v2+
 Group:		Applications/Text
 Source0:	http://downloads.sourceforge.net/apertium/%{name}-%{version}.tar.gz
 # Source0-md5:	708e7de837ed363f7103035ef2849fe4
-Patch0:		%{name}-opt.patch
+Patch0:		%{name}-svn20130412.patch
+Patch1:		%{name}-soname.patch
+Patch2:		%{name}-opt.patch
 URL:		http://wiki.apertium.org/wiki/Lttoolbox
 BuildRequires:	autoconf >= 2.52
 BuildRequires:	automake
@@ -58,7 +62,9 @@ Statyczna biblioteka lttoolbox.
 
 %prep
 %setup -q
-%patch0 -p1
+%patch0 -p0
+%patch1 -p1
+%patch2 -p1
 
 %build
 %{__libtoolize}
@@ -87,14 +93,16 @@ rm -rf $RPM_BUILD_ROOT
 %doc AUTHORS ChangeLog NEWS README
 %attr(755,root,root) %{_bindir}/lt-comp
 %attr(755,root,root) %{_bindir}/lt-expand
+%attr(755,root,root) %{_bindir}/lt-print
 %attr(755,root,root) %{_bindir}/lt-proc
 %attr(755,root,root) %{_bindir}/lt-tmxcomp
 %attr(755,root,root) %{_bindir}/lt-tmxproc
 %attr(755,root,root) %{_libdir}/liblttoolbox3-3.2.so.*.*.*
-%attr(755,root,root) %ghost %{_libdir}/liblttoolbox3-3.2.so.0
+%attr(755,root,root) %ghost %{_libdir}/liblttoolbox3-3.2.so.1
 %{_datadir}/lttoolbox
 %{_mandir}/man1/lt-comp.1*
 %{_mandir}/man1/lt-expand.1*
+%{_mandir}/man1/lt-print.1*
 %{_mandir}/man1/lt-proc.1*
 %{_mandir}/man1/lt-tmxcomp.1*
 %{_mandir}/man1/lt-tmxproc.1*
diff --git a/lttoolbox-soname.patch b/lttoolbox-soname.patch
new file mode 100644
index 0000000..973caac
--- /dev/null
+++ b/lttoolbox-soname.patch
@@ -0,0 +1,11 @@
+--- lttoolbox-3.2.0/configure.ac.orig	2013-06-26 16:15:39.881717927 +0200
++++ lttoolbox-3.2.0/configure.ac	2013-06-26 16:23:06.398365855 +0200
+@@ -23,7 +23,7 @@
+ AC_SUBST(GENERIC_MAJOR_VERSION)
+ 
+ # Shared library versioning
+-GENERIC_LIBRARY_VERSION=0:0:0
++GENERIC_LIBRARY_VERSION=1:0:0
+ #                       | | |
+ #                +------+ | +---+
+ #                |        |     |
diff --git a/lttoolbox-svn20130412.patch b/lttoolbox-svn20130412.patch
new file mode 100644
index 0000000..71dc646
--- /dev/null
+++ b/lttoolbox-svn20130412.patch
@@ -0,0 +1,3112 @@
+Index: lttoolbox/lt-proc.1
+===================================================================
+--- lttoolbox/lt-proc.1	(revision 21745)
++++ lttoolbox/lt-proc.1	(working copy)
+@@ -12,7 +12,9 @@
+ [
+ .B \-a \fR| 
+ .B \-b \fR| 
++.B \-o \fR| 
+ .B \-c \fR| 
++.B \-d \fR| 
+ .B \-e \fR| 
+ .B \-g \fR|
+ .B \-n \fR|
+@@ -29,7 +31,10 @@
+ [
+ .B \-\-analysis \fR| 
+ .B \-\-bilingual \fR|
++.B \-\-surf-bilingual \fR|
+ .B \-\-case-sensitive \fR|
++.B \-\-debugged-gen \fR|
++.B \-\-decompose-nouns \fR|
+ .B \-\-generation \fR|
+ .B \-\-non-marked-gen \fR|
+ .B \-\-tagged-gen \fR|
+@@ -98,9 +103,18 @@
+ form in the source language. Works tipically with the output of
+ apertium-pretransfer.
+ .TP
++.B \-o, \-\-surf-bilingual
++As with \-b, but takes input from apertium\-tagger \-p , with
++surface forms, and if the lexical form is not found in the bilingual
++dictionary, it outputs the surface form of the word. 
++.TP
++
+ .B \-c, \-\-case-sensitive
+ Use the literal case of the incoming characters
+ .TP
++.B \-d, \-\-debugged-gen
++Morph. generation with all the stuff
++.TP
+ .B \-e, \-\-decompose-compounds
+ Try to treat unknown words as compounds, and decompose them.
+ .TP
+@@ -154,5 +168,4 @@
+ .SH BUGS
+ Lots of...lurking in the dark and waiting for you!
+ .SH AUTHOR
+-(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+-reserved.
++(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante.
+Index: lttoolbox/fst_processor.cc
+===================================================================
+--- lttoolbox/fst_processor.cc	(revision 21745)
++++ lttoolbox/fst_processor.cc	(working copy)
+@@ -44,14 +44,17 @@
+   
+   caseSensitive = false;
+   dictionaryCase = false;
+-  compoundDecomposition = false;
++  do_decomposition = false;
+   nullFlush = false;
+   nullFlushGeneration = false;
++  showControlSymbols = false;
++  biltransSurfaceForms = false;  
++  compoundOnlyLSymbol = 0;
++  compoundRSymbol = 0;
++  compound_max_elements = 4;
+ 
+-  pool = new Pool<vector<int> >(4, vector<int>(50));
+-
+-  initial_state = new State(pool);
+-  current_state = new State(pool);
++  initial_state = new State();
++  current_state = new State();
+ }
+ 
+ FSTProcessor::~FSTProcessor()
+@@ -58,7 +61,6 @@
+ {
+   delete current_state;
+   delete initial_state;
+-  delete pool;
+ }
+ 
+ void
+@@ -408,6 +410,100 @@
+   return 0x7fffffff;
+ }
+ 
++pair<wstring, int>
++FSTProcessor::readBilingual(FILE *input, FILE *output)
++{
++  wint_t val = fgetwc_unlocked(input);
++  wstring symbol = L"";
++
++  if(feof(input))
++  {
++    return pair<wstring, int>(symbol, 0x7fffffff);
++  }
++  
++  if(outOfWord)
++  {
++    if(val == L'^')
++    {
++      val = fgetwc_unlocked(input);
++      if(feof(input))
++      {
++        return pair<wstring, int>(symbol, 0x7fffffff);
++      }
++    }
++    else if(val == L'\\')
++    {
++      fputwc_unlocked(val, output);
++      val = fgetwc_unlocked(input);
++      if(feof(input))
++      {
++        return pair<wstring, int>(symbol, 0x7fffffff);
++      }
++      fputwc_unlocked(val,output);
++      skipUntil(input, output, L'^');
++      val = fgetwc_unlocked(input);
++      if(feof(input))
++      {
++        return pair<wstring, int>(symbol, 0x7fffffff);
++      }
++    }
++    else
++    {
++      fputwc_unlocked(val, output);
++      skipUntil(input, output, L'^');
++      val = fgetwc_unlocked(input);
++      if(feof(input))
++      {
++        return pair<wstring, int>(symbol, 0x7fffffff);
++      }
++    }
++    outOfWord = false;
++  }
++
++  if(val == L'\\')
++  {
++    val = fgetwc_unlocked(input);
++    return pair<wstring, int>(symbol, val);
++  }
++  else if(val == L'$')
++  {
++    outOfWord = true;
++    return pair<wstring, int>(symbol, static_cast<int>(L'$'));
++  }
++  else if(val == L'<')
++  {
++    wstring cad = L"";
++    cad += static_cast<wchar_t>(val);
++    while((val = fgetwc_unlocked(input)) != L'>')
++    {
++      if(feof(input))
++      {
++	streamError();
++      }
++      cad += static_cast<wchar_t>(val);
++    }
++    cad += static_cast<wchar_t>(val);
++
++    int res = alphabet(cad);
++    
++    if (res == 0) {
++	    symbol = cad;
++    }
++    return pair<wstring, int>(symbol, res);
++  }
++  else if(val == L'[')
++  {
++    fputws_unlocked(readFullBlock(input, L'[', L']').c_str(), output);
++    return readBilingual(input, output);
++  }
++  else
++  {
++    return pair<wstring, int>(symbol, val);
++  }
++
++  return pair<wstring, int>(symbol, 0x7fffffff);
++}
++
+ void
+ FSTProcessor::flushBlanks(FILE *output)
+ {  
+@@ -494,6 +590,27 @@
+ }
+ 
+ void
++FSTProcessor::writeEscapedWithTags(wstring const &str, FILE *output)
++{
++  for(unsigned int i = 0, limit = str.size(); i < limit; i++)
++  {    
++    if(str[i] == L'<' && i >=1 && str[i-1] != L'\\')
++    {
++      fputws_unlocked(str.substr(i).c_str(), output);
++      return;
++    }
++
++    if(escaped_chars.find(str[i]) != escaped_chars.end())
++    {
++      fputwc_unlocked(L'\\', output);
++    }
++    fputwc_unlocked(str[i], output);
++  } 
++}
++
++
++
++void
+ FSTProcessor::printWord(wstring const &sf, wstring const &lf, FILE *output)
+ {
+   fputwc_unlocked(L'^', output);
+@@ -642,7 +759,86 @@
+   initGeneration();
+ }
+ 
++
+ wstring
++FSTProcessor::compoundAnalysis(wstring input_word, bool uppercase, bool firstupper) {
++    const int MAX_COMBINATIONS = 500;
++    //wcerr << L"compoundAnalysis(input_word = " << input_word << L")" << endl;
++
++    State current_state = *initial_state;
++
++    for(unsigned int i=0; i<input_word.size(); i++) {
++        wchar_t val=input_word.at(i);
++
++        //wcerr << val << L" før step " << i << L" current_state = " << current_state.getReadableString(alphabet) << endl;
++        current_state.step_case(val, caseSensitive);
++        
++        if(current_state.size() > MAX_COMBINATIONS) {
++            wcerr << L"Warning: compoundAnalysis's MAX_COMBINATIONS exceeded for '" << input_word << L"'" << endl;
++            wcerr << L"         gave up at char " << i << L" '" << val << L"'." << endl;
++
++            wstring nullString = L"";
++            return  nullString;
++        }
++
++        //wcerr << val << L" eft step " << i << L" current_state = " << current_state.getReadableString(alphabet) << endl;
++
++        if(i < input_word.size()-1)
++            current_state.restartFinals(all_finals, compoundOnlyLSymbol, initial_state, '+');
++        
++        //wcerr << val << " eft rest " << i << " current_state = " << current_state.getReadableString(alphabet) << endl;
++        //wcerr << i << " result = "  << current_state.filterFinals(all_finals, alphabet, escaped_chars, uppercase, firstupper) << endl;
++        //wcerr << i << " -- size = " << current_state.size() << endl;
++
++        if(current_state.size()==0) {
++            wstring nullString = L"";
++            return nullString;
++        }
++    }
++
++    current_state.pruneCompounds(compoundRSymbol, '+', compound_max_elements);
++    wstring result = current_state.filterFinals(all_finals, alphabet, escaped_chars, uppercase, firstupper);
++    //wcerr << L"rrresult = " << result << endl;
++    
++    return result;
++}
++
++
++
++void 
++FSTProcessor::initDecompositionSymbols() {
++  if ((compoundOnlyLSymbol=alphabet(L"<:co:only-L>")) == 0
++     && (compoundOnlyLSymbol=alphabet(L"<:compound:only-L>")) == 0
++     && (compoundOnlyLSymbol=alphabet(L"<@co:only-L>")) == 0
++     && (compoundOnlyLSymbol=alphabet(L"<@compound:only-L>")) == 0
++     && (compoundOnlyLSymbol=alphabet(L"<compound-only-L>")) == 0)
++  {
++    wcerr << L"Warning: Decomposition symbol <:compound:only-L> not found" << endl;
++  }
++  else if (!showControlSymbols)
++      alphabet.setSymbol(compoundOnlyLSymbol, L"");
++
++  if ((compoundRSymbol=alphabet(L"<:co:R>")) == 0
++     && (compoundRSymbol=alphabet(L"<:compound:R>")) == 0
++     && (compoundRSymbol=alphabet(L"<@co:R>")) == 0
++     && (compoundRSymbol=alphabet(L"<@compound:R>")) == 0
++     && (compoundRSymbol=alphabet(L"<compound-R>")) == 0) 
++  {
++    wcerr << L"Warning: Decomposition symbol <:compound:R> not found" << endl;
++  }
++  else if (!showControlSymbols)
++      alphabet.setSymbol(compoundRSymbol, L"");
++}
++
++
++void 
++FSTProcessor::initDecomposition() {
++  do_decomposition = true;
++  initAnalysis();
++  initDecompositionSymbols();
++}
++
++/*wstring
+ FSTProcessor::decompose(wstring w) 
+ {
+         State current_state = *initial_state;
+@@ -807,7 +1003,7 @@
+         }
+         //wcerr << L"+ decompose: " << lf << endl;
+         return lf;
+-}
++}*/
+ 
+ void
+ FSTProcessor::analysis(FILE *input, FILE *output)
+@@ -839,6 +1035,10 @@
+           uppercase = firstupper && iswupper(sf[sf.size()-1]);
+         }
+ 
++        if(do_decomposition && compoundOnlyLSymbol != 0) 
++        {
++          current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
++        }
+         lf = current_state.filterFinals(all_finals, alphabet,
+                                         escaped_chars,
+                                         uppercase, firstupper);
+@@ -853,6 +1053,10 @@
+           uppercase = firstupper && iswupper(sf[sf.size()-1]);
+         }
+ 
++        if(do_decomposition && compoundOnlyLSymbol != 0) 
++        {
++          current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
++        }
+         lf = current_state.filterFinals(all_finals, alphabet,
+                                         escaped_chars,
+                                         uppercase, firstupper);
+@@ -867,6 +1071,10 @@
+           uppercase = firstupper && iswupper(sf[sf.size()-1]);
+         }
+ 
++        if(do_decomposition && compoundOnlyLSymbol != 0) 
++        {
++          current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
++        }
+         lf = current_state.filterFinals(all_finals, alphabet,
+                                         escaped_chars,
+                                         uppercase, firstupper);
+@@ -881,6 +1089,10 @@
+           uppercase = firstupper && iswupper(sf[sf.size()-1]);
+         }
+ 
++        if(do_decomposition && compoundOnlyLSymbol != 0) 
++        {
++          current_state.pruneStatesWithForbiddenSymbol(compoundOnlyLSymbol);
++        }
+         lf = current_state.filterFinals(all_finals, alphabet, 
+                                         escaped_chars, 
+                                         uppercase, firstupper);
+@@ -969,16 +1181,22 @@
+         if(limit == 0)
+         {
+           input_buffer.back(sf.size());
+-          fputwc_unlocked(sf[0], output);          
++          writeEscaped(sf.substr(0,1), output);          
+         }
+         else
+         { 
+           input_buffer.back(1+(size-limit));
+           wstring unknown_word = sf.substr(0, limit);
+-          if(compoundDecomposition) 
++          if(do_decomposition) 
+           {
++            if(!dictionaryCase)
++            {
++              firstupper = iswupper(sf[0]);
++              uppercase = firstupper && iswupper(sf[sf.size()-1]);
++            }
++
+             wstring compound = L"";
+-            compound = decompose(unknown_word);
++            compound = compoundAnalysis(unknown_word, uppercase, firstupper);
+             if(compound != L"") 
+             {
+               printWord(unknown_word, compound, output);
+@@ -1002,16 +1220,22 @@
+         if(limit == 0)
+         {
+           input_buffer.back(sf.size());
+-          fputwc_unlocked(sf[0], output);          
++          writeEscaped(sf.substr(0,1), output);
+         }
+         else
+         { 
+           input_buffer.back(1+(size-limit));
+           wstring unknown_word = sf.substr(0, limit);
+-          if(compoundDecomposition) 
++          if(do_decomposition) 
+           {
++            if(!dictionaryCase)
++            {
++              firstupper = iswupper(sf[0]);
++              uppercase = firstupper && iswupper(sf[sf.size()-1]);
++            }
++
+             wstring compound = L"";
+-            compound = decompose(unknown_word);
++            compound = compoundAnalysis(unknown_word, uppercase, firstupper);
+             if(compound != L"") 
+             {
+               printWord(unknown_word, compound, output);
+@@ -1296,19 +1520,27 @@
+       fputwc(L'=', output);
+       val = readGeneration(input, output);
+     }
+-    
++
+     if(val == L'$' && outOfWord)
+     {
+       if(sf[0] == L'*' || sf[0] == L'%')
+       {
+-	if(mode != gm_clean)
++	if(mode != gm_clean && mode != gm_tagged_nm)
+         {
+ 	  writeEscaped(sf, output);
+ 	}
+-	else
++	else if (mode == gm_clean)
+ 	{
+ 	  writeEscaped(sf.substr(1), output);
+ 	}
++	else if(mode == gm_tagged_nm)
++	{
++	  fputwc_unlocked(L'^', output);        
++	  writeEscaped(removeTags(sf.substr(1)), output);
++	  fputwc_unlocked(L'/', output);          
++          writeEscapedWithTags(sf, output);
++	  fputwc_unlocked(L'$', output);
++	}
+       }
+       else if(sf[0] == L'@')
+       {
+@@ -1324,6 +1556,18 @@
+         {
+           writeEscaped(removeTags(sf), output);
+         }
++        else if(mode == gm_tagged)
++        {
++          writeEscaped(removeTags(sf), output);
++        }
++        else if(mode == gm_tagged_nm)
++        { 
++	  fputwc_unlocked(L'^', output);        
++	  writeEscaped(removeTags(sf.substr(1)), output);
++	  fputwc_unlocked(L'/', output);          
++          writeEscapedWithTags(sf, output);
++	  fputwc_unlocked(L'$', output);
++        }
+       }
+       else if(current_state.isFinal(all_finals))
+       {
+@@ -1330,7 +1574,7 @@
+         bool uppercase = sf.size() > 1 && iswupper(sf[1]);
+         bool firstupper= iswupper(sf[0]);
+ 
+-        if(mode == gm_tagged)
++        if(mode == gm_tagged || mode == gm_tagged_nm)
+         {
+ 	  fputwc_unlocked(L'^', output);
+         }
+@@ -1339,10 +1583,10 @@
+                                                   escaped_chars,
+                                                   uppercase, firstupper).substr(1).c_str(),
+ 						  output);
+-        if(mode == gm_tagged)
++        if(mode == gm_tagged || mode == gm_tagged_nm)
+         {
+ 	  fputwc_unlocked(L'/', output);
+-	  fputws_unlocked(sf.c_str(), output);
++	  writeEscapedWithTags(sf, output);
+ 	  fputwc_unlocked(L'$', output);
+         }
+ 
+@@ -1360,9 +1604,26 @@
+         }
+         else if(mode == gm_unknown)
+         {
++          if(sf != L"")
++          {
++            fputwc_unlocked(L'#', output);
++            writeEscaped(removeTags(sf), output);
++          }
++        }
++        else if(mode == gm_tagged)
++        {
+           fputwc_unlocked(L'#', output);
+           writeEscaped(removeTags(sf), output);
+         }
++        else if(mode == gm_tagged_nm)
++        {
++	  fputwc_unlocked(L'^', output);        
++	  writeEscaped(removeTags(sf), output);
++	  fputwc_unlocked(L'/', output);          
++	  fputwc_unlocked(L'#', output);          
++          writeEscapedWithTags(sf, output);
++	  fputwc_unlocked(L'$', output);
++        }
+       }
+   
+       current_state = *initial_state;
+@@ -2033,19 +2294,62 @@
+   }
+ 
+   State current_state = *initial_state;
+-  wstring sf = L"";
+-  wstring queue = L"";
+-  wstring result = L"";
++  wstring sf = L"";		// source language analysis
++  wstring queue = L"";		// symbols to be added to each target
++  wstring result = L"";		// result of looking up analysis in bidix
+   
+   outOfWord = false;
+  
+   skipUntil(input, output, L'^');
+-  int val;
++  pair<wstring,int> tr;		// readBilingual return value, containing:
++  int val;			// the alphabet value of current symbol, and
++  wstring symbol = L"";		// the current symbol as a string
++  bool seentags = false;  // have we seen any tags at all in the analysis?
+ 
+-  while((val = readGeneration(input, output)) != 0x7fffffff)
++  bool seensurface = false;
++  wstring surface = L"";
++  
++  while(true)			// ie. while(val != 0x7fffffff)
+   {
++    tr = readBilingual(input, output);
++    symbol = tr.first;
++    val = tr.second;
++
++    //fwprintf(stderr, L"> %S : %C : %d\n", tr.first.c_str(), tr.second, tr.second);
++    if(biltransSurfaceForms && !seensurface && !outOfWord) 
++    {
++      while(val != L'/' && val != 0x7fffffff) 
++      {
++        surface = surface + symbol; 
++        alphabet.getSymbol(surface, val);
++        tr = readBilingual(input, output);
++        symbol = tr.first;
++        val = tr.second;
++        //fwprintf(stderr, L" == %S : %C : %d => %S\n", symbol.c_str(), val, val, surface.c_str());
++      }
++      seensurface = true;
++      tr = readBilingual(input, output);
++      symbol = tr.first;
++      val = tr.second;
++    }
++
++    if (val == 0x7fffffff) 
++    {
++      break;
++    }
++    
+     if(val == L'$' && outOfWord)
+     {
++      if(!seentags)        // if no tags: only return complete matches
++      {
++        bool uppercase = sf.size() > 1 && iswupper(sf[1]);
++        bool firstupper= iswupper(sf[0]);
++
++        result = current_state.filterFinals(all_finals, alphabet,
++                                            escaped_chars,
++                                            uppercase, firstupper, 0);
++      }
++      
+       if(sf[0] == L'*')
+       {
+         printWordBilingual(sf, L"/"+sf, output);
+@@ -2055,14 +2359,23 @@
+         printWordBilingual(sf, compose(result, queue), output);
+       }
+       else
+-      {
+-        printWordBilingual(sf, L"/@"+sf, output);
++      { //xxx
++        if(biltransSurfaceForms) 
++        {
++          printWordBilingual(surface, L"/@"+surface, output);
++        }
++        else
++        { 
++          printWordBilingual(sf, L"/@"+sf, output);
++        }
+       }
+-  
++      seensurface = false;
++      surface = L""; 
+       queue = L"";
+       result = L"";
+       current_state = *initial_state;
+       sf = L"";
++      seentags = false;
+     }
+     else if(iswspace(val) && sf.size() == 0)
+     {
+@@ -2074,7 +2387,11 @@
+       {
+         sf += L'\\';
+       }
+-      alphabet.getSymbol(sf, val);
++      alphabet.getSymbol(sf, val); // add symbol to sf iff alphabetic
++      if(val == 0)  // non-alphabetic, possibly unknown tag; add to sf
++      {
++	sf += symbol;
++      }      
+     }
+     else
+     {
+@@ -2082,7 +2399,15 @@
+       {
+         sf += L'\\';
+       }
+-      alphabet.getSymbol(sf,val);
++      alphabet.getSymbol(sf, val); // add symbol to sf iff alphabetic
++      if(val == 0)  // non-alphabetic, possibly unknown tag; add to sf
++      {
++	sf += symbol;
++      }
++      if(alphabet.isTag(val) || val == 0) 
++      {
++        seentags = true;
++      }      
+       if(current_state.size() != 0)
+       {
+ 	if(!alphabet.isTag(val) && iswupper(val) && !caseSensitive)
+@@ -2105,12 +2430,21 @@
+       }
+       if(current_state.size() == 0 && result != L"")
+       {
+-        if(alphabet.isTag(val))
++        // We already have a result, but there is still more to read
++        // of the analysis; following tags are not consumed, but
++        // output as target language tags (added to result on
++        // end-of-word)
++        if(alphabet.isTag(val)) // known tag
+         {
+           alphabet.getSymbol(queue, val);
+         }
++        else if (val == 0) // non-alphabetic, possibly unknown tag
++        {
++          queue += symbol;
++        }
+         else
+         {
++          // There are no more alive transductions and the current symbol is not a tag -- unknown word!
+           result = L"";
+         }
+       }
+@@ -2127,6 +2461,7 @@
+   unsigned int end_point = input_word.size()-2;
+   wstring queue = L"";
+   bool mark = false;
++  bool seentags = false;  // have we seen any tags at all in the analysis?
+   
+   if(with_delim == false)
+   {
+@@ -2160,6 +2495,7 @@
+     }
+     else if(input_word[i] == L'<')
+     {
++      seentags = true;
+       symbol = L'<';
+       for(unsigned int j = i + 1; j <= end_point; j++)
+       {
+@@ -2217,7 +2553,7 @@
+     }
+     
+     if(current_state.size() == 0)
+-    { 
++    {
+       if(symbol != L"" && result != L"")
+       {
+         queue.append(symbol);
+@@ -2224,20 +2560,39 @@
+       }
+       else
+       {
+-	// word is not present
++        // word is not present
+         if(with_delim)
+-	{
++        {
+           result = L"^@" + input_word.substr(1);  
+-	}
++        }
+         else
+-	{
++        {
+           result = L"@" + input_word;
+-	}
++        }
+         return pair<wstring, int>(result, 0);  
+       }      
+     }
+   }
+ 
++  if (!seentags
++      && L"" == current_state.filterFinals(all_finals, alphabet,
++                                           escaped_chars,
++                                           uppercase, firstupper, 0)) 
++  {
++    // word is not present
++    if(with_delim)
++    {
++      result = L"^@" + input_word.substr(1);  
++    }
++    else
++    {
++      result = L"@" + input_word;
++    }
++    return pair<wstring, int>(result, 0);  
++  }
++        
++
++
+   // attach unmatched queue automatically
+ 
+   if(queue != L"")
+@@ -2661,10 +3016,11 @@
+   return str;
+ }
+ 
++
+ void
+-FSTProcessor::setDecompoundingMode(bool const value)
++FSTProcessor::setBiltransSurfaceForms(bool const value)
+ {
+-  compoundDecomposition = value;
++  biltransSurfaceForms = value;
+ }
+ 
+ void
+@@ -2688,7 +3044,7 @@
+ bool
+ FSTProcessor::getDecompoundingMode()
+ {
+-  return compoundDecomposition;
++  return do_decomposition;
+ }
+ 
+ bool
+Index: lttoolbox/lt_comp.cc
+===================================================================
+--- lttoolbox/lt_comp.cc	(revision 21745)
++++ lttoolbox/lt_comp.cc	(working copy)
+@@ -23,6 +23,7 @@
+ #include <iostream>
+ #include <libgen.h>
+ #include <string>
++#include <getopt.h>
+ 
+ using namespace std;
+ 
+@@ -31,7 +32,11 @@
+   if(name != NULL)
+   {
+     cout << basename(name) << " v" << PACKAGE_VERSION <<": build a letter transducer from a dictionary" << endl;
+-    cout << "USAGE: " << basename(name) << " lr | rl dictionary_file output_file [acx_file]" << endl;
++    cout << "USAGE: " << basename(name) << " [-avh] lr | rl dictionary_file output_file [acx_file]" << endl;
++    cout << "  -v:     set language variant" << endl;
++    cout << "  -a:     set alternative (monodix)" << endl;
++    cout << "  -l:     set left language variant (bidix)" << endl;
++    cout << "  -r:     set right language variant (bidix)" << endl;
+     cout << "Modes:" << endl;
+     cout << "  lr:     left-to-right compilation" << endl;
+     cout << "  rl:     right-to-left compilation" << endl;
+@@ -42,27 +47,113 @@
+ 
+ int main(int argc, char *argv[])
+ {
+-  if(argc != 4 && argc != 5)
++  Compiler c;
++  c.setVerbose(false);
++  
++#if HAVE_GETOPT_LONG
++  int option_index=0;
++#endif
++
++  string vl;
++  string vr;
++
++  while (true) {
++#if HAVE_GETOPT_LONG
++    static struct option long_options[] =
++    {
++      {"alt",       required_argument, 0, 'a'},
++      {"var",       required_argument, 0, 'v'},
++      {"var-left",  required_argument, 0, 'l'},
++      {"var-right", required_argument, 0, 'r'},
++      {"help",      no_argument,       0, 'h'}, 
++      {"verbose",   no_argument,       0, 'V'}, 
++      {0, 0, 0, 0}
++    };
++
++    int cnt=getopt_long(argc, argv, "a:v:l:r:hV", long_options, &option_index);
++#else
++    int cnt=getopt(argc, argv, "a:v:l:r:hV");
++#endif
++    if (cnt==-1)
++      break;
++
++    switch (cnt)
++    {
++      case 'a':
++        c.setAltValue(optarg);
++        break;
++
++      case 'v':
++        c.setVariantValue(optarg);
++        break;
++
++      case 'l':
++        vl = optarg;
++        c.setVariantLeftValue(vl);
++        break;
++
++      case 'r':
++        vr = optarg;
++        c.setVariantRightValue(vr);
++        break;
++
++      case 'V':
++        c.setVerbose(true);
++        break;
++
++      case 'h':
++      default:
++        endProgram(argv[0]);
++        break;
++    }
++  }
++
++  string opc;
++  string infile;
++  string outfile;
++  string acxfile;
++
++  switch(argc - optind + 1)
+   {
+-    endProgram(argv[0]);
++    case 5:
++      opc = argv[argc-4];
++      infile = argv[argc-3];
++      outfile = argv[argc-2];
++      acxfile = argv[argc-1];
++      break;
++
++    case 4:
++      opc = argv[argc-3];
++      infile = argv[argc-2];
++      outfile = argv[argc-1];
++      break;
++
++    default:
++      endProgram(argv[0]);
++      break;
+   }
+ 
+-  string opc = argv[1];
+-
+-  Compiler c;
+-  
+-  
+   if(opc == "lr")
+   {
+-    if(argc == 5)
++    if(vr == "" && vl != "")
+     {
+-      c.parseACX(argv[4], Compiler::COMPILER_RESTRICTION_LR_VAL);
++      cout << "Error: -l specified, but mode is lr" << endl;
++      endProgram(argv[0]);
+     }
+-    c.parse(argv[2], Compiler::COMPILER_RESTRICTION_LR_VAL);
++    if(acxfile != "")
++    {
++      c.parseACX(acxfile, Compiler::COMPILER_RESTRICTION_LR_VAL);
++    }
++    c.parse(infile, Compiler::COMPILER_RESTRICTION_LR_VAL);
+   }
+   else if(opc == "rl")
+   {
+-    c.parse(argv[2], Compiler::COMPILER_RESTRICTION_RL_VAL);
++    if(vl == "" && vr != "")
++    {
++      cout << "Error: -r specified, but mode is rl" << endl;
++      endProgram(argv[0]);
++    }
++    c.parse(infile, Compiler::COMPILER_RESTRICTION_RL_VAL);
+   }
+   else
+   {
+@@ -69,10 +160,10 @@
+     endProgram(argv[0]);
+   }
+ 
+-  FILE *output = fopen(argv[3], "wb");
++  FILE *output = fopen(outfile.c_str(), "wb");
+   if(!output)
+   {
+-    cerr << "Error: Cannot open file '" << argv[2] << "'." << endl;
++    cerr << "Error: Cannot open file '" << outfile << "'." << endl;
+     exit(EXIT_FAILURE);
+   }
+   c.write(output);
+Index: lttoolbox/fst_processor.h
+===================================================================
+--- lttoolbox/fst_processor.h	(revision 21745)
++++ lttoolbox/fst_processor.h	(working copy)
+@@ -43,7 +43,8 @@
+   gm_clean,      // clear all
+   gm_unknown,    // display unknown words, clear transfer and generation tags
+   gm_all,        // display all
+-  gm_tagged      // tagged generation
++  gm_tagged,     // tagged generation
++  gm_tagged_nm   // clean tagged generation
+ };
+ 
+ /**
+@@ -57,8 +58,6 @@
+    */
+   map<wstring, TransExe, Ltstr> transducers;
+ 
+-  Pool<vector<int> > *pool;
+-
+   /**
+    * Current state of lexical analysis
+    */
+@@ -130,6 +129,12 @@
+   bool outOfWord;
+ 
+   /**
++   * true if we're automatically removing surface forms.
++   */
++  bool biltransSurfaceForms;
++
++
++  /**
+    * if true, makes always difference between uppercase and lowercase
+    * characters
+    */
+@@ -154,9 +159,30 @@
+   /**
+    * try analysing unknown words as compounds
+    */
+-  bool compoundDecomposition;
++  bool do_decomposition;
+ 
+   /**
++   * Symbol of CompoundOnlyL
++   */
++  int compoundOnlyLSymbol;
++
++  /**
++   * Symbol of CompoundR
++   */
++  int compoundRSymbol;
++
++  /**
++   * Show or not the controls symbols (as compoundRSymbol)
++   */
++   bool showControlSymbols;
++
++  /**
++   * Max compound elements
++   * Hard coded for now, but there might come a switch one day
++   */
++  int compound_max_elements;
++
++  /**
+    * Prints an error of input stream and exits
+    */
+   void streamError();
+@@ -219,6 +245,13 @@
+   int readGeneration(FILE *input, FILE *output);
+ 
+   /**
++   * Read text from stream (biltrans version)
++   * @param input the stream to read
++   * @return the queue of 0-symbols, and the next symbol in the stream
++   */
++  pair<wstring, int> readBilingual(FILE *input, FILE *output);
++
++  /**
+    * Read text from stream (SAO version)
+    * @param input the stream to read
+    * @return the next symbol in the stream
+@@ -248,7 +281,17 @@
+    */
+   void writeEscaped(wstring const &str, FILE *output);
+ 
++
+   /**
++   * Write a string to an output stream, escaping all escapable characters
++   * but keeping symbols without escaping
++   * @param str the string to write, escaping characters
++   * @param output the stream to write in
++   */
++  void writeEscapedWithTags(wstring const &str, FILE *output);
++
++
++  /**
+    * Checks if an string ends with a particular suffix
+    * @param str the string to test
+    * @param the searched suffix
+@@ -287,6 +330,8 @@
+    */
+   void printUnknownWord(wstring const &sf, FILE *output);
+ 
++  void initDecompositionSymbols();
++
+   vector<wstring> numbers;
+   int readTMAnalysis(FILE *input);
+ 
+@@ -294,7 +339,7 @@
+   void printSpace(wchar_t const val, FILE *output);
+   void skipUntil(FILE *input, FILE *output, wint_t const character);
+   static wstring removeTags(wstring const &str);
+-  wstring decompose(wstring str);
++  wstring compoundAnalysis(wstring str, bool uppercase, bool firstupper);
+   size_t firstNotAlpha(wstring const &sf);
+ 
+   void analysis_wrapper_null_flush(FILE *input, FILE *output);
+@@ -338,9 +383,9 @@
+ 
+   void setCaseSensitiveMode(bool const value);
+   void setDictionaryCaseMode(bool const value);
++  void setBiltransSurfaceForms(bool const value);
+   void setNullFlush(bool const value);
+   bool getNullFlush();
+-  void setDecompoundingMode(bool const value);
+   bool getDecompoundingMode();
+ };
+ 
+Index: lttoolbox/lt_proc.cc
+===================================================================
+--- lttoolbox/lt_proc.cc	(revision 21745)
++++ lttoolbox/lt_proc.cc	(working copy)
+@@ -36,35 +36,42 @@
+ void endProgram(char *name)
+ {
+   cout << basename(name) << ": process a stream with a letter transducer" << endl;
+-  cout << "USAGE: " << basename(name) << " [-c] [-a|-g|-n|-d|-p|-s|-t|-b] fst_file [input_file [output_file]]" << endl;
++  cout << "USAGE: " << basename(name) << " [ -a | -b | -c | -d | -e | -g | -n | -p | -s | -t | -v | -h -z -w ] fst_file [input_file [output_file]]" << endl;
+   cout << "Options:" << endl;
+ #if HAVE_GETOPT_LONG
+   cout << "  -a, --analysis:         morphological analysis (default behavior)" << endl;
+-  cout << "  -b, --bilingual:        lexical transference" << endl;
++  cout << "  -b, --bilingual:        lexical transfer" << endl;
+   cout << "  -c, --case-sensitive:   use the literal case of the incoming characters" << endl;
++  cout << "  -d, --debugged-gen      morph. generation with all the stuff" <<endl;
++  cout << "  -e, --decompose-nouns:  Try to decompound unknown words" << endl;
+   cout << "  -g, --generation:       morphological generation" << endl;
++  cout << "  -l, --tagged-gen:       morphological generation keeping lexical forms" << endl;
++  cout << "  -m, --tagged-nm-gen:    same as -l but without unknown word marks" << endl;                              
+   cout << "  -n, --non-marked-gen    morph. generation without unknown word marks" << endl;
+-  cout << "  -d, --debugged-gen      morph. generation with all the stuff" <<endl;
++  cout << "  -o, --surf-bilingual:   lexical transfer with surface forms" << endl;
+   cout << "  -p, --post-generation:  post-generation" << endl;
+-  cout << "  -e, --decompose-compounds: try to decompose unknown word as compounds" << endl;
+   cout << "  -s, --sao:              SAO annotation system input processing" << endl;
+   cout << "  -t, --transliteration:  apply transliteration dictionary" << endl;
++  cout << "  -v, --version:          version" << endl;
+   cout << "  -z, --null-flush:       flush output on the null character " << endl;
+   cout << "  -w, --dictionary-case:  use dictionary case instead of surface case" << endl;
+-  cout << "  -v, --version:          version" << endl;
+   cout << "  -h, --help:             show this help" << endl;
+ #else
+   cout << "  -a:   morphological analysis (default behavior)" << endl;
++  cout << "  -b:   lexical transfer" << endl;
+   cout << "  -c:   use the literal case of the incoming characters" << endl;
++  cout << "  -d:   morph. generation with all the stuff" << endl;
++  cout << "  -e:   try to decompose unknown words as compounds" << endl;
+   cout << "  -g:   morphological generation" << endl;
++  cout << "  -l:   morphological generation keeping lexical forms" << endl;
+   cout << "  -n:   morph. generation without unknown word marks" << endl;
++  cout << "  -o:   lexical transfer with surface forms" << endl;
+   cout << "  -p:   post-generation" << endl;
+-  cout << "  -e:   try to decompose unknown words as compounds" << endl;
+   cout << "  -s:   SAO annotation system input processing" << endl;
+   cout << "  -t:   apply transliteration dictionary" << endl;
++  cout << "  -v:   version" << endl;
+   cout << "  -z:   flush output on the null character " << endl;
+   cout << "  -w:   use dictionary case instead of surface case" << endl;
+-  cout << "  -v:   version" << endl;
+   cout << "  -h:   show this help" << endl;
+ #endif
+   exit(EXIT_FAILURE);
+@@ -88,10 +95,12 @@
+     {
+       {"analysis",        0, 0, 'a'},
+       {"bilingual",       0, 0, 'b'},
++      {"surf-bilingual",  0, 0, 'o'},
+       {"generation",      0, 0, 'g'},
+       {"non-marked-gen",  0, 0, 'n'},
+       {"debugged-gen",    0, 0, 'd'},
+       {"tagged-gen",      0, 0, 'l'},
++      {"tagged-nm-gen",   0, 0, 'm'},
+       {"post-generation", 0, 0, 'p'},
+       {"sao",             0, 0, 's'},
+       {"transliteration", 0, 0, 't'},
+@@ -107,9 +116,9 @@
+   {
+ #if HAVE_GETOPT_LONG
+     int option_index;
+-    int c = getopt_long(argc, argv, "abceglndpstzwvh", long_options, &option_index);
++    int c = getopt_long(argc, argv, "abceglmndopstzwvh", long_options, &option_index);
+ #else
+-    int c = getopt(argc, argv, "abceglndpstzwvh");
++    int c = getopt(argc, argv, "abceglmndopstzwvh");
+ #endif    
+ 
+     if(c == -1)
+@@ -123,13 +132,12 @@
+       fstp.setCaseSensitiveMode(true);
+       break;
+ 
+-    case 'e':
+-      fstp.setDecompoundingMode(true);
+-      break;
+-      
++    case 'e':      
+     case 'a':
+     case 'b':
++    case 'o':
+     case 'l':
++    case 'm':
+     case 'g':
+     case 'n':
+     case 'd':
+@@ -248,11 +256,19 @@
+         fstp.initGeneration();
+         checkValidity(fstp);
+         fstp.generation(input, output, gm_all);
++        break;
+       
+       case 'l':
+         fstp.initGeneration();
+         checkValidity(fstp);
+         fstp.generation(input, output, gm_tagged);
++        break;
++
++      case 'm':
++        fstp.initGeneration();
++        checkValidity(fstp);
++        fstp.generation(input, output, gm_tagged_nm);
++        break;
+       
+       case 'p':
+         fstp.initPostgeneration();
+@@ -272,11 +288,24 @@
+         fstp.transliteration(input, output);
+         break;
+         
++      case 'o':
++        fstp.initBiltrans();
++        checkValidity(fstp);
++        fstp.setBiltransSurfaceForms(true);
++        fstp.bilingual(input, output);
++        break;
++   
+       case 'b':
+         fstp.initBiltrans();
+         checkValidity(fstp);
+         fstp.bilingual(input, output);
+         break;
++
++      case 'e':
++        fstp.initDecomposition();
++        checkValidity(fstp);
++        fstp.analysis(input, output);
++        break;
+       
+       case 'a':
+       default:
+Index: lttoolbox/expander.cc
+===================================================================
+--- lttoolbox/expander.cc	(revision 21745)
++++ lttoolbox/expander.cc	(working copy)
+@@ -295,9 +295,18 @@
+ {
+   wstring atributo=this->attrib(Compiler::COMPILER_RESTRICTION_ATTR);
+   wstring entrname=this->attrib(Compiler::COMPILER_LEMMA_ATTR);
++  wstring altval = this->attrib(Compiler::COMPILER_ALT_ATTR);
++  wstring varval = this->attrib(Compiler::COMPILER_V_ATTR);
++  wstring varl   = this->attrib(Compiler::COMPILER_VL_ATTR);
++  wstring varr   = this->attrib(Compiler::COMPILER_VR_ATTR);
+   
+   wstring myname = L"";
+-  if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == L"yes")
++  if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == L"yes"
++   || altval != L"" && altval != alt
++   || (varval != L"" && varval != variant && atributo == Compiler::COMPILER_RESTRICTION_RL_VAL)
++   || ((varl != L"" && varl != variant_left) && (varr != L"" && varr != variant_right))
++   || (varl != L"" && varl != variant_left && atributo == Compiler::COMPILER_RESTRICTION_RL_VAL)
++   || (varr != L"" && varr != variant_right && atributo == Compiler::COMPILER_RESTRICTION_LR_VAL))
+   {    
+     do
+     {
+@@ -316,11 +325,14 @@
+   }
+   
+   EntList items, items_lr, items_rl;
+-  if(atributo == Compiler::COMPILER_RESTRICTION_LR_VAL)
++  if(atributo == Compiler::COMPILER_RESTRICTION_LR_VAL 
++   || (varval != L"" && varval != variant && atributo != Compiler::COMPILER_RESTRICTION_RL_VAL)
++   || varl != L"" && varl != variant_left)
+   {
+     items_lr.push_back(pair<wstring, wstring>(L"", L""));
+   }
+-  else if(atributo == Compiler::COMPILER_RESTRICTION_RL_VAL)
++  else if(atributo == Compiler::COMPILER_RESTRICTION_RL_VAL
++        || (varr != L"" && varr != variant_right))
+   {
+     items_rl.push_back(pair<wstring, wstring>(L"", L""));
+   }
+@@ -594,3 +606,27 @@
+     it->second.append(endings.second);
+   }
+ }
++// Store the selected 'alt' value as a wide string; entries whose alt attribute is set are compared against it.
++void
++Expander::setAltValue(string const &a)
++{
++  alt = XMLParseUtil::stows(a);
++}
++// Store the selected variant as a wide string; entries whose 'v' attribute is set are compared against it.
++void
++Expander::setVariantValue(string const &v)
++{
++  variant = XMLParseUtil::stows(v);
++}
++// Store the selected left variant as a wide string; entries whose 'vl' attribute is set are compared against it.
++void
++Expander::setVariantLeftValue(string const &v)
++{
++  variant_left = XMLParseUtil::stows(v);
++}
++// Store the selected right variant as a wide string; entries whose 'vr' attribute is set are compared against it.
++void
++Expander::setVariantRightValue(string const &v)
++{
++  variant_right = XMLParseUtil::stows(v);
++}
+Index: lttoolbox/lt-expand.1
+===================================================================
+--- lttoolbox/lt-expand.1	(revision 21745)
++++ lttoolbox/lt-expand.1	(working copy)
+@@ -9,11 +9,28 @@
+ architecture: \fBhttp://www.apertium.org\fR.
+ .SH SYNOPSIS
+ .B lt-expand
++[
++.B \-a \fR| 
++.B \-v \fR| 
++.B \-l \fR| 
++.B \-r \fR| 
++.B \-h
++]
+ dictionary_file [output_file]
+ .PP
++.B lt-expand
++[
++.B \-\-alt \fR| 
++.B \-\-var \fR| 
++.B \-\-var\-left \fR| 
++.B \-\-var\-right \fR| 
++.B \-\-help
++]
++dictionary_file [output_file]
++.PP
+ .SH DESCRIPTION
+ .BR lt-expand 
+-Is the application responsible of expanding a dictionary into a
++Is the application responsible for expanding a dictionary into a
+ simple list of input string-output string pairs by eliminating
+ paradigms through substitution and unfolding.
+ .PP
+@@ -20,6 +37,23 @@
+ The output goes to \fIoutput_file\fR if it is present or to standard
+ output if it is missing.
+ .PP
++.SH OPTIONS
++.TP
++.B \-a, \-\-alt
++Sets the value of the \fIalt\fR attribute to use in expansion
++.TP
++.B \-v, \-\-var
++Sets the value of the \fIv\fR attribute to use in expansion of monodixes
++.TP
++.B \-l, \-\-var\-left
++Sets the value of the \fIvl\fR attribute to use in expansion of bidixes
++.TP
++.B \-r, \-\-var\-right
++Sets the value of the \fIvr\fR attribute to use in expansion of bidixes
++.TP
++.B \-h, \-\-help
++Prints a short help message
++.PP
+ .SH FILES
+ .B dictionary_file
+ The input dictionary to expand.
+@@ -34,5 +68,4 @@
+ .SH BUGS
+ Lots of...lurking in the dark and waiting for you!
+ .SH AUTHOR
+-(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+-reserved.
++(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. 
+Index: lttoolbox/dix.dtd
+===================================================================
+--- lttoolbox/dix.dtd	(revision 21745)
++++ lttoolbox/dix.dtd	(working copy)
+@@ -1,4 +1,21 @@
+ <!--
++   Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
++  
++   This program is free software; you can redistribute it and/or
++   modify it under the terms of the GNU General Public License as
++   published by the Free Software Foundation; either version 2 of the
++   License, or (at your option) any later version.
++  
++   This program is distributed in the hope that it will be useful, but
++   WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   General Public License for more details.
++  
++   You should have received a copy of the GNU General Public License
++   along with this program; if not, write to the Free Software
++   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
++   02111-1307, USA.
++
+       DTD for the format of dictionaries
+ -->
+ <!ELEMENT dictionary (alphabet?, sdefs?,
+@@ -57,6 +74,10 @@
+ 	i CDATA #IMPLIED
+ 	slr CDATA #IMPLIED
+ 	srl CDATA #IMPLIED
++	alt CDATA #IMPLIED
++	v CDATA #IMPLIED
++	vl CDATA #IMPLIED
++	vr CDATA #IMPLIED
+ >
+ 	<!-- r: restriction LR: left-to-right,
+ 			    RL: right-to-left -->
+@@ -66,6 +87,10 @@
+ 	<!-- i: ignore ('yes') means ignore, otherwise it is not ignored) -->
+ 	<!-- slr: translation sense when translating from left to right -->
+ 	<!-- srl: translation sense when translating from right to left --> 
++	<!-- alt: alternative entries are omitted if not selected -->
++	<!-- v: variant sets (monodix) direction restrictions based on language variant -->
++	<!-- vl: variant left sets direction restrictions based on language variant for language on left of bidix -->
++	<!-- vr: variant right sets direction restrictions based on language variant for language on right of bidix -->
+ <!ELEMENT par EMPTY>
+ 	<!-- reference to paradigm -->
+ <!ATTLIST par
+Index: lttoolbox/compiler.cc
+===================================================================
+--- lttoolbox/compiler.cc	(revision 21745)
++++ lttoolbox/compiler.cc	(working copy)
+@@ -56,6 +56,10 @@
+ wstring const Compiler::COMPILER_LEMMA_ATTR         = L"lm";
+ wstring const Compiler::COMPILER_IGNORE_ATTR        = L"i";
+ wstring const Compiler::COMPILER_IGNORE_YES_VAL     = L"yes";
++wstring const Compiler::COMPILER_ALT_ATTR           = L"alt";
++wstring const Compiler::COMPILER_V_ATTR             = L"v";
++wstring const Compiler::COMPILER_VL_ATTR            = L"vl";
++wstring const Compiler::COMPILER_VR_ATTR            = L"vr";
+ 
+ Compiler::Compiler()
+ {
+@@ -417,6 +421,12 @@
+     }
+   }
+   
++  if(verbose && first_element && (both_sides.front() == (int)L' ')) 
++  {
++    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
++    wcerr << L"): Entry begins with space." << endl; 
++  }
++  first_element = false; 
+   EntryToken e;
+   e.setSingleTransduction(both_sides, both_sides);
+   return e;
+@@ -444,6 +454,13 @@
+       readString(lhs, name);
+     }
+   }
++
++  if(verbose && first_element && (lhs.front() == (int)L' ')) 
++  {
++    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
++    wcerr << L"): Entry begins with space." << endl;
++  }
++  first_element = false;
+  
+   skip(name, COMPILER_RIGHT_ELEM);
+ 
+@@ -480,7 +497,15 @@
+ {
+   EntryToken e;
+   wstring nomparadigma = attrib(COMPILER_N_ATTR);
++  first_element = false;
+ 
++  if(current_paradigm != L"" && nomparadigma == current_paradigm)
++  {
++    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
++    wcerr << L"): Paradigm refers to itself '" << nomparadigma << L"'." <<endl;
++    exit(EXIT_FAILURE);
++  }
++
+   if(paradigms.find(nomparadigma) == paradigms.end())
+   {
+     wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
+@@ -632,9 +657,18 @@
+ {
+   wstring atributo=this->attrib(COMPILER_RESTRICTION_ATTR);
+   wstring ignore = this->attrib(COMPILER_IGNORE_ATTR);
++  wstring altval = this->attrib(COMPILER_ALT_ATTR);
++  wstring varval = this->attrib(COMPILER_V_ATTR);
++  wstring varl   = this->attrib(COMPILER_VL_ATTR);
++  wstring varr   = this->attrib(COMPILER_VR_ATTR);
+ 
+   //�if entry is masked by a restriction of direction or an ignore mark
+-  if((atributo != L"" && atributo != direction) || ignore == COMPILER_IGNORE_YES_VAL)
++  if((atributo != L"" && atributo != direction) 
++   || ignore == COMPILER_IGNORE_YES_VAL
++   || (altval != L"" && altval != alt)
++   || (direction == COMPILER_RESTRICTION_RL_VAL && varval != L"" && varval != variant)
++   || (direction == COMPILER_RESTRICTION_RL_VAL && varl != L"" && varl != variant_left)
++   || (direction == COMPILER_RESTRICTION_LR_VAL && varr != L"" && varr != variant_right))
+   {
+     // parse to the end of the entry
+     wstring name = L"";
+@@ -662,6 +696,11 @@
+     wstring name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+     skipBlanks(name);
+ 
++    if(current_paradigm == L"" && verbose)
++    {
++      first_element = true;
++    }
++
+     int tipo = xmlTextReaderNodeType(reader);
+     if(name == COMPILER_PAIR_ELEM)
+     {      
+@@ -845,3 +884,33 @@
+     it->second.write(output);
+   }
+ }
++// Store the selected 'alt' value as a wide string; an entry with a different, non-empty alt attribute is masked.
++void
++Compiler::setAltValue(string const &a)
++{
++  alt = XMLParseUtil::stows(a);
++}
++// Store the selected variant as a wide string; it is checked against an entry's 'v' attribute when compiling RL.
++void
++Compiler::setVariantValue(string const &v)
++{
++  variant = XMLParseUtil::stows(v);
++}
++// Store the selected left variant as a wide string; it is checked against an entry's 'vl' attribute when compiling RL.
++void
++Compiler::setVariantLeftValue(string const &v)
++{
++  variant_left = XMLParseUtil::stows(v);
++}
++// Store the selected right variant as a wide string; it is checked against an entry's 'vr' attribute when compiling LR.
++void
++Compiler::setVariantRightValue(string const &v)
++{
++  variant_right = XMLParseUtil::stows(v);
++}
++// Switch verbose mode on or off; the flag gates the "Entry begins with space" diagnostics in this file.
++void
++Compiler::setVerbose(bool verbosity)
++{
++  verbose = verbosity;
++}
+Index: lttoolbox/transducer.h
+===================================================================
+--- lttoolbox/transducer.h	(revision 21745)
++++ lttoolbox/transducer.h	(working copy)
+@@ -146,6 +146,13 @@
+   bool isFinal(int const state) const;
+ 
+   /**
++   * Test if a pattern is recognised by the FST
++   * @param patro wide string holding the pattern to be recognised
++   * @return true if the pattern is recognised by the transducer
++   */
++  bool recognise(wstring patro, Alphabet &a, FILE *err = stderr);
++
++  /**
+    * Set the state as a final or not, yes by default
+    * @param state the state
+    * @param value if true, the state is set as final state
+@@ -179,6 +186,12 @@
+   void reverse(int const epsilon_tag = 0);
+ 
+   /**
++   * Print all the transductions of a transducer in ATT format
++   * @param epsilon_tag the tag to take as epsilon
++   */
++  void show(Alphabet &a, FILE *output = stdout, int const epsilon_tag = 0);
++
++  /**
+    * Determinize the transducer
+    * @param epsilon_tag the tag to take as epsilon
+    */
+@@ -242,6 +255,12 @@
+   bool isEmpty(int const state) const;
+ 
+   /**
++   * Returns the number of transitions from a given state
++   * @return the number of transitions
++   */
++  int getStateSize(int const state);
++
++  /**
+    * Write method
+    * @param output the stream to write to
+    * @param decalage offset to sum to the tags
+Index: lttoolbox/lt_expand.cc
+===================================================================
+--- lttoolbox/lt_expand.cc	(revision 21745)
++++ lttoolbox/lt_expand.cc	(working copy)
+@@ -24,6 +24,7 @@
+ #include <iostream>
+ #include <libgen.h>
+ #include <string>
++#include <getopt.h>
+ 
+ #ifdef _MSC_VER
+ #include <io.h>
+@@ -37,7 +38,7 @@
+   if(name != NULL)
+   {
+     cout << basename(name) << " v" << PACKAGE_VERSION <<": expand the contents of a dictionary file" << endl;
+-    cout << "USAGE: " << basename(name) << " dictionary_file [output_file]" << endl;
++    cout << "USAGE: " << basename(name) << " [-avlrh] dictionary_file [output_file]" << endl;
+   }
+   exit(EXIT_FAILURE);
+ }
+@@ -45,14 +46,67 @@
+ int main(int argc, char *argv[])
+ {
+   FILE *input = NULL, *output = NULL;
++  Expander e;
+ 
+-  switch(argc)
++#if HAVE_GETOPT_LONG
++  int option_index=0;
++#endif
++
++  while (true) {
++#if HAVE_GETOPT_LONG
++    static struct option long_options[] =
++    {
++      {"alt",       required_argument, 0, 'a'},
++      {"var",       required_argument, 0, 'v'},
++      {"var-left",  required_argument, 0, 'l'},
++      {"var-right", required_argument, 0, 'r'},
++      {"help",      no_argument,       0, 'h'}, 
++      {0, 0, 0, 0}
++    };
++
++    int cnt=getopt_long(argc, argv, "a:v:l:r:h", long_options, &option_index);
++#else
++    int cnt=getopt(argc, argv, "a:v:l:r:h");
++#endif
++    if (cnt==-1)
++      break;
++
++    switch (cnt)
++    {
++      case 'a':
++        e.setAltValue(optarg);
++        break;
++
++      case 'v':
++        e.setVariantValue(optarg);
++        break;
++
++      case 'l':
++        e.setVariantLeftValue(optarg);
++        break;
++
++      case 'r':
++        e.setVariantRightValue(optarg);
++        break;
++
++      case 'h':
++      default:
++        endProgram(argv[0]);
++        break;
++    }
++  }
++
++  string infile;
++  string outfile;
++
++  switch(argc - optind + 1)
+   {
+     case 2:
+-      input = fopen(argv[1], "rb");
++      infile = argv[argc-1];
++      input = fopen(infile.c_str(), "rb");
+       if(input == NULL)
+       {
+-        cerr << "Error: Cannot open file '" << argv[1] << "'." << endl;
++        cerr << "Error: Cannot open file '" << infile << "'." << endl;
+         exit(EXIT_FAILURE);
+       }      
+       fclose(input);
+@@ -60,18 +114,20 @@
+       break;
+     
+     case 3:
+-      input = fopen(argv[1], "rb");
++      infile = argv[argc-2];
++      input = fopen(infile.c_str(), "rb");
+       if(input == NULL)
+       {
+-        cerr << "Error: Cannot open file '" << argv[1] << "'." << endl;
++        cerr << "Error: Cannot open file '" << infile << "'." << endl;
+         exit(EXIT_FAILURE);
+       }
+       fclose(input);
+ 
+-      output = fopen(argv[2], "wb");
++      outfile = argv[argc-1];
++      output = fopen(argv[argc-1], "wb");
+       if(output == NULL)
+       {
+-        cerr << "Error: Cannot open file '" << argv[2] << "'." << endl;
++        cerr << "Error: Cannot open file '" << outfile << "'." << endl;
+         exit(EXIT_FAILURE);
+       }
+       break;
+@@ -85,8 +141,7 @@
+   _setmode(_fileno(output), _O_U8TEXT);
+ #endif
+ 
+-  Expander e;
+-  e.expand(argv[1], output);
++  e.expand(infile, output);
+   fclose(output);
+   
+   return EXIT_SUCCESS;
+Index: lttoolbox/state.cc
+===================================================================
+--- lttoolbox/state.cc	(revision 21745)
++++ lttoolbox/state.cc	(working copy)
+@@ -20,10 +20,15 @@
+ 
+ #include <cstring>
+ #include <cwctype>
++#include <climits>
+ 
+-State::State(Pool<vector<int> > *p)
++//debug//
++//#include <iostream>
++//using namespace std;
++//debug//
++
++State::State()
+ {
+-  pool = p;
+ }
+  
+ State::~State()
+@@ -51,10 +56,9 @@
+ void 
+ State::destroy()
+ {
+-  // release references
+   for(size_t i = 0, limit = state.size(); i != limit; i++)
+   {
+-    pool->release(state[i].sequence);
++    delete state[i].sequence;
+   }
+ 
+   state.clear();
+@@ -66,15 +70,14 @@
+   // release references
+   for(size_t i = 0, limit = state.size(); i != limit; i++)
+   {
+-    pool->release(state[i].sequence);
++    delete state[i].sequence;
+   }
+ 
+   state = s.state;
+-  pool = s.pool;
+ 
+   for(size_t i = 0, limit = state.size(); i != limit; i++)
+   {
+-    vector<int> *tmp = pool->get();
++    vector<int> *tmp = new vector<int>();
+     *tmp = *(state[i].sequence);
+     state[i].sequence = tmp;
+   }
+@@ -90,7 +93,7 @@
+ State::init(Node *initial)
+ {
+   state.clear();
+-  state.push_back(TNodeState(initial,pool->get(),false));
++  state.push_back(TNodeState(initial, new vector<int>(), false));
+   state[0].sequence->clear();
+   epsilonClosure();  
+ }  
+@@ -113,7 +116,7 @@
+     {
+       for(int j = 0; j != it->second.size; j++)
+       {
+-        vector<int> *new_v = pool->get();
++        vector<int> *new_v = new vector<int>();
+         *new_v = *(state[i].sequence);
+         if(it->first != 0)
+         {
+@@ -122,7 +125,7 @@
+         new_state.push_back(TNodeState(it->second.dest[j], new_v, state[i].dirty||false));
+       }
+     }
+-    pool->release(state[i].sequence);
++    delete state[i].sequence;
+   }
+   
+   state = new_state;
+@@ -147,8 +150,8 @@
+     {
+       for(int j = 0; j != it->second.size; j++)
+       {
+-        vector<int> *new_v = pool->get();
+-        *new_v = *(state[i].sequence);
++        vector<int> *new_v = new vector<int>();
++	*new_v = *(state[i].sequence);
+         if(it->first != 0)
+         {
+           new_v->push_back(it->second.out_tag[j]);
+@@ -161,7 +164,7 @@
+     {
+       for(int j = 0; j != it->second.size; j++)
+       {
+-        vector<int> *new_v = pool->get();
++        vector<int> *new_v = new vector<int>();
+         *new_v = *(state[i].sequence);
+         if(it->first != 0)
+         {
+@@ -170,7 +173,7 @@
+         new_state.push_back(TNodeState(it->second.dest[j], new_v, true));
+       }
+     }
+-    pool->release(state[i].sequence);
++    delete state[i].sequence;
+   }
+ 
+   state = new_state;
+@@ -187,7 +190,7 @@
+     {
+       for(int j = 0 ; j != it2->second.size; j++)
+       {
+-        vector<int> *tmp = pool->get();
++        vector<int> *tmp = new vector<int>();
+         *tmp = *(state[i].sequence);
+         if(it2->second.out_tag[j] != 0)
+         {
+@@ -199,6 +202,69 @@
+   }
+ }
+ 
++// Advance every state over input and over the alternative symbols alt1 and
++// alt2.  Paths that follow input keep their dirty flag; paths that follow an
++// alternative are marked dirty.  If any of the three symbols is 0, the state
++// set is emptied instead.
++void 
++State::apply(int const input, int const alt1, int const alt2)
++{
++  vector<TNodeState> new_state;
++  if(input == 0 || alt1 == 0 || alt2 == 0)
++  {
++    // free the heap-allocated sequences first; just dropping the states leaks them
++    for(size_t i = 0, limit = state.size(); i != limit; i++)
++    {
++      delete state[i].sequence;
++    }
++    state.clear();
++    return;
++  }
++  
++  for(size_t i = 0, limit = state.size(); i != limit; i++)
++  {
++    // all three symbols are non-zero here, so the output tag is always appended
++    map<int, Dest>::const_iterator it;
++    it = state[i].where->transitions.find(input);
++    if(it != state[i].where->transitions.end())
++    {
++      for(int j = 0; j != it->second.size; j++)
++      {
++        vector<int> *new_v = new vector<int>(*(state[i].sequence));
++        new_v->push_back(it->second.out_tag[j]);
++        new_state.push_back(TNodeState(it->second.dest[j], new_v, state[i].dirty||false));
++      }
++    }
++    // first alternative: the resulting paths are dirty
++    it = state[i].where->transitions.find(alt1);
++    if(it != state[i].where->transitions.end())
++    {
++      for(int j = 0; j != it->second.size; j++)
++      {
++        vector<int> *new_v = new vector<int>(*(state[i].sequence));
++        new_v->push_back(it->second.out_tag[j]);
++        new_state.push_back(TNodeState(it->second.dest[j], new_v, true));
++      }
++    }
++    // second alternative: the resulting paths are dirty
++    it = state[i].where->transitions.find(alt2);
++    if(it != state[i].where->transitions.end())
++    {
++      for(int j = 0; j != it->second.size; j++)
++      {
++        vector<int> *new_v = new vector<int>(*(state[i].sequence));
++        new_v->push_back(it->second.out_tag[j]);
++        new_state.push_back(TNodeState(it->second.dest[j], new_v, true));
++      }
++    }
++
++    delete state[i].sequence;
++  }
++
++  state = new_state;
++}
++
++
+ void
+ State::step(int const input)
+ {
+@@ -213,6 +279,37 @@
+   epsilonClosure();
+ }
+ 
++void
++State::step(int const input, int const alt1, int const alt2)
++{
++  apply(input, alt1, alt2);  // follow the transitions on input, alt1 or alt2
++  epsilonClosure();          // then run epsilonClosure() on the resulting state set
++}
++
++// Step on val with alternative val2; unless caseSensitive, an upper-case
++// val that has a distinct lower-case form also tries that lower-case form.
++void 
++State::step_case(wchar_t val, wchar_t val2, bool caseSensitive) 
++{
++  if(!caseSensitive && iswupper(val) && val != towlower(val)) {
++    step(val, towlower(val), val2);
++  } else {
++    step(val, val2);
++  }
++}
++
++// Step on val; unless caseSensitive, an upper-case val also tries towlower(val).
++void 
++State::step_case(wchar_t val, bool caseSensitive) 
++{
++  if(caseSensitive || !iswupper(val)) {
++    step(val);
++    return;
++  }
++  step(val, towlower(val));
++}
++
++
+ bool
+ State::isFinal(set<Node *> const &finals) const
+ {
+@@ -282,6 +379,60 @@
+   return result;
+ }
+ 
++
++// Collect the output of every final state as a (rule id, words) pair: the
++// output sequence is split on "<$>" symbols, each non-empty piece in front
++// of a "<$>" becomes a word, and whatever follows the last "<$>" is the rule
++// id.  Symbols found in escaped_chars are prefixed with a backslash.
++// firstupper and firstchar are accepted but not used.
++set<pair<wstring, vector<wstring> > >
++State::filterFinalsLRX(set<Node *> const &finals, 
++                       Alphabet const &alphabet,
++                       set<wchar_t> const &escaped_chars,
++                       bool uppercase, bool firstupper, int firstchar) const
++{
++  set<pair<wstring, vector<wstring> > > results;
++
++  for(size_t i = 0, limit = state.size(); i != limit; i++)
++  {
++    if(finals.find(state[i].where) == finals.end())
++    {
++      continue;
++    }
++
++    vector<int> &seq = *(state[i].sequence);
++    vector<wstring> words;
++    wstring pending = L"";
++
++    for(size_t j = 0, limit2 = seq.size(); j != limit2; j++)
++    {
++      if(escaped_chars.find(seq[j]) != escaped_chars.end())
++      {
++        pending += L'\\';
++      }
++      wstring sym = L"";
++      alphabet.getSymbol(sym, seq[j], uppercase);
++      if(sym != L"<$>")
++      {
++        pending += sym;
++        continue;
++      }
++      // word boundary: keep the piece only if it is not empty
++      if(pending != L"")
++      {
++        words.push_back(pending);
++      }
++      pending = L"";
++    }
++
++    // what is left after the last "<$>" identifies the rule
++    results.insert(make_pair(pending, words));
++  }
++
++  return results;
++}
++
++
+ wstring
+ State::filterFinalsSAO(set<Node *> const &finals, 
+ 		       Alphabet const &alphabet,
+@@ -438,3 +589,149 @@
+   
+   return result;
+ }
++
++
++
++// Keep only the analyses with the fewest compound parts.  A state survives
++// when the last part of its sequence carries requiredSymbol and its number
++// of separationSymbol occurrences does not exceed the smallest such number
++// (which starts out at compound_max_elements).
++void
++State::pruneCompounds(int requiredSymbol, int separationSymbol, int compound_max_elements) 
++{
++  int minNoOfCompoundElements = compound_max_elements;
++  // a vector instead of new[]/delete[]: released on every exit path
++  vector<int> noOfCompoundElements(state.size(), INT_MAX);
++
++  for (size_t i = 0; i < state.size(); i++) {
++    // bind by reference; the sequence is only read, so no copy is needed
++    const vector<int> &seq = *state.at(i).sequence;
++    if (!lastPartHasRequiredSymbol(seq, requiredSymbol, separationSymbol)) {
++      continue;  // entry keeps the INT_MAX sentinel
++    }
++
++    // only positions 1 .. size()-2 are inspected for separators
++    int separators = 0;
++    for (int j = seq.size()-2; j > 0; j--) {
++      if (seq.at(j) == separationSymbol) separators++;
++    }
++    noOfCompoundElements[i] = separators;
++    if (separators < minNoOfCompoundElements) minNoOfCompoundElements = separators;
++  }
++
++  // one pass that keeps the survivors in order, instead of repeated erase()
++  vector<TNodeState> kept;
++  for (size_t i = 0; i < state.size(); i++) {
++    if (noOfCompoundElements[i] > minNoOfCompoundElements) {
++      delete state[i].sequence;
++    }
++    else {
++      kept.push_back(state[i]);
++    }
++  }
++  state = kept;
++}
++
++
++
++// Remove every state whose output sequence contains forbiddenSymbol and
++// free the sequence of each removed state.
++void
++State::pruneStatesWithForbiddenSymbol(int forbiddenSymbol) 
++{
++  vector<TNodeState>::iterator cur = state.begin();
++  while(cur != state.end()) {
++    vector<int> const &seq = *(cur->sequence);
++    bool forbidden = false;
++    for(size_t k = 0; k != seq.size() && !forbidden; k++) {
++      forbidden = (seq[k] == forbiddenSymbol);
++    }
++    if(!forbidden) { cur++; continue; }
++    // erase() hands back the iterator of the next state
++    delete cur->sequence;
++    cur = state.erase(cur);
++  }
++}
++
++
++
++// Walk seq backwards and report whether requiredSymbol is met before any
++// separationSymbol.  False for an empty sequence or when neither symbol
++// occurs; a symbol equal to both counts as required.
++bool
++State::lastPartHasRequiredSymbol(const vector<int> &seq, int requiredSymbol, int separationSymbol) 
++{
++  for (size_t left = seq.size(); left != 0; left--) {
++    int const symbol = seq[left-1];
++    if (symbol == requiredSymbol) {
++      return true;
++    }
++    if (symbol == separationSymbol) {
++      return false;
++    }
++  }
++  return false;  // neither symbol seen
++}
++
++
++// For every final state whose last part carries requiredSymbol, append one
++// new state per element of restart_state: it sits on that element's node and
++// carries a copy of the finished sequence followed by separationSymbol.
++// States appended here are visited by the same loop.  NULL restart_state: no-op.
++void
++State::restartFinals(const set<Node *> &finals, int requiredSymbol, State *restart_state, int separationSymbol) 
++{
++  if (restart_state == NULL) {
++    return;
++  }
++  for (size_t i = 0; i < state.size(); i++) {
++    // work on a copy: push_back below may reallocate the vector
++    TNodeState const current = state[i];
++    // a final state may still have transitions, so it stays in the set as well
++    if (finals.count(current.where) == 0
++        || !lastPartHasRequiredSymbol(*(current.sequence), requiredSymbol, separationSymbol)) {
++      continue;
++    }
++
++    for (size_t j = 0; j < restart_state->state.size(); j++) {
++      vector<int> *restarted = new vector<int>(*(current.sequence));
++      restarted->push_back(separationSymbol);
++      state.push_back(TNodeState(restart_state->state[j].where, restarted, current.dirty));
++    }
++  }
++}
++
++
++
++// Render the output sequences of all states as "[s1, s2, ...]".  Each
++// sequence is shown as the concatenation of a.getSymbol() for its symbols;
++// a state whose sequence pointer is NULL contributes an empty string.
++// The transitions of the states' nodes are not part of the output.
++wstring
++State::getReadableString(const Alphabet &a) 
++{
++  wstring rendered = L"[";
++  size_t const total = state.size();
++
++  for(size_t i = 0; i != total; i++) {
++    vector<int> *symbols = state[i].sequence;
++
++    if(symbols != NULL) {
++      for(size_t j = 0; j != symbols->size(); j++) {
++        wstring name = L"";
++        a.getSymbol(name, (*symbols)[j]);
++        rendered += name;
++      }
++    }
++
++    // separator between states, none after the last one
++    if(i+1 != total) {
++      rendered += L", ";
++    }
++  }
++
++  // close the list
++  rendered += L"]";
++  return rendered;
++}
++
+Index: lttoolbox/alphabet.cc
+===================================================================
+--- lttoolbox/alphabet.cc	(revision 21745)
++++ lttoolbox/alphabet.cc	(working copy)
+@@ -221,3 +221,9 @@
+ {
+   return spairinv[code];
+ }
++
++
++void Alphabet::setSymbol(int symbol, wstring newSymbolString) {
++  if (symbol >= 0) return;                  // only negative codes are rewritten
++  slexicinv[-symbol-1] = newSymbolString;   // code c lives in slot -c-1
++}
+Index: lttoolbox/lt-tmxproc.1
+===================================================================
+--- lttoolbox/lt-tmxproc.1	(revision 21745)
++++ lttoolbox/lt-tmxproc.1	(working copy)
+@@ -30,5 +30,4 @@
+ .SH BUGS
+ Lots of...lurking in the dark and waiting for you!
+ .SH AUTHOR
+-(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+-reserved.
++(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. 
+Index: lttoolbox/lt-comp.1
+===================================================================
+--- lttoolbox/lt-comp.1	(revision 21745)
++++ lttoolbox/lt-comp.1	(working copy)
+@@ -10,10 +10,30 @@
+ .SH SYNOPSIS
+ .B lt-comp
+ [
++.B \-a \fR| 
++.B \-v \fR| 
++.B \-l \fR| 
++.B \-r \fR| 
++.B \-h
++]
++[
+ .B lr \fR| 
+ .B rl
+ ] dictionary_file output_file
+ .PP
++.B lt-comp
++[
++.B \-\-alt \fR| 
++.B \-\-var \fR| 
++.B \-\-var\-left \fR| 
++.B \-\-var\-right \fR| 
++.B \-\-help
++]
++[
++.B lr \fR| 
++.B rl
++] dictionary_file output_file
++.PP
+ .SH DESCRIPTION
+ .BR lt-comp 
+ Is the application responsible of compiling dictionaries used by
+@@ -23,6 +43,32 @@
+ .PP
+ .SH OPTIONS
+ .TP
++.B \-a, \-\-alt
++Sets the value of the \fIalt\fR attribute to use in compilation.
++
++Note that if no value is set, all entries containing an \fIalt\fR
++attribute are omitted.
++.TP
++.B \-v, \-\-var
++Sets the value of the \fIv\fR attribute to use in compilation. 
++This should only be used with monodixes; for bidixes, see \-l and \-r.
++
++Note that if no value is set, all entries containing a \fIv\fR
++attribute are considered to be \fIleft-to-right\fR.
++.TP
++.B \-l, \-\-var\-left
++Sets the value of the \fIvl\fR attribute for use in compilation of bidixes.
++"Left" here refers to the side of the dictionary, so this option is only valid
++in \fIrl\fR mode.
++.TP
++.B \-r, \-\-var\-right
++Sets the value of the \fIvr\fR attribute for use in compilation of bidixes.
++"Right" here refers to the side of the dictionary, so this option is only valid
++in \fIlr\fR mode.
++.TP
++.B \-h, \-\-help
++Prints a short help message
++.TP
+ .B lr
+ The resulting transducer will process dictionary entries
+ \fIleft-to-right\fR.
+@@ -45,5 +91,4 @@
+ .SH BUGS
+ Lots of...lurking in the dark and waiting for you!
+ .SH AUTHOR
+-(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+-reserved.
++(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. 
+Index: lttoolbox/lt_locale.h
+===================================================================
+--- lttoolbox/lt_locale.h	(revision 21745)
++++ lttoolbox/lt_locale.h	(working copy)
+@@ -16,6 +16,7 @@
+  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+  * 02111-1307, USA.
+  */
++
+ #ifndef _MYLOCALE_
+ #define _MYLOCALE_
+ 
+Index: lttoolbox/expander.h
+===================================================================
+--- lttoolbox/expander.h	(revision 21745)
++++ lttoolbox/expander.h	(working copy)
+@@ -42,6 +42,26 @@
+   xmlTextReaderPtr reader;
+   
+   /**
++   * The alt value
++   */
++  wstring alt;
++  
++  /**
++   * The variant value (monodix)
++   */
++  wstring variant;
++  
++  /**
++   * The variant value (left side of bidix)
++   */
++  wstring variant_left;
++  
++  /**
++   * The variant value (right side of bidix)
++   */
++  wstring variant_right;
++  
++  /**
+    * The paradigm being compiled
+    */
+   wstring current_paradigm;
+@@ -186,6 +206,29 @@
+    * Compile dictionary to letter transducers
+    */
+   void expand(string const &fichero, FILE *output);
++  /**
++   * Set the alt value to use in compilation
++   * @param a the value
++   */
++   void setAltValue(string const &a);
++
++  /**
++   * Set the variant value to use in expansion
++   * @param v the value
++   */
++   void setVariantValue(string const &v);
++
++  /**
++   * Set the variant_left value to use in expansion
++   * @param v the value
++   */
++   void setVariantLeftValue(string const &v);
++
++  /**
++   * Set the variant_right value to use in expansion
++   * @param v the value
++   */
++   void setVariantRightValue(string const &v);
+ };
+ 
+ 
+Index: lttoolbox/transducer.cc
+===================================================================
+--- lttoolbox/transducer.cc	(revision 21745)
++++ lttoolbox/transducer.cc	(working copy)
+@@ -18,6 +18,7 @@
+  */
+ #include <lttoolbox/transducer.h>
+ #include <lttoolbox/compression.h>
++#include <lttoolbox/alphabet.h>
+ #include <lttoolbox/lttoolbox_config.h>
+ #include <lttoolbox/my_stdio.h>
+ 
+@@ -187,6 +188,13 @@
+ void
+ Transducer::setFinal(int const state, bool valor)
+ {
++  int initial_copy = getInitial();
++/*
++  if(state == initial_copy)
++  {
++    wcerr << L"Setting initial state to final" << endl;
++  }
++*/
+   if(valor)
+   {
+     finals.insert(state);
+@@ -609,3 +617,119 @@
+   finals.clear();
+   finals.insert(tmp);
+ }
++
++void
++Transducer::show(Alphabet &alphabet, FILE *output, int const epsilon_tag)
++{
++  // Writes the transducer to output as text. Each transition becomes one line:
++  //   source state, target state, left symbol, right symbol
++  // with a tab after every field (so also before the newline); a symbol whose
++  // text is empty is printed as ε. One line per final state follows.
++  // NOTE(review): joinFinals(epsilon_tag) runs first, so show() is presumably
++  // not read-only with respect to the transducer - confirm against joinFinals.
++  joinFinals(epsilon_tag);
++
++  for(map<int, multimap<int, int> >::iterator it = transitions.begin(); it != transitions.end(); it++)
++  {
++    // Walk the state's transitions in place; nothing below alters them.
++    multimap<int, int> &arcs = it->second;
++
++    for(multimap<int, int>::iterator it2 = arcs.begin(); it2 != arcs.end(); it2++)
++    {
++      pair<int, int> t = alphabet.decode(it2->first);
++      wstring sides[2];  // [0] = left symbol text, [1] = right symbol text
++      alphabet.getSymbol(sides[0], t.first);
++      alphabet.getSymbol(sides[1], t.second);
++
++      fwprintf(output, L"%d\t%d\t", it->first, it2->second);
++      for(int side = 0; side < 2; side++)
++      {
++        if(sides[side] == L"")  // If we find an epsilon
++        {
++          fwprintf(output, L"ε\t");
++        }
++        else
++        {
++          fwprintf(output, L"%S\t", sides[side].c_str());
++        }
++      }
++      fwprintf(output, L"\n");
++    }
++  }
++
++  // Final states, one per line.
++  for(set<int>::iterator it3 = finals.begin(); it3 != finals.end(); it3++)
++  {
++    fwprintf(output, L"%d\n", *it3);
++  }
++}
++
++int 
++Transducer::getStateSize(int const state)
++{
++  // Adds up the number of transitions stored for each state returned by
++  // closure(state, 0). Note: operator[] on transitions creates an empty
++  // entry for a state that has none.
++  set<int> const reachable = closure(state, 0);
++  int total = 0;
++
++  for(set<int>::const_iterator member = reachable.begin(); member != reachable.end(); ++member)
++  {
++    total += transitions[*member].size();
++  }
++  return total;
++}
++
++bool
++Transducer::recognise(wstring patro, Alphabet &a, FILE *err)
++{
++  // Feeds patro through the transducer one character at a time while tracking
++  // the set of alive states (starting from closure(getInitial(), 0)). Returns
++  // true when at least one state alive after the last character is final.
++  // A transition is taken when the text of its left symbol contains the
++  // current character; its target contributes closure(target, 0).
++  // err is not used by the active code.
++  set<int> alive = closure(getInitial(), 0);
++
++  // For each of the characters in the input string
++  for(wstring::iterator ch = patro.begin(); ch != patro.end(); ch++)
++  {
++    set<int> next_alive;
++
++    // For each of the current alive states
++    for(set<int>::iterator src = alive.begin(); src != alive.end(); src++)
++    {
++      // Copy of this state's transitions (operator[] creates an empty entry if it has none).
++      multimap<int, int> outgoing = transitions[*src];
++
++      // For each of the transitions in the state
++      for(multimap<int, int>::iterator arc = outgoing.begin(); arc != outgoing.end(); arc++)
++      {
++        pair<int, int> t = a.decode(arc->first);
++        wstring left = L"";
++        a.getSymbol(left, t.first);
++        if(left.find(*ch) == wstring::npos)
++        {
++          continue;
++        }
++        set<int> reached = closure(arc->second, 0);
++        next_alive.insert(reached.begin(), reached.end());
++      }
++    }
++    // The next character is matched against the states reached in this step.
++    alive = next_alive;
++  }
++
++  // Accept if any surviving state is final.
++  bool accepted = false;
++  for(set<int>::iterator st = alive.begin(); st != alive.end(); st++)
++  {
++    if(isFinal(*st))
++    {
++      accepted = true;
++    }
++  }
++
++  return accepted;
++}
++
+Index: lttoolbox/pool.h
+===================================================================
+--- lttoolbox/pool.h	(revision 21745)
++++ lttoolbox/pool.h	(working copy)
+@@ -1,175 +0,0 @@
+-/*
+- * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+- *
+- * This program is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU General Public License as
+- * published by the Free Software Foundation; either version 2 of the
+- * License, or (at your option) any later version.
+- *
+- * This program is distributed in the hope that it will be useful, but
+- * WITHOUT ANY WARRANTY; without even the implied warranty of
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+- * General Public License for more details.
+- *
+- * You should have received a copy of the GNU General Public License
+- * along with this program; if not, write to the Free Software
+- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+- * 02111-1307, USA.
+- */
+-#ifndef _GENERIC_POOL_
+-#define _GENERIC_POOL_
+-
+-#include <list>
+-
+-using namespace std;
+-
+-/**
+- * Pool of T objects
+- */
+-template <class T>
+-class Pool
+-{
+-private:
+-  /**
+-   * Free pointers to objects
+-   */
+-  list<T *> free;
+-  
+-  /**
+-   * Currently created objects
+-   */
+-  list<T> created;
+-  
+-  /**
+-   * copy method
+-   * @param other pool object
+-   */
+-  void copy(Pool const &p)
+-  {
+-    created = p.created;
+-  
+-    // all new members are available
+-    for(typename list<T>::iterator it = created.begin(), limit = created.end();
+-        it != limit; it++)
+-    {
+-      free.push_back(&(*it));
+-    }
+-  }
+-  
+-  /**
+-   * destroy method
+-   */
+-  void destroy()
+-  {
+-    // do nothing
+-  }
+-  
+-  /**
+-   * Allocate a pool of nelems size
+-   * @param nelems initial size of the pool
+-   */
+-  void init(unsigned int const nelems)
+-  {
+-    created.clear();
+-    free.clear();
+-    T tmp;
+-    for(unsigned int i = 0; i != nelems; i++)
+-    {
+-      created.push_front(tmp);
+-      free.push_front(&(*(created.begin())));
+-    }
+-  }
+-
+-  /**
+-   * Allocate a pool of nelems size with objects equal to 'object'
+-   * @param nelems initial size of the pool
+-   * @param object initial value of the objects in the pool
+-   */
+-  void init(unsigned int const nelems, T const &object)
+-  {
+-    created.clear();
+-    free.clear();
+-    for(unsigned int i = 0; i != nelems; i++)
+-    {
+-      created.push_front(object);
+-      free.push_front(&(*(created.begin())));
+-    }
+-  }
+-
+-  
+-public:
+-  
+-  /**
+-   * Constructor
+-   */
+-  Pool()
+-  {
+-    init(1);
+-  }
+-    
+-  /**
+-   * Parametrized constructor
+-   * @param nelems initial size of the pool
+-   * @param object initial value of the objects in the pool
+-   */
+-  Pool(unsigned int const nelems, T const &object)
+-  {
+-    init(nelems, object);
+-  }
+-  
+-  /**
+-   * Parametrized constructor
+-   * @param nelems initial size of the pool
+-   */
+-  Pool(unsigned int const nelems)
+-  {
+-    init(nelems);
+-  }
+-  
+-  /**
+-   * Destructor
+-   */
+-  ~Pool()
+-  {
+-    destroy();
+-  }
+-  
+-  /**
+-   * Copy constructor
+-   */
+-  Pool(Pool const &p)
+-  {
+-    copy(p);
+-  }
+-   
+-  /**
+-   * Allocate a pointer to a free 'new' object.
+-   * @return pointer to the object
+-   */
+-  T * get()
+-  {
+-    if(free.size() != 0)
+-    {
+-      T *result = *(free.begin());
+-      free.erase(free.begin());
+-      return result;
+-    }
+-    else
+-    {
+-      T tmp;
+-      created.push_front(tmp);
+-      return &(*(created.begin()));
+-    }
+-  }  
+-  
+-  /**
+-   * Release a no more needed instance of a pooled object
+-   * @param item the no more needed instance of the object
+-   */ 
+-  void release(T *item)
+-  {
+-    free.push_front(item);
+-  }
+-};
+-
+-#endif
+Index: lttoolbox/compiler.h
+===================================================================
+--- lttoolbox/compiler.h	(revision 21745)
++++ lttoolbox/compiler.h	(working copy)
+@@ -44,6 +44,26 @@
+   xmlTextReaderPtr reader;
+   
+   /**
++   * The alt value
++   */
++  wstring alt;
++
++  /**
++   * The variant value (monodix)
++   */
++  wstring variant;
++  
++  /**
++   * The variant value (left side of bidix)
++   */
++  wstring variant_left;
++  
++  /**
++   * The variant value (right side of bidix)
++   */
++  wstring variant_right;
++    
++  /**
+    * The paradigm being compiled
+    */
+   wstring current_paradigm;
+@@ -65,6 +85,16 @@
+   wstring letters;
+   
+   /**
++   * Set verbose mode: warnings which may or may not be correct
++   */
++  bool verbose;
++
++  /**
++   * First element (of an entry)
++   */
++  bool first_element;
++
++  /**
+    * Identifier of all the symbols during the compilation
+    */
+   Alphabet alphabet;  
+@@ -264,10 +294,14 @@
+   static wstring const COMPILER_LEMMA_ATTR;
+   static wstring const COMPILER_IGNORE_ATTR;
+   static wstring const COMPILER_IGNORE_YES_VAL;
++  static wstring const COMPILER_ALT_ATTR;
++  static wstring const COMPILER_V_ATTR;
++  static wstring const COMPILER_VL_ATTR;
++  static wstring const COMPILER_VR_ATTR;
+ 
+ 
+   /**
+-   * Copnstructor
++   * Constructor
+    */
+   Compiler();
+ 
+@@ -292,6 +326,35 @@
+    * @param fd the stream where write the result
+    */
+   void write(FILE *fd);
++
++  /**
++   * Set verbose output
++   */
++  void setVerbose(bool verbosity = false);
++
++  /**
++   * Set the alt value to use in compilation
++   * @param a the value
++   */
++  void setAltValue(string const &a);
++
++  /**
++   * Set the variant value to use in compilation
++   * @param v the value
++   */
++  void setVariantValue(string const &v);
++
++  /**
++   * Set the variant_left value to use in compilation
++   * @param v the value
++   */
++  void setVariantLeftValue(string const &v);
++
++  /**
++   * Set the variant_right value to use in compilation
++   * @param v the value
++   */
++  void setVariantRightValue(string const &v);
+ };
+ 
+ 
+Index: lttoolbox/lt-tmxcomp.1
+===================================================================
+--- lttoolbox/lt-tmxcomp.1	(revision 21745)
++++ lttoolbox/lt-tmxcomp.1	(working copy)
+@@ -38,5 +38,4 @@
+ .SH BUGS
+ Lots of...lurking in the dark and waiting for you!
+ .SH AUTHOR
+-(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+-reserved.
++(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. 
+Index: lttoolbox/alphabet.h
+===================================================================
+--- lttoolbox/alphabet.h	(revision 21745)
++++ lttoolbox/alphabet.h	(working copy)
+@@ -145,6 +145,13 @@
+    */
+   bool isTag(int const symbol) const;
+ 
++  /**
++   * Sets an already existing symbol to represent a new value
++   * @param symbol the code of the symbol to set
++   * @param newSymbolString the new string for this symbol
++   */
++  void setSymbol(int symbol, wstring newSymbolString);
++
+   pair<int, int> const & decode(int const code) const;
+   
+ };
+Index: lttoolbox/state.h
+===================================================================
+--- lttoolbox/state.h	(revision 21745)
++++ lttoolbox/state.h	(working copy)
+@@ -19,6 +19,7 @@
+ #ifndef _STATE_
+ #define _STATE_
+ 
++#include <map>
+ #include <set>
+ #include <string>
+ #include <vector>
+@@ -26,7 +27,9 @@
+ 
+ #include <lttoolbox/alphabet.h>
+ #include <lttoolbox/node.h>
+-#include <lttoolbox/pool.h>
++#include <lttoolbox/match_exe.h>
++#include <lttoolbox/match_state.h>
++#include <lttoolbox/transducer.h>
+ 
+ using namespace std;
+ 
+@@ -43,7 +46,7 @@
+   {
+     Node *where;
+     vector<int> *sequence;
+-    bool dirty;
++    bool dirty; // What does "dirty" mean ? 
+     
+     TNodeState(Node * const &w, vector<int> * const &s, bool const &d): where(w), sequence(s), dirty(d){}
+     TNodeState & operator=(TNodeState const &other)
+@@ -58,17 +61,6 @@
+   vector<TNodeState> state;
+ 
+   /**
+-   * Pool of wchar_t vectors, for efficience (static class)
+-   */
+-  Pool<vector<int> > *pool;  
+-
+-  /**
+-   * Copy function
+-   * @param s the state to be copied
+-   */
+-  void copy(State const &s);
+-
+-  /**
+    * Destroy function
+    */
+   void destroy();
+@@ -86,6 +78,8 @@
+    */
+   void apply(int const input, int const alt);
+ 
++  void apply(int const input, int const alt1, int const alt2);
++
+   /**
+    * Calculate the epsilon closure over the current state, replacing
+    * its content.
+@@ -92,11 +86,21 @@
+    */
+   void epsilonClosure();
+ 
++  bool lastPartHasRequiredSymbol(const vector<int> &seq, int requiredSymbol, int separationSymbol);
++
+ public:
++
+   /**
++   * Copy function
++   * @param s the state to be copied
++   */
++  void copy(State const &s);
++
++
++  /**
+    * Constructor
+    */
+-  State(Pool<vector<int> > *);
++  State();
+ 
+   /**
+    * Destructor
+@@ -135,6 +139,13 @@
+    */
+   void step(int const input, int const alt);
+ 
++  void step(int const input, int const alt1, int const alt2);
++
++  void step_case(wchar_t val, bool caseSensitive);
++
++  void step_case(wchar_t val, wchar_t val2, bool caseSensitive);
++
++
+   /**
+    * Init the state with the initial node and empty output
+    * @param initial the initial node of the transducer
+@@ -142,6 +153,21 @@
+   void init(Node *initial);
+ 
+   /**
++    * Remove states not containing a specific symbol in their last 'part', and states 
++    * with more than a number of 'parts'
++    * @param requiredSymbol the symbol required in the last part
++    * @param separationSymbol the symbol that represents the separation between two parts
++    * @param compound_max_elements the maximum part number allowed
++    */
++  void pruneCompounds(int requiredSymbol, int separationSymbol, int compound_max_elements);
++
++  /**
++    * Remove states containing a forbidden symbol
++    * @param forbiddenSymbol the symbol forbidden
++    */
++  void pruneStatesWithForbiddenSymbol(int forbiddenSymbol);
++
++  /**
+    * Print all outputs of current parsing, preceded by a bar '/',
+    * from the final nodes of the state
+    * @param finals the set of final nodes
+@@ -156,8 +182,8 @@
+   wstring filterFinals(set<Node *> const &finals, Alphabet const &a,
+                       set<wchar_t> const &escaped_chars,
+                       bool uppercase = false,
+-		      bool firstupper = false,
+-		      int firstchar = 0) const;
++                      bool firstupper = false,
++                      int firstchar = 0) const;
+ 
+   /**
+    * Same as previous one, but  the output is adapted to the SAO system
+@@ -173,11 +199,44 @@
+   wstring filterFinalsSAO(set<Node *> const &finals, Alphabet const &a,
+                       set<wchar_t> const &escaped_chars,
+                       bool uppercase = false,
+-		      bool firstupper = false,
+-		      int firstchar = 0) const;
++                      bool firstupper = false,
++                      int firstchar = 0) const;
+ 
+ 
+   /**
++   * Same as previous one, but  the output is adapted to the LRX system
++   * @param finals the set of final nodes
++   * @param a the alphabet to decode strings
++   * @param escaped_chars the set of chars to be preceded with one 
++   *                      backslash
++   * @param uppercase true if the word is uppercase
++   * @param firstupper true if the first letter of a word is uppercase
++   * @param firstchar first character of the word
++   * @return the result of the transduction
++   */
++
++  set<pair<wstring, vector<wstring> > > filterFinalsLRX(set<Node *> const &finals, Alphabet const &a,
++                      set<wchar_t> const &escaped_chars,
++                      bool uppercase = false,
++                      bool firstupper = false,
++                      int firstchar = 0) const;
++
++
++
++
++
++  /**
++   * Find final states, skip those whose last part lacks requiredSymbol, and 'restart' each remaining one as the 
++   * set of initial states, but remembering the sequence and adding a separationSymbol
++   * @param finals
++   * @param requiredSymbol
++   * @param restart_state
++   * @param separationSymbol
++   */
++    void restartFinals(const set<Node *> &finals, int requiredSymbol, State *restart_state, int separationSymbol);
++
++
++  /**
+    * Returns true if at least one record of the state references a
+    * final node of the set
+    * @param finals set of final nodes @return
+@@ -185,6 +244,11 @@
+    */
+   bool isFinal(set<Node *> const &finals) const;
+ 
++  /**
++   * Return the full states string (to allow debugging...) using a Java ArrayList.toString style
++   */
++  wstring getReadableString(const Alphabet &a);
++
+   wstring filterFinalsTM(set<Node *> const &finals, 
+ 			 Alphabet const &alphabet,
+                          set<wchar_t> const &escaped_chars,
+Index: lttoolbox/Makefile.am
+===================================================================
+--- lttoolbox/Makefile.am	(revision 21745)
++++ lttoolbox/Makefile.am	(working copy)
+@@ -2,7 +2,7 @@
+ h_sources = alphabet.h buffer.h compiler.h compression.h  \
+             entry_token.h expander.h fst_processor.h lt_locale.h ltstr.h \
+             match_exe.h match_node.h match_state.h my_stdio.h node.h \
+-            pattern_list.h pool.h regexp_compiler.h sorted_vector.h state.h \
++            pattern_list.h regexp_compiler.h sorted_vector.h state.h \
+             transducer.h trans_exe.h xml_parse_util.h exception.h tmx_compiler.h
+ cc_sources = alphabet.cc compiler.cc compression.cc entry_token.cc \
+              expander.cc fst_processor.cc lt_locale.cc match_exe.cc \
+@@ -13,7 +13,7 @@
+ library_includedir = $(includedir)/$(GENERIC_LIBRARY_NAME)-$(GENERIC_API_VERSION)/$(GENERIC_LIBRARY_NAME)
+ library_include_HEADERS = $(h_sources)
+ 
+-bin_PROGRAMS = lt-comp lt-proc lt-expand lt-tmxcomp lt-tmxproc
++bin_PROGRAMS = lt-comp lt-proc lt-expand lt-tmxcomp lt-tmxproc lt-print
+ instdir = lttoolbox
+ 
+ lib_LTLIBRARIES= liblttoolbox3.la
+@@ -26,6 +26,10 @@
+ 
+ lttoolbox_DATA = dix.dtd
+ 
++lt_print_SOURCES = lt_print.cc  
++lt_print_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la
++lt_print_LDFLAGS = -llttoolbox$(GENERIC_MAJOR_VERSION) $(LTTOOLBOX_LIBS)
++
+ lt_comp_SOURCES = lt_comp.cc  
+ lt_comp_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la
+ lt_comp_LDFLAGS = -llttoolbox$(GENERIC_MAJOR_VERSION) $(LTTOOLBOX_LIBS)
+@@ -46,8 +50,18 @@
+ lt_tmxproc_LDADD = liblttoolbox$(GENERIC_MAJOR_VERSION).la
+ lt_tmxproc_LDFLAGS = -llttoolbox$(GENERIC_MAJOR_VERSION) $(LTTOOLBOX_LIBS)
+ 
+-man_MANS = lt-comp.1 lt-expand.1 lt-proc.1 lt-tmxcomp.1 lt-tmxproc.1
++#lt-validate-dictionary: Makefile.am validate-header.sh
++#	@echo "Creating lt-validate-dictionary script"
++#	@echo "#!$(BASH)" > $@
++#	@cat validate-header.sh >> $@
++#	@echo "$(XMLLINT) --dtdvalid $(apertiumdir)/dix.dtd --noout \$$FILE1 && exit 0;" >> $@
++#	@echo "exit 1;" >> $@
++#	@chmod a+x $@
+ 
++
++
++man_MANS = lt-comp.1 lt-expand.1 lt-proc.1 lt-tmxcomp.1 lt-tmxproc.1 lt-print.1
++
+ INCLUDES = -I$(top_srcdir) $(LTTOOLBOX_CFLAGS)
+ CLEANFILES = *~
+ 
+Index: lttoolbox/lt-print.1
+===================================================================
+--- lttoolbox/lt-print.1	(revision 0)
++++ lttoolbox/lt-print.1	(revision 44914)
+@@ -0,0 +1,34 @@
++.TH lt-print 1 2006-03-08 "" ""
++.SH NAME
++lt-print \- This application is part of the lexical processing modules
++and tools (
++.B lttoolbox
++)
++.PP
++This tool is part of the apertium machine translation
++architecture: \fBhttp://www.apertium.org\fR.
++.SH SYNOPSIS
++.B lt-print
++ bin_file
++.PP
++.SH DESCRIPTION
++.BR lt-print
++Is the application responsible for printing compiled dictionaries in
++ATT format.
++.PP
++.B bin_file 
++The compiled input file.
++.PP
++.B standard output
++The transducer in ATT format.
++
++.SH SEE ALSO
++.I lt-comp\fR(1),
++.I lt-proc\fR(1),
++.I lt-expand\fR(1),
++.I apertium-tagger\fR(1),
++.I apertium\fR(1).
++.SH BUGS
++Lots of...lurking in the dark and waiting for you!
++.SH AUTHOR
++(c) 2005--2012 Universitat d'Alacant / Universidad de Alicante. 
+Index: lttoolbox/lt_print.cc
+===================================================================
+--- lttoolbox/lt_print.cc	(revision 0)
++++ lttoolbox/lt_print.cc	(revision 44914)
+@@ -0,0 +1,106 @@
++/*
++ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License as
++ * published by the Free Software Foundation; either version 2 of the
++ * License, or (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
++ * 02111-1307, USA.
++ */
++#include <lttoolbox/transducer.h>
++#include <lttoolbox/compression.h>
++#include <lttoolbox/lttoolbox_config.h>
++
++#include <lttoolbox/my_stdio.h>
++#include <lttoolbox/lt_locale.h>
++
++#include <cstdlib>
++#include <iostream>
++#include <libgen.h>
++#include <string>
++
++using namespace std;
++
++void endProgram(char *name)
++{
++  // Prints the usage text when a program name is given; always exits with EXIT_FAILURE.
++  if(name == NULL) exit(EXIT_FAILURE);
++
++  cout << basename(name) << " v" << PACKAGE_VERSION <<": dump a transducer to text in ATT format" << endl;
++  cout << "USAGE: " << basename(name) << " bin_file " << endl;
++  exit(EXIT_FAILURE);
++}
++
++
++int main(int argc, char *argv[])
++{
++  // Usage: lt-print bin_file
++  // Reads the compiled dictionary bin_file and writes each transducer in it to
++  // stdout via Transducer::show(), with a "--" line between consecutive ones.
++  if(argc != 2) 
++  {
++    endProgram(argv[0]);
++  }
++
++  LtLocale::tryToSetLocale();
++
++  // "rb": the compiled dictionary is binary data.
++  FILE *input = fopen(argv[1], "rb");
++  if(input == NULL)
++  {
++    // Without this check a missing or unreadable file reached the readers as NULL.
++    cerr << "Error: Cannot open file '" << argv[1] << "'." << endl;
++    exit(EXIT_FAILURE);
++  }
++
++  Alphabet new_alphabet;
++  map<wstring, Transducer> transducers;
++
++  // letters: not needed for printing, but they must be read to reach what follows
++  int len = Compression::multibyte_read(input);
++  while(len > 0)
++  {
++    Compression::multibyte_read(input);
++    len--;
++  }
++
++  // symbols
++  new_alphabet.read(input);
++
++  // transducers: a count, then for each one its name (length + characters) and body
++  len = Compression::multibyte_read(input);
++  while(len > 0)
++  {
++    int len2 = Compression::multibyte_read(input);
++    wstring name = L"";
++    while(len2 > 0)
++    {
++      name += static_cast<wchar_t>(Compression::multibyte_read(input));
++      len2--;
++    }
++    transducers[name].read(input);
++    len--;
++  }
++  fclose(input);
++
++  FILE *output = stdout;
++  for(map<wstring, Transducer>::iterator it = transducers.begin(); it != transducers.end(); it++)
++  {
++    // Separator before every transducer but the first (well defined for an empty map too).
++    if(it != transducers.begin())
++    {
++      fwprintf(output, L"--\n");
++    }
++    it->second.show(new_alphabet, output);
++  }
++  return 0;
++}
================================================================

---- gitweb:

http://git.pld-linux.org/gitweb.cgi/packages/lttoolbox.git/commitdiff/4306e092da881c50b399a1865e0713d856e4a0ec




More information about the pld-cvs-commit mailing list