/************************************************************************/
/* GL_String.cc: Replacement for GNU's libg++ string class...      	*/
/************************************************************************/
/* Author: Andreas Gerstlauer <gerstl>		first version: 03/02/00 */
/************************************************************************/

/* last update: 09/26/06 */

/* modifications: (most recent first)
 *
 * 09/26/06 PC  Adjustments for scrc 2.1
 * 02/20/06 AG	changed internal string length from 'unsigned short' to 'size_t'
 * 06/03/05 RD	reorganized and renamed global type names
 * 10/05/04 RD	adjustments for compilation on 64-bit architectures
 * 06/15/04 PC  Adjustments for scrc 2.0
 * 04/19/02 AG	fixed a bug in string comparing, added regression test
 * 11/21/01 RD	took out default arguments from function definitions
 * 11/21/01 RD	started this header (last change was 11/05/01 by AG)
 */

#include "Global.h"

#include <stdio.h>
#include <limits.h>


// Shared empty-string sentinel. The buffer helpers in this file compare
// buffer pointers against it so that it is never handed to GL_free().
const char* const _nullString = "";


// ------------------- GL_String methods ---------------------


// Format this string printf-style from an already started argument list.
//
//   format  printf-style format string (must not be NULL)
//   args    arguments matching 'format'; consumed by this call, the caller
//           only has to va_end() it afterwards
//
// Returns a reference to this string, which then holds the formatted text.
//
// The required room is measured by the C library itself (vsnprintf() on a
// copy of the argument list), so flags, widths, precisions, length modifiers
// and floating point conversions are all sized correctly.
GL_String& GL_String::vform(const char* format, va_list args)
{
  assert(format);

  // Pass 1: measure only. Work on a copy, 'args' is still needed for pass 2.
  va_list probe;
  va_copy(probe, args);
  int needed = vsnprintf(NULL, 0, format, probe);
  va_end(probe);
  assert(needed >= 0);
  if (needed < 0) needed = 0;	// output error: fall back to an empty result

  // Always switch to a fresh buffer: 'format' or a '%s' argument may point
  // into the current contents, which therefore must stay untouched and
  // allocated until formatting is finished.
  char* old_str = salloc((size_t)needed, 0, true);

  // Pass 2: produce the text; the write is bounded by the buffer size.
  int written = vsnprintf(_str, _size, format, args);
  assert(written == needed);
  if (written < 0) {
    _str[0] = '\0';
    written = 0;
  }
  _len = (size_t)written;
  assert(_len < _size);

  // Only now is it safe to release the previous contents
  if ((old_str != _str) && (old_str != _nullString)) GL_free(old_str);

  return *this;
}

// Format this string printf-style from a variable number of arguments.
// Packs the arguments into a va_list and lets vform() do the actual work.
GL_String& GL_String::form(const char* format, ...)
{
  va_list ap;

  va_start(ap, format);
  vform(format, ap);
  va_end(ap);

  return *this;
}


//  return number of occurences of target in string
int GL_String::freq(char c) const
{
   int   count = 0;
   char* cur = _str;
   
   while ((cur = strchr(cur, c))) {
      count++;
      cur++;
   }
   return count;   
}

// Return the number of distinct, non-overlapping occurrences of the
// C string t in this string. A NULL or empty t yields 0.
int GL_String::freq(const char* t) const
{
  int    count = 0;
  size_t t_len = t? strlen(t) : 0;

  // An empty pattern matches everywhere without consuming input, so the
  // search below would never advance; treat it as "no occurrence".
  if (!t_len) return 0;

  const char* cur = _str;
  while ((cur = strstr(cur, t))) {
    count++;
    cur += t_len;	// continue behind the match: no overlapping hits
  }

  return count;
}


// Substring extraction

// Return the substring covering the first occurrence of the C string t
// found at or after startpos. If t is NULL, this string is empty, or t does
// not occur, the result is at(0, 0).
GL_SubString GL_String::at(const char* t, size_t startpos /* = 0 */)
{
  assert (startpos <= _len);

  char* p = (_len && t)? strstr(&_str[startpos], t) : 0;

  // No match: bail out before p takes part in any pointer arithmetic
  if (!p) return at((size_t)0, (size_t)0);

  assert (p >= _str);
  return at((size_t)(p - _str), strlen(t));
}

// Return the one-character substring at the first occurrence of c found at
// or after startpos. If this string is empty or c does not occur, the
// result is at(0, 0).
GL_SubString GL_String::at(char c, size_t startpos /* = 0 */) 
{
  assert (startpos <= _len);

  char* p = _len? strchr(&_str[startpos], c) : 0;

  // No match: bail out before p takes part in any pointer arithmetic
  if (!p) return at((size_t)0, (size_t)0);

  assert (p >= _str);
  return at((size_t)(p - _str), 1);
}


// Return the part of this string to the left of the first occurrence of the
// C string t found at or after startpos. If t is NULL, this string is empty,
// or t does not occur, the result is before(0).
GL_SubString GL_String::before(const char* t, size_t startpos /* = 0 */)
{
  assert (startpos <= _len);

  char* p = (_len && t)? strstr(&_str[startpos], t) : 0;

  // No match: bail out before p takes part in any pointer arithmetic
  if (!p) return before((size_t)0);

  assert (p >= _str);
  return before((size_t)(p - _str));
}

// Return the part of this string to the left of the first occurrence of c
// found at or after startpos. If this string is empty or c does not occur,
// the result is before(0).
GL_SubString GL_String::before(char c, size_t startpos /* = 0 */) 
{
  assert (startpos <= _len);

  char* p = _len? strchr(&_str[startpos], c) : 0;

  // No match: bail out before p takes part in any pointer arithmetic
  if (!p) return before((size_t)0);

  assert (p >= _str);
  return before((size_t)(p - _str));
}

// Return the part of this string to the left of the last occurrence of c,
// searching only from startpos to the end. If this string is empty or c
// does not occur there, the result is before(0).
GL_SubString GL_String::beforelast(char c, size_t startpos /* = 0 */) 
{
  assert (startpos <= _len);

  char* p = _len? strrchr(&_str[startpos], c) : 0;

  // No match: bail out before p takes part in any pointer arithmetic
  if (!p) return before((size_t)0);

  assert (p >= _str);
  return before((size_t)(p - _str));
}

// Return the part of this string to the right of the first occurrence of
// the C string t found at or after startpos. Without a match (or with a
// NULL t / empty string) the result is after(_len - 1).
GL_SubString GL_String::after(const char* t, size_t startpos /* = 0 */)
{
  assert (startpos <= _len);

  // locate the match, if a search makes sense at all
  char* hit = 0;
  if (_len && t) {
    hit = strstr((startpos > 0)? &_str[startpos] : _str, t);
  }

  // index of the last character that is NOT part of the result; the
  // unsigned wrap-around for "- 1" is intended and relied upon by after()
  size_t last;
  if (hit) {
    last = (hit - _str) + (strlen(t) - 1);
  } else {
    last = _len - 1;
  }

  return after(last);
}

// Return the part of this string to the right of the first occurrence of c
// found at or after startpos. If this string is empty or c does not occur,
// the result is after(_len - 1).
GL_SubString GL_String::after(char c, size_t startpos /* = 0 */)
{
  assert (startpos <= _len);

  char* p = _len? strchr(&_str[startpos], c) : 0;

  // No match: bail out before p takes part in any pointer arithmetic
  if (!p) return after(_len - 1);

  assert (p >= _str);
  return after((size_t)(p - _str));
}

// Return the part of this string to the right of the last occurrence of c,
// searching only from startpos to the end. If this string is empty or c
// does not occur there, the result is after(_len - 1).
GL_SubString GL_String::afterlast(char c, size_t startpos /* = 0 */)
{
  assert (startpos <= _len);

  char* p = _len? strrchr(&_str[startpos], c) : 0;

  // No match: bail out before p takes part in any pointer arithmetic
  if (!p) return after(_len - 1);

  assert (p >= _str);
  return after((size_t)(p - _str));
}

// Helper routines for allocation, copying, etc.

// Make room for a string of 'len' characters and set _len accordingly.
//
//   len    new string length (terminator not counted)
//   keep   number of leading characters of the current contents to carry
//          over if a new buffer gets allocated (must be <= current _len)
//   force  allocate a new buffer even if the current one is large enough
//
// Returns the buffer that was in use on entry. If a new buffer was
// allocated this differs from _str and is NOT freed here; the callers in
// this file free it themselves unless it is _nullString.
// If the existing buffer is reused, nothing is written to it (not even a
// terminator at position 'len').
char* GL_String::salloc(size_t len, size_t keep, bool force)
{
  assert (keep <= _len);
  
  char* old_str = _str;
  
  // enough room?
  if ( (len >= _size) || force )
  {
    // grow: start at 16 and double until len plus terminator fit
    if (!_size) _size = 16;
    while (_size <= len) _size <<= 1;
    
    _str = (char*) GL_malloc (_size * sizeof(char));
    _str[len] = '\0';
    
    // Are we keeping a copy of current string?
    // Plain byte copy: strings may contain embedded '\0' characters, which
    // a str*() routine would treat as the end of the data.
    if (keep) {
      memcpy(_str, old_str, keep * sizeof(char));
    }
  }
   
  // set new length
  _len = len;
  
  return old_str;
}
  
// Copy 'len' characters from t into this string's buffer, starting at
// index 'startpos'. The written range may extend at most one character
// beyond _len (room for the terminator).
void GL_String::scopy(const char* t, size_t startpos, size_t len)
{
  assert (t);
  assert (startpos <= _len);
  assert (len + startpos  <= _len + 1);

  // source and destination may overlap, so memmove() is required here
  char* dest = _str + startpos;
  memmove(dest, t, len * sizeof(char));
}


// Replace 'repl' characters at index 'pos' by the first 'len' characters
// of t (repl == 0 is a pure insertion, len == 0 a pure deletion).
void GL_String::sinsert(const char* t, size_t pos, size_t len, size_t repl)
{
  assert (pos + repl <= _len);

  // t lies completely before or completely behind our current contents?
  // Only if it does not, i.e. if t and _str overlap, the data has to be
  // moved into a fresh buffer so that t is not clobbered while we edit.
  bool disjoint = (t + len < _str) || (t > _str + _len);

  char* old_str = salloc(_len + len - repl, pos, !disjoint);

  // move the tail (incl. terminator) into place first, then drop in t
  scopy(&old_str[pos + repl], pos + len, _len - (pos + len) + 1);
  scopy(t, pos, len);
  if ((old_str != _str) && (old_str != _nullString)) GL_free(old_str);
}

// Replace 'repl' characters at index 'pos' by the single character c
// (repl == 0 inserts c in front of position pos).
void GL_String::sinsert(char c, size_t pos, size_t repl)
{
  assert (pos + repl <= _len);

  char* prev = salloc(_len + 1 - repl, pos);

  // shift the tail (incl. terminator) behind the slot reserved for c
  const char* tail = &prev[pos + repl];
  scopy(tail, pos + 1, _len - pos);
  _str[pos] = c;

  // release the previous buffer if salloc() replaced it
  if (prev != _str) {
    if (prev != _nullString) GL_free(prev);
  }
}


// ------------------- GL_SubString methods ---------------------


// Replace the characters covered by this substring with the contents of y;
// afterwards the substring covers the inserted text.
GL_SubString& GL_SubString::operator =  (const GL_String&     y) 
{
  // Take the length up front: y may be the very string this substring
  // refers to, in which case its length changes during the insertion.
  const size_t y_len = y.length();

  _str.sinsert(y.chars(), _pos, y_len, _len); 
  _len = y_len;
  return *this;
}

// Replace the characters covered by this substring with the characters
// covered by substring y; afterwards this substring has y's length.
GL_SubString& GL_SubString::operator =  (const GL_SubString&  y) 
{
  const size_t y_len = y.length();

  _str.sinsert(y.chars(), _pos, y_len, _len);
  _len = y_len;

  return *this;
}

// Replace the characters covered by this substring with the C string t;
// afterwards the substring covers the inserted text. A NULL t is treated
// like "", i.e. the covered characters are deleted and the substring
// becomes empty.
GL_SubString& GL_SubString::operator =  (const char* t) 
{
  if (!t) t = "";

  const size_t t_len = strlen(t);
  _str.sinsert(t, _pos, t_len, _len);
  _len = t_len;

  return *this;
}

// Replace the characters covered by this substring with the single
// character c; afterwards the substring has length 1.
GL_SubString& GL_SubString::operator =  (char        c) 
{
  // the whole current range gets replaced
  _str.sinsert(c, _pos, _len);

  _len = 1;
  return *this;
}



// ------------------- Global functions ---------------------


// Three-way comparison of two strings in strncmp() fashion: the result is
// negative, zero or positive. The comparison is bounded by the longer of
// the two lengths.
int compare(const GL_String& x, const GL_String& y) 
{
  const char* lhs = x.chars();
  const char* rhs = y.chars();

  return strncmp(lhs, rhs, MAX(x.length(), y.length()));
}

// Three-way comparison of a string with a C string. The first x.length()
// characters decide; if they are equal, the difference of the lengths is
// returned.
int compare(const GL_String& x, const char* t) 
{
  const int head = strncmp(x.chars(), t, x.length());

  if (head != 0) {
    return head;
  }
  // equal prefix: the length difference decides
  return x.length() - strlen(t);
}

// Three-way comparison of a substring with a string. The first x.length()
// characters decide; if they are equal, the difference of the lengths is
// returned.
int compare(const GL_SubString& x, const GL_String& y) 
{
  const int head = strncmp(x.chars(), y.chars(), x.length());

  if (head != 0) {
    return head;
  }
  // equal prefix: the length difference decides
  return x.length() - y.length();
}

// Three-way comparison of a string with a substring. The characters are
// compared with swapped operands over y.length() characters (the
// substring's length bounds the comparison), hence the sign flip; if they
// are equal, the difference of the lengths is returned.
int compare(const GL_String& x, const GL_SubString&  y) 
{
  const int swapped = strncmp(y.chars(), x.chars(), y.length());

  if (swapped != 0) {
    return -swapped;
  }
  // equal prefix: the length difference decides
  return x.length() - y.length();
}

// Three-way comparison of two substrings. The characters are compared over
// the length of the shorter substring; if they are equal there, the
// difference of the lengths is returned.
int compare(const GL_SubString& x, const GL_SubString&  y) 
{
  const int diff = x.length() - y.length();

  // bound the comparison by the shorter operand; in the second form the
  // operands are swapped, so the sign gets flipped back
  const int res = (diff < 0)
                ?  strncmp(x.chars(), y.chars(), x.length())
                : -strncmp(y.chars(), x.chars(), y.length());

  return res? res : diff;
}

// Three-way comparison of a substring with a C string. The first
// x.length() characters decide; if they are equal, the difference of the
// lengths is returned.
int compare(const GL_SubString& x, const char* t) 
{
  const int head = strncmp(x.chars(), t, x.length());

  if (head != 0) {
    return head;
  }
  // equal prefix: the length difference decides
  return x.length() - strlen(t);
}


/************************************************************************/
/*** main (for debugging only)					      ***/
/************************************************************************/


#ifdef DEBUG	/* module self test */

/* Self-test helper: report a failed check on stderr (the description of
 * the check plus the offending string value) and terminate with status 1. */
void GL_StringError(GL_String S, const char* msg)
{
   const char* actual = (const char*)S;

   fprintf(stderr, "String error: %s is %s\n", msg, actual);
   exit(1);
}

/* Module self test (only built with DEBUG defined).
 * Runs a fixed sequence of checks on GL_String / GL_SubString; the first
 * failing check reports through GL_StringError() and exits with status 1,
 * a complete run ends with exit(0). Command line arguments are ignored.
 * Several checks modify x or y and restore them right afterwards, so the
 * order of the statements matters. */
int main(int argc, char **argv)
{
   GL_String x = "Hello";
   GL_String y = "world";
   GL_String n = "123";
   GL_String z(0, 31);
   const char*  s = ",";
   GL_String lft, mid, rgt;
   GL_String words[10];
   words[0] = "a";
   words[1] = "b";
   words[2] = "c";
   
   printf("DEBUGGING: GL_String ('%s')\n\n", argv[0]);
   if (argc != 1)
     { puts("WARNING: Arguments will be ignored!\n");
     } /* fi */
   
   printf("Testing GL_String class...\n");
   if( z.length() != 0 ) GL_StringError(z, "z.length() = 0");  
  
   /* The following tests were taken from the specification of the GL_String class in the
    * GNU libg++ library. At this point only very few selected methods are really 
    * implemented (and hence tested). This will be extended as need arises...
    */
   
   /* Comparing, Searching, and Matching */
   
   /*  x.index("lo") 
           returns the zero-based index of the leftmost occurrence of substring "lo" (3, in this case). The argument may be a GL_String, GL_SubString, char, char*, or
           Regex. */   
   /*  x.index("l", 2) 
             returns the index of the first of the leftmost occurrence of "l" found starting the search at position x[2], or 2 in this case. */
   /*  x.index("l", -1) 
             returns the index of the rightmost occurrence of "l", or 3 here. */
   /*  x.index("l", -3) 
             returns the index of the rightmost occurrence of "l" found by starting the search at the 3rd to the last position of x, returning 2 in this case. */
   /*  pos = r.search("leo", 3, len, 0) 
             returns the index of r in the char* string of length 3, starting at position 0, also placing the length of the match in reference parameter len.  */
   /*  x.contains("He") 
             returns nonzero if the string x contains the substring "He". The argument may be a GL_String, GL_SubString, char, char*, or Regex.  */
   /*  x.contains("el", 1) 
             returns nonzero if x contains the substring "el" at position 1. As in this example, the second argument to contains, if present, means to match the
           substring only at that position, and not to search elsewhere in the string.  */
   /*  x.contains(RXwhite); 
         returns nonzero if x contains any whitespace (space, tab, or newline). Recall that RXwhite is a global whitespace Regex. */
   /*  x.matches("lo", 3) 
             returns nonzero if x starting at position 3 exactly matches "lo", with no trailing characters (as it does in this example). */
   /*  x.matches(r) 
             returns nonzero if string x as a whole matches Regex r.  */
   int f = x.freq("l");		if( f != 2 ) GL_StringError( x, "x.freq(\"l\")" );
          /* returns the number of distinct, nonoverlapping matches to the argument (2 in this case). */
   
   /* Substring extraction */
   
   z = x.at(2, 3);		if( z != "llo" ) GL_StringError( z, "x.at(2,3)" );
	 /* sets string z to be equal to the length 3 substring of string x
	    starting at zero-based position 2, setting z to "llo" in this case. A
	    nil string is returned if the arguments don't make sense. */

   x.at(2, 2) = "r";		if( x != "Hero" ) GL_StringError( x, "x.at(2,2) = \"r\"" );
	 /* Sets what was in positions 2 to 3 of x to "r", setting x to "Hero" in
	    this case. As indicated here, GL_SubString assignments may be of different
	    lengths. */
            x = "Hello";
   x.at("He") = "je";		if( x != "jello" ) GL_StringError( x, "x.at(\"He\") = \"je\"" );
	 /*  x.at("He") is the substring of x that matches the first occurrence of its
	     argument. The substitution sets x to "jello". If "He" did not occur,
	     the substring would be nil, and the assignment would have no effect. */
             x = "Hello";
   /* x.at("l", -1) = "i"; */
          /* replaces the rightmost occurrence of "l" with "i", setting x to
	     "Helio". */
   /* z = x.at(r); */
	  /* sets String z to the first match in x of Regex r, or "ello" in this
	     case. A nil string is returned if there is no match. */
   z = x.before("o");		if( z != "Hell" ) GL_StringError( z, "x.before(\"o\")"  );
	  /* sets z to the part of x to the left of the first occurrence of "o", or
	     "Hell" in this case. The argument may also be a GL_String, GL_SubString, or
	     Regex. (If there is no match, z is set to "".) */
   x.before("ll") = "Bri";	if( x != "Brillo" ) GL_StringError( x, "x.before(\"ll\") = \"Bri\"" );
	  /* sets the part of x to the left of "ll" to "Bri", setting x to "Brillo". */
             x = "Hello";
   z = x.before(2);		if( z != "He" ) GL_StringError( z, "x.before(2)" );
	  /* sets z to the part of x to the left of x[2], or "He" in this case. */
   z = x.after("Hel");		if( z != "lo" ) GL_StringError( z, "x.after(\"Hel\")" );
	  /* sets z to the part of x to the right of "Hel", or "lo" in this case. */
   /* z = x.through("el")
	     sets z to the part of x up and including "el", or "Hel" in this case. */
   /* z = x.from("el")
	     sets z to the part of x from "el" to the end, or "ello" in this case. */
   x.after("Hel") = "p";	if( x != "Help" ) GL_StringError( x, "x.after(\"Hel\") = \"p\"" );
	  /* sets x to "Help"; */
             x = "Hello";
   z = x.after(3);		if( z != "o" ) GL_StringError( z, "x.after(3)" );
          /* sets z to the part of x to the right of x[3] or "o" in this case. */
   /* z = " ab c"; z = z.after(RXwhite)
	     sets z to the part of its old string to the right of the first group of
	     whitespace, setting z to "ab c"; Use gsub(below) to strip out multiple
	     occurrences of whitespace or any pattern. */
   x[0] = 'J';			if( x != "Jello" ) GL_StringError( x, "x[0] = J" );
	  /* sets the first element of x to 'J'. x[i] returns a reference to the ith
	     element of x, or triggers an error if i is out of range. */
             x = "Hello";									  
   /* common_prefix(x, "Help")
	     returns the string containing the common prefix of the two strings or
	     "Hel" in this case. */
   /* common_suffix(x, "to")
	     returns the string containing the common suffix of the two strings or
	     "o" in this case. */
   
   
   /* Concatenation */

   z = x + s + ' ' + y.at("w") + y.after("w") + "."; if( z != "Hello, world." ) GL_StringError( z, "z = ..." );
	 /*  sets z to "Hello, world." */
   x += y;		if( x != "Helloworld" ) GL_StringError( x, "x += y" );
	 /*  sets x to "Helloworld" */
         x = "Hello";
   /* cat(x, y, z)
	     A faster way to say z = x + y. */
   /* cat(z, y, x, x)
	     Double concatenation; A faster way to say x = z + y + x. */
   y.prepend(x);	if( y != "Helloworld" ) GL_StringError( y, "y.prepend(x)" );
	 /*  A faster way to say y = x + y. */
             y = "world";
   /* z = replicate(x, 3);
	     sets z to "HelloHelloHello". */
   /* z = join(words, 3, "/")
	     sets z to the concatenation of the first 3 strings in string array
	     words, each separated by "/", setting z to "a/b/c" in this case. The
	     last argument may be "" or 0, indicating no separation. */

   /* Other manipulations */

   /* z = "this string has five words"; i = split(z, words, 10, RXwhite);
	     sets up to 10 elements of string array words to the parts of z
	     separated by whitespace, and returns the number of parts actually
	     encountered (5 in this case). Here, words[0] = "this", words[1] =
	     "string", etc. The last argument may be any of the usual. If there is
	     no match, all of z ends up in words[0]. The words array is not
	     dynamically created by split. */
   /* int nmatches x.gsub("l","ll")
	     substitutes all original occurrences of "l" with "ll", setting x to
	     "Hellllo". The first argument may be any of the usual, including Regex.
	     If the second argument is "" or 0, all occurrences are deleted. gsub
	     returns the number of matches that were replaced. */
   z = x + y; z.del("loworl");		if( z != "Held" ) GL_StringError( z, "z.del()" );
	 /*  deletes the leftmost occurrence of "loworl" in z, setting z to "Held". */
   /* z = reverse(x)
	     sets z to the reverse of x, or "olleH". */
   /* z = upcase(x)
	     sets z to x, with all letters set to uppercase, setting z to "HELLO" */
   /* z = downcase(x)
	     sets z to x, with all letters set to lowercase, setting z to "hello" */
   /* z = capitalize(x)
	     sets z to x, with the first letter of each word set to uppercase, and
	     all others to lowercase, setting z to "Hello" */
   /* x.reverse(), x.upcase(), x.downcase(), x.capitalize()
	     in-place, self-modifying versions of the above. */

   
   /* Some regression tests */

   /* a string must not compare equal to a longer string it is a prefix of */
   GL_String s1("ab");
   GL_String s2("abc");
   if( s1 == s2 ) GL_StringError("TRUE", "'ab' == 'abc'");
  
   /* assigning a substring of a string back to that same string */
   GL_String pes("proc1 proc2");
   pes += ' ';   
   pes = pes.after(' ');
   if( pes != "proc2 " ) GL_StringError(pes, "pes = pes.after()");      

   /* form() with one conversion of each supported kind */
   pes.form("%c1%s3%d5%u7%x9%o%%", '%', "2", 4, 6, 8, 0);
   if( pes != "%1234567890%" ) GL_StringError(pes, "pes.form() = '%1234567890%'");

   /* construction from a buffer with explicit length keeps an embedded '\0' */
   GL_String tst("Test\0Test", 9);
   if( tst.length() != 9 ) GL_StringError(tst, "tst.length() = 9");
   if( tst[4] != '\0' || tst[5] != 'T' ) GL_StringError(tst, "tst = 'Test\\0Test'");
     
   exit(0);
}

#endif
