3c50320adac71693cef577a5dbd3d4d4

This function splits a string into different parts using a separator character and a escape character. Returns an array of strings with a NULL value at the end of the array.

Definitions:
* Separator character: Is the character that divide the string in parts.
* Escape character: When this character is found, it'll search to another escape character, and all the characters contained between the pair will create a part, though this part contains a separator character.

Sorry for my bad English.

char **split (char *string, char sep, char escape) {

    char **ret = NULL;
    char *p = string;
    size_t i = 0;
    int nParts = 0;
    size_t len = strlen (string);

    ret = (char **) malloc (sizeof (char *));
    if (ret == NULL)
        return NULL;

    ret[0] = NULL;

    while (i < len) {

        // Jump the separator characters
        while ((p[i] == sep) && (i < len))
            i++;

        // A non separator character is found
        char c = sep;

        // Check if it's a escape character
        if ((i < len) && (p[i] == escape)) {
            i++;

            // If it's the last character, exit
            if (i >= len)
                break;

            c = escape;
        }


        // If it's not the end of the string
        if (i < len) {

            char **tmpList = (char **) realloc (ret, (nParts + 2) * sizeof (char *));
            if (tmpList == NULL)
                goto Error;

            ret = tmpList;
            tmpList = NULL;

            ret[nParts + 1] = NULL;
            ret[nParts] = (char *) malloc (sizeof (char));
            if (ret[nParts] == NULL)
                goto Error;

            int j = 0;
            while ((p[i] != c) && (i < len)) {
                ret[nParts][j] = p[i];

                i++;
                j++;

                char *tmpStr = (char *) realloc (ret[nParts], (j + 1) * sizeof (char));
                if (tmpStr == NULL)
                    goto Error;

                ret[nParts] = tmpStr;
                tmpStr = NULL;
            }

            ret[nParts][j] = '\0';
            nParts++;

            i++;
        }
    }

    return ret;

Error:

    for (i = 0; ret[i] != NULL; i++)
        free (ret[i]);
    free (ret);

    return NULL;
}

Refactorings

No refactoring yet !

1e8f141e7857d397d8020ed3b759e88a

Maciej Piechotka

September 16, 2008, September 16, 2008 10:06, permalink

No rating. Login to rate!

May be something like that? The addition of error-checking should be relativly easy (removed to be clear).

I'm not sure what your code do - especially with the memory allocation:
ret = (char **) malloc (sizeof (char *)); // Array of string of size 1?
ret[nParts] = (char *) malloc (sizeof (char)); // String of size 0? ("\0")

#include <stdlib.h>
#include <string.h>

const int ALLOC_BLOCK = 16;

char **
split (char *str, const char sep, const char esc)
{
  char **ret;        /* Allocated block */
  int    ret_alloc;  /* How much we allocated */
  int    ret_iter;   /* The current pointer index */
  char  *iter;       /* The current character */
  char  *iter_begin; /* The start of current */
  int    in_escape;  /* If currently in escape block */

  /* Initial allocation */
  ret_alloc = ALLOC_BLOCK;
  ret = malloc(sizeof(char *) * ret_alloc);
  ret_iter = 0;

  /* Main loop */
  iter = str;
  iter_begin = iter;
  in_escape = 0;
  while(1)
    {
      if(*iter == esc) /* If escape character */
  	    {
  	      if(in_escape)
  	        in_escape = 1;
      	  else
      	    in_escape = 0;
      	}
      else if(*iter == sep || *iter == 0) /* If separator */
      	{
      	  /* Copy from the beginning */
      	  ret[ret_iter++] = strndup(iter_begin, iter - iter_begin);
      	  /* Allocate block if near empty */
      	  if(ret_iter == ret_alloc)
      	    {
      	      ret_alloc += ALLOC_BLOCK;
      	      ret = realloc(NULL, sizeof(char *) * ret_alloc);
      	    }
      	  if (*iter) /* If it is not the end */
      	    {
      	      iter_begin = (iter + 1);
      	    }
      	  else /* It is the end */
      	    {
      	      break;
      	    }
      	}
      else
      	{
      	  /* Do nothing */
      	}
      iter++;
    }
  ret[ret_iter] = NULL; /* Terminating NULL */
  return ret;
}
1e8f141e7857d397d8020ed3b759e88a

Maciej Piechotka

September 16, 2008, September 16, 2008 10:42, permalink

No rating. Login to rate!

YAR - it ommits the separators and have much more cleaner structure.

#include <stdlib.h>
#include <string.h>

const int ALLOC_BLOCK = 16;

void
process_escape_block (char **iter, char **out, int *offset, char esp)
{
  char *begin, *end;

  begin = *iter;
  while(**iter != esp)
    (*iter)++;
  end = (*iter)++;

  *out = realloc(*out, *offset + (end - begin) + 1);
  strncpy(*out + *offset, begin, end - begin);
  *offset = *offset + (end - begin);
}

char *
process_block (char **iter, const char sep, const char esp)
{
  char *out;
  int   len;

  out = NULL;
  len = 0;

  while(1)
    {
      char *begin, *end;

      begin = *iter;
      while(**iter != sep && **iter != esp && **iter != '\0')
        (*iter)++;
      end = *iter;

      out = realloc(out, len + (end - begin) + 1);
      strncpy(out + len, begin, end - begin);
      len += (end - begin);
      
      if(**iter == esp)
      	{
      	  (*iter)++;
      	  process_escape_block(iter, &out, &len, esp);
      	}
      else if(**iter == sep)
      	{
      	  (*iter)++;
      	  break;
      	}
      else
      	{
      	  break;
      	}
    }
  return out;
}

char **
split (char *str, const char sep, const char esc)
{
  const int ALLOC_BLOCK = 16;
  
  char     **ret;
  int        ret_alloc;
  int        ret_iter;
  char      *iter;

  ret = NULL;
  ret_alloc = 0;
  ret_iter = 0;
  
  iter = str;
  while(1)
    {
      if(ret_iter == ret_alloc)
      	{
      	  ret_alloc += ALLOC_BLOCK;
      	  ret = realloc(ret, ret_alloc);
      	}

      if(*iter == '\0')
      	break;
      
      ret[ret_iter++] = process_block(&iter, sep, esc);
    }
  ret[ret_iter] = NULL;

  return ret;
}
3c50320adac71693cef577a5dbd3d4d4

Fran

September 16, 2008, September 16, 2008 13:07, permalink

No rating. Login to rate!

char **pchar = (char **) malloc (sizeof (char *)); // allocs memory for a pointer to a string
char *string = (char *) malloc (sizeof (char)); // allocs memory for a character.

So pchar[0] will contains a string, while string[0] will contains a character (like 'a').

A8d3f35baafdaea851914b17dae9e1fc

Adam

September 17, 2008, September 17, 2008 18:54, permalink

No rating. Login to rate!
#ifndef _SPLIT_H_
#define _SPLIT_H_

char * string_create(char *start, char *end);
char ** split(char *string, char seperator, char escape);

#endif
#include <stdlib.h>
#include <string.h>
#include "array.h"

char * string_create(char *start, char *end)
{
    char *result = malloc(sizeof(char) * (end - start + 1));

    strncpy(result, start, end - start);
    
    return result;
}

char ** split(char *string, char seperator, char escape)
{
    char **result;
    int escape_count = 0;
    char *string_offset = string;
    char seperation_character = seperator;
    Array *array = array_init();
    
    while (*string++) {
        if (*string == escape) {
            escape_count++;
            seperation_character = escape;
        }
        
        if (*string == seperation_character || *string == '\0') {
            array_push(array, string_create(string_offset, string));
            string_offset = string + 1;
            
            if (escape_count > 1) {
                escape_count--;
                seperation_character = seperator;
            }
        }
    }
    
    result = array_to_char_array(array);
    array_free(array);    
    
    return result;
}
#ifndef _ARRAY_H_
#define _ARRAY_H_

typedef struct _array {
    char **elements;
    int size;
} Array;

Array * array_init();
Array * array_push(Array *array, char *value);
char ** array_to_char_array(Array *array);
void array_free(Array *array);

#endif
#include <stdlib.h>
#include <string.h>
#include "array.h"

Array * array_init()
{
    Array *array = malloc(sizeof(Array));

    array->elements = NULL;
    array->size = 0;
    
    return array;
}

Array * array_push(Array *array, char *value)
{
    array->elements = realloc(array->elements, sizeof(char *) * ++array->size);
    array->elements[array->size - 1] = value;
    
    return array;
}

char ** array_to_char_array(Array *array)
{
    char **result = malloc(sizeof(char *) * array->size + 1);
    
    memcpy(result, array->elements, sizeof(char *) * array->size);
    
    return result;
}

void array_free(Array *array)
{
    free(array->elements);
    free(array);
}
F9a9ba6663645458aa8630157ed5e71e

Ants

January 26, 2009, January 26, 2009 11:23, permalink

No rating. Login to rate!

There's a MAJOR bug in split() when dealing with with escape characters. Consider what happens with a call to split("@a;@", ';', '@'). I'll let Adam deal with that bug.

MINOR bugs in array_to_char_array():
1) Multiplication has higher precedence than addition.
2) malloc() does not zero fill, so this does not set the last array element to NULL as required by the problem. You should use calloc() to zero fill, or just set the las element explicitly.

Fix for the minor bug is below:

char ** array_to_char_array(Array *array)
{
    char **result = malloc(sizeof(char *) * (array->size + 1));
    
    memcpy(result, array->elements, sizeof(char *) * array->size);

    // Not overrunning buffer because malloc above used (array->size + 1)
    result[array->size] = NULL;

    return result;
}

Your refactoring





Format Copy from initial code

or Cancel