3c50320adac71693cef577a5dbd3d4d4

This function splits a string into parts using a separator character and an escape character. It returns an array of strings terminated by a NULL pointer.

Definitions:
* Separator character: the character that divides the string into parts.
* Escape character: when this character is found, the function searches for the next escape character; all the characters between the pair form a single part, even if that part contains separator characters.

Sorry for my bad English.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
/*
 * Splits `string` into parts and returns them as a NULL-terminated array.
 *
 * Parameters:
 *   string - NUL-terminated text to split; it is not modified.
 *   sep    - separator character. Runs of separators are skipped, so they
 *            never produce empty parts.
 *   escape - escape character. When a part starts with it, the part runs
 *            up to the next escape character (or the end of the string)
 *            and may contain separators; the escape characters themselves
 *            are not copied. An escape-delimited part may be empty. An
 *            escape character that is the very last character is ignored.
 *
 * Returns a malloc'd array of malloc'd strings whose last element is NULL.
 * The caller frees every string and then the array. Returns NULL when
 * `string` is NULL or when an allocation fails.
 */
char **split (char *string, char sep, char escape) {

    // strlen (NULL) is undefined behaviour, so reject it up front
    if (string == NULL)
        return NULL;

    size_t len = strlen (string);
    size_t i = 0;
    size_t nParts = 0;

    char **ret = malloc (sizeof *ret);
    if (ret == NULL)
        return NULL;

    ret[0] = NULL;

    while (i < len) {

        // Jump the separator characters
        while ((i < len) && (string[i] == sep))
            i++;

        // By default a part ends at the next separator
        char stop = sep;

        // A part opened by the escape character ends at the next escape
        if ((i < len) && (string[i] == escape)) {
            i++;

            // The escape was the last character: nothing left to read
            if (i >= len)
                break;

            stop = escape;
        }

        // Only separators were left
        if (i >= len)
            break;

        // Measure the part first so it can be allocated in one go
        size_t start = i;
        while ((i < len) && (string[i] != stop))
            i++;
        size_t partLen = i - start;

        // Room for the new part plus the terminating NULL
        char **tmpList = realloc (ret, (nParts + 2) * sizeof *ret);
        if (tmpList == NULL)
            goto Error;

        ret = tmpList;

        // Keep the array NULL-terminated at all times so that the error
        // path below can always walk it safely
        ret[nParts + 1] = NULL;
        ret[nParts] = malloc (partLen + 1);
        if (ret[nParts] == NULL)
            goto Error;

        memcpy (ret[nParts], string + start, partLen);
        ret[nParts][partLen] = '\0';
        nParts++;

        // Step over the character that ended the part
        i++;
    }

    return ret;

Error:

    for (i = 0; ret[i] != NULL; i++)
        free (ret[i]);
    free (ret);

    return NULL;
}

Refactorings

No refactoring yet !

1e8f141e7857d397d8020ed3b759e88a

Maciej Piechotka

September 16, 2008, September 16, 2008 10:06, permalink

No rating. Login to rate!

Maybe something like this? Adding error checking should be relatively easy (it was removed for clarity).

I'm not sure what your code does - especially with the memory allocation:
ret = (char **) malloc (sizeof (char *)); // Array of string of size 1?
ret[nParts] = (char *) malloc (sizeof (char)); // String of size 0? ("\0")

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#include <stdlib.h>
#include <string.h>

/* Number of pointer slots added each time the result array grows. */
const int ALLOC_BLOCK = 16;

/*
 * Splits `str` at every `sep` that is not inside an escape block.
 *
 * An escape block starts at an `esc` character and ends at the next one;
 * separators inside it do not split. The `esc` characters are kept in the
 * produced parts. An escape block that is never closed simply runs to the
 * end of the string. Empty parts are kept, so "" yields one empty part and
 * "a,,b" yields three parts.
 *
 * Returns a malloc'd, NULL-terminated array of malloc'd strings that the
 * caller must free (each string, then the array). Returns NULL when `str`
 * is NULL or when an allocation fails.
 */
char **
split (char *str, const char sep, const char esc)
{
  char  **ret;        /* Allocated block */
  size_t  ret_alloc;  /* How many slots we allocated */
  size_t  ret_iter;   /* The current pointer index */
  char   *iter;       /* The current character */
  char   *iter_begin; /* The start of the current part */
  int     in_escape;  /* If currently in escape block */

  if(str == NULL)
    return NULL;

  /* Initial allocation */
  ret_alloc = (size_t) ALLOC_BLOCK;
  ret = malloc(sizeof(char *) * ret_alloc);
  if(ret == NULL)
    return NULL;
  ret_iter = 0;

  /* Main loop */
  iter = str;
  iter_begin = iter;
  in_escape = 0;
  while(1)
    {
      if(*iter != '\0' && *iter == esc) /* If escape character */
        {
          /* Toggle: the first one opens the block, the next closes it */
          in_escape = !in_escape;
        }
      else if(*iter == '\0' || (*iter == sep && !in_escape))
        {
          /* End of a part. The terminator always ends the last part,
             even inside an unclosed escape block, so that the scan can
             never run past the end of the string. */
          size_t  part_len = (size_t) (iter - iter_begin);
          char   *part = malloc(part_len + 1);

          if(part == NULL)
            goto error;
          /* Copy from the beginning */
          memcpy(part, iter_begin, part_len);
          part[part_len] = '\0';
          ret[ret_iter++] = part;

          /* Grow when full so the terminating NULL always has a slot */
          if(ret_iter == ret_alloc)
            {
              size_t   new_alloc = ret_alloc + (size_t) ALLOC_BLOCK;
              char   **grown = realloc(ret, sizeof(char *) * new_alloc);

              if(grown == NULL)
                goto error;
              ret = grown;
              ret_alloc = new_alloc;
            }

          if(*iter == '\0') /* It is the end */
            break;
          iter_begin = (iter + 1);
        }
      iter++;
    }
  ret[ret_iter] = NULL; /* Terminating NULL */
  return ret;

error:
  /* Release every part stored so far, then the array itself */
  while(ret_iter > 0)
    free(ret[--ret_iter]);
  free(ret);
  return NULL;
}
1e8f141e7857d397d8020ed3b759e88a

Maciej Piechotka

September 16, 2008, September 16, 2008 10:42, permalink

No rating. Login to rate!

YAR - it omits the separators and has a much cleaner structure.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#include <stdlib.h>
#include <string.h>

const int ALLOC_BLOCK = 16;

/*
 * Appends the body of an escape block to the output buffer.
 *
 * On entry *iter points at the first character after the opening escape
 * character. The characters up to the closing `esp` (or up to the end of
 * the string when the block is never closed) are appended to *out at
 * index *offset, and the buffer is NUL-terminated.
 *
 * On return *iter points just past the closing `esp`, or at the string
 * terminator when there was no closing `esp`. *out may have been moved by
 * realloc and *offset is advanced by the number of characters appended.
 *
 * If the buffer cannot be grown, *out and *offset are left untouched (the
 * text of the block is dropped) while *iter is still advanced.
 */
void
process_escape_block (char **iter, char **out, int *offset, char esp)
{
  char   *begin, *end;
  char   *grown;
  size_t  span;

  begin = *iter;
  /* Also stop at the terminator: an unclosed block must not run off the
     end of the string */
  while(**iter != esp && **iter != '\0')
    (*iter)++;
  end = *iter;
  /* Step over the closing escape character, but never over the NUL */
  if(**iter != '\0')
    (*iter)++;

  span = (size_t) (end - begin);
  grown = realloc(*out, (size_t) *offset + span + 1);
  if(grown == NULL)
    return;

  memcpy(grown + *offset, begin, span);
  grown[(size_t) *offset + span] = '\0';
  *out = grown;
  *offset = *offset + (int) span;
}

/*
 * Reads one part starting at *iter and returns it as a new string.
 *
 * Plain characters are copied until a separator, an escape character or
 * the end of the string is reached. An escape character hands over to
 * process_escape_block(), after which copying of the same part resumes;
 * the escape characters themselves are not copied. A separator ends the
 * part and is consumed.
 *
 * On return *iter points at the first character of the next part, or at
 * the string terminator.
 *
 * Returns a malloc'd, NUL-terminated string owned by the caller, or NULL
 * if memory could not be allocated.
 */
char *
process_block (char **iter, const char sep, const char esp)
{
  char *out;
  int   len;

  out = NULL;
  len = 0;

  while(1)
    {
      char   *begin, *end;
      char   *grown;
      size_t  span;

      begin = *iter;
      while(**iter != sep && **iter != esp && **iter != '\0')
        (*iter)++;
      end = *iter;

      span = (size_t) (end - begin);
      grown = realloc(out, (size_t) len + span + 1);
      if(grown == NULL)
        {
          /* realloc left the old buffer alive; release it */
          free(out);
          return NULL;
        }
      out = grown;
      memcpy(out + len, begin, span);
      len += (int) span;
      /* Terminate after every chunk so the result is always a valid
         string, whichever branch below ends the part */
      out[len] = '\0';

      if(**iter == '\0')
        {
          /* End of input; checked first so the terminator is never
             stepped over, even when esp or sep is '\0' */
          break;
        }
      else if(**iter == esp)
        {
          (*iter)++;
          process_escape_block(iter, &out, &len, esp);
        }
      else
        {
          /* Separator: consume it and finish this part */
          (*iter)++;
          break;
        }
    }
  return out;
}

/*
 * Splits `str` into parts by calling process_block() until the end of the
 * string is reached.
 *
 * A separator that is the last character does not produce a trailing
 * empty part, and an empty string yields an array holding only the
 * terminating NULL.
 *
 * Returns a malloc'd, NULL-terminated array of strings that the caller
 * must free (each string, then the array). Returns NULL when `str` is
 * NULL or when an allocation fails.
 */
char **
split (char *str, const char sep, const char esc)
{
  const int ALLOC_BLOCK = 16;

  char     **ret;
  size_t     ret_alloc;  /* Number of pointer slots allocated */
  size_t     ret_iter;   /* Number of parts stored so far */
  char      *iter;

  if(str == NULL)
    return NULL;

  ret = NULL;
  ret_alloc = 0;
  ret_iter = 0;

  iter = str;
  while(1)
    {
      /* Grow before the end-of-string test so the terminating NULL
         always has a slot */
      if(ret_iter == ret_alloc)
        {
          size_t   new_alloc = ret_alloc + (size_t) ALLOC_BLOCK;
          /* The size is in bytes: slot count times pointer size */
          char   **grown = realloc(ret, new_alloc * sizeof *ret);

          if(grown == NULL)
            goto error;
          ret = grown;
          ret_alloc = new_alloc;
        }

      if(*iter == '\0')
        break;

      ret[ret_iter] = process_block(&iter, sep, esc);
      if(ret[ret_iter] == NULL)
        goto error;
      ret_iter++;
    }
  ret[ret_iter] = NULL;

  return ret;

error:
  /* Release every part stored so far, then the array itself */
  while(ret_iter > 0)
    free(ret[--ret_iter]);
  free(ret);
  return NULL;
}
3c50320adac71693cef577a5dbd3d4d4

Fran

September 16, 2008, September 16, 2008 13:07, permalink

No rating. Login to rate!

char **pchar = (char **) malloc (sizeof (char *)); // allocs memory for a pointer to a string
char *string = (char *) malloc (sizeof (char)); // allocs memory for a character.

So pchar[0] will contain a string, while string[0] will contain a character (like 'a').

A8d3f35baafdaea851914b17dae9e1fc

Adam

September 17, 2008, September 17, 2008 18:54, permalink

No rating. Login to rate!

split.h

1
2
3
4
5
6
7
#ifndef _SPLIT_H_
#define _SPLIT_H_

/*
 * Returns a newly malloc'd buffer holding a copy of the characters from
 * `start` up to, but not including, `end`.
 */
char * string_create(char *start, char *end);

/*
 * Splits `string` into parts at `seperator` characters; `escape`
 * characters also act as part boundaries (see split.c for the exact
 * rules). Returns the pointer array produced by array_to_char_array().
 * NOTE(review): the caller presumably owns the array and the strings it
 * points to - confirm.
 */
char ** split(char *string, char seperator, char escape);

#endif

split.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#include <stdlib.h>
#include <string.h>
#include "array.h"

/*
 * Returns a newly allocated, NUL-terminated copy of the characters in
 * the range [start, end).
 *
 * `start` and `end` must point into the same string with start <= end.
 * The caller owns the result and releases it with free(). Returns NULL
 * if memory could not be allocated.
 */
char * string_create(char *start, char *end)
{
    size_t length = (size_t)(end - start);
    char *result = malloc(length + 1);

    if (result == NULL)
        return NULL;

    /* strncpy would leave the buffer unterminated here, so copy the
       bytes and write the terminator explicitly */
    memcpy(result, start, length);
    result[length] = '\0';

    return result;
}

char ** split(char *string, char seperator, char escape)
{
    char **result;
    int escape_count = 0;
    char *string_offset = string;
    char seperation_character = seperator;
    Array *array = array_init();
    
    while (*string++) {
        if (*string == escape) {
            escape_count++;
            seperation_character = escape;
        }
        
        if (*string == seperation_character || *string == '\0') {
            array_push(array, string_create(string_offset, string));
            string_offset = string + 1;
            
            if (escape_count > 1) {
                escape_count--;
                seperation_character = seperator;
            }
        }
    }
    
    result = array_to_char_array(array);
    array_free(array);    
    
    return result;
}

array.h

1
2
3
4
5
6
7
8
9
10
11
12
13
14
#ifndef _ARRAY_H_
#define _ARRAY_H_

/* Growable list of string pointers. */
typedef struct _array {
    char **elements; /* Heap block holding the stored pointers; NULL while empty */
    int size;        /* Number of pointers currently stored in `elements` */
} Array;

/* Allocates a new Array with no elements. */
Array * array_init();
/* Grows `array` by one slot, stores `value` in it and returns `array`.
   The pointer is stored as is; the string is not copied. */
Array * array_push(Array *array, char *value);
/* Returns a newly malloc'd block holding a copy of the stored pointers.
   Only the pointers are copied, not the strings they point to. */
char ** array_to_char_array(Array *array);
/* Frees the element block and the Array itself. The strings the elements
   point to are not freed. */
void array_free(Array *array);

#endif

array.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#include <stdlib.h>
#include <string.h>
#include "array.h"

/*
 * Allocates an empty Array (no element block, size 0).
 *
 * The caller releases it with array_free(). Returns NULL if memory could
 * not be allocated.
 */
Array * array_init(void)
{
    Array *array = malloc(sizeof *array);

    if (array == NULL)
        return NULL;

    array->elements = NULL;
    array->size = 0;

    return array;
}

/*
 * Appends `value` to `array`, growing the element block by one slot.
 *
 * The pointer is stored as is; the string it points to is not copied.
 * Returns `array` on success. Returns NULL when `array` is NULL or when
 * the block cannot be grown; in the latter case the array is left
 * unchanged and `value` is not stored.
 */
Array * array_push(Array *array, char *value)
{
    char **grown;

    if (array == NULL)
        return NULL;

    /* Go through a temporary: on failure realloc leaves the old block
       valid, and overwriting array->elements would leak it */
    grown = realloc(array->elements, sizeof *grown * ((size_t)array->size + 1));
    if (grown == NULL)
        return NULL;

    array->elements = grown;
    array->elements[array->size] = value;
    array->size++;

    return array;
}

/*
 * Returns a newly malloc'd, NULL-terminated copy of the pointers stored
 * in `array`.
 *
 * Only the pointers are copied; the strings stay shared with `array`.
 * The caller frees the returned block. Returns NULL if memory could not
 * be allocated.
 */
char ** array_to_char_array(Array *array)
{
    size_t count = (size_t)array->size;
    /* One extra slot for the terminating NULL. The parentheses matter:
       `sizeof(char *) * size + 1` would add a single byte, not a slot. */
    char **result = malloc(sizeof *result * (count + 1));

    if (result == NULL)
        return NULL;

    /* elements is NULL while the array is empty, and memcpy must not be
       given a null pointer even for a zero-length copy */
    if (count > 0)
        memcpy(result, array->elements, sizeof *result * count);
    result[count] = NULL;

    return result;
}

/*
 * Releases the element block and the Array itself.
 *
 * The strings the elements point to are not freed. Passing NULL is a
 * no-op, like free().
 */
void array_free(Array *array)
{
    if (array == NULL)
        return;

    free(array->elements);
    free(array);
}

Your refactoring





Format Copy from initial code

or Cancel