/*
 * Split `string` into newly allocated parts.
 *
 * Runs of `sep` between parts are skipped, so no empty part is produced for
 * consecutive separators. If the first character of a part is `escape`, the
 * part consists of everything up to the next `escape` (or the end of the
 * string) and may therefore contain `sep`; otherwise the part runs up to the
 * next `sep`. The escape character is only recognised at the start of a part.
 * A lone `escape` as the very last character is ignored.
 *
 * Returns a NULL-terminated array of NUL-terminated strings. The caller owns
 * the array and every string in it and must free() them. Returns NULL when
 * `string` is NULL or when an allocation fails (nothing is leaked then).
 */
char **split (char *string, char sep, char escape) {
    char **ret;
    size_t len;
    size_t i = 0;
    size_t nParts = 0;

    if (string == NULL)
        return NULL;
    len = strlen (string);

    /* Start with an empty, NULL-terminated list */
    ret = malloc (sizeof *ret);
    if (ret == NULL)
        return NULL;
    ret[0] = NULL;

    while (i < len) {
        char stop = sep;     /* character that ends the current part */
        size_t start;
        size_t partLen;
        char **grown;
        char *part;

        /* Jump over the separator characters */
        while (i < len && string[i] == sep)
            i++;

        /* A leading escape character switches the terminator */
        if (i < len && string[i] == escape) {
            i++;
            if (i >= len)
                break;
            stop = escape;
        }
        if (i >= len)
            break;

        /* Measure the part first so it can be allocated in one go */
        start = i;
        while (i < len && string[i] != stop)
            i++;
        partLen = i - start;

        /* Room for the new part plus the terminating NULL */
        grown = realloc (ret, (nParts + 2) * sizeof *ret);
        if (grown == NULL)
            goto Error;
        ret = grown;

        part = malloc (partLen + 1);
        if (part == NULL)
            goto Error;     /* ret[nParts] is still NULL: list stays valid */
        memcpy (part, string + start, partLen);
        part[partLen] = '\0';

        ret[nParts++] = part;
        ret[nParts] = NULL;

        /* Step over the character that ended the part */
        i++;
    }
    return ret;

Error:
    for (i = 0; ret[i] != NULL; i++)
        free (ret[i]);
    free (ret);
    return NULL;
}
Refactorings
No refactoring yet !
Maciej Piechotka
September 16, 2008, September 16, 2008 10:06, permalink
Maybe something like this? Adding error checking should be relatively easy (it was left out for clarity).
I'm not sure what your code does - especially with the memory allocation:
ret = (char **) malloc (sizeof (char *)); // Array of string of size 1?
ret[nParts] = (char *) malloc (sizeof (char)); // String of size 0? ("\0")
#include <stdlib.h>
#include <string.h>
const int ALLOC_BLOCK = 16; /* Pointer slots added each time the result grows */

/*
 * Split `str` at every `sep` that is not inside an escape block.
 *
 * An escape block starts at one `esc` character and ends at the next one;
 * separators inside it do not split. The `esc` characters themselves are
 * kept in the returned parts. Empty parts are kept too, so an empty input
 * yields a single empty string.
 *
 * Returns a NULL-terminated array of newly allocated strings which the caller
 * must free (each string, then the array). Returns NULL if `str` is NULL or
 * an allocation fails.
 */
char **
split (char *str, const char sep, const char esc)
{
  char **ret;          /* Allocated block */
  size_t ret_alloc;    /* How many slots we allocated */
  size_t ret_iter;     /* The next free slot */
  char *iter;          /* The current character */
  char *iter_begin;    /* The start of the current part */
  int in_escape;       /* Non-zero while inside an escape block */

  if (str == NULL)
    return NULL;

  /* Initial allocation */
  ret_alloc = (size_t) ALLOC_BLOCK;
  ret = malloc(sizeof *ret * ret_alloc);
  if (ret == NULL)
    return NULL;
  ret_iter = 0;

  /* Main loop */
  iter = str;
  iter_begin = iter;
  in_escape = 0;
  while (1)
    {
      const int at_end = (*iter == '\0');

      if (!at_end && *iter == esc)
        {
          /* Entering or leaving an escape block */
          in_escape = !in_escape;
        }
      else if (at_end || (*iter == sep && !in_escape))
        {
          /* Copy the part that ends here (ISO C only, no strndup) */
          size_t part_len = (size_t) (iter - iter_begin);
          char *part = malloc(part_len + 1);

          if (part == NULL)
            goto error;
          memcpy(part, iter_begin, part_len);
          part[part_len] = '\0';
          ret[ret_iter++] = part;

          /* Grow when full so there is always room for the final NULL */
          if (ret_iter == ret_alloc)
            {
              char **grown;

              ret_alloc += (size_t) ALLOC_BLOCK;
              grown = realloc(ret, sizeof *ret * ret_alloc);
              if (grown == NULL)
                goto error;
              ret = grown;
            }

          if (at_end)
            break;
          iter_begin = iter + 1;
        }
      iter++;
    }

  ret[ret_iter] = NULL; /* Terminating NULL */
  return ret;

error:
  while (ret_iter > 0)
    free(ret[--ret_iter]);
  free(ret);
  return NULL;
}
Maciej Piechotka
September 16, 2008, September 16, 2008 10:42, permalink
YAR - it omits the separators and has a much cleaner structure.
#include <stdlib.h>
#include <string.h>
/* NOTE(review): split() below declares its own local ALLOC_BLOCK, which
   shadows this file-scope constant inside that function. */
const int ALLOC_BLOCK = 16;
/*
 * Append the contents of an escape block to the string being built.
 *
 * On entry *iter points just past the opening escape character. The text up
 * to (not including) the closing `esp` is appended to *out at index *offset,
 * the result is NUL-terminated, and *offset is advanced by the number of
 * characters appended. *iter is left just past the closing `esp`; if the
 * block is never closed, everything up to the end of the string is taken
 * and *iter is left on the terminating NUL.
 *
 * If the buffer cannot be grown, *out and *offset are left unchanged (the
 * escaped text is dropped) while *iter is still advanced.
 */
void
process_escape_block (char **iter, char **out, int *offset, char esp)
{
  char *begin = *iter;
  char *end;
  char *grown;
  size_t span;

  /* Stop at the closing escape character, but never run past the string */
  while (**iter != esp && **iter != '\0')
    (*iter)++;
  end = *iter;
  if (**iter != '\0')
    (*iter)++;          /* step over the closing escape character */

  span = (size_t) (end - begin);
  grown = realloc(*out, (size_t) *offset + span + 1);
  if (grown == NULL)
    return;

  memcpy(grown + *offset, begin, span);
  grown[(size_t) *offset + span] = '\0';
  *out = grown;
  *offset += (int) span;
}
/*
 * Build one part of the split result, starting at *iter.
 *
 * Plain text is copied up to the next `sep`, `esp` or end of string. When an
 * `esp` is met, the escape block that follows is appended through
 * process_escape_block() and scanning continues. The part ends at a `sep`
 * (which is consumed) or at the end of the string.
 *
 * Returns a newly allocated, NUL-terminated string owned by the caller, or
 * NULL if the buffer cannot be grown. *iter is advanced past what was read.
 */
char *
process_block (char **iter, const char sep, const char esp)
{
  char *out = NULL;
  int len = 0;

  while (1)
    {
      char *begin = *iter;
      char *end;
      char *grown;
      size_t span;

      while (**iter != sep && **iter != esp && **iter != '\0')
        (*iter)++;
      end = *iter;
      span = (size_t) (end - begin);

      grown = realloc(out, (size_t) len + span + 1);
      if (grown == NULL)
        {
          free(out);
          return NULL;
        }
      out = grown;
      memcpy(out + len, begin, span);
      len += (int) span;
      out[len] = '\0';  /* keep the result a valid C string at every step */

      /* Test the end of the string first so we never step past it */
      if (**iter == '\0')
        break;

      if (**iter == esp)
        {
          (*iter)++;
          process_escape_block(iter, &out, &len, esp);
        }
      else
        {
          /* Separator: consume it and finish this part */
          (*iter)++;
          break;
        }
    }
  return out;
}
/*
 * Split `str` into parts by calling process_block() repeatedly until the end
 * of the string is reached.
 *
 * Returns a NULL-terminated array of the strings produced by process_block();
 * the caller must free each string and then the array. Returns NULL if `str`
 * is NULL, if the array cannot be grown, or if process_block() returns NULL.
 */
char **
split (char *str, const char sep, const char esc)
{
  const int ALLOC_BLOCK = 16;   /* pointer slots added per growth step */
  char **ret = NULL;
  size_t ret_alloc = 0;         /* slots allocated */
  size_t ret_iter = 0;          /* slots used */
  char *iter = str;

  if (str == NULL)
    return NULL;

  while (1)
    {
      /* Grow before the end-of-string test so the final NULL always fits */
      if (ret_iter == ret_alloc)
        {
          char **grown;

          ret_alloc += (size_t) ALLOC_BLOCK;
          /* Size is in bytes: slots times the size of one pointer */
          grown = realloc(ret, ret_alloc * sizeof *ret);
          if (grown == NULL)
            goto fail;
          ret = grown;
        }

      if (*iter == '\0')
        break;

      ret[ret_iter] = process_block(&iter, sep, esc);
      if (ret[ret_iter] == NULL)
        goto fail;
      ret_iter++;
    }

  ret[ret_iter] = NULL;
  return ret;

fail:
  while (ret_iter > 0)
    free(ret[--ret_iter]);
  free(ret);
  return NULL;
}
Fran
September 16, 2008, September 16, 2008 13:07, permalink
char **pchar = (char **) malloc (sizeof (char *)); // allocs memory for a pointer to a string
char *string = (char *) malloc (sizeof (char)); // allocs memory for a character.
So pchar[0] will contain a string, while string[0] will contain a character (like 'a').
Adam
September 17, 2008, September 17, 2008 18:54, permalink
#ifndef _SPLIT_H_
#define _SPLIT_H_

/* Copy the characters in [start, end) into a newly allocated buffer. */
char * string_create(char *start, char *end);

/* Split string into an array of newly allocated parts, using seperator to
   divide the parts and escape to mark escaped sections. */
char ** split(char *string, char seperator, char escape);

#endif
#include <stdlib.h>
#include <string.h>
#include "array.h"
/*
 * Return a newly allocated, NUL-terminated copy of the characters in
 * [start, end). The caller must free() the result.
 *
 * Returns NULL if either pointer is NULL, if end precedes start, or if the
 * allocation fails.
 */
char * string_create(char *start, char *end)
{
    size_t length;
    char *result;

    if (start == NULL || end == NULL || end < start)
        return NULL;

    length = (size_t) (end - start);
    result = malloc(length + 1);
    if (result == NULL)
        return NULL;

    memcpy(result, start, length);
    result[length] = '\0';  /* a bounded copy alone leaves this byte unset */
    return result;
}
char ** split(char *string, char seperator, char escape)
{
char **result;
int escape_count = 0;
char *string_offset = string;
char seperation_character = seperator;
Array *array = array_init();
while (*string++) {
if (*string == escape) {
escape_count++;
seperation_character = escape;
}
if (*string == seperation_character || *string == '\0') {
array_push(array, string_create(string_offset, string));
string_offset = string + 1;
if (escape_count > 1) {
escape_count--;
seperation_character = seperator;
}
}
}
result = array_to_char_array(array);
array_free(array);
return result;
}
#ifndef _ARRAY_H_
#define _ARRAY_H_

/* Growable list of string pointers. */
typedef struct _array {
    char **elements;    /* the stored pointers; NULL while the list is empty */
    int size;           /* number of pointers stored in elements */
} Array;

/* Allocate an empty Array (elements == NULL, size == 0). */
Array * array_init(void);

/* Append value to the end of array, growing elements by one slot. */
Array * array_push(Array *array, char *value);

/* Return a newly allocated copy of the element pointers of array. */
char ** array_to_char_array(Array *array);

/* Free the elements table and the Array itself. The strings the elements
   point to are not freed. */
void array_free(Array *array);

#endif
#include <stdlib.h>
#include <string.h>
#include "array.h"
/*
 * Allocate an empty Array: no elements, size 0.
 * Returns NULL if the allocation fails. Release with array_free().
 */
Array * array_init(void)
{
    Array *array = malloc(sizeof *array);

    if (array == NULL)
        return NULL;

    array->elements = NULL;
    array->size = 0;
    return array;
}
/*
 * Append value to the end of array, growing the element table by one slot.
 *
 * Returns array on success. Returns NULL if array is NULL or the table
 * cannot be grown; in that case the array is left exactly as it was.
 */
Array * array_push(Array *array, char *value)
{
    char **grown;

    if (array == NULL)
        return NULL;

    /* Grow through a temporary so a failed realloc does not lose the table */
    grown = realloc(array->elements, sizeof *grown * ((size_t) array->size + 1));
    if (grown == NULL)
        return NULL;

    grown[array->size] = value;
    array->elements = grown;
    array->size++;
    return array;
}
/*
 * Return a newly allocated, NULL-terminated copy of the element pointers of
 * array. Only the pointers are copied, not the strings they point to.
 * The caller must free() the returned table.
 *
 * Returns NULL if array is NULL or the allocation fails.
 */
char ** array_to_char_array(Array *array)
{
    size_t count;
    char **result;

    if (array == NULL)
        return NULL;

    count = (size_t) array->size;
    /* One extra slot for the terminating NULL (count + 1 pointers, not
       count pointers plus one byte) */
    result = malloc(sizeof *result * (count + 1));
    if (result == NULL)
        return NULL;

    /* elements is NULL while the array is empty; do not hand that to memcpy */
    if (count > 0)
        memcpy(result, array->elements, sizeof *result * count);
    result[count] = NULL;
    return result;
}
/*
 * Free the element table and the Array itself. The strings the elements
 * point to are not freed. Passing NULL is a no-op, like free(NULL).
 */
void array_free(Array *array)
{
    if (array == NULL)
        return;

    free(array->elements);
    free(array);
}
Ants
January 26, 2009, January 26, 2009 11:23, permalink
There's a MAJOR bug in split() when dealing with escape characters. Consider what happens with a call to split("@a;@", ';', '@'). I'll let Adam deal with that bug.
MINOR bugs in array_to_char_array():
1) Multiplication has higher precedence than addition.
2) malloc() does not zero fill, so this does not set the last array element to NULL as required by the problem. You should use calloc() to zero fill, or just set the last element explicitly.
Fix for the minor bug is below:
/*
 * Return a newly allocated, NULL-terminated copy of the element pointers of
 * array. Only the pointers are copied; the caller must free() the table.
 *
 * Returns NULL if array is NULL or the allocation fails.
 */
char ** array_to_char_array(Array *array)
{
    char **result;
    size_t count;

    if (array == NULL)
        return NULL;

    count = (size_t) array->size;
    result = malloc(sizeof(char *) * (count + 1));
    if (result == NULL)
        return NULL;

    /* An empty array has elements == NULL, which memcpy must not receive */
    if (count > 0)
        memcpy(result, array->elements, sizeof(char *) * count);
    // Not overrunning buffer because malloc above used (count + 1)
    result[count] = NULL;
    return result;
}
This function splits a string into parts using a separator character and an escape character. It returns an array of strings with a NULL value at the end of the array.
Definitions:
* Separator character: the character that divides the string into parts.
* Escape character: when this character is found, the function searches for the next escape character, and all the characters between the pair form a single part, even if that part contains a separator character.
Sorry for my bad English.