I have been working on writing a regsub function in C using PCRE to give as much flexibility as possible. For the most part it works; however, when using capturing groups globally I get a double free. I can't figure out why, though, because the block that is being freed has been allocated. The code is as follows:
Code:
/*
Search for a given regular expression in the string.
Arguments:
char *pattern - The RE pattern to search for.
char *string - The string to search in
char *sub - The string to subsitute into
int flags - The pcre regex flags to pass
int *error - The error code indicator
Return:
char*
Error Condition:
NULL if:cannot match pattern and sets error messgae in error
*/
char* regsub(char* pattern, char* string, char* sub, int flags, int *error){
int erroffset, forloop, startoffset = 0;
int mod_length = 0; //Amount by which the strings length change due to substitution.
const char *errptr;
int ovector[30];
const char **listptr;
char *tmpptr;
char *reg_string = calloc(sizeof(char), strlen(string) + 1); //storage reference for string
char *new_str = calloc(sizeof(char), 1); //temporary storage for modified string
//Check the returned and modified string are valid.
if (!reg_string || !new_str){
if (reg_string) free(reg_string);
if (new_str) free(new_str);
return NULL;
}
strcpy(reg_string, string);
//Compile the RE
pcre *pregex = pcre_compile2(pattern, flags, error, &errptr, &erroffset, NULL);
if (*error){
free(reg_string);
free(new_str);
return NULL;
}
while (1){
int match_count = pcre_exec(pregex, NULL, string, strlen(string), startoffset, 0, ovector, 30);
if (match_count < 1) {
if (!startoffset) free(new_str);
break;
}
else if (match_count > 0) {
//Size of newstring is sizeof(string to match_point) + sizeof(sub) + sizeof(string after match point)
if (!(tmpptr = realloc(new_str, ovector[0] + strlen(sub) +
(strlen(reg_string) - ovector[1]) + mod_length + 1))){
free(reg_string);
free(new_str);
return NULL;
}
new_str = tmpptr;
//Copy string upto match point.
memcpy(new_str, reg_string, ovector[0] + mod_length);
//Copy substring in at match point.
memcpy(&new_str[ovector[0] + mod_length], sub, strlen(sub));
//Copy remaing string after match point
memcpy(&new_str[ovector[0] + mod_length + strlen(sub)],
®_string[ovector[1] + mod_length],
strlen(®_string[ovector[1] + mod_length]));
//Keep track of the modifed length.
mod_length = mod_length + strlen(sub) - (ovector[1] - ovector[0]);
//Complete the string and copy it to reg_string.
new_str[strlen(string) + mod_length] = 0;
if (!(tmpptr = (char *) realloc(reg_string, sizeof(char) * (strlen(new_str) + 1)))){
free(reg_string);
free(new_str);
return NULL;
}
reg_string = tmpptr;
strcpy(reg_string, new_str);
free(new_str); //Crashes here upon second iteration.
} if (match_count > 1){
//Groups used gut each group number.
*error = pcre_get_substring_list(string, ovector, match_count, &listptr);
if (*error) break;
for (forloop =1; forloop < match_count; forloop++){
//Get the pattern to subsitute.
char *num = itos(forloop);
if (!num){
free (new_str);
free(reg_string);
return NULL;
}
tmpptr = realloc(num, sizeof(char) * (strlen(num) + 3));
if (!tmpptr){
free(num);
free (new_str);
free(reg_string);
return NULL;
}
num = tmpptr;
sprintf(num, "\\$%d", forloop);
//substitute the string.
tmpptr = regsub(num, reg_string, (char *) listptr[forloop], 0, error);
if (!tmpptr){
free (new_str);
free(reg_string);
return NULL;
}
free(reg_string);
reg_string = tmpptr;
//update the length of the modified string.
mod_length = mod_length + strlen(listptr[forloop]) - strlen(num) + 1;
free(num);
}
}
//update the start of the string.
startoffset = ovector[1] + 1;
}
pcre_free(pregex);
return reg_string;
}
If it helps, the code for itos is:
Code:
/*
Convert an int to a newly allocated decimal string.
Works for zero and negative values as well as positive ones; the buffer is
sized by asking snprintf how many characters the number needs, so the minus
sign is accounted for.
Arguments:
n - the int to convert
Return:
char* - a NUL-terminated string which the caller must free()
Error Condition
NULL if cannot allocate the space (or if formatting fails).
*/
char* itos(int n){
    /* With a NULL buffer and size 0, snprintf only reports the length required. */
    int digits = snprintf(NULL, 0, "%d", n);
    char *str;

    if (digits < 0)
        return NULL;

    str = malloc((size_t)digits + 1);
    if (!str)
        return NULL;

    snprintf(str, (size_t)digits + 1, "%d", n);
    return str;
}
A rather rudimentary example of usage would be regsub("(\\w*?) (\\w*?)$", "Hello World\nHello Universe", "$2 $1", PCRE_MULTILINE, &error);
which would return "World Hello\nUniverse Hello"
Can anyone help me understand the double free error?