LinuxQuestions.org
Review your favorite Linux distribution.
Go Back   LinuxQuestions.org > Forums > Non-*NIX Forums > Programming
User Name
Password
Programming This forum is for all programming questions.
The question does not have to be directly related to Linux and any language is fair game.

Notices

Reply
 
Search this Thread
Old 03-21-2011, 12:26 AM   #1
evansash
LQ Newbie
 
Registered: Mar 2011
Posts: 5

Rep: Reputation: 0
Memory leaks.. *** glibc detected *** ./SuffixTree: malloc(): memory corruption: 0x00


Hi all,

I am new to C and linux. My code below does arbitary writes but I cant figure out where or how it does it. Please let me know if you need more info.

I am calling the insertNode() function with seq = 'MISSISSPPI$' and alphabets = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ$'

Code:
#include<stdio.h>
#include<string.h>
#include<stdlib.h>

int alphabetsize;

typedef struct TreeNode
{
	int node_id;
	int edgelbl_beg;
	int edgelbl_end;
	struct TreeNode *suff_link;
	struct TreeNode *parentptr;
	int strdepth;
	int suff_id;
	char chars[10];
	struct TreeNode *child[];
}node;

         node root;
         int depthOfTree =0;
         int globalsuff = 0;
         int globalnodeid = 0;
	
	
node *initializeNode(node *Node,int id,char *alphabets)
{
	  int i=0,j=0,size=0;
	  int index = strlen(alphabets);
	  char temp[index];
	  Node->node_id = id;
  	  Node->strdepth = 0;
  	  Node->suff_link = NULL;
  	  Node->parentptr = NULL;
  	  Node->suff_id = -1;
  	  Node->edgelbl_beg = 0;
  	  Node->edgelbl_end = 0;

	  strcpy(temp,alphabets);
		
          for(i = 0; i < strlen(temp); i++)
          {
              if(temp[i] != ' ' && temp[i] != '\0' && temp[i] != '\n')
              {
                  char index = temp[i] - 'A';
	          Node->child[temp[i]] = 0;
              }  
          } 
	  return Node;
}

void printNode(node *Node,char *seq,char *alphabets)
{
	int i=0;
	printf("Node id: %d\n",Node->node_id);
		
	if(Node->parentptr != NULL)
	{
		printf("\nParent:%d\n",Node->parentptr->node_id);
	}
	if(Node->suff_link !=NULL)
	{
		printf("\nSuffixLink:%d\n",Node->suff_link->node_id);
	}
	else
	{
		printf("\nSuffixLink: NULL\n");
	}
	printf("string depth:%d\n",Node->strdepth);
	printf("Beg:%d\n",Node->edgelbl_beg);
	printf("End:%d\n",Node->edgelbl_end);
	i=Node->edgelbl_beg;
	while(i<=Node->edgelbl_end)
	{
		printf("%c",seq[i++]);
	}
	printf("\n");
	if(Node->suff_id!=-1)
	{
		printf("Suffix Id:%d\n",Node->suff_id);
	}
		printf("\n");
	return;
}

node *insertNode(node *root,node *curNode,int suffbeg,char *seq,char *alphabets)
{
	node *prevprevptr = curNode->parentptr;
	node *lastnode = curNode->parentptr;
	node *child = NULL;
	node *returnptr = NULL;
	node *newchild = NULL;
	node *intlnode = NULL;
	int seqsize = strlen(seq)+1;
	char temp[seqsize];
	char alpha[strlen(alphabets)];
	int mismatch = 0;
	int i=0,j=0,k=0,l=0,child_flag=-1;
	char index;
	strcpy(alpha,alphabets);	
	strcpy(temp,seq);

	//Find an ancestor node with a suffix link
	while(prevprevptr->suff_link == NULL)
	{
		prevprevptr = prevprevptr->parentptr;
	}
	prevprevptr = prevprevptr->suff_link;

	int beg = suffbeg;
	while(1)
	{
		for(l=0;l<strlen(prevprevptr->chars);l++)
		{	
			if(prevprevptr->chars[l] == temp[beg])
			{
				child_flag = l;
				break;
			}
		}
		if(child_flag>=0) //already has a branch
		{	
                        index = temp[beg] - 'A';
			child = prevprevptr->child[index];
			i = beg;
			j = child->edgelbl_beg;
			mismatch = 0;
			while(j<=child->edgelbl_end)
			{				
				if(temp[i] != temp[j])
				{
					mismatch = 1;
					break;
				}
				i++;
				j++;
			}
			printf("i=%d j=%d\n",i,j);
			if(mismatch == 1)
			{
				break;
			}
			else
			{
			        prevprevptr = child;
				beg++;
			}
		}
		else
		{
			break;
		}
	}

	//create a new internal node

	if(mismatch ==1)
	{
         	int oldsuffid = child->suff_id;
		int lblbeg = child->edgelbl_beg;
	        int lblend = child->edgelbl_end;
		intlnode = malloc(sizeof(node *)+alphabetsize);
		initializeNode(intlnode,globalnodeid++,alphabets);
		intlnode->edgelbl_beg = child->edgelbl_beg;
		intlnode->edgelbl_end = j-1;
		intlnode->parentptr = prevprevptr;
		intlnode->suff_id = -1;
		intlnode->strdepth = (intlnode->edgelbl_end - intlnode->edgelbl_beg + 1);
		intlnode->suff_link = NULL;
		index = temp[beg] - 'A';
		prevprevptr->child[index] = intlnode;
		k = strlen(prevprevptr->chars);
		prevprevptr->chars[k] = temp[beg]; 
		prevprevptr = intlnode;
		int sufbegtemp = suffbeg;
		suffbeg = j-1;
		printNode(intlnode,temp,alphabets);
		
		//insert new child
		newchild = malloc(sizeof(node *)+alphabetsize);
		initializeNode(newchild,globalnodeid++,alphabets);
		newchild->edgelbl_beg = i;
		newchild->edgelbl_end = strlen(temp)-1;
		newchild->parentptr = intlnode;
		index = temp[i] - 'A';
		intlnode->child[index] = newchild;
		k = strlen(intlnode->chars);
		intlnode->chars[k] = temp[i];
		newchild->suff_id = suffbeg;
		newchild->strdepth =  (newchild->edgelbl_end - newchild->edgelbl_beg + 1);
		newchild->suff_link = NULL;	printNode(newchild,temp,alphabets);
		
		//change the old child pointer
		child->parentptr = intlnode;
		child->edgelbl_beg = j;
		child->edgelbl_end = lblend;
		child->strdepth =  (child->edgelbl_end - child->edgelbl_beg + 1);;
        	child->suff_link = NULL;
		child->suff_id = oldsuffid;
                printNode(child,temp,alphabets);
	        index = temp[j]-'A';
		intlnode->child[index]=child;
		k = strlen(prevprevptr->chars);
		intlnode->chars[k] = temp[j];
		returnptr = child;
		
	}
	else
	{
		child = malloc(sizeof(node *)+alphabetsize);
		initializeNode(child,globalnodeid++,alphabets);
		child->edgelbl_beg = suffbeg;
		child->edgelbl_end = strlen(temp)-1;
		child->parentptr = prevprevptr;
		child->suff_id = suffbeg;
		child->strdepth = (child->edgelbl_end - child->edgelbl_beg + 1);
		child->suff_link = NULL;
		intlnode = child->parentptr;
		if(child)printNode(child,temp,alphabets);
		index = temp[suffbeg] - 'A';
		returnptr =child;
		prevprevptr->chars[suffbeg] = temp[suffbeg];//temp solution
		prevprevptr->child[index] = returnptr;
		child->parentptr = intlnode;
	}
		
	//create suffix links
	int depth = intlnode->edgelbl_end - intlnode->edgelbl_beg + 1;
	if(intlnode->suff_link == NULL && depth==1 && intlnode->parentptr->node_id == 0)
	{
		intlnode->suff_link = root;
	}
	if(lastnode->suff_link == NULL)
	{
		lastnode->suff_link = intlnode;			
	}


	return returnptr;
}

void buildST(char *seq, char *alphabets)
{
	node *root,*currNode,*temproot;
	int i=0,seqsize=0;
	int asize = strlen(alphabets);
	seqsize = strlen(seq)+1;
	char temp[seqsize];
	char alpha[asize];
	strcpy(temp,seq);
	strcpy(alpha,alphabets);
	root = (node *)malloc(sizeof(node *)+alphabetsize);
	initializeNode(root,0,alphabets);//initialize root
	root->suff_link = root;
	root->parentptr = root;
	root->suff_id = globalsuff++;	
	root->edgelbl_beg = -1;
	root->edgelbl_end = -1;
	depthOfTree++;
	globalnodeid++;
	currNode = root;
	temproot = root;
	while(i < strlen(temp))
	{
		currNode = insertNode(root,currNode,i++,temp,alpha);
	}
}
Weird behaviour I should mention is that when I check for NULL pointer in node->child[index], the unassigned values are not null anymore, they point to arbitary memory.

Any idea would be helpful. Thanks in advance.

Last edited by evansash; 03-21-2011 at 01:19 AM.
 
Old 03-21-2011, 12:28 AM   #2
smeezekitty
Senior Member
 
Registered: Sep 2009
Location: Washington U.S.
Distribution: M$ Windows / Debian / Ubuntu / DSL / many others
Posts: 2,230

Rep: Reputation: 173Reputation: 173
Please use [code] tags
 
Old 03-21-2011, 12:58 AM   #3
posixculprit
Member
 
Registered: May 2010
Posts: 136

Rep: Reputation: 42
You're doing things like these:

Code:
int size = strlen(somestring);
char copy[size];
strcpy(copy, somestring);
Which is bad, mkay?

This is even worse:

Code:
newchild = malloc(sizeof(node *)+alphabetsize);
initializeNode(newchild,globalnodeid++,alphabets);
This isn't how you allocate memory for a TreeNode (unless you are extremely lucky).

Last edited by posixculprit; 03-21-2011 at 01:00 AM.
 
Old 03-21-2011, 01:07 AM   #4
evansash
LQ Newbie
 
Registered: Mar 2011
Posts: 5

Original Poster
Rep: Reputation: 0
Hi posixculprit,

Sorry about the non - indentation. I did the strcpy bcoz everytime I used the parameters as arguments to another function like I do here,


Code:
printNode(newchild,temp,alphabets);
it become "". I have no idea why. Somewhere I read this happens with char pointers and copying them to char arrays should make the problem go away and it did.

I have an N-ary tree. I dont know the size of the children for each node. Its depends on the alphabets variable "ABC..$". That is why I declared a flexible array. This is my first time using them. Please let me know if I am declaring them wrong.

Thanks,
Ash
 
Old 03-21-2011, 02:11 AM   #5
worzel1968
Member
 
Registered: Mar 2011
Location: Melbourne, Australia
Distribution: fedora17_64, ubuntu
Posts: 44

Rep: Reputation: 2
Hi Ash,

I might be completely naive & wrong about this, but does alphabetsize get initialised anywhere? I notice you have a asize variable, which is initialised to the length of alphabets.


Code:
void buildST(char *seq, char *alphabets)
{
	node *root,*currNode,*temproot;
	int i=0,seqsize=0;
	int asize = strlen(alphabets);
	seqsize = strlen(seq)+1;
	char temp[seqsize];
	char alpha[asize];
	strcpy(temp,seq);
	strcpy(alpha,alphabets);
	root = (node *)malloc(sizeof(node *)+alphabetsize);
it's also used here without having been initialised anywhere that I could see.

Code:
//create a new internal node

	if(mismatch ==1)
	{
         	int oldsuffid = child->suff_id;
		int lblbeg = child->edgelbl_beg;
	        int lblend = child->edgelbl_end;
		intlnode = malloc(sizeof(node *)+alphabetsize);
		initializeNode(intlnode,globalnodeid++,alphabets);
and here

Code:
//insert new child
		newchild = malloc(sizeof(node *)+alphabetsize);
		initializeNode(newchild,globalnodeid++,alphabets);
and this is the last one

Code:
}
	else
	{
		child = malloc(sizeof(node *)+alphabetsize);
		initializeNode(child,globalnodeid++,alphabets);
		child->edgelbl_beg = suffbeg;

That's my 1 cent contribution. Hope it may help.

Cheers David

Last edited by worzel1968; 03-21-2011 at 02:23 AM. Reason: found more occurences
 
Old 03-21-2011, 02:38 AM   #6
posixculprit
Member
 
Registered: May 2010
Posts: 136

Rep: Reputation: 42
I'm guessing you're trying to implement some sort of suffix tree. My current opinion is that this will prove to be a somewhat difficult task, because you seem to be missing some fundamental knowledge regarding the C programming language.

1. Consider the following program:

Code:
#include <stdio.h>
#include <string.h>


int main()
{
  const char alpha[] = "hello";
  char omega[strlen(alpha)];

  printf("sizeof(alpha): %d\n", sizeof(alpha));
  printf("sizeof(omega): %d\n", sizeof(omega));
}
2. Consider this:

Code:
root = (node *)malloc(sizeof(node *)+alphabetsize);
Why are you allocating memory to hold a pointer to node (rather than a node) + memory to hold the alphabet?

3. I believe you might be using a C++ compiler in order to compile C code (don't). gcc support for C99 can be turned on via `-std=c99`. If you're using gcc, I suggest trying to compile your code using (at least) the following command line flags:

Code:
-std=c99 -pedantic -Wall -Wextra
Make sure to get rid of all the errors and warnings (and understand why they were errors and warnings in the first place).

Last edited by posixculprit; 03-21-2011 at 03:06 AM.
 
Old 03-21-2011, 03:10 AM   #7
evansash
LQ Newbie
 
Registered: Mar 2011
Posts: 5

Original Poster
Rep: Reputation: 0
Hi,

@David: I am initializing the value of alphabetsize in a driver prog. Here is the code for that prog, if it helps:

Code:
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include "suffixtree.h"

char *strip_copy(const char *s) 
{
     char *p = malloc(strlen(s) + 1);  
     if(p) 
     {
         char *p2 = p;   
         while(*s != '\0')
         {
                 if(*s != ' ' && *s != '\n' && *s != '\r') 
                 {
                         *p2++ = *s++;
                 } 
                 else 
                 {
                         ++s;
                 }
         }
         *p2 = '$';
	 *p2 = '\0';
     } 
    return p;
} 

int main(int argc, char *argv[])
{
	char seqFile[50];
	char *seqname = NULL;
	char alphabetFile[50];
	char *seq = NULL;
	char *alphabets = NULL;
	int len=0;
	char *str = NULL;
	char c;
	int i=0,j=0;
	seqname=(char *)malloc(500);

	FILE *infile = NULL;
	
	if ((argc != 3))
	{
		printf("SuffixTree  <input sequence file>  <input alphabet file>\n");
	}
	//Read from the sequence file
	strcpy(seqFile,argv[1]);
	infile = fopen(seqFile,"r");
	if(infile == NULL)
	{
		printf("Could not open file");
	}
	fseek(infile,0,SEEK_END); //go to end
	len=ftell(infile); //get position at end (length)
	fseek(infile,0,SEEK_SET); //go to beg.
	seq=(char *)malloc(len); //malloc buffer
	fgetc(infile);
	while  ( c = fgetc(infile) != '\n' )
	{
		seqname+=c;
	}
	fread(seq,len,1,infile); //read into buffer
	seq = strip_copy(seq);
	seq[strlen(seq)] = '$';
	seq[strlen(seq)] = '\0';
	fclose(infile);
	
	//Read from the alphabet file
	strcpy(alphabetFile,argv[2]);
	infile = fopen(alphabetFile,"r");
	if(infile == NULL)
	{
		printf("Could not open file");
	}
	fseek(infile,0,SEEK_END); //go to end
	len=ftell(infile); //get position at end (length)
	fseek(infile,0,SEEK_SET); //go to beg.
	alphabets=(char *)malloc(len); //malloc buffer
	fread(alphabets,len,1,infile); //read into buffer
	alphabets = strip_copy(alphabets);
	alphabets[strlen(alphabets)]='$';
	alphabets[strlen(alphabets)]='\0';
	fclose(infile);
	for(j=0;j<strlen(alphabets);j++)
	{
		if(alphabets[i] != ' ' && alphabets[i] != '\0' && alphabets[i] != '\n')
		{
			alphabetsize++;
		}
	}
	//Build the Suffix Tree
	buildST(seq,alphabets);
	return 0;
}
@Posixculprit: Thank you for helping me with my mistakes. Like I said I am pretty new to C.

1. would

Code:
char omega[strlen(alpha)+1];
be right?

2. Similarly,
Code:
node = (node *)malloc(sizeof(node)+alphabetsize);
be right? We have to decalre flexible arrays using '+n' right?

3. I am compiling with cc -o SuffixTree s1.c. I will add the flags.

Last edited by evansash; 03-21-2011 at 03:14 AM.
 
Old 03-21-2011, 03:34 AM   #8
posixculprit
Member
 
Registered: May 2010
Posts: 136

Rep: Reputation: 42
Quote:
node = (node *)malloc(sizeof(node)+alphabetsize);
Better but still not good. Now the problem is with +alphabetsize. You are trying to allocate memory for a structure containing a flexible array member. The array contains pointers to nodes. A pointer to a node has the size sizeof(struct node*). You want the array to hold alphabetsize of these pointers. The total memory occupied by the flexible array would thus be:

Code:
alphabetsize * sizeof(struct node*)
and thus the whole thing should look something along these lines:

Code:
malloc(sizeof(struct node) + alphabetsize * sizeof(struct node*))
Note that in C (as opposed to C++) there is an implicit conversion from void pointers to typed pointers (so that (node *) cast is (not only wrong but also) redundant). It is wrong because in C node is not an automatic synonim for struct node as in C++.

P.S.: Indeed, that was my objection to the strlen/strcpy thing.
 
1 members found this post helpful.
Old 03-21-2011, 04:20 AM   #9
worzel1968
Member
 
Registered: Mar 2011
Location: Melbourne, Australia
Distribution: fedora17_64, ubuntu
Posts: 44

Rep: Reputation: 2
Hi Ash,

Forgive me if I am wasting your time (I will keep if that is the case), I could help but notice the following:


Code:
for(j=0;j<strlen(alphabets);j++)
	{
		if(alphabets[i] != ' ' && alphabets[i] != '\0' && alphabets[i] != '\n')
		{
			alphabetsize++;
		}
	}
i and j were both set to zero at the start of main. Your loop uses j, but the subscripts inside the loop use i, so the for loop continually tests alphabets[0]. This doesn't cause a compile error, so maybe in this case it might have better not to initialise j or declare i, because the for loop will do that any way ( the j I mean), and the compiler would have flagged an error about i. I try not to use i or j because they look too similar and sometimes make this sort of thing hard to see.

Also forgive me if I am particulary dense, I still don't see alphabetsize = 0 anywhere. Even if it were initialised to 0 somehow, it will still be 0 because of the for loop.

I hope I am not being a pest.

David
 
1 members found this post helpful.
Old 03-21-2011, 04:38 AM   #10
posixculprit
Member
 
Registered: May 2010
Posts: 136

Rep: Reputation: 42
A good example of why one should try to declare variables as close to their initial point of usage and in an as limited scope as possible:

Code:
for (size_t i = 0; i < strlen(alphabet); ++i) {
// ...
}
There are also various memory leaks, useless statements, you test for failure to open a file but you carry on as if everything is alright (sure, you write a message to stdout, but that doesn't stop the execution flow) and who know what other problems.

Chances are a close inspection of the code will yield many bugs. Is this some form of homework? If you are writing this program in order to refresh / extend your knowledge of C, might I suggest a considerably simpler project first?
 
1 members found this post helpful.
Old 03-21-2011, 04:58 AM   #11
evansash
LQ Newbie
 
Registered: Mar 2011
Posts: 5

Original Poster
Rep: Reputation: 0
Hi,

@David: Thank you for pointing out the alphabetsize errors. I pasted the wrong version of the driver prog. I am so sorry for the confusion. But I havent initialized alphabetsize though, I will change that.

@Posixculprit: I will look into the bugs closer. I just have a bad habit of leaving exception handling to the end, thats why my code is so messy. This is a test code I am writing for my research on Suffix trees.

The code seems to work so far with the earlier changes. Thanks you so much. My errors have been solved. Thanks you for the explanations too.

Ash

Last edited by evansash; 03-21-2011 at 05:05 AM.
 
Old 03-21-2011, 05:22 AM   #12
worzel1968
Member
 
Registered: Mar 2011
Location: Melbourne, Australia
Distribution: fedora17_64, ubuntu
Posts: 44

Rep: Reputation: 2
Further thoughts about alphabetsize

Hi Ash,

If indeed alphabetsize hasn't been initialised or set properly, then it will contain garbage, so using it with malloc will be really bad.

Also consider using unsigned int type for variables like alphabetsize. There might be some that could be const unsigned int, and others that could be size_t. E.g. strlen returns size_t.

Finally alphabetsize is a global variable, this is ok at the moment because I think you have all your code in 1 file. If you were to have main in it's own file then this would fail. It would be better anyway if alphabetsize was declared and initialised in main, then a pointer to it could be passed to whichever functions needed it.

Cheers

David
 
1 members found this post helpful.
Old 03-21-2011, 01:17 PM   #13
evansash
LQ Newbie
 
Registered: Mar 2011
Posts: 5

Original Poster
Rep: Reputation: 0
Thank you David. I never thought about different data types assigning into alphabetsize. I just read about the difference between unsigned_int and size_t. I will make the changes you have suggested to alphabetsize.
 
  


Reply


Thread Tools Search this Thread
Search this Thread:

Advanced Search

Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

BB code is On
Smilies are On
[IMG] code is Off
HTML code is Off


Similar Threads
Thread Thread Starter Forum Replies Last Post
[SOLVED] C program. error: *** glibc detected *** ./proyecto: malloc(): memory corruption Carlos2313 Programming 4 10-24-2010 04:56 PM
*** glibc detected ***: malloc() memory corruption Dinarchik Programming 5 02-16-2010 05:38 PM
*** glibc detected *** ruby: malloc(): memory corruption: priceey Linux - Software 0 10-16-2009 05:24 PM
*** glibc detected *** malloc(): memory corruption arvind.ayyangar Programming 2 11-20-2006 11:59 PM
glibc detected *** malloc(): memory corruption: abefroman Linux - Software 2 04-12-2006 12:12 PM


All times are GMT -5. The time now is 10:44 PM.

Main Menu
My LQ
Write for LQ
LinuxQuestions.org is looking for people interested in writing Editorials, Articles, Reviews, and more. If you'd like to contribute content, let us know.
Main Menu
Syndicate
RSS1  Latest Threads
RSS1  LQ News
Twitter: @linuxquestions
identi.ca: @linuxquestions
Facebook: linuxquestions Google+: linuxquestions
Open Source Consulting | Domain Registration