ProgrammingThis forum is for all programming questions.
The question does not have to be directly related to Linux and any language is fair game.
Notices
Welcome to LinuxQuestions.org, a friendly and active Linux Community.
You are currently viewing LQ as a guest. By joining our community you will have the ability to post topics, receive our newsletter, use the advanced search, subscribe to threads and access many other special features. Registration is quick, simple and absolutely free. Join our community today!
Note that registered members see fewer ads, and ContentLink is completely disabled once you log in.
If you have any problems with the registration process or your account login, please contact us. If you need to reset your password, click here.
Having a problem logging in? Please visit this page to clear all LQ-related cookies.
Get a virtual cloud desktop with the Linux distro that you want in less than five minutes with Shells! With over 10 pre-installed distros to choose from, the worry-free installation life is here! Whether you are a digital nomad or just looking for flexibility, Shells can put your Linux machine on the device that you want to use.
Exclusive for LQ members, get up to 45% off per month. Click here for more info.
I have written a code on Linux that searches a long dictionary. I have used hsearch() function but the problem is it does not work.
This is my code:
//Search the count values from the dictionary.
Code:
#define _GNU_SOURCE
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<search.h>
#include<inttypes.h>
#include<math.h>
#define DICTIONARY_LENGTH 180326
#define N 6075
char ** read_file ( char * );
size_t number_of_words = 0;
char ** read_file ( char *path )
{
char ch;
char *line = NULL;
size_t len = 0;
ssize_t read;
number_of_words = 0;
unsigned long int i = 0;
unsigned long int j = 0;
FILE *pointer = NULL;
char **word_array = NULL;
pointer = fopen ( path , "r" );
if ( pointer == NULL)
{
perror ( "File read error " );
//return ( EXIT_FAILURE );
}
//counting the number of words...
while ( !feof ( pointer ) )
{
ch = fgetc ( pointer );
if ( ch == '\n' && ch != EOF )
{
number_of_words ++;
}
}
rewind ( pointer );
word_array = malloc ( number_of_words * sizeof ( char * ) );
if ( word_array == NULL )
{
perror ( "malloc() memory allocation failure" );
//return ( EXIT_FAILURE );
}
for ( i = 0 ; i < number_of_words ; i ++ )
{
word_array[i] = malloc ( 100 * sizeof ( char ) );//CHANGEDHERE
if ( word_array[i] == NULL )
{
perror ( "malloc() memory allocation failure" );
//return ( EXIT_FAILURE );
}
}
//lets extract text here...
i = 0;
j = 0;
while ( !feof ( pointer ) )
{
while ( ( read = getline ( &line , &len , pointer ) ) != -1 )
{
strcpy ( word_array[i] , line );
//read = read - 1;
//for ( i = 0 ; i < read ; i ++ )
//{
// * ( word_array + j ++ ) = line [i];
//}
if ( i <= number_of_words )
{
i++;
}
}
}
//return ( word_array );
/*for ( i = 0 ; i < number_of_words ; i ++ )
{
printf ( "%s" , word_array[i] );
}*/
fclose ( pointer );
return ( word_array );
}
int32_t main ( int argc , char **argv )
{
FILE *dictionary = NULL;
FILE *dic_list = NULL;
FILE *output_pointer = NULL;
char *line = NULL;
size_t len = 0;
ssize_t read;
char temp_char[150] = {0};
float idf = 0;
char **words_from_dic = NULL;
size_t j = 0;
size_t i = 0;
float value = 0;
system ( "ls -1 *.dic > files_dic.mtp" );
ENTRY e , *ep;
int32_t number = 0;
char *word = NULL;
word = ( char * ) malloc ( 18 * sizeof ( char ) );
if ( word == NULL )
{
perror ( "malloc() memory allocation failure" );
return ( EXIT_FAILURE );
}
dictionary = fopen ( "count_words.txt" , "r" );
if ( dictionary == NULL )
{
perror ( "file open error:dictionary" );
return ( EXIT_FAILURE );
}
hcreate ( DICTIONARY_LENGTH );
while ( !feof ( dictionary ) )
{
fscanf ( dictionary , "%d %s" , &number , word );
e.key = word;
e.data = ( void * ) number;
ep = hsearch ( e , ENTER );
if ( ep == NULL )
{
fprintf ( stderr , "Entry failed\n" );
exit ( 1 );
}
}
dic_list = fopen ( "files_dic.mtp" , "r" );
if ( dic_list == NULL )
{
perror ( "file open error" );
return ( EXIT_FAILURE );
}
while ( ( read = getline ( &line , &len , dic_list ) ) != -1 )
{
if ( line [ read-1 ] == '\n' )
{
line [ read-1 ] = '\0';
}
words_from_dic = read_file ( line );
words_from_dic [ number_of_words ] = '\0';
strcat ( temp_char , "/data/out/" );
strcat ( temp_char , line );
output_pointer = fopen ( temp_char , "a" );
while ( words_from_dic[j] != NULL )
{
if ( words_from_dic[j] [ strlen ( words_from_dic[j] ) - 1 ] == '\n' )
{
words_from_dic[j] [ strlen ( words_from_dic [j] ) - 1 ] = '\0';
}
e.key = words_from_dic [ j ];
ep = hsearch ( e , FIND );
value = ( int ) ( ep->data );
fprintf ( output_pointer , "%f\n" , value );
j ++;
}
for ( i = 0 ; i < j ; i ++ )
{
free ( words_from_dic[i] );
}
free ( words_from_dic );
fclose ( output_pointer );
memset ( &temp_char , 0 , strlen ( temp_char ) );
memset ( line , 0 , strlen ( line ) );
}
hdestroy();
fclose ( dic_list );
return ( EXIT_SUCCESS );
I have some 600 *.dic files which I open one by one and extract words from them. THe dic files look something like this:
Code:
abc
efg
orange
apple
Hence, I open each DIC file, get the word from it and search the hash table and extract the key from it. The problem with the above code is that it is able to make the hash table but it returns NULL when searching. It should not return NULL in any case because all words from DIC files are there in the dictionary. I am not able to figure out why?
On a side note, you should break the contents of main() into several functions. The use of a global variable should be avoided. There are other pitfalls in your code, but they are too numerous to list. Suffice to say, avoid assuming the length of string before you actually read it, and never use scanf(), fscanf(), or gets() to read a string.
EDIT:
I just realized that below is perhaps the main problem with the code (note, this is just a guess):
Code:
e.key = word;
Try this instead:
Code:
e.key = strdup(word);
Otherwise, you are assigning the same value to e.key with each iteration of the loop, that is, the address of the 'word' array. Although the contents of 'word' change with each iteration of the loop, the address remains constant. Whether the ENTER action causes the hash table to create a deep-copy or a shallow-copy of 'word' is not clearly defined in the manpages. Anyhow, I may be wrong with my assessment.
Last edited by dwhitney67; 02-01-2011 at 06:10 AM.
LinuxQuestions.org is looking for people interested in writing
Editorials, Articles, Reviews, and more. If you'd like to contribute
content, let us know.