LinuxQuestions.org
Visit Jeremy's Blog.
Home Forums Tutorials Articles Register
Go Back   LinuxQuestions.org > Forums > Non-*NIX Forums > Programming
User Name
Password
Programming This forum is for all programming questions.
The question does not have to be directly related to Linux and any language is fair game.

Notices


Reply
  Search this Thread
Old 02-01-2011, 02:45 AM   #1
shoaibjameel123
LQ Newbie
 
Registered: Feb 2011
Posts: 1

Rep: Reputation: 0
Search using hsearch() Linux


Hi All,

I have written a code on Linux that searches a long dictionary. I have used hsearch() function but the problem is it does not work.
This is my code:
//Search the count values from the dictionary.
Code:
#define _GNU_SOURCE

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<search.h>
#include<inttypes.h>
#include<math.h>

#define DICTIONARY_LENGTH 180326
#define N 6075

char ** read_file ( char * );
size_t number_of_words = 0;

char ** read_file ( char *path )
{
    char ch;
    char *line = NULL;

    size_t len = 0;
    ssize_t read;

    number_of_words = 0;
    unsigned long int i = 0;
    unsigned long int j = 0;

    FILE *pointer = NULL;
    char **word_array = NULL;

    pointer = fopen ( path , "r" );
    if ( pointer == NULL)
    {
        perror ( "File read error " );
        //return ( EXIT_FAILURE );
    }

    //counting the number of words...
    while ( !feof ( pointer ) )
    {
        ch = fgetc ( pointer );
        if ( ch == '\n' && ch != EOF )
        {
            number_of_words ++;
        }
    }

    rewind ( pointer );

    word_array = malloc ( number_of_words * sizeof ( char * ) );
    if ( word_array == NULL )
    {
        perror ( "malloc() memory allocation failure" );
        //return ( EXIT_FAILURE );
    }

    for ( i = 0 ; i < number_of_words ; i ++ )
    {
        word_array[i] = malloc ( 100 * sizeof ( char ) );//CHANGEDHERE
        if ( word_array[i] == NULL )
        {
            perror ( "malloc() memory allocation failure" );
            //return ( EXIT_FAILURE );
        }
    }

    //lets extract text here...
    i = 0;
    j = 0;

    while ( !feof ( pointer ) )
    {
        while ( ( read = getline ( &line , &len , pointer ) ) != -1 )
        {
            strcpy ( word_array[i] , line );
            //read = read - 1;
            //for ( i = 0 ; i < read ; i ++ )
            //{
            //    * ( word_array + j ++ ) = line [i];
            //}
            if ( i <= number_of_words )
            {
                i++;
            }
        }
    }

    //return ( word_array );

    /*for ( i = 0 ; i < number_of_words ; i ++ )
    {
        printf ( "%s" , word_array[i] );
    }*/

    fclose ( pointer );
    return ( word_array );

}


int32_t main ( int argc , char **argv )
{
    FILE *dictionary = NULL;
    FILE *dic_list = NULL;
    FILE *output_pointer = NULL;

    char *line = NULL;
    size_t len = 0;
    ssize_t read;
    char temp_char[150] = {0};
    float idf = 0;
    char **words_from_dic = NULL;
    size_t j = 0;
    size_t i = 0;

    float value = 0;

    system ( "ls -1 *.dic > files_dic.mtp" );

    ENTRY e , *ep;

    int32_t number = 0;
    
    char *word = NULL;
    word = ( char * ) malloc ( 18 * sizeof ( char ) );
    if ( word == NULL )
    {
        perror ( "malloc() memory allocation failure" );
        return ( EXIT_FAILURE );
    }

    dictionary = fopen ( "count_words.txt" , "r" );
    if ( dictionary == NULL )
    {
        perror ( "file open error:dictionary" );
        return ( EXIT_FAILURE );
    }

    hcreate ( DICTIONARY_LENGTH );

    while ( !feof ( dictionary ) )
    {
        fscanf ( dictionary , "%d %s" , &number , word );
        e.key = word;
        e.data = ( void * ) number;
        ep = hsearch ( e , ENTER );
        if ( ep == NULL )
        {
            fprintf ( stderr , "Entry failed\n" );
            exit ( 1 );
        }
    }

    dic_list = fopen ( "files_dic.mtp" , "r" );
    if ( dic_list == NULL )
    {
        perror ( "file open error" );
        return ( EXIT_FAILURE );
    }

    while ( ( read = getline ( &line , &len , dic_list ) ) != -1 )
    {
        if ( line [ read-1 ] == '\n' )
            {
                    line [ read-1 ] = '\0';
        }

        words_from_dic = read_file ( line );
        words_from_dic [ number_of_words ] = '\0';

        strcat ( temp_char , "/data/out/" );
        strcat ( temp_char , line );
        output_pointer = fopen ( temp_char , "a" );

        while ( words_from_dic[j] != NULL )
        {
            if ( words_from_dic[j] [ strlen ( words_from_dic[j] ) - 1 ] == '\n' )
            {
                words_from_dic[j] [ strlen ( words_from_dic [j] ) - 1 ] = '\0';
            }

            e.key = words_from_dic [ j ];
            ep = hsearch ( e , FIND );
            value = ( int ) ( ep->data );
            fprintf ( output_pointer , "%f\n" , value );
            j ++;
        }

        for ( i = 0 ; i < j ; i ++ )
        {
            free ( words_from_dic[i] );
        }
            free ( words_from_dic );

        fclose ( output_pointer );
        memset ( &temp_char , 0 , strlen ( temp_char ) );
        memset ( line , 0 , strlen ( line ) );
    }

    hdestroy();    
    fclose ( dic_list );

    return ( EXIT_SUCCESS );
My dictionary file looks like this:

Code:
1 aanandi
1 aandrostane
1 aanganwadis
1 aanhui
1 aanhydrate
I have some 600 *.dic files which I open one by one and extract words from them. THe dic files look something like this:
Code:
abc
efg
orange
apple
Hence, I open each DIC file, get the word from it and search the hash table and extract the key from it. The problem with the above code is that it is able to make the hash table but it returns NULL when searching. It should not return NULL in any case because all words from DIC files are there in the dictionary. I am not able to figure out why?
 
Old 02-01-2011, 06:00 AM   #2
dwhitney67
Senior Member
 
Registered: Jun 2006
Location: Maryland
Distribution: Kubuntu, Fedora, RHEL
Posts: 1,541

Rep: Reputation: 335Reputation: 335Reputation: 335Reputation: 335
What value is returned by hcreate()?

On a side note, you should break the contents of main() into several functions. The use of a global variable should be avoided. There are other pitfalls in your code, but they are too numerous to list. Suffice to say, avoid assuming the length of string before you actually read it, and never use scanf(), fscanf(), or gets() to read a string.

EDIT:

I just realized that below is perhaps the main problem with the code (note, this is just a guess):
Code:
e.key = word;
Try this instead:
Code:
e.key = strdup(word);
Otherwise, you are assigning the same value to e.key with each iteration of the loop, that is, the address of the 'word' array. Although the contents of 'word' change with each iteration of the loop, the address remains constant. Whether the ENTER action causes the hash table to create a deep-copy or a shallow-copy of 'word' is not clearly defined in the manpages. Anyhow, I may be wrong with my assessment.

Last edited by dwhitney67; 02-01-2011 at 06:10 AM.
 
  


Reply



Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

BB code is On
Smilies are On
[IMG] code is Off
HTML code is Off



Similar Threads
Thread Thread Starter Forum Replies Last Post
Search tools (Affinity, Tracker Search Tool, etc.) not working - don't find any files Adamantus Linux - Newbie 1 03-29-2009 11:21 PM
possible search crash.../home/httpd/linuxquestions/questions/search.php aus9 LQ Suggestions & Feedback 3 09-06-2008 07:27 PM
Can you make search ...search a string in a link....a url...a web address aus9 LQ Suggestions & Feedback 4 04-16-2008 09:37 AM
libc hcreate and hsearch. ajpug Programming 1 06-03-2005 04:55 PM

LinuxQuestions.org > Forums > Non-*NIX Forums > Programming

All times are GMT -5. The time now is 02:22 AM.

Main Menu
Advertisement
My LQ
Write for LQ
LinuxQuestions.org is looking for people interested in writing Editorials, Articles, Reviews, and more. If you'd like to contribute content, let us know.
Main Menu
Syndicate
RSS1  Latest Threads
RSS1  LQ News
Twitter: @linuxquestions
Open Source Consulting | Domain Registration