LinuxQuestions.org

LinuxQuestions.org (/questions/)
-   Programming (https://www.linuxquestions.org/questions/programming-9/)
-   -   parse bibtex with flex+bison (https://www.linuxquestions.org/questions/programming-9/parse-bibtex-with-flex-bison-4175451265/)

RudraB 02-22-2013 06:07 AM

parse bibtex with flex+bison
 
I want to parse a BibTeX file using flex/bison. A sample BibTeX file is:
Code:

@Book{a1,
author="amook",
Title="ASR",
Publisher="oxf",
Year="2010",
Add="UK",
Edition="1",
}
@Article{a2,
Author="Rudra Banerjee",
Title={FeNiMo},
Publisher={P{\"R}B},
Issue="12",
Page="36690",
Year="2011",
Add="UK",
Edition="1",
}

Now, I have written a flex code:

Code:

%{
#include <stdio.h>
#include <stdlib.h>
%}

%{
/*
 * Trace scanner for BibTeX entries: every rule prints what it recognised.
 *
 * The author/title/publisher keywords switch to an exclusive start
 * condition that expects a double-quoted value.  Any quoted value on one
 * line is accepted (spaces, digits and punctuation included).  If what
 * follows is not such a value (for example a {brace} value), the input is
 * pushed back and rescanned in INITIAL, so the scanner can never get stuck
 * inside an exclusive state.
 *
 * NOTE(review): yylval, YEAR and i are not used by any rule below; they are
 * kept only because user code outside this section may reference them.
 */
char yylval;
int YEAR,i;
%}
%option case-insensitive
%x author
%x title
%x pub
%%
@                               printf("\nNEWENTRY\n");
[a-zA-Z][a-zA-Z0-9]*            printf("%s",yytext);
author=                         { BEGIN(author); }
title=                          { BEGIN(title); }
publisher=                      { BEGIN(pub); }
<author,title,pub>\"[^\"\n]*\"  { /* a complete quoted value */
                                  printf("%s",yytext);
                                  BEGIN(INITIAL); }
<author,title,pub>.|\n          { /* not a quoted value: rescan it in INITIAL */
                                  yyless(0);
                                  BEGIN(INITIAL); }
[a-zA-Z0-9\/.-]+=               printf("ENTRY TYPE ");
\"                              printf("QUOTE ");
\{                              printf("LCB ");
\}                              printf(" RCB");
;                               printf("SEMICOLON ");
\n                              printf("\n");
%%

/*
 * Run the scanner over standard input until end of input.
 * All output is produced by the rule actions; yylex()'s return value is
 * not needed here.
 */
int main(void)
{
  yylex();
  return 0;
}

While this compiles and runs, it's not picking up the fields. I would like to store the keys and their corresponding values so they can be used in C code.
Some help, please.

NevemTeve 02-22-2013 06:10 AM

I'm rooting for you.

Edit later: Good, you have added code since my first reply.

Later: you forgot the Makefile, so here is one for test:
Code:

CFLAGS  += -m32 -std=c99 -g -W -Wall -Wextra
CPPFLAGS+= -D_GNU_SOURCE -D_XOPEN_SOURCE=500 -D_ALL_SOURCE
LDFLAGS += -m32 -g

# `all` comes first so that a bare `make` builds the scanner; with `clean`
# as the first target, plain `make` would only delete files.
.PHONY: all clean
all: bibtex

clean:
	rm -f bibtex bibtex.o bibtex.c 2>/dev/null || true

# Generate the C scanner from the flex source.
bibtex.c: bibtex.l
	flex -o $@ $<

# bibtex.o is built from bibtex.c by make's implicit rule; -lfl is added
# for this target only.
bibtex: LIBS += -lfl
bibtex: bibtex.o
	gcc -o $@ ${LDFLAGS} $^ ${LIBS}

It does something, I couldn't tell if it is the right thing or not:
Code:

$ ./bibtex <bibtex.in

NEWENTRY
BookLCB a1,
"amook",
ENTRY TYPE QUOTE ASRQUOTE ,
ENTRY TYPE QUOTE oxfQUOTE ,
ENTRY TYPE QUOTE 2010QUOTE ,
ENTRY TYPE QUOTE UKQUOTE ,
ENTRY TYPE QUOTE 1QUOTE ,
 RCB

NEWENTRY
ArticleLCB a2,
ENTRY TYPE QUOTE Rudra BanerjeeQUOTE ,
ENTRY TYPE LCB FeNiMo RCB,
ENTRY TYPE LCB PLCB \QUOTE R RCBB RCB,
ENTRY TYPE QUOTE 12QUOTE ,
ENTRY TYPE QUOTE 36690QUOTE ,
ENTRY TYPE QUOTE 2011QUOTE ,
ENTRY TYPE QUOTE UKQUOTE ,
ENTRY TYPE QUOTE 1QUOTE ,
 RCB


RudraB 02-22-2013 06:14 AM

NevemTeve?

RudraB 02-22-2013 08:41 AM

Quote:

Originally Posted by NevemTeve (Post 4897422)
It does something, I couldn't tell if it is the right thing or not:

Thanks for the makefile; mine was almost the same, except that I run flex case-insensitively (flex -i).
With flex -i, one gets the following output:
Quote:

ArticleLCB a2,
"Rudra Banerjee",
Title={FeNiMo},
Publisher={P{\"R}B},
Issue="12",
Page="36690",
Year="2011",
Add="UK",
ENTRY TYPE QUOTE 1QUOTE ,
RCB
What I want to do is to have those keys and values stored in some way so they can be used in a C program.

i.e., maybe something like the lines that are commented out in the C block of the code.
Any further help, please?

NevemTeve 02-22-2013 09:56 AM

I'd start with something simpler, eg:

sample.in:
Code:

author="amook"
title="ASR"
author="Ian M. Banks"
title="The Player of Games"

sample.l:
Code:

%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>     /* strchr, strlen, memcpy (used by DupStr) */

/* Token codes returned by yylex(). */
#define AUTHOR 256
#define TITLE  257

/* NOTE(review): not referenced by any rule or function in this file. */
char yylval;

%}
%option noyywrap

%%
author=\"[^\"]*\"       { return AUTHOR; }
title=\"[^\"]*\"        { return TITLE; }
.|\n                    { /* discard anything that is not a known field */ }
<<EOF>>                 { return EOF; }
%%

/* One bibliography record; both fields are NULL until a value is stored. */
typedef struct REC REC;

struct REC {
    char *f_author;     /* author text */
    char *f_title;      /* title text */
};

/*
 * Return a freshly allocated copy of the value part of a `key="value"` token.
 *
 * If `from` contains '=', everything up to and including the first '=' is
 * skipped.  If the remainder is wrapped in double quotes, the surrounding
 * quotes are dropped as well.
 *
 * Returns NULL when `from` is NULL or when the allocation fails.  The caller
 * owns the returned buffer and must free() it.
 */
static char *DupStr (const char *from)
{
    const char *eq;
    size_t len;
    char *copy;

    if (from == NULL) return NULL;

    eq = strchr (from, '=');
    if (eq != NULL) from = eq + 1;

    len = strlen (from);
    if (len >= 2 && from[0] == '"' && from[len-1] == '"') {
        ++from;         /* skip the opening quote ...          */
        len -= 2;       /* ... and leave out the closing one   */
    }

    copy = malloc (len + 1);
    if (copy == NULL) return NULL;      /* never memcpy into a NULL buffer */
    memcpy (copy, from, len);
    copy[len] = '\0';
    return copy;
}

/*
 * Drive the scanner: remember the most recent AUTHOR token and, when the
 * next TITLE token arrives, print the pair as one record.  Runs until
 * yylex() reports end of input.
 */
int main (void)
{
    REC rec= {NULL, NULL};
    int rc;

    while ((rc= yylex()) != EOF) {
/*      printf ("yylex returned %d (%s)\n", rc, yytext);        */
        if (rc==AUTHOR) {
            /* A second author before any title replaces the first one;
               release the old copy so it does not leak. */
            free (rec.f_author);
            rec.f_author= DupStr (yytext);

        } else if (rc==TITLE) {
            rec.f_title= DupStr (yytext);

            /* f_author is still NULL when a title arrives with no preceding
               author; passing NULL to %s is undefined, so print a
               placeholder instead. */
            printf ("Record found: %s: %s\n",
                    rec.f_author ? rec.f_author : "(unknown)",
                    rec.f_title  ? rec.f_title  : "(unknown)");
            free (rec.f_author); rec.f_author= NULL;
            free (rec.f_title);  rec.f_title= NULL;
        }
    }

    /* An author that was never followed by a title is still held here. */
    free (rec.f_author);
    return 0;
}

output:
Code:

Record found: amook: ASR
Record found: Ian M. Banks: The Player of Games



All times are GMT -5. The time now is 11:53 AM.