%{

/* lexpgn by David A. Wheeler (http://www.dwheeler.com).

   This code processes files formatted using PGN, as defined in
   "Standard Portable Game Notation Specification and Implementation
   Guide" Revised 1994.03.12 by Steven J. Edwards.

   This code lexically analyzes PGN files as an import format; since it's
   importing, it tries to accept nonstandard formats as much as it can and
   figure out the "intent".  It handles varying newlines (e.g. \r), etc.
   It will accept a great deal of misformatting that isn't,
   strictly speaking, legal PGN, but the point is to be able to figure
   out what was intended.

   Calling the lexer will process ONE game in a file,
   starting with 0 or more tags, followed by 0 or more moves.
   It returns 0 if no errors, 1 if errors, 2 if terminated (normally).

   Some of the funny ordering (e.g., for bracecomment) is to make it
   high speed.  Flex/lex can produce high speed lexers, but only
   if it gets some help, in particular by defining patterns that
   maximally match.

   TODO: prevent buffer overflow for FEN.


*/

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

#include "util.h"
#include "lexpgn.h"
#include "book.h"
#include "move.h"
#include "protocol.h"
#include "log.h"

/* Which PGN tag name the scanner is currently inside.  Set by the
   tagsymbol rules and consumed by the tagdata rule. */
enum tagtype {
	NO_TAG,		/* no tag name recognised (yet) */
	EVENT_TAG,
	SITE_TAG,
	DATE_TAG,
	ROUND_TAG,
	WHITE_TAG,
	BLACK_TAG,
	RESULT_TAG,
	WHITE_ELO_TAG,
	BLACK_ELO_TAG,
	WHITETITLE_TAG,
	BLACKTITLE_TAG,
	FEN_TAG,
	OTHER_TAG	/* any tag name without a dedicated rule */
};


/* End-of-input action used by the generated scanner.  Expands inside
   yylex, where seen_tags and seen_moves are locals: returns 1 when
   input ended after tags or moves had been seen in this call, and 2
   when nothing had been seen. */
#define yyterminate() return ((seen_tags || seen_moves) ? 1 : 2)
		
/* Append PGN text t to the comment slot that belongs to the current
   position: the game's initial_comments while no move has been
   recorded (RealGameCnt equals InitialGameCnt), otherwise the
   comments field of boards[RealGameCnt - InitialGameCnt]. */
void append_comment(game_t *pgn_game, const char *t) {
   char **dest;

   if (pgn_game->RealGameCnt != pgn_game->InitialGameCnt) {
      dest = (char **) &(pgn_game->boards[pgn_game->RealGameCnt-pgn_game->InitialGameCnt].comments);
   } else {
      dest = (char **) &pgn_game->initial_comments;
   }
   my_string_append(dest, t);
}

%}


/* Scanner options (flex):
     case-insensitive  letters in patterns match either case
     full              generate uncompressed "full" scanner tables
     ecs               use character equivalence classes
     pointer           yytext is a character pointer
     noyywrap          do not call yywrap() at end of input
     reentrant         generate a reentrant scanner; the rules below read
                       the caller's data through yyextra */
%option case-insensitive
%option full
%option ecs
%option pointer
%option noyywrap
%option reentrant


/* Named character classes used by the rules. */
SPACE		[ \t\n\r\f]
NONSPACE	[^ \t\n\r\f]

/* Exclusive start conditions.  While one is active, only rules that
   name it apply.
     tag_gobble    skip to end of line (no BEGIN(tag_gobble) is visible
                   in the rules of this file)
     tagsymbol     just after "[": expecting the tag name
     tagsep        after the tag name: expecting blanks and optional quote
     tagdata       inside the tag value
     tagend        after the tag value: discard the rest of the line
     bracecomment  inside a { ... } comment
     RAV           inside a ( ... ) variation */
%x tag_gobble
%x tagsymbol
%x tagsep
%x tagdata
%x tagend
%x bracecomment
%x RAV

%%
	/* These are executed on each entry to the code */
	/* (flex copies indented code placed before the first rule to the
	   top of the scanning routine, so every call starts from these
	   initial values.) */
	int seen_tags = 0;	/* set by the "[" rule; read by yyterminate() */
	int seen_moves = 0;	/* set by move/comment/number rules; read by
				   the "[" rule and yyterminate() */
	enum tagtype ctag = NO_TAG;	/* tag whose value is being read */
	int firstmovenum = -1;	/* first move number seen, -1 if none yet */
	int side = white;	/* set to black by the ".." rule; not read in
				   the rules visible here */
	int rav_count = 0;	/* nesting depth of ( ... ) variations */
	int result = R_NORESULT;	/* mirrors the value stored in
					   pgn_game->result by the Result tag */
        game_t *pgn_game;
	char tagname[80]; /* Name of tag currently being processed */
        board_t pgn_board[1];

        /* The caller hands over the game to fill in via the scanner's
           extra-data slot. */
        pgn_game=(game_t *) yyextra;        
        int pmove;	/* move decoded by the SAN move rule */

        /* NOTE(review): presumably resets the game and loads its latest
           position into pgn_board -- confirm against GameNew and
           BoardFromGame. */
        GameNew(pgn_game);
        BoardFromGame(pgn_board,pgn_game,BOARD_LAST);
        /* NOTE(review): looks like this clears the MANUAL and THINK bits
           of the protocol state -- confirm against the CLEAR macro. */
        CLEAR (Protocol->state_flags, MANUAL);
        CLEAR (Protocol->state_flags, THINK);



{SPACE}+		{ /* Ignore whitespace */ }
\%[^\r\n]*		{ /* Ignore PGN escape. We'll allow initial space.
			     The pattern is not anchored with ^, so a "%"
			     anywhere in INITIAL discards the rest of the
			     line. */ }

\[[ \t]*		{
	/* "[" opens a tag pair.  The rule has no start-condition prefix,
	   so with the exclusive states used in this file it is active
	   in INITIAL only. */
	if (seen_moves) {
		/* A tag after moves belongs to the next game: push the
		   text back unread and end this game successfully. */
		BEGIN(INITIAL);
		yyless(0);
		return 0;
	}

	/* Still in the tag section: go read the tag name. */
	seen_tags = 1;
	ctag = NO_TAG;
	BEGIN(tagsymbol);
	}


<tag_gobble>[^\n\r]*		{BEGIN(INITIAL); /* Skip the rest of the line.  NOTE(review): no BEGIN(tag_gobble) is visible in this file, so this rule looks unreachable -- confirm before removing. */}

<tagsymbol>white		{ctag = WHITE_TAG; BEGIN(tagsep);}
<tagsymbol>black		{ctag = BLACK_TAG; BEGIN(tagsep);}
<tagsymbol>result		{ctag = RESULT_TAG; BEGIN(tagsep);}
<tagsymbol>whitetitle		{ctag = WHITETITLE_TAG; BEGIN(tagsep);}
<tagsymbol>blacktitle		{ctag = BLACKTITLE_TAG; BEGIN(tagsep);}
<tagsymbol>fen			{ctag = FEN_TAG; BEGIN(tagsep);}
<tagsymbol>event		{ctag = EVENT_TAG; BEGIN(tagsep);}
<tagsymbol>site			{ctag = SITE_TAG; BEGIN(tagsep);}
<tagsymbol>date			{ctag = DATE_TAG; BEGIN(tagsep);}
<tagsymbol>round		{ctag = ROUND_TAG; BEGIN(tagsep);}
<tagsymbol>whiteELO		{ctag = WHITE_ELO_TAG; BEGIN(tagsep);}
<tagsymbol>blackELO		{ctag = BLACK_ELO_TAG; BEGIN(tagsep);}
<tagsymbol>[a-z0-9_]+		{
	/* Any other tag name.  The longest match wins and, on a tie, the
	   earlier rule, so the dedicated rules above take precedence for
	   their exact names.  The name is kept so the tagdata rule can
	   record the pair in othertags. */
	ctag = OTHER_TAG;
	if ((size_t) yyleng >= (sizeof(tagname)-1)) {
		Output("Error, tagname too long: %s\n", yytext);
		return 1;
	}
	/* Length was checked above, so the copy plus terminator fits. */
	memcpy(tagname, yytext, (size_t) yyleng);
	tagname[yyleng] = '\0';
	BEGIN(tagsep);
	}
<tagsymbol>[ \t]*\]		{BEGIN(INITIAL); /* No tag name, skip. */}
<tagsymbol>[\n\r]		{BEGIN(INITIAL); /* Line ended early. */}
<tagsymbol>.			{
	/* Anything else cannot start a tag name: report and fail. */
	Output("Bad character as tag name: %s\n", yytext);
	return 1;
	}

<tagsep>[ \t]+\"?		{BEGIN(tagdata); /* Blanks, then optional opening quote. */}
<tagsep>[\n\r]			{
	/* Line ended right after the tag name.  Without this rule a
	   newline matches nothing in this state and falls through to
	   flex's default rule, which echoes it to yyout and leaves the
	   scanner in tagsep.  Abandon the tag instead, as tagsymbol
	   does. */
	BEGIN(INITIAL);
	}
<tagsep>.			{
	Output("Bad character as tag separator: %s\n", yytext);
	return 1;
	}

<tagdata>("\\\""|[^\n\r\"])* { /* tag data */

	/* The opening quote, if any, was consumed in tagsep.  This
	   matches the value up to an unprotected " or the end of the
	   line; whatever follows on the line is discarded in tagend. */
	BEGIN(tagend);

	/* TODO: if DEST_GAME, store tag symbol and data for later saving */
	/* Book add criteria noted by the original author:
	   IsTrustedPlayer; White(or Black)Title = "GM", "IM", "FM". */
	switch (ctag) {
		case NO_TAG:
			break;

		/* Tags kept as strings on the game record. */
		case EVENT_TAG:
			my_string_set(&pgn_game->event,yytext);
			break;
		case SITE_TAG:
			my_string_set(&pgn_game->site,yytext);
			break;
		case DATE_TAG:
			my_string_set(&pgn_game->date,yytext);
			break;
		case ROUND_TAG:
			my_string_set(&pgn_game->round ,yytext);
			break;
		case WHITE_TAG:
			my_string_set(&pgn_game->white_name,yytext);
			break;
		case BLACK_TAG:
			my_string_set(&pgn_game->black_name,yytext);
			break;
		case WHITETITLE_TAG: /* We'll trust GM, IM, FMs */
			my_string_set(&pgn_game->white_title,yytext);
			break;
		case BLACKTITLE_TAG:
			my_string_set(&pgn_game->black_title,yytext);
			break;

		/* Ratings are converted with atoi. */
		case WHITE_ELO_TAG:
			pgn_game->white_rating = atoi(yytext);
			break;
		case BLACK_ELO_TAG:
			pgn_game->black_rating = atoi(yytext);
			break;

		case RESULT_TAG:
			/* Only the three decisive spellings are recognised;
			   anything else leaves both result fields as they
			   were. */
			if (strcmp(yytext, "1-0") == 0) {
				result = R_WHITE_WINS;
				pgn_game->result = result;
			} else if (strcmp(yytext, "0-1") == 0) {
				result = R_BLACK_WINS;
				pgn_game->result = result;
			} else if (strcmp(yytext, "1/2-1/2") == 0) {
				result = R_DRAW;
				pgn_game->result = result;
			}
			break;

		case FEN_TAG:
			/* Sanity limit on FEN length.  The budget assumed
			   here is 71 (a character for every board cell,
			   plus separators) + 1 (space) + 1 (side, w or b)
			   + 1 (space) + 4 (castling, KQkq) + 1 (space)
			   + 2 (en passant) = 81, plus one spare character
			   for miscount/whitespace.  This is only a sanity
			   check: the FEN parser must still guard its own
			   array accesses. */
			if (yyleng > 82) {
				Output("Error: FEN too long: %s\n", yytext);
				return 1;
			}
			/* No success/failure is reported back; do the
			   best we can. */
			/* XXX TODO: handle case of a corrupt fen */
			GameSetBoard(pgn_game,yytext);
			BoardFromGame(pgn_board,pgn_game,BOARD_LAST);
			break;

		case OTHER_TAG:
			/* Rebuild the pair as [name "value"] plus newline
			   and append it to othertags. */
			my_string_append((char **)&pgn_game->othertags, "[");
			my_string_append((char **)&pgn_game->othertags, tagname);
			my_string_append((char **)&pgn_game->othertags, " \"");
			my_string_append((char **)&pgn_game->othertags, yytext);
			my_string_append((char **)&pgn_game->othertags, "\"]\n");
			break;
	}
	}

<tagdata>\"[ \t]*\][\n\r]*	{BEGIN(INITIAL); /* A quote is the first thing in tagdata (e.g. an empty value "") and the tag closes properly: nothing is stored. */}
<tagdata>\"[^\n\r]*		{BEGIN(INITIAL); /* Garbage, do what can. */}
<tagdata>[\n\r]			{BEGIN(INITIAL); /* End-of-line. */}

<tagend>[^\n\r]*		{BEGIN(INITIAL); /* Consume leftover */}

\;[^\n\r]*		{ /* PGN comment */
	/* Rest-of-line comment: kept with its ";" and followed by a
	   newline.  Seeing one counts as having entered the move
	   section (seen_moves). */
	seen_moves = 1;
	append_comment(pgn_game,yytext);
	append_comment(pgn_game,"\n");
	}

\{			{ /* PGN comment */
	/* Opening brace: store it and read the body in bracecomment. */
	seen_moves = 1;
	append_comment(pgn_game,yytext);
	BEGIN(bracecomment);
	}
<bracecomment>[^\r\n}]+(\n+[^\r\n}]*)*	{ /* PGN comment; may embed \n */
	/* Matches as much comment text as possible in one go, including
	   embedded \n characters, and stores it unchanged. */
	append_comment(pgn_game,yytext);
	}
<bracecomment>\}	{
	/* Closing brace: store it and resume normal scanning. */
	append_comment(pgn_game,yytext);
	BEGIN(INITIAL);
	}
	/* Line ends not absorbed by the text rule above are each stored
	   as a single "\n", whichever of the four spellings was used. */
<bracecomment>\n\r	{ append_comment(pgn_game,"\n"); }
<bracecomment>\r\n	{ append_comment(pgn_game,"\n"); }
<bracecomment>\r	{ append_comment(pgn_game,"\n"); }
<bracecomment>\n	{ append_comment(pgn_game,"\n"); }


\${NONSPACE}*		{ seen_moves = 1; /* Numeric Annotation Glyph */
	/* "$" plus everything up to the next whitespace is stored with
	   the comments rather than interpreted. */
	append_comment(pgn_game,yytext);
	}

	/* Game termination markers: each ends this call with 0 after
	   swallowing any trailing whitespace.  The marker is not compared
	   with the Result tag. */
\*{SPACE}*		{ return 0; /* could check if consistent w/Result */ }
1\/2-1\/2{SPACE}*	{ return 0; }
0-1{SPACE}*		{ return 0; }
1-0{SPACE}*		{ return 0; }

[1-9][0-9]*\.?		{ seen_moves = 1; /* Move number */
	/* Only the first move number of the game is recorded; values
	   outside 0..32000 are treated as "not seen". */
	if (firstmovenum == -1) {
		/* TODO: Use this info somehow */
		/* strtol is used instead of sscanf("%d"): an arbitrarily
		   long digit run is undefined behaviour for %d, whereas
		   strtol saturates at LONG_MAX, which the range check
		   below rejects. */
		long movenum = strtol(yytext, NULL, 10);
		if (movenum < 0 || movenum > 32000)
			firstmovenum = -1;
		else
			firstmovenum = (int) movenum;
		/* Output("First move num=%d\n", firstmovenum); */
	}
	}

\.\.+			{ seen_moves = 1; side = black; /* ".." or more after a move number: black to move */ }


[a-z0][a-z0-9\-=\+\#\?\!\,]*	{ /* Process a move */
	seen_moves = 1;

	/* SAN moves can be at most 7 characters. */
	if (yyleng > 7) {
		Output("Error: move too long: %s\n", yytext);
		return 1;
	}

	/* Decode the text against the current position; NOMOVE means it
	   could not be turned into a move. */
	MoveFromString(pgn_board,&pmove,yytext);
	if (pmove == NOMOVE) {
		Output ("Illegal move: %s\n", yytext);
		return 1;
	}

	/* Play the move on the working board, record it in the slot for
	   the current ply, then advance the ply counter.
	   NOTE(review): no bound check on the boards index is visible
	   here -- confirm the capacity of pgn_game->boards. */
	MoveMake(pgn_board, pmove);
	pgn_game->boards[pgn_game->RealGameCnt-pgn_game->InitialGameCnt].move = pmove;
	pgn_game->RealGameCnt++;
	}

\(	{ rav_count = 1; append_comment(pgn_game,yytext); BEGIN(RAV); /* variation opens: its text is stored as comment, not played */ }
<RAV>\(	{ rav_count++; append_comment(pgn_game,yytext); /* nested variation */ }
<RAV>\)	{ rav_count--; append_comment(pgn_game,yytext);
	 /* Leave RAV once the outermost parenthesis has closed. */
	 if (rav_count <=0) BEGIN(INITIAL); }
<RAV>[^\(\)\[]+	{ append_comment(pgn_game,yytext); 
		/* We escape [ to avoid problems with
		   unclosed RAV */
		}
<RAV>^\[[wW]hite	{ yyless(0) ; BEGIN(INITIAL); return(0) ; /* Damn humans */ /* "[White" at the start of a line inside a variation: treat the variation as unclosed, push the text back and end the game with 0. */}
<RAV>\[		{ append_comment(pgn_game,yytext); /* any other "[" is ordinary variation text */ }


<INITIAL,tagsymbol,tagdata>.	{
	 /* Catch-all for a single character no other rule accepted:
	    report it and fail.  Rules listed earlier win ties of equal
	    length, so in tagsymbol the earlier "." rule takes
	    precedence over this one. */
	 Output("Illegal character %c in input stream.\n", yytext[0]);
	 return 1;
	}

%%

