#include <assert.h>
#include <ctype.h>    /* isspace(), toupper() */
#include <limits.h>   /* INT_MAX */
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h> /* mmap() is defined in this header */
#include <fcntl.h>
#include <unistd.h>   /* close() */

/* Size, in bytes, of the general-purpose line buffers declared in this file. */
#define MAXLEN 5000

/*
 * File handles shared across the program; all are opened in main():
 *   boot_fp - array of T prediction files, "<root>-<i>.boot.predict"
 *   im_fp   - "<root>.im", opened in append mode; receives queried examples
 *   q_fp    - "<root>.queries.dat"; receives "Q N" progress records
 */
FILE **boot_fp, *im_fp, *q_fp;

/*
 * Two-argument minimum / maximum.  Each argument may be evaluated twice, so
 * do not pass expressions with side effects.
 */
#define min(a,b)	(((a)<(b))?(a):(b))
#define max(a,b)	(((a)>(b))?(a):(b))

/*
 * sgets - fgets()-style line reader for an in-memory, NUL-terminated string.
 *
 * Copies characters from *stream into str until a line terminator ('\n' or
 * '\r') has been consumed, the terminating NUL is reached, or size-1
 * characters have been stored, whichever happens first.  The line terminator
 * is consumed but not stored, and str is always NUL-terminated on success.
 * A "\r\n" pair is therefore reported as a line followed by an empty line.
 *
 * *stream is advanced past everything consumed but never past the
 * terminating NUL, so further calls at end of input keep returning NULL
 * instead of reading beyond the string.
 *
 *   str     destination buffer of at least size bytes
 *   size    capacity of str, including room for the terminating NUL
 *   stream  in/out: current read position inside the source string
 *
 * Returns str when a (possibly empty) line was read, including a final line
 * that has no terminator; returns NULL when the input is exhausted or an
 * argument is unusable (NULL pointer, size <= 0).
 */
char *sgets( char * str, int size, char ** stream ) {
    int i;

    if ( str == NULL || size <= 0 || stream == NULL || *stream == NULL )
        return NULL;

    /* Nothing left: report end of input without moving the cursor. */
    if ( '\0' == **stream )
        return NULL;

    for ( i = 0; i < size-1; i++ ) {
        char c = **stream;

        if ( '\0' == c )            /* unterminated last line: keep it */
            break;
        (*stream)++;
        if ( ('\n' == c) || ('\r' == c) )
            break;
        str[i] = c;
    }
    str[i] = '\0';

    return str;
}

/*
 * stoupper - return a newly allocated upper-case copy of s.
 *
 * Each character is converted with toupper(); the input string is left
 * untouched.  The caller owns the returned buffer and must free() it.
 *
 * Returns NULL if s is NULL or if memory cannot be allocated.
 */
char *stoupper (char *s) {
    size_t i, len;
    char *S;

    if (s == NULL)
        return NULL;

    len = strlen(s);            /* measured once, not on every iteration */
    S = malloc(len + 1);
    if (S == NULL)
        return NULL;

    for (i = 0; i < len; i++) {
        /* <ctype.h> functions require a value representable as unsigned char */
        S[i] = (char)toupper((unsigned char)s[i]);
    }
    S[len] = '\0';
    return S;
}

/*
 * sisspace - test whether a string consists solely of whitespace.
 *
 * Returns 1 if every character of s satisfies isspace() (an empty string
 * therefore yields 1), and 0 as soon as a non-whitespace character is found.
 */
int sisspace(char *s) {
    for (; *s != '\0'; s++) {
        /* <ctype.h> functions require a value representable as unsigned char */
        if (!isspace((unsigned char)*s))
            return 0;
    }
    return 1;
}

/*
 * parse_header - scan the ARFF-style header found at the start of src.
 *
 * src is read with sgets() in pieces of at most 255 characters, up to and
 * including the first line that equals "@DATA" (compared case-insensitively
 * against the whole line) or until the input ends.  Lines that are blank or
 * start with '%' are skipped; every other line is appended to *header,
 * followed by '\n'.
 *
 *   src         NUL-terminated text to scan; it is not modified
 *   header      *header must point to a writable buffer large enough for the
 *               copied text plus its terminating NUL; the capacity is not
 *               known here and cannot be checked
 *   attributes  out: number of lines containing "@ATTRIBUTE" (any case)
 *               minus one, i.e. not counting the last such line; -1 if there
 *               was none
 *   k           out: number of tokens that follow the first '{'-delimited
 *               token of the last "@ATTRIBUTE" line, splitting on space,
 *               tab, comma, braces and newline; 0 if there is no such line
 *               (presumably the class labels - confirm against the data)
 */
void parse_header (char *src, char **header, int *attributes, int *k) {
    char *s = src;
    char *out = *header;            /* write cursor inside the caller's buffer */
    char line[256], upper[256];
    char last_attribute[256] = "";  /* stays empty when no attribute is seen */
    int done = 0;

    *out = '\0'; *k = 0; *attributes = 0;

    while (!done && sgets(line,(int)sizeof line,&s)) {
        size_t i, len;

        if (sisspace(line) || line[0]=='%') { continue; }

        /* Append at the cursor rather than re-formatting the buffer into
         * itself, which would pass overlapping arguments to sprintf(). */
        len = strlen(line);
        memcpy(out,line,len);
        out += len;
        *out++ = '\n';
        *out = '\0';

        /* Upper-case scratch copy for the case-insensitive keyword tests. */
        for (i = 0; i < len; i++) {
            upper[i] = (char)toupper((unsigned char)line[i]);
        }
        upper[len] = '\0';

        if (strstr(upper,"@ATTRIBUTE")) {
            strcpy(last_attribute,line);    /* both buffers hold 256 bytes */
            (*attributes)++;
        }
        /* upper is already upper-cased, so plain strcmp() is sufficient. */
        done = (strcmp(upper,"@DATA") == 0);
    }

    (*attributes)--;

    /* Count the tokens that follow the first '{'-delimited token. */
    if (strtok(last_attribute,"{") != NULL) {
        while (strtok(NULL," \t,{}\n") != NULL) {
            (*k)++;
        }
    }
}

int main (int argc, char *argv[]) {
    int fp; 
    int i, j;
    char *src, *header;
    struct stat statbuf;
    char line[MAXLEN], prediction_line[256], command_line[256];

    time_t tm;
    (void) time(&tm);
    srand(tm);

    if (argc<4) {
	// T - number of bootstrap classifiers
	// path to T binary prediction files + root
	// 	"yeast/trees.J48/yeast"
	// stream-file "yeast/yeast.stream"
	// im-file "yeast/yeast.im"
	fprintf(stderr,"Usage: %s T predictions file\n",
	    argv[0]);
	exit(1);
    }

    int T = atoi(argv[1]);
    boot_fp = (FILE **)malloc(sizeof(FILE *)*T);

    for(i=0; i<T; i++) {
        sprintf(line,"%s-%d.boot.predict",argv[2],i);
        if ((boot_fp[i] = fopen(line,"r")) == NULL) {
            fprintf(stderr,"Can't open input file %s\n",line);
            exit(-1);
        }
    }

    sprintf(line,"cp %s.bootstrap %s.im",argv[3],argv[3]);
    system(line);
    sprintf(line,"%s.im",argv[3]);
    if ((im_fp = fopen(line,"a")) == NULL) {
            fprintf(stderr,"Can't open append file %s\n",line);
            exit(-1);
    }

    sprintf(line,"%s.queries.dat",argv[3]);
    if ((q_fp = fopen(line,"w")) == NULL) {
            fprintf(stderr,"Can't open append file %s\n",line);
            exit(-1);
    }

    /* open the stream file */
    sprintf(line,"%s.stream",argv[3]);
    if ((fp = open(line,O_RDONLY)) < 0) {
        fprintf(stderr,"Can't open input file %s\n",line);
        exit(-1);
    }

    /* find size of the stream file */
    if (fstat (fp,&statbuf) < 0) {
        fprintf(stderr,"fstat error"); exit(1);
    }

    /* mmap the input file */
    if ((src = (char *) mmap (0, statbuf.st_size, PROT_READ, MAP_SHARED, fp, 0))
   	== (caddr_t) -1) {
   	fprintf(stderr,"mmap error for input"); exit(1);
    }

    header = (char *)malloc(sizeof(char)*statbuf.st_size);
    int attributes, k;

    parse_header(src,&header,&attributes,&k);

    int summa, prediction;
    float rejection_probability, coin, w;
    int N = 0, Q = 0;

    char name[64];
    strcpy(name,argv[3]);
    char *y = strchr(name,'/');
    *y = '\0';

    while (sgets(line,MAXLEN,&src)) {
        if (sisspace(line) || line[0]=='%' || line[0]=='@') { continue; }
	// printf("%s\n",line);

	summa = 0;  N++;

	for(i=0; i<T; i++) {
	    if (!fgets(prediction_line,256,boot_fp[i])) {
                fprintf(stderr,"Can't read from prediction file %d\n",i);
		exit(-1);
	    }
	    // printf(">%d: %s",i,prediction_line);
	    sscanf(prediction_line,"%d %d",&j,&prediction);
	    // printf(">p %d\n",prediction);
	    summa += prediction;
	}
	if ((summa < T) && (summa > 0)) { rejection_probability = 1; w = 0.1; }
	else { rejection_probability = 0.1; w = 1; }

	coin = rand() * pow(RAND_MAX + 1.0, -1);
	if (coin <= rejection_probability) {
	    // query the example
	    Q++;
	    fprintf(im_fp,"%s, %f\n",line,w);
	    fflush(im_fp);
/*
	    if (!(Q % 20)) {
		sprintf(command_line,"./script-costing %s %d",name,Q);
	        system(command_line);
	    }
*/
	}
        if (! ((N-1) % 10)) {
            sprintf(command_line,"./script-costing %s %d",name,N);
	    fprintf(stderr,"Before script\n%s\n",command_line); fflush(stderr);
            system(command_line);
            fprintf(q_fp,"%d %d\n",Q,N);
            fflush(q_fp);
        }
    }
    sprintf(command_line,"./script-costing %s %d",name,Q);
    system(command_line);
    printf("Queried %d out of %d\n",Q,N);
}
