#include <assert.h>
#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

/* Function-like minimum / maximum.  Each argument can be evaluated twice,
 * so do not pass expressions with side effects. */
#define min(a,b)	(((a)<(b))?(a):(b))
#define max(a,b)	(((a)>(b))?(a):(b))

/* Size constants; neither is referenced by the code visible in this file. */
#define MAXSTR 20000
#define maxlength 516

/* NOTE(review): <assert.h> is included before this point, so defining
 * NDEBUG here does not disable the assert() calls in this file. */
#define NDEBUG
/* Capacity of the line buffers handed to sgets(). */
#define MAXLEN 5000

/*
 * Draw a pseudo-random integer from the closed interval [min, max].
 *
 * The low five bits of lrand48() are discarded and the remainder is reduced
 * modulo the interval width.  Requires min <= max (checked with assert).
 */
int random_uniform_int(int min, int max) {
  assert(min <= max);

  const int width = max - min + 1;
  const long draw = lrand48() >> 5;
  const long offset = draw % width;

  return (int)(offset + min);
}

/* Output streams: boot_fp[i] is the i-th bootstrap sample file (T of them);
 * stream_fp, im_fp and pass_fp receive the stream, importance-weighted
 * bootstrap and passive copies of the data respectively. */
FILE **boot_fp, *stream_fp, *im_fp, *pass_fp;

/* Number of examples that were assigned to the bootstrap set. */
int k;

/* Split the training dataset into a b*N bootstrap sample B and a (1-b)*N stream. */
/* Subsample from B with probability p to form each B_i, 0 <= i < T. */

float b;	  /* probability of being in the bootstrap set */
float p = 0.66;   /* probability of being in each bootstrap sample */
int T = 10; 	  /* number of bootstrap classifiers */

/*
 * sgets -- fgets-like line reader over an in-memory, NUL-terminated string.
 *
 * Copies characters from *stream into str until a '\n' or '\r' has been
 * consumed, size-1 characters have been stored, or the terminating '\0' is
 * reached.  The line terminator is consumed but not stored, and str is
 * always NUL-terminated when it is returned.
 *
 * *stream is advanced past every consumed character but never past the
 * terminating '\0', so calling again at end of input keeps returning NULL
 * instead of reading beyond the buffer.
 *
 * Returns str when a (possibly empty) line was produced, or NULL when no
 * input remains or the arguments are unusable (NULL pointers, size < 1).
 * A final line that lacks a trailing newline is returned, not dropped.
 */
char *sgets( char * str, int size, char ** stream ) {
    int i;

    if ( str == NULL || stream == NULL || *stream == NULL || size < 1 )
        return NULL;

    /* Nothing left: report end of input without moving the cursor. */
    if ( '\0' == **stream )
        return NULL;

    for ( i = 0; i < size-1; i++ ) {
        char c = **stream;

        if ( '\0' == c )
            break;              /* keep *stream on the terminator */
        (*stream)++;
        if ( ('\n' == c) || ('\r' == c) )
            break;              /* terminator consumed, not stored */
        str[i] = c;
    }
    str[i] = '\0';

    return str;
}

/*
 * stoupper -- return a newly allocated upper-case copy of s.
 *
 * s must be a NUL-terminated string and is not modified.  The caller owns
 * the returned buffer and must free() it.  Returns NULL if the allocation
 * fails.
 */
char *stoupper (char *s) {
     size_t len = strlen(s);   /* measured once, not on every iteration */
     size_t i;
     char *S = malloc(len + 1);

     if (S == NULL) {
         return NULL;
     }
     for (i = 0; i < len; i++) {
         /* <ctype.h> functions need a value representable as unsigned char. */
         S[i] = (char) toupper((unsigned char) s[i]);
     }
     S[len] = '\0';
     return S;
}

/*
 * sisspace -- test whether a string consists solely of whitespace.
 *
 * Returns 1 if every character of the NUL-terminated string s satisfies
 * isspace() (this includes the empty string), 0 otherwise.
 */
int sisspace(char *s) {
    const unsigned char *p;

    /* Walk the bytes as unsigned char: passing a negative char value to
     * isspace() is undefined behaviour. */
    for (p = (const unsigned char *) s; *p != '\0'; p++) {
        if (!isspace(*p)) {
            return 0;
        }
    }
    return 1;
}

/*
 * parse_header -- copy the header of an in-memory dataset to every output
 * file and count attributes and data lines.
 *
 * src         NUL-terminated text of the input file.  It is received by
 *             value, so the caller's pointer is not advanced.
 * attributes  out: number of header lines containing "@attribute"
 *             (case-insensitive).
 * n           out: number of lines following the "@data" line that are
 *             neither blank nor '%' comments.
 *
 * Every non-blank, non-comment line up to and including the "@data" line
 * is written to stream_fp, im_fp, pass_fp and each of the T files in
 * boot_fp, all of which must already be open for writing.
 */
void parse_header (char *src, int *attributes, int *n) {
    char line[MAXLEN];
    int i;

    *attributes = 0;

    while (sgets(line,MAXLEN,&src)) {
        char *upper;

        if (sisspace(line) || line[0]=='%') { continue; }

        fprintf(stream_fp,"%s\n",line);
        fprintf(im_fp,"%s\n",line);
        fprintf(pass_fp,"%s\n",line);
        for(i=0; i<T; i++) {
            fprintf(boot_fp[i],"%s\n",line);
        }

        /* One upper-case copy per line, released right after use so the
         * header scan does not leak an allocation per line. */
        upper = stoupper(line);
        if (upper != NULL && strstr(upper,"@ATTRIBUTE")) {
            (*attributes)++;
        }
        free(upper);

        /* strcasecmp already ignores case, so no copy is needed here. */
        if (strcasecmp(line,"@DATA") == 0) {
            break;
        }
    }

    *n = 0;
    while (sgets(line,MAXLEN,&src)) {
        if (sisspace(line) || line[0]=='%') { continue; }
        (*n)++;
    }
}

/*
 * islspace -- test whether a line consists solely of whitespace.
 *
 * Returns 1 if every character of the NUL-terminated string line satisfies
 * isspace() (this includes the empty string), 0 otherwise.
 */
int islspace(char *line) {
    size_t i, n = strlen(line);

    for (i = 0; i < n; i++) {
        /* Cast first: a negative char passed to isspace() is undefined. */
        if (!isspace((unsigned char) line[i])) {
            return 0;
        }
    }
    return 1;
}

/* Read up to `size` bytes from descriptor fd into a freshly allocated,
 * NUL-terminated buffer.  Returns NULL on allocation or read failure; the
 * caller frees the result. */
static char *read_whole_file(int fd, size_t size) {
    char *buf = malloc(size + 1);
    size_t got = 0;

    if (buf == NULL) {
        return NULL;
    }
    while (got < size) {
        ssize_t r = read(fd, buf + got, size - got);
        if (r < 0) {
            free(buf);
            return NULL;
        }
        if (r == 0) {
            break;              /* file is shorter than fstat reported */
        }
        got += (size_t) r;
    }
    buf[got] = '\0';
    return buf;
}

/* Open path for writing, or print a diagnostic and exit(-1) on failure. */
static FILE *open_output(const char *path) {
    FILE *f = fopen(path, "w");

    if (f == NULL) {
        fprintf(stderr,"Can't open output file %s\n",path);
        exit(-1);
    }
    return f;
}

/* Pseudo-random value obtained by scaling rand() by 1/(RAND_MAX+1). */
static float flip_coin(void) {
    return rand() * pow(RAND_MAX + 1.0, -1);
}

/*
 * Usage: prog b FILE
 *
 * Splits the data lines of FILE at random.  With NAME being FILE minus its
 * extension, each data line goes either to NAME.stream (when the coin
 * exceeds b) or to the bootstrap set.  Bootstrap lines are written to
 * NAME.bootstrap with ", 1" appended as importance weight, to NAME.passive
 * unchanged, and to each bootstrap/NAME-<i>.boot (0 <= i < T)
 * independently with probability p.  Header lines are copied to every
 * output by parse_header().
 *
 * Returns 0 on success; exits non-zero on bad arguments or I/O errors.
 */
int main(int argc, char *argv[]) {
    struct stat statbuf;
    char line[MAXLEN];
    char *text, *cursor;
    char *name, *path, *dot, *slash, *end;
    size_t path_cap;
    int fd, i, n, attributes;
    int close_failures = 0;

    srand((unsigned int) time(NULL));

    if (argc < 3) {
	fprintf(stderr,"Usage: %s s FILE\n",argv[0]);
	exit(-1);
    }

    /* Validate explicitly instead of relying on assert() for user input. */
    b = strtod(argv[1], &end);
    if (end == argv[1] || !(b >= 0 && b <= 1)) {
        fprintf(stderr,"Bootstrap probability must be a number in [0,1], got '%s'\n",argv[1]);
        exit(-1);
    }

    if ((fd = open(argv[2],O_RDONLY)) < 0) {
        fprintf(stderr,"Can't open input file %s\n",argv[2]);
        exit(-1);
    }

    /* size of the input file */
    if (fstat (fd,&statbuf) < 0) {
        fprintf(stderr,"fstat error"); exit(1);
    }

    /* Load the file into a NUL-terminated buffer.  sgets() stops at '\0',
     * which a raw mapping of the file does not guarantee to contain. */
    text = read_whole_file(fd, (size_t) statbuf.st_size);
    close(fd);
    if (text == NULL) {
        fprintf(stderr,"Can't read input file %s\n",argv[2]);
        exit(1);
    }

    /* Buffers sized from the argument so long paths cannot overflow.
     * The longest suffix/prefix added below is well under 64 bytes. */
    path_cap = strlen(argv[2]) + 64;
    name = malloc(strlen(argv[2]) + 1);
    path = malloc(path_cap);
    boot_fp = malloc(sizeof *boot_fp * (size_t) T);
    if (name == NULL || path == NULL || boot_fp == NULL) {
        fprintf(stderr,"Out of memory\n");
        exit(1);
    }

    /* Strip the extension, but only a dot inside the last path component;
     * a file name without any dot is used unchanged. */
    strcpy(name, argv[2]);
    dot = strrchr(name,'.');
    slash = strrchr(name,'/');
    if (dot != NULL && (slash == NULL || dot > slash)) {
        *dot = '\0';
    }

    /* Result ignored on purpose: an already existing directory is fine and
     * any other failure is reported by the fopen() calls below. */
    (void) mkdir("bootstrap", 0777);

    for(i=0; i<T; i++) {
        snprintf(path,path_cap,"bootstrap/%s-%d.boot",name,i);
        boot_fp[i] = open_output(path);
    }
    snprintf(path,path_cap,"%s.stream",name);
    stream_fp = open_output(path);

    snprintf(path,path_cap,"%s.bootstrap",name);
    im_fp = open_output(path);

    snprintf(path,path_cap,"%s.passive",name);
    pass_fp = open_output(path);

    parse_header(text,&attributes,&n);

    k = 0;

    /* Second pass from the start of the text; header lines are skipped
     * through the '@' test because parse_header() already copied them. */
    cursor = text;
    while (sgets(line,MAXLEN,&cursor)) {
        if (sisspace(line) || line[0]=='%' || line[0]=='@') { continue; }

	if (flip_coin() > b) { // stream
	    fprintf(stream_fp,"%s\n",line);
	    continue;
	}
	k++;
	fprintf(im_fp,"%s, 1\n",line); // 1 is the importance weight
	fprintf(pass_fp,"%s\n",line);
	for(i=0; i<T; i++) {
	    if (flip_coin() <= p) {
	        fprintf(boot_fp[i],"%s\n",line);
	    }
	}
    }

    /* fclose() flushes, so a failure here means output was lost. */
    if (fclose(stream_fp) != 0) close_failures++;
    if (fclose(im_fp) != 0) close_failures++;
    if (fclose(pass_fp) != 0) close_failures++;
    for(i=0; i<T; i++) {
        if (fclose(boot_fp[i]) != 0) close_failures++;
    }

    free(boot_fp);
    free(path);
    free(name);
    free(text);

    printf("Examples in the bootstrap set %d, total %d, classifiers %d\n",
        k,n,T);

    if (close_failures != 0) {
        fprintf(stderr,"Error while writing output files\n");
        return 1;
    }
    return 0;
}
