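/*
	Majority-vote evaluation of k binary classifier runs.

	Usage: <program> directory-with-predictions #runs Q

	Reads the files <directory>/binary-0.predict ... binary-(k-1).predict,
	where k = #runs.  Each line of a prediction file has the form
	    index prediction confidence label
	For every example the k predictions are combined by majority vote
	(ties are broken at random) and compared with the label.

	Output (one line on stdout):
	    Q  number-of-examples  number-of-mistakes  error-rate-in-percent
*/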
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h> /* mmap() is defined in this header */
#include <fcntl.h>
#include <time.h>
#include <assert.h>

#define MAXLEN 5000

#define min(a,b)	(((a)<(b))?(a):(b))
#define max(a,b)	(((a)>(b))?(a):(b))

/*
 * sgets -- fgets()-like line reader for an in-memory, NUL-terminated string.
 *
 * Copies characters from *stream into str until one of the following:
 *   - a line terminator ('\n' or '\r') is consumed; the terminator itself is
 *     NOT stored in str (so a "\r\n" pair yields an extra empty line),
 *   - size-1 characters have been copied,
 *   - the terminating NUL of the source string is reached.
 *
 * *stream is advanced past everything that was consumed, but never past the
 * terminating NUL, so calling sgets() again after the end of the input is
 * safe and keeps returning NULL.
 *
 * Returns str (always NUL-terminated) when a line was produced, including a
 * final line that has no trailing newline.  Returns NULL, leaving str
 * untouched, when the input is already exhausted or the arguments are
 * unusable (NULL pointers or size <= 0).
 */
char *sgets( char * str, int size, char ** stream ) {
    int i;

    if ( NULL == str || NULL == stream || NULL == *stream || size <= 0 )
        return NULL;

    for ( i = 0; i < size-1; i++ ) {
        char c = **stream;

        if ( '\0' == c ) {
            /* End of input: only report it if nothing was collected, so the
               last unterminated line is still delivered to the caller. */
            if ( 0 == i )
                return NULL;
            break;
        }

        /* Consume the character only after we know it is not the NUL. */
        (*stream)++;

        if ( ('\n' == c) || ('\r' == c) )
            break;

        str[i] = c;
    }
    str[i] = '\0';

    return str;
}

/*
 * Parse one line of a prediction file.
 *
 * A line is expected to hold four whitespace-separated fields:
 *     index prediction confidence label
 * Only the prediction and the label are handed back to the caller.
 *
 * Returns 1 when all four fields were converted, 0 otherwise (in which case
 * *prediction and *label must not be relied upon).
 */
static int parse_prediction(const char *line, int *prediction, int *label) {
    int index;
    float confidence;

    return sscanf(line, "%d %d %f %d",
                  &index, prediction, &confidence, label) == 4;
}

/*
 * Entry point.
 *
 *   argv[1]  directory holding binary-0.predict ... binary-(k-1).predict
 *   argv[2]  k, the number of runs (must be >= 1)
 *   argv[3]  Q, echoed unchanged as the first field of the output line
 *
 * The k files are read in lock step, one line per example.  The k
 * predictions for an example are combined by majority vote (a tie is broken
 * by a fair coin flip) and compared with the label read from the last file.
 *
 * Prints "Q n e rate" to stdout, where n is the number of examples, e the
 * number of mistakes and rate the error percentage (0.000 when there are no
 * examples).  Exits with status 1 on usage, I/O or format errors.
 */
int main (int argc, char *argv[]) {
    int i;

    if (argc < 4) {
        fprintf(stderr,"Usage: %s directory-with-predictions #runs Q\n",
            argv[0]);
        exit(1);
    }

    /* Seed the generator used for tie-breaking. */
    srand((unsigned int) time(NULL));

    int k = atoi(argv[2]);
    int Q = atoi(argv[3]);

    /* The vote loop reads fpw[0] unconditionally, so k must be positive. */
    if (k <= 0) {
        fprintf(stderr,"#runs must be a positive integer (got '%s')\n",
            argv[2]);
        exit(1);
    }

    FILE **fpw = malloc((size_t) k * sizeof *fpw);
    if (!fpw) {
        fprintf(stderr,"Out of memory allocating %d file handles\n",k);
        exit(1);
    }

    char line[MAXLEN], file_name[256];

    /* argv[1] is the directory with the prediction files;
       remove a trailing '/' so the path below has exactly one separator */
    size_t dir_len = strlen(argv[1]);
    if (dir_len > 0 && argv[1][dir_len-1] == '/') argv[1][dir_len-1] = '\0';

    /* open one prediction file per run */
    for (i = 0; i < k; i++) {
        int len = snprintf(file_name, sizeof file_name,
                           "%s/binary-%d.predict", argv[1], i);
        if (len < 0 || (size_t) len >= sizeof file_name) {
            fprintf(stderr,"Path too long: %s/binary-%d.predict\n",argv[1],i);
            exit(1);
        }
        if (!(fpw[i] = fopen(file_name,"r"))) {
            fprintf(stderr,"Failed to open %s for reading\n",file_name);
            exit(1);
        }
    }

    int n = 0;  /* number of examples seen */
    int e = 0;  /* number of misclassified examples */

    /* File 0 drives the loop; every other file must supply a matching line. */
    while (fgets(line, sizeof line, fpw[0])) {
        int prediction = 0, label = 0;
        int ones = 0;   /* how many runs predicted 1 for this example */

        for (i = 0; i < k; i++) {
            /* the line for file 0 was already read by the while condition */
            if (i > 0 && !fgets(line, sizeof line, fpw[i])) {
                fprintf(stderr,"Error reading file %d\n",i);
                exit(1);
            }
            if (!parse_prediction(line, &prediction, &label)) {
                fprintf(stderr,"Malformed line %d in file %d\n",n+1,i);
                exit(1);
            }
            ones += prediction;
        }

        int predict = 0;
        if (2*ones > k) {
            predict = 1;
        } else if (2*ones == k) {
            /* exact tie: flip a fair coin */
            predict = (rand() / (RAND_MAX + 1.0) > 0.5) ? 1 : 0;
        }

        /* label holds the value read from the last file (k-1) */
        if (predict != label) { e++; }
        n++;
    }

    /* Avoid printing NaN when the prediction files are empty. */
    double error_pct = (n > 0) ? 100.0 * e / n : 0.0;

    printf("%d %d %d %2.3f\n",Q,n,e,error_pct);
    fflush(stdout);

    /* These are stdio streams, so they must be released with fclose(). */
    for (i = 0; i < k; i++) {
        fclose(fpw[i]);
    }
    free(fpw);

    return 0;
}
