#!/bin/bash

# Exactly four positional arguments are required. A wrong count is a usage
# error: report it on stderr and exit non-zero so callers can detect it.
if [ $# -ne 4 ] ; then
  echo "Usage: run ALG DATADIR DELTA WORKDIR" >&2
  exit 1
fi

# Positional arguments, in order:
#   ALG     - algorithm name; selects the tool directory algs/ALG
#   DATADIR - existing directory whose files are copied into WORKDIR
#   DELTA   - number; half of it is handed to each of the two bound tools
#   WORKDIR - work/output directory; the script refuses to run if it exists
ALG=$1
DATADIR=$2
DELTA=$3
WORKDIR=$4
# Directory the script was started from.
STARTDIR=$(pwd)

# Only the "mclight" algorithm is accepted.
# $ALG is quoted so that an empty or multi-word value is compared as a plain
# string; unquoted, `[` would report a syntax error, the test would count as
# false, and the script would carry on with an unsupported algorithm.
if [ "$ALG" != "mclight" ] ; then
  echo "Only mclight supported!" >&2
  exit 1
fi

# BASEDIR comes from the SEMIBOUND_DIR environment variable, which must be set
# and non-empty. The explicit -n test on a quoted expansion also accepts
# values that contain spaces.
if [ -n "${SEMIBOUND_DIR:-}" ] ; then
  BASEDIR=$SEMIBOUND_DIR
else
  echo "SEMIBOUND_DIR not defined!" >&2
  exit 1
fi

# DATADIR must name an existing directory.
if [ ! -d "$DATADIR" ] ; then
  echo "Directory $DATADIR does not exist!" >&2
  exit 1
fi
# Turn DATADIR into an absolute path. The cd runs inside the command
# substitution's subshell, so the script's own working directory never
# changes and there is nothing to cd back from. CDPATH is cleared so that cd
# resolves the name relative to the current directory only and prints nothing.
abs_datadir=$(CDPATH= cd -- "$DATADIR" && pwd) || {
  echo "Cannot change to directory $DATADIR!" >&2
  exit 1
}
DATADIR=$abs_datadir

# copy the data to workdir
# WORKDIR must not exist yet: the script creates it, fills it, and deletes
# most of its contents again at the end.
if [ -e "$WORKDIR" ] ; then
  echo "Directory $WORKDIR already exists!" >&2
  exit 1
fi
# Stop if the directory cannot be created or entered. Otherwise `pwd` below
# would record some other directory as WORKDIR, and every later step
# (including the final clean-up rm) would operate on that directory instead.
mkdir -p -- "$WORKDIR" || exit 1
CDPATH= cd -- "$WORKDIR" || exit 1
# From here on WORKDIR is an absolute path.
WORKDIR=$(pwd)

# Copy the top-level entries of DATADIR. cp's exit status is deliberately not
# checked: without -r it skips sub-directories and reports that as a failure.
cp -- "$DATADIR"/* .

# train.  Classify the unlabeled data
# The tools are run from inside the algorithm's directory. `train` and
# `predict` are given without a path, so the shell looks them up via PATH.
# Abort if the directory cannot be entered instead of running them elsewhere.
cd "$BASEDIR/algs/$ALG" || exit 1
train "$WORKDIR/labeled"
# Writes $WORKDIR/labeled_preds and $WORKDIR/unlabeled_preds (third argument);
# both calls pass $WORKDIR/labeled as the second argument.
predict "$WORKDIR/labeled" "$WORKDIR/labeled" "$WORKDIR/labeled_preds"
predict "$WORKDIR/unlabeled" "$WORKDIR/labeled" "$WORKDIR/unlabeled_preds"
# The remaining steps use paths relative to BASEDIR (algs/..., pbmargin/...).
cd "$BASEDIR" || exit 1

# Half of DELTA goes to pbmargin/bound. DELTA is handed to perl as an
# argument ($ARGV[0]) rather than pasted into the program text, so its
# content can never be executed as perl code.
DELTASHARE=$(perl -e 'print $ARGV[0] / 2' -- "$DELTA")
# Extract the labels of the labeled set into a scratch file and run the bound
# tool on the labeled predictions; its output is kept in $WORKDIR/bound.
algs/"$ALG"/labels "$WORKDIR/labeled" > "$WORKDIR/tmp"
pbmargin/bound "$WORKDIR/labeled_preds" "$WORKDIR/tmp" "$DELTASHARE" > "$WORKDIR/bound"
# First and second whitespace-separated field of the bound tool's output.
bound_for_rand=$(perl -ane 'print $F[0]' "$WORKDIR/bound")
best_mu=$(perl -ane 'print $F[1]' "$WORKDIR/bound")

# The line count and $best_mu are tool outputs and stay unquoted as in the
# original, so any surrounding whitespace in them is dropped by word splitting.
pbmargin/rand_labels "$WORKDIR/unlabeled_preds" $(lines "$WORKDIR/labeled") $best_mu > "$WORKDIR/unlabeled_rand_preds"

# Compare the labels derived from the unlabeled predictions with the
# randomized predictions: `errors` and `lines` supply the two counts that
# are passed to upper_bound together with half of DELTA.
algs/"$ALG"/labels "$WORKDIR/unlabeled_preds" > "$WORKDIR/tmp"
errors=$(errors "$WORKDIR/tmp" "$WORKDIR/unlabeled_rand_preds")
cases=$(lines "$WORKDIR/tmp")
# DELTA is passed to perl as an argument instead of being interpolated into
# the program text.
DELTASHARE=$(perl -e 'print $ARGV[0] / 2' -- "$DELTA")
# $cases and $errors are tool outputs; they stay unquoted as in the original
# so surrounding whitespace is dropped by word splitting.
bound_dist=$(upper_bound "$DELTASHARE" $cases $errors)
rm -- "$WORKDIR/tmp"

# cheat and compute the error rate of the randomized ensemble on unlabeled data
# Same comparison as for the bound, but against the labels extracted from the
# unlabeled file itself. Paths are quoted so a work directory containing
# spaces or glob characters is passed through intact.
algs/"$ALG"/labels "$WORKDIR/unlabeled" > "$WORKDIR/tmp"
errors=$(errors "$WORKDIR/tmp" "$WORKDIR/unlabeled_rand_preds")
cases=$(lines "$WORKDIR/tmp")
# The two counts are spliced into the perl expression as in the original; if
# either is empty perl rejects the program and error_rate ends up empty.
error_rate=$(perl -e "print $errors / $cases")

# output the bound
# The results file holds a single line: the sum of the two bounds, followed
# by the two bounds and the error rate (perl prints no newline, echo ends it).
perl -e "print $bound_for_rand + $bound_dist" > "$WORKDIR/results"
# Values stay unquoted as in the original, so empty ones are simply omitted.
echo "" $bound_for_rand $bound_dist $error_rate >> "$WORKDIR/results"
cat "$WORKDIR/results"
# Clean up everything in WORKDIR except the results and bound files.
# GLOBIGNORE removes those two paths from the glob expansion below; a
# non-null GLOBIGNORE also makes `*` match dot-files in bash.
GLOBIGNORE="$WORKDIR/results:$WORKDIR/bound"
# ${WORKDIR:?} aborts instead of expanding to "/*" should WORKDIR be empty.
rm -- "${WORKDIR:?}"/*
