/*
 *  tweakedGARSP.c
 *  
 *  An optimized implementation of the GARSP algorithm.
 *	- LOOKUP_FIRSTBLANK() macro replaces the first[] array
 *	- BITOFLIST() macro replaces the bit[] lookup table
 *	- Merged search for s and the shift of LIST and COMP
 *	- Use temporary variables to shift LIST and COMP
 *
 *  Additionally a number of search space reduction techniques were applied (as limit)
 *	- midpoint reduction (eliminate mirror images)
 *	- centerpoint reduction (eliminate center symmetry)
 *	- OGR limit (the remaining space must be enough to hold the OGR for the remaing marks)
 *	- CHOOSE limit (the remainig space must be enough to hold the smallest remaining distances)
 *
 *  A safety net was installed to allow searching for large rulers without enlarging the bitmap size
 *	- verify the golombness of found rulers
 *	- introduced sanity checks to guarantee the safe operation
 *
 *  Created by Michael Feiri on Thu Jan 03 2002.
 *  Copyright (c) 2002 Michael Feiri. All rights reserved.
 *
 */

#include <stdio.h>
#include "first0.h" /* LOOKUP_FIRSTBLANK */
#include "chooseV3_12_12.h" /* CHOOSE */

#define BITOFLIST(x) (0x80000000>>((x-1)&0x1f)) /*0x80000000 >> ((x-1) % 32)*/
#define MINIMUM(x,y) ((x<y) ? (x) : (y))

/*

  1, 65  -> no tests necessary (maintaned in bitmaps)
 66, 129 -> no tests necessary (principle 1)
130, 131 -> 65 may occur twice (need diffs[1])
132, 133 -> 65,66 may occur twice (need diffs[2])
134, 135 -> 65,66,67 may occur twice (need diffs[3])

((NUMWORDS*32*2)+1)+(NUMDIFFS*2)

The left part represents the real size of our bitmaps (times 2, thanks to principle 1)
The right part represents the size of our overfolw test (times 2, thanks to principle 1)

*/

#define NUMWORDS 2	/* 2 words (2*32=64) -> we can create rulers up to a length of 64-1=63 */
                        /* but thanks to principle 1 we can create rulers up to ((64*2)-1)=127 */
#define MAXMARKS 19	/* depth of stacks -> we can place up to 18 marks (plus the "0" mark means OGR-19) */
#define NUMDIFFS 32	/* size of the array in which I can verify the golombness of rulers */
                        /* e.g. with NUMDIFFS 8 we can safely search for rulers up to 16 units longer */

static unsigned int halfTweakedGARSP(const int maxmarks, const int maxlength); /* prototype */


void binary (unsigned int value)
{
    
    int index;
    for(index=0;index<32;index++)
    {
        if(value & 0x80000000) {
            printf("1");
        } else {
            printf("0");
        }
        value = value << 1;
    }
}


static unsigned int halfTweakedGARSP(const int maxmarks, const int maxlength) {

    int depth;                    	/* current mark we are placing corresponds to the D in the PDF */
    int count[MAXMARKS];          	/* current ruler length (e.g. LENGTH[])*/
    int limit[MAXMARKS];		/* current limit (based on CHOOSE, MIDPOINT, etc.) */    
    unsigned int list[MAXMARKS][NUMWORDS];   /* list bitmap */
    unsigned int dist[MAXMARKS][NUMWORDS];   /* dist bitmap */
    unsigned int comp[MAXMARKS][NUMWORDS];   /* dist bitmap */
    unsigned int nodes=0;
    
    const int OGR[32] = { /*  1 */    0,   1,   3,   6,  11,  17,  25,  34,  44,  55,  72,
                          /* 12 */   85, 106, 127, 151, 177, 199, 216, 246, 283, 333, 356,
                          /* 23 */  372, 425, 480, 492, 553, 585, 623, 680, 747, 784 };
    
    const int halfdepth=(maxmarks/2);
    const int halflength=(!(maxmarks%2) && (maxlength%2)) ? (maxlength/2) : (maxlength/2)-1;

    if ((maxlength/2)-(NUMWORDS*32) > NUMDIFFS) { /* sanity check -> size of bitmaps */
        printf("Your NUMWORDS(%d) including the overflow space in NUMDIFFS(%d) are too short\n",NUMWORDS,NUMDIFFS);
        printf("Your current limit is ((NUMWORDS*32*2)+1)+(NUMDIFFS*2)=%d\n",(((NUMWORDS*32*2)+1)+(NUMDIFFS*2)));
        printf("You can either use more bitmaps (NUMWORDS) or enlarge the overflow test (NUMDIFFS)\n");
        return 1;
    } else if (maxmarks>=MAXMARKS) { /* sanity check -> depth of stack */
        // Most compilers would allow maxmarks==MAXMARKS because the 0-level of our arrays if never used. Therefor
        // it wouldnt matter if for example the list[] array spills one level into (the 0-level of) the dist[] array.
        // But for safety reasons we enforce the logical limit.
        // The reason why we keep the 0-level is to maintain readability of the code and because count[0] is actually used.
        printf("With MAXMARKS=%d you can only search up to OGR-%d but you requested OGR-%d\n",MAXMARKS,MAXMARKS,maxmarks+1);
        printf("Just raise the MAXMARKS setting and you are fine");
        return 1;
    } else if (maxmarks>32) { /* sanity check -> OGR[] limit (the CHOOSE[] limit is selfregulating) */
        // Example: OGR-13 (at position OGR[13-1=12]) is the highest value used in a search for OGR-14
        // As of this writing lists of currently known optimal ruler sizes are publically available from
        // http://www.cuug.ab.ca/~millerl/g3-records.html and http://www.research.ibm.com/people/s/shearer/grtab.html
        // Alternatively one could always use the naively constructed ruler 0,1,3,7,15,31,63,127,255,511,1023,....
        printf("In a search for OGR-(N) you must provide the limits for all rulers up to OGR-(N-1)");
        printf("Note that these limits (in the OGR[] array) dont need to be prooven optimal");
        printf("But the better your limits are the more can you reduce the searchspace");
        return 1;
    }
    
    /* Start at the very beginning (one mark set at position zero) */
    depth = 1;
    count[0] = 0;
    count[1] = 0;
    
    list[depth][0]=0;
    list[depth][1]=0;/*
    list[depth][2]=0;
    list[depth][3]=0;
    list[depth][4]=0;
    list[depth][5]=0;
    list[depth][6]=0;
    list[depth][7]=0;*/

    dist[depth][0]=0;
    dist[depth][1]=0;/*
    dist[depth][2]=0;
    dist[depth][3]=0;
    dist[depth][4]=0;
    dist[depth][5]=0;
    dist[depth][6]=0;
    dist[depth][7]=0;*/

    comp[depth][0]=0;
    comp[depth][1]=0;/*
    comp[depth][2]=0;
    comp[depth][3]=0;
    comp[depth][4]=0;
    comp[depth][5]=0;
    comp[depth][6]=0;
    comp[depth][7]=0;*/


/* CHOOSE */
if (CHOOSEMARKS>maxmarks-depth) {
    limit[depth] = maxlength-chooseV3[((dist[depth][0])>>(32-CHOOSEBITS))][(maxmarks-depth)];
} else {
    limit[depth] = maxlength-OGR[maxmarks-depth]; // midpoint reduction may supercede this
}

/* MIDPOINT */
if (depth<=halfdepth) {
    int tempLimit;
    if (CHOOSEMARKS>halfdepth-depth) {
        tempLimit = halflength-chooseV3[((dist[depth][0])>>(32-CHOOSEBITS))][(halfdepth-depth)];
    } else {
        tempLimit = halflength-OGR[halfdepth-depth];
    }
    limit[depth]=MINIMUM(limit[depth],tempLimit);
} else if ((depth==(halfdepth+1)) && (maxmarks%2)) {
    int tempLimit;
    tempLimit = maxlength-1-count[halfdepth];
    limit[depth]=MINIMUM(limit[depth],tempLimit);
    //printf("l%d r%d\n",count[halfdepth],maxlength-1-count[halfdepth]);
}


//goto limit_calc;

    /* search through the search space **** */
    while (1) {
        
    unsigned int newbit=1;

    stay_plus_node:
nodes++;

        /* Find s = the next available mark location for this level */
        /* shift LIST s steps to the right and shift COMP s steps to the left */
        
    stay:
        if (comp[depth][0] < 0xfffffffe) {
            int s = LOOKUP_FIRSTBLANK( comp[depth][0] );
            if ((count[depth] += s) > limit[depth])   goto up; /* no spaces left */
            /*EXTEND_MARK(lev, s); *//* shift by s (LIST to the right, COMP to the left) */
            {
                unsigned int temp0, temp1;
                int ss=32-s;
                temp0 = (list[depth][0]) << ss; list[depth][0] = (list[depth][0] >> s) | (newbit << ss);
                /*temp1 = (list[depth][1]) << ss;*/ list[depth][1] = (list[depth][1] >> s) | temp0;
//                  temp0 = (list[depth][2]) << ss; list[depth][2] = (list[depth][2] >> s) | temp1;
//                  temp1 = (list[depth][3]) << ss; list[depth][3] = (list[depth][3] >> s) | temp0; 
//                  temp0 = (list[depth][4]) << ss; list[depth][4] = (list[depth][4] >> s) | temp1;
//                  temp1 = (list[depth][5]) << ss; list[depth][5] = (list[depth][5] >> s) | temp0;
//                  temp0 = (list[depth][6]) << ss; list[depth][6] = (list[depth][6] >> s) | temp1;
//                  temp1 = (list[depth][7]) << ss; list[depth][7] = (list[depth][7] >> s) | temp0;
                
//                  temp1 = (comp[depth][7]) >> ss; comp[depth][7] = (comp[depth][7] << s) | temp0;
//                  temp0 = (comp[depth][6]) >> ss; comp[depth][6] = (comp[depth][6] << s) | temp1;
//                  temp1 = (comp[depth][5]) >> ss; comp[depth][5] = (comp[depth][5] << s) | temp0; 
//                  temp0 = (comp[depth][4]) >> ss; comp[depth][4] = (comp[depth][4] << s) | temp1;
//                  temp1 = (comp[depth][3]) >> ss; comp[depth][3] = (comp[depth][3] << s) | temp0;
//                  temp0 = (comp[depth][2]) >> ss; comp[depth][2] = (comp[depth][2] << s) | temp1;
                temp1 = (comp[depth][1]) >> ss; comp[depth][1] = (comp[depth][1] << s)/* | temp0*/;
                comp[depth][0] = (comp[depth][0] << s) | temp1;
                //newbit = 0;
            }
        } else { /* s>=32 */
            if ((count[depth] += 32) > limit[depth])  goto up; /* no spaces left */
            /*EXTEND_MARK_BY_WORD(lev); *//* shift by s (LIST to the right, COMP to the left) */
            { unsigned int temp = comp[depth][0]; {
                comp[depth][0] = comp[depth][1];
//                  comp[depth][1] = comp[depth][2];
//                  comp[depth][2] = comp[depth][3];
//                  comp[depth][3] = comp[depth][4];
//                  comp[depth][4] = comp[depth][5];
//                  comp[depth][5] = comp[depth][6];
//                  comp[depth][6] = comp[depth][7];
//                  comp[depth][7] = 0;
                    comp[depth][1] = 0; 
//                  list[depth][7] = list[depth][6];   
//                  list[depth][6] = list[depth][5];   
//                  list[depth][5] = list[depth][4];   
//                  list[depth][4] = list[depth][3];   
//                  list[depth][3] = list[depth][2];   
//                  list[depth][2] = list[depth][1];
                list[depth][1] = list[depth][0];
                list[depth][0] = newbit;
                newbit = 0;
                } if (temp == 0xffffffff) goto stay; }
        }


//        {
//            int i; for(i=0; i<=depth; i++ ) printf("%4d",count[i]); printf("\n");
//            printf("DIST: ");binary(dist[depth][0]);/*binary(dist[depth][1])*/;printf("\n");
//            printf("LIST: ");binary(list[depth][0]);/*binary(list[depth][1]);*/printf("\n");
//            printf("COMP: ");binary(comp[depth][0]);/*binary(comp[depth][1]);*/printf("\n");
//        }

        
        /* did we find a ruler with the desired number of marks? */
        if (depth == (maxmarks)) {
            int i,j;
//              unsigned int validation=0;
            char diffs[NUMDIFFS];
            for(i=0;i<NUMDIFFS;i++) diffs[i]=0; // initialize to 0
          
            for (i=1; i <= maxmarks; i++) {
                for (j=0; j<i; j++) {
                    int diff = count[i]-count[j];  // only need check (NUMWORDS*32)<diff<=(maxlength/2)
//                    	if (((diff*2)<=maxlength) && (diff>(NUMWORDS*32))) {
                    if ((diff*2)<=maxlength) { // principle 1
                        if (diff<=(NUMWORDS*32)) break; // our real bitmaps are always safe

                        diff -= (NUMWORDS*32)+1;
                        if (diffs[diff]!=0) { // we want to start saving at position 0 rather than 1
                            //printf("dupe %d(count[i]-count[j]:%d-%d)\n",diff,count[i],count[j]);
                            goto stay;
                        }
                        diffs[diff] = 1;

/*
                        diff -= (NUMWORDS*32)+1;
                        if (validation & 1<<diff) { // we want to start saving at position 0 rather than 1
                            //printf("dupe %d(count[i]-count[j]:%d-%d)\n",diff,count[i],count[j]);
                            goto stay;
                        }
                        validation = validation | 1<<diff;
*/
                    }
                }
            }
        
            printf("FOUND A RULER!!!\n");
            for(i=0; i<=maxmarks; i++ ) printf("%4d",count[i]); printf("\n");
            continue; // not keeping newbit at 0 will break list[maxmarks], but its OK as we dont need it anywhere
            //nodes++; goto stay; /* go back to the start of our while(1) loop*/
            //goto stay_plus_node;
        } else {
        /* if we didnt distribute all marks we need to go deeper...*/
        
            /*
            int depthm1=depth++;	   // one level deeper
            count[depth] = count[depthm1]; // pass on our current length

            // prepare LIST[D+1]=LIST+(new_mark)
            list[depth][0] = list[depthm1][0];
            list[depth][1] = list[depthm1][1];
            
            // prepare the new DIST[D+1]=DIST|LIST[D+1]
            dist[depth][0] = list[depth][0] | dist[depthm1][0];
            dist[depth][1] = list[depth][1] | dist[depthm1][1];
            
            comp[depth][0] = dist[depth][0] | comp[depthm1][0];
            comp[depth][1] = dist[depth][1] | comp[depthm1][1];
            */
                        
            //#define depthp1 depth+1
            int depthp1 = depth+1;

            // prepare LIST[D+1]=LIST+(new_mark)
            list[depthp1][0] = list[depth][0];
            list[depthp1][1] = list[depth][1];
//              list[depthp1][2] = list[depth][2];
//              list[depthp1][3] = list[depth][3];
//              list[depthp1][4] = list[depth][4];
//              list[depthp1][5] = list[depth][5];
//              list[depthp1][6] = list[depth][6];
//              list[depthp1][7] = list[depth][7];

            // prepare the new DIST[D+1]=DIST|LIST[D+1]
            dist[depthp1][0] = list[depthp1][0] | dist[depth][0];
            dist[depthp1][1] = list[depthp1][1] | dist[depth][1];
//              dist[depthp1][2] = list[depthp1][2] | dist[depth][2];
//              dist[depthp1][3] = list[depthp1][3] | dist[depth][3];
//              dist[depthp1][4] = list[depthp1][4] | dist[depth][4];
//              dist[depthp1][5] = list[depthp1][5] | dist[depth][5];
//              dist[depthp1][6] = list[depthp1][6] | dist[depth][6];
//              dist[depthp1][7] = list[depthp1][7] | dist[depth][7];

            // prepare COMP[D+1]=COMP|DIST[D+1]
            comp[depthp1][0] = dist[depthp1][0] | comp[depth][0];
            comp[depthp1][1] = dist[depthp1][1] | comp[depth][1];
//              comp[depthp1][2] = dist[depthp1][2] | comp[depth][2];
//              comp[depthp1][3] = dist[depthp1][3] | comp[depth][3];
//              comp[depthp1][4] = dist[depthp1][4] | comp[depth][4];
//              comp[depthp1][5] = dist[depthp1][5] | comp[depth][5];
//              comp[depthp1][6] = dist[depthp1][6] | comp[depth][6];
//              comp[depthp1][7] = dist[depthp1][7] | comp[depth][7];
            
            count[depthp1] = count[depth]; // pass on our current length
            depth++;			   // one level deeper

//limit_calc:

                /* CHOOSE */
                if (CHOOSEMARKS>maxmarks-depth) {
                    limit[depth] = maxlength-chooseV3[((dist[depth][0])>>(32-CHOOSEBITS))][(maxmarks-depth)];
                } else {
                    limit[depth] = maxlength-OGR[maxmarks-depth]; // midpoint reduction may supercede this
                }

            /* MIDPOINT */
            if (depth<=halfdepth) {
                int tempLimit;
                if (CHOOSEMARKS>halfdepth-depth) {
                    tempLimit = halflength-chooseV3[((dist[depth][0])>>(32-CHOOSEBITS))][(halfdepth-depth)];
                } else {
                    tempLimit = halflength-OGR[halfdepth-depth];
                }
                limit[depth]=MINIMUM(limit[depth],tempLimit);
            } else if ((depth==(halfdepth+1)) && (maxmarks%2)) {
                int tempLimit;
                tempLimit = maxlength-1-count[halfdepth];
                limit[depth]=MINIMUM(limit[depth],tempLimit);
                //printf("l%d r%d\n",count[halfdepth],maxlength-1-count[halfdepth]);
            }

            continue; /* go back to the start of our while(1) loop*/
        }

		up:
		if (--depth == 0) return nodes; /* EXIT if we reached end of search space */
		newbit=0; /* when we go up we want to move the existing mark and not create a new one */
        goto stay_plus_node;
    }

    return nodes;
}

#ifndef AS_MODULE

#include <sys/time.h>
#include <sys/resource.h>

double dtime()
{
    struct rusage rusage;
    double q;

    getrusage(RUSAGE_SELF,&rusage);

    q = (double)(rusage.ru_utime.tv_sec);
    q = q + (double)(rusage.ru_utime.tv_usec) * 1.0e-06;
	
    return q;
}

int main (int argc, const char * argv[])
{
    double starttime, benchtime;
    unsigned int nodes;
/*   
    {
      printf("sizeof(char)=%d sizeof(short)=%d sizeof(int)=%d sizeof(long)=%d sizeof(double)=%d\n\n",
         (int)sizeof(char),(int)sizeof(short),(int)sizeof(int),(int)sizeof(long),(int)sizeof(double));
   }
*/  
    printf("\nhalfTweakedGARSP started...\n");
    
    starttime = dtime();
    nodes = halfTweakedGARSP(14,151);
    benchtime = dtime() - starttime;
    
    printf("Nodes per second = %.3f ( %u nodes/ %.3f seconds)\n",nodes/benchtime,nodes,benchtime);    
 
    return 0;
}

#endif