/******************************************************************** * Optimal_Estimator Program oax5.c * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Copyright (c) 1995 Government of Canada ##################### # Disclaimer Notice # ##################### The Government of Canada shall have no liability or responsibility to any person or entity with respect to any liability, loss or damage caused or alleged to be caused directly or indirectly by use of Department of Fisheries and Oceans, Maritimes Region, Canada hardware and\or software and\or data, including but not limited to any interruption of service, loss of business or anticipatory profits or consequential damages resulting from the use or operation of Department of Fisheries and Oceans, Scotia-Fundy hardware and\or software and\or data. There is no service guarantee associated with access to Department of Fisheries and Oceans, Maritimes Region hardware and\or software and\or data, and any defect or fault found therein will not be the responsibility of the Government of Canada. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * * * Functions :1. Build a kd-tree from L given data points and find * * K nearest neighbours for each of M grid points. * * with the local scale. * * 2. Calculates optimal estimations of N dependent * * variables at M grid points from K nearest nei- * * ghbours (found fron kd tree search). * * * * Principle: 1. Heapsort bucket method with topdown search * * 2. Optimal Linear Estimation (Gauss-Markov theorem) * * * * Input: 1. Deck file: filename (user specified name and path) * * 2. data file: XXXXX.dat (user specified name and path) * * 3. grid file: XXXXX.grid (user specified name and path) * * * * Output: 1. log file : filename.log (Automatic generation) * * 2. result : filename.out (Automatic generation) * * * * Initial Release : Sept 1, 1994. * * By : Ian He * * * * BugFixs : Oct 22, 2002 * * By : Shawn Oakey * * ----------------------------------------------------------------- * * ************************** * * * Release Notes: * * * ************************** * * This oax5 program is based on the previous oax programs oax3 * * and oax4 which were developed before by Ian He, Ross Hendry and * * Gerry Boudreau. In this version 5, two major modifications are * * made. * * 1) The local coordinate transformation is made only to the * * necessary points within the bucket being searched which will * * save quite amount of time. * * 2) The nearest neighbour search routine is changed to be abale * * to handle any local scales which ensures that the right * * nearest neighbours are found independent of bucket size * *------------------------------------------------------------------ * * Major changes made: * DATE: Oct 22, 2002 * Function data_local * changed for(k=0;ktmp_array[MAXNIV] * 2) data_local moved into bucketsearch() function. * 3) find out the maximum local scale in get_neighbour() * and pass a parameter lscale_max into nodesearch() * 4) replace local_scale[cutdim] with lscale_max in * function. * 5) all other related function parameters passing ********************************************************************/ #include #include #include #include #include #include #define MAXNIV 10 /* Maximum number of independent variables */ #define MAXNDV 10 /* Maximum number of dependent variables */ #define MAXNBS 400 /* Maximum number of nearest neighbours */ #define HUGEVAL 9.9e50 /* Initial huge value for nearest neighbour search */ #define RAD 57.295779513 /***************************************************** Declaration of kd tree structure -----------------------------------------------------*/ struct kdnode { struct kdnode *father,*lt_son,*ge_son; int cutdim,lt_start,lt_count,ge_start,ge_count; float cutval; }; /***************************************************** Declaration of deck structure -----------------------------------------------------*/ struct Variable { char variables[80]; struct Variable *next; }; struct Scale { float value; struct Scale *next; }; struct Deck { struct Variable *dep,*indep; struct Scale *gscale; char Datafile_name[80],Gridfile_name[80],Method_name[80]; int num_data,num_grid,num_dep,num_indep,num_closet,Bucket; float noise,obv[MAXNBS][MAXNDV],global_scale[MAXNIV]; }; /***************************************************** Declaration of filename and debuger structure -----------------------------------------------------*/ struct Flname { char dflag[20],in_name[80],out_name[80],log_name[80]; FILE *fp1,*fp2; }; /***************************************************** Declaration of functions -----------------------------------------------------*/ void static error_estimate(float[][MAXNBS+1], float[],float[],int,char[], FILE*,char[]); void static data_load(int[],float*[],int*,char[],int,int,FILE*); void static print_kdtree(struct kdnode*,int,FILE*,char[]); void static grid_load(char[],float[],float*,float[],int,char[]); void static get_neighbour(struct kdnode*,int[],float*[],float[], float[][MAXNBS],float,int,int,int,float[],float[][MAXNIV],float[],FILE*, FILE*,char[]); void static nodesearch(struct kdnode*,double[],int[],int*,int[],float*[], float,int,int,int,float[],float,float[]); void static heapinsert(double[],int[],int*,double,int); void static heapremove(double[],int[],int*,int*,double*); void static cova_matrix(float[][MAXNBS+1],float[][MAXNBS+1],float[],float[], float[][MAXNBS],int,int,FILE*,char[]); void static cova_func(int,float[],int,float[],float[][MAXNBS+1],float[], float[],int); void static invert_cova_matrix(float[][MAXNBS+1],int,FILE*,char[]); void static cova_vector(float[],float[],float[][MAXNBS],int,int,float[], FILE*,char[]); void static cova_vecfunc(int,float[],float[],float[],float[],int); void static obtain_weight(float[][MAXNBS+1],float[][MAXNBS],float[],int,char[], int,float[][MAXNDV],FILE*,char[]); void static calculate_weight(float[][MAXNBS+1],int,float[][MAXNBS], float[][MAXNBS],int,int,float[][MAXNDV]); void static optimal_estimation(float[],float[][MAXNBS],float[],float[],int, int,char[],FILE*,char[]); void static oa_output(float[],float[],int,FILE*); void static message(int,char[],FILE*); void static message1(int,char[],clock_t,int,FILE*,int,int); void static oa_end(int,struct Flname*); void static become_local(float,float[][MAXNBS],int,float[]); void static data_local(int,int,float,float*[],int[],float[],float[]); void static findmaxspread(int[],int,int*,float*[],int,float[]); void static heapsort(int,float[],int[]); void static bucketsearch(double[],int[],struct kdnode*,int,int,int*, int[],float*[],float,int,int,int,float[],float,float[]); void static heapreplace(double[],int[],int*,double,int); void static heapup(double[],int[],int); void static heapdown(double[],int[],int*,int); float static ldist(float[],float[],float[],int); float static findmedian(int[],int,int,float*[]); float static gdist(float[],int,float[],float[]); struct Deck static *getDeckNode(void); struct Deck static *oa_start(struct Deck*,FILE*,char[]); struct kdnode static *build_kdtree(int,int,int[],float*[],int,float[],int,int); struct Flname static *oa_begin(struct Flname*,int,char*[]); int static check_data(char[]); FILE static *grid_open(int,char[],int*,FILE*,char[]); struct Variable static *AddVariableNode(struct Variable*,char*); struct Scale static *AddScaleNode(struct Scale*,float); /*========================================================== FUNCTION NAME: main() PURPOSE: Control the whole process at the top level. 1) Check the number of data points and allocate the memory for data_array and other arraies. 2) build kd tree (from global scale) 3) search for the nearest neighbours(from local scale) 4) do optimal estimations INPUT: command line options RETURN : standard output messages ===========================================================*/ main(int argc, char *argv[]) { struct kdnode *root; struct Deck *deck; struct Flname *name; float A[MAXNBS+1][MAXNBS+1],ivA[MAXNBS+1][MAXNBS+1], C[MAXNBS],w[MAXNDV][MAXNBS],est[MAXNDV],error[MAXNDV], est_mean[MAXNDV],noise[MAXNBS+1],local_scale[MAXNIV], angle,neighbour_array[MAXNIV+MAXNDV+1][MAXNBS], target[MAXNIV],*data_array[MAXNIV+MAXNDV+1]; char buf[200]; int i,ndp,ngp,*index_array,NDATA; clock_t ctime0; FILE *input; NDATA=0; angle = 0.0; if((name = (struct Flname*) malloc(sizeof(struct Flname)))==NULL){ printf("%s\n","Could not allocate memory for struct Flname -- aborting"); exit(EXIT_FAILURE); } deck=getDeckNode(); name=oa_begin(name,argc,argv); deck = oa_start(deck,name->fp1,name->in_name); ndp = check_data(deck->Datafile_name); if((index_array = (int *) malloc(sizeof(int)*(ndp)))==NULL){ printf("%s\n","Could not allocate memory for index_array -- aborting"); exit(EXIT_FAILURE); } for(i=0;inum_indep+deck->num_dep+1;i++)/*The actual memory allocated is only deck->num_indep+deck->num_dep+1*/ if((data_array[i] = (float *) malloc( sizeof(float)*(ndp)))==NULL){ printf("%s\n","Could not allocate memory for data_array -- aborting"); exit(EXIT_FAILURE); } ctime0 = clock(); data_load(index_array,data_array,&NDATA, deck->Datafile_name,deck->num_indep,deck->num_dep,name->fp1); root = build_kdtree(0,NDATA,index_array,data_array,deck->num_indep, deck->global_scale,deck->Bucket,NDATA); print_kdtree(root,NDATA,name->fp1,name->dflag); message1(7,name->dflag,ctime0,NDATA,name->fp1,deck->num_indep,deck->Bucket); ctime0 = clock(); input=grid_open(NDATA,deck->Gridfile_name,&deck->num_closet,name->fp1,name->dflag); ngp=0; while (fgets(buf, 200, input)!=NULL) { grid_load(buf,local_scale,&angle,target,deck->num_indep,deck->Gridfile_name); ngp=ngp+1; get_neighbour(root,index_array,data_array,noise,neighbour_array, angle,deck->num_closet,deck->num_indep,deck->num_dep, local_scale,deck->obv,target,name->fp1,name->fp2,name->dflag); /* The following section is for gnuplot to check the distribution of the neighbours. I commented out for this final release version ---------------------------------------------------------------- fpp=fopen("tt.tmp","w"); for(i=0;inum_closet;i++){ for(ii=0;iinum_indep;ii++){ fprintf(fpp,"%f ",neighbour_array[ii][i]); } fprintf(fpp,"\n"); } fclose(fpp); */ become_local(angle,neighbour_array,deck->num_closet,target); cova_matrix(A,ivA,noise,local_scale,neighbour_array, deck->num_closet,deck->num_indep,name->fp1,name->dflag); invert_cova_matrix(ivA,deck->num_closet,name->fp1,name->dflag); obtain_weight(ivA,w,est_mean,deck->num_closet,deck->Method_name, deck->num_dep,deck->obv,name->fp1,name->dflag); cova_vector(C,local_scale,neighbour_array,deck->num_closet, deck->num_indep,target,name->fp1,name->dflag); optimal_estimation(C,w,est,est_mean,deck->num_closet,deck->num_dep, deck->Method_name,name->fp1,name->dflag); error_estimate(ivA,C,error,deck->num_closet,deck->Method_name, name->fp1,name->dflag); oa_output(est,error,deck->num_dep,name->fp2); } message1(8,name->dflag,ctime0,NDATA,name->fp1,deck->num_indep,deck->Bucket); oa_end(ngp,name); return 0; } /*========================================================== FUNCTION NAME: check_data() PURPOSE: get the number of data point from data file INPUT: data filename RETURN : ndp ===========================================================*/ int check_data(char Datafile_name[]) { int n; char buf[200]; FILE *indata; if((indata=fopen(Datafile_name,"r")) == NULL){ printf("%s%s%s\n", "Data file ",Datafile_name, " does not exist, goodbye !"); exit(0); } n=0; while (fgets(buf, 200, indata)!=NULL) n=n+1; (void) fclose(indata); return n; } /*========================================================== FUNCTION NAME: oa_begin() PURPOSE: Start the oa program and pass the command line take down the program starting time create log file and out file and open them INPUT: command line command deck file name RETURN : opened log file and out file and other messages ===========================================================*/ struct Flname *oa_begin(struct Flname *name,int argc,char *argv[]) { struct tm *ptr; time_t now; char *file_name,*c; char bf[80],bf1[80],bf2[80]; FILE *tmp_fp1,*tmp_fp2; tmp_fp1=NULL; if(argc==1){ message(0,"none",tmp_fp1); exit(0); } file_name = argv[1]; if(argv[2] != NULL){ strcpy(name->dflag,argv[2]); } else{ sprintf(bf,"%s","none"); strcpy(name->dflag,bf); } sprintf(bf,"%s%s",file_name,".deck"); strcpy(name->in_name,bf); sprintf(bf1,"%s%s",file_name,".log"); strcpy(name->log_name,bf1); tmp_fp1=fopen(name->log_name,"w"); name->fp1=tmp_fp1; sprintf(bf2,"%s%s",file_name,".out"); strcpy(name->out_name,bf2); tmp_fp2=fopen(name->out_name,"w"); name->fp2=tmp_fp2; fprintf(name->fp1,"%s%s","File: ",name->log_name); message(1,"none",name->fp1); (void) time(&now); ptr=localtime(&now); c=asctime(ptr); (void) puts(c); fprintf(name->fp1,"Creation Time: "); (void) fputs(c,name->fp1); message(2,name->in_name,name->fp1); return name; } /*========================================================== FUNCTION NAME: oa_start() PURPOSE: Load the information from deck file INPUT: deck file (deck file name from command line) RETURN : a pointer to Deck structure which holds the dependent, independent, Data filename, Scale, Grid filename, bucket and num_closet. ===========================================================*/ struct Deck* oa_start(struct Deck *deck,FILE *fp1,char in_name[]) { FILE *input1; char buf[200],str[80]; char *p,*q; struct Variable *tmp_dep,*tmp_indep; struct Scale *tmp_scale; struct Scale *ip; int k; if ((input1=fopen(in_name,"r")) == NULL) { printf("\n\n%s%s%s\n", "File ",in_name," Not Exist! Try Again.\n\n"); fprintf(fp1,"\n\n%s%s%s\n", "File ",in_name," Not Exist! Try Again.\n\n"); exit(0); } /*initialize the deck structure*/ deck=getDeckNode(); /*read the deck file*/ while (fgets(buf, 200, input1)!=NULL) { /*load one line from the deck file*/ (void) sscanf(buf,"%s", str); /*check whether the dependent variables are in the line and if so, scan them in and record the number */ p = &str[0]; if((*p++ =='D')&&(*p == 'E')){ int num; num = 0; q = &buf[strlen(p)]; fprintf(fp1,"\nDEPENDENT"); tmp_dep=NULL; while (*q!='\0' || strlen(q) ==1) { if (*q++==' ' && *q!=' ' && (size_t) strlen(q)>1){ num++; (void) sscanf(q,"%s", str); tmp_dep=AddVariableNode(tmp_dep,&str[0]); } } deck->num_dep = num; deck->dep = tmp_dep; tmp_dep =(struct Variable*)NULL; tmp_dep = deck->dep; while(tmp_dep !=(struct Variable*)NULL){ fprintf(fp1," %s ",tmp_dep->variables); tmp_dep = tmp_dep->next; } } /*check whether the independent variables are in the line and if so, scan them in and record the number*/ p = &str[0]; if((*p++ =='I')&&(*p == 'N')){ int num; num=0; fprintf(fp1,"\n%s ","INDEPENDENT"); q= &buf[strlen(p)]; tmp_indep=NULL; while (*q!='\0' || strlen(q) ==1) { if (*q++==' ' && *q!=' ' && (size_t) strlen(q)>1){ (void) sscanf(q,"%s", str); num++; tmp_indep=AddVariableNode(tmp_indep,&str[0]); } } deck->num_indep = num; deck->indep = tmp_indep; while(tmp_indep !=(struct Variable*)NULL){ fprintf(fp1,"%s ",tmp_indep->variables); tmp_indep= tmp_indep->next; } } /*check whether the GLOBAL_SCALE variables are in the line and if so, scan them in */ p = &str[0]; if((*p++ =='G')&&(*p == 'L')){ float aValue; fprintf(fp1,"\n%s ","GLOBAL_SCALE"); q= &buf[strlen(p)]; tmp_scale=NULL; while (*q!='\0' || strlen(q) ==1) { if (*q++==' ' && *q!=' ' && (size_t) strlen(q)>1){ (void) sscanf(q,"%f", &aValue); tmp_scale=AddScaleNode(tmp_scale,aValue); } } deck->gscale= tmp_scale; while(tmp_scale!=(struct Scale*)NULL){ fprintf(fp1,"%f ",tmp_scale->value); tmp_scale=tmp_scale->next; } } /*check whether the GRIDFILE variables are in the line and if so, scan them in and record the grid filename*/ p = &str[0]; if((*p++ =='G')&&(*p == 'R')){ fprintf(fp1,"\n%s ","GRIDFILE"); q= &buf[strlen(p)]; while (*q!='\0' || strlen(q) ==1) { if (*q++==' ' && *q!=' ' && (size_t) strlen(q)>1){ (void) sscanf(q, "%s",deck->Gridfile_name); break; } } fprintf(fp1," %s",deck->Gridfile_name); } /*check whether the DATAFILE variables are in the line and if so, scan them in and record the data filename*/ p = &str[0]; if((*p++ =='D')&&(*p == 'A')){ fprintf(fp1,"\n%s ","DATAFILE"); q= &buf[strlen(p)]; while (*q!='\0' || strlen(q) ==1) { if (*q++==' ' && *q!=' ' && (size_t) strlen(q)>1){ (void) sscanf(q, "%s",deck->Datafile_name); break; } } fprintf(fp1," %s",deck->Datafile_name); } /*check whether the METHOD keyword is in the line and if so, scan it in and record it */ p = &str[0]; if((*p++ =='M')&&(*p == 'E')){ fprintf(fp1,"\n%s ","METHOD"); q = &buf[strlen(p)]; while (*q!='\0' || strlen(q) ==1) { if (*q++==' ' && *q!=' ' && (size_t) strlen(q)>1){ strcpy(deck->Method_name,q); break; } } fprintf(fp1," %s",deck->Method_name); } /*check whether the deck->Bucket keyword is in the line and if so, scan it in and record the bucket size*/ p = &str[0]; if((*p++ =='B')&&(*p == 'U')){ int bu; fprintf(fp1,"\n%s","deck->Bucket"); q= &buf[strlen(p)]; while (*q!='\0' || strlen(q) ==1) { if (*q++==' ' && *q!=' ' && (size_t) strlen(q)>1){ (void) sscanf(q,"%d",&bu); deck->Bucket = bu; break; } } fprintf(fp1," %d",deck->Bucket); } /*check whether the NUM_CLOSEST keyword is in the line and if so, scan it in and record the number*/ p = &str[0]; if((*p++ =='N')&&(*p == 'U')){ int bu; fprintf(fp1,"\n%s ","NUM_CLOSEST"); q= &buf[strlen(p)]; while (*q!='\0' || strlen(q) ==1) { if (*q++==' ' && *q!=' ' && (size_t) strlen(q)>1){ (void) sscanf(q,"%d",&bu); deck->num_closet = bu; break; } } fprintf(fp1," %d",deck->num_closet); } /*to this point, one line of deck has been read. The program will go back to the beginning of the loop and read the next line from the deck file and repeat the above process until the end of deck file.*/ } /*-- end of while --*/ /*adjust the space and close the deck file*/ fprintf(fp1,"%s\n\n"," "); (void) fclose(input1); ip=deck->gscale; for(k=0;knum_indep;k++){ deck->global_scale[k]=ip->value; ip=ip->next; } /*return the deck pointer to main*/ return deck; } /* End of oa_start() */ /*The function for adding the variable node to independent variables*/ struct Variable *AddVariableNode(struct Variable *tmp,char* name){ if (tmp==NULL) { if((tmp = (struct Variable*)malloc(sizeof(struct Variable)))==NULL){ printf("Could not allocate memory for struct Variable -- aborting"); exit(EXIT_FAILURE); } strcpy(tmp->variables,name); tmp->next=NULL; }else { tmp->next = AddVariableNode(tmp->next, name); } return tmp; } /*The function for adding the variable node to scale variable*/ struct Scale *AddScaleNode(struct Scale *tmp, float value){ if (tmp==NULL) { if((tmp = (struct Scale*)malloc(sizeof(struct Scale)))==NULL){ printf("Could not allocate memory for struct Scale -- aborting"); exit(EXIT_FAILURE); } tmp->value=value; tmp->next=NULL; }else { tmp->next = AddScaleNode(tmp->next, value); } return tmp; } /* The function for initializing deck structure */ struct Deck *getDeckNode(void) { struct Deck *tmp; if((tmp = (struct Deck*) malloc(sizeof(struct Deck)))==NULL){ printf("Could not allocate memory for struct Deck -- aborting"); exit(EXIT_FAILURE); } tmp->num_indep=0; tmp->indep=(struct Variable*)NULL; tmp->gscale=(struct Scale*)NULL; tmp->num_closet=2; tmp->Bucket=2; return tmp; } /*========================================================== FUNCTION NAME: data_load() PURPOSE: load data from data file INPUT: data filename, deck->num_indep,deck->num_dep RETURN : data_array[][],index_array[],NDATA ===========================================================*/ void data_load(int index_array[],float *data_array[],int *NDATA, char Datafile_name[],int num_indep,int num_dep,FILE *fp1) { int i,n; char *p,buf[200]; float tmp_xpt; FILE *indata; if((indata=fopen(Datafile_name,"r")) == NULL){ printf("%s%s%s\n", "Data file ",Datafile_name, " does not exist, goodbye !"); exit(0); } n=0; while (fgets(buf, 200, indata)!=NULL) { p= &buf[0]; while (*p == ' ' && *p != '\n') p++; for(i=0;inum_indep,deck->Bucket,global_scale RETURN : root (pointer to kdnode structure) ===========================================================*/ struct kdnode *build_kdtree(int Start,int Count,int index_array[], float *data_array[],int num_indep,float global_scale[],int Bucket,int NDATA) { struct kdnode *root; int M; if((root = (struct kdnode*) malloc(sizeof(struct kdnode)))==NULL){ printf("Could not allocate memory for kdnode root -- aborting"); exit(EXIT_FAILURE); } if(NDATA>1){ /*create and initialize a tree tree node*/ root->father = NULL; root->lt_son = NULL; root->ge_son = NULL; root->cutdim = 0; /*find dimension with maximum (weighted) spread*/ findmaxspread(index_array+Start,Count,&root->cutdim,data_array, num_indep,global_scale); /*find the median value along that dimension so the tree will be balanced*/ root->cutval = findmedian(index_array+Start,Count,root->cutdim,data_array); M=Count/2; root->lt_count=M; root->ge_count=Count-M; root->lt_start=Start; root->ge_start=Start+M; } else { root->father = root; root->lt_count=0; root->lt_start=0; root->ge_count=1; root->ge_start=0; root->lt_son = NULL; root->ge_son = NULL; root->cutdim=0; root->cutval = -99999; } /*if the number of data in little son branch is more than bucket size, continue to build tree by recursively call build_kdtree()*/ if (root->lt_count>Bucket) { root->lt_son = build_kdtree(root->lt_start,root->lt_count,index_array, data_array,num_indep,global_scale,Bucket,NDATA); root->lt_son->father=root; } /*if the number of data in great son branch is more than bucket size, continue to build tree by recursively call build_kdtree()*/ if (root->ge_count>Bucket) { root->ge_son = build_kdtree(root->ge_start,root->ge_count,index_array, data_array,num_indep,global_scale,Bucket,NDATA); root->ge_son->father=root; } /*finally when the data points in both branchs are less than or equal to the bucket size, tree building is finished, return the kdnode pointer root to main() */ return root; } /*-- end of build_kdtree() --*/ /*================================================================ FUNCTION findmaxspread() ~~~~~~~~~~~~~~~~~~~~~~~~ Purpose: finds the dimension with the maximum (weighted) spread ------- Input : global_scale,data_array,NDATA,deck->num_indep Output: return cut dimension "cutdim" =================================================================*/ void findmaxspread(int ii[],int nd,int *cutdim,float *data_array[], int num_indep,float global_scale[]) { int k,n; float minvalue, maxvalue, maxspread, x; *cutdim= -1; maxspread = -1; for (k=0;kmaxvalue) maxvalue=x;*/ if (x-maxvalue > FLT_EPSILON) maxvalue=x; } x = (maxvalue-minvalue) / global_scale[k]; /* if (x>maxspread) {*/ if (x-maxspread > FLT_EPSILON) { maxspread = x; *cutdim = k; } } } /*=============================================================== FUNCTION findmedian() ~~~~~~~~~~~~~~~~~~~~~ Purpose: findmedian finds the median value of a set of numbers -------- Input : data_array,NDATA,cutdim Output: returns a float variable "median" ================================================================*/ float findmedian(int ii[],int nd,int cutdim,float *data_array[]) { int n, np, np2; float *x, median; if((x = (float *) malloc( sizeof(float)*(nd+1)))==NULL){ printf("Could not allocate memory for array x -- aborting"); exit(EXIT_FAILURE); } for (n=0;n> 1)+1; ir=n; for (;;) { if (l > 1) { rra=ra[--l]; rrb=rb[l]; } else { rra=ra[ir]; rrb=rb[ir]; ra[ir]=ra[1]; rb[ir]=rb[1]; if (--ir == 1) { ra[1]=rra; rb[1]=rrb; return; } } i=l; j=l << 1; while (j <= ir) { /* if (j < ir && ra[j] < ra[j+1]) ++j;*/ if (((float) j-ir) < FLT_EPSILON && (ra[j]-ra[j+1]) < FLT_EPSILON) ++j; /* if (rra < ra[j]) {*/ if ((rra-ra[j]) < FLT_EPSILON) { ra[i]=ra[j]; rb[i]=rb[j]; j += (i=j); } else j=ir+1; } ra[i]=rra; rb[i]=rrb; } } /*================================================================= FUNCTION print_kdtree() ~~~~~~~~~~~~~~~~~~~~~~~ Purpose: print_kdtree recursively traverses and dumps the tree the same logic could easily be used to do this graphically Input : node,deck->num_indep Output :tree node structure ==================================================================*/ void print_kdtree(struct kdnode *node,int NDATA,FILE *fp1,char dflag[]) { if(strcmp(dflag,"-d3")== 0) { if(NDATA>1){ fprintf(fp1,"\n \n NODE %d \n",(int) node); fprintf(fp1,"\n Bounding box is\n"); fprintf(fp1,"divided at dimension %d into %d data points < %f \n", node->cutdim,node->lt_count,node->cutval); fprintf(fp1," and %d data points >= %f\n", node->ge_count,node->cutval); if ((node->lt_count>1) || (node->ge_count>1)) fprintf(fp1,"as follows: \n"); if (node->lt_son!=NULL) print_kdtree(node->lt_son,NDATA,fp1,dflag); if (node->ge_son!=NULL) print_kdtree(node->ge_son,NDATA,fp1,dflag); } else fprintf(fp1,"There is only one data point in the root node\n"); } } /*========================================================== FUNCTION NAME: grid_open() PURPOSE: open the grid file and check the number of nearest neighbour a and the number of data points INPUT: deck->Gridfile_name,NDATA,num_closet RETURN : messages ===========================================================*/ FILE *grid_open(int NDATA,char Gridfile_name[],int *num_closet,FILE *fp1, char dflag[]) { FILE *input; if ((input=fopen(Gridfile_name,"r")) == NULL) { printf("%s%s%s\n", "Grid file ",Gridfile_name, " does not exist, goodbye !"); exit(0); } if(strcmp(dflag,"-d1")== 0 || strcmp(dflag,"-d2")== 0) message(3,"none",fp1); if(*num_closet<1){ message(4,"none",fp1); exit(0); } if(NDATA<*num_closet){ message(5,"none",fp1); *num_closet = NDATA; } return input; } /*========================================================== FUNCTION NAME: grid_load() PURPOSE: load the coordinates of a grid point and an angle INPUT : buf[200](character array for holding the the string of a grid point coordinate) RETURN : target[MAXNIV] (float type array for storing the grid point coordinate) angle ( for clockwise rotation of axes) ===========================================================*/ void grid_load(char buf[],float local_scale[],float *angle,float target[], int num_indep,char Gridfile_name[]) { int i; float tmp_xpt; char *pp; FILE *tmp; tmp=NULL; pp= &buf[0]; while (*pp == ' ' && *pp != '\n') pp++; for(i=0; i< num_indep;i++) { (void) sscanf(pp,"%f",&tmp_xpt); target[i] = tmp_xpt; while (*pp != ' ' && *pp != '\n') pp++; while (*pp == ' ' && *pp != '\n') pp++; } if (*pp == '\n' && i != 2*num_indep) { message(6,Gridfile_name,tmp); exit(0); } (void) sscanf(pp,"%f",&tmp_xpt); *angle = tmp_xpt; while (*pp != ' ' && *pp != '\n') pp++; while (*pp == ' ' && *pp != '\n') pp++; if (*pp == '\n' && i !=2*num_indep) { message(6,Gridfile_name,tmp); exit(0); } for(i=num_indep+1; i< num_indep+1+num_indep;i++) { (void) sscanf(pp,"%f",&tmp_xpt); local_scale[i-num_indep-1] = tmp_xpt; while (*pp != ' ' && *pp != '\n') pp++; while (*pp == ' ' && *pp != '\n') pp++; if (*pp == '\n' && i != 2*num_indep) { message(6,Gridfile_name,tmp); exit(0); } } } /*========================================================== FUNCTION NAME: get_neighbour() PURPOSE: get the nearest neighbours of a given grid INPUT : data_array[][],target[],root,deck->num_indep,num_closet index_array[],HUGEVAL. RETURN : neighbour_array, deck->obv[][],nndist[] ===========================================================*/ void get_neighbour(struct kdnode *root,int index_array[],float *data_array[], float noise[],float neighbour_array[][MAXNBS],float angle,int num_closet, int num_indep,int num_dep,float local_scale[],float obv[][MAXNIV], float target[],FILE *fp1,FILE *fp2,char dflag[]) { int heapbottom,k,kk,p1,index,indexa[MAXNBS],nn[MAXNBS+1],i1; double value,valuea[MAXNBS]; double nndist[MAXNBS+1]; float lscale_max; for(i1=0;i1num_indep,node,nndist,nn,data_array, global_scale,heapbottom,num_closet,lscale_max Output: nndist[],nn[] ==================================================================*/ void nodesearch(struct kdnode *node,double nndist[],int nn[],int *heapbottom, int index_array[],float *data_array[],float angle,int num_closet, int num_indep,int num_dep,float local_scale[],float lscale_max,float target[]) { /* printf("target , cut_val ,cutdim %f %f %d\n", target[node->cutdim],node->cutval,node->cutdim); */ /* if(target[node->cutdim] < node->cutval) {*/ if((target[node->cutdim]-node->cutval) < FLT_EPSILON) { bucketsearch(nndist,nn,node->lt_son,node->lt_start, node->lt_count,heapbottom,index_array,data_array,angle, num_closet,num_indep,num_dep,local_scale,lscale_max,target); /* printf("cutval-target/lscale , nndist[1] %f %f\n", (node->cutval-target[node->cutdim])/local_scale[node->cutdim],nndist[1]);*/ /* if ((node->cutval-target[node->cutdim])/lscale_max < nndist[1])*/ if (((node->cutval-target[node->cutdim])/lscale_max - nndist[1]) < FLT_EPSILON) bucketsearch(nndist,nn,node->ge_son,node->ge_start, node->ge_count,heapbottom,index_array,data_array,angle, num_closet,num_indep,num_dep,local_scale,lscale_max,target); } else { bucketsearch(nndist,nn,node->ge_son,node->ge_start, node->ge_count,heapbottom,index_array,data_array,angle, num_closet,num_indep,num_dep,local_scale,lscale_max,target); /* printf("cutval-target/lscale , nndist[1] %f %f\n", (node->cutval-target[node->cutdim])/local_scale[node->cutdim],nndist[1]);*/ /* if ((target[node->cutdim]-node->cutval)/lscale_max < nndist[1])*/ if (((target[node->cutdim]-node->cutval)/lscale_max - nndist[1]) < FLT_EPSILON) bucketsearch(nndist,nn,node->lt_son,node->lt_start, node->lt_count,heapbottom,index_array,data_array,angle, num_closet,num_indep,num_dep,local_scale,lscale_max,target); } } /*=================================================================== FUNCTION bucketsearch() ~~~~~~~~~~~~~~~~~~~~~~~ Purpose: check whether this is a terminal bucket, if so, search the terminal bucket for nearest neighbours, otherwise, go back to the nodesearch() function to search the node. Input : target,deck->num_indep,nndist,nn,data_array, son,start,count,heapbottom,num_closet,lscale_max Output: nndist[],nn[] =====================================================================*/ void bucketsearch(double nndist[],int nn[],struct kdnode *son,int start, int count,int *heapbottom,int index_array[],float *data_array[],float angle, int num_closet,int num_indep,int num_dep,float local_scale[],float lscale_max, float target[]) { int i; double testdist; float tmp_array[MAXNIV]; if(son==NULL) { for (i=start;inum_indep,tmp_array,index_array,global_scale Output : returns a float variable which is a locally scaled distance between two points. NOTE: the target array holds the global coordinates of grid points. the tmp_array holds the transformed local coordinates of data points which are originated at grid point. Therefore, target[] should always be zero in the distance calculation. that's why we have (for k=0,1): ddist = tmp_array[k]/local_scale[k]; not ddist=(target[k]-tmp_array[k])/local_scale[k]; ==================================================================*/ float gdist(float tmp_array[],int num_indep,float local_scale[],float target[]) { float rdist, ddist, gridtmp[MAXNIV]; int k; rdist=0.0; for(k=0;k= nndist[j]) break;*/ if ((v - nndist[j]) >= FLT_EPSILON) break; nndist[downitem] = nndist[j]; /*printf("**downitem,nndist[downitem] %d %f\n",downitem,nndist[downitem]);*/ nn[downitem] = nn[j]; downitem = j; } nndist[downitem] = v; nn[downitem] = i; /*printf("downitem,nndist[downitem] %d %f\n",downitem,nndist[downitem]);*/ } /*========================================================== FUNCTION NAME: data_local() PURPOSE: At this particular grid point transform the given data point to a local coordinate system corresponding to this grid point local coordinate system. (a translation + rotation in XY plane) INPUT: data_array[][],i,num_indep,tmp_array,target[], angle,RAD RETURN : transformed tmp_array[] in local frame of the given grid. Here only data_array[0][id] and data_array[1][id] are transformed. The other elements of the neighbour array do not change because the transformation only takes place in XY plane. Once the data array becomes local, we can search the nearest neighbours by local scale. ===========================================================*/ void data_local(int i,int num_indep,float angle,float *data_array[], int index_array[],float tmp_array[],float target[]) { int k,i1; float c,s; float xo,yo,dpt[2],gpt[2]; for(i1=0;i1<2;i1++){dpt[i1]=0;gpt[i1]=0;} c=(float) cos(angle/RAD); s=(float) sin(angle/RAD); /* for(k=0;knum_indep,num_closet RETURN : covariance matrix A[][] ===========================================================*/ void cova_matrix(float A[][MAXNBS+1],float ivA[][MAXNBS+1],float noise[], float local_scale[],float neighbour_array[][MAXNBS],int num_closet, int num_indep,FILE *fp1,char dflag[]) { int id1,id2,ik,k; float pt1[MAXNIV],pt2[MAXNIV]; for(id1=1;id1<=num_closet;id1++) { ik=0; for(k=0;knum_indep(number of independent variables) pt1[],pt2[](two arrays for holding the coordinates of two points) Output: r (distance between pt1 and pt2) ===========================================================*/ float ldist(float pt1[],float pt2[],float local_scale[],int num_indep) { int k; float r; r=0; for(k=0;k> Chinese edition, Publiched by Qinghua University press, China.) ============================================================*/ void invert_cova_matrix(float ivA[][MAXNBS+1],int num_closet,FILE *fp1, char dflag[]) { float ww,g,b[MAXNBS+1]; int k,m,i,j,ik,i1; int n= num_closet; for(i1=0;i1<(MAXNBS+1);i1++){b[i1]=0;} for(k=1;k<=n;k++) { m=n-k+1; ww=ivA[1][1]; for(i=2;i<=n;i++){ g=ivA[i][1]; b[i]=g/ww; if(i<=m)b[i] = -b[i]; for(j=2;j<=i;j++) ivA[i-1][j-1]=ivA[i][j]+g*b[j]; } ivA[n][n]=(float) 1.0/ww; for(i=2;i<=n;i++) ivA[n][i-1]=b[i]; } for(i=1;i<=n-1;i++) for(j=i+1;j<=n;j++) ivA[i][j]=ivA[j][i]; if(strcmp(dflag,"-d2")== 0) fprintf(fp1,"\n%s\n"," ---- Invert Covariance Matrix IvA"); for(j=1;j<=n;j++) { ik=0; for(i=1;i<=n;i++) { ik=ik+1; ivA[i-1][j-1]=ivA[i][j]; if(strcmp(dflag,"-d2")== 0){ fprintf(fp1,"%f ",ivA[i][j]); if(ik == 8) { fprintf(fp1,"\n"); ik=0; } } } if(strcmp(dflag,"-d2")== 0) fprintf(fp1,"\n"); } } /*=========================================================== FUNCTION NAME: cova_vector() PURPOSE: calculate the covariance vector INPUT: target[],neighbour_array[][],deck->num_indep,num_closet RETURN : covariance vector C[] ===========================================================*/ void cova_vector(float C[],float local_scale[],float neighbour_array[][MAXNBS], int num_closet,int num_indep,float target[],FILE *fp1,char dflag[]) { int id,ik,k; float gpt[MAXNIV],dpt[MAXNIV]; for(k=0;knum_dep,ivA[][], RETURN : w[kd][id] ===========================================================*/ void obtain_weight(float ivA[][MAXNBS+1],float w[][MAXNBS],float est_mean[], int num_closet,char Method_name[],int num_dep,float obv[][MAXNDV], FILE *fp1,char dflag[]) { int id,kd,ik,i1,i2; float tmp,tmp1; float ivAsum[MAXNBS][MAXNBS]; for(i1=0;i1num_dep,ivA[id][idd],deck->obv[idd][kd]; RETURN : w[kd][id],ivAsum[kd][id] ===========================================================*/ void calculate_weight(float ivA[][MAXNBS+1],int id,float w[][MAXNBS], float ivAsum[][MAXNBS],int num_closet,int num_dep,float obv[][MAXNDV]) { int kd,idd; float tmp,tmp1; for(kd=0;kdnum_dep, num_closet,ivA[][],deck->Method_name, RETURN : est[] ===========================================================*/ void optimal_estimation(float C[],float w[][MAXNBS],float est[], float est_mean[],int num_closet,int num_dep,char Method_name[],FILE *fp1, char dflag[]) { int kd,id; float tmp; for(kd=0;kdnum_dep, num_closet,ivA[][],deck->Method_name, RETURN : error[] ===========================================================*/ void error_estimate(float ivA[][MAXNBS+1],float C[],float error[], int num_closet,char Method_name[],FILE *fp1,char dflag[]) { int kd=0; /* keep in mind possibility of multiple error estimates */ int id,idd; float tmp1,tmp2,tmp0; /*Calculation of the variance of the error*/ tmp0=0; tmp1=0; tmp2=0; for(id=0;idnum_dep,est[kd],error[kd] RETURN : est[kd],error[kd] to outfile ===========================================================*/ void oa_output(float est[],float error[],int num_dep,FILE *fp2) { int kd; for(kd=0;kd\n\n"); /* printf("%s\n\n","For example :"); printf("%s\n","you can type one of the following commands:"); printf("%s\n\n","-------------------------------------------"); printf("%s\n\n","oax ./DATA/test "); printf("%s\n\n","oax ./DATA/test -d1 "); printf("%s\n\n","oax ./DATA/test -d2 "); printf("%s\n\n","oax ./DATA/test -d3 "); printf("%s\n\n","oax ./DATA/test -time "); */ printf("%s\n","where filename is the deck file name (without .deck extension)"); /* printf("%s\n","in this example the deckfile test.deck is stored in the "); */ printf("%s\n","path ./DATA. -d1, -d2 and -d3 are debuger options for "); printf("%s\n","providing you with more information about the program "); printf("%s\n","operation. -time option will tell you the timing of the"); printf("%s\n\n","progrm spent in executing the given task."); printf("%s\n\n"," Try Again ! "); break; case(1): printf("%s\n"," oa v5.1 , Sept. 1994"); fprintf(fp1,"\n\nThis file contains the detailed debug information"); fprintf(fp1,"\n------------------------------------------------"); fprintf(fp1,"\n%s\n","**************************************************"); fprintf(fp1,"%s\n"," Welcome to the Optimal Analysis Program "); fprintf(fp1," (Version 5.1 , Oct 2002)\n"); fprintf(fp1,"%s\n","**************************************************"); break; case(2): fprintf(fp1,"\nReading Parameter File %s\n", dflag); fprintf(fp1,"---------------------------------"); break; case(3): /*-- echo the search result title to output file --*/ fprintf(fp1,"\n\n%s\n","Nearest Neighbour Search and Optimal Estimation:"); fprintf(fp1,"%s","================================================"); break; case(4): printf("\nProgram abnormal termination ..."); printf("\nNearest neighbours must be a positive number!\n\n"); break; case(5): printf("\n !!!!*** Warning ***!!!!\n"); printf("Nearest neighbours are more than data points.\n"); printf("The number of nearest neighbours are reset\n"); printf(" to be equal to the number of data point!\n\n"); break; case(6): printf("\n !!!!*** File Reading Error ***!!!!\n"); printf("%s%s\n\n",dflag," has fewer columns than required."); break; } } void message1(int key, char dflag[], clock_t ctime0, int NDATA, FILE *fp1, int num_indep, int Bucket ) { float secs; long ticks; clock_t ctime1; switch(key) { case(7): if(strcmp(dflag,"-time")==0) { ctime1 = clock(); ticks = (long) ctime1-ctime0; fprintf(fp1,"elapsed system ticks = %10ld ticks\n",ticks); secs = (1.0*ticks)/CLOCKS_PER_SEC ; fprintf(fp1,"%s %6d %6d %6d %10.4f\n", "%CHECKDATA: NDATA, num_indep, Bucket, BUILDTREE(sec) ", NDATA,num_indep,Bucket,secs); printf("%s %10.4f %s\n", "elapsed system time in building kdtree", secs,"(sec)"); } break; case(8): if(strcmp(dflag,"-time")==0) { ctime1 = clock(); ticks = (long) ctime1-ctime0; fprintf(fp1,"elapsed system ticks = %10ld ticks\n",ticks); secs = (1.0*ticks)/CLOCKS_PER_SEC ; fprintf(fp1,"%s %6d %6d %6d %10.4f\n", "%LOADDATA: NDATA, num_indep, Bucket, SEARCH+OPTEST(sec)", NDATA,num_indep,Bucket,secs); printf("%s %10.4f %s\n\n", "elapsed system time in optimal estimation", secs,"(sec)"); } break; } } /*=========================================================== FUNCTION NAME: oa_end() PURPOSE: end the oa program with closing the opened files, echoing the number of grid read and sending the message back to the user. INPUT: ngp,out_name,log_name RETURN : message to screen ===========================================================*/ void oa_end(int ngp,struct Flname *name) { fprintf(name->fp1,"\n Number grid point read : %d\n",ngp); (void) fclose(name->fp1); (void) fclose(name->fp2); printf("%s%s%s\n","Normal Termination --> See File " ,name->out_name," for estimation result."); printf("%s%s%s\n"," --> See File " ,name->log_name," for debug information."); }