LinkScheduleEstimator.cc

Go to the documentation of this file.
00001 
00002 
00003 #include<stdio.h>
00004 #include<string.h>
00005 #include<stdlib.h>
00006 #include<assert.h>
00007 
00008 #include "LinkScheduleEstimator.h"
00009 
00010 namespace dtn {
00011 
00012 // dist holds a temporary distance array for the dynamic programming algorithm
00013 unsigned int **dist;
00014 
00015 LinkScheduleEstimator::LinkScheduleEstimator() 
00016     : Logger("LinkScheduleEstimator", "/dtn/route/linkstate/estimator")
00017 {}  
00018 
00019 /*
00020  *  compute the distance between two log entries.
00021  *  a_offset, b_offset is used for comparing subsequences
00022  *  at different places in the log.
00023  *
00024  *  Distances larger than the window size are given the cost of the
00025  *  minimum duration smaller distances just use the actual distance.
00026  */
00027 unsigned int 
00028 LinkScheduleEstimator::entry_dist(Log &a, unsigned int a_index, unsigned int a_offset,
00029                                   Log &b, unsigned int b_index, unsigned int b_offset,
00030                                   unsigned int warping_window)
00031 {
00032     unsigned int diff=absdiff(a[a_index].start-a_offset,b[b_index].start-b_offset);
00033     unsigned int minduration=oasys::min(a[a_index].duration, b[b_index].duration);
00034     if(diff > warping_window)
00035         return minduration;
00036     else return oasys::min(diff, minduration);
00037 }
00038 
00039 /*
00040  *  Recursive internal function of log_dist.
00041  */
00042 unsigned int 
00043 LinkScheduleEstimator::log_dist_r(Log &a, unsigned int a_index, unsigned int a_offset,
00044                                   Log &b, unsigned int b_index, unsigned int b_offset,
00045                                   unsigned int warping_window)
00046 {
00047     unsigned int mindist;
00048 
00049     // can't compute values outside the boundaries.
00050     //XXX/jakob - why did we get here anyway?
00051     // XXX/demmer this is a bogus compariosn
00052     //if(a_index<0 || b_index<0) return MAX_DIST;
00053 
00054     // if we've already computed the dist, return it
00055     if(dist[a_index][b_index]<MAX_DIST)
00056         return dist[a_index][b_index];
00057 
00058     // no use going outside boundaries
00059     else if(a_index==0)
00060         mindist=log_dist_r(a,a_index,a_offset,
00061                            b,b_index-1,b_offset,
00062                            warping_window);
00063     // no use going outside boundaries
00064     else if(b_index==0)
00065         mindist=log_dist_r(a,a_index-1,a_offset,
00066                            b,b_index,b_offset,
00067                            warping_window);
00068     // find the minimum cost neighboring cell
00069     else {
00070         mindist = oasys::min(oasys::min(log_dist_r(a,a_index-1,a_offset,
00071                                                    b,b_index-1,b_offset,warping_window),
00072                                         log_dist_r(a,a_index-1,a_offset,
00073                                                    b,b_index,b_offset,warping_window)),
00074                              log_dist_r(a,a_index,a_offset,
00075                                         b,b_index-1,b_offset,warping_window));
00076     }
00077 
00078     unsigned int entrydist=entry_dist(a,a_index,a_offset,
00079                              b,b_index,b_offset,
00080                              warping_window);
00081 
00082     dist[a_index][b_index]=entrydist+mindist;
00083 
00084     return dist[a_index][b_index];
00085 }
00086 
00087 /*
00088  *  determines the distance between logs a and b using dynamic programming.
00089  *
00090  */
00091 unsigned int 
00092 LinkScheduleEstimator::log_dist(Log &a, unsigned int a_offset,
00093                                 Log &b, unsigned int b_offset,
00094                                 unsigned int warping_window, int print_table)
00095 {
00096     /*
00097      * Initialize a cost matrix for the DP algorithm
00098      */
00099     dist=(unsigned int**)malloc(a.size()*sizeof(unsigned int));
00100     for(unsigned int i=0;i<a.size();i++)
00101     {
00102         dist[i]=(unsigned int*)malloc(b.size()*sizeof(unsigned int));
00103 
00104         for(unsigned int j=0;j<b.size();j++)
00105             dist[i][j]=MAX_DIST;
00106     }
00107 
00108     dist[0][0]=entry_dist(a,0,a_offset,b,0,b_offset,warping_window);
00109 
00110     /*
00111      * Call inner function for progressively larger parts of the array.
00112      * Goes from 0,0 to n,n along the diagonal.
00113      */
00114     unsigned int d;
00115     for(unsigned int i=0;i<oasys::min(a.size(),b.size());i++)
00116         d=log_dist_r(a,i,a_offset,b,i,b_offset,warping_window);
00117 
00118     // compute the actual distance with a final call
00119     d=log_dist_r(a,a.size()-1,a_offset,
00120                  b,b.size()-1,b_offset,
00121                  warping_window);
00122 
00123     if(print_table)
00124     {
00125         for(unsigned int i=0;i<a.size();i++)
00126         {
00127             log_debug("%d\t| ",a[a.size()-i-1].start-a_offset);
00128             for(unsigned int j=0;j<b.size();j++)
00129             {
00130                 log_debug("%d\t",dist[a.size()-i-1][j]);
00131             }
00132             log_debug("\n");
00133         }
00134         log_debug("---------------------------------------------------\n\t ");
00135         for(unsigned int i=0;i<b.size();i++)
00136             log_debug("%d\t| ",b[i].start-b_offset);
00137         log_debug("\n\t ");
00138         for(unsigned int i=0;i<b.size();i++)
00139             log_debug("%d\t| ",b[i].duration);
00140         log_debug("\n\n");
00141     }
00142 
00143     free(dist);
00144 
00145     return d;
00146 }
00147 
00148 /*
00149  * Computes the distance between l and l shifted phase time
00150  * steps. This is actually the inverse autocorrelation function...
00151  */
00152 
00153 unsigned int 
00154 LinkScheduleEstimator::autocorrelation(Log &log, unsigned int phase, int print_table)
00155 {
00156     Log clone(log.size());
00157     for(unsigned int i=phase;i<log.size();i++) {
00158         clone[i-phase].start=log[i].start-log[phase].start;
00159         clone[i-phase].duration=log[i].duration;
00160     }
00161 
00162     for(unsigned int i=0;i<phase;i++) {
00163         clone[i+(log.size()-phase)].start=
00164             log[i].start+log[log.size()-1].start-log[phase-1].start;
00165         clone[i+(log.size()-phase)].duration=log[i].duration;
00166     }
00167 
00168     unsigned int d = log_dist(log, log[0].start, // a_offset
00169                      clone, clone[0].start,
00170                      (int)((log[log.size()].start+
00171                             log[log.size()].duration)*WARPING_WINDOW),
00172                               print_table);
00173 
00174 
00175     return d;
00176 }
00177 
00178 void 
00179 LinkScheduleEstimator::print_log(Log &log, int relative_dates)
00180 {
00181     unsigned int offset=0;
00182     if(relative_dates)
00183         offset=log[0].start;
00184 
00185     for(unsigned int i=0;i<log.size();i++) {
00186         log_debug("(%d, %d)",log[i].start-offset,log[i].duration);
00187         if(i<log.size()-1)
00188             log_debug(", ");
00189     }
00190     log_debug("\n");
00191 }
00192 
00193 
00194 /*
00195  *  Generates a randomized periodic log given a schedule and a jitter level.
00196  *
00197  */
00198 
00199 LinkScheduleEstimator::Log* LinkScheduleEstimator::generate_samples(Log &schedule,
00200                                         unsigned int log_size,
00201                                         unsigned int start_jitter,
00202                                         double duration_jitter)
00203 {
00204     Log *output = new Log(log_size);
00205     unsigned int schedule_index = 0;
00206     unsigned int start_time_offset = 0;
00207     for(unsigned int i=0;i<log_size;i++)
00208     {
00209         /*
00210          * The last schedule entry is assumed to be identical to the
00211          * first entry
00212          */
00213 
00214         if(schedule_index == schedule.size() - 1) {
00215             start_time_offset = start_time_offset +
00216                                 schedule[schedule.size()-1].start +
00217                                 schedule[schedule.size()-1].duration;
00218             schedule_index = 0;
00219         }
00220 
00221         (*output)[i].start = (unsigned int) oasys::max((unsigned int)0,(start_time_offset +
00222                                                           schedule[schedule_index].start -
00223                                                           start_jitter / 2 +
00224                                                           (unsigned int)(random() % start_jitter)));
00225 
00226         unsigned int duration_jitter_abs = (int)(duration_jitter*
00227                                         schedule[schedule_index].duration);
00228 
00229         (*output)[i].duration=schedule[schedule_index].duration -
00230                               duration_jitter_abs / 2 +
00231                               random() % duration_jitter_abs;
00232         schedule_index++;
00233     }
00234 
00235     return output;
00236 }
00237 
00238 
00239 /*
00240  * Calculate the (inverse) autocorrelation function for the log, then
00241  * look for the deepest valley.  If there second smallest value is
00242  * 2*period away, we have found our period.
00243  *
00244  * This is a pretty simple heuristic, should be possible to do better.
00245  */
00246 unsigned int 
00247 LinkScheduleEstimator::estimate_period(Log &log)
00248 {
00249     int* autoc=(int*)malloc(log.size()*sizeof(int));
00250     for(unsigned int i=1;i<log.size();i++) {
00251         autoc[i]=autocorrelation(log,i,0);
00252     }
00253 
00254     // first find the best autocorrelation period
00255     unsigned int candidate=1;
00256     for(unsigned int i=1;i<log.size();i++)
00257         if(autoc[i]<autoc[candidate])
00258             candidate=i;
00259 
00260     unsigned int candidate2=1;
00261     for(unsigned int i=1;i<log.size();i++)
00262         if(i!=candidate && autoc[i]<autoc[candidate2])
00263             candidate2=i;
00264 
00265 
00266     double should_be_2=log[candidate2].start/(double)log[candidate].start;
00267 
00268     if(absdiff(should_be_2,2)<2*PERIOD_TOLERANCE)
00269         return (int)(log[candidate2].start+log[candidate].start)/3;
00270     else
00271         return 0;
00272 }
00273 
00277 unsigned int 
00278 LinkScheduleEstimator::seek_to_before_date(Log &log, unsigned int date)
00279 {
00280     for( unsigned int i=0 ; i < log.size() ; i++ )
00281         if( log[i].start > date )
00282             return (unsigned int)oasys::max(0,(int)i-1);
00283     return 0;
00284 }
00285 
00289 unsigned int 
00290 LinkScheduleEstimator::closest_entry_to_date(Log &log, unsigned int date)
00291 {
00292     unsigned int i=seek_to_before_date(log, date);
00293 
00294     if(absdiff(log[i].start,date)<absdiff(log[i+1].start,date))
00295         return i;
00296     else
00297         return i+1;
00298 
00299     return 0;
00300 }
00301 
00302 
00303 LinkScheduleEstimator::Log* 
00304 LinkScheduleEstimator::clone_subsequence(Log &log, unsigned int start, unsigned int len)
00305 {
00306     Log *out = new Log(len);
00307     for(unsigned int i=0;i<len;i++) {
00308         (*out)[i].start=log[i+start].start;
00309         (*out)[i].duration=log[i+start].duration;
00310     }
00311 
00312     return out;
00313 }
00314 
00315 /*
00316  * Match the pattern against the log, one pattern_len at a time.
00317  * returns the sum of the distances.
00318  */
00319 unsigned int 
00320 LinkScheduleEstimator::badness_of_match(Log &pattern,
00321                                         Log &log,
00322                                         unsigned int warping_window, 
00323                                         unsigned int period)
00324 {
00325     unsigned int badness=0;
00326     for(unsigned int date=0; date<log[log.size()-pattern.size()].start; date+=period)
00327     {
00328         unsigned int start = closest_entry_to_date(log,date);
00329         Log* subsequence = clone_subsequence(log, start,
00330                                              closest_entry_to_date(log,date) -
00331                                              start+1);
00332 
00333         unsigned int change=log_dist(pattern,
00334                             pattern[0].start,
00335                             *subsequence,
00336                             date,
00337                             warping_window,
00338                             0);
00339 
00340         delete subsequence;
00341         badness+=change;
00342     }
00343     return badness;
00344 }
00345 
00346 
00347 /*
00348  *  Given a period estimate, determine the schedule that best fits the
00349  *  provided log.
00350  */
00351 LinkScheduleEstimator::Log* 
00352 LinkScheduleEstimator::extract_schedule(Log &log, unsigned int period_estimate)
00353 {
00354     Log* best_pattern=0;
00355     unsigned int best_pattern_badness=100000000;
00356 
00357     /*
00358        First find a canonical schedule approximation - the period that
00359        best matches all the other periods in this log. Once we have
00360        that, refine the period length and the schedule using all the
00361        other periods that match exactly.
00362     */
00363 
00364     // find the period that best matches the rest of the log
00365     for(unsigned int date=0;
00366         date<=(log[log.size()-1].start-period_estimate);
00367         date+=period_estimate)
00368     {
00369         unsigned int pattern_index=closest_entry_to_date(log,date);
00370         unsigned int pattern_len = closest_entry_to_date(log,date+period_estimate) -
00371                           pattern_index+1;
00372         Log* pattern = clone_subsequence(log, pattern_index, pattern_len);
00373 
00374         unsigned int badness=badness_of_match(*pattern,
00375                                      log,
00376                                      (int)(period_estimate*WARPING_WINDOW),
00377                                      period_estimate);
00378 
00379         if(badness < best_pattern_badness)
00380         {
00381             if(best_pattern)
00382                 delete best_pattern;
00383 
00384             best_pattern = pattern;
00385             best_pattern_badness = badness;
00386         }
00387         else delete pattern;
00388     }
00389 
00390     log_debug("And the best pattern is (%d): \n",best_pattern_badness);
00391     print_log(*best_pattern, 1);
00392 
00393     return new Log(*best_pattern);
00394 
00395 /*
00396       // This part computes an average schedule based on all the
00397       // subsequences matching the pattern.  apparently, it isn't
00398       // working as well as the "best pattern" thing is right now, so
00399       // it's commented until we can improve it.
00400 
00401     unsigned int dist;
00402     unsigned int count=0;
00403     for ( unsigned int date=0 ;
00404           date<=(log[log.size()-1].start-period_estimate);
00405           date+=period_estimate )
00406     {
00407         unsigned int pattern_index=closest_entry_to_date(l,log.size(),date);
00408         unsigned int pattern_len =
00409             closest_entry_to_date(l,log.size(),date+period_estimate) -
00410             pattern_index;
00411 
00412         if((pattern_len == best_pattern_len) &&
00413            !(dist=log_dist(best_pattern,,best_pattern_len,best_pattern[0].start,
00414                            &(log[pattern_index]), pattern_len, date,
00415                            (int)(period_estimate*WARPING_WINDOW),
00416                            0)))
00417         {
00418             log_debug("log at %d is good\n", date);
00419             count++;
00420 
00421             for(unsigned int i=0;i<pattern_len;i++) {
00422                 schedule[i].start+=log[pattern_index+i].start-date;
00423                 schedule[i].duration+=log[pattern_index+i].duration;
00424             }
00425         }
00426         else
00427             log_debug("tried date %d (%d-%d): %d\n",
00428                    date, pattern_index,pattern_len, dist);
00429 
00430     }
00431 
00432     for(unsigned int i=0;i<best_pattern_len;i++) {
00433         schedule[i].start/=count;
00434         schedule[i].duration/=count;
00435     }
00436 
00437     log_debug("Extracted schedule is\n");
00438     print_log(schedule,best_pattern_len,1);
00439 */
00440 }
00441 
00442 /*
00443  *  Given a period estimate, improve the estimate by fitting it
00444  *  against the entire log data.
00445  *
00446  *  XXX/jakob - if the last scheduled event doesn't occur in any given
00447  *  period, the refined period will be way off!  should check if the
00448  *  data makes sense before using it.
00449  */
00450 
00451 unsigned int 
00452 LinkScheduleEstimator::refine_period(LinkScheduleEstimator::Log &log, 
00453                                      unsigned int period_estimate)
00454 {
00455     unsigned int sum=0;
00456     int count=-1;
00457     int count2=0;
00458 
00459 
00460     for(unsigned int date=0 ; 
00461         date<=(log[log.size()-1].start-period_estimate) ; 
00462         date+=period_estimate)
00463     {
00464         unsigned int pattern_index=closest_entry_to_date(log,date);
00465         sum+=log[pattern_index].start;
00466         count++;
00467         count2+=count;
00468     }
00469 
00470     return sum/count2;
00471 }
00472 
00476 LinkScheduleEstimator::Log* 
00477 LinkScheduleEstimator::find_schedule(LinkScheduleEstimator::Log &log)
00478 {
00479     // find a first estimate of the period. If there is a period, this
00480     // will return a non-zero value.
00481 
00482     unsigned int period = estimate_period(log);
00483 
00484     if(period) {
00485         // now try to fit this period to the full log as closely as possible
00486         period = refine_period(log, period);
00487         // and then compute the best schedule for the given log and period
00488         return extract_schedule(log, period);
00489     }
00490     else return 0;
00491 }
00492 
00493 
00494 /*
00495 int main(int argc, char** argv)
00496 {
00497     Log f2(5);
00498     f2[0].start=0;
00499     f2[0].duration=1000;
00500 
00501     f2[1].start=4000;
00502     f2[1].duration=500;
00503 
00504     f2[2].start=9000;
00505     f2[2].duration=1000;
00506 
00507     f2[3].start=14000;
00508     f2[3].duration=2000;
00509 
00510     f2[4].start=19000;
00511     f2[4].duration=1000;
00512 
00513 //    log f1[]={CONTACT(0,1023),
00514 //              CONTACT(5006,1040),
00515 //             CONTACT(10150,1002),
00516 //              CONTACT(20040, 956),
00517 //              CONTACT(25200, 896),
00518 //              CONTACT(26150, 350),
00519 //              CONTACT(29682, 1098),
00520 //              CONTACT(39780, 1035),
00521 //              CONTACT(45000, 987),
00522 //              CONTACT(50045, 907),
00523 //              CONTACT(60340, 1055),
00524 //              CONTACT(65000, 987),
00525 //              CONTACT(70045, 907),
00526 //              CONTACT(80340, 1055)};
00527 
00528 
00529     Log* best_schedule=0;
00530     int  best_schedule_len;
00531     Log* schedule;
00532     int schedule_len;
00533 
00534     int log_len=100;
00535     Log *full_log=generate_samples(f2,log_len,1000,.1);
00536 
00537     int best_period=0;
00538     int best_cost=1000000;
00539 
00540     for(int current_event=1;current_event<log_len;current_event++)
00541     {
00542         Log *log=clone_subsequence(*full_log, 0, current_event+1);
00543         int period=estimate_period((*log));
00544         if(period)
00545         {
00546             log_debug("At time %d (%d), estimated period is %d.\n",
00547                    current_event,
00548                    (*log)[current_event-1].start,
00549                    period);
00550 
00551 
00552             period = refine_period((*log), period);
00553             log_debug("Period after refinement is %d\n",period);
00554 
00555             schedule=extract_schedule((*log), period);
00556 
00557             // XXX/jakob - this isn't fair. we're comparing the cost
00558             // of matching to a short interval to the cost of matching
00559             // to a long interval
00560             int cost=badness_of_match(*schedule,
00561                                       *log,
00562                                       (int)(period*WARPING_WINDOW),
00563                                       period);
00564 
00565             if(cost<best_cost) {
00566                 log_debug("Found a new best period %d and schedule at time %d, cost is %d.\n",
00567                        period,
00568                        (*log)[current_event].start,
00569                        cost);
00570                        if(best_schedule)
00571                        delete best_schedule;
00572 
00573                 best_schedule=schedule;
00574                 best_schedule_len=schedule_len;
00575                 best_period=period;
00576                 best_cost=cost;
00577             }
00578             else
00579                 delete schedule;
00580         }
00581 
00582         delete log;
00583     }
00584 
00585     log_debug("Best period %d, cost %d, schedule: ",best_period,best_cost);
00586     print_log(*best_schedule, 1);
00587 
00588     delete full_log;
00589 }
00590 
00591 */
00592 
00593 }

Generated on Fri Dec 22 14:47:59 2006 for DTN Reference Implementation by  doxygen 1.5.1