LinkScheduleEstimator.cc

Go to the documentation of this file.
00001 /*
00002  *    Copyright 2005-2006 Intel Corporation
00003  * 
00004  *    Licensed under the Apache License, Version 2.0 (the "License");
00005  *    you may not use this file except in compliance with the License.
00006  *    You may obtain a copy of the License at
00007  * 
00008  *        http://www.apache.org/licenses/LICENSE-2.0
00009  * 
00010  *    Unless required by applicable law or agreed to in writing, software
00011  *    distributed under the License is distributed on an "AS IS" BASIS,
00012  *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  *    See the License for the specific language governing permissions and
00014  *    limitations under the License.
00015  */
00016 
00017 
00018 
00019 #include<stdio.h>
00020 #include<string.h>
00021 #include<stdlib.h>
00022 #include<assert.h>
00023 
00024 #include "LinkScheduleEstimator.h"
00025 
00026 namespace dtn {
00027 
00028 // dist holds a temporary distance array for the dynamic programming algorithm
00029 unsigned int **dist;
00030 
00031 LinkScheduleEstimator::LinkScheduleEstimator() 
00032     : Logger("LinkScheduleEstimator", "/dtn/route/linkstate/estimator")
00033 {}  
00034 
00035 /*
00036  *  compute the distance between two log entries.
00037  *  a_offset, b_offset is used for comparing subsequences
00038  *  at different places in the log.
00039  *
00040  *  Distances larger than the window size are given the cost of the
00041  *  minimum duration smaller distances just use the actual distance.
00042  */
00043 unsigned int 
00044 LinkScheduleEstimator::entry_dist(Log &a, unsigned int a_index, unsigned int a_offset,
00045                                   Log &b, unsigned int b_index, unsigned int b_offset,
00046                                   unsigned int warping_window)
00047 {
00048     unsigned int diff=absdiff(a[a_index].start-a_offset,b[b_index].start-b_offset);
00049     unsigned int minduration=oasys::min(a[a_index].duration, b[b_index].duration);
00050     if(diff > warping_window)
00051         return minduration;
00052     else return oasys::min(diff, minduration);
00053 }
00054 
00055 /*
00056  *  Recursive internal function of log_dist.
00057  */
00058 unsigned int 
00059 LinkScheduleEstimator::log_dist_r(Log &a, unsigned int a_index, unsigned int a_offset,
00060                                   Log &b, unsigned int b_index, unsigned int b_offset,
00061                                   unsigned int warping_window)
00062 {
00063     unsigned int mindist;
00064 
00065     // can't compute values outside the boundaries.
00066     //XXX/jakob - why did we get here anyway?
00067     // XXX/demmer this is a bogus compariosn
00068     //if(a_index<0 || b_index<0) return MAX_DIST;
00069 
00070     // if we've already computed the dist, return it
00071     if(dist[a_index][b_index]<MAX_DIST)
00072         return dist[a_index][b_index];
00073 
00074     // no use going outside boundaries
00075     else if(a_index==0)
00076         mindist=log_dist_r(a,a_index,a_offset,
00077                            b,b_index-1,b_offset,
00078                            warping_window);
00079     // no use going outside boundaries
00080     else if(b_index==0)
00081         mindist=log_dist_r(a,a_index-1,a_offset,
00082                            b,b_index,b_offset,
00083                            warping_window);
00084     // find the minimum cost neighboring cell
00085     else {
00086         mindist = oasys::min(oasys::min(log_dist_r(a,a_index-1,a_offset,
00087                                                    b,b_index-1,b_offset,warping_window),
00088                                         log_dist_r(a,a_index-1,a_offset,
00089                                                    b,b_index,b_offset,warping_window)),
00090                              log_dist_r(a,a_index,a_offset,
00091                                         b,b_index-1,b_offset,warping_window));
00092     }
00093 
00094     unsigned int entrydist=entry_dist(a,a_index,a_offset,
00095                              b,b_index,b_offset,
00096                              warping_window);
00097 
00098     dist[a_index][b_index]=entrydist+mindist;
00099 
00100     return dist[a_index][b_index];
00101 }
00102 
00103 /*
00104  *  determines the distance between logs a and b using dynamic programming.
00105  *
00106  */
00107 unsigned int 
00108 LinkScheduleEstimator::log_dist(Log &a, unsigned int a_offset,
00109                                 Log &b, unsigned int b_offset,
00110                                 unsigned int warping_window, int print_table)
00111 {
00112     /*
00113      * Initialize a cost matrix for the DP algorithm
00114      */
00115     dist=(unsigned int**)malloc(a.size()*sizeof(unsigned int));
00116     for(unsigned int i=0;i<a.size();i++)
00117     {
00118         dist[i]=(unsigned int*)malloc(b.size()*sizeof(unsigned int));
00119 
00120         for(unsigned int j=0;j<b.size();j++)
00121             dist[i][j]=MAX_DIST;
00122     }
00123 
00124     dist[0][0]=entry_dist(a,0,a_offset,b,0,b_offset,warping_window);
00125 
00126     /*
00127      * Call inner function for progressively larger parts of the array.
00128      * Goes from 0,0 to n,n along the diagonal.
00129      */
00130     unsigned int d;
00131     for(unsigned int i=0;i<oasys::min(a.size(),b.size());i++)
00132         d=log_dist_r(a,i,a_offset,b,i,b_offset,warping_window);
00133 
00134     // compute the actual distance with a final call
00135     d=log_dist_r(a,a.size()-1,a_offset,
00136                  b,b.size()-1,b_offset,
00137                  warping_window);
00138 
00139     if(print_table)
00140     {
00141         for(unsigned int i=0;i<a.size();i++)
00142         {
00143             log_debug("%d\t| ",a[a.size()-i-1].start-a_offset);
00144             for(unsigned int j=0;j<b.size();j++)
00145             {
00146                 log_debug("%d\t",dist[a.size()-i-1][j]);
00147             }
00148             log_debug("\n");
00149         }
00150         log_debug("---------------------------------------------------\n\t ");
00151         for(unsigned int i=0;i<b.size();i++)
00152             log_debug("%d\t| ",b[i].start-b_offset);
00153         log_debug("\n\t ");
00154         for(unsigned int i=0;i<b.size();i++)
00155             log_debug("%d\t| ",b[i].duration);
00156         log_debug("\n\n");
00157     }
00158 
00159     free(dist);
00160 
00161     return d;
00162 }
00163 
00164 /*
00165  * Computes the distance between l and l shifted phase time
00166  * steps. This is actually the inverse autocorrelation function...
00167  */
00168 
00169 unsigned int 
00170 LinkScheduleEstimator::autocorrelation(Log &log, unsigned int phase, int print_table)
00171 {
00172     Log clone(log.size());
00173     for(unsigned int i=phase;i<log.size();i++) {
00174         clone[i-phase].start=log[i].start-log[phase].start;
00175         clone[i-phase].duration=log[i].duration;
00176     }
00177 
00178     for(unsigned int i=0;i<phase;i++) {
00179         clone[i+(log.size()-phase)].start=
00180             log[i].start+log[log.size()-1].start-log[phase-1].start;
00181         clone[i+(log.size()-phase)].duration=log[i].duration;
00182     }
00183 
00184     unsigned int d = log_dist(log, log[0].start, // a_offset
00185                      clone, clone[0].start,
00186                      (int)((log[log.size()].start+
00187                             log[log.size()].duration)*WARPING_WINDOW),
00188                               print_table);
00189 
00190 
00191     return d;
00192 }
00193 
00194 void 
00195 LinkScheduleEstimator::print_log(Log &log, int relative_dates)
00196 {
00197     unsigned int offset=0;
00198     if(relative_dates)
00199         offset=log[0].start;
00200 
00201     for(unsigned int i=0;i<log.size();i++) {
00202         log_debug("(%d, %d)",log[i].start-offset,log[i].duration);
00203         if(i<log.size()-1)
00204             log_debug(", ");
00205     }
00206     log_debug("\n");
00207 }
00208 
00209 
00210 /*
00211  *  Generates a randomized periodic log given a schedule and a jitter level.
00212  *
00213  */
00214 
00215 LinkScheduleEstimator::Log* LinkScheduleEstimator::generate_samples(Log &schedule,
00216                                         unsigned int log_size,
00217                                         unsigned int start_jitter,
00218                                         double duration_jitter)
00219 {
00220     Log *output = new Log(log_size);
00221     unsigned int schedule_index = 0;
00222     unsigned int start_time_offset = 0;
00223     for(unsigned int i=0;i<log_size;i++)
00224     {
00225         /*
00226          * The last schedule entry is assumed to be identical to the
00227          * first entry
00228          */
00229 
00230         if(schedule_index == schedule.size() - 1) {
00231             start_time_offset = start_time_offset +
00232                                 schedule[schedule.size()-1].start +
00233                                 schedule[schedule.size()-1].duration;
00234             schedule_index = 0;
00235         }
00236 
00237         (*output)[i].start = (unsigned int) oasys::max((unsigned int)0,(start_time_offset +
00238                                                           schedule[schedule_index].start -
00239                                                           start_jitter / 2 +
00240                                                           (unsigned int)(random() % start_jitter)));
00241 
00242         unsigned int duration_jitter_abs = (int)(duration_jitter*
00243                                         schedule[schedule_index].duration);
00244 
00245         (*output)[i].duration=schedule[schedule_index].duration -
00246                               duration_jitter_abs / 2 +
00247                               random() % duration_jitter_abs;
00248         schedule_index++;
00249     }
00250 
00251     return output;
00252 }
00253 
00254 
00255 /*
00256  * Calculate the (inverse) autocorrelation function for the log, then
00257  * look for the deepest valley.  If there second smallest value is
00258  * 2*period away, we have found our period.
00259  *
00260  * This is a pretty simple heuristic, should be possible to do better.
00261  */
00262 unsigned int 
00263 LinkScheduleEstimator::estimate_period(Log &log)
00264 {
00265     int* autoc=(int*)malloc(log.size()*sizeof(int));
00266     for(unsigned int i=1;i<log.size();i++) {
00267         autoc[i]=autocorrelation(log,i,0);
00268     }
00269 
00270     // first find the best autocorrelation period
00271     unsigned int candidate=1;
00272     for(unsigned int i=1;i<log.size();i++)
00273         if(autoc[i]<autoc[candidate])
00274             candidate=i;
00275 
00276     unsigned int candidate2=1;
00277     for(unsigned int i=1;i<log.size();i++)
00278         if(i!=candidate && autoc[i]<autoc[candidate2])
00279             candidate2=i;
00280 
00281 
00282     double should_be_2=log[candidate2].start/(double)log[candidate].start;
00283 
00284     if(absdiff(should_be_2,2)<2*PERIOD_TOLERANCE)
00285         return (int)(log[candidate2].start+log[candidate].start)/3;
00286     else
00287         return 0;
00288 }
00289 
00293 unsigned int 
00294 LinkScheduleEstimator::seek_to_before_date(Log &log, unsigned int date)
00295 {
00296     for( unsigned int i=0 ; i < log.size() ; i++ )
00297         if( log[i].start > date )
00298             return (unsigned int)oasys::max(0,(int)i-1);
00299     return 0;
00300 }
00301 
00305 unsigned int 
00306 LinkScheduleEstimator::closest_entry_to_date(Log &log, unsigned int date)
00307 {
00308     unsigned int i=seek_to_before_date(log, date);
00309 
00310     if(absdiff(log[i].start,date)<absdiff(log[i+1].start,date))
00311         return i;
00312     else
00313         return i+1;
00314 
00315     return 0;
00316 }
00317 
00318 
00319 LinkScheduleEstimator::Log* 
00320 LinkScheduleEstimator::clone_subsequence(Log &log, unsigned int start, unsigned int len)
00321 {
00322     Log *out = new Log(len);
00323     for(unsigned int i=0;i<len;i++) {
00324         (*out)[i].start=log[i+start].start;
00325         (*out)[i].duration=log[i+start].duration;
00326     }
00327 
00328     return out;
00329 }
00330 
00331 /*
00332  * Match the pattern against the log, one pattern_len at a time.
00333  * returns the sum of the distances.
00334  */
00335 unsigned int 
00336 LinkScheduleEstimator::badness_of_match(Log &pattern,
00337                                         Log &log,
00338                                         unsigned int warping_window, 
00339                                         unsigned int period)
00340 {
00341     unsigned int badness=0;
00342     for(unsigned int date=0; date<log[log.size()-pattern.size()].start; date+=period)
00343     {
00344         unsigned int start = closest_entry_to_date(log,date);
00345         Log* subsequence = clone_subsequence(log, start,
00346                                              closest_entry_to_date(log,date) -
00347                                              start+1);
00348 
00349         unsigned int change=log_dist(pattern,
00350                             pattern[0].start,
00351                             *subsequence,
00352                             date,
00353                             warping_window,
00354                             0);
00355 
00356         delete subsequence;
00357         badness+=change;
00358     }
00359     return badness;
00360 }
00361 
00362 
00363 /*
00364  *  Given a period estimate, determine the schedule that best fits the
00365  *  provided log.
00366  */
00367 LinkScheduleEstimator::Log* 
00368 LinkScheduleEstimator::extract_schedule(Log &log, unsigned int period_estimate)
00369 {
00370     Log* best_pattern=0;
00371     unsigned int best_pattern_badness=100000000;
00372 
00373     /*
00374        First find a canonical schedule approximation - the period that
00375        best matches all the other periods in this log. Once we have
00376        that, refine the period length and the schedule using all the
00377        other periods that match exactly.
00378     */
00379 
00380     // find the period that best matches the rest of the log
00381     for(unsigned int date=0;
00382         date<=(log[log.size()-1].start-period_estimate);
00383         date+=period_estimate)
00384     {
00385         unsigned int pattern_index=closest_entry_to_date(log,date);
00386         unsigned int pattern_len = closest_entry_to_date(log,date+period_estimate) -
00387                           pattern_index+1;
00388         Log* pattern = clone_subsequence(log, pattern_index, pattern_len);
00389 
00390         unsigned int badness=badness_of_match(*pattern,
00391                                      log,
00392                                      (int)(period_estimate*WARPING_WINDOW),
00393                                      period_estimate);
00394 
00395         if(badness < best_pattern_badness)
00396         {
00397             if(best_pattern)
00398                 delete best_pattern;
00399 
00400             best_pattern = pattern;
00401             best_pattern_badness = badness;
00402         }
00403         else delete pattern;
00404     }
00405 
00406     log_debug("And the best pattern is (%d): \n",best_pattern_badness);
00407     print_log(*best_pattern, 1);
00408 
00409     return new Log(*best_pattern);
00410 
00411 /*
00412       // This part computes an average schedule based on all the
00413       // subsequences matching the pattern.  apparently, it isn't
00414       // working as well as the "best pattern" thing is right now, so
00415       // it's commented until we can improve it.
00416 
00417     unsigned int dist;
00418     unsigned int count=0;
00419     for ( unsigned int date=0 ;
00420           date<=(log[log.size()-1].start-period_estimate);
00421           date+=period_estimate )
00422     {
00423         unsigned int pattern_index=closest_entry_to_date(l,log.size(),date);
00424         unsigned int pattern_len =
00425             closest_entry_to_date(l,log.size(),date+period_estimate) -
00426             pattern_index;
00427 
00428         if((pattern_len == best_pattern_len) &&
00429            !(dist=log_dist(best_pattern,,best_pattern_len,best_pattern[0].start,
00430                            &(log[pattern_index]), pattern_len, date,
00431                            (int)(period_estimate*WARPING_WINDOW),
00432                            0)))
00433         {
00434             log_debug("log at %d is good\n", date);
00435             count++;
00436 
00437             for(unsigned int i=0;i<pattern_len;i++) {
00438                 schedule[i].start+=log[pattern_index+i].start-date;
00439                 schedule[i].duration+=log[pattern_index+i].duration;
00440             }
00441         }
00442         else
00443             log_debug("tried date %d (%d-%d): %d\n",
00444                    date, pattern_index,pattern_len, dist);
00445 
00446     }
00447 
00448     for(unsigned int i=0;i<best_pattern_len;i++) {
00449         schedule[i].start/=count;
00450         schedule[i].duration/=count;
00451     }
00452 
00453     log_debug("Extracted schedule is\n");
00454     print_log(schedule,best_pattern_len,1);
00455 */
00456 }
00457 
00458 /*
00459  *  Given a period estimate, improve the estimate by fitting it
00460  *  against the entire log data.
00461  *
00462  *  XXX/jakob - if the last scheduled event doesn't occur in any given
00463  *  period, the refined period will be way off!  should check if the
00464  *  data makes sense before using it.
00465  */
00466 
00467 unsigned int 
00468 LinkScheduleEstimator::refine_period(LinkScheduleEstimator::Log &log, 
00469                                      unsigned int period_estimate)
00470 {
00471     unsigned int sum=0;
00472     int count=-1;
00473     int count2=0;
00474 
00475 
00476     for(unsigned int date=0 ; 
00477         date<=(log[log.size()-1].start-period_estimate) ; 
00478         date+=period_estimate)
00479     {
00480         unsigned int pattern_index=closest_entry_to_date(log,date);
00481         sum+=log[pattern_index].start;
00482         count++;
00483         count2+=count;
00484     }
00485 
00486     return sum/count2;
00487 }
00488 
00492 LinkScheduleEstimator::Log* 
00493 LinkScheduleEstimator::find_schedule(LinkScheduleEstimator::Log &log)
00494 {
00495     // find a first estimate of the period. If there is a period, this
00496     // will return a non-zero value.
00497 
00498     unsigned int period = estimate_period(log);
00499 
00500     if(period) {
00501         // now try to fit this period to the full log as closely as possible
00502         period = refine_period(log, period);
00503         // and then compute the best schedule for the given log and period
00504         return extract_schedule(log, period);
00505     }
00506     else return 0;
00507 }
00508 
00509 
00510 /*
00511 int main(int argc, char** argv)
00512 {
00513     Log f2(5);
00514     f2[0].start=0;
00515     f2[0].duration=1000;
00516 
00517     f2[1].start=4000;
00518     f2[1].duration=500;
00519 
00520     f2[2].start=9000;
00521     f2[2].duration=1000;
00522 
00523     f2[3].start=14000;
00524     f2[3].duration=2000;
00525 
00526     f2[4].start=19000;
00527     f2[4].duration=1000;
00528 
00529 //    log f1[]={CONTACT(0,1023),
00530 //              CONTACT(5006,1040),
00531 //             CONTACT(10150,1002),
00532 //              CONTACT(20040, 956),
00533 //              CONTACT(25200, 896),
00534 //              CONTACT(26150, 350),
00535 //              CONTACT(29682, 1098),
00536 //              CONTACT(39780, 1035),
00537 //              CONTACT(45000, 987),
00538 //              CONTACT(50045, 907),
00539 //              CONTACT(60340, 1055),
00540 //              CONTACT(65000, 987),
00541 //              CONTACT(70045, 907),
00542 //              CONTACT(80340, 1055)};
00543 
00544 
00545     Log* best_schedule=0;
00546     int  best_schedule_len;
00547     Log* schedule;
00548     int schedule_len;
00549 
00550     int log_len=100;
00551     Log *full_log=generate_samples(f2,log_len,1000,.1);
00552 
00553     int best_period=0;
00554     int best_cost=1000000;
00555 
00556     for(int current_event=1;current_event<log_len;current_event++)
00557     {
00558         Log *log=clone_subsequence(*full_log, 0, current_event+1);
00559         int period=estimate_period((*log));
00560         if(period)
00561         {
00562             log_debug("At time %d (%d), estimated period is %d.\n",
00563                    current_event,
00564                    (*log)[current_event-1].start,
00565                    period);
00566 
00567 
00568             period = refine_period((*log), period);
00569             log_debug("Period after refinement is %d\n",period);
00570 
00571             schedule=extract_schedule((*log), period);
00572 
00573             // XXX/jakob - this isn't fair. we're comparing the cost
00574             // of matching to a short interval to the cost of matching
00575             // to a long interval
00576             int cost=badness_of_match(*schedule,
00577                                       *log,
00578                                       (int)(period*WARPING_WINDOW),
00579                                       period);
00580 
00581             if(cost<best_cost) {
00582                 log_debug("Found a new best period %d and schedule at time %d, cost is %d.\n",
00583                        period,
00584                        (*log)[current_event].start,
00585                        cost);
00586                        if(best_schedule)
00587                        delete best_schedule;
00588 
00589                 best_schedule=schedule;
00590                 best_schedule_len=schedule_len;
00591                 best_period=period;
00592                 best_cost=cost;
00593             }
00594             else
00595                 delete schedule;
00596         }
00597 
00598         delete log;
00599     }
00600 
00601     log_debug("Best period %d, cost %d, schedule: ",best_period,best_cost);
00602     print_log(*best_schedule, 1);
00603 
00604     delete full_log;
00605 }
00606 
00607 */
00608 
00609 }

Generated on Thu Jun 7 12:54:28 2007 for DTN Reference Implementation by  doxygen 1.5.1