<?xml version="1.0" encoding="UTF-8" ?>
<table xmlns="http://query.yahooapis.com/v1/schema/table.xsd">
  <meta>
    <sampleQuery> select * from {table}</sampleQuery>
  </meta>
  <bindings>
    <select itemPath="birthdays.person" produces="XML">
      <urls>
        <url></url>
      </urls>
<!-- The underlying BOSS paging model cannot be mapped precisely onto YQL paging, so this table
     always starts at offset 0 and only varies the number of results requested. -->
      <paging model="offset">
        <pagesize id="count" max="300" />
        <total default="10" />
      </paging>
      <inputs>
        <!-- Day to look up. Exposed to the execute script as the variable `date`; the script
             parses it with new Date(date), so it needs to be a well formed date including the
             year. When it is empty the script uses the current date instead. -->
        <key id='date' type='xs:string' paramType='variable' />
      </inputs>
      <execute><![CDATA[

//Represents one person's IMDb bio page: keeps the name and page url, and issues
//the YQL html query that pulls the page's div with id 'tn15'
var celebInfo = function(name,url) {
   var selectPrefix = "select * from html where url = '";
   var xpathSuffix = "' and xpath=\"//div[@id='tn15']\"";

   this.name = name;
   this.url = url;
   //the query handle is stored so getData() can read its results later
   this.query = y.query(selectPrefix + url + xpathSuffix);
};

//actually extract the info and return an xml object
celebInfo.prototype.getData=function() {
   default xml namespace ='';
    var d = this.query.results;
   var img = d..div.(@["id"]=="tn15lhs").div.a.img;
    var content = d..div.(@['id'] =="tn15content");
   var bio = "";
   //this is pretty hacky
   for each (var node in content.p) {
      if (node.text().toString().trim().length>100) {
         bio = node.*;
          break;
      }
   }
   var anchors = content.a;
   var bornInYear = null;
   var bornWhere = null;
   var diedInYear = null;
   var onThisDay = [];
   //TODO see if there is a wildcard way of pulling these out using e4x/xpath
   for each (var a in anchors) {
      var href = a.@['href'].toString();
      if (href.indexOf("/BornInYear")==0) {
         bornInYear = a.toString().trim();
         continue;
      }
      if (href.indexOf("/DiedInYear")==0) {
         diedInYear = a.toString().trim();
         continue;
      }
      if (href.indexOf("/BornWhere")==0) {
         bornWhere = a.toString().trim();
         continue;
      }
      if (href.indexOf("/OnThisDay")==0) {
         onThisDay.push(a.text().toString().trim());
         continue;
      }
   }
   var bornDayMonth=null;
   var diedDayMonth=null;
   if (onThisDay.length>0) {
      bornDayMonth = onThisDay[0].replace(/^\s*(\d{1,2})[\s]+(\w+)\s*/,'$1 $2'); //tidy up whitespace around text
      if (diedInYear && onThisDay.length>1) {
         diedDayMonth= onThisDay[1].replace(/^\s*(\d{1,2})[\s]+(\w+)\s*/,'$1 $2'); //tidy up whitespace around text
      }
   }
   var url = this.url;
   var name = this.name;
   var bornTime = null;
   if (bornDayMonth) {
      var daymonth = bornDayMonth.split(" ");
      bornTime=new Date(bornInYear,Date.getMonthFromString(daymonth[1]),parseInt(daymonth[0])).getTime()/1000;
   }
   var diedTime = null;
   if (diedDayMonth) {
      var daymonth = diedDayMonth.split(" ");
      diedTime=new Date(diedInYear,Date.getMonthFromString(daymonth[1]),parseInt(daymonth[0])).getTime()/1000;
   }
   var person = <person url={url}><name>{name}</name>{img}<born utime={bornTime}>{bornDayMonth} {bornInYear}</born></person>;
   if (diedTime) person.person+=<died utime={diedTime}>{diedDayMonth} {diedInYear}</died>;
   if (bio) person.person+=<bio>{bio}</bio>;
   return person;
}

//general useful routines
//Returns a copy of the string with leading and trailing whitespace removed
String.prototype.trim =function() {
   var edgeWhitespace = /^\s+|\s+$/g;
   return this.replace(edgeWhitespace, '');
};
//Maps an English month name ('January'..'December', case-sensitive) to its zero-based
//index as accepted by the Date constructor; returns undefined for any other value
Date.getMonthFromString = function(month) {
   var monthIndex = {'January':0, 'February':1, 'March':2, 'April':3, 'May':4, 'June':5, 'July':6, 'August':7, 'September':8, 'October':9, 'November':10, 'December':11};
   //own-property check so inherited members such as 'toString' or 'constructor' are never returned
   if (Object.prototype.hasOwnProperty.call(monthIndex, month)) {
      return monthIndex[month];
   }
   return undefined;
};
//Returns the English name of this date's month, looked up by getMonth()
Date.prototype.getMonthName = function() {
   var monthNames = [
      'January', 'February', 'March', 'April', 'May', 'June',
      'July', 'August', 'September', 'October', 'November', 'December'
   ];
   var monthNumber = this.getMonth();
   return monthNames[monthNumber];
};

//the main object that uses boss to get the list (also gets peoples "death" days too)
celebSearch = function(when,start,count) {
   //search yahoo/boss using the current day and month only on bio pages on imdb
   var bornDayMonth = when.getDate()+" "+when.getMonthName();
   var ud = Math.round(when.getTime()/1000);
   var search = 'site:www.imdb.com "Date of birth" "'+bornDayMonth+'" title:biography'
   var query = "select * from search.web("+start+","+count+") where query='"+search+"'";
   var celebs = y.query(query).results;

   //go through each result and start to get the persons name and their imdb info page out
   var results = [];
   default xml namespace ='http://www.inktomi.com/'; //make sure our e4x is in the right namespace. IMPORTANT
   for each (var celeb in celebs.result) {
      //discard any hits on the date of death that also match in our yahoo search
      //(this is going to hurt our paging)
      if (celeb["abstract"].toString().indexOf("<b>Date of Birth</b>. <b>"+bornDayMonth)<0) continue;
      var j = celeb.title.toString().indexOf("-");   //use text up to "dash" from title for name
      var name = celeb.title.toString().substring(0,j).trim();
      //start parsing these entries by pulling from imdb directly
      results.push(new celebInfo(name,celeb.url));
   }

   //loop through each imdb fetch result, and create the result object
   default xml namespace = '';
   var data = <birthdays utime={ud} date={when} />;
   for each (var celeb in results) {
      data.birthdays+=celeb.getData();
   }
   return data;
}

//run it for the requested date, or for today if no usable date was provided
var when = new Date();
//typeof guard: tolerate the `date` input variable not being defined at all
if (typeof date !== 'undefined' && date && date.length>0) {
   var requested = new Date(date); //TODO needs a well formed date including year
   //an unparseable string gives an Invalid Date (NaN time), which would turn the
   //search phrase into "NaN undefined"; keep today's date in that case
   if (!isNaN(requested.getTime())) {
      when = requested;
   }
}
//celebSearch returns the <birthdays> element itself, so it is called as a plain function;
//the search always starts at offset 0 and count is the requested number of results
response.object = celebSearch(when,0,count);

      ]]></execute>
    </select>
  </bindings>
</table>