/* Modified from: https://github.com/ucscXena/kaplan-meier */ var pluck, uniq, sortBy, groupBy, last, find; pluck = _.pluck; uniq = _.uniq; sortBy = _.sortBy; groupBy = _.groupBy; last = _.last; find = _.find; // jStat methods annoyingly demote types if they have length one. This makes // them fail to compose with other methods. Here we re-assert the proper types // for multiply and transpose. function multiply(a, b) { var r = jStat.multiply(a, b); return r.length ? r : [[r]]; } function transpose(a) { var r = jStat.transpose(a); return r[0].length ? r : [r]; } // Patch for bug, in case we need it. Affects reuse of matrices that // are passed to inv, aug, gauss_jordan. // https://github.com/jstat/jstat/issues/167 //jStat.aug = (a, b) => a.map((row, i) => row.concat(b[i])); // Compute at-risk, exiting, and deaths for each time t_i, from // a list of events. // tte: [number, ...] // ev: [boolean, ...] // returns: [{n, e, d, t}, ...] function timeTable(tte, ev) { var exits = sortBy(tte.map(function(x, i) { return {tte: x, ev: ev[i]}; }), 'tte'), // sort and collate uexits = uniq(pluck(exits, 'tte'), true), // unique tte gexits = groupBy(exits, function(x) { return x.tte; }); // group by common time of exit return uexits.reduce(function (a, tte) { // compute d_i, n_i for times t_i (including censor times) var group = gexits[tte], l = last(a) || {n: exits.length, e: 0}, events = group.filter(function(x) { return x.ev; }); a.push({ n: l.n - l.e, // at risk e: group.length, // number exiting d: events.length, // number events (death) t: group[0].tte // time }); return a; }, []); } // kaplan-meier // See http://en.wikipedia.org/wiki/Kaplan%E2%80%93Meier_estimator // // tte time to exit (event or censor) // ev is truthy if there is an event. function compute(tte, ev) { var dini = timeTable(tte, ev); // s : the survival probability from t=0 to the particular time (i.e. the // end of the time interval) // rate : the chance of an event happened within the time interval (as in t // and the previous t with an event) return dini.reduce(function (a, dn) { // survival at each t_i (including censor times) var l = last(a) || { s: 1 }; if (dn.d) { // there were events at this t_i a.push({t: dn.t, e: true, s: l.s * (1 - dn.d / dn.n), n: dn.n, d: dn.d, rate: dn.d / dn.n}); } else { // only censors a.push({t: dn.t, e: false, s: l.s, n: dn.n, d: dn.d, rate: null}); } return a; }, []); } //log-rank test of the difference between KM plots // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3059453/ // a good article to understand KM and comparing KM plots using log-rank test, // they used the pearson chisquared test to compute test statistics // sum of (O-E)^2/E // http://oto.sagepub.com/content/143/3/331.long // a good article to understand KM and comparing KM plots using log-rank test and hazardous ratio test // they also used the pearson chisquared test to compute test statistics // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC403858/ // introduce pearson chi-square to compute logrank statistics, however mentioned there is another way // http://ssp.unl.edu/Log%20Rank%20Test%20For%20More%20Than%202%20Groups.pdf // gives basic idea of the "other" way R seems to use the "other" way // (O-E)^2/V V is variance for two groups and covariance for multiple groups // https://cran.r-project.org/web/packages/survival/survival.pdf // R use (O-E)^2/V V is variance for two groups and covariance for multiple groups //https://github.com/CamDavidsonPilon/lifelines/blob/master/lifelines/statistics.py //python implementation, identical results to R // covariance calculation // https://books.google.com/books?id=nPkjIEVY-CsC&pg=PA451&lpg=PA451&dq=multivariate+hypergeometric+distribution+covariance&source=bl&ots=yoieGfA4bu&sig=dhRcSYKcYiqLXBPZWOaqzciViMs&hl=en&sa=X&ved=0CEQQ6AEwBmoVChMIkqbU09SuyAIVgimICh0J3w1x#v=onepage&q=multivariate%20hypergeometric%20distribution%20covariance&f=false //https://plot.ly/ipython-notebooks/survival-analysis-r-vs-python/#Using-R // R online tutorial // chisquare distribution at // https://github.com/jstat/jstat/blob/master/src/distribution.js // testing jStat accuracy: http://www.socscistatistics.com/pvalues/chidistribution.aspx // p value = 1- jStat.chisquare.cdf(x, dof ); -- x is chisquare statistics, dof is degree of freedom // for comparing two plots, the dof is n-1 = 1, comparing three plots dof = n-1 = 2 // given a theoretical survival curve (si), and tte + ev ( tte and ev is the data ), // compute the expected total number of events // report observed n events, expected n events. pearson's chi-square component (O-E)^2/E function expectedObservedEventNumber(si, tte, ev) { var data = timeTable(tte, ev), expectedNumber, observedNumber, dataByTimeTable = []; si = si.filter(function(item) { return item.e; }); expectedNumber = si.reduce(function (memo, item) { var pointerInData = find(data, function(x) { return x.t >= item.t; }); if (pointerInData) { var expected = pointerInData.n * item.rate; dataByTimeTable.push(pointerInData); return memo + expected; } else { return memo; } }, 0); observedNumber = ev.filter(function(x) { return x; }).length; return { expected: expectedNumber, observed: observedNumber, dataByTimeTable: dataByTimeTable, timeNumber: dataByTimeTable.length }; } function covariance(allGroupsRes, OETable) { var vv = jStat.zeros(OETable.length), i, j, //groups t, //timeIndex N, //total number of samples Ki, Kj, // at risk number from each group n; //total observed for (i = 0; i < OETable.length; i++) { for (j = i; j < OETable.length; j++) { for (t = 0; t < allGroupsRes.length; t++) { N = allGroupsRes[t].n; n = allGroupsRes[t].d; if (t < OETable[i].timeNumber && t < OETable[j].timeNumber) { Ki = OETable[i].dataByTimeTable[t].n; Kj = OETable[j].dataByTimeTable[t].n; // https://books.google.com/books?id=nPkjIEVY-CsC&pg=PA451&lpg=PA451&dq=multivariate+hypergeometric+distribution+covariance&source=bl&ots=yoieGfA4bu&sig=dhRcSYKcYiqLXBPZWOaqzciViMs&hl=en&sa=X&ved=0CEQQ6AEwBmoVChMIkqbU09SuyAIVgimICh0J3w1x#v=onepage&q=multivariate%20hypergeometric%20distribution%20covariance&f=false // when N==1: only 1 subject, no variance if (i !== j && N !== 1) { vv[i][j] -= n * Ki * Kj * (N - n) / (N * N * (N - 1)); vv[j][i] = vv[i][j]; } else if (N !== 1) { // i==j vv[i][i] += n * Ki * (N - Ki) * (N - n) / (N * N * (N - 1)); } } } } } return vv; } // This might be the mis-named. function solve(a, b) { var bT = transpose(b), aInv = jStat.inv(a); return multiply(multiply(b, aInv), bT); } function allGroupsKm(groups) { var tte = [].concat.apply([], pluck(groups, 'tte')), ev = [].concat.apply([], pluck(groups, 'ev')); return compute(tte, ev).filter(function(t) { return t.e; }); } // allGroupsRes: km of all groups combined? // groupedDataTable: [{tte, ev}, ...] function logranktest(groupedDataTable) { var allGroupsRes = allGroupsKm(groupedDataTable), pValue = 1, KMStats, dof, // degree of freedom OETable, OMinusEVector, // O-E vv; //covariant matrix // Table of observed and expected events, for each group. //console.log({tte, ev}); OETable = groupedDataTable .map(function(d) { return expectedObservedEventNumber(allGroupsRes, d.tte, d.ev); }) .filter(function(r) { return r.expected; }); // Find O-E and covariance, and drop one dimension from each OMinusEVector = OETable.map(function(r) { return r.observed - r.expected; }).slice(1); vv = covariance(allGroupsRes, OETable).slice(1).map(function(r) { return r.slice(1); }); // drop 1st row & 1st column dof = OETable.length - 1; if (dof > 0) { KMStats = solve(vv, [OMinusEVector])[0][0]; pValue = 1 - jStat.chisquare.cdf(KMStats, dof); } return { dof: dof, KMStats: KMStats, pValue: pValue }; }