// @(#)root/roostats:$Id$ // Author: Kyle Cranmer January 2009 /************************************************************************* * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. * * All rights reserved. * * * * For the licensing terms see $ROOTSYS/LICENSE. * * For the list of contributors see $ROOTSYS/README/CREDITS. * *************************************************************************/ //////////////////////////////////////////////////////////////////////////////// #ifndef RooStats_NeymanConstruction #include "RooStats/NeymanConstruction.h" #endif #ifndef RooStats_RooStatsUtils #include "RooStats/RooStatsUtils.h" #endif #ifndef RooStats_PointSetInterval #include "RooStats/PointSetInterval.h" #endif #include "RooStats/SamplingDistribution.h" #include "RooStats/ToyMCSampler.h" #include "RooStats/ModelConfig.h" #include "RooMsgService.h" #include "RooGlobalFunc.h" #include "RooDataSet.h" #include "TFile.h" #include "TTree.h" #include "TMath.h" #include "TH1F.h" ClassImp(RooStats::NeymanConstruction) ; using namespace RooFit; using namespace RooStats; using namespace std; //////////////////////////////////////////////////////////////////////////////// NeymanConstruction::NeymanConstruction(RooAbsData& data, ModelConfig& model): fSize(0.05), fData(data), fModel(model), fTestStatSampler(0), fPointsToTest(0), fLeftSideFraction(0), fConfBelt(0), // constructed with tree data fAdaptiveSampling(false), fAdditionalNToysFactor(1.), fSaveBeltToFile(false), fCreateBelt(false) { // default constructor // fWS = new RooWorkspace(); // fOwnsWorkspace = true; // fDataName = ""; // fPdfName = ""; } //////////////////////////////////////////////////////////////////////////////// /// default constructor /// if(fOwnsWorkspace && fWS) delete fWS; /// if(fConfBelt) delete fConfBelt; NeymanConstruction::~NeymanConstruction() { } //////////////////////////////////////////////////////////////////////////////// /// Main interface to get a RooStats::ConfInterval. /// It constructs a RooStats::SetInterval. PointSetInterval* NeymanConstruction::GetInterval() const { TFile* f=0; if(fSaveBeltToFile){ //coverity[FORWARD_NULL] oocoutI(f,Contents) << "NeymanConstruction saving ConfidenceBelt to file SamplingDistributions.root" << endl; f = new TFile("SamplingDistributions.root","recreate"); } Int_t npass = 0; RooArgSet* point; // strange problems when using snapshots. // RooArgSet* fPOI = (RooArgSet*) fModel.GetParametersOfInterest()->snapshot(); RooArgSet* fPOI = new RooArgSet(*fModel.GetParametersOfInterest()); RooDataSet* pointsInInterval = new RooDataSet("pointsInInterval", "points in interval", *(fPointsToTest->get(0)) ); // loop over points to test for(Int_t i=0; inumEntries(); ++i){ // get a parameter point from the list of points to test. point = (RooArgSet*) fPointsToTest->get(i);//->clone("temp"); // set parameters of interest to current point *fPOI = *point; // set test stat sampler to use this point fTestStatSampler->SetParametersForTestStat(*fPOI); // get the value of the test statistic for this data set Double_t thisTestStatistic = fTestStatSampler->EvaluateTestStatistic(fData, *fPOI ); /* cout << "NC CHECK: " << i << endl; point->Print(); fPOI->Print("v"); fData.Print(); cout <<"thisTestStatistic = " << thisTestStatistic << endl; */ // find the lower & upper thresholds on the test statistic that // define the acceptance region in the data SamplingDistribution* samplingDist=0; Double_t sigma; Double_t upperEdgeOfAcceptance, upperEdgeMinusSigma, upperEdgePlusSigma; Double_t lowerEdgeOfAcceptance, lowerEdgeMinusSigma, lowerEdgePlusSigma; Int_t additionalMC=0; // the adaptive sampling algorithm wants at least one toy event to be outside // of the requested pvalue including the sampling variaton. That leads to an equation // N-1 = (1-alpha)N + Z sqrt(N - (1-alpha)N) // for upper limit and // 1 = alpha N - Z sqrt(alpha N) // for lower limit // // solving for N gives: // N = 1/alpha * [3/2 + sqrt(5)] for Z = 1 (which is used currently) // thus, a good guess for the first iteration of events is N=3.73/alpha~4/alpha // should replace alpha here by smaller tail probability: eg. alpha*Min(leftsideFrac, 1.-leftsideFrac) // totalMC will be incremented by 2 before first call, so initiated it at half the value Int_t totalMC = (Int_t) (2./fSize/TMath::Min(fLeftSideFraction,1.-fLeftSideFraction)); if(fLeftSideFraction==0. || fLeftSideFraction ==1.){ totalMC = (Int_t) (2./fSize); } // use control Double_t tmc = Double_t(totalMC)*fAdditionalNToysFactor; totalMC = (Int_t) tmc; ToyMCSampler* toyMCSampler = dynamic_cast(fTestStatSampler); if(fAdaptiveSampling && toyMCSampler) { do{ // this will be executed first, then while conditioned checked // as an exit condition for the loop. // the next line is where most of the time will be spent // generating the sampling dist of the test statistic. additionalMC = 2*totalMC; // grow by a factor of two samplingDist = toyMCSampler->AppendSamplingDistribution(*point, samplingDist, additionalMC); if (!samplingDist) { oocoutE((TObject*)0,Eval) << "Neyman Construction: error generating sampling distribution" << endl; return 0; } totalMC=samplingDist->GetSize(); //cout << "without sigma upper = " << //samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ) << endl; sigma = 1; upperEdgeOfAcceptance = samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) , sigma, upperEdgePlusSigma); sigma = -1; samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) , sigma, upperEdgeMinusSigma); sigma = 1; lowerEdgeOfAcceptance = samplingDist->InverseCDF( fLeftSideFraction * fSize , sigma, lowerEdgePlusSigma); sigma = -1; samplingDist->InverseCDF( fLeftSideFraction * fSize , sigma, lowerEdgeMinusSigma); ooccoutD(samplingDist,Eval) << "NeymanConstruction: " << "total MC = " << totalMC << " this test stat = " << thisTestStatistic << endl << " upper edge -1sigma = " << upperEdgeMinusSigma << ", upperEdge = "<InverseCDF( fLeftSideFraction * fSize ); upperEdgeOfAcceptance = samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ); } // add acceptance region to ConfidenceBelt if(fConfBelt && fCreateBelt){ // cout << "conf belt set " << fConfBelt << endl; fConfBelt->AddAcceptanceRegion(*point, i, lowerEdgeOfAcceptance, upperEdgeOfAcceptance); } // printout some debug info TIter itr = point->createIterator(); RooRealVar* myarg; ooccoutP(samplingDist,Eval) << "NeymanConstruction: Prog: "<< i+1<<"/"<numEntries() << " total MC = " << samplingDist->GetSize() << " this test stat = " << thisTestStatistic << endl; ooccoutP(samplingDist,Eval) << " "; while ((myarg = (RooRealVar *)itr.Next())) { ooccoutP(samplingDist,Eval) << myarg->GetName() << "=" << myarg->getVal() << " "; } ooccoutP(samplingDist,Eval) << "[" << lowerEdgeOfAcceptance << ", " << upperEdgeOfAcceptance << "] " << " in interval = " << (thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance) << endl << endl; // Check if this data is in the acceptance region if(thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance) { // if so, set this point to true // fPointsToTest->add(*point, 1.); // this behaves differently for Hist and DataSet pointsInInterval->add(*point); ++npass; } if(fSaveBeltToFile){ //write to file samplingDist->Write(); string tmpName = "hist_"; tmpName+=samplingDist->GetName(); TH1F* h = new TH1F(tmpName.c_str(),"",500,0.,5.); for(int ii=0; iiGetSize(); ++ii){ h->Fill(samplingDist->GetSamplingDistribution().at(ii) ); } h->Write(); delete h; } delete samplingDist; // delete point; // from dataset } oocoutI(pointsInInterval,Eval) << npass << " points in interval" << endl; // create an interval based pointsInInterval PointSetInterval* interval = new PointSetInterval("ClassicalConfidenceInterval", *pointsInInterval); if(fSaveBeltToFile){ // write belt to file fConfBelt->Write(); f->Close(); } delete f; //delete data; return interval; }