// @(#)root/roostats:$Id$ /************************************************************************* * Project: RooStats * * Package: RooFit/RooStats * * Authors: * * Kyle Cranmer, Lorenzo Moneta, Gregory Schott, Wouter Verkerke * * Other author of this class: Danilo Piparo * ************************************************************************* * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. * * All rights reserved. * * * * For the licensing terms see $ROOTSYS/LICENSE. * * For the list of contributors see $ROOTSYS/README/CREDITS. * *************************************************************************/ //////////////////////////////////////////////////////////////////////////////// #include "RooDataHist.h" #include "RooDataSet.h" #include "RooGlobalFunc.h" #include "RooNLLVar.h" #include "RooRealVar.h" #include "RooAbsData.h" #include "RooWorkspace.h" #include "TH1.h" #include "RooStats/HybridCalculatorOriginal.h" using namespace std; ClassImp(RooStats::HybridCalculatorOriginal) using namespace RooStats; /////////////////////////////////////////////////////////////////////////// HybridCalculatorOriginal::HybridCalculatorOriginal(const char *name) : TNamed(name,name), fSbModel(0), fBModel(0), fObservables(0), fNuisanceParameters(0), fPriorPdf(0), fData(0), fGenerateBinned(false), fUsePriorPdf(false), fTmpDoExtended(true) { // constructor with name and title // set default parameters SetTestStatistic(1); SetNumberOfToys(1000); } /// constructor without the data - is it needed ??????????? HybridCalculatorOriginal::HybridCalculatorOriginal( RooAbsPdf& sbModel, RooAbsPdf& bModel, RooArgList& observables, const RooArgSet* nuisance_parameters, RooAbsPdf* priorPdf , bool GenerateBinned, int testStatistics, int numToys) : fSbModel(&sbModel), fBModel(&bModel), fNuisanceParameters(nuisance_parameters), fPriorPdf(priorPdf), fData(0), fGenerateBinned(GenerateBinned), fUsePriorPdf(false), fTmpDoExtended(true) { /// HybridCalculatorOriginal constructor without specifying a data set /// the user need to specify the models in the S+B case and B-only case, /// the list of observables of the model(s) (for MC-generation), the list of parameters /// that are marginalised and the prior distribution of those parameters // observables are managed by the class (they are copied in) fObservables = new RooArgList(observables); //Try to recover the information from the pdf's //fObservables=new RooArgList("fObservables"); //fNuisanceParameters=new RooArgSet("fNuisanceParameters"); // if (priorPdf){ SetTestStatistic(testStatistics); SetNumberOfToys(numToys); if (priorPdf) UseNuisance(true); // this->Print(); /* if ( _verbose ) */ //this->PrintMore("v"); /// TO DO: add the verbose mode } HybridCalculatorOriginal::HybridCalculatorOriginal( RooAbsData & data, RooAbsPdf& sbModel, RooAbsPdf& bModel, const RooArgSet* nuisance_parameters, RooAbsPdf* priorPdf, bool GenerateBinned, int testStatistics, int numToys) : fSbModel(&sbModel), fBModel(&bModel), fObservables(0), fNuisanceParameters(nuisance_parameters), fPriorPdf(priorPdf), fData(&data), fGenerateBinned(GenerateBinned), fUsePriorPdf(false), fTmpDoExtended(true) { /// HybridCalculatorOriginal constructor for performing hypotesis test /// the user need to specify the data set, the models in the S+B case and B-only case. /// In case of treatment of nuisance parameter, the user need to specify the /// the list of parameters that are marginalised and the prior distribution of those parameters SetTestStatistic(testStatistics); SetNumberOfToys(numToys); if (priorPdf) UseNuisance(true); } HybridCalculatorOriginal::HybridCalculatorOriginal( RooAbsData& data, const ModelConfig& sbModel, const ModelConfig& bModel, bool GenerateBinned, int testStatistics, int numToys) : fSbModel(sbModel.GetPdf()), fBModel(bModel.GetPdf()), fObservables(0), // no need to set them - can be taken from the data fNuisanceParameters((sbModel.GetNuisanceParameters()) ? sbModel.GetNuisanceParameters() : bModel.GetNuisanceParameters()), fPriorPdf((sbModel.GetPriorPdf()) ? sbModel.GetPriorPdf() : bModel.GetPriorPdf()), fData(&data), fGenerateBinned(GenerateBinned), fUsePriorPdf(false), fTmpDoExtended(true) { /// Constructor with a ModelConfig object representing the signal + background model and /// another model config representig the background only model /// a Prior pdf for the nuiscane parameter of the signal and background can be specified in /// the s+b model or the b model. If it is specified in the s+b model, the one of the s+b model will be used if (fPriorPdf) UseNuisance(true); SetTestStatistic(testStatistics); SetNumberOfToys(numToys); } /////////////////////////////////////////////////////////////////////////// HybridCalculatorOriginal::~HybridCalculatorOriginal() { /// HybridCalculatorOriginal destructor if (fObservables) delete fObservables; } /////////////////////////////////////////////////////////////////////////// void HybridCalculatorOriginal::SetNullModel(const ModelConfig& model) { // Set the model describing the null hypothesis fBModel = model.GetPdf(); // only if it has not been set before if (!fPriorPdf) fPriorPdf = model.GetPriorPdf(); if (!fNuisanceParameters) fNuisanceParameters = model.GetNuisanceParameters(); } void HybridCalculatorOriginal::SetAlternateModel(const ModelConfig& model) { // Set the model describing the alternate hypothesis fSbModel = model.GetPdf(); fPriorPdf = model.GetPriorPdf(); fNuisanceParameters = model.GetNuisanceParameters(); } void HybridCalculatorOriginal::SetTestStatistic(int index) { /// set the desired test statistics: /// index=1 : likelihood ratio: 2 * log( L_sb / L_b ) (DEFAULT) /// index=2 : number of generated events /// index=3 : profiled likelihood ratio /// if the index is different to any of those values, the default is used fTestStatisticsIdx = index; } /////////////////////////////////////////////////////////////////////////// HybridResult* HybridCalculatorOriginal::Calculate(TH1& data, unsigned int nToys, bool usePriors) const { /// first compute the test statistics for data and then prepare and run the toy-MC experiments /// convert data TH1 histogram to a RooDataHist TString dataHistName = GetName(); dataHistName += "_roodatahist"; RooDataHist dataHist(dataHistName,"Data distribution as RooDataHist converted from TH1",*fObservables,&data); HybridResult* result = Calculate(dataHist,nToys,usePriors); return result; } /////////////////////////////////////////////////////////////////////////// HybridResult* HybridCalculatorOriginal::Calculate(RooAbsData& data, unsigned int nToys, bool usePriors) const { /// first compute the test statistics for data and then prepare and run the toy-MC experiments double testStatData = 0; if ( fTestStatisticsIdx==2 ) { /// number of events used as test statistics double nEvents = data.sumEntries(); testStatData = nEvents; } else if ( fTestStatisticsIdx==3 ) { /// profiled likelihood ratio used as test statistics if ( fTmpDoExtended ) { RooNLLVar sb_nll("sb_nll","sb_nll",*fSbModel,data,RooFit::CloneData(false),RooFit::Extended()); fSbModel->fitTo(data,RooFit::Hesse(false),RooFit::Strategy(0),RooFit::Extended()); double sb_nll_val = sb_nll.getVal(); RooNLLVar b_nll("b_nll","b_nll",*fBModel,data,RooFit::CloneData(false),RooFit::Extended()); fBModel->fitTo(data,RooFit::Hesse(false),RooFit::Strategy(0),RooFit::Extended()); double b_nll_val = b_nll.getVal(); double m2lnQ = 2*(sb_nll_val-b_nll_val); testStatData = m2lnQ; } else { RooNLLVar sb_nll("sb_nll","sb_nll",*fSbModel,data,RooFit::CloneData(false)); fSbModel->fitTo(data,RooFit::Hesse(false),RooFit::Strategy(0)); double sb_nll_val = sb_nll.getVal(); RooNLLVar b_nll("b_nll","b_nll",*fBModel,data,RooFit::CloneData(false)); fBModel->fitTo(data,RooFit::Hesse(false),RooFit::Strategy(0)); double b_nll_val = b_nll.getVal(); double m2lnQ = 2*(sb_nll_val-b_nll_val); testStatData = m2lnQ; } } else if ( fTestStatisticsIdx==1 ) { /// likelihood ratio used as test statistics (default) if ( fTmpDoExtended ) { RooNLLVar sb_nll("sb_nll","sb_nll",*fSbModel,data,RooFit::Extended()); RooNLLVar b_nll("b_nll","b_nll",*fBModel,data,RooFit::Extended()); double m2lnQ = 2*(sb_nll.getVal()-b_nll.getVal()); testStatData = m2lnQ; } else { RooNLLVar sb_nll("sb_nll","sb_nll",*fSbModel,data); RooNLLVar b_nll("b_nll","b_nll",*fBModel,data); double m2lnQ = 2*(sb_nll.getVal()-b_nll.getVal()); testStatData = m2lnQ; } } std::cout << "Test statistics has been evaluated for data\n"; HybridResult* result = Calculate(nToys,usePriors); result->SetDataTestStatistics(testStatData); return result; } /////////////////////////////////////////////////////////////////////////// HybridResult* HybridCalculatorOriginal::Calculate(unsigned int nToys, bool usePriors) const { std::vector bVals; bVals.reserve(nToys); std::vector sbVals; sbVals.reserve(nToys); RunToys(bVals,sbVals,nToys,usePriors); HybridResult* result; TString name = "HybridResult_" + TString(GetName() ); if ( fTestStatisticsIdx==2 ) result = new HybridResult(name,sbVals,bVals,false); else result = new HybridResult(name,sbVals,bVals); return result; } /////////////////////////////////////////////////////////////////////////// void HybridCalculatorOriginal::RunToys(std::vector& bVals, std::vector& sbVals, unsigned int nToys, bool usePriors) const { /// do the actual run-MC processing std::cout << "HybridCalculatorOriginal: run " << nToys << " toy-MC experiments\n"; std::cout << "with test statistics index: " << fTestStatisticsIdx << "\n"; if (usePriors) std::cout << "marginalize nuisance parameters \n"; assert(nToys > 0); assert(fBModel); assert(fSbModel); if (usePriors) { assert(fPriorPdf); assert(fNuisanceParameters); } std::vector parameterValues; /// array to hold the initial parameter values /// backup the initial values of the parameters that are varied by the prior MC-integration int nParameters = (fNuisanceParameters) ? fNuisanceParameters->getSize() : 0; RooArgList parametersList("parametersList"); /// transforms the RooArgSet in a RooArgList (needed for .at()) if (usePriors && nParameters>0) { parametersList.add(*fNuisanceParameters); parameterValues.resize(nParameters); for (int iParameter=0; iParametergetVal(); } } // create a cloned list of all parameters need in case of test statistics 3 where those // changed by the best fit RooArgSet originalSbParams; RooArgSet originalBParams; if (fTestStatisticsIdx == 3) { RooArgSet * sbparams = fSbModel->getParameters(*fObservables); RooArgSet * bparams = fBModel->getParameters(*fObservables); if (sbparams) originalSbParams.addClone(*sbparams); if (bparams) originalBParams.addClone(*bparams); delete sbparams; delete bparams; // originalSbParams.Print("V"); // originalBParams.Print("V"); } for (unsigned int iToy=0; iToy0) { /// generation from the prior pdf (TO DO: RooMCStudy could be used here) RooDataSet* tmpValues = (RooDataSet*) fPriorPdf->generate(*fNuisanceParameters,1); for (int iParameter=0; iParametersetVal(tmpValues->get()->getRealValue(oneParam->GetName())); } delete tmpValues; } /// generate the dataset in the B-only hypothesis RooAbsData* bData; if (fGenerateBinned) bData = static_cast (fBModel->generateBinned(*fObservables,RooFit::Extended())); else { if ( fTmpDoExtended ) bData = static_cast (fBModel->generate(*fObservables,RooFit::Extended())); else bData = static_cast (fBModel->generate(*fObservables,1)); } /// work-around in case of an empty dataset (TO DO: need a debug in RooFit?) bool bIsEmpty = false; if (bData==NULL) { bIsEmpty = true; // if ( _verbose ) std::cout << "empty B-only dataset!\n"; RooDataSet* bDataDummy=new RooDataSet("bDataDummy","empty dataset",*fObservables); bData = static_cast(new RooDataHist ("bDataEmpty","",*fObservables,*bDataDummy)); delete bDataDummy; } /// generate the dataset in the S+B hypothesis RooAbsData* sbData; if (fGenerateBinned) sbData = static_cast (fSbModel->generateBinned(*fObservables,RooFit::Extended())); else { if ( fTmpDoExtended ) sbData = static_cast (fSbModel->generate(*fObservables,RooFit::Extended())); else sbData = static_cast (fSbModel->generate(*fObservables,1)); } /// work-around in case of an empty dataset (TO DO: need a debug in RooFit?) bool sbIsEmpty = false; if (sbData==NULL) { sbIsEmpty = true; // if ( _verbose ) std::cout << "empty S+B dataset!\n"; RooDataSet* sbDataDummy=new RooDataSet("sbDataDummy","empty dataset",*fObservables); sbData = static_cast(new RooDataHist ("sbDataEmpty","",*fObservables,*sbDataDummy)); delete sbDataDummy; } /// restore the parameters to their initial values if (usePriors && nParameters>0) { for (int iParameter=0; iParametersetVal(parameterValues[iParameter]); } } // test first the S+B hypothesis and the the B-only hypothesis for (int hypoTested=0; hypoTested<=1; hypoTested++) { RooAbsData* dataToTest = sbData; bool dataIsEmpty = sbIsEmpty; if ( hypoTested==1 ) { dataToTest = bData; dataIsEmpty = bIsEmpty; } /// evaluate the test statistic in the tested hypothesis case if ( fTestStatisticsIdx==2 ) { /// number of events used as test statistics double nEvents = 0; if ( !dataIsEmpty ) nEvents = dataToTest->numEntries(); if ( hypoTested==0 ) sbVals.push_back(nEvents); else bVals.push_back(nEvents); } else if ( fTestStatisticsIdx==3 ) { /// profiled likelihood ratio used as test statistics if ( fTmpDoExtended ) { RooNLLVar sb_nll("sb_nll","sb_nll",*fSbModel,*dataToTest,RooFit::CloneData(false),RooFit::Extended()); fSbModel->fitTo(*dataToTest,RooFit::PrintLevel(-1), RooFit::Hesse(false),RooFit::Strategy(0),RooFit::Extended()); double sb_nll_val = sb_nll.getVal(); RooNLLVar b_nll("b_nll","b_nll",*fBModel,*dataToTest,RooFit::CloneData(false),RooFit::Extended()); fBModel->fitTo(*dataToTest,RooFit::PrintLevel(-1),RooFit::Hesse(false),RooFit::Strategy(0),RooFit::Extended()); double b_nll_val = b_nll.getVal(); double m2lnQ = -2*(b_nll_val-sb_nll_val); if ( hypoTested==0 ) sbVals.push_back(m2lnQ); else bVals.push_back(m2lnQ); } else { RooNLLVar sb_nll("sb_nll","sb_nll",*fSbModel,*dataToTest,RooFit::CloneData(false)); fSbModel->fitTo(*dataToTest,RooFit::PrintLevel(-1), RooFit::Hesse(false),RooFit::Strategy(0)); double sb_nll_val = sb_nll.getVal(); RooNLLVar b_nll("b_nll","b_nll",*fBModel,*dataToTest,RooFit::CloneData(false)); fBModel->fitTo(*dataToTest,RooFit::PrintLevel(-1), RooFit::Hesse(false),RooFit::Strategy(0)); double b_nll_val = b_nll.getVal(); double m2lnQ = -2*(b_nll_val-sb_nll_val); if ( hypoTested==0 ) sbVals.push_back(m2lnQ); else bVals.push_back(m2lnQ); } } else if ( fTestStatisticsIdx==1 ) { /// likelihood ratio used as test statistics (default) if ( fTmpDoExtended ) { RooNLLVar sb_nll("sb_nll","sb_nll",*fSbModel,*dataToTest,RooFit::CloneData(false),RooFit::Extended()); RooNLLVar b_nll("b_nll","b_nll",*fBModel,*dataToTest,RooFit::CloneData(false),RooFit::Extended()); double m2lnQ = -2*(b_nll.getVal()-sb_nll.getVal()); if ( hypoTested==0 ) sbVals.push_back(m2lnQ); else bVals.push_back(m2lnQ); } else { RooNLLVar sb_nll("sb_nll","sb_nll",*fSbModel,*dataToTest,RooFit::CloneData(false)); RooNLLVar b_nll("b_nll","b_nll",*fBModel,*dataToTest,RooFit::CloneData(false)); double m2lnQ = -2*(b_nll.getVal()-sb_nll.getVal()); if ( hypoTested==0 ) sbVals.push_back(m2lnQ); else bVals.push_back(m2lnQ); } } } // tested both hypotheses /// delete the toy-MC datasets delete sbData; delete bData; /// restore the parameters to their initial values in case fitting is done if (fTestStatisticsIdx == 3) { RooArgSet * sbparams = fSbModel->getParameters(*fObservables); if (sbparams) { assert(originalSbParams.getSize() == sbparams->getSize()); *sbparams = originalSbParams; delete sbparams; } RooArgSet * bparams = fBModel->getParameters(*fObservables); if (bparams) { assert(originalBParams.getSize() == bparams->getSize()); *bparams = originalBParams; delete bparams; } } } /// end of loop over toy-MC experiments /// restore the parameters to their initial values if (usePriors && nParameters>0) { for (int iParameter=0; iParametersetVal(parameterValues[iParameter]); } } return; } /////////////////////////////////////////////////////////////////////////// void HybridCalculatorOriginal::PrintMore(const char* options) const { /// Print out some information about the input models if (fSbModel) { std::cout << "Signal plus background model:\n"; fSbModel->Print(options); } if (fBModel) { std::cout << "\nBackground model:\n"; fBModel->Print(options); } if (fObservables) { std::cout << "\nObservables:\n"; fObservables->Print(options); } if (fNuisanceParameters) { std::cout << "\nParameters being integrated:\n"; fNuisanceParameters->Print(options); } if (fPriorPdf) { std::cout << "\nPrior PDF model for integration:\n"; fPriorPdf->Print(options); } return; } /////////////////////////////////////////////////////////////////////////// // implementation of inherited methods from HypoTestCalculator HybridResult* HybridCalculatorOriginal::GetHypoTest() const { // perform the hypothesis test and return result of hypothesis test // check first that everything needed is there if (!DoCheckInputs()) return 0; RooAbsData * treeData = dynamic_cast (fData); if (!treeData) { std::cerr << "Error in HybridCalculatorOriginal::GetHypoTest - invalid data type - return NULL" << std::endl; return 0; } bool usePrior = (fUsePriorPdf && fPriorPdf ); return Calculate( *treeData, fNToys, usePrior); } bool HybridCalculatorOriginal::DoCheckInputs() const { if (!fData) { std::cerr << "Error in HybridCalculatorOriginal - data have not been set" << std::endl; return false; } // if observable have not been set take them from data if (!fObservables && fData->get() ) fObservables = new RooArgList( *fData->get() ); if (!fObservables) { std::cerr << "Error in HybridCalculatorOriginal - no observables" << std::endl; return false; } if (!fSbModel) { std::cerr << "Error in HybridCalculatorOriginal - S+B pdf has not been set " << std::endl; return false; } if (!fBModel) { std::cerr << "Error in HybridCalculatorOriginal - B pdf has not been set" << std::endl; return false; } if (fUsePriorPdf && !fNuisanceParameters) { std::cerr << "Error in HybridCalculatorOriginal - nuisance parameters have not been set " << std::endl; return false; } if (fUsePriorPdf && !fPriorPdf) { std::cerr << "Error in HybridCalculatorOriginal - prior pdf has not been set " << std::endl; return false; } return true; }