1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one or more
3  * contributor license agreements.  See the NOTICE file distributed with
4  * this work for additional information regarding copyright ownership.
5  * The ASF licenses this file to You under the Apache License, Version 2.0
6  * (the "License"); you may not use this file except in compliance with
7  * the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 package org.apache.commons.math3.stat.inference;
18 
19 import org.apache.commons.math3.distribution.BinomialDistribution;
20 import org.apache.commons.math3.exception.MathIllegalArgumentException;
21 import org.apache.commons.math3.exception.MathInternalError;
22 import org.apache.commons.math3.exception.NotPositiveException;
23 import org.apache.commons.math3.exception.NullArgumentException;
24 import org.apache.commons.math3.exception.OutOfRangeException;
25 import org.apache.commons.math3.exception.util.LocalizedFormats;
26 
27 /**
28  * Implements binomial test statistics.
29  * <p>
30  * Exact test for the statistical significance of deviations from a
31  * theoretically expected distribution of observations into two categories.
32  *
33  * @see <a href="http://en.wikipedia.org/wiki/Binomial_test">Binomial test (Wikipedia)</a>
34  * @since 3.3
35  */
36 public class BinomialTest {
37 
38     /**
39      * Returns whether the null hypothesis can be rejected with the given confidence level.
40      * <p>
41      * <strong>Preconditions</strong>:
42      * <ul>
43      * <li>Number of trials must be &ge; 0.</li>
44      * <li>Number of successes must be &ge; 0.</li>
45      * <li>Number of successes must be &le; number of trials.</li>
46      * <li>Probability must be &ge; 0 and &le; 1.</li>
47      * </ul>
48      *
49      * @param numberOfTrials number of trials performed
50      * @param numberOfSuccesses number of successes observed
51      * @param probability assumed probability of a single trial under the null hypothesis
52      * @param alternativeHypothesis type of hypothesis being evaluated (one- or two-sided)
53      * @param alpha significance level of the test
54      * @return true if the null hypothesis can be rejected with confidence {@code 1 - alpha}
55      * @throws NotPositiveException if {@code numberOfTrials} or {@code numberOfSuccesses} is negative
56      * @throws OutOfRangeException if {@code probability} is not between 0 and 1
57      * @throws MathIllegalArgumentException if {@code numberOfTrials} &lt; {@code numberOfSuccesses} or
58      * if {@code alternateHypothesis} is null.
59      * @see AlternativeHypothesis
60      */
binomialTest(int numberOfTrials, int numberOfSuccesses, double probability, AlternativeHypothesis alternativeHypothesis, double alpha)61     public boolean binomialTest(int numberOfTrials, int numberOfSuccesses, double probability,
62                                 AlternativeHypothesis alternativeHypothesis, double alpha) {
63         double pValue = binomialTest(numberOfTrials, numberOfSuccesses, probability, alternativeHypothesis);
64         return pValue < alpha;
65     }
66 
67     /**
68      * Returns the <i>observed significance level</i>, or
69      * <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">p-value</a>,
70      * associated with a <a href="http://en.wikipedia.org/wiki/Binomial_test"> Binomial test</a>.
71      * <p>
72      * The number returned is the smallest significance level at which one can reject the null hypothesis.
73      * The form of the hypothesis depends on {@code alternativeHypothesis}.</p>
74      * <p>
75      * The p-Value represents the likelihood of getting a result at least as extreme as the sample,
76      * given the provided {@code probability} of success on a single trial. For single-sided tests,
77      * this value can be directly derived from the Binomial distribution. For the two-sided test,
78      * the implementation works as follows: we start by looking at the most extreme cases
79      * (0 success and n success where n is the number of trials from the sample) and determine their likelihood.
80      * The lower value is added to the p-Value (if both values are equal, both are added). Then we continue with
81      * the next extreme value, until we added the value for the actual observed sample.</p>
82      * <p>
83      * <strong>Preconditions</strong>:
84      * <ul>
85      * <li>Number of trials must be &ge; 0.</li>
86      * <li>Number of successes must be &ge; 0.</li>
87      * <li>Number of successes must be &le; number of trials.</li>
88      * <li>Probability must be &ge; 0 and &le; 1.</li>
89      * </ul></p>
90      *
91      * @param numberOfTrials number of trials performed
92      * @param numberOfSuccesses number of successes observed
93      * @param probability assumed probability of a single trial under the null hypothesis
94      * @param alternativeHypothesis type of hypothesis being evaluated (one- or two-sided)
95      * @return p-value
96      * @throws NotPositiveException if {@code numberOfTrials} or {@code numberOfSuccesses} is negative
97      * @throws OutOfRangeException if {@code probability} is not between 0 and 1
98      * @throws MathIllegalArgumentException if {@code numberOfTrials} &lt; {@code numberOfSuccesses} or
99      * if {@code alternateHypothesis} is null.
100      * @see AlternativeHypothesis
101      */
binomialTest(int numberOfTrials, int numberOfSuccesses, double probability, AlternativeHypothesis alternativeHypothesis)102     public double binomialTest(int numberOfTrials, int numberOfSuccesses, double probability,
103                                AlternativeHypothesis alternativeHypothesis) {
104         if (numberOfTrials < 0) {
105             throw new NotPositiveException(numberOfTrials);
106         }
107         if (numberOfSuccesses < 0) {
108             throw new NotPositiveException(numberOfSuccesses);
109         }
110         if (probability < 0 || probability > 1) {
111             throw new OutOfRangeException(probability, 0, 1);
112         }
113         if (numberOfTrials < numberOfSuccesses) {
114             throw new MathIllegalArgumentException(
115                 LocalizedFormats.BINOMIAL_INVALID_PARAMETERS_ORDER,
116                 numberOfTrials, numberOfSuccesses);
117         }
118         if (alternativeHypothesis == null) {
119             throw new NullArgumentException();
120         }
121 
122         // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
123         final BinomialDistribution distribution = new BinomialDistribution(null, numberOfTrials, probability);
124         switch (alternativeHypothesis) {
125         case GREATER_THAN:
126             return 1 - distribution.cumulativeProbability(numberOfSuccesses - 1);
127         case LESS_THAN:
128             return distribution.cumulativeProbability(numberOfSuccesses);
129         case TWO_SIDED:
130             int criticalValueLow = 0;
131             int criticalValueHigh = numberOfTrials;
132             double pTotal = 0;
133 
134             while (true) {
135                 double pLow = distribution.probability(criticalValueLow);
136                 double pHigh = distribution.probability(criticalValueHigh);
137 
138                 if (pLow == pHigh) {
139                     pTotal += 2 * pLow;
140                     criticalValueLow++;
141                     criticalValueHigh--;
142                 } else if (pLow < pHigh) {
143                     pTotal += pLow;
144                     criticalValueLow++;
145                 } else {
146                     pTotal += pHigh;
147                     criticalValueHigh--;
148                 }
149 
150                 if (criticalValueLow > numberOfSuccesses || criticalValueHigh < numberOfSuccesses) {
151                     break;
152                 }
153             }
154             return pTotal;
155         default:
156             throw new MathInternalError(LocalizedFormats. OUT_OF_RANGE_SIMPLE, alternativeHypothesis,
157                       AlternativeHypothesis.TWO_SIDED, AlternativeHypothesis.LESS_THAN);
158         }
159     }
160 }
161