Mustapha Setta April-October 2005 0 Table of Contents TABLE OF CONTENTS 0. SUMMARY ..............................................................................................................3 1. INTRODUCTION: ....................................................................................................6 1.1 Binomial distribution .......................................................................................................................................6 1.2 Hypothesis Testing ...........................................................................................................................................6 1.3 Bayesian statistics .............................................................................................................................................9 2. ONE-ARMED TRIALS ...........................................................................................11 2.1 One-stage design .............................................................................................................................................11 2.1.1 Normal approximation .............................................................................................................................. 11 2.1.2 Binomial distribution ................................................................................................................................ 12 2.2 Two-stage designs ...........................................................................................................................................13 2.2.1 General introduction .................................................................................................................................13 2.2.2 Frequentist designs ...................................................................................................................................14 2.2.3 Bayesian design ........................................................................................................................................18 2.3 Three-stage designs ........................................................................................................................................21 2.3.1 General introduction .................................................................................................................................21 2.3.2 Frequentist designs ...................................................................................................................................21 2.3.3 Bayesian design ........................................................................................................................................23 2.4 Multi-stage designs .........................................................................................................................................25 2.4.1. Bayesian Design ......................................................................................................................................25 3. A CASE STUDY: INVESTIGATING A DOSE OF HIRULOG ................................28 3.1. Case description ............................................................................................................................................28 3.2. Possible designs..............................................................................................................................................28 3.2.1 Ginsberg et al ............................................................................................................................................28 3.2.2 Fleming .....................................................................................................................................................30 3.2.3 Thall & Simon (Continuous) ....................................................................................................................31 3.2.4 Thall & Simon (Discrete)..........................................................................................................................32 3.3 Reversing Hypotheses ....................................................................................................................................33 3.4 Summary and recommendations ..................................................................................................................34 3.4.1 Summary ...................................................................................................................................................34 3.4.2 Recommendations .....................................................................................................................................35 4. TWO-ARMED TRIALS ..........................................................................................36 4.1. One-Stage design ...........................................................................................................................................36 4.2. Two-stage Design ...........................................................................................................................................37 4.2.1. Decomposition of the Z-test .....................................................................................................................38 1 Table of Contents 4.2.2. The B-value .............................................................................................................................................39 4.2.3. The conditional power .............................................................................................................................39 4.2.4. Futility stopping rule ................................................................................................................................ 40 REFERENCES ..........................................................................................................44 APPENDIX. SAS PROGRAMS .................................................................................45 2 0. Summary 0. Summary In order to create new medicines clinical trials are needed. A clinical trial is a research study to answer specific questions about vaccines, new therapies or new ways of using known treatments. It is also used to determine whether new drugs or treatments are both safe and effective. In most clinical trials one group of participants is given an experimental drug, while another group is given either a standard treatment for the disease or a placebo (the short hand term is two-armed). However, in earlier stages of development, clinical trials may also have only one ‘arm’ which means that all participants gets the same experimental drug. Clinical trials can be divided into three categories: phase I, II and III. Phase I studies are primarily concerned with the drug's safety, and are the first time the drug is tested in humans. These studies are typically done in a small number of healthy volunteers (20-100), usually in a hospital setting where they can be closely monitored and treated if there are any side effects. The purpose of these studies is to determine how the experimental drug is absorbed, metabolized, and excreted in humans. Additionally, they seek to determine what types of side effects occur as the dosage of the drug is increased. Any beneficial effects of the drug are also noted. Phase II: Once an experimental drug has been proven to be safe and well-tolerated in healthy volunteers, it must be tested in the patients that have the disease or condition that the experimental drug is expected to improve/cure. In addition to ensuring that the experimental drug is safe and effective in the patient population of interest, Phase II studies are also designed to evaluate the effectiveness of the drug. The second phase of testing may last from several months to a few years and may involve up to several hundred patients. Phase III: is a study where an experimental drug is tested in several hundred to several thousand patients with the disease/condition of interest. Most Phase III studies are well controlled, randomized trials. That is, one group of patients (subjects) receives the experimental drug, while a second "control" group receives a standard treatment or placebo. Placement of the subject into the drug treatment or placebo group is random in a binary context (as if by the flip of a coin). Often these studies are "double-blinded", that is, neither the patient nor the researchers know who is getting the experimental drug. The large-scale testing provides the pharmaceutical company, as well as the FDA, with a more thorough understanding of the drug's effectiveness, benefits/risks, and range/severity of possible adverse side effects. In this research the focus will be on designs for Phase II studies, different designs will be inventoried, evaluated and programmed (in SAS) to make them available. A ‘real-life’ example of a design that may be applied to an actual Organon study is presented. The two-stage designs that are evaluated can be divided into two groups: One-armed and Two-armed. In both cases, data is used to make a decision about whether to reject or ’accept’(not reject) a statistical hypothesis -usually stated as a null hypothesis H0- in favor of an alternative hypothesis HA. One has then the following setting: H0: p<=p0 where the true response p of the treatment is less than some uninteresting level p0. HA: p>p1 where the true response probability is at least some desirable target level p1. For the one-armed trials, we have first evaluated the one-stage design where a pre-specified number of patients are enrolled. The treatment is then tested only once, namely at the end of 3 0. Summary the trial. Two methods are evaluated, the normal approximation which is based at the normal distribution and the exact binomial method where the binomial distribution is used. However, in order to reduce the number of patients that are used for a trial, a two and threestage designs that are introduced by Herson (1979) and later by Fleming (1982) and Simon (1989) are explored. Further, a multi-stage design of Thall and Simon is reviewed. In Fleming’s design the treatment may be declared ‘promising’ as well as ‘unpromising’ at the end of the trial. However in order to reduce the number of patients, in case of early evidence of (in) efficacy, Fleming introduces also interim analyses. After every interim analysis one can also declare the treatment ‘promising’ as well as ‘unpromising’. Simon’s design is slightly different from Fleming’s design. In this design the trial will be stopped, only because of the lack of the effect (i.e. the treatment is then declared ‘unpromising’ not only at the end of the trial but also after every interim analysis). Further, the characteristics of both designs are derived assuming a fixed value of the response rate of the treatment p. Obvious choices are then p=p0 (the response under the null hypothesis) and p=p1 (the response rate under the alternative hypothesis). Similarly to Simon’s design is Herson’s design (1979), again after each interim analysis the treatment may only be declared ‘unpromising’, the only difference is that the characteristics of this design are derived in a different way then in Fleming’s. Here one chose the response rate of the treatment to follow a certain distribution the ‘prior’. Usually the Beta distribution is taken. Thall and Simon’s design (1994) includes both possibilities ‘promising’ as well as ‘unpromising’. Further is this design a multi-stage design which means that the data is monitored continuously until the total number of patients is reached. The designs properties are derived using a fixed value of the response rate of the treatment. For ‘two-armed’ trials, an approach based on conditional power is developed (i.e. the probability to reject the null hypothesis of ‘no difference between treatments’ at the end of the trial). If the conditional power is ‘low’ the trial will be stopped and the new treatment will be declared ‘unpromising’ as compared to the control. For both one-armed and two-armed trials, restriction was made to binary outcomes, i.e and design properties were derived using the binomial distribution. However, for continuous outcomes the designs are similar. The only difference is that the properties will be derived under a continuous distribution. Furthermore, clinicians often prefer a binary outcome like response or bleeding. For ‘two-armed’ trials we have not considered stopping after an interim analysis and followed by declaring the new treatment ‘promising’. This topic has, however, extensively been discussed in the statistical literature. See, for example, O’Brien and Fleming (1979). Finally, a case study was reviewed. Deep venous thrombosis (DVT) is a condition where there is a blood clot in a deep vein (a vein that accompanies an artery). For years the standard has been an anticoagulant medication called Heparin which was given through the vein. This results in relatively immediate anticoagulation and treatment of the clot. Along with heparin an oral medication called warfarin is given. The main side effect of heparin and warfarin is bleeding. Some time ago a new treatment was introduced for the prevention of DVT: Hirulog. To explore the potential of Hirulog in the prevention of DVT, a phase II dose ranging study was preformed in patients undergoing major knee or hip surgery (Ginsberg et al, 1994). The study objective was to identify a Hirulog dose associated with: an overall DTV rate 15% a bleeding rate < 5% 4 0. Summary These values represent the approximate rates of the standard treatment heparin. Five dosage regimens were investigated in a sequential fashion using the designs presented above, where each dose was evaluated independently. For each dose it was planned to monitor the rates of bleeding and thrombosis in every 10 patients up to a maximum of 50. Hence, this study may be considered as a sequence of five one-armed trials. A dose-finding study was not fully covered. If doses are investigated in a sequential fashion, then the methods of the case study can be applied. If only two doses are investigated in parallel, then the design for two-armed trial can be applied. However, for more than two doses, other methods should be developed. See, for example, Whitehead et al for a recent reference. 5 0. Summary 1. Introduction: 1.1 Binomial distribution The binomial distribution is the discrete probability distribution of the number of successes in a sequence of N independent success/failure experiments, each of which yields success with probability p. Such a success/failure experiment is also called a Bernoulli experiment or Bernoulli trial. Hence, the probability of k successes in a sequence of N independent yes/no experiments with success probability p is equal to N nk , k = 0, 1, 2,….., N; P X k bk , p, N p k 1 p k 0<p<1 and the probability of k successes or less (cumulative probability) is then equal to r N nk P X r Br , p, N p k 1 p k 0 k The binomial distribution is also the basis for the popular binomial test which we will define in the next section. The binomial distribution can also be approximated in some cases by the normal distribution. For large N (say N > 20) and p not too near 0 or 1 (say 0.05 < p < 0.95) the distribution approximately follows the Normal distribution. For instance, if X ~ binomial (N, p) then approximately X has the Normal distribution with mean E(X) = Np and variance Var(X) = Np (1-p).This implies that X minus his expectation divided by the standard deviation, i.e. the square of the variance, is Normally distributed with expectation zero and a unit variance, i.e. the following holds: Z X Np Np1 p N pˆ p p1 p Normal 0,1 with pˆ X . N This last term is known as the Z-statistic and can be used for testing statistical hypotheses as we will see in section one of this report. 1.2 Hypothesis Testing Setting up and testing hypotheses is an essential part of statistical inference. In order to formulate such a test, usually some previous research has been done. This is done either because some hypothesis is believed to be true or because it is to be used as a basis for argument, but it has not been proved. To make this statistical method more clearly, we introduce some terminology. The general situation is then as follows. We have a set of observations x1, …, xN that may be viewed as realizations of identical independent distributed random variables (i.i.d.) X1,…, XN with distribution F p with p . Here is p called a parameter that indicates a specific distribution from a parametric family F p : p . 6 0. Summary Suppose further that 0 and 1 \ 0 .We now wants to test H 0 : p 0 against the alternative hypothesis HA : p 1 , i.e. we want to come to one of the following conclusions: 1. Reject H0 2. Do not reject H0 In each problem considered, the question of interest is mostly simplified into two competing hypotheses; the null hypothesis, denoted H0, and the alternative hypothesis, denoted HA. These two hypotheses are not however treated on an equal basis; special consideration is given to the null hypothesis. We have to know which types of error we can make. First, we can reject H0 while p 0 and hence H0 is correct; this is known as type I error (usually denoted as ). Second, we can accept (not reject) H0 while p 0 and hence H0 is incorrect; this is called the type II error (denoted ). Note further that the complement of the type II error, i.e. reject H0 while p 0 , is called the power of the test. The following table gives a summary of possible results of any hypothesis test: Decision Reject H0 Truth H0 Type I Error Don't reject H0 Right Decision HA Right Decision Type II Error Now we know which types of errors occur, we choose a significance level 0 and construct a test such that the following holds: 1. The type I error must be smaller or equal to 0 . 2. Minimize the type II error. This shows that we design the test to bound the type I error. For the type II error we do not attempt to reach a prescribed value. The test can then be described by a critical (or rejection) region K with K R n . If x= (x1… , xN) is a vector of realizations of X= (X1…., XN) then we have the following: 1. Reject H0 if x K 2. Do not reject H0 if x K By the design of the test the probability in making an error in conclusion one is bounded by 0 . There is no prescribed bound on the probability of making an error in conclusion two. Hence the careful formulation is 1. F p x, x K 0 For each p 0 . 2. For p 0 , minimize F p x K ; or maximize F p x, x K The rejection region K can be derived by solving these constraints. Note that the function p F p x, x K is called the power of the test with a critical region K; and sup p 0 F x, x k is called the size (the significance level) of the test. Example: 7 0. Summary Suppose we have an observation from a binomial (10,p) distribution F p , p 0,1 . Further we have the following hypotheses to test: H 0 : p 0 0,1 / 4 HA : p \ 0 . Note that the hypotheses above can also be reformulated as H 0 : p p0 , p 0 HA : p p1 , p1 1 It is now reasonable to use a critical region of the form [t, 10], for some t=0, 1, 2…..10. We choose the significance level to be equal to 0.05 and derive t by solving the constraints given above. Hence, we have sup 0 p 1 / 4 (1 F p t 1) 1 F1 / 4 t 1 Now, X has a binomial distribution and so: 0.0781......t 5 1 F1 / 4 t 1 Bt 1,1 / 4,10 0.0197.......t 6 Note that because the binomial distribution is discrete, it is not possible to get exactly a significance level that is equal to 0.05 and therefore we choose the critical region as the interval [6, 10]. After we have derived the critical region, the following conclusions can be made: 1. Reject H0 if X 6,10 2. Do not reject H0 if X 0,5 In this example we have seen that for a given set of observations N that is binomial (N, p) distributed, a given significance level and a given Hypotheses we can construct a test that reject the null hypothesis in favour of the alternative hypothesis. This was done by deriving the critical region as we have seen above. Conversely to this, as we will see in section 2 and further in this report, we are more interested in the choice of N such that for a given p 0 and p1 , we have a type I error and type II error . 8 0. Summary 1.3 Bayesian statistics Bayesian statistics is an approach to statistical inference in which observations are used to update prior belief about a probability or population parameter to posterior belief. The name "Bayesian" comes from the fact that Bayes' theorem from probability theory is frequently used in the inference process. Bayes' theorem was first derived by Thomas Bays. Suppose that we have a random variable X that is distributed according to a parametric family X f x . The goal is then, given i.i.d. observations X i , to estimate . For instance, let X i be a success/ failure experiment (i.e. Bernoulli distributed), where X i 1 denotes success and X i 0 denotes failure. Let us define P X i 1 ; our goal is then to estimate . We claim to have a prior distribution over the probability , which represents our prior belief. Suppose this distribution is P and P Betaa, b . The Beta distribution is concentrated on the interval [0, 1] and has the probability density p given by p a 1 1 b 1 b 1 a 1 1 d Now we observe the sequence X i i 1 and suppose that the number of successes is equal to k and n-k is the number of failures. We may calculate the posterior distribution P | X 1 ,...... X n according to Bayes' Rule: n p | X 1 ,...... X n p X 1 ,..... X n | p p X 1 ,..... X n p X 1 ,.... X n | p p X 1 ,..... X n | p d The term p X 1 ,.... X n | is, as before, the likelihood function of and p X 1 ,..... X n follows by integrating out : p X 1 ,..... X n p X 1 ,..... X n | p d To make this more clearly for our example, let us assume that is Beta distributed with parameters a=10 and b=20 then the density function is: 9 1 19 p 19 9 1 d 9 0. Summary Suppose further we observe in the new data X i i 1 , with n=100, a sequence of 50 successes followed by a sequence of 50 failures. The likelihood function becomes n p X 1 ,.... X n | 50 1 50 Plugging in this likelihood and the prior into the Bayes Rule expression, we obtain the posterior distribution as a Beta10 50,20 50 with density function that is equal to 59 1 69 p X 1 ,.... X n 69 59 1 d Note that the posterior and prior distributions have the same form. The Beta distribution is usually chosen for the binomial distribution which gives the likelihood of i.i.d. Bernoulli trials. Further, the outcome of the Bayesian analysis is not a estimated value of but a posterior distribution. This posterior distribution summarizes all information about . As we get more data the posterior distribution will become more sharply peaked about a single value. This of course will allow us to make inference about the value of . This Bayesian approach will be used further in this report (Herson and Thall & Simon) and is an alternative way to test some given hypothesis. In Herson’s design as well as Thall & Simon’s, we used this approach to update prior beliefs about a certain parameter of interest. Further, it is used to derive a Framework for a two-stage design, i.e. derive critical regions to stop the test early. 10 0. Summary 2. One-Armed Trials Much of the statistical methodology used in these trials is derived from oncology. Cancer trials usually are uncontrolled trials, i.e. one-armed; only one experimental treatment is evaluated to decide whether it has sufficient effectiveness to justify further study. These onearmed trials often use a binary outcome ‘response’, i.e. the drug “works” or it does not. In this section, we focus on these one-armed trials. In this setting we have the following hypotheses: H0: p<=p0 where the true response probability p is less than some uninteresting level p0. HA: p>p1 where the true response probability is at least some desirable target level p1. 2.1 One-stage design In a one-stage design a pre-specified number of patients are enrolled and the hypothesis is tested only once, namely at the end of the trial. In order to investigate the efficacy of the new drug, we will derive a formula for the sample size of this single-stage design. This formula depends on the test chosen; there are two different methods: The first is based on Normal approximation. The second is based on the exact binomial probabilities. Both methods use the number of successes X as a starting point to derive the sample size. 2.1.1 Normal approximation This one-stage procedure is based on the statistic p̂ =X/N, where N is the sample size and X has a binomial distribution with parameters (N, p). Additionally, we assume that for sufficiently large N, p̂ approximately follows a normal distribution with parameters (p, p (1p)/N) and Z pˆ p se p With se p1 p N will have approximately a normal distribution, with zero mean and unit variance. The single-stage procedure will then reject H0 when Z>Zα; or equivalently when: X>Np0+Zα √{N p0 (1- p0)}. It is straightforward to show that the sample size for this design with significance level α and power 1-β on HA is approximately equal to N norm ( Z Z ) 2 p(1 p) ( p1 p 0 ) 2 Where p ( p0 p1 ) 2 11 0. Summary Example 1: Suppose we want to investigate a treatment for a certain disease and that 20% would be an interesting response rate. On the other hand, if the response was only 10%, the treatment would not be worth further investigation. For α= 0.05 (one-sided), β= 0.20, H0: p= 0.10, and HA: p= 0.20 we find that the number of patients N equals 79. See SAS program ‘Onestage.sas’ in the Appendix. 2.1.2 Binomial distribution The sample size can also be derived using the exact binomial distribution of X. For specified p0, p1, β and size α we determine the optimal sample size by enumeration, which can be summarized in the following steps: Take the value of N norm obtained in the previous section, and let the sample size ‘N’ range from 3/4 N norm to 5/4 N norm . For each N we determine the minimum of k in (0, N) for which Prob (X>=k) <α under the null hypothesis (p0). Then take the minimum of N for which Prob (X>=k) >1-β (under p1). The required sample size will then be the smallest N and the corresponding critical value k that satisfies the two probability constraints: 1. Type I error = Prob (reject H0|p0) <α. 2. Power = Prob (reject H0|p1) > 1-β where β is the type II error. Example 1(continued): If we now calculate the sample size using the exact binomial probabilities, we see in Table 1 below that N=79 (k=12) is the first N for which the power is more then 80%. However for N=82 (k=13) the power decreases and does not satisfy the required condition, which is a result of the discreteness of binomial distribution. N=89 is the first N for which the power satisfies the condition and does not drop below 80%. Table 1 P1 P0 NNORM N MINK MINPOWER XALPHA 0.2 0.1 79 79 12 0.822 0.049 0.2 0.1 79 82 13 0.785 0.032 0.2 0.1 79 86 13 0.841 0.046 0.2 0.1 79 88 14 0.793 0.028 0.2 0.1 79 89 14 0.807 0.030 0.2 0.1 79 90 14 0.821 0.033 These results were generated using 12 ‘Onestage.sas’ (Appendix). 0. Summary 2.2 Two-stage designs 2.2.1 General introduction As in the previous section, the null hypothesis to be tested is that the true response probability (p) is less than some uninteresting level p0: H0: p<=p0. The alternative hypothesis is that the true response probability is at least some desirable target level p1: HA: p>p0. When H0 can not be rejected (we simply say ‘accepted’), the drug will not be considered for further study. In order to reduce the number of patients, in case of early evidence of (in) efficacy, an interim analysis is performed. Three different designs are reviewed: two frequentist designs and one Bayesian design, and common features of these designs are given below. Throughout, let n1, n2 be the number of patients in the first and second stages respectively, and x1, x2 the number of responses. After the first stage, when n1 patients are treated we will: ‘Accept’ H0 if x1 a1 Reject H0 if x1 r1 continue if a1< x1< r1 In the final analysis, when n1+n2 patients are treated we will: ‘Accept’ H0 if x1+x2 a2 Reject H0 if x1+x2 r2 with r2= a2+1 Note that in the final analysis a decision must be taken, i.e. there is no grey area where the number of responses could lay at the end of the trial. The acceptance and rejection probabilities after the first stage are: Prob (accept H0|p) = P (x1 a1) = B (a 1 , p, n 1 ) (1) Prob (reject H0|p) = P (x1 r1) = 1- B (r 1 -1, p, n 1 ) (2) Hence, in these designs, the probability of early termination (PET) after the first stage is: PET (p) = P (x1 a1) + P (x1 r1) = B (a 1 , p, n 1 ) + (1- B (r 1 -1, p, n 1 )) (3) Furthermore, the expected sample size or the average sample number (ASN) is: ASN (p) = n1 + (1-PET (p)) n2 (4) The overall (i.e. the first and the second stage combined) acceptance and rejection probabilities are: Prob (accept H0|p) = P (x1 a1) + P (x1+x2 a2; a1< x1< r1) r1 1 = B (a 1 , p, n 1 ) + b( k , p, n ) B ( a 1 k a1 1 2 k , p, n 2 ) (5) Prob (reject H0|p) = P (x1 r1) + P (x1+x2 r2; a1< x1< r1) r1 1 = (1- B (r 1 -1, p, n 1 )) + b(k , p, n )(1 B(r k a1 1 13 1 2 k 1, p, n2 )) (6) 0. Summary 2.2.2 Frequentist designs Fleming (1982) In 1973 Schultz et al defined a general multiple testing procedure, which of course includes a two-stage testing, and derived a general formulas for the average sample size and the probability of rejecting H0. The two-stage design that is proposed by Fleming has the objective to continue the work of Schultz et al by giving a method to derive the appropriate acceptance and rejections points. This method can be described by the following steps: i. Specify p0, p1 and α, and derive the sample size Nnorm (see chapter 1) ii. Then we let N range from 3/4 N orm to 5/4 N orm iii. Choose equal sample size at each size, i.e. n1 ≈ n2 (as was done in table 2 in the corresponding article). iv. For each value of N, derive the average sample number and the probability of rejecting H0 (see section 2.2.1 for the exact formulas). The next step is to determine the appropriate acceptance and rejection points (a1, r1, a2, r2). Armitage, McPherson and Rowe (1969) have shown that the true significance level of a twostage testing procedure can be considerably greater than the nominal significance level if one employs the single-stage test procedure at each test g=1, 2. This would be the case if the test reject, H0 whenever Yg (p)>Zα where Yg p g g i 1 i 1 x i ni p g=1, 2 1/ 2 ni p1 p i 1 g Here p=(p0+p1)/2. To correct for the fact that xi has a discrete binomial distribution; we would set the rejection points by * 1/ 2 g g rg ni p Z ni p 0 1 p 0 1 i 1 i 1 Where [ ]* is the Round function (return the nearest integer). Now, to obtain a two-stage procedure that has the same nominal size at each stage, O’Brien and Fleming (1979) proposed the following procedure: At test g, one would reject H0 whenever n1 ... n g / N The rejection of whenever n1 ... n g / N 1/ 2 HA at Yg ~ p A Z Where ~ p A is 14 test 1/ 2 Yg p Z . g<k would occur 0. Summary Np0 ~ pA 1/ 2 1 p 0 1/ 2 N Z Z 2 2 The corresponding acceptance and rejection points are then given in the following manner: * 1/ 2 g g rg ni p 0 Z Np0 1 p 0 1 i 1 i 1 1/ 2 g g ~ ~ ~ a g ni p A Z Np A 1 p A i 1 i 1 g=1, 2, * g=1 Examples: Table 2 gives the two-stage designs for different hypotheses (H0: p<=p0 vs. HA: p>p0) with α=0.05, β=0.20 for examples 2, 3 and 5, β=0.10 for 1, 4 and 6 and a sample size N =Nnorm (the one-stage sample size) .These results can also be found in Fleming (1982, Table 2) and are also referred to in Simon’s article (1989, Table 3). Table 2: N P0 P1 N1 N2 N3 A1 A2 R1 R2 ASN0 ASN1 ALPHA POWER 1 40 0.05 0.2 20 20 0 0 4 4 5 32.5 28.0 0.052 0.922 2 25 0.10 0.3 15 10 0 1 5 5 6 19.4 19.8 0.036 0.808 3 35 0.20 0.4 20 15 0 4 11 9 12 25.4 28.2 0.037 0.801 4 50 0.20 0.4 25 25 0 4 15 11 16 39.3 39.4 0.032 0.904 5 45 0.30 0.5 25 20 0 8 19 14 20 31.3 37.0 0.029 0.807 6 50 0.30 0.5 25 25 0 7 20 14 21 37.1 40.8 0.048 0.894 These results were generated using Fleming.sas (Appendix). Inspection of this table reveals that the use of the proposed two-stage design yields a considerable reduction in ASN the average sample size under H0 as well as under HA. 15 0. Summary Simon (1989) The two-stage design developed by Simon (1989) is slightly different from the general idea we presented in the section 2.2.1.In this design we will stop early, only because of lack of effect and not because of overwhelming effect. Hence, after the first stage we can only: ‘Accept’ H0, whereas early rejection of H0 is not possible in this design. Simon searches for the optimal design by choosing the two-stage design that satisfies the probability constraints and either minimizes the expected sample size ASN, or minimizes the maximum number of patients. This is done by enumeration using the exact binomial probabilities Specify p0, p1, α, β For each value of the total sample size N and each value of n 1 in the range (1, N-1) determine the values a 1 , a2 that satisfy the two constrains and minimize the ASN. These values are found by searching over the range of a 1 (0, n 1 ); for each a 1 , we determine the maximum value of a2 that satisfy the type II error constraints. Then examine the parameters a 1 , a2, n 1 and N to see if they satisfy the type I error constraint. If they do, then we compared the ASN until the minimum of N is achieved. For practical purposes, it is sufficient to let N range from N norm (the formula is given in the first chapter) to 5/4 N norm . This enumeration procedure searches upwards from this minimum value of N until the optimal design is found. Example: Suppose we want to derive a two-stage design for testing H0: p=0.05 vs. HA: p=0.25 with α=0.10 and β=0.10. Then using Simon’s method, we get the following results (the ‘best 5’ designs, i.e. with the lowest ASN (p0)) are given in the table below: Table 3: N N1 R R1 PET0 EXPN0 EXPN1 ALPHA1 POWER1 24 9 2 0 0.630 14.5 22.9 0.093 0.903 22 10 2 0 0.599 14.8 21.3 0.083 0.905 23 10 2 0 0.599 15.2 22.3 0.091 0.913 21 11 2 0 0.569 15.3 20.6 0.078 0.905 24 10 2 0 0.599 15.6 23.2 0.098 0.920 These results were generated using Simon.sas (Appendix). According to the results above, we see that the optimal two-stage design for testing H0: p=0.05 vs. HA: p=0.25 consists of a first stage of 9 patients, possibly followed by a second stage of 15 patients. The study will be stopped after the first stage (and, hence, H0 ‘accepted’) if there are no responses in the first 9 patients. Note that the first line of Table 3 is the design given in Table 1 of Simon’s article (1989). 16 0. Summary The ‘Minimax’ design: The optimal two-stage design derived above does not necessarily minimize the maximum sample size. Consider the case given in the previous example; the optimal design given above has a maximum sample size of 24 patients. However, there is also a two-stage design in Table 1 that has a smaller maximum size of 22 patients and a slightly larger expected size then the optimal design. Therefore, Simon also proposes another criterion based on which a design may be chosen: the ‘minimax’ design that has the smallest maximum sample size. For the example discussed in the previous section, the ‘best 5’ designs (i.e. with the lowest N) are given in the table below. Table 4: N N1 R R1 PET0 EXPN0 EXPN1 ALPHA1 POWER1 20 13 2 0 0.513 16.4 19.8 0.074 0.903 21 11 2 0 0.569 15.3 20.6 0.078 0.905 21 12 2 0 0.540 16.1 20.7 0.080 0.913 21 13 2 0 0.513 16.9 20.8 0.082 0.918 21 14 2 0 0.488 17.6 20.9 0.083 0.921 These results were generated using Simon.sas (Appendix). Note here also that the first line is the ‘minimax’ design given in Table 1 of Simon (1989). If we now compare the optimal model with that of the ‘minimax’ (see Table 2 and 3), we see the difference in the expected sample size is very small. In some cases the ‘minimax’ will be more attractive then the optimal design. This will be the case when the difference in expected size is small and when the patient’s accrual rate is low. In this case it will take more time to complete the optimal design; sometimes that is more important than the reduction in expected sample size. 17 0. Summary 2.2.3 Bayesian design Herson (1979) The designs of Simon (1989) and Fleming (1982) discussed in section 2.2.2 were frequentist designs. All characteristics of the designs such as the rejection probability and the average sample size, were derived assuming a fixed value for the response probability p. Obvious choices were p=p0 (the response level under the null hypothesis) and p>p1 (the response level under the alternative). An alternative design is the so-called Bayesian design. In this design the response probability p is no longer fixed but assumed to follow a certain probability distribution (the prior). For binomial outcomes, the Beta distribution is usually chosen which includes the Uniform distribution as a special case. Herson (1979) proposed a Bayesian approach of a two-stage design. Unfortunately, his hypotheses are the reverse of those of Simon (1989) and Fleming (1982), i.e.: H 0 : 0 , 0 is an interesting level. HA : 0 , i.e. the response level is uninteresting. The general idea for such a design can be summarized by the following steps (we will keep to Herson’s notation): First, for a given α, β, 0 and 1 determine the sample size N and the rejection region C using the exact binomial probabilities (see the first section). Then at the interim analysis, compute the predictive probability (PP) to reject H0 at the end of the study given the interim results Reject H0 (early) if the PP is larger than a certain threshold P0. 1. Predictive probability A suitable sample size for N can be based on a one-stage design and on the test that rejects H0 in favor of HA whenever ≤ C responses are observed. Then at any point during the study, say when n1<N patients are observed with x1≤ C responses, it is appropriate to calculate the probability that the number of responses in N patients x1+x2 ≤ C. This probability is called the predictive probability (PP). A high PP would indicate that it is very likely that H0 will be rejected at the end of the trial and, hence, early termination might be advisable and a low PP value would give a little justification of early termination. Let f ( ) be the prior probability density function (PDF), then P[X responses in N patients| x1 responses in first n1, f ( ) ] = P [(X, N)|(x1, n1), f ( ) ] 1 Px , n | P X , N | x , n , f d 1 1 1 1 0 (1) 1 Px , n | f d 1 1 0 The conditional probabilities are computed with reference to the binomial distribution. For f ( ) we take the Beta distribution with parameters r and s, that is: f ( ) L r 1 (1 ) s 1 , L is a normalizing constant. 18 0. Summary However, Herson uses a different parameterization with a=r-1 and b-a=s-1. Then the density function is equal to f ( ) L a (1 ) b a . It can now be shown that the formula in (1) is equal to: P [(X, N)|(x1, n1), (a, b)] = Where K x, y n1 b 1 K n1 b, x1 a .K N n1 , X x1 N b 1 K N b, X a x! and y!( x y )! C P[X< C in N patients|(x1, n1), (a, b)] = P [(X, N) |(x1, n1), (a, b)] (2) X x1 The predictive probability (PP) in (2) will form the basis of early termination rules to be developed. 2. Quantification of prior beliefs Quantification of beliefs about entails specification of the parameters a, b, in the Beta distribution. Herson (1979) presented the following approach. The investigator specifies his prior beliefs of the mean of the prior distribution for and an expression of the degree of confidence in via the coefficient of variation C.V ( ). The last is equal to the variation divided by the average. A low value of C.V. would indicate that the investigator has a high confidence that = , whereas a high C.V. indicates low confidence. For the Beta distribution we find the following equations: = (a+1)/ (b+2), V2 = (b-a+1)/ (a+1) (b+3), a= (b+2) -1 and b= [1- (1 3V 2 ) / V 2 ]. With C.V. ( ) = V.100%. 3. Selecting an early termination plan The process of selecting a PP early termination plan may be described by the following steps: Derive N and select n1 and n2. Then for a specified and C.V ( ), derive the parameters a, b of the Beta distribution. Compute the PP For a given P0 derive a1 by searching over the range of x1 (0, C); for each x1 we determine the PP value given in (2) and we take a1 equal to the maximum value of x1 that satisfies PP>P0. If the PP is higher than P0, the study is terminated and the drug is abandoned, otherwise the trial is continued. In this case, after the first stage when n1 patients are treated we will: 1. reject H0 when x1<= a1 In the final analysis we will: 2. reject H0 when x1+x2 <=C Note here that the acceptance and rejection probabilities and the expected sample size can be computed using the formulas given in section 2.2.1. 19 0. Summary Examples: The following tables give the same examples as in Table 1 (the same response probabilities 1 , 0 and the same α and β), Herson’s Bayesian two-stage design based on the reverse hypotheses and a Beta distribution for . In Table 1 is Uniform distributed; table 2 and 3 is Beta distributed with mean 1 and 0 respectively. Table 5 (Uniform distribution and PP0=0.85): 0 1 N 0.3 0.10 0.4 C alpha1 POWER1 N1 C1 PP alpha2 POWER2 ASN0 ASN1 25 3 0.033 0.764 15 1 0.858 0.054 0.790 24.6 19.5 0.20 35 8 0.026 0.745 20 3 0.913 0.035 0.760 34.8 28.8 0.2 0.05 40 3 0.028 0.862 20 0 0.952 0.035 0.868 39.8 32.8 0.4 0.20 50 13 0.028 0.889 25 4 0.952 0.033 0.894 49.8 39.5 0.5 0.30 45 16 0.036 0.836 25 7 0.903 0.047 0.847 44.6 34.8 0.5 0.30 50 18 0.032 0.859 25 7 0.898 0.045 0.870 49.5 37.2 Table 6 (Beta distribution with 0 1 N 0.3 0.10 0.4 C = 1 , C.V = 50% and PP0=0.85): alpha1 POWER1 N1 C1 PP alpha2 POWER2 ASN0 ASN1 25 3 0.033 0.764 15 1 0.930 0.054 0.790 24.6 19.5 0.20 35 8 0.026 0.745 20 3 0.941 0.035 0.760 34.8 28.8 0.2 0.05 40 3 0.028 0.862 20 1 0.908 0.081 0.896 38.6 25.3 0.4 0.20 50 13 0.028 0.889 25 5 0.908 0.047 0.903 49.3 34.6 0.5 0.30 45 16 0.036 0.836 25 7 0.925 0.047 0.847 44.6 34.8 0.5 0.30 50 18 0.032 0.859 25 7 0.923 0.045 0.870 49.5 37.2 Table 7 (Beta distribution with 0 1 N 0.3 0.10 0.4 C = 0 , C.V = 50% and PP0=0.85): alpha1 POWER1 N1 C1 PP alpha2 POWER2 ASN0 ASN1 25 3 0.033 0.764 15 0 0.962 0.035 0.766 25.0 22.9 0.20 35 8 0.026 0.745 20 3 0.893 0.035 0.760 34.8 28.8 0.2 0.05 40 3 0.028 0.862 20 0 0.873 0.035 0.868 39.8 32.8 0.4 0.20 50 13 0.028 0.889 25 4 0.940 0.033 0.894 49.8 39.5 0.5 0.30 45 16 0.036 0.836 25 7 0.895 0.047 0.847 44.6 34.8 0.5 0.30 50 18 0.032 0.859 25 7 0.889 0.045 0.870 49.5 37.2 These results were generated using Herson2.sas (Appendix). Note that the type I error in the two-stage design (alpha2) is consistently higher than in the corresponding one-stage design (alpha1). This is a result of having an interim analysis, which leads to an extra possibility to reject H0 and, hence, a larger alpha. One can observe that under the three different priors the expected sample size ASN is smaller than the one-stage sample size, especially under 1 . It is of interest to compare the expected sample size under 1 for the different distributions (priors). Take, for example, the design ( 0 =0.30 and 1 =0.10); we see that this design has the same expected sample size 20 0. Summary (ASN=19.5) under the Uniform distribution and the Beta distribution with = 1 and a larger ASN (=22.9) under the Beta distribution with = 0 . This illustrates an important feature of the Bayesian approach. When the prior distribution is located near = 1 (treatment not promising) the ASN will be smaller than when the prior distribution is located near = 0 (treatment is promising). 2.3 Three-stage designs 2.3.1 General introduction Like in section 2.2, the null hypothesis to be tested is that the true response probability (p) is less than some uninteresting level p0: H0: p<=p0. The alternative hypothesis is that the true response probability is at least some desirable target level p1: HA: p>p0. When H0 can not be rejected (we simply say ‘accepted’), the drug will not be considered for further study. Three different designs are presented; the common features of these designs are given below. Throughout, let n1, n2 and n3 represent the number of patients in the first, second and third stages respectively, and x1, x2 and x3 the number of responses. Note that the acceptance and rejection probabilities for the first and second stages are the same as in section 2.2.1. For the final analysis (after the third stage) when n1+n2+n3 are treated we will: ‘Accept’ H0 if x1+x2+x3 a3 ‘Reject’ H0 if x1+x2+x3 r3 with r3= a3+1 The overall (i.e. the first, the second and third stages combined) acceptance and rejection probabilities are: Prob (accept H0|p) = P (x1 a1) + P (x1+x2 a2; a1< x1< r1) + P (x1+x2+x3 a3; a2< x1+x2 < r2; a1< x1< r1) r1 1 = B (a 1 , p, n 1 ) + r1 1 = b( k , p, n ) B ( a 1 k a1 1 2 r2 k 1 bl , p, n b(k , p, n ) B(a k a1 1 l a2 k 1 2 1 2 k , p, n 2 ) k , p, n 2 ) Prob (reject H0|p) = P (x1 r1) + P (x1+x2 r2; a1< x1< r1) r1 1 = (1- B (r 1 -1, p, n 1 )) + r1 1 = b(k , p, n )(1 B(r 1 k a1 1 2 r2 k 1 bl , p, n b(k , p, n ){1 B(a k a1 1 l a2 k 1 2 1 2.3.2 Frequentist designs 21 2 k 1, p, n2 )) k , p, n2 )} 0. Summary Fleming (1982) Fleming’s method (1982), described in section 2.2 for a two-stage design, can also be applied to three-stage designs. The only difference is that two interim analyses, instead of one, are evaluated (i.e. g=1, 2, 3). The formulas for the acceptance and the rejection points were already given in the section 2.2 for a two-stage design and can be applied in exactly the same way for the three-stage design. Examples: The following table gives the three-stage design for different hypotheses (H0: p<=p0 vs. HA: p>p0) with α=0.05, β=0.20 for examples 1-4 and β=0.10 for 5, 6 and a sample size N =Nnorm (the one-stage sample size) .These results are given in Fleming’s (1982) Table 2. Table 8: N P0 P1 N1 N2 N3 A1 A2 A3 R1 R2 R3 ASN0 ASN1 ALPHA POWER 40 0.05 0.2 15 15 10 -1 2 4 4 5 5 31.6 26.8 0.046 0.913 25 0.10 0.3 10 10 5 0 3 5 4 5 6 16.8 16.8 0.053 0.820 35 0.20 0.4 15 10 10 2 6 11 8 10 12 22.9 26.0 0.039 0.801 50 0.20 0.4 20 15 15 2 9 15 10 13 16 33.8 34.9 0.033 0.892 45 0.30 0.5 15 15 15 3 11 19 11 15 20 27.6 33.7 0.033 0.802 50 0.30 0.5 20 15 15 5 12 20 12 17 21 31.6 35.5 0.049 0.887 Results generated by Fleming.sas (Appendix) Inspection of this table reveals that the use of the proposed three-stage design also yields a considerable reduction in the average sample size ASN under H0 as well as under HA. Ensign et al (1994) Ensign et al (1994) extended Simon’s two-stage design to a three-stage design, where stopping after the first interim (‘Accept’ H0) is only possible when there are “zero” responses. The hypotheses are the same as in Simon (1989) and the probabilities of accepting H0 after each stage and the expected sample size are given in the general introduction. Optimal three-stage design The optimal three-stage design can be derived by the following steps: Specify p0, p1, α, β For each value of n1 that satisfies Pr (accept H0|p1) <β, determine the values a2, a3, n2 and n3 that satisfy the two constraints and minimize the ASN. For each value of the total sample size N and each value of n2 in the range (1, N-1) determine the values a2, a3 that satisfy the two constraints and minimize the ASN. These values are found by searching over the range of a2 (0, n2); for each a2, determine the maximum value of a3 that satisfies the type II error (we use the value of {β- Pr (accept H0|p1)} as the type II error rate of this optimalization). Then examine whether the parameters a3, a2, n2 and n3 satisfy the type I error constraint. If they do, compare the ASN sample until the minimum or N is achieved. Note here that the range of N is chosen in exactly the same way as in Simon’s two-stage design. 22 0. Summary 2.3.3 Bayesian design Herson (1979) The method by Herson (1979) described in section 2.2 for two-stage design, can also be applied to three-stage designs. The only difference is that two interim analyses, instead of one, are evaluated. The hypotheses are the same as the ones used in section 2.2.3 and the response probability is also generated by the Beta distribution. H 0 : 0 , where 0 is an interesting level. HA : 0 , i.e. the response level is uninteresting. In this case, after the first stage when n1 patients are treated we will: reject H0 when x1<=a1 After the second stage when n1+n2 patients are treated we will: reject H0 when if x1+x2 <= a2 In the final analysis we will: reject H0 when X=x1+x2+x3 <= C 1. Predictive probability The predictive probability after the first interim analysis will be computed by the formula given in section (2.2.3). After the first interim analysis, when early termination does not occur and when n1+n2 patients are observed with x1+x2< r2 responses, the PP computation will also be given by a generalization of Laplace’s rule of succession. The only difference is that f ( ) (the posterior) probability density function (PDF) is updated; i.e. the parameters of the Beta distribution are then (a+ x1; b+ n1- x1). P[X responses in N patients| x1+x2 responses in first n1+n2, f ( ) ] = P [(X, N)|(x1, n1), (x2, n2), (a+ x1; b+ n1- x1)] = n1 n2 b n1 x1 1 K n1 n2 b n1 x1 , x1 x2 a x1 N b n1 x1 1 K N b n1 x1 , X a x1 C P[R<= C in N patients|(R1,N1),(a,b)]= P[(X,N)|(x2,n2),(a+x1,b+n1-x1)] (2) R R1 The PP value in (2) will be the basis of early termination rules to be developed. 2. Selecting an early termination plan The process of selecting a PP early termination plan, described in section 2.2 for the twostage design, and computation of rejection points can also be applied in exactly the same way for three-stage designs. . Examples 23 0. Summary The following tables give the same examples as in Table 1 (the same response probabilities 1 , 0 and the same α and β), Herson’s Bayesian three-stage design based on the reverse hypotheses and three different (prior) Beta distributions for . Table 9 (Uniform distribution and PP0=0.85): 0 1 N 0.3 0.10 0.4 C N1 A1 PP1 N2 A2 PP2 alpha2 POWER2 ASN0 ASN1 25 3 10 0 0.909 10 1 0.986 0.054 0.784 24.6 19.1 0.20 35 8 15 2 0.878 10 4 0.954 0.047 0.773 34.4 25.9 0.2 0.05 40 3 15 0 0.875 15 1 0.961 0.057 0.880 39.1 26.7 0.4 0.20 50 13 20 3 0.928 15 7 0.947 0.041 0.899 49.4 34.2 0.5 0.30 45 16 15 3 0.941 15 9 0.898 0.055 0.852 44.2 31.3 0.5 0.30 50 18 20 5 0.915 15 11 0.887 0.052 0.876 49.2 33.5 Table 10 (Beta distribution with 0 1 N 0.3 0.10 0.4 C = 1 , C.V = 50% and PP0=0.85): N1 A1 PP1 N2 A2 PP2 alpha 2 POWER2 ASN0 ASN1 25 3 10 0 0.955 10 2 0.911 0.064 0.802 24.5 18.0 0.20 35 8 15 2 0.918 10 5 0.856 0.055 0.789 34.3 24.5 0.2 0.05 40 3 15 0 0.965 15 2 0.892 0.073 0.896 38.8 24.8 0.4 0.20 50 13 20 4 0.876 15 8 0.882 0.073 0.915 48.3 28.6 0.5 0.30 45 16 15 4 0.866 15 9 0.919 0.086 0.868 43.1 27.1 0.5 0.30 50 18 20 5 0.938 15 11 0.909 0.052 0.876 49.2 33.5 Table 11 (Beta distribution with 0 1 N 0.3 0.10 0.4 C = 0 , C.V = 50% and PP0=0.85): N1 A1 PP1 N2 A2 PP2 alpha2 POWER2 ASN0 ASN1 25 3 10 -1 1.000 10 1 0.975 0.034 0.766 25.0 23.0 0.20 35 8 15 1 0.961 10 4 0.944 0.031 0.755 34.8 29.0 0.2 0.05 40 3 15 -1 1.000 15 1 0.930 0.032 0.866 39.9 34.5 0.4 0.20 50 13 20 3 0.908 15 7 0.941 0.041 0.899 49.4 34.2 0.5 0.30 45 16 15 3 0.930 15 9 0.892 0.055 0.852 44.2 31.3 0.5 0.30 50 18 20 5 0.905 15 11 0.882 0.052 0.876 49.2 33.5 These results are generated using Herson3.sas (Appendix). Note that the type I error in the three-stage design (alpha2) is consistently higher than in the corresponding one-stage design (alpha1) and also higher than the two-stage design. This is a result of having two interim analyses, which create an extra possibility to reject H0 and, hence, a larger alpha. One can observe that under the three different priors the expected sample size ASN is smaller than the one-stage sample size, especially under 1 . It is of interest to compare the expected sample size under 1 for the different distributions (priors). Take, for example, the design ( 0 =0.30 and 1 =0.10), we see that there is a small difference in expected sample size under the Uniform distribution and the Beta distribution with = 1 (19.1 and 18 respectively) and a larger ASN (23) under the Beta distribution with = 0 . This illustrates an 24 0. Summary important feature of the Bayesian approach. When the prior distribution is located near = 1 (treatment not promising) the ASN will be smaller than when the prior distribution is located near = 0 (treatment is promising). 2.4 Multi-stage designs 2.4.1. Bayesian Design To implement the designs of Simon (1989), Fleming (1982) and Herson (1979), the clinician has to specify a single value of the patient’s response rate to the standard therapy S. In most cases, however, the clinician expresses uncertainty in such a situation. For example, if he can only give a range where this value is located, it is not possible to use one of the designs described in the previous section. From this point of view a Bayesian approach may be an alternative. Thall and Simon (1994) presented a Bayesian approach to the design and analysis of Phase II trials. In this design the response of patients is again binary, but the data is monitored continuously. Thall and Simon (1994) The design approach here is based on clinical beliefs regarding the efficacy of a standard therapy S, which is translated into a prior probability distribution for s . The aim of this design is to provide guidelines for deciding whether a new therapy E is promising relative to S. The design requires the following: Informative prior on s and a flat prior on E A targeted improvement of E over S Bounds Nmin and Nmax on the allowable sample escalation Decision criteria The sequence of patients’ responses to E will be denoted by Y1, Y2... with Yi=0 or 1, depending on wether the treatment is a failure or a success. The total number of responses after n patients will then be denoted by Xn= Y1+ Y2……+ Yn. This addresses settings in which the data are monitored continuously, i.e. observe the sequence Xn until a predetermined limit point nmax. The clinician wishes to declare E promising, not promising, or to terminate the trial at any time based on the most recent data. We assume that an informative prior S of s may be elected by the clinician, but also require that the prior E of E is only slightly informative. We model E and S as beta distributions Beta (a, b), and we denote the probability density function (pdf) and cumulative distribution function (cdf) as f (. ; a, b) and F (. ; a, b) respectively. The decision for stopping/continuing the trial is based on the posterior probability that the response rate on the experimental treatment is 0 0 higher than on the standard, i.e.: 25 0. Summary x, n; s , E , 0 Pr E S 0 X n x Pr s 0 E X n x 1 0 1 F p 0 ; a E x, b E n x f ( p; a s , b s )dp, 0 for n=1, 2, ……, nmax, using the posterior density of E given (n, Xn). Now let pu and pL denote predetermined probabilities, with pL a small value, such as 0.010.05, and pu a large value, such as 0.95-0.99. Then the upper and lower decision cut-offs of the trial are: Un= the smallest integer x such that x, n; s , E ,0 Pr E S 0 X n x pu Ln= the largest integer x< Un such that x, n; s , E , 0 Pr E S 0 X n x p L The lower cut-off criterion Ln can be seen as: if the probability that a treatment E provides an improvement of at least 0 is very ‘small’, then stop the trial and consider E not promising. On the other hand, the upper cut-off states that: if the probability that a treatment E provides an improvement over S is ‘high’ then stop the trial and declare E promising. The decision rule at stage n is as follows: If X n U n , stop and declare E promising. If X n Ln , stop and declare E not promising If Ln X n U n , and n< nmax, then continue. The Priors For eliciting a prior on s , Thall & Simon (1994) describe Beta (a, b) in terms of its mean and the width of 90% probability interval (W 90). So, the clinician is asked to specify the mean of s and a value of W 90. With these given values one can find the appropriate parameters of S . Since S and E play fundamental roles in determining the decision boundaries, it is essential to formulate E such that it reflects only a slightly informative prior. For this purpose, Thall & Simon (1994) use the concentration parameter cE = aE+bE of E . They require that cE range from 2 to 10, cE =2 corresponding to a uniform distribution and cE =10 with information obtained from a small pilot study. Given a targeted improvement 0 for E over S, Thall & Simon set the mean of E equal to µs+ 0 /2. One can now easily see that the prior of E is determined by 0 , cE and µs and hence, the parameters are equal to a E c E s 0 / 2 bE c E 1 s 0 / 2 Frequentist Operating Characteristics We next evaluate the design’s behavior under different circumstances and prior distributions. While the design’s decision boundaries are obtained using a Bayesian framework, which regards the success probability of E as a random quantity in order to reflect some uncertainty, we evaluate these design properties under a fixed value of this success probability, which we denote by pE. 26 0. Summary So, for every set of parameters { 0 , cE, µs and W90}, the decision boundaries and pE, We derive the following operating characteristics: P+= Pr[E is declared promising] P-= Pr[E is not promising] 25th, 50th, and 75th percentiles of the achieved sample size N These characteristics and the probability distribution of N are computed analytically given the recursion in the Appendix of Thall and Simon (1994). Examples: To illustrate this model and the manner in which the design properties vary for different priors, we will give some examples. For all examples in the two tables below, we have 0 =0.15 and µs=0.20. The only difference between the two is that the design properties are evaluated under different values of the success probability PE. Note that these results can also be found in Thall & Simon (1994, Table 1) Table 11 PE P50 P75 PPLUS PMIN PINCONCL 0.20 0.20 10 14 32 0.072 0.835 0.093 2 0.30 0.20 11 21 65 0.030 0.714 0.256 10 0.20 0.20 12 20 45 0.047 0.813 0.140 cE W90 2 P25 Table 12 cE W90 PE 2 0.20 2 10 P25 P50 P75 PPLUS PMIN PINCONCL 0.35 10 16 37 0.706 0.165 0.129 0.30 0.35 12 35 65 0.502 0.098 0.399 0.20 0.35 12 26 53 0.691 0.109 0.200 These results were generated using Thall.sas (Appendix) If we look at table 11 we can observe the following results (The same results can also be observed from the examples given in table 12): If we increase the width of the 90% probability interval of the standard treatment from 0.20 to 0.30 (see rows 1 and 2), which means from informative to less informative, then the probability to declare E promising (pplus), which means ‘accept that the response rate is 0.20’, decreases from 0.072 to 0.030. The same happens to the probability to declare E unpromising (pmin), which decreases from 0.835 to 0.714. Furthermore, we observe that PINCONCL, which is the probability the trial will be inconclusive, increases from 0.093 to 0.256, and that the distribution of N takes substantially larger values. If we increase the concentration parameter cE from 2 to 10 (see row 1 and 3), then the probability to declare E promising (pplus) decreases from 0.072 to 0.074. The same happens with the probability to declare E unpromising (pmin) which decreases from 0.907 to 0.860. Further, we also observe that PINCONCL increases from 0.129 to 0.200. And we observe here also that the distribution of N takes substantially larger values. If we now compare the results of table 11 with those given in table 12, we can observe that the designs in table 12 provide more acceptable design properties than those in Table 11. This a direct result of the high value of the success rate of the experimental treatment pE. 27 3. A Case Study 3. A case study: investigating a dose of Hirulog 3.1. Case description Deep venous thrombosis (DVT) is a condition where there is a blood clot in a deep vein (a vein that accompanies an artery). For years the standard has been an anticoagulant medication called Heparin which was given through the vein. This results in relatively immediate anticoagulation and treatment of the clot. Along with heparin an oral medication called warfarin is given. The main side effect of heparin and warfarin is bleeding. Some time ago a new treatment was introduced for the prevention of DVT: Hirulog. To explore the potential of Hirulog in the prevention of DVT, a phase II dose ranging study was preformed in patients undergoing major knee or hip surgery (Ginsberg et al, 1994). The study objective was to identify a Hirulog dose associated with: an overall DTV rate 15% a bleeding rate < 5% These values represent the approximate rates of the standard treatment heparin. Five dosage regimens were investigated in a sequential fashion, where each dose was evaluated independently. For each dose it was planned to monitor the rates of bleeding and thrombosis in every 10 patients up to a maximum of 50. Hence, this study may be considered as a sequence of five one-armed trials. In the remainder of this chapter we shall evaluate various designs for the investigation of one dose. 3.2. Possible designs 3.2.1 Ginsberg et al In the original design of Ginsberg et al. (1994), a dose regimen would be stopped if: The lower 95% confidence limit for bleeding was greater than 5% and/or The lower 95% confidence limit for DVT was greater than 15%. Although not specified in the article, the corresponding hypotheses are: For bleeding H 0 : p 0.05 vs. H 0 : p 0.05 with a one-sided alpha=0.05 For DVT we have H 0 : p 0.15 vs. H 0 : p 0.15 Observe that in this case study the event (bleeding, DVT) is negative, while in the previous chapters the event was positive (response). Both hypotheses are of the form H 0 : p p0 , where p0 is an ‘acceptable’ bleeding or DVT rate HA : p p0 , i.e. the bleeding or DVT rate is unacceptable The consequence of this hypothesis formulation is that if we reject H0 we know that the drug is harmful (causing too much bleeding and/or too many DVTs). On the other hand, if H0 can not be rejected then it is tempting to claim that the drug is promising. However, the only ‘real’ conclusion is that we have not (yet) been able to demonstrate that the drug is harmful. This issue will be revisited in Section 3.3. 28 3. A Case Study Note that lower 95% confidence limit corresponds to the lower limit of a two-sided 90% confidence interval. Using the Clopper-Pearson method ( Brown et al. 2001) to construct (exact) 90% confidence intervals, the following rejection points are obtained (for testing 0.05 vs. 0.10) for n=10, 20, 30, 40 and 50 : 3, 4, 5, 5, and 6 respectively. That is: a dose is rejected at an interim analysis as soon as the number of bleedings is greater than or equal to 3/10 (30%), 4/20 (20%), 5/30 (17%), 5/40 (13%) or 6/50 (12%). Since we can only ‘accept’ H0 at the end of the trial, we set the acceptance points A1 to A4 equal to -1 and A5 equal to R5-1. The properties of Ginsberg’s design (Type I error, power at an alternative of 10% and expected sample size) are given in the table below: Table13. Bleeding P0 P1 A1 A2 A3 A4 A5 R1 R2 R3 R4 R5 XALPHA XPOWER ASN0 ASN1 0.05 0.1 -1 -1 -1 -1 5 3 4 5 5 6 0.067 0.456 48.8 41.7 0.05 0.2 -1 -1 -1 -1 5 3 4 5 5 6 0.067 0.963 48.8 23.6 Results generated using Flemingfive.sas (Appendix). Similarly for DVT (0.15 vs.0.20), a dose will be rejected at one of the five interim analyses if the number of ‘events’ is greater than 4/10 (40%), 7/20 (35%), 9/30 (30%), 11/40 (28%) or 13/50 (26%). Again, we set the acceptance points equal to -1. The design properties are: Table14. DVT P0 P1 A1 A2 A3 A4 A5 R1 R2 R3 R4 R5 XALPHA XPOWER ASN0 ASN1 0.15 0.20 -1 -1 -1 -1 12 4 7 9 11 13 0.090 0.296 47.4 42.6 0.15 0.35 -1 -1 -1 -1 12 4 7 9 11 13 0.090 0.953 47.4 21.3 Results generated by Flemingfive.sas (see Appendix). One can observe the following from the tables above: The overall alpha is 0.067 for bleeding and 0.09 for DVT. Hence, the probability to reject a dose that is associated with 5% bleeding is 6.7% and the probability to reject a dose that is associated with 15% DVT is 9%, which in both cases is greater than the usual 5%. The power for testing 0.05 vs. 0.10 (bleeding rate) is 45.6% and for testing 0.15 vs.0.20 (DVT) is 29.6%. These two values are both lower than the usual 80% which means that that this design has low power to detect small (put possibly clinically relevant) differences. If we look at the cases where 0.05 vs. 0.20 (bleeding) and 0.15 vs. 0.35 (DVT) are tested, we see that in both cases the power increases (96.3% for bleeding and 95.3 for DVT). The expected sample size under HA for bleeding (0.05 vs. 0.10) and DVT (0.15 vs. 0.20) is respectively 42 and 43, which is not much lower than the maximum of 50. This is because early ´acceptance´ of H0 is not possible and also because of the fact that the probability to reject H0 at each stage is ´small´. If we now increase p1 to 0.20 for bleeding, and to 0.35 for DVT, we see that the ASN under HA decreases to 23.6 and 21.3 respectively. This is because the probability to reject H0 at each stage increases. 29 3. A Case Study 3.2.2 Fleming It is also possible to extend Fleming’s three-stage design to five stages and analyze the case proposed above. The hypothesis will remain the same as in Ginsberg et al (1994) H 0 : p p0 vs. HA : p p0 , but the designs are different. Whereas in Ginsberg only rejection of H0 is possible after an interim analysis, in Fleming’s design both ‘acceptance’ and rejection of H0 possible. This will be done by taking n1=n2=n3=n4=n5=10 and deriving the rejection points and the properties using the method from paragraph 2.1. If we now set p1=0.10 or 0.20 as unacceptable bleeding rates and p1=0.20 or 0.35 as unacceptable DVT rates, we get the following results: Table15. Bleeding P0 P1 P2 A1 A2 A3 A4 A5 R1 R2 R3 R4 R5 XALPHA XPOWER ASN0 ASN1 0.05 0.1 0.192 -3 -1 1 3 5 4 5 5 6 6 0.040 0.377 35.4 39.0 0.05 0.2 0.192 -3 -1 1 3 5 4 5 5 6 6 0.040 0.944 35.4 28.5 Results generated using Flemingfive.sas (see Appendix). Table16. DVT P0 P1 P2 A1 A2 A3 A4 A5 R1 R2 R3 R4 R5 XALPHA XPOWER ASN0 ASN1 0.15 0.20 0.344 -2 1 5 8 12 7 8 10 11 13 0.042 0.212 31.9 36.1 0.15 0.35 0.344 -2 1 5 8 12 7 8 10 11 13 0.042 0.932 31.9 29.7 Results generated using Flemingfive.sas (see Appendix). The following observations can be made from the results that are given above: For bleeding, the acceptance points for the first and for the second stage are negative, which means that it is not possible to ‘accept’ H0 after the first and second stage. For DVT that is only the case for the first stage. The overall alpha is well controlled for bleeding as well as for DVT. This is a property of Fleming’s design. The power for testing 0.05 vs. 0.10 for bleeding and for testing 0.15 vs. 0.20 is equal to 33.7% and 21.2% which in both cases is lower than the required 80%. In other words, the power is too low to detect small differences. The expected sample sizes under HA (39.0 for bleeding is 39.0 and 36.1 for DVT) are much lower than 50. This is a result of the extra possibilities to ‘accept’ H0 and hence to stop the trial early (after an interim analysis). Furthermore, from the value of p2 we observe that for bleeding ‘acceptance’ of H0 at some stage is equivalent to rejection of HA: p=0.192, while for DVT ‘acceptance’ of H0 is equivalent to rejection of HA p=0.344. Hence, ‘acceptance’ of H0 for bleeding does not imply that the bleeding rate is 5%, but only that it is less than 19.2%. Similarly, ‘acceptance’ of H0 for DVT only implies that the DVT rate is less than 34.4%. 30 3. A Case Study 3.2.3 Thall & Simon (Continuous) Another alternative design for the case study is to apply the Thall & Simon (1994) method. This means that we will use a fully sequential design and monitor the data continuously instead of every 10 patients. Therefore, we set the following: The mean of “the standard treatment” (Heparin) for bleeding and for DVT is u=0.05 u0=0.15 respectively. The width of 90% confidence interval for the distribution of “the standard treatment” for bleeding and for DVT will be equal to W90=0.05, which means that the prior of “the standard treatment” is highly informative. The concentration parameter for the experimental treatment for bleeding and DVT is equal to 2. The design properties for bleeding will be derived under Delta=0.05 and 0.15, which means that the bleeding probability of E (pE) is equal to 0.10 and 0.20. For DVT we set delta equal to 0.05 or 0.20, which corresponds with pE= 0.20 or 0.35. Nmin =10, Nmax =50, Pu =0.95, Pl =0.05. With these values, the Thall & Simon design has the following properties: Table17. Bleeding ps pe P25 P50 P75 PPLUS PMIN PINCONCL 0.05 0.10 10 14 46 0.385 0.404 0.211 0.05 0.20 10 12 15 0.759 0.241 0.000 Results generated using Thall.sas (Appendix). Table18. DVT ps pe P25 P50 P75 PPLUS PMIN PINCONCL 0.15 0.20 13 32 50 0.376 0.248 0.376 0.15 0.35 10 10 14 0.808 0.192 0.000 Results generated using Thall.sas (Appendix). The following observations can be made from the results that are given above: Bleeding: it can be seen from the table that if pE=0.10 then the probability to declare “the experimental treatment” harmful (pplus) is 0.385 and the probability to declare “the experimental treatment” not harmful (pmin) is 0.404. Furthermore, the probability that the trial will be inconclusive (PINCONCL) is equal to 0.211 and the distribution of N takes a large value (median=14) On the other hand, if pE=0.20 then pplus increases to 0.759 and pmin decreases to 0.241. Furthermore, PINCONCL is equal to zero. The distribution of N takes smaller values (median=12). DVT: it can be seen from the table that if pE=0.20 then the probability to declare “the experimental treatment” harmful (pplus) is 0.376 and the probability to declare it not harmful (pmin) is 0.248. Furthermore, the probability that the trial will be inconclusive (PINCONCL) is equal to 0.376 and the distribution of N takes larger values (median32). On the other hand, if pE=0.35 then pplus increases to 0.808 and pmin decreases to 0.192. Furthermore, PINCONCL is equal to zero and there is a large drop in the distribution of N (median=10). 31 3. A Case Study 3.2.4 Thall & Simon (Discrete) Another alternative design for the case study is to use the upper and lower cut-offs of Thall & Simon (1994), but only at n=10, 20, 30, 40 and 50, since we primarily have interest in monitoring the data at these ’time’ points. This also facilitates a comparison with the other designs with regard to the alpha, the power and the expected sample size. For bleeding these properties are: Table19. Bleeding P0 P1 A1 A2 A3 A4 A5 R1 R2 R3 R4 R5 XALPHA XPOWER ASN0 ASN1 0.05 0.1 0 0 0 1 2 3 4 5 6 7 0.033 0.279 24.5 29.2 0.05 0.2 0 1 2 4 5 3 4 5 6 6 0.036 0.824 16.9 19.9 Results generated using Flemingfive.sas (Appendix). It can be seen that in both cases (0.05 vs. 0.10 and 0.05 vs. 0.20) the overall alpha is lower than the usual maximum of 5% (3.3% and 3.6% respectively). The power for testing 0.05 vs. 0.10 is 27.9%, which is much smaller than the usual 80%, and the expected sample size (29.2) under HA is much lower than the maximum of 50. Furthermore, one can observe that the power increases (82.4%) and ASN1 under the alternative decreases when p1 increases from 0.10 to 0.20. Note that the boundaries A1-A5 and R1-R5 not only depend on the value of the bleeding probability under null hypothesis (p0=0.05), but also on the value of P under the alternative. For example: In the first design (p1=0.1), the null hypothesis will be ‘accepted’ after 40 patients if there is at most 1 patient with bleeding. One bleeding in 40 patients corresponds to 2.5%, which is in line with the value specified under null hypothesis (5%). In the second design (p1=0.2), the null hypothesis will be ‘accepted’ after 40 patients if there are at most 4 patients with bleeding. However, 4 bleedings in 40 patients corresponds to 10%, which is in fact more than the value specified under the null hypothesis (5%). On the other hand, an observed bleeding rate of 10% is even less consistent with the alternative (20%), which is probably why this Bayesian method allows for acceptance of the null hypothesis. Nevertheless, it seems counterintuitive to accept the null hypothesis based on a value that is not consistent with this hypothesis. For DVT the design properties are: Table20. DVT P0 P1 A1 A2 A3 A4 A5 R1 R2 R3 R4 R5 XALPHA XPOWER ASN0 ASN1 0.15 0.20 0 1 2 4 5 4 7 9 11 13 0.089 0.290 35.9 37.0 0.15 0.35 1 3 6 9 12 4 7 9 11 13 0.077 0.855 17.1 17.8 Results generated using Flemingfive.sas (Appendix). In both cases (p1=0.20 and 0.35) the overall alpha is greater than the usual maximum of 5% (8.9% and 7.7%). The power for testing 0.15 vs. 0.20 is 29%, which is much smaller than the usual 80%, and the expected sample size (37) is lower than the maximum of 50. If p1 increases from 0.15 to 0.35, then the power for testing 0.15 vs. 0.35 increases to 85.5% and ASN1 decreases to 17.8. 32 3. A Case Study Note here also that the boundaries A1-A5 and R1-R5 not only depend on the value of the DVT probability under null hypothesis (p0=0.15), but also on the value of P under the alternative. For example: In the first design (p1=0.20), the null hypothesis will be ‘accepted’ after 40 patients if there are at most 4 patients with a DVT. Four DVTs in 40 patients corresponds to 10%, which is in line with the value specified under null hypothesis (15%). In the second design (p1=0.35), the null hypothesis will be ‘accepted’ after 40 patients if there are at most 9 patients with a DVT. However, 9 DVTs in 40 patients corresponds to 22.5%, which is in fact more than the value specified under the null hypothesis (15%). 3.3 Reversing Hypotheses In the designs presented thus far we used the hypotheses implicitly given by Ginsberg et al. (1994). However, accepting H0 is not a correct way to prove that a drug is safe and effective. Therefore, it is better to interpret the criteria given in the case description as follows: A bleeding-free rate >=95% An overall DVT-free >=85% And the corresponding hypothesis will then be of the form H 0 : p p0 , where p0 is an ‘unacceptable’ bleeding or DVT rate HA : p p0 , is acceptable In this new setting, we will derive two Fleming (1982) designs for bleeding and two for DVT based on the following hypotheses: For the proportion of patients who are bleeding-free: o H 0 : p 0.90 vs. H 0 : p 0.90 and o H 0 : p 0.80 vs. H 0 : p 0.80 For the proportion of patients who are DVT-free: o H 0 : p 0.80 vs. H 0 : p 0.80 and o H 0 : p 0.65 vs. H 0 : p 0.65 All hypotheses will be tested at a one-sided alpha=0.05. For proportion bleeding-free we get the following designs: Table21. Bleeding free P0 P1 P2 A1 A2 A3 A4 A5 R1 R2 R3 R4 R5 XALPHA XPOWER ASN0 ASN1 0.9 0.95 0.991 9 19 29 39 48 13 22 31 40 49 0.015 0.129 15.1 11.7 0.8 0.95 0.946 7 16 26 35 45 14 22 30 38 46 0.019 0.881 22.4 26.9 Results generated using Flemingfive.sas (Appendix) By design, the overall alpha for testing (0.90 vs. 0.95 and 0.80 vs. 0.95) is smaller than the usual maximum of 5% (1.5% and 1.9%). One can also observe that there is insufficient power to detect a small difference (like 0.90 vs. 0.95); there is only enough power to detect a difference like 0.80 vs. 0.95. Furthermore, the expected sample size under H0 (ASN0) is 15.1 for testing 0.90 vs. 0.95 and ASN0=22.4 for testing 0.80 vs.0.95. Note that in the second design, the null hypothesis is accepted (the drug considered harmful) if there are less than or equal to 7/10 (70%), 16/20 (80%), 26/30 (86.7%), 35/40 (87.2%) and 45/50 (90%). 33 3. A Case Study These boundaries are related to the boundaries for the corresponding Fleming design with p0=0.05 and p1=0.20. For the proportion DVT-free the designs are: Table22. DVT free P0 P1 P2 A1 A2 A3 A4 A5 R1 R2 R3 R4 R5 XALPHA XPOWER ASN0 ASN1 0.80 0.85 0.946 7 16 26 35 45 14 22 30 38 46 0.019 0.111 22.4 23.5 0.65 0.85 0.845 4 13 21 30 38 13 20 26 33 39 0.031 0.908 25.7 31.6 Results generated using Flemingfive.sas (Appendix) Here, the overall alpha for testing (0.80 vs. 0.85 and 0.65 vs. 0.85) is also smaller than the usual maximum of 5%, namely (1.9% and 3.1%). One can also observe that there is insufficient power to detect a small difference, but enough power to detect a larger difference (0.65 vs. 0.85). Furthermore, there is a small increase in the ASN0 (22.4 to 25.7). Note that in the second design, the null hypothesis is accepted (the drug considered harmful) if there are less than or equal to 4/10 (40%), 13/20 (65%), 21/30 (70%), 30/40 (75%) and 38/50 (76%) DVTs. These boundaries are related to the boundaries for the corresponding Fleming design with p0=0.15 and p1=0.35. 3.4 Summary and recommendations In this section we will summarize and compare the properties of the possible designs to monitor the bleeding and thrombosis rate of a certain dose of Hirulog. 3.4.1 Summary The designs suggested in the last section to assess the bleeding rate are summarized in the following table: Table23. Bleeding M P0 P1 A1 A2 A3 A4 A5 R1 R2 R3 R4 R5 XALPHA XPOWER ASN0 ASN1 G 0.05 0.10 -1 -1 -1 -1 5 3 4 5 5 6 0.067 0.456 48.8 41.7 G 0.05 0.20 -1 -1 -1 -1 5 3 4 5 5 6 0.067 0.963 48.8 23.6 F 0.05 0.10 -3 -1 1 3 5 4 5 5 6 6 0.040 0.377 35.4 39.0 F 0.05 0.20 -3 -1 1 3 5 4 5 5 6 6 0.040 0.944 35.4 28.5 T 0.05 0.10 0 0 0 1 2 3 4 5 6 7 0.033 0.279 24.5 29.2 T 0.05 0.20 0 1 2 4 5 3 4 5 6 6 0.036 0.824 16.9 19.9 R 0.90 0.95 9 19 29 39 48 13 22 31 40 49 0.015 0.129 15.1 21.7 R 0.80 0.95 7 16 26 35 45 14 22 30 38 46 0.019 0.881 22.4 39.3 (G=Ginsberg, F= Fleming, T= Thall & Simon, R= Fleming with the reverse hypothesis) Of the designs suggested to test 0.05 vs. 0.10, Ginsberg’s design seems to have the highest power. Fleming and Thall & Simon’s designs are both able to control the overall alpha. The expected sample size is the lowest if one uses Thall & Simon. If we increase p1 to 0.20 then the same results can be observed. Ginsberg gives the highest power, Fleming and Thall & Simon control the overall alpha and Thall & Simon has the smallest expected sample size. 34 3. A Case Study The designs presented to asses the DVT rate are summarized in the following table: Table ii. DVT P0 P1 A1 A2 A3 A4 A5 R1 R2 R3 R4 R5 XALPHA XPOWER ASN0 ASN1 G 0.15 0.20 -1 -1 -1 -1 12 4 7 9 11 13 0.090 0.296 47.4 42.6 G 0.15 0.35 -1 -1 -1 -1 12 4 7 9 11 13 0.090 0.953 47.4 21.3 F 0.15 0.20 -2 1 5 8 12 7 8 10 11 13 0.042 0.212 31.9 36.1 F 0.15 0.35 -2 1 5 8 12 7 8 10 11 13 0.042 0.932 31.9 29.7 T 0.15 0.20 0 1 2 4 5 4 7 9 11 13 0.089 0.290 35.9 37.0 T 0.15 0.35 1 3 6 9 12 4 7 9 11 13 0.077 0.855 17.1 17.8 R 0.80 0.85 7 16 26 35 45 14 22 30 38 46 0.019 0.111 22.4 29.2 R 0.65 0.85 4 13 21 30 38 13 20 26 33 39 0.031 0.908 25.7 35.3 (G=Ginsberg, F= Fleming, T= Thall & Simon, R= reverse of Fleming) For all proposed designs, the power for testing 0.15 vs. 0.20 was the highest using Ginsberg. On the other hand, Fleming’s design is the only model that controls the overall alpha while Thall & Simon minimizes the expected sample size. If we increase p1 to 0.35, then all the designs give a high power (over 80%). However, Ginsberg’s design gives the highest power. Fleming’s design is the only design that controlls the overall alpha and Thall & Simon has the smallest expected sample size. 3.4.2 Recommendations Based on the summary, the following conclusions and recommendations can be made: Firstly, with a number of 50 subjects, the power is too low to detect small differences like (0.05 vs. 0.10 or 0.90 vs. 0.95). If we use the formula of the sample size given in chapter 1 to compute the power for N=50, p0=0.05 and p1=0.10 then power=38%. Therefore, the number of subjects may be increased in order to be able to detect smaller differences. The sample size for testing (0.05 vs. 0.10) with an alpha equal to 0.05 and a power equal to 80% is N=172. Secondly, the design of Ginsberg is unclear about ‘accepting’ H0; if the null hypothesis can not be rejected at the end of the trial, then accepting H0 is not a correct way of proving that a drug is safe and effective. As opposed to Ginsberg, Fleming’s design is clearer. There, accepting (H0: p=p0) at some stage is equivalent with rejecting HA (p= ~ p A ) where ~ p A is the same value described in section 2.2.2. The most natural and correct way to prove that a drug is safe and effective, is to reverse the hypothesis, as in section 3.3 ( instead of bleeding we now have bleeding free) so that rejection of H0 leads to accept the drug. The only design where both hypotheses (‘drug acceptable’ as well as ‘drug uncacceptable) are treated as equally important is Fleming’s design. 35 4. Two-Armed Trials 4. Two-Armed Trials In two-armed clinical trials, we compare two treatments for a disease. Again, we restrict ourselves to binary outcomes. One treatment will be denoted as the control therapy C (possibly a placebo) and the other one will be the experimental therapy E. The aim of the two- armed trial is to investigate whether there is a difference between C and E. In this setting we have the following hypotheses: H0: pC pE this means that there is no difference between the two treatments. HA: pE pC this means that E better is than C. In a one-stage design a pre-specified number of patients are enrolled and the hypothesis is tested only once, namely at the end of the trial. 4.1. One-Stage design The one stage procedure is based on the following statistics: pˆ E X E / N E , where NE is the sample size and XE is the number of responses in the experimental treatment E. pˆ C X C / NC , where NC is the sample size and XC is the number of responses in the control group. The overall response is then pˆ ( X E X c ) /( N E N C ) Additionally, we assume that for sufficiently large NE and NC, pˆ E pˆ C follows a normal distribution with parameters approximately p 1 pE pC 1 pC pE pC , E . NE NC Now define the Z-statistic as pˆ E pˆ C se pˆ E 1 pˆ E pˆ C 1 pˆ C Here se is the standard error. The single-stage test will reject H0 NE NC Z at the end of the trial when Z>Zα; or equivalently when p E p C > Zαse. It is straightforward to show that the sample size for each group, with significance level α and power 1-β, is approximately N 2 2Z Z pˆ 1 pˆ p E pC With pˆ pˆ E pˆ C 2 36 4. Two-Armed Trials 4.2. Two-stage Design As in the one-stage design, the hypotheses to be tested are: H0: pC pE there is no difference between the two treatments (control and experimental). HA: pE pC the experimental treatment is better than the control. In a two-stage design, an interim analysis is performed, at which, in principle, either H0 can be rejected or “accepted” or the trial can be continued. Here we only consider early acceptance of H0. (This is similar to Simon’s two-stage design for a one-armed trial). In order to stop early when the trial has no chance to reject H0 at the end of the trial, a futility stopping rule will be introduced. A helpful tool in constructing such a rule is the conditional power, that is: the conditional probability to reject the null hypothesis given 1) the data so far, 2) an alternative hypothesis about the expected treatment group difference being true. This alternative hypothesis can be the expected difference at the start of the trial, the difference observed at the interim analysis (‘the current trend’), or a combination of two. In this section, we will give a method for computing the conditional power based on the socalled B-value. Then we derive a futility stopping rule. Throughout, this chapter we use the following notation: N1 the first stage group size for each treatment. N2 the second stage group size for each treatment. N the total group size for each treatment. XE1 and XE2 the number of responses in the first and second stage in the experimental group. Analogously, XC1 and XC2 are the number of responses in the control group in the first and second stages respectively. After the first stage when N1 patients are treated in each group we will: ‘Accept’ H0 if XC1=k and XE1< f(k) for some k in the range (0, N1) Continue if XC1=k and XE1>= f(k) for some k in the range (0, N1) Here f is a one to one map X C1 X E1 . So, for each XC1=k there is a value f (k) such that H0 will be accepted if XE1<f (k) and the trial will be continued if XE1>=f (k). In the final analysis, when N patients are treated in the control group and also N in the experimental group, we will: ‘Accept’ H0 if XC=s and XE< g(s) for each s in the range (0, N) Reject H0 if XC=s and XE>= g(s) for each s in the range (0, N) Here g is a one to one map X C X E . So, for each XC=s than there is a value g (s) such that H0 will be rejected if XE>= g(s) and “accepted” if XE< g(s). Hence, in this design, the probability of early termination (PET) after the first stage is: N1 PET p P X C1 k P X E1 f k 1 . k 0 Furthermore, the expected total sample size or the average sample number (ASN) is: 37 4. Two-Armed Trials ASN p N1 N1 1 PET N 2 N 2 . The overall rejection probability is: Pr rejectH 0 p N1 N 2 N1 p X k 0 l 0 m f ( k ) C1 k p X C 2 l P X E1 m p X E 2 g s m . The average probability to make a correct decision is: Pcorrect PET 0 1 PET1 / 2 where PET0 is the probability of the early termination under the null hypothesis and PET1 is that under the alternative hypothesis. Note that (unweighted) averaging is performed only over H0 and HA, other possibilities are not considered). 4.2.1. Decomposition of the Z-test In this two-stage design, the conditional power will be calculated using the Z-statistic defined in section 4.1. In a two-stage design, the conditional power, the probability to reject H0 given an interim result, can be written as CP = Pr (ZFIN>Zα | Z1, HA), ZFIN is the Z-statistic at the final analysis and Z1 is the Z-statistic at the interim analysis. In this section we will prove that for a normal or binomial distributed data, the ZFIN can be decomposed as Z FIN Z1 N1 N2 Z2 N N Let X x , 2 and Y y , 2 represent the response variables in the experimental and the control treatment respectively, and let N=NE= NC be the sample size in each group. Then, for a known the Z-statistic can be written as Z FIN XE XC 2 / N 2 S E SC 2 N 2 , where X E and X C is the mean response in the experimental and the control group respectively, S E and S C are the corresponding sums. Now consider N1<N the number of patients in the first interim analysis in both treatments and N2 the number of patients in the residual. Then a statistic quite similar to the Z-statistic can be computed. Specifically, the interim Z-value is Z1 X 1E X 1C 2 2 / N 1 S 1E S 1C 2 N 1 2 , where X 1E and X 1C is the mean response in the experimental and the control group respectively, and S1E and S1C the corresponding number of sums. Now the Z-statistic at the end of the test can be expressed as 38 4. Two-Armed Trials Z FIN S E SC 2 N 2 S 1E S 1C 2 N 1 2 S 1E S 2 E S 1C S 2C 2 N 2 N1 S 2 E S 2C N 2 N 2 N1 N2 N1 Z1 Z2 N N N Note that for an unknown , the variance has to be estimated from the data. The estimates may differ between stages 1, 2 and both stages combined, but for a large N will be approximately equal to 2 . In the binomial case, Z FIN X E1 X E 2 X C1 X C 2 , is approximately normally distributed 2 Npˆ 1 pˆ and the reasoning given above can be followed. 4.2.2. The B-value The decomposition of the Z-value can also be written as Z 1 Z 1 t Z 2 1 t Where t=N1/N represents the proportion of the total planned information at the time of data monitoring. Now define the B-value at information time t as Bt Z 1 t . Then we have B1 Bt B1 Bt . The B-value has the following properties under HA Bt and B1 Bt are normal and independent; II. EBt t and EB1 Bt 1 t ; III. var Bt t and var B1 Bt 1 t ; I. Here = E [Z (1)] = E [B (1)]. If the treatment effect anticipated at the start of the trial is true and the planned power is 1-β, then =Zα+Zβ. Alternatively, can also be estimated using the interim data ( is then equal to B (t)/t). 4.2.3. The conditional power As mentioned before, the conditional power is the probability to reject H0 at the end of the trial given an interim result. This probability can be expressed using the B-value, presented in the last section, as follows: CP PrZ 1 Z Z 1 z, HA PrB1 Z Bt b, HA bz t. (1) Using property (I) given in the section 4.2.2, equation (1) can be written as CP PrB1 Bt Z b, HA Now, using the fact that B (1)-B (t) has a normal distribution with expected value 1 t and variance equal to 1 t , the conditional power can be calculated as 39 4. Two-Armed Trials 1.96 b (1 t ) CP 1 1 t Note that for various values can be chosen: The expected difference at the start of the trial or The observed difference after the interim results. Any combination of the two. 4.2.4. Futility stopping rule Conditional power is a useful instrument for the consideration of early termination for futility. The study will be terminated early if the conditional power is “too low”. In this paragraph we will introduce a futility stopping rule, i.e. a threshold CP0 for the conditional power below which the study will be stopped at the interim analysis. Suppose now that we want to investigate whether a treatment E is better that the control treatment C. Then, for a given alpha, beta and sample size N, H0 will be rejected when ZFIN> Zα. The corresponding number of responses XC and XE that leads to rejection can be derived as follows: For each XC and XE in the range (0, N), derive the ZFIN given in section 4.1. Then, for each XC, find the min XE for which ZFIN> Zα. The result is then a one to one map g : X C X E . So, for each XC=s there is a value g (s) such that ZFIN> Zα and H0 will be rejected. Now consider an interim analysis N1<N, and suppose we want to investigate whether H0 can be accepted, then the following steps should be followed to select an early termination plan that is based on the conditional power: For each value XC1 and each XE1 value in the range (0, N1), derive the conditional power as described above. For a given threshold CP0, find for each XC1 a minimum XE1 such that the conditional power CP>CP0. The result is then a one to one map f : X C1 X E1 . So, for each XC1=k there is a value f (k) such that H0 will be accepted if XE1<f (k) and the trial will be continued if XE1>=f (k). Now, using these boundaries, the properties of this stopping plan, including the probabilities of stopping under H0 and HA and the type I and II error, can be derived as given in 4.2. In our search for the optimal threshold CP0, these properties can be used. The properties of an optimal threshold would be: Provide a high probability to take a correct decision, i.e., a high stopping probability (PET0) under H0 and a low stopping probability (PET1) under HA. Restrict power loss Therefore, we let CP0 range in the interval (0, 1), and for each CP0 we derive the properties of the design given above. Then we compare the properties obtained in each design and we will give a range of CP0 that provides optimal properties. We shall illustrate this in the following example. 40 4. Two-Armed Trials Example Suppose an experimental treatment is compared to a control, with respect to a binary outcome ‘response’, and the anticipated response rates are 40% and 20% respectively. The group size needed for 80% power to detect this difference is 82 subjects (one-sided α=0.025). Suppose, further, that an interim analysis is performed halfway through the trial (N1), and that CP0 varies from 0 to 1, then for each conditional power threshold CP0 we can derive alpha, power, ASN, PET and Pcorrect. These design properties are plotted as a fucntion of CP0 in figures 1 (PET), 2 (Pcorrect), and 3 (power) respectively. Figure1. Probabilities of early stopping PET0 (under H0, solid line) and PET1 (under HA, dotted line) for a range of thresholds CP0. It can be seen that in both cases - under the null hypothesis as well as under the alternative hypothesis - the probability of early stopping increases when the threshold for the conditional power increases. For example, if CP0=0.05 then the probability to stop early under H0 is 65%, suggesting that a low threshold already provides a fairly high probability of stopping when there is no difference between treatments. On the other hand, if CP0=0.90 then the probability to stop early under HA is 50%, which means that a high threshold will lead to stopping trials for futility, even when difference between treatments exists. 41 4. Two-Armed Trials Figure2. Probabilities of making a correct decision Pcorrect0 (dotted line, increasing), Pcorrect1 (dotted line, decreasing) and their average Pcorrect (solid line) for a range of thresholds CP0. Figure 2 shows that the probability of making a correct decision when H0 is true (Pcorrect0, which is equal to PET0) increases when CP0 increases. Conversely, we see that the probability to make a correct decision if HA is true (Pcorrect1, which is equal to 1-PET1) decreases when CP0 increases. Furthermore, it can be seen that the average Pcorrect of Pcorrect1 and Pcorrect0 is approximately constant at a value 0.8 for each CP0 in the range (0, 0.5). Figure3. Type I error (solid line) and the power (the dotted line) plotted as a function of the conditional power threshold CP0. Figure 3 shows that the power decreases as the threshold (CP0) increases. For small CP0 the power is close to the required 80%, but for large CP0 the power decreases to 40%. This 42 4. Two-Armed Trials is a consequence of the fact that the probability of early stopping under HA, which is a part of the type II error, increases. Further we can observe that the alpha decreases when the stopping boundary increases and stays under the nominal size of 2.5%. This confirms that a futility stopping rule does not introduce Type I error inflation. Choosing an optimal threshold As stated abovean optimal threshold should provide a high probability to take a correct decision and restrict power loss. From Figure 2 it appeared that the probability to make a correct decision (averaged over H0 and HA) is fairly constant on the CP0 interval. This does not provide much information to determine a threshold. From Figure 3 it appeared that the power loss is restricted to 5% as long as CP0 is in the interval (0, 0.20). In this interval, Pcorrect takes a maximum value (84%), the type II error lies in the interval (20%, 25%) and the power lies between 75% and 80%. Hence the optimal CP0 43 Summary References 1. Brown LD et al. Interval Estimation for a Binomial Proportion. Statistical Science 2001; vol. 16, No. 2, 101-133. 2. Fleming TR. One-sample multiple testing procedure for phase II clinical trials. Biometrics 1982; 38: 143-151. 3. Ginsberg J. et al. Use of Hirulog in the Prevention of Venous Thrombosis after Major Hip or Knee Surgery. Journal? (1994) 4. Herson J. Predictive Probability Early termination Plans For Phase II Clinical trials. Biometrics 1979; 35: 775-783. 5. Lachin J.M. A review of methods for futility stopping based on the conditional power. Statistics In Medicine 2005; 24:2747-2764. 6. Lan KKG, Wittes J. The B-value: A tool for monitoring data. Biometrics. 1988; 44: 579-585. 7. Simon R. Optimal two-stage designs for phase II clinical trials. Controlled Clin Trials 1989; 10: 110. 8. Thall PF and Simon R. Practical Bayesian guidelines for phase IIB clinical trials. Biometrics 1994; 50: 337349. 9. Van Houwelingen JC. et al. Introduction to Medical Statistics. 10. Whitehead J, Zhou Y, Stevens J. and Blakey G. An evaluation of a Bayesian method of dose-escalation based on bivariate responses. Journal of Biopharmaceutical Statistics 2004; 14, 969-983. 44 Appendix Appendix. SAS Programs Onestage.sas data bin; alpha=0.05; za=probit(1-alpha); power=0.80; zb=probit(power); p0=0.10; * null; p1=0.20; * alternative; pbar=(p0+p1)/2; nnorm=round((za+zb)**2*pbar*(1-pbar)/(p1-p0)**2); do n=int(3/4*nnorm) to ceil(6/4*nnorm); do k=0 to n; pwaarde=1-probbnml(p0,n,k); beta=probbnml(p1,n,k); power=1-beta; output; end; end; run; proc univariate data=bin noprint; by p1 p0 nnorm n; var k power; output out=binout min=mink minpower max=maxk maxpower where (pwaarde<alpha and power>0.78); run; data binout; set binout; xalpha=1-probbnml(p0,n,mink); run; ods rtf file='binout.rtf'; proc print noobs; format MAXPOWER MINPOWER XALPHA f5.3; run; 45 ; Appendix ods rtf close; 46 Appendix Fleming.sas /******************************************************************************* Organon/CTO/Biometrics ******************************************************************************** Program : Fleming.sas Author : Mustapha Setta Protocol : Not applicable Compound : Not applicable Date : 17 JUN 05 Purpose : Apply Fleming's (1982) method for 2-stage and 3-stage designs Reference : Fleming TR. One-Sample Multiple Testing Procedure for Phase II Clinical trials. Biometrics; 38:143-151. Remarks : k=number of responses in 1st stage -> Q1=prob to have k responses l=number of responses in 2nd stage -> Q2=prob to have l responses This program calculates a two- or three-stage design. After each stage, H0 can be rejected or 'accepted'. In this design the hypotheses are: H0: p<=p0 , p0 is unpromising level. HA: p>p1, p1 is promising level. Input p0 (M) P1 (M) Alpha (M) n1 (M) n2 (M) : (O=optional, M=mandatory) value of the unpromising response probability. value of the promising response probability. the Type I error (one-sided). the number of patients treated in the first interim analysis. the number of patients treated in the second interim analysis. 47 Appendix n3 (M) the number of patients treated after the second interim analysis. Output : A1 ASN0 ASN1 the acceptance point after the first interim analysis, i.e. accept H0 whenever <=A1 responses are observed in n1 patients. the acceptance point after the second interim analysis, i.e. accept H0 whenever <=A2 responses are observed in n1+ n2 patients. the acceptance point in the analysis, i.e. accept H0 whenever <=A3 responses are observed in n1+ n2+n3 patients. the rejejection point after the first interim analysis, i.e. reject H0 in favor of HA whenever <=R1 responses are observed in n1 patients. the rejection point after the second interim analysis, i.e. reject H0 in favor of HA whenever <=R2 responses are observed in n1+ n2 patients. the rejection point in the final analysis, i.e. reject H0 in favor of HA whenever <=R3 responses are observed in n1+ n2+n3 patients. the expected sample size under H0 (p=p0, unpromising). the expected sample size under HA (p=p1, promising). ALPHA1 POWER the exact type I error of the two- or three-stage design. the exact power of the two-or the three-stage design. A2 A3 R1 R2 R3 - Data sets : - Files : the usual log- and lst-files - Other : SAS version : 8.2 for winNT 4.0 ******************************************************************************** (This part must be filled in for every modification that is made) Date Modif. : Done by : Description : *******************************************************************************/ /******************************************************************************* FORMAT DEFINITIONS 48 Appendix *******************************************************************************/ /******************************************************************************* MACRO DEFINITIONS *******************************************************************************/ %macro preject2 (p,preject); * probability to reject H0 after 2nd stage; &preject=0; do k=max(a1+1,0) to min(r1-1,n1); if k>0 then Q1=(probbnml(&p,n1,k)-probbnml(&p,n1,k-1)); else Q1=probbnml(&p,n1,k); if 0<=r2-(k+1)<=n2 then Q2=(1-probbnml(&p,n2,r2-(k+1))); else if r2-(k+1)<0 then Q2=1; else Q2=0; *r2-(k+1)>n2:rejection not possible; &preject=&preject+Q1*Q2; end; drop k Q1 Q2; %mend; %macro paccept2 (p,paccept); * probability to 'accept' H0 after 2nd stage; &paccept=0; do k=max(a1+1, 0) to min(r1-1,n1); if k>0 then Q1=(probbnml(&p,n1,k)-probbnml(&p,n1,k-1)); else Q1=probbnml(&p,n1,k); if 0<=(a2-k)<=n2 then Q2=probbnml(&p,n2,a2-k); else if (a2-K)>n2 then Q2=1; else Q2=0; *(a2-k)<0:acceptance not possible; &paccept=&paccept+Q1*Q2; end; drop k Q1 Q2; %mend; %macro preject3 (p, preject); * probability to reject H0 after 3rd stage; 49 Appendix &preject=0; do k=max(a1+1, 0) to min(r1-1,n1); do l=max(a2-k+1,0) to min(r2-k-1,n2); put k l; if k>0 then Q1=probbnml(&p,n1,k)-probbnml(&p,n1,k-1); else Q1=probbnml(&p,n1,k); if l>0 then Q2=(probbnml(&p,n2,l)-probbnml(&p,n2,l-1)); else Q2=probbnml(&p,n2,l); if 0<=r3-(k+l+1)<=n3 then Q3=1-probbnml(&p, n3, r3-(k+l+1)); else if r3-(k+l+1)<0 then Q3=1; else Q3=0;*r3-(k+l+1)>n3:rejection not possible; &preject=&preject+Q1*Q2*Q3; end; end; drop k l Q1 Q2 Q3; %mend; %macro paccept3 (p, paccept); * probability to 'accept' H0 after 3rd stage; &paccept=0; do k=max(a1+1, 0) to min(r1-1,n1); do l=max(a2-k+1,0) to min(r2-k-1,n2); if k>0 then Q1=probbnml(&p,n1,k)-probbnml(&p,n1,k-1); else Q1=probbnml(&p,n1,k); if l>0 then Q2=(probbnml(&p,n2,l)-probbnml(&p,n2,l-1)); else Q2=probbnml(&p,n2,l); if 0<=a3-(k+l)<=n3 then Q3=probbnml(&p, n3, a3-(k+l)); else if a3-(k+l)>n3 then Q3=1; else Q3=0;*a3-(k+l)<0:acceptance not possible; &paccept=&paccept+Q1*Q2*Q3; end; end; drop k l Q1 Q2 Q3; %mend; %macro fleming (n1 , n2, n3, p0, p1, alpha); data threestage; 50 Appendix n1=&n1; n2=&n2; n3=&n3; n=&n1+&n2+&n3; p0=&p0; p1=&p1; za=probit (1-&alpha); * one-sided; p2=(sqrt(n*p0)+sqrt(1-p0)*za)**2/(n+za**2); r1=round(n1*p0+za*sqrt(n*p0*(1-p0)))+1; a1=round(n1*p2-za*sqrt(n*p2*(1-p2))); r2=round((n2+n1)*p0+za*sqrt(n*p0*(1-p0)))+1; if n3=0 then a2=round(n*p2-za*sqrt(n*p2*(1-p2))); else do; a2=round((n2+n1)*p2-za*sqrt(n*p2*(1-p2))); r3=round(n*p0+za*sqrt(n*p0*(1-p0)))+1; a3=r3-1; end; if r1<=n1 then prej10=1-probbnml(p0, n1,r1-1); else prej10=0; if r1<=n1 then prej11=1-probbnml(p1, n1, r1-1); else prej11=0; if a1>=0 then pacc10=probbnml(p0,n1,a1); else pacc10=0; if a1>=0 then pacc11=probbnml(p0,n1,a1); else pacc11=0; %preject2 (p0, prej20); %preject2 (p1, prej21); %paccept2 (p0, pacc20); %paccept2 (p1, pacc21); %preject3 (p0, prej30); %preject3 (p1, prej31); %paccept3 (p0, pacc30); %paccept3 (p1, pacc31); asn0=n1+(1-prej10-pacc10)*n2+(1-prej10-pacc10-prej20-pacc20)*n3; asn1=n1+(1-prej11-pacc11)*n2+(1-prej11-pacc11-prej21-pacc21)*n3; alpha1=prej10+prej20+prej30; power=prej11+prej21+prej31; ptot=prej10+pacc10+prej20+pacc20+prej30+pacc30; run; proc append base=fleming data=threestage; run; %mend; 51 Appendix /******************************************************************************* MAIN PROGRAM *******************************************************************************/ * Six cases (two-stage) from Fleming (1982) cited by Simon (1989); %fleming %fleming %fleming %fleming %fleming %fleming (n1=20, (n1=15, (n1=20, (n1=25, (n1=25, (n1=25, n2=20, n2=10, n2=15, n2=25, n2=20, n2=25, n3=0, n3=0, n3=0, n3=0, n3=0, n3=0, p0=0.05, p0=0.10, p0=0.20, p0=0.20, p0=0.30, p0=0.30, p1=0.20, p1=0.30, p1=0.40, p1=0.40, p1=0.50, p1=0.50, alpha=0.05); alpha=0.05); alpha=0.05); alpha=0.05); alpha=0.05); alpha=0.05); *Six cases (three-stage) from Fleming (1982); %fleming (n1=10, n2=10, n3=10, p0=0.05, p1=0.20, %fleming (n1=10, n2=10, n3=5, p0=0.10, p1=0.30, %fleming (n1=15, n2=10, n3=10, p0=0.20, p1=0.40, %fleming (n1=20, n2=15, n3=15, p0=0.20, p1=0.40, %fleming (n1=15, n2=15, n3=15, p0=0.30, p1=0.50, %fleming (n1=20, n2=15, n3=15, p0=0.30, p1=0.50, %fleming (n1=20, n2=15, n3=15, p0=0.90, p1=0.95, ods rtf file='Fleming3.rtf'; proc print data=fleming; var n p0 p1 p2 n1 n2 n3 a1 a2 a3 r1 r2 r3 asn0 format asn0 asn1 f5.1 alpha1 power f5.3; run; ods alpha=0.05); alpha=0.05); alpha=0.05); alpha=0.05); alpha=0.05); alpha=0.05); alpha=0.05); asn1 alpha1 power; rtf 52 close; Appendix Simon.sas /******************************************************************************* Organon/CTO/Biometrics ******************************************************************************** Program : Simon.sas Author : Mustapha Setta Protocol : Not applicable Compound : Not applicable Date : 17 JUN 05 Purpose : Apply Simon's (1989) method for 2-stage design Reference : Simon R. Optimal two-stage designs for phase II clinical Trials. Contolled Clinical Trials 1989; 10: 110 Remarks : This program gives Simon's optimal two-stage design based on the exact binomial probabilities. After the first stage H0 can be 'accepted', early rejection is not possible. In this design the hypotheses are: H0: p=p0 , p0 is unpromising level. HA: p>=p1, p1 is promising level. Input : (O=optional, M=mandatory) p0 (M) P1 (M) Alpha (M) Beta (M) Output N value of value of the Type the Type an unpromising response probability. a promising response probability. I error (one-sided). II error. : the smallest sample size that satisfies the error constrains (type I, II) 53 Appendix n1 R R1 PET0 EXPN0 EXPN1 Alpha1 Power1 and minimizes the expected sample size under H0. the number of patients treated in the first interim analysis the acceptance point after n patients are treated (accept H0 if <=R responses are observed). the acceptance point after n1 patients are treated (accept H0 if <=R1 responses are observed). the probability of early termination after the first interim analysis. the expected sample size under H0 (p=p0, unpromising). the expected sample size under HA (p=p1, promising). the exact type I error of the optimal two-stage design. the exact power of the optimal two-stage design. - Data sets : - Files : the usual log- and lst-files - Other : SAS version : 8.2 for winNT 4.0 ******************************************************************************** (This part must be filled in for every modification that is made) Date Modif. : Done by : Description : *******************************************************************************/ /******************************************************************************* FORMAT DEFINITIONS *******************************************************************************/ /******************************************************************************* MACRO DEFINITIONS *******************************************************************************/ %macro ssize(p0, p1, alpha, beta); p0=&p0; p1=&p1; alpha=&alpha; beta=&beta; p_=(p0+p1)/2; za=probit(1-&alpha); 54 Appendix zb=probit(1-&beta); n0=((za+zb)**2*p_*(1-p_))/(p1-p0)**2; l=int(n0); k=ceil(5/4*n0); drop n0 za zb p_; %mend; /******************************************************************************* MAIN PROGRAM *******************************************************************************/ data simon; %ssize(p0=0.05, p1=0.25, alpha=0.10, beta=0.10); do n=l to k by 1; do n1=int(1/4*n) to int(2/3*n); do r=0 to n; do r1=0 to min(n1,r); pet0=probbnml(p0,n1,r1); pet1=probbnml(p1,n1,r1); n2=n-n1; expN0=n1+(1-pet0)*n2; expN1=n1+(1-pet1)*n2; spx0=pet0; do x0=r1+1 to min(n1,r); if (0<=(r-x0)<=n2) then px0=(probbnml(p0,n1,x0)-probbnml(p0,n1,x0-1))*probbnml(p0,n2,r-x0); else px0=0; spx0=spx0+px0; end; alpha1=1-spx0; spx1=pet1; do x1=r1+1 to min(n1,r); if (0<=(r-x1)<=n2) then px1=(probbnml(p1,n1,x1)-probbnml(p1,n1,x1-1))*probbnml(p1,n2,r-x1); else px1=0; spx1=spx1+px1; end; power1=1-spx1; if (alpha1<alpha and power1>(1-beta)) then output; end; end; end; end; run; 55 Appendix proc sort; by expn0; run; ods rtf file='Simon.rtf'; proc print noobs; var N N1 R R1 PET0 EXPN0 EXPN1 alpha1 POWER1; format pet0 alpha1 power1 f5.3 expn0 expn1 f4.1; run; proc sort; by n expn0; run; proc print noobs; var N N1 R R1 PET0 EXPN0 EXPN1 alpha1 POWER1; format pet0 alpha1 power1 f5.3 expn0 expn1 f4.1; run; ods rtf close; 56 Appendix Herson2.sas /******************************************************************************* Organon/CTO/Biometrics ******************************************************************************** Program : Herson2.sas Author : Mustapha Setta Protocol : Not applicable Compound : Not applicable Date : 17 JUN 05 Purpose : Apply Herson's (1979) method for 2-stage design Reference : Herson J. Predictive Probability Early Termination Plans For Phase II clinical Trials. Biometrics 1979; 35: 775-783. Remarks : This program gives a two-stage design including an early termination plan based on Bayesian predictive probability (PP). The study will be terminated after the first stage if the PP to reject H0 is "high". In this design the hypotheses are: H0: p>=p0 , p0 is a promising level. HA: p<p1 , p1 is unpromising level. Input : (O=optional, M=mandatory) p0 (M) p1 (M) Alpha (M) Power (M) N (O) n1 (O) Mu (O) Value of a promising response probability. Value of an unpromising response probability. The Type I error (one-sided). The power of the test. Sample size of the treatment group. If n=. then the program will calculate sample size based on alpha and power. the number of patients treated in the first interim analysis. If n1=. then the program will take n1= (n/2). The mean of the beta (prior) distribution of response probability p. 57 Appendix CV (O) PP0 (M) if Mu=. then the program assumes the Uniform distribution. The coefficient of variation, i.e. an expression of the degree of confidence in Mu. if CV=. then we the Uniform distribution. A certain threshold that is used for interim analyses. Note: if a parameter is not given a value, then explicitly specify as missing '.'. Output : C the rejection point at the final analysis, H0 will be then rejected when <= C responses are observed in n patients. the corresponding type I error of the one-stage designs. the power corresponding with the one-stage design. the rejection point at the first interim analysis, H0 will be rejected when <=C1 responses are observed in n1 patients. the predictive probability to reject H0, given R1 responses in n1 patients. the type I error that corresponds with the two-stage design. the power of the two-stage design. the expected sample size under H0 (p=p0, promising). the expected sample size under HA (p=p1, unpromising). ALPHA1 POWER1 C1 PP ALPHA2 POWER2 ASN0 ASN1 - Data sets : - Files :the usual log- and lst-files - Other : SAS version : 8.2 for winNT 4.0 ******************************************************************************** (This part must be filled in for every modification that is made) Date Modif. : Done by : Description : *******************************************************************************/ %*include sasopt; 58 Appendix /******************************************************************************* FORMAT DEFINITIONS *******************************************************************************/ /******************************************************************************* MACRO DEFINITIONS *******************************************************************************/ %macro preject2 (p,preject); * probability to reject H0 after 2nd stage; &preject=0; do k=c1+1 to c by 1; if k>0 then px=(probbnml(&p,n1,k)-probbnml(&p,n1,k-1))*probbnml(&p,n-n1,c-k); else px=probbnml(&p,n1,k)*probbnml(&p,n-n1,c-k); &preject=&preject+px; end; drop k px; %mend; %macro paccept2 (p,paccept); * probability to accept H0 after 2nd stage; &paccept=0; do k=c1+1 to c by 1; if k>0 then Q1=(probbnml(&p,n1,k)-probbnml(&p,n1,k-1)); else Q1=probbnml(&p,n1,k); if c>=k+1 then T1=(1-probbnml(&p,n-n1,c-(k+1))); else T1=1; &paccept=&paccept+Q1*T1; end; drop k Q1 T1; %mend; %macro herson (p0, p1, alpha, power, n, n1, mu, cv, pp0); * Calculate n (if needed) and C; data ssize; p0=&p0; p1=&p1; alpha=&alpha; power=&power; pp0=&pp0; %if &n ne . %then n=&n; %else %do; 59 Appendix za=probit(1-alpha); zb=probit(power); pbar=(p0+p1)/2; n=(za+zb)**2*pbar*(1-pbar)/(p1-p0)**2; %end;; n=ceil(n); do k=0 to n; xalpha=probbnml(p0,n,k); output; end; run; proc univariate data=ssize noprint; by p0 p1 alpha power pp0 n; var k; output out=crit max=c; where (xalpha<alpha); run; * Calculate a and b for Hersons Beta prior with pdf(p)~p^a*p^(b-a); * If no mu or CV is given a uniform prior is assumed; data beta; set crit; %if &mu ne . and &cv ne . %then %do; v=&cv/100; b=(1-&mu*(1+3*v**2))/(v**2*&mu); a=(b+2)*&mu-1; drop v; %end; %else %do; a=0; b=0; %end;; run; * Calculate Predictive Probability; data pp; set beta; %if &n1 ne . %then n1=&n1; %else n1=int(n/2);; 60 Appendix r1=-1; pp=1.0; output; do r1=0 to c by 1; pp=0; do r=r1 to c by 1; lpr1=log((n1+b+1)/(n+b+1)) +(lgamma(n1+b+1)+lgamma(n-n1+1)+lgamma(r+a+1)+lgamma(n+b-r-a+1)) -(lgamma(n+b+1)+lgamma(r-r1+1)+lgamma(r1+a+1)+lgamma(n1+b-r1-a+1)+lgamma(n-n1-r+r1+1)); pp=pp+exp(lpr1); end; output; end; run; proc univariate data=pp noprint; by p0 p1 alpha power pp0 n c n1; var r1; output out=uniout max=c1; where (pp>pp0); run; data crit1 (drop=r1); merge pp (drop=r lpr1) uniout; by p0 p1 alpha power pp0 n c n1; if r1=c1; run; * Calculate ASN, alpha and power (also of one-stage design); data alpower; set crit1; alpha1=probbnml(p0,n,c); power1=probbnml(p1,n,c); if c1>=0 then do; prej10=probbnml(p0,n1,c1); prej11=probbnml(p1,n1,c1); end; else do; prej10=0; 61 Appendix prej11=0; end; %preject2 (p0, prej20); %preject2 (p1, prej21); alpha2=prej10+prej20; power2=prej11+prej21; ASN0=n1+(1-prej10)*(n-n1); ASN1=n1+(1-prej11)*(n-n1); run; proc append base=table data=alpower; run; %mend; /******************************************************************************* MAIN PROGRAM *******************************************************************************/ %herson %herson %herson %herson %herson %herson (p0=0.30, (p0=0.40, (p0=0.20, (p0=0.40, (p0=0.50, (p0=0.50, p1=0.10, p1=0.20, p1=0.05, p1=0.20, p1=0.30, p1=0.30, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, power=0.80, power=0.80, power=0.80, power=0.90, power=0.80, power=0.90, n=25, n=35, n=40, n=50, n=45, n=50, n1=15, n1=20, n1=20, n1=25, n1=25, n1=25, mu=., mu=., mu=., mu=., mu=., mu=., %herson %herson %herson %herson %herson %herson (p0=0.30, (p0=0.40, (p0=0.20, (p0=0.40, (p0=0.50, (p0=0.50, p1=0.10, p1=0.20, p1=0.05, p1=0.20, p1=0.30, p1=0.30, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, power=0.80, power=0.80, power=0.80, power=0.90, power=0.80, power=0.90, n=25, n=35, n=40, n=50, n=45, n=50, n1=15, n1=20, n1=20, n1=25, n1=25, n1=25, mu=0.10, mu=0.20, mu=0.05, mu=0.20, mu=0.30, mu=0.30, CV=50, CV=50, CV=50, CV=50, CV=50, CV=50, pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); %herson %herson %herson %herson %herson (p0=0.30, (p0=0.40, (p0=0.20, (p0=0.40, (p0=0.50, p1=0.10, p1=0.20, p1=0.05, p1=0.20, p1=0.30, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, power=0.80, power=0.80, power=0.80, power=0.90, power=0.80, n=25, n=35, n=40, n=50, n=45, n1=15, n1=20, n1=20, n1=25, n1=25, mu=0.30, mu=0.40, mu=0.20, mu=0.40, mu=0.50, CV=50, CV=50, CV=50, CV=50, CV=50, pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); 62 CV=., CV=., CV=., CV=., CV=., CV=., pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); Appendix %herson (p0=0.50, p1=0.30, alpha=0.05, power=0.90, n=50, n1=25, mu=0.50, CV=50, pp0=0.85); ods rtf file='herson2.rtf'; proc print data=table noobs; title 'table3(Beta distribution with mu=p0 and C.V = 50%): '; var p0 p1 alpha power pp0 n c alpha1 power1 n1 c1 pp alpha2 power2 ASN0 ASN1; format alpha power alpha1 power1 alpha2 power2 pp f5.3 asn0 asn1 f4.1; run; ods rtf close; 63 Appendix Herson3.sas /******************************************************************************* Organon/CTO/Biometrics ******************************************************************************** Program : Herson3.sas Author : Mustapha Setta Protocol : Not applicable Compound : Not applicable Date : 21 jun 2005 Purpose : Apply Herson's (1979) method for 3-stage design Reference : Herson J. Predictive Probability Early Termination Plans For Phase II clinical Trials. Biometrics 1979; 35: 775-783. Remarks : This program gives a three-stage design including an early termination plan based on Bayesian predictive probability (PP). The study will be terminated after the first stage if the PP1 to reject H0 is "high". The study will be terminated after the second stage if the PP2 to reject H0 is "high". In this design the hypotheses are: H0: p>=p0 HA: p<p0 p0 p1 is a promising level. is unpromising level. Input : (O=optional, M=mandatory) p0 (M) p1 (M) Alpha (M) Power (M) n (O) value of a promising response probability. value of an unpromising response probability. the Type I error (one-sided). the power of the test. sample size of the treatment group. if n=. , then the program will calculate sample size based on alpha and power. 64 Appendix n1 (O) the number of patients treated in the first interim analysis. if n1=. , then the program will take n1= (n/3). the number of patients treated in the second interim analysis if n2=. , then the program will take n2= (n/3). the mean of the beta (prior) distribution of response probability p if Mu=. , then the program assumes the Uniform distribution. The coefficient of variation, i.e. an expression of the degree of confidence in Mu. if CV=. , then we the Uniform distribution. a certain threshold that is used for interim analyses. n2 (O) Mu (O) CV (O) PP0 (M) Note: if a parameter is not given a value, then explicitly specify as missing '.'. Output : C the rejection point at the final analysis, H0 will be then rejected when <= C responses are observed in n patients. ALPHA1 the corresponding type I error of the one-stage designs. POWER1 the power corresponding with the one-stage design. C1 the rejection point at the first interim analysis, H0 will be rejected when <=C1 responses are observed in n1 patients. PP1 the predictive probability to reject H0, given R1 responses in n1 patients C2 The rejection point at the second interim analysis, H0 will be then rejected when <=C2 responses are observed in n1+ n2 patients. PP2 The predictive probability to reject H0, given R2 responses in n1+n2 patients ALPHA2 the type I error that correspond with the three-stage design. POWER2 the power of the three-stage designs. ASN0 the expected sample size under H0 (p=p0, promising). ASN1 the expected sample size under HA (p=p1, unpromising). - Data sets : - Files : The usual log- and lst-files - Other : SAS version : 8.2 for winNT 4.0 ******************************************************************************** (This part must be filled in for every modification that is made) 65 Appendix Date Modif. : Done by : Description : *******************************************************************************/ %*include sasopt; /******************************************************************************* FORMAT DEFINITIONS *******************************************************************************/ /******************************************************************************* MACRO DEFINITIONS *******************************************************************************/ %macro preject2 (p,preject); * probability to reject H0 after 2nd stage; &preject=0; do k=c1+1 to c2 by 1; if k>0 then px=(probbnml(&p,n1,k)-probbnml(&p,n1,k-1))*probbnml(&p,n2,c2-k); else px=probbnml(&p,n1,k)*probbnml(&p,n2,c2-k); &preject=&preject+px; end; drop k px; %mend; %macro preject3 (p,preject); * probability to reject H0 after 3rd stage; &preject=0; do k=max(c1+1,0) to min(c,n1) by 1; do l=max(c2-k+1,0) to min(c-k,n2) by 1; if k>0 then Q1=probbnml(&p,n1,k)-probbnml(&p,n1,k-1); else Q1=probbnml(&p,n1,k); if l>0 then Q2=probbnml(&p,n2,l)-probbnml(&p,n2,l-1); else Q2=probbnml(&p,n2,l); &preject=&preject+Q1*Q2*probbnml(&p,(n-n1-n2),c-k-l); end; end; drop k l Q1 Q2; %mend; 66 Appendix %macro critical (nint, crint, cname, ppname); data temp; set pp; rint=-1; pp=1.0; output; do rint=0 to c by 1; pp=0; do r=rint to c by 1; lpr1=log((&nint+b+1)/(n+b+1)) +(lgamma(&nint+b+1)+lgamma(n-&nint+1)+lgamma(r+a+1)+lgamma(n+b-r-a+1)) -(lgamma(n+b+1)+lgamma(r-rint+1)+lgamma(rint+a+1)+lgamma(&nint+b-rint-a+1)+lgamma(n-&nint-r+rint+1)); pp=pp+exp(lpr1); end; output; end; run; proc univariate data=temp noprint; by p0 p1 alpha power pp0 n c; var rint; output out=uniout max=&cname; where (pp>0.85); run; data &crint (rename=(pp=&ppname)); merge temp (drop=r lpr1) uniout; by p0 p1 alpha power pp0 n c; if rint=&cname; run; %mend; %macro herson (p0, p1, alpha, power, n, n1, n2, mu, cv, pp0); * Calculate n (if needed) and C; data ssize; p0=&p0; p1=&p1; alpha=&alpha; power=&power; pp0=&pp0; %if &n ne . %then n=&n; %else %do; 67 Appendix za=probit(1-alpha); zb=probit(power); pbar=(p0+p1)/2; n=(za+zb)**2*pbar*(1-pbar)/(p1-p0)**2; %end;; n=ceil(n); do k=0 to n; xalpha=probbnml(p0,n,k); output; end; run; proc univariate data=ssize noprint; by p0 p1 alpha power pp0 n; var k; output out=crit max=c; where (xalpha<alpha); run; * Calculate a and b for Hersons Beta prior with pdf(p)~p^a*p^(b-a); * If no mu or CV is given a uniform prior is assumed; data beta; set crit; %if &mu ne . and &cv ne . %then %do; v=&cv/100; b=(1-&mu*(1+3*v**2))/(v**2*&mu); a=(b+2)*&mu-1; drop v; %end; %else %do; a=0; b=0; %end;; run; * Calculate Predictive Probability; data pp; set beta; %if &n1 ne . %then n1=&n1; %else n1=ceil(n/3);; 68 Appendix %if &n2 ne . %then n2=&n2; %else n2=ceil(n/3);; run; %critical (nint=n1, crint=crit1, cname=c1, ppname=pp1); %critical (nint=(n1+n2), crint=crit2, cname=c2, ppname=pp2); * Calculate ASN, alpha and power (also of one-stage design); data alpower; merge pp crit1 crit2; by p0 p1 alpha power pp0 n c; alpha1=probbnml(&p0,n,c); power1=probbnml(&p1,n,c); if c1>=0 then prej10=probbnml(&p0,n1,c1); else prej10=0; if c1>=0 then prej11=probbnml(&p1,n1,c1); else prej11=0; %preject2 (p0, prej20); %preject2 (p1, prej21); %preject3 (p0, prej30); %preject3 (p1, prej31); alpha2=prej10+prej20+prej30; power2=prej11+prej21+prej31; ASN0=n1+(1-prej10)*n2+(1-prej10-prej20)*(n-n1-n2); ASN1=n1+(1-prej11)*n2+(1-prej11-prej21)*(n-n1-n2); run; proc append base=table data=alpower; run; %mend; /******************************************************************************* MAIN PROGRAM *******************************************************************************/ %herson %herson %herson %herson %herson (p0=0.30, (p0=0.40, (p0=0.20, (p0=0.40, (p0=0.50, p1=0.10, p1=0.20, p1=0.05, p1=0.20, p1=0.30, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, power=0.80, power=0.80, power=0.90, power=0.90, power=0.80, n=25, n=35, n=40, n=50, n=45, n1=10, n1=15, n1=15, n1=20, n1=15, 69 n2=10, n2=10, n2=15, n2=15, n2=15, mu=., mu=., mu=., mu=., mu=., CV=., CV=., CV=., CV=., CV=., pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); Appendix %herson (p0=0.50, p1=0.30, alpha=0.05, power=0.90, n=50, n1=20, n2=15, mu=., CV=., pp0=0.85); %herson %herson %herson %herson %herson %herson (p0=0.30, (p0=0.40, (p0=0.20, (p0=0.40, (p0=0.50, (p0=0.50, p1=0.10, p1=0.20, p1=0.05, p1=0.20, p1=0.30, p1=0.30, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, power=0.80, power=0.80, power=0.90, power=0.90, power=0.80, power=0.90, n=25, n=35, n=40, n=50, n=45, n=50, n1=10, n1=15, n1=15, n1=20, n1=15, n1=20, n2=10, n2=10, n2=15, n2=15, n2=15, n2=15, mu=0.10, mu=0.20, mu=0.05, mu=0.20, mu=0.30, mu=0.30, CV=50, CV=50, CV=50, CV=50, CV=50, CV=50, pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); %herson %herson %herson %herson %herson %herson (p0=0.30, (p0=0.40, (p0=0.20, (p0=0.40, (p0=0.50, (p0=0.50, p1=0.10, p1=0.20, p1=0.05, p1=0.20, p1=0.30, p1=0.30, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, alpha=0.05, power=0.80, power=0.80, power=0.90, power=0.90, power=0.80, power=0.90, n=25, n=35, n=40, n=50, n=45, n=50, n1=10, n1=15, n1=15, n1=20, n1=15, n1=20, n2=10, n2=10, n2=15, n2=15, n2=15, n2=15, mu=0.30, mu=0.40, mu=0.20, mu=0.40, mu=0.50, mu=0.50, CV=50, CV=50, CV=50, CV=50, CV=50, CV=50, pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); pp0=0.85); ods rtf file='herson3.rtf'; proc print data=table noobs; var p0 p1 pp0 n c alpha1 power1 n1 c1 pp1 n2 c2 pp2 alpha2 power2 ASN0 ASN1; format alpha1 power1 alpha2 power2 pp1 pp2 f5.3 asn0 asn1 f4.1; run; ods rtf close; 70 Appendix Thall.sas /******************************************************************************* Organon/CTO/Biometrics ******************************************************************************** Program : thall.sas Author : Mustapha Setta Protocol : Not applicable Compound : Not applicable Date : 17 JUN 05 Purpose : Apply Simon's (1989) method for 2-stage design Reference : Thall PF & Simon R. Practical Bayesian guidelines for phase IIB clinical trials. Biometrics 1994; 50: 337-349. Remarks : This program the probabilities to reject or accept an experimental treatment E comparing to a Standard treatment S. This design is based upon a Bayesian approach assuming that the prior of the response rate for both treatments is Beta distributed. Input : (O=optional, M=mandatory) W90 (M) u0 (M) c1 (M) parameters). Delta (M) Nmin (M) Nmax (M) Pu (M) Pl (M) Output the value of the 90% confidence interval of the standard treatment S. the mean of the prior distribution of S. the concentration parameter of the prior of E (c1=a1+b1, with a1 the value of a target improvement of E over S the minimum value of sample size N the maximum value of sample size N. a predetermined probability, mostly a large value such as 0.95-0.99 a predetermined probability, mostly a small value such as 0.01-0.05 : 71 and b1 the corresponding Appendix Un for each n, the smallest integer x (number of responses) such that Pr (pe-ps>0|Xn=x)>pu. Ln the largest integer x<Un such that Pr (pe-ps>delta|Xn=x) <pl. P25 The empirical 25th percentiles of the achieved sample size N Pr [N=P25]>=0.25. The empirical 50th percentiles of the achieved sample size N Pr [N=P50]>=0.50. P50 P75 The empirical 75th percentiles of the achieved sample size N Pr [N=P75]>=0.75. PPLUS PMIN the probability to declare E promising the probability to declare E not promising - Data sets : - Files : the usual log- and lst-files - Other : SAS version : 8.2 for winNT 4.0 ******************************************************************************** (This part must be filled in for every modification that is made) Date Modif. : Done by : Description : *******************************************************************************/ /******************************************************************************* FORMAT DEFINITIONS *******************************************************************************/ /******************************************************************************* MACRO DEFINITIONS *******************************************************************************/ 72 Appendix /******************************************************************************* MAIN PROGRAM *******************************************************************************/ %macro thall (W90, u0, c1, delta, nmin, nmax, pu, pl); * Calculate a and b for Beta prior with pdf(p)~p^a*p^(b-a); data new; u0=&u0; W90=&W90; do a=1 to 200 by 0.01; b=((1-u0)/u0)*a; x=betainv(0.05,a,b); y=betainv(0.95,a,b); z=y-x; output; end; run; proc univariate data=new noprint; by u0; var a b; output out=beta min=a0 b0; where (z<=W90 ); run; * calculate decision criteria; data int; set beta; delta=&delta; c1=&c1; u1=u0+(delta/2); a1=u1*c1; b1=c1-a1; do n=&nmin to &nmax; do x=0 to n; F0=0; 73 Appendix F1=0; step=0.01; do teta=0 to 1-delta by step; P0=(1-CDF('beta',teta+delta,a1+x,b1+n-x,0,1))*PDF('beta',teta,a0,b0,0,1); P1=(1-CDF('beta',teta,a1+x,b1+n-x,0,1))*PDF('beta',teta,a0,b0,0,1); F0=F0+P0*step; F1=F1+P1*step; end; drop p0 p1; output; end; end; run; proc univariate data=int noprint; by n; var x; output out=lo max=Ln; where F0<=&pl; run; proc univariate data=int noprint; by n; var x; output out=up min=Un; where F1>=&pu; run; data bounds; merge lo up; by n; if Ln=. then Ln=-1; if Un=. then Un=n+1; if ln>=un then ln=un-1; run; proc print; run; 74 Appendix proc iml; use bounds; read all var {un} into upper; read all var {ln} into lower; upper=j(&nmin-1,1,&nmax+1)//upper; lower=j(&nmin-1,1,-2)//lower; tau0=j(&nmax,1,0); tau=j(&nmax,&nmax,0); p=&u0+&delta; q=1-p; tau0[1]=q; tau[1,1]=p; do n=2 to &nmax; x=0; tau0[n]=q*tau0[n-1]*((lower[n-1]<x) & (x<upper[n-1])); x=1; tau[n,x]=p*tau0[n-1]*((lower[n-1]<(x-1))& ((x-1)<upper[n-1])) +q*tau[n-1,x]*((lower[n-1]<x) & (x<upper[n-1])); do x=2 to n; tau[n,x]=p*tau[n-1, x-1]*((lower[n-1]<(x-1))& ((x-1)<upper[n-1])) +q*tau[n-1,x]*((lower[n-1]<x) & (x<upper[n-1])); end; end; pnplus=j(&nmin-1,1,0)//j(&nmax-&nmin+1,1,-1); pnmin=j(&nmin-1,1,0)//j(&nmax-&nmin+1,1,-1); pnplus[&nmin]=1-probbnml(p,&nmin,upper[&nmin]-1); pnmin[&nmin]=probbnml(p,&nmin,lower[&nmin]); pplus=j(&nmax,1, 0); pplus[&nmin]=pnplus[&nmin]; pmin=j(&nmax,1,0); pmin[&nmin]=pnmin[&nmin]; do n=&nmin+1 to &nmax; pnplus[n]=p*tau[n-1,(upper[n-1]-1)]*(upper[n]=upper[n-1])*(lower[n-1]<(upper[n-1]-1)); pnmin[n]=q*tau[n-1,(lower[n-1]+1)]*(lower[n]=(lower[n-1]+1))*(lower[n-1]<(upper[n-1]-1)); pplus[n]=pplus[n-1]+pnplus[n]; pmin[n]=pmin[n-1]+pnmin[n]; end; stot=pplus+pmin; pinconcl=1-stot; 75 Appendix nntl=j(&nmax,1,0); nntl[1]=1; do n=2 to &nmax; nntl[n]=nntl[n-1]+1; end; result=pnplus||pplus||pnmin||pmin||stot||pinconcl||nntl; create result from result [colname={pnplus pplus pnmin pmin stot pinconcl nntl}]; append from result; quit; proc univariate data=result noprint; var nntl; output out=p25 min=p25; where (stot>0.25 or nntl=&nmax); run; proc univariate data=result noprint; var nntl; output out=p50 min=p50; where (stot>0.5 or nntl=&nmax); run; proc univariate data=result noprint; var nntl; output out=p75 min=p75; where (stot>0.75 or nntl=&nmax); run; data result2; merge p25 p50 p75 result (where=(nntl=&nmax)); run; proc append base=table data=result2; run; %mend; 76 Appendix %*thall %*thall %*thall %*thall (W90=0.20, (W90=0.30, (W90=0.20, (W90=0.20, u0=0.20, u0=0.20, u0=0.20, u0=0.20, c1=2, delta=0.15, nmin=10, nmax=65, pu=0.95, pl=0.05); c1=2, delta=0.15, nmin=10, nmax=65, pu=0.95, pl=0.05); c1=2, delta=0.15, nmin=10, nmax=65, pu=0.95, pl=0.05); c1=10, delta=0.15, nmin=10, nmax=65, pu=0.95, pl=0.05); * calulate the properties for DVT and Bleeding; %thall (W90=0.05, u0=0.05, c1=2, delta=0.05, nmin=10, nmax=50, pu=0.95, pl=0.05); %thall (W90=0.05, u0=0.05, c1=2, delta=0.15, nmin=10, nmax=50, pu=0.95, pl=0.05); %*thall (W90=0.05, u0=0.15, c1=2, delta=0.05, nmin=10, nmax=50, pu=0.95, pl=0.05); %*thall (W90=0.05, u0=0.15, c1=2, delta=0.20, nmin=10, nmax=50, pu=0.95, pl=0.05); ods rtf file='Thall.rtf'; proc print data=table noobs; var P25 P50 P75 PPLUS PMIN pinconcl; format pplus pmin stot pinconcl f5.3; run; 77 Appendix Flemingfive.sas /******************************************************************************* Organon/CTO/Biometrics ******************************************************************************** Program : fivestage.sas Author : Mustapha Setta Protocol : Not applicable Compound : Not applicable Date : 02 AUG 05 Purpose : Evaluate five-stage design where boundaries are calculated according to Fleming (1982) Reference : Fleming TR. One-Sample Multiple Testing Procedure for Phase II Clinical trials. Biometrics; 38:143-151. Remarks : k=number l=number m=number s=number of of of of responses responses responses responses in in in in 1st 2nd 3rd 4th stage stage stage stage -> -> -> -> Q1=prob Q2=prob Q3=prob Q4=prob to to to to have have have have This program calculates five-stage design. After each stage, H0 can be rejected or 'accepted'. In this design the hypotheses are: H0: p<=p0 , p0 is unpromising level. HA: p>p1, p1 is promising level. Input p0 (M) P1 (M) : (O=optional, M=mandatory) value of the unpromising response probability. value of the promising response probability. 78 k l m s responses responses responses responses Appendix Alpha (M) n1 (M) n2 (M) n3 (M) n4 (M) the Type I error (one-sided). the number of patients treated in the first interim analysis. the number of patients treated in the second interim analysis. the number of patients treated after the second interim analysis. the number Output : A1 the acceptance point after the first interim analysis, i.e. accept H0 whenever <=A1 responses are observed in n1 patients. the acceptance point after the second interim analysis, i.e. accept H0 whenever <=A2 responses are observed in n1+ n2 patients. the acceptance after the third interim analysis, i.e. accept H0 whenever <=A3 responses are observed in n1+ n2+n3 patients. the acceptance after the fourth interim analysis, i.e. accept H0 whenever <=A4 responses are observed in n1+ n2+n3+n4 patients. the acceptance after the final analysis, i.e. accept H0 whenever <=A5 responses are observed in n1+ n2+n3+n4+n5 patients. A2 A3 A4 A5 R1 ASN0 ASN1 the rejejection point after the first interim analysis, i.e. reject H0 in favor of HA whenever <=R1 responses the rejection point after the second interim analysis, i.e. reject H0 in favor of HA whenever <=R2 responses the rejection point after the third interim analysis, i.e. reject H0 in favor of HA whenever <=R3 responses the rejection point after the fourth interim analysis, i.e. reject H0 in favor of HA whenever <=R4 responses the rejection point in the final analysis, i.e. reject H0 in favor of HA whenever <=R5 responses the expected sample size under H0 (p=p0, unpromising). the expected sample size under HA (p=p1, promising). ALPHA1 POWER the exact type I error of the two- or three-stage design. the exact power of the two-or the three-stage design. R2 R3 R4 R5 - Data sets : 79 are observed in n1 patients. are observed in n1+ n2 patients. are observed in n1+ n2+n3 patients. are observed in n1+ n2+n3+n4 patients. are observed in n1+ n2+n3+n4+n5 patients. Appendix - Files - Other : the usual log- and lst-files : SAS version : 8.2 for winNT 4.0 *************************************************************************** (This part must be filled in for every modification that is made) Date Modif. : Done by : Description : *******************************************************************************/ /******************************************************************************* FORMAT DEFINITIONS *******************************************************************************/ /******************************************************************************* MACRO DEFINITIONS *******************************************************************************/ %macro preject2 (p,preject); * probability to reject H0 after 2nd stage; &preject=0; do k=max(a1+1,0) to min(r1-1,n1); put k; if k>0 then Q1=(probbnml(&p,n1,k)-probbnml(&p,n1,k-1)); else Q1=probbnml(&p,n1,k); if 0<=r2-(k+1)<=n2 then Q2=(1-probbnml(&p,n2,r2-(k+1))); else if r2-(k+1)<0 then Q2=1; else Q2=0;*r2-(k+1)>n2:rejection not possible; &preject=&preject+Q1*Q2; end; drop k Q1 Q2; %mend; %macro paccept2 (p,paccept); 80 Appendix * probability to 'accept' H0 after 2nd stage; &paccept=0; do k=max(a1+1, 0) to min(r1-1,n1); if k>0 then Q1=(probbnml(&p,n1,k)-probbnml(&p,n1,k-1)); else Q1=probbnml(&p,n1,k); if 0<=(a2-k)<=n2 then Q2=probbnml(&p,n2,a2-k); else if (a2-K)>n2 then Q2=1; else Q2=0; *(a2-k)<0:acceptance not possible; &paccept=&paccept+Q1*Q2; end; drop k Q1 Q2; %mend; %macro preject3 (p, preject); * probability to reject H0 after 3rd stage; &preject=0; do k=max(a1+1, 0) to min(r1-1,n1); do l=max(a2-k+1,0) to min(r2-k-1,n2); put k l; if k>0 then Q1=probbnml(&p,n1,k)-probbnml(&p,n1,k-1); else Q1=probbnml(&p,n1,k); if l>0 then Q2=(probbnml(&p,n2,l)-probbnml(&p,n2,l-1)); else Q2=probbnml(&p,n2,l); if 0<=r3-(k+l+1)<=n3 then Q3=1-probbnml(&p, n3, r3-(k+l+1)); else if r3-(k+l+1)<0 then Q3=1; else Q3=0;*r3-(k+l+1)>n3:rejection not possible; &preject=&preject+Q1*Q2*Q3; end; end; drop k l Q1 Q2 Q3; %mend; %macro paccept3 (p, paccept); * probability to 'accept' H0 after 3rd stage; 81 Appendix &paccept=0; do k=max(a1+1, 0) to min(r1-1,n1); do l=max(a2-k+1,0) to min(r2-k-1,n2); if k>0 then Q1=probbnml(&p,n1,k)-probbnml(&p,n1,k-1); else Q1=probbnml(&p,n1,k); if l>0 then Q2=(probbnml(&p,n2,l)-probbnml(&p,n2,l-1)); else Q2=probbnml(&p,n2,l); if 0<=a3-(k+l)<=n3 then Q3=probbnml(&p, n3, a3-(k+l)); else if a3-(k+l)>n3 then Q3=1; else Q3=0;*a3-(k+l)<0:acceptance not possible; &paccept=&paccept+Q1*Q2*Q3; end; end; drop k l Q1 Q2 Q3; %mend; %macro preject4 (p, preject); * probability to reject H0 after 4rd stage; &preject=0; do k=max(a1+1, 0) to min(r1-1,n1); do l=max(a2-k+1,0) to min(r2-k-1,n2); do m=max(a3-k-l+1,0) to min(r3-k-l-1,n3); put k l m; if k>0 then Q1=probbnml(&p,n1,k)-probbnml(&p,n1,k-1); else Q1=probbnml(&p,n1,k); if l>0 then Q2=(probbnml(&p,n2,l)-probbnml(&p,n2,l-1)); else Q2=probbnml(&p,n2,l); if m>0 then Q3=(probbnml(&p,n3,m)-probbnml(&p,n3,m-1)); else Q3=probbnml(&p,n2,m); if 0<=r4-(k+l+m+1)<=n4 then Q4=1-probbnml(&p, n4, r4-(k+l+m+1)); else if r4-(k+l+m+1)<0 then Q4=1; else Q4=0;*r4-(k+l+m+1)>n4:rejection not possible; &preject=&preject+Q1*Q2*Q3*Q4; end; end; end; drop k l m Q1 Q2 Q3 Q4; %mend; 82 Appendix %macro paccept4 (p, paccept); * probability to reject H0 after 4th stage; &paccept=0; do k=max(a1+1, 0) to min(r1-1,n1); do l=max(a2-k+1,0) to min(r2-k-1,n2); do m=max(a3-k-l+1,0) to min(r3-k-l-1,n3); if k>0 then Q1=probbnml(&p,n1,k)-probbnml(&p,n1,k-1); else Q1=probbnml(&p,n1,k); if l>0 then Q2=(probbnml(&p,n2,l)-probbnml(&p,n2,l-1)); else Q2=probbnml(&p,n2,l); if m>0 then Q3=(probbnml(&p,n3,m)-probbnml(&p,n3,m-1)); else Q3=probbnml(&p,n2,m); if 0<=a4-(k+l+m)<=n4 then Q4=probbnml(&p, n4, a4-(k+l+m)); else if a4-(k+l+m)>n4 then Q4=1; else Q4=0;*a4-(k+l+m)<0:acceptance not possible; &paccept=&paccept+Q1*Q2*Q3*Q4; end; end; end; drop k l m Q1 Q2 Q3 Q4; %mend; %macro preject5 (p, preject); * probability to reject H0 after 5th stage; &preject=0; do k=max(a1+1, 0) to min(r1-1,n1); do l=max(a2-k+1,0) to min(r2-k-1,n2); do m=max(a3-k-l+1,0) to min(r3-k-l-1,n3); do s=max(a4-k-l-m+1,0) to min(r4-k-l-m-1,n4); put k l m s; if k>0 then Q1=probbnml(&p,n1,k)-probbnml(&p,n1,k-1); else Q1=probbnml(&p,n1,k); if l>0 then Q2=(probbnml(&p,n2,l)-probbnml(&p,n2,l-1)); else Q2=probbnml(&p,n2,l); 83 Appendix if m>0 then Q3=(probbnml(&p,n3,m)-probbnml(&p,n3,m-1)); else Q3=probbnml(&p,n2,m); if s>0 then Q4=(probbnml(&p,n4,s)-probbnml(&p,n4,s-1)); else Q4=probbnml(&p,n4,s); if 0<=r5-(k+l+m+s+1)<=n5 then Q5=1-probbnml(&p, n5, r5-(k+l+m+s+1)); else if r5-(k+l+m+s+1)<0 then Q5=1; else Q5=0;*r5-(k+l+m+s+1)>n5:rejection not possible; &preject=&preject+Q1*Q2*Q3*Q4*Q5; end; end; end; end; drop k l m s Q1 Q2 Q3 Q4 Q5; %mend; %macro all (n1, n2, n3, n4, n5, p0, p1, alpha, a1, a2, a3, a4, a5, r1, r2, r3, r4, r5); data fivestage; n1=&n1; n2=&n2; n3=&n3; n4=&n4; n5=&n5; n=&n1+&n2+&n3+&n4+&n5; p0=&p0; p1=&p1; za=probit (1-&alpha); * one-sided; p2=(sqrt(n*p0)+sqrt(1-p0)*za)**2/(n+za**2); if &r1 ne . then r1=&r1; else r1=round(n1*p0+za*sqrt(n*p0*(1-p0)))+1; if &a1 ne . then a1=&a1; else a1=round(n1*p2-za*sqrt(n*p2*(1-p2))); if &r2 ne . then r2=&r2; else r2=round((n2+n1)*p0+za*sqrt(n*p0*(1-p0)))+1; if &a2 ne . then a2=&a2; else a2=round((n2+n1)*p2-za*sqrt(n*p2*(1-p2))); if &r3 ne . then r3=&r3; else r3=round((n3+n2+n1)*p0+za*sqrt(n*p0*(1-p0)))+1; if &a3 ne . then a3=&a3; else a3=round((n3+n2+n1)*p2-za*sqrt(n*p2*(1-p2))); if &r4 ne . then r4=&r4; else r4=round((n4+n3+n2+n1)*p0+za*sqrt(n*p0*(1-p0)))+1; if &a4 ne . then a4=&a4; else a4=round((n4+n3+n2+n1)*p2-za*sqrt(n*p2*(1-p2))); if &r5 ne . then r5=&r5; else r5=round(n*p0+za*sqrt(n*p0*(1-p0)))+1; if &a5 ne . then a5=&a5; else a5=r5-1; if r1<=n1 then prej01=1-probbnml(p0, n1,r1-1); else prej01=0; 84 Appendix if r1<=n1 then prej11=1-probbnml(p1, n1, r1-1); else prej11=0; if a1>=0 then pacc01=probbnml(p0,n1,a1); else pacc01=0; if a1>=0 then pacc11=probbnml(p1,n1,a1); else pacc11=0; %preject2(p0,prej02); %preject2(p1,prej12); %paccept2(p0,pacc02); %paccept2(p1,pacc12); %preject3(p0,prej03); %preject3(p1,prej13); %paccept3(p0,pacc03); %paccept3(p1,pacc13); %preject4(p0,prej04); %preject4(p1,prej14); %paccept4(p0,pacc04); %paccept4(p1,pacc14); %preject5(p0,prej05); %preject5(p1,prej15); xalpha=prej01+prej02+prej03+prej04+prej05; xpower=prej11+prej12+prej13+prej14+prej15; asn0=n1+(1-prej01-pacc01)*n2+(1-prej01-pacc01-prej02-pacc02)*n3+(1-prej01-pacc01-prej02-pacc02-prej03-pacc03)*n4 +(1-prej01-pacc01-prej02-pacc02-prej03-pacc03-prej04-pacc04)*n5; asn1=n1+(1-prej11-pacc11)*n2+(1-prej11-pacc11-prej12-pacc12)*n3+(1-prej11-pacc11-prej12-pacc12-prej13-pacc13)*n4 +(1-prej11-pacc11-prej12-pacc12-prej13-pacc13-prej14-pacc14)*n5; run; proc append base=table data=fivestage; run; %mend; /******************************************************************************* MAIN PROGRAM *******************************************************************************/ *Ginsberg; %*all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.05, p1=0.10, alpha=0.05, a1=-1, a2=-1, a3=-1, a4=-1, a5=5, r1=3, r2=4, r3=5, r4=5, r5=6); %*all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.05, p1=0.20, alpha=0.05, a1=-1, a2=-1, a3=-1, a4=-1, a5=5, r1=3, r2=4, r3=5, r4=5, r5=6); %all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.15, p1=0.20, alpha=0.05, a1=-1, a2=-1, a3=-1, a4=-1, a5=12, r1=4, r2=7, r3=9, r4=11, r5=13); %all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.15, p1=0.35, alpha=0.05, 85 Appendix a1=-1, a2=-1, a3=-1, a4=-1, a5=12, r1=4, r2=7, r3=9, r4=11, r5=13); *Fleming; %*all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.05, p1=0.10, alpha=0.05, a1=., a2=., a3=., a4=., a5=., r1=., r2=., r3=., r4=., r5=.); %*all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.05, p1=0.20, alpha=0.05, a1=., a2=., a3=., a4=., a5=., r1=., r2=., r3=., r4=., r5=.); %all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.15, p1=0.20, alpha=0.05, a1=., a2=., a3=., a4=., a5=., r1=., r2=., r3=., r4=., r5=.); %all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.15, p1=0.35, alpha=0.05, a1=., a2=., a3=., a4=., a5=., r1=., r2=., r3=., r4=., r5=.); *Thall and Simon; %*all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.05, p1=0.10, alpha=0.05, a1=0, a2=0, a3=0, a4=1, a5=2, r1=3, r2=4, r3=5, r4=6, r5=7); %*all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.05, p1=0.20, alpha=0.05, a1=0, a2=1, a3=2, a4=4, a5=5, r1=3, r2=4, r3=5, r4=6, r5=6); %all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.15, p1=0.20, alpha=0.05, a1=0, a2=1, a3=2, a4=4, a5=5, r1=4, r2=7, r3=9, r4=11, r5=13); %all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.15, p1=0.35, alpha=0.05, a1=1, a2=3, a3=6, a4=9, a5=12, r1=4, r2=7, r3=9, r4=11, r5=13); *reversing Hypothesis Fleming; %*all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.90, p1=0.95, alpha=0.05, a1=., a2=., a3=., a4=., a5=., r1=., r2=., r3=., r4=., r5=.); %*all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.80, p1=0.95, alpha=0.05, a1=., a2=., a3=., a4=., a5=., r1=., r2=., r3=., r4=., r5=.); %all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.80, p1=0.85, alpha=0.05, a1=., a2=., a3=., a4=., a5=., r1=., r2=., r3=., r4=., r5=.); %all (n1=10, n2=10, n3=10, n4=10, n5=10, p0=0.65, p1=0.85, alpha=0.05, a1=., a2=., a3=., a4=., a5=., r1=., r2=., r3=., r4=., r5=.); ods rtf file='all.rtf'; proc print data=table noobs; var p0 p1 p2 a1 a2 a3 a4 a5 r1 r2 r3 r4 r5 xalpha xpower asn0 asn1 ; format p2 xalpha xpower f5.3 asn0 asn1 f4.1; run; 86