// CS 1538 Fall 2009 // This handout shows how exponential and normal random variates can be generated. // The exponential distribution is generated using the inverse transform // technique and the normal distribution is generated using the polar method. // It also shows how the Kolmogorov-Smirnov test can be used to test goodness of // fit of a generated distribution with a theoretical distribution. The basic // technique is similar to that used for testing uniform distributions. However, // as we saw with the Maximum-of-T test extra credit exercise, for non-uniform // distributions we must first somehow transform the distribution into a uniform, // distribution, then do the test. import javax.swing.*; import java.awt.*; import java.util.*; import java.text.*; // A lot of the code in the main program is simply for the graphical representation // of the distributions. To do this I simply discretize the values and create a // histogram of the result. I then make a polygon of the histogram values and print // out the result graphically. Because of the discretization, the final graph will // look better or worse depending upon two factors: // 1) The overall range of the values -- the more different values there are the // smoother the curve will look // 2) The number of values generated -- the more values generated the better the // overall distribution will resemble the theoretical distribution, and thus the // better it will look // // For an example of a GOOD curve: Try exponential with mean 10 and 200000 values // For an example of a BAD curve: Try normal with mean 5, stddev 1 and 30 values // Note also that the normal distribution can generate negative values -- in this // program they are not shown. public class Variates extends JFrame { public double lambda, mean, stddev, theta; public int npoints, k, low, high; int [] hist; double [] thepoints; Polygon P; double nextval, sum, ave; int maxX = 0, maxY = 0; int maxW = 800; int maxH = 600; int baseX = 100, baseY = 650; double Xmult, Ynorm; int distType; String distName, msg; DecimalFormat formatter = new DecimalFormat("0.0000"); public Variates() { super("Distribution Demo"); distType = Integer.parseInt(JOptionPane.showInputDialog(null, "1 = Exponential \n2 = Normal \n3 = Erlang \n4 = Uniform")); if (distType == 1) { mean = Double.parseDouble(JOptionPane.showInputDialog(null, "Enter Mean")); distName = "Exponential"; lambda = 1/mean; msg = "Lambda = " + lambda; } else if (distType == 2) { mean = Double.parseDouble(JOptionPane.showInputDialog(null, "Enter Mean")); distName = "Normal"; stddev = Double.parseDouble(JOptionPane.showInputDialog(null, "Enter Standard Deviation")); msg = "Standard Dev. = " + stddev; } else if (distType == 3) { mean = Double.parseDouble(JOptionPane.showInputDialog(null, "Enter individual mean")); distName = "Erlang"; k = Integer.parseInt(JOptionPane.showInputDialog(null, "Enter k")); theta = 1/(k * mean); msg = "k = " + k + " theta = 1/ave = " + theta; } else { distName = "Discrete Uniform"; low = Integer.parseInt(JOptionPane.showInputDialog(null, "Low value")); high = Integer.parseInt(JOptionPane.showInputDialog(null, "High value")); msg = "Low: " + low + " High: " + high; } npoints = Integer.parseInt(JOptionPane.showInputDialog(null, "How many values?")); thepoints = new double[npoints+1]; hist = new int [500]; for (int i = 0; i < hist.length; i++) hist[i] = 0; for (int i = 0; i < thepoints.length; i++) thepoints[i] = 0; sum = 0; for (int i = 0; i < npoints; i++) { if (distType == 1) nextval = RandDist.exponential(lambda); else if (distType == 2) nextval = RandDist.normal(mean, stddev); else if (distType == 3) nextval = RandDist.erlang(k, theta); else nextval = RandDist.uniform(low, high); sum += nextval; addPoint(nextval); thepoints[i+1] = nextval; } getResults(); setSize(1000, 700); setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); setVisible(true); } public static void main (String [] args) { Variates T = new Variates(); } // Add the next point to the histogram (increment the data in the // index of the rounded value). Keep track of the maximum index and // data for scaling of the graph. private void addPoint(double nextPoint) { int loc = (int) (nextPoint); // discretize actual point if (loc >= 0) { hist[loc]++; if (loc > maxX) maxX = loc; if (hist[loc] > maxY) maxY = hist[loc]; } } // Create a polygon with all of the points and display it graphically // Some thought must be used to scale the coordinates correctly. The // X-axis is scaled based on the maximum value generated by the distribution // and the Y-axis is scaled based on the maximum frequency of any value // in the distribution. private void getResults() { P = new Polygon(); Xmult = (double)maxW/maxX; Ynorm = (double)maxH/maxY; P.addPoint(baseX, baseY - (int)(maxY*Ynorm)); for (int i = 0; i <= maxX; i++) { P.addPoint(baseX + (int)(i*Xmult), baseY - (int) (hist[i]*Ynorm)); System.out.println("Val: " + i + " Freq: " + hist[i]); } ave = sum/npoints; System.out.println("Ave: " + ave); P.addPoint(baseX + (int)(Xmult*maxX), baseY); P.addPoint(baseX, baseY); } // Draw the graph and some strings as well for labeling public void paint(Graphics G) { Graphics2D g2d = (Graphics2D) G; g2d.drawString(distName + " Distribution ", 800, 50); g2d.drawString("Average: " + formatter.format(ave), 800, 70); g2d.drawString(msg, 800, 90); g2d.draw(P); g2d.drawString(""+0,baseX, baseY+20); g2d.drawString(""+maxX/2, baseX + maxW/2, baseY+20); g2d.drawString(""+maxX, baseX + maxW, baseY+20); g2d.drawString(""+0,baseX-25,baseY); g2d.drawString(""+maxY, baseX-25, baseY-(int)(maxY*Ynorm)); if (distType == 1) { KStest1(g2d); KStest2(g2d); } } // Two different Kolmogorov-Smirnov tests are shown below for the // exponential data. The first technique is particular to the exponential // distribution, and can be used without knowing the rate of the distribution // (lambda). It transforms the original points in the following way: // newpoint[i] = sum of original points 1 to i // newpoint[i] = newpoint[i] / sum of all points // The resulting array newpoints should have a uniform distribution over (0,1) // if the original data was in fact exponential. The regular KS evaluation can // then be used with the critical values as indicated in the text. The idea is // that as values are generated, the sum of all of the values increases gradually // to one and sum should be the same as the uniform cdf. // See Banks p. 331 for more details. // The motivation for using this would be something along the following lines: // You would like to determine a good input distribution for a simulation you // are developing. You have sampled some real data and think it may be // exponential, but want to test it to be sure. If you verify that it is // exponential with this test, you can then use an exponential distribution for // your input data. Note however, that this test will not give you the value // of lambda. You can make a good guess at lambda though by using 1 over the // observed mean. // // The second technique is more generally applicable, and is done in the following // way: // For each x value generated in the distribution to test, calculate F(x) // where F is the cdf for the theoretical distribution it is being compared with. // Then sort the resulting values, which should have a uniform distribution over // (0, 1) if F(x) matches the distribution being tested. The downside of this // version of the KS test is that the critical values used for comparison differ // for each distribution (and are not the standard ones given in Table A.8 of the // Banks text). For more information on these tests, see the Law and Kelton // "Simulation Modeling and Analysis" text, p. 363 public void KStest1(Graphics2D g) { double [] mypoints = new double[thepoints.length]; mypoints[0] = 0.0; for (int i = 1; i < thepoints.length; i++) { mypoints[i] = mypoints[i-1] + thepoints[i]; } for (int i = 1; i < mypoints.length; i++) { mypoints[i] = mypoints[i]/sum; } // Arrays.sort(mypoints); // Not necessary since the data will be increasing double Dplus = 0.0; double Dminus = 0.0; for (int i = 1; i < npoints; i++) { double curr = ((double)i/npoints) - mypoints[i]; if (curr > Dplus) Dplus = curr; curr = mypoints[i] - ((double)(i-1)/npoints); if (curr > Dminus) Dminus = curr; } double alpha = 0.05; double D = Math.max(Dplus, Dminus); double critVal = 1.36/(Math.sqrt(npoints)); g.drawString("Results from KS test:", 800, 120); g.drawString("Degrees of freedom = " + npoints, 800, 135); g.drawString("Alpha = " + alpha, 800, 150); g.drawString("Critical value = " + critVal, 800, 165); g.drawString("Max D = " + D, 800, 180); if (D < critVal) g.drawString("It passed the test", 800, 195); else g.drawString("It failed the test", 800, 195); } public void KStest2(Graphics2D g) { double [] mypoints = new double[thepoints.length]; double empirLambda = 1/ave; // From original set of points xi create new points F(xi), // then sort them in increasing order for (int i = 1; i < mypoints.length; i++) { double subval = Math.exp(-(empirLambda*thepoints[i])); mypoints[i] = 1 - subval; //System.out.println(mypoints[i]); } //System.out.println(); Arrays.sort(mypoints); //for (int i = 1; i < mypoints.length; i++) // System.out.println(mypoints[i] + " "); double Dplus = 0.0; double Dminus = 0.0; for (int i = 1; i < npoints; i++) { double curr = ((double)i/npoints) - mypoints[i]; if (curr > Dplus) Dplus = curr; curr = mypoints[i] - ((double)(i-1)/npoints); if (curr > Dminus) Dminus = curr; } double alpha = 0.05; double D = Math.max(Dplus, Dminus); double critVal = 1.36/(Math.sqrt(npoints)); g.drawString("Results from KS2 test:", 800, 225); g.drawString("Degrees of freedom = " + npoints, 800, 240); g.drawString("Alpha = " + alpha, 800, 255); g.drawString("Max D = " + D, 800, 270); g.drawString("Critical values must be calculated", 800, 285); g.drawString(" differently in this case. See", 800, 300); g.drawString(" Law and Kelton p. 365", 800, 315); } }