/*     */ package com.dlmu.wisdomST.operation;
/*     */ 
/*     */ import com.dlmu.wisdomST.core.Point;
/*     */ import com.dlmu.wisdomST.core.Rectangle;
/*     */ import com.dlmu.wisdomST.core.ResultCollector;
/*     */ import com.dlmu.wisdomST.core.Shape;
/*     */ import com.dlmu.wisdomST.io.Text2;
/*     */ import com.dlmu.wisdomST.io.TextSerializable;
/*     */ import com.dlmu.wisdomST.mapreduce.ShapeLineInputFormat;
/*     */ import com.dlmu.wisdomST.mapreduce.ShapeLineRecordReader;
/*     */ import com.dlmu.wisdomST.mapreduce.TextOutputFormat;
/*     */ import com.dlmu.wisdomST.util.OperationsParams;
/*     */ import java.io.IOException;
/*     */ import java.util.ArrayList;
/*     */ import java.util.Arrays;
/*     */ import java.util.Iterator;
/*     */ import java.util.Random;
/*     */ import org.apache.commons.logging.Log;
/*     */ import org.apache.commons.logging.LogFactory;
/*     */ import org.apache.hadoop.conf.Configuration;
/*     */ import org.apache.hadoop.fs.FileStatus;
/*     */ import org.apache.hadoop.fs.FileSystem;
/*     */ import org.apache.hadoop.fs.Path;
/*     */ import org.apache.hadoop.fs.PathFilter;
/*     */ import org.apache.hadoop.io.IntWritable;
/*     */ import org.apache.hadoop.io.LongWritable;
/*     */ import org.apache.hadoop.io.NullWritable;
/*     */ import org.apache.hadoop.io.Text;
/*     */ import org.apache.hadoop.mapred.ClusterStatus;
/*     */ import org.apache.hadoop.mapred.Counters;
/*     */ import org.apache.hadoop.mapred.FileSplit;
/*     */ import org.apache.hadoop.mapred.InputSplit;
/*     */ import org.apache.hadoop.mapred.JobClient;
/*     */ import org.apache.hadoop.mapred.JobConf;
/*     */ import org.apache.hadoop.mapred.MapReduceBase;
/*     */ import org.apache.hadoop.mapred.Mapper;
/*     */ import org.apache.hadoop.mapred.OutputCollector;
/*     */ import org.apache.hadoop.mapred.RecordReader;
/*     */ import org.apache.hadoop.mapred.Reducer;
/*     */ import org.apache.hadoop.mapred.Reporter;
/*     */ import org.apache.hadoop.mapred.RunningJob;
/*     */ import org.apache.hadoop.mapred.Task;
/*     */ import org.apache.hadoop.util.GenericOptionsParser;
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ public class Sampler
/*     */ {
/*     */   // Commons-logging logger used by the static sampling routines of this class.
/*  61 */   private static final Log LOG = LogFactory.getLog(Sampler.class);
/*     */   
/*     */   // Input size in bytes recorded by the most recent sampling run: the MAP_INPUT_BYTES
/*     */   // counter in sampleMapReduceWithRatio, or the summed file lengths in sampleLocalWithRatio.
/*     */   // NOTE(review): public mutable static state; presumably read by callers right after
/*     */   // sampling - confirm no concurrent sampling calls rely on it.
/*     */   public static long sizeOfLastProcessedFile;
/*     */ 
/*     */   
/*     */   public static class Map
/*     */     extends MapReduceBase
/*     */     implements Mapper<Rectangle, Text, IntWritable, Text>
/*     */   {
/*     */     private double sampleRatio;
/*     */     
/*     */     private Random random;
/*     */     
/*     */     private IntWritable key;
/*     */     private Shape inShape;
/*     */     Conversion conversion;
/*     */     
/*     */     public Map() {
/*  79 */       this.key = new IntWritable((int)(Math.random() * 2.147483647E9D));
/*     */     }
/*     */     
/*     */     enum Conversion
/*     */     {
/*  84 */       None, ShapeToPoint, ShapeToRect;
/*     */     }
/*     */ 
/*     */     
/*     */     public void configure(JobConf job) {
/*  89 */       this.sampleRatio = job.getFloat("ratio", 0.01F);
/*  90 */       this.random = new Random(job.getLong("seed", System.currentTimeMillis()));
/*     */       
/*  92 */       TextSerializable inObj = OperationsParams.getTextSerializable((Configuration)job, "shape", (TextSerializable)new Text2());
/*  93 */       TextSerializable outObj = OperationsParams.getTextSerializable((Configuration)job, "outshape", (TextSerializable)new Text2());
/*     */       
/*  95 */       if (inObj.getClass() == outObj.getClass()) {
/*  96 */         this.conversion = Conversion.None;
/*     */       }
/*  98 */       else if (inObj instanceof Shape && outObj instanceof Point) {
/*  99 */         this.inShape = (Shape)inObj;
/* 100 */         this.conversion = Conversion.ShapeToPoint;
/* 101 */       } else if (inObj instanceof Shape && outObj instanceof Rectangle) {
/* 102 */         this.inShape = (Shape)inObj;
/* 103 */         this.conversion = Conversion.ShapeToRect;
/* 104 */       } else if (outObj instanceof Text) {
/* 105 */         this.conversion = Conversion.None;
/*     */       } else {
/* 107 */         throw new RuntimeException("Don't know how to convert from: " + 
/* 108 */             inObj.getClass() + " to " + outObj.getClass());
/*     */       } 
/*     */     }
/*     */ 
/*     */ 
/*     */ 
/*     */     
/*     */     public void map(Rectangle cell, Text line, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException {
/* 116 */       if (this.random.nextFloat() < this.sampleRatio) {
/* 117 */         Rectangle mbr; switch (this.conversion) {
/*     */           case null:
/* 119 */             output.collect(this.key, line);
/*     */             break;
/*     */           case ShapeToPoint:
/* 122 */             this.inShape.fromText(line);
/* 123 */             mbr = this.inShape.getMBR();
/* 124 */             if (mbr != null) {
/* 125 */               Point center = mbr.getCenterPoint();
/* 126 */               line.clear();
/* 127 */               center.toText(line);
/* 128 */               output.collect(this.key, line);
/*     */             } 
/*     */             break;
/*     */           case ShapeToRect:
/* 132 */             this.inShape.fromText(line);
/* 133 */             mbr = this.inShape.getMBR();
/* 134 */             if (mbr != null) {
/* 135 */               line.clear();
/* 136 */               mbr.toText(line);
/* 137 */               output.collect(this.key, line);
/*     */             } 
/*     */             break;
/*     */         } 
/*     */       } 
/*     */     }
/*     */   }
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */   
/*     */   /**
             * Reduce phase of the sampling job: strips the grouping key and writes every
             * sampled line as-is.
             */
/*     */   public static class Reduce
/*     */     extends MapReduceBase
/*     */     implements Reducer<IntWritable, Text, NullWritable, Text>
/*     */   {
/*     */     /**
               * Forwards each value to the output under a {@link NullWritable} key.
               *
               * @param dummy    grouping key produced by the mapper; ignored
               * @param values   sampled lines that share the key
               * @param output   receives each line
               * @param reporter not used
               */
/*     */     public void reduce(IntWritable dummy, Iterator<Text> values, OutputCollector<NullWritable, Text> output, Reporter reporter) throws IOException {
/*     */       for (; values.hasNext(); ) {
/*     */         final Text sampledLine = values.next();
/*     */         output.collect(NullWritable.get(), sampledLine);
/*     */       }
/*     */     }
/*     */   }
/*     */ 
/*     */   
/*     */   /**
             * Chooses between the MapReduce and the local ratio sampler by looking at the
             * first input path only: a directory, or a file whose length divided by its
             * block size exceeds one, goes to MapReduce; anything else is sampled locally.
             *
             * @param files  input paths; only {@code files[0]} is inspected for the decision
             * @param output collector that receives the sampled records
             * @param params sampling options, also used to resolve the file system
             * @return the value returned by the sampler that was chosen
             */
/*     */   private static int sampleWithRatio(Path[] files, ResultCollector<? extends TextSerializable> output, OperationsParams params) throws IOException {
/*     */     final Path firstInput = files[0];
/*     */     final FileStatus firstStatus = firstInput.getFileSystem(params).getFileStatus(firstInput);
/*     */     
/*     */     // Short-circuit keeps the block-size division from running for directories.
/*     */     final boolean useMapReduce =
/*     */         firstStatus.isDir() || firstStatus.getLen() / firstStatus.getBlockSize() > 1L;
/*     */     if (!useMapReduce) {
/*     */       return sampleLocalWithRatio(files, output, params);
/*     */     }
/*     */     return sampleMapReduceWithRatio(files, output, params);
/*     */   }
/*     */ 
/*     */ 
/*     */   
/*     */   /**
             * Samples the input with a MapReduce job and hands the result to {@code output}.
             * The job writes its sample to a temporary sibling path of {@code files[0]}. If a
             * "size" option is set and the job produced more bytes than requested, the job
             * output is sampled a second time (locally when more than 10% of it is kept,
             * otherwise by another MapReduce pass). The temporary path is always removed.
             *
             * @param files  input paths
             * @param output collector for the sampled records; when {@code null} the job
             *               still runs but nothing is read back
             * @param params sampling options ("ratio", "seed", "shape", "outshape", "size")
             * @return number of records handed to {@code output}
             * @throws IOException if the job or any file system access fails
             */
/*     */   private static <T extends TextSerializable> int sampleMapReduceWithRatio(Path[] files, ResultCollector<T> output, OperationsParams params) throws IOException {
/*     */     JobConf job = new JobConf(params, Sampler.class);
/*     */     
/*     */     // Pick a temporary output path next to the first input that does not exist yet.
/*     */     FileSystem outFs = FileSystem.get(job);
/*     */     Path outputPath;
/*     */     do {
/*     */       outputPath = new Path(files[0].toUri().getPath() + 
/*     */           ".sample_" + (int)(Math.random() * 1000000.0D));
/*     */     } while (outFs.exists(outputPath));
/*     */     
/*     */     job.setJobName("Sample");
/*     */     job.setMapOutputKeyClass(IntWritable.class);
/*     */     job.setMapOutputValueClass(Text.class);
/*     */     
/*     */     job.setMapperClass(Map.class);
/*     */     job.setReducerClass(Reduce.class);
/*     */     
/*     */     ClusterStatus clusterStatus = (new JobClient(job)).getClusterStatus();
/*     */     job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
/*     */     job.setNumReduceTasks(
/*     */         Math.max(1, clusterStatus.getMaxReduceTasks() * 9 / 10));
/*     */     
/*     */     job.setInputFormat(ShapeLineInputFormat.class);
/*     */     job.setOutputFormat(TextOutputFormat.class);
/*     */     
/*     */     ShapeLineInputFormat.setInputPaths(job, files);
/*     */     TextOutputFormat.setOutputPath(job, outputPath);
/*     */     
/*     */     int result_size = 0;
/*     */     boolean completed = false;
/*     */     try {
/*     */       RunningJob run_job = JobClient.runJob(job);
/*     */       
/*     */       Counters counters = run_job.getCounters();
/*     */       Counters.Counter outputRecordCounter = (Counters.Counter)counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
/*     */       long resultCount = outputRecordCounter.getValue();
/*     */       
/*     */       Counters.Counter outputSizeCounter = (Counters.Counter)counters.findCounter(Task.Counter.MAP_OUTPUT_BYTES);
/*     */       long sampleSize = outputSizeCounter.getValue();
/*     */       
/*     */       LOG.info("resultSize: " + sampleSize);
/*     */       LOG.info("resultCount: " + resultCount);
/*     */       
/*     */       Counters.Counter inputBytesCounter = (Counters.Counter)counters.findCounter(Task.Counter.MAP_INPUT_BYTES);
/*     */       sizeOfLastProcessedFile = inputBytesCounter.getValue();
/*     */       
/*     */       // No size limit requested: any ratio above 1 means "keep everything".
/*     */       long desiredSampleSize = job.getLong("size", 0L);
/*     */       float selectRatio = (desiredSampleSize <= 0L) ? 2.0F : ((float)desiredSampleSize / (float)sampleSize);
/*     */       
/*     */       if (output != null) {
/*     */         if (selectRatio > 1.0F) {
/*     */           result_size = collectSampleOutput(job, outputPath, output, params);
/*     */         } else {
/*     */           // Second pass over the job output, which is already in the output shape.
/*     */           OperationsParams params2 = new OperationsParams(params);
/*     */           params2.setFloat("ratio", selectRatio);
/*     */           String outShape = params.get("outshape");
/*     */           if (outShape != null) {
/*     */             // Configuration.set rejects null values, so only override when configured;
/*     */             // without "outshape" the first pass emitted the lines unconverted.
/*     */             params2.set("shape", outShape);
/*     */             params2.set("outshape", outShape);
/*     */           }
/*     */           
/*     */           // The nested sampler overwrites the static size; keep the original input size.
/*     */           long tempSize = sizeOfLastProcessedFile;
/*     */           Path[] firstPassOutput = new Path[] { outputPath };
/*     */           if (selectRatio > 0.1D) {
/*     */             LOG.info("Local return " + selectRatio + " of " + resultCount + " records");
/*     */             result_size = sampleLocalWithRatio(firstPassOutput, output, params2);
/*     */           } else {
/*     */             LOG.info("MapReduce return " + selectRatio + " of " + resultCount + " records");
/*     */             result_size = sampleMapReduceWithRatio(firstPassOutput, output, params2);
/*     */           }
/*     */           sizeOfLastProcessedFile = tempSize;
/*     */         }
/*     */       }
/*     */       completed = true;
/*     */     } finally {
/*     */       if (completed) {
/*     */         outFs.delete(outputPath, true);
/*     */       } else {
/*     */         // Already failing: clean up best-effort without masking the original error.
/*     */         try {
/*     */           outFs.delete(outputPath, true);
/*     */         } catch (IOException cleanupError) {
/*     */           LOG.warn("Could not delete temporary sample output " + outputPath, cleanupError);
/*     */         }
/*     */       }
/*     */     }
/*     */     
/*     */     return result_size;
/*     */   }
/*     */ 
/*     */   /**
             * Reads every record of the sampling job's output and passes it to {@code output}.
             *
             * @param job        job configuration; its input path is redirected to {@code outputPath}
             * @param outputPath directory written by the sampling job
             * @param output     non-null collector for the parsed records
             * @param params     supplies the "outshape" object used to parse each line
             * @return number of records collected
             */
/*     */   private static <T extends TextSerializable> int collectSampleOutput(JobConf job, Path outputPath, ResultCollector<T> output, OperationsParams params) throws IOException {
/*     */     ShapeLineInputFormat inputFormat = new ShapeLineInputFormat();
/*     */     ShapeLineInputFormat.setInputPaths(job, new Path[] { outputPath });
/*     */     int collected = 0;
/*     */     for (InputSplit split : inputFormat.getSplits(job, 1)) {
/*     */       RecordReader<Rectangle, Text> reader = inputFormat.getRecordReader(split, job, null);
/*     */       try {
/*     */         Rectangle key = (Rectangle)reader.createKey();
/*     */         Text value = (Text)reader.createValue();
/*     */         @SuppressWarnings("unchecked") // the configured "outshape" is trusted to match the collector type
/*     */         T outObj = (T)OperationsParams.getTextSerializable((Configuration)params, "outshape", (TextSerializable)new Text2());
/*     */         while (reader.next(key, value)) {
/*     */           outObj.fromText(value);
/*     */           output.collect(outObj);
/*     */           collected++;
/*     */         }
/*     */       } finally {
/*     */         reader.close();
/*     */       }
/*     */     }
/*     */     return collected;
/*     */   }
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */ 
/*     */   
/*     */   /**
             * Wraps {@code output} in a collector that accepts objects of the input type and
             * converts them to the output type before forwarding. Supported conversions:
             * identical classes (pass-through), Shape to Point (center of the MBR), Shape to
             * Rectangle (the MBR), anything to Text (via {@code toText}) and Text to anything
             * (via {@code fromText}). Converted results are written into the single
             * {@code outObj} instance, which is reused for every record.
             *
             * @param output collector to forward to; may be {@code null}
             * @param inObj  representative object of the input type
             * @param outObj reusable object of the output type
             * @return the converting collector, or {@code null} when {@code output} is {@code null}
             * @throws RuntimeException if no conversion between the two classes is known
             */
/*     */   private static <O extends TextSerializable, T extends TextSerializable> ResultCollector<T> createConverter(final ResultCollector<O> output, T inObj, final O outObj) {
/*     */     if (output == null) {
/*     */       return null;
/*     */     }
/*     */     if (inObj.getClass() == outObj.getClass()) {
/*     */       return new ResultCollector<T>()
/*     */         {
/*     */           @SuppressWarnings("unchecked") // both type parameters denote the same runtime class here
/*     */           public void collect(T r) {
/*     */             output.collect((O)r);
/*     */           }
/*     */         };
/*     */     }
/*     */     if (inObj instanceof Shape && outObj instanceof Point) {
/*     */       final Point out_pt = (Point)outObj;
/*     */       return new ResultCollector<T>()
/*     */         {
/*     */           public void collect(T r) {
/*     */             Shape s = (Shape)r;
/*     */             if (s == null) {
/*     */               return;
/*     */             }
/*     */             Rectangle mbr = s.getMBR();
/*     */             if (mbr == null) {
/*     */               return;
/*     */             }
/*     */             Point pt = mbr.getCenterPoint();
/*     */             out_pt.x = pt.x;
/*     */             out_pt.y = pt.y;
/*     */             output.collect(outObj);
/*     */           }
/*     */         };
/*     */     }
/*     */     if (inObj instanceof Shape && outObj instanceof Rectangle) {
/*     */       final Rectangle out_rect = (Rectangle)outObj;
/*     */       return new ResultCollector<T>()
/*     */         {
/*     */           public void collect(T r) {
/*     */             // Same guards as the Point conversion: skip records without an MBR.
/*     */             Shape s = (Shape)r;
/*     */             if (s == null || s.getMBR() == null) {
/*     */               return;
/*     */             }
/*     */             out_rect.set(s);
/*     */             output.collect(outObj);
/*     */           }
/*     */         };
/*     */     }
/*     */     if (outObj instanceof Text) {
/*     */       final Text text = (Text)outObj;
/*     */       return new ResultCollector<T>()
/*     */         {
/*     */           public void collect(T r) {
/*     */             text.clear();
/*     */             r.toText(text);
/*     */             output.collect(outObj);
/*     */           }
/*     */         };
/*     */     }
/*     */     if (inObj instanceof Text) {
/*     */       final Text text = (Text)inObj;
/*     */       return new ResultCollector<T>()
/*     */         {
/*     */           public void collect(T r) {
/*     */             // Parse the record that was actually passed in; fall back to the
/*     */             // captured input object only if the argument is not a Text.
/*     */             Text source = (r instanceof Text) ? (Text)r : text;
/*     */             outObj.fromText(source);
/*     */             output.collect(outObj);
/*     */           }
/*     */         };
/*     */     }
/*     */     throw new RuntimeException("Cannot convert from " + inObj.getClass() + 
/*     */         " to " + outObj.getClass());
/*     */   }
/*     */ 
/*     */   
/*     */   /** Accepts every path whose last component starts with neither "_" nor ".". */
/*     */   private static final PathFilter hiddenFileFilter = new PathFilter() {
/*     */       public boolean accept(Path p) {
/*     */         final String fileName = p.getName();
/*     */         if (fileName.startsWith("_")) {
/*     */           return false;
/*     */         }
/*     */         return !fileName.startsWith(".");
/*     */       }
/*     */     };
/*     */ 
/*     */ 
/*     */ 
/*     */   
/*     */   /**
             * Samples records locally until the sampled lines add up to at least the number
             * of bytes given by the "size" option. Works in passes: each pass asks
             * {@code sampleLocalByCount} for as many records as the remaining bytes suggest
             * (at least 10), based on the average record size seen so far (1024 initially).
             * Stops early when a pass makes no progress, e.g. on empty input.
             *
             * @param files  input files or directories
             * @param output collector for the sampled records; may be {@code null} to only count
             * @param params sampling options ("size", "seed", "shape", "outshape")
             * @return total number of records sampled over all passes
             */
/*     */   private static <T extends TextSerializable> int sampleLocalWithSize(Path[] files, ResultCollector<T> output, OperationsParams params) throws IOException {
/*     */     int average_record_size = 1024;
/*     */     final LongWritable current_sample_size = new LongWritable();
/*     */     int sample_count = 0;
/*     */     
/*     */     final TextSerializable inObj = OperationsParams.getTextSerializable((Configuration)params, "shape", (TextSerializable)new Text2());
/*     */     @SuppressWarnings("unchecked") // the configured "outshape" is trusted to match the collector type
/*     */     final T outObj = (T)OperationsParams.getTextSerializable((Configuration)params, "outshape", (TextSerializable)new Text2());
/*     */     final ResultCollector<TextSerializable> converter = createConverter(output, inObj, outObj);
/*     */     
/*     */     // Receives raw lines from sampleLocalByCount, tracks their size, then converts.
/*     */     ResultCollector<Text2> counter = new ResultCollector<Text2>()
/*     */       {
/*     */         public void collect(Text2 r) {
/*     */           current_sample_size.set(current_sample_size.get() + r.getLength());
/*     */           // createConverter returns null when there is no output collector.
/*     */           if (converter != null) {
/*     */             inObj.fromText((Text)r);
/*     */             converter.collect(inObj);
/*     */           }
/*     */         }
/*     */       };
/*     */     
/*     */     long total_size = params.getLong("size", 0L);
/*     */     long seed = params.getLong("seed", System.currentTimeMillis());
/*     */     
/*     */     while (current_sample_size.get() < total_size) {
/*     */       int count = (int)((total_size - current_sample_size.get()) / average_record_size);
/*     */       if (count < 10) {
/*     */         count = 10;
/*     */       }
/*     */       OperationsParams params2 = new OperationsParams(params);
/*     */       params2.setClass("shape", Text2.class, TextSerializable.class);
/*     */       params2.setClass("outshape", Text2.class, TextSerializable.class);
/*     */       params2.setInt("count", count);
/*     */       params2.setLong("seed", seed);
/*     */       
/*     */       long sizeBeforePass = current_sample_size.get();
/*     */       int returned = sampleLocalByCount(files, counter, params2);
/*     */       sample_count += returned;
/*     */       if (returned == 0 || current_sample_size.get() == sizeBeforePass) {
/*     */         // Nothing (or only empty lines) was sampled: another pass cannot reach the
/*     */         // target and the average below would divide by zero.
/*     */         break;
/*     */       }
/*     */       
/*     */       // Vary the seed so the next pass does not pick the same offsets again.
/*     */       seed += sample_count;
/*     */       
/*     */       average_record_size = (int)Math.max(1L, current_sample_size.get() / sample_count);
/*     */     } 
/*     */     return sample_count;
/*     */   }
/*     */ 
/*     */   
/*     */   /**
             * Samples a fraction of the input locally. The total byte size of the input is
             * computed (for a directory: its direct, non-hidden, non-directory children),
             * stored in {@code sizeOfLastProcessedFile}, multiplied by the "ratio" option
             * (default 0.1) and used as the target size for {@code sampleLocalWithSize}.
             * The caller's {@code params} object is left unmodified.
             *
             * @param files  input files or directories
             * @param output collector for the sampled records
             * @param params sampling options
             * @return number of records sampled
             */
/*     */   private static <T extends TextSerializable> int sampleLocalWithRatio(Path[] files, ResultCollector<T> output, OperationsParams params) throws IOException {
/*     */     long total_size = 0L;
/*     */     for (Path file : files) {
/*     */       FileSystem fs = file.getFileSystem(params);
/*     */       FileStatus fStatus = fs.getFileStatus(file);
/*     */       if (fStatus.isDir()) {
/*     */         // Use the same filter as sampleLocalByCount so that only files that are
/*     */         // actually sampled contribute to the size.
/*     */         for (FileStatus subFStatus : fs.listStatus(file, hiddenFileFilter)) {
/*     */           if (!subFStatus.isDir()) {
/*     */             total_size += subFStatus.getLen();
/*     */           }
/*     */         }
/*     */       } else {
/*     */         total_size += fStatus.getLen();
/*     */       }
/*     */     }
/*     */     
/*     */     sizeOfLastProcessedFile = total_size;
/*     */     float ratio = params.getFloat("ratio", 0.1F);
/*     */     
/*     */     // Work on a copy: setting "size" on the caller's params would leak into later calls.
/*     */     OperationsParams sizedParams = new OperationsParams(params);
/*     */     sizedParams.setLong("size", (long)((float)total_size * ratio));
/*     */     return sampleLocalWithSize(files, output, sizedParams);
/*     */   }
/*     */ 
/*     */ 
/*     */   
/*     */   /**
             * Samples approximately "count" records by drawing random byte offsets over the
             * concatenation of all input files and returning, for each offset, the first line
             * whose accumulated length passes it. Directories are replaced by their non-hidden
             * children. Offsets that are not reached before a file ends are dropped, so fewer
             * than "count" records may be returned.
             *
             * @param files  input files or directories
             * @param output collector for the sampled records; may be {@code null} to only count
             * @param params sampling options ("count", "seed", "shape", "outshape")
             * @return number of records sampled
             */
/*     */   private static <T extends TextSerializable> int sampleLocalByCount(Path[] files, ResultCollector<T> output, OperationsParams params) throws IOException {
/*     */     // Expand directories into their visible children.
/*     */     ArrayList<Path> data_files = new ArrayList<Path>();
/*     */     for (Path file : files) {
/*     */       FileSystem fs = file.getFileSystem(params);
/*     */       if (fs.getFileStatus(file).isDir()) {
/*     */         for (FileStatus f : fs.listStatus(file, hiddenFileFilter)) {
/*     */           data_files.add(f.getPath());
/*     */         }
/*     */       } else {
/*     */         data_files.add(file);
/*     */       }
/*     */     }
/*     */     files = data_files.toArray(new Path[data_files.size()]);
/*     */     if (files.length == 0) {
/*     */       // Nothing to read; the offset lookup below needs at least one file.
/*     */       return 0;
/*     */     }
/*     */     
/*     */     TextSerializable inObj = OperationsParams.getTextSerializable((Configuration)params, "shape", (TextSerializable)new Text2());
/*     */     @SuppressWarnings("unchecked") // the configured "outshape" is trusted to match the collector type
/*     */     T outObj = (T)OperationsParams.getTextSerializable((Configuration)params, "outshape", (TextSerializable)new Text2());
/*     */     ResultCollector<TextSerializable> converter = createConverter(output, inObj, outObj);
/*     */     
/*     */     // files_start_offset[i] is the global offset of file i; the last entry is the total.
/*     */     long[] files_start_offset = new long[files.length + 1];
/*     */     long total_length = 0L;
/*     */     for (int i_file = 0; i_file < files.length; i_file++) {
/*     */       FileSystem fs = files[i_file].getFileSystem(params);
/*     */       files_start_offset[i_file] = total_length;
/*     */       total_length += fs.getFileStatus(files[i_file]).getLen();
/*     */     }
/*     */     files_start_offset[files.length] = total_length;
/*     */     
/*     */     // Draw the global offsets and sort them so the files are scanned once, in order.
/*     */     Random random = new Random(params.getLong("seed", System.currentTimeMillis()));
/*     */     long[] offsets = new long[Math.max(0, params.getInt("count", 0))];
/*     */     for (int i = 0; i < offsets.length; i++) {
/*     */       if (total_length == 0L) {
/*     */         offsets[i] = 0L;
/*     */       } else {
/*     */         // Math.abs(Long.MIN_VALUE) is still negative; map that single value to 0.
/*     */         long draw = random.nextLong();
/*     */         long nonNegative = (draw == Long.MIN_VALUE) ? 0L : Math.abs(draw);
/*     */         offsets[i] = nonNegative % total_length;
/*     */       }
/*     */     }
/*     */     Arrays.sort(offsets);
/*     */     
/*     */     int record_i = 0;
/*     */     int records_returned = 0;
/*     */     int file_i = 0;
/*     */     while (record_i < offsets.length) {
/*     */       // Advance to the file that contains the next offset.
/*     */       while (offsets[record_i] > files_start_offset[file_i + 1]) {
/*     */         file_i++;
/*     */       }
/*     */       long current_file_size = files_start_offset[file_i + 1] - files_start_offset[file_i];
/*     */       FileSystem fs = files[file_i].getFileSystem(params);
/*     */       ShapeLineRecordReader reader = new ShapeLineRecordReader(fs.getConf(), 
/*     */           new FileSplit(files[file_i], 0L, current_file_size, new String[0]));
/*     */       try {
/*     */         Rectangle key = reader.createKey();
/*     */         Text line = reader.createValue();
/*     */         long pos = files_start_offset[file_i];
/*     */         
/*     */         while (record_i < offsets.length && 
/*     */           offsets[record_i] <= files_start_offset[file_i + 1] && 
/*     */           reader.next(key, line)) {
/*     */           pos += line.getLength();
/*     */           if (pos > offsets[record_i]) {
/*     */             // This line covers the current offset: report it and move to the next offset.
/*     */             if (converter != null) {
/*     */               inObj.fromText(line);
/*     */               converter.collect(inObj);
/*     */             }
/*     */             record_i++;
/*     */             records_returned++;
/*     */           }
/*     */         }
/*     */       } finally {
/*     */         reader.close();
/*     */       }
/*     */       
/*     */       // Drop offsets of this file that were not reached before it ran out of lines.
/*     */       while (record_i < offsets.length && 
/*     */         offsets[record_i] <= files_start_offset[file_i + 1]) {
/*     */         record_i++;
/*     */       }
/*     */     }
/*     */     return records_returned;
/*     */   }
/*     */ 
/*     */ 
/*     */   
/*     */   /**
             * Entry point for sampling. The first option present decides the strategy, in
             * this order: "ratio" (local when the "local" flag is true, MapReduce otherwise),
             * then "size" (local), then "count" (local).
             *
             * @param inputFiles files or directories to sample from
             * @param output     collector that receives the sampled records
             * @param params     sampling options
             * @throws RuntimeException if none of "ratio", "size" or "count" is set
             */
/*     */   public static void sample(Path[] inputFiles, ResultCollector<? extends TextSerializable> output, OperationsParams params) throws IOException {
/*     */     if (params.get("ratio") != null) {
/*     */       final boolean runLocally = params.getBoolean("local", false);
/*     */       if (!runLocally) {
/*     */         sampleMapReduceWithRatio(inputFiles, output, params);
/*     */       } else {
/*     */         sampleLocalWithRatio(inputFiles, output, params);
/*     */       }
/*     */       return;
/*     */     }
/*     */     if (params.get("size") != null) {
/*     */       sampleLocalWithSize(inputFiles, output, params);
/*     */       return;
/*     */     }
/*     */     if (params.get("count") != null) {
/*     */       sampleLocalByCount(inputFiles, output, params);
/*     */       return;
/*     */     }
/*     */     throw new RuntimeException("Must provide one of three options 'size', 'ratio' or 'count'");
/*     */   }
/*     */   
/*     */   /** Prints the command-line help, followed by Hadoop's generic option help, to stdout. */
/*     */   private static void printUsage() {
/*     */     final String[] usageLines = {
/*     */       "Reads a random sample of an input file. Sample is written to stdout",
/*     */       "Parameters (* marks required parameters):",
/*     */       "<input file> - (*) Path to input file",
/*     */       "shape:<s> - Type of shapes stored in the file",
/*     */       "outshape:<s> - Shapes to write to output",
/*     */       "ratio:<r> - ratio of random sample to read [0, 1]",
/*     */       "count:<s> - approximate number of records in the sample",
/*     */       "size:<s> - approximate size of the sample in bytes",
/*     */       "seed:<s> - random seed to use while reading the sample"
/*     */     };
/*     */     for (String usageLine : usageLines) {
/*     */       System.out.println(usageLine);
/*     */     }
/*     */     GenericOptionsParser.printGenericCommandUsage(System.out);
/*     */   }
/*     */   
/*     */   /**
             * Command-line entry point: parses the arguments, prints the usage and exits with
             * status 1 when the input check fails, otherwise samples the input and prints
             * each sampled record to stdout.
             *
             * @param args command-line arguments, including Hadoop generic options
             */
/*     */   public static void main(String[] args) throws IOException {
/*     */     final GenericOptionsParser optionsParser = new GenericOptionsParser(args);
/*     */     final OperationsParams params = new OperationsParams(optionsParser);
/*     */     final Path[] inputFiles = params.getPaths();
/*     */     
/*     */     final boolean inputOk = params.checkInput();
/*     */     if (!inputOk) {
/*     */       printUsage();
/*     */       System.exit(1);
/*     */     }
/*     */     
/*     */     // Writes the text form of every sampled record on its own line.
/*     */     sample(inputFiles, new ResultCollector<TextSerializable>()
/*     */       {
/*     */         public void collect(TextSerializable value) {
/*     */           System.out.println(value.toText(new Text()));
/*     */         }
/*     */       }, params);
/*     */   }
/*     */ }


/* Location:              E:\大连公交集团-项目文档\公交项目相关文档\田\wisdomST-0.0.1.jar!\com\dlmu\wisdomST\operation\Sampler.class
 * Java compiler version: 6 (50.0)
 * JD-Core Version:       1.1.3
 */