Query ID = iteblog_20160704104520_988f81d4-0b82-4778-af98-43cc1950d357 Total jobs = 1 Launching Job 1 out of 1 Number of reduce tasks determined at compile time: 1 In order to change the average load for a reducer (in bytes): set hive.exec.reducers.bytes.per.reducer=<number> In order to limit the maximum number of reducers: set hive.exec.reducers.max=<number> In order to set a constant number of reducers: set mapreduce.job.reduces=<number>
/** * Estimate the number of reducers needed for this job, based on job input, * and configuration parameters. * * The output of this method should only be used if the output of this * MapRedTask is not being used to populate a bucketed table and the user * has not specified the number of reducers to use. * * @return the number of reducers. */ publicstaticintestimateNumberOfReducers(HiveConf conf, ContentSummary inputSummary, MapWork work, boolean finalMapRed)throws IOException { long bytesPerReducer = conf.getLongVar(HiveConf.ConfVars.BYTESPERREDUCER); int maxReducers = conf.getIntVar(HiveConf.ConfVars.MAXREDUCERS); double samplePercentage = getHighestSamplePercentage(work); long totalInputFileSize = getTotalInputFileSize(inputSummary, work, samplePercentage); // if all inputs are sampled, we should shrink the size of reducers accordingly. if (totalInputFileSize != inputSummary.getLength()) { LOG.info("BytesPerReducer=" + bytesPerReducer + " maxReducers=" + maxReducers + " estimated totalInputFileSize=" + totalInputFileSize); } else { LOG.info("BytesPerReducer=" + bytesPerReducer + " maxReducers=" + maxReducers + " totalInputFileSize=" + totalInputFileSize); } // If this map reduce job writes final data to a table and bucketing is being inferred, // and the user has configured Hive to do this, make sure the number of reducers is a // power of two boolean powersOfTwo = conf.getBoolVar(HiveConf.ConfVars.HIVE_INFER_BUCKET_SORT_NUM_BUCKETS_POWER_TWO) && finalMapRed && !work.getBucketedColsByDirectory().isEmpty(); return estimateReducers(totalInputFileSize, bytesPerReducer, maxReducers, powersOfTwo); } publicstaticintestimateReducers(long totalInputFileSize, long bytesPerReducer, int maxReducers, boolean powersOfTwo){ double bytes = Math.max(totalInputFileSize, bytesPerReducer); int reducers = (int) Math.ceil(bytes / bytesPerReducer); reducers = Math.max(1, reducers); reducers = Math.min(maxReducers, reducers); int reducersLog = (int)(Math.log(reducers) / Math.log(2)) 
+ 1; int reducersPowerTwo = (int)Math.pow(2, reducersLog); if (powersOfTwo) { // If the original number of reducers was a power of two, use that if (reducersPowerTwo / 2 == reducers) { // nothing to do } elseif (reducersPowerTwo > maxReducers) { // If the next power of two greater than the original number of reducers is greater // than the max number of reducers, use the preceding power of two, which is strictly // less than the original number of reducers and hence the max reducers = reducersPowerTwo / 2; } else { // Otherwise use the smallest power of two greater than the original number of reducers reducers = reducersPowerTwo; } } return reducers; }
/** * Set the number of reducers for the mapred work. */ privatevoidsetNumberOfReducers()throws IOException { ReduceWork rWork = work.getReduceWork(); // this is a temporary hack to fix things that are not fixed in the compiler Integer numReducersFromWork = rWork == null ? 0 : rWork.getNumReduceTasks(); if (rWork == null) { console .printInfo("Number of reduce tasks is set to 0 since there's no reduce operator"); } else { if (numReducersFromWork >= 0) { console.printInfo("Number of reduce tasks determined at compile time: " + rWork.getNumReduceTasks()); } elseif (job.getNumReduceTasks() > 0) { int reducers = job.getNumReduceTasks(); rWork.setNumReduceTasks(reducers); console.printInfo("Number of reduce tasks not specified. Defaulting to jobconf value of: " + reducers); } else { if (inputSummary == null) { inputSummary = Utilities.getInputSummary(driverContext.getCtx(), work.getMapWork(), null); } int reducers = Utilities.estimateNumberOfReducers(conf, inputSummary, work.getMapWork(), work.isFinalMapRed()); rWork.setNumReduceTasks(reducers); console .printInfo("Number of reduce tasks not specified. Estimated from input data size: " + reducers); } console.printInfo("In order to change the average load for a reducer (in bytes):"); console.printInfo(" set " + HiveConf.ConfVars.BYTESPERREDUCER.varname + "=<number>"); console.printInfo("In order to limit the maximum number of reducers:"); console.printInfo(" set " + HiveConf.ConfVars.MAXREDUCERS.varname + "=<number>"); console.printInfo("In order to set a constant number of reducers:"); console.printInfo(" set " + HiveConf.ConfVars.HADOOPNUMREDUCERS + "=<numbe>"); } }