How to kill Hadoop jobs matching a pattern?

Today, I had to kill a list of jobs (45) running on my Hadoop cluster. OK, let’s have a look at the docs: http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/CommandsManual.html#job But wait a minute… Hadoop has a “kill” command, but no “pkill”: there is no built-in way to kill every job whose name matches a pattern.

One solution is:

import java.io.IOException;
import java.util.Locale;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobStatus;
import org.apache.hadoop.mapred.RunningJob;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line tool that kills the not-yet-completed Hadoop jobs whose name
 * contains one of the given patterns (a "pkill" for Hadoop jobs).
 *
 * <p>Options:
 * <ul>
 * <li>{@code -p, --pattern <text>}: substring to look for in the job name,
 * compared case-insensitively; may be repeated.</li>
 * <li>{@code -h, --help}: print the usage and exit.</li>
 * </ul>
 *
 * <p>When no pattern is given, <strong>every</strong> job returned by
 * {@code JobClient.jobsToComplete()} is killed.
 */
public class PKill {

 private static final Logger LOGGER = LoggerFactory.getLogger(PKill.class);

 /**
  * Prints the command-line usage for the given options.
  *
  * @param options the options supported by the tool
  */
 private static void printUsage(Options options) {
  HelpFormatter usageFormatter = new HelpFormatter();
  usageFormatter.printHelp("pkill", options);
 }

 /**
  * Parses the command line, lists the jobs still to complete and kills the
  * ones whose name matches a pattern (or all of them if no pattern is given).
  *
  * @param commandLineArguments the raw command-line arguments
  * @throws IOException if talking to the cluster or killing a job fails
  * @throws InterruptedException declared for compatibility with callers
  */
 public static void main(final String... commandLineArguments)
   throws IOException, InterruptedException {

  CommandLineParser parser = new PosixParser();

  Options options = new Options();
  options.addOption(
    "p",
    "pattern",
    true,
    "Specify the patterns (case-insensitive): 'elastic' will match 'elastic', 'elasticsearch', 'searchelasticsearch'");
  options.addOption("h", "help", false, "Print the help");

  CommandLine line;
  try {
   line = parser.parse(options, commandLineArguments);
  } catch (ParseException exp) {
   // Tell the user what was wrong instead of only dumping the usage.
   LOGGER.error("Invalid command line: {}", exp.getMessage());
   printUsage(options);
   return;
  }

  if (line.hasOption("h")) {
   printUsage(options);
   return;
  }

  String[] patterns = null;
  if (line.hasOption("p")) {
   patterns = line.getOptionValues("p");
   for (String pattern : patterns) {
    // A blank pattern is contained in every job name and would silently
    // turn a targeted kill into "kill everything"; refuse it.
    if (StringUtils.isBlank(pattern)) {
     LOGGER.error("Patterns must not be blank");
     printUsage(options);
     return;
    }
   }
   LOGGER.debug("Setting patterns: {}", StringUtils.join(patterns, ","));
  }

  int killedJobs = 0;
  JobClient jobClient = new JobClient(new Configuration());
  try {
   for (JobStatus jobStatus : jobClient.jobsToComplete()) {
    RunningJob job = jobClient.getJob(jobStatus.getJobID());
    if (job == null) {
     // The job went away between the listing and the lookup.
     LOGGER.debug("Job {} is no longer available, skipping",
       jobStatus.getJobID());
     continue;
    }

    String jobName = job.getJobName();
    LOGGER.debug("--------------------------------------------");
    LOGGER.debug("Job: {}", jobName);
    LOGGER.debug(" Progress: map {}%, reduce {}%",
      jobStatus.mapProgress() * 100,
      jobStatus.reduceProgress() * 100);
    LOGGER.debug(" Username: {}", jobStatus.getUsername());

    if (patterns == null || matchesAny(jobName, patterns)) {
     kill(job);
     killedJobs++;
    }
   }
  } finally {
   // Release the connection to the cluster even if a kill failed.
   jobClient.close();
  }

  if (ArrayUtils.isNotEmpty(patterns)) {
   LOGGER.info("{} job(s) matching '{}' have been killed", killedJobs,
     StringUtils.join(patterns, ","));
  } else {
   LOGGER.info("{} job(s) have been killed (no pattern given)",
     killedJobs);
  }
 }

 /**
  * Tells whether the job name contains at least one of the patterns,
  * ignoring case.
  *
  * @param jobName the job name, may be {@code null}
  * @param patterns the non-blank patterns to look for
  * @return {@code true} if any pattern is a substring of the job name
  */
 private static boolean matchesAny(String jobName, String[] patterns) {
  if (jobName == null) {
   return false;
  }
  // Locale.ROOT keeps the comparison independent of the JVM default locale.
  String lowerCaseName = jobName.toLowerCase(Locale.ROOT);
  for (String pattern : patterns) {
   if (lowerCaseName.contains(pattern.toLowerCase(Locale.ROOT))) {
    return true;
   }
  }
  return false;
 }

 /**
  * Kills the given job.
  *
  * @param job the job to kill
  * @throws IOException if the kill request fails
  */
 private static void kill(RunningJob job) throws IOException {
  LOGGER.info("Killing job '{}'", job.getJobName());
  job.killJob();
  LOGGER.debug("Success");
 }
}

Then in your console, enter:

hadoop jar yourjar.jar fr.layer4.PKill -p "terasort" -p "pi"

And all your jobs whose name contains “terasort” or “pi” will be killed!

Nice!

Related Posts

Leave a comment