package org.apache.mahout.clustering.streaming.tools;

import com.google.common.base.Charsets;
import com.google.common.collect.Iterables;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.util.Iterator;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.cli2.option.DefaultOption;
import org.apache.commons.cli2.util.HelpFormatter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;

/* loaded from: input_file:org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.class */
/**
 * Command-line tool that reads every key/value record from the sequence files under an input
 * path and rewrites them into {@code numSplits} new sequence files named
 * {@code <output>-0}, {@code <output>-1}, ... {@code <output>-(numSplits - 1)}.
 *
 * <p>All splits except the last receive {@code total / numSplits} records (integer division);
 * the last split additionally receives the remainder.
 */
public class ResplitSequenceFiles {
    /** Path whose sequence files are read (value of {@code --input}). */
    private String inputFile;
    /** Prefix of every output file; the split index is appended after a dash. */
    private String outputFileBase;
    /** Number of output files to produce; validated to be at least 1 in {@link #parseArgs}. */
    private int numSplits;
    private Configuration conf;
    private FileSystem fs;

    private ResplitSequenceFiles() {
    }

    /**
     * Consumes {@code numRecords} pairs from {@code records} and appends them to the sequence file
     * {@code outputFileBase + "-" + splitIndex}.
     *
     * <p>The writer is created lazily from the runtime classes of the first pair, so no file is
     * created when {@code numRecords} is zero.
     *
     * @param records    source of key/value pairs; advanced by {@code numRecords} elements
     * @param splitIndex index used as the output file suffix
     * @param numRecords how many pairs to move into this split
     * @throws IOException if creating, appending to, or closing the writer fails
     */
    private void writeSplit(Iterator<Pair<Writable, Writable>> records, int splitIndex, int numRecords)
            throws IOException {
        SequenceFile.Writer writer = null;
        try {
            for (int written = 0; written < numRecords; written++) {
                Pair<Writable, Writable> record = records.next();
                if (writer == null) {
                    // The key/value classes are only known once the first record has been read.
                    Path splitPath = new Path(this.outputFileBase + "-" + splitIndex);
                    writer = SequenceFile.createWriter(this.fs, this.conf, splitPath,
                            record.getFirst().getClass(), record.getSecond().getClass());
                }
                writer.append(record.getFirst(), record.getSecond());
            }
        } finally {
            // Close even when reading or appending fails so the output stream is not leaked.
            if (writer != null) {
                writer.close();
            }
        }
    }

    /**
     * Counts the input records, then streams them a second time into the output splits,
     * reporting progress on {@code printWriter}.
     *
     * @param printWriter destination for progress messages
     * @throws IOException if reading the input or writing a split fails
     */
    private void run(PrintWriter printWriter) throws IOException {
        this.conf = new Configuration();
        SequenceFileDirIterable<Writable, Writable> input =
                new SequenceFileDirIterable<Writable, Writable>(new Path(this.inputFile), PathType.LIST, this.conf);
        this.fs = FileSystem.get(this.conf);

        // First pass over the input: only to learn how many records there are.
        int totalRecords = Iterables.size(input);
        int recordsPerSplit = totalRecords / this.numSplits;
        // The last split takes whatever the first (numSplits - 1) equal-sized splits leave behind.
        int lastSplit = this.numSplits - 1;
        int recordsInLastSplit = totalRecords - recordsPerSplit * lastSplit;

        // Second pass: one shared iterator is handed to every writeSplit call in turn.
        Iterator<Pair<Writable, Writable>> records = input.iterator();
        printWriter.printf("Writing %d splits\n", this.numSplits);
        for (int split = 0; split < lastSplit; split++) {
            printWriter.printf("Writing split %d\n", split);
            writeSplit(records, split, recordsPerSplit);
        }
        printWriter.printf("Writing split %d\n", lastSplit);
        writeSplit(records, lastSplit, recordsInLastSplit);
    }

    /**
     * Parses the command line into {@link #inputFile}, {@link #outputFileBase} and
     * {@link #numSplits}.
     *
     * @param args raw command-line arguments
     * @return {@code false} if the parser returned no command line (presumably because help was
     *         requested or parsing failed - confirm against Commons CLI2), {@code true} otherwise
     * @throws NumberFormatException    if the {@code numSplits} value is not an integer
     * @throws IllegalArgumentException if {@code numSplits} is less than 1
     */
    private boolean parseArgs(String[] args) {
        DefaultOptionBuilder optionBuilder = new DefaultOptionBuilder();
        ArgumentBuilder argumentBuilder = new ArgumentBuilder();

        DefaultOption helpOption = optionBuilder.withLongName("help")
                .withDescription("print this list")
                .create();

        // Short name "i" was spelled via an unrelated constant of the same value in the original.
        DefaultOption inputOption = optionBuilder.withLongName(DefaultOptionCreator.INPUT_OPTION)
                .withShortName("i")
                .withRequired(true)
                .withArgument(argumentBuilder.withName(DefaultOptionCreator.INPUT_OPTION).withMaximum(1).create())
                .withDescription("what the base folder for sequence files is (they all must have the same key/value type")
                .create();

        DefaultOption outputOption = optionBuilder.withLongName("output")
                .withShortName("o")
                .withRequired(true)
                .withArgument(argumentBuilder.withName("output").withMaximum(1).create())
                .withDescription("the base name of the file split that the files will be split it; the i'th split has the suffix -i")
                .create();

        DefaultOption numSplitsOption = optionBuilder.withLongName("numSplits")
                .withShortName("ns")
                .withRequired(true)
                .withArgument(argumentBuilder.withName("numSplits").withMaximum(1).create())
                .withDescription("how many splits to use for the given files")
                .create();

        Group options = new GroupBuilder()
                .withOption(helpOption)
                .withOption(inputOption)
                .withOption(outputOption)
                .withOption(numSplitsOption)
                .create();

        Parser parser = new Parser();
        parser.setHelpOption(helpOption);
        parser.setHelpTrigger("--help");
        parser.setGroup(options);
        parser.setHelpFormatter(new HelpFormatter(" ", "", " ", 130));

        CommandLine commandLine = parser.parseAndHelp(args);
        if (commandLine == null) {
            return false;
        }

        this.inputFile = (String) commandLine.getValue(inputOption);
        this.outputFileBase = (String) commandLine.getValue(outputOption);
        this.numSplits = Integer.parseInt((String) commandLine.getValue(numSplitsOption));
        // Reject values that would otherwise cause a division by zero or a nonsensical split.
        if (this.numSplits < 1) {
            throw new IllegalArgumentException("numSplits must be at least 1, got " + this.numSplits);
        }
        return true;
    }

    /**
     * Entry point: parses the arguments and, if they are usable, performs the re-split while
     * printing progress to standard output as UTF-8.
     *
     * @param args command-line arguments ({@code --input}, {@code --output}, {@code --numSplits})
     * @throws IOException if reading the input or writing any split fails
     */
    public static void main(String[] args) throws IOException {
        ResplitSequenceFiles tool = new ResplitSequenceFiles();
        if (tool.parseArgs(args)) {
            // autoflush = true so progress lines appear as each split starts.
            tool.run(new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true));
        }
    }
}
