package org.apache.mahout.classifier.df.tools;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.mahout.classifier.df.DFUtils;
import org.apache.mahout.classifier.df.data.DataConverter;
import org.apache.mahout.classifier.df.data.Dataset;
import org.apache.mahout.classifier.df.mapreduce.Builder;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/mahout/classifier/df/tools/FrequenciesJob.class */
public class FrequenciesJob {
    private static final Logger log = LoggerFactory.getLogger(FrequenciesJob.class);
    private final Path outputPath;
    private final Path datasetPath;
    private final Path dataPath;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/mahout/classifier/df/tools/FrequenciesJob$Frequencies.class */
    public static class Frequencies implements Writable, Comparable<Frequencies>, Cloneable {
        private long firstId;
        private int[] counts;

        Frequencies() {
        }

        Frequencies(long j, int[] iArr) {
            this.firstId = j;
            this.counts = Arrays.copyOf(iArr, iArr.length);
        }

        @Override // org.apache.hadoop.io.Writable
        public void readFields(DataInput dataInput) throws IOException {
            this.firstId = dataInput.readLong();
            this.counts = DFUtils.readIntArray(dataInput);
        }

        @Override // org.apache.hadoop.io.Writable
        public void write(DataOutput dataOutput) throws IOException {
            dataOutput.writeLong(this.firstId);
            DFUtils.writeArray(dataOutput, this.counts);
        }

        public boolean equals(Object obj) {
            return (obj instanceof Frequencies) && this.firstId == ((Frequencies) obj).firstId;
        }

        public int hashCode() {
            return (int) this.firstId;
        }

        /* JADX INFO: Access modifiers changed from: protected */
        /* renamed from: clone, reason: merged with bridge method [inline-methods] */
        public Frequencies m5012clone() {
            return new Frequencies(this.firstId, this.counts);
        }

        @Override // java.lang.Comparable
        public int compareTo(Frequencies frequencies) {
            if (this.firstId < frequencies.firstId) {
                return -1;
            }
            return this.firstId > frequencies.firstId ? 1 : 0;
        }

        /* JADX WARN: Type inference failed for: r0v2, types: [int[], int[][]] */
        public static int[][] extractCounts(Frequencies[] frequenciesArr) {
            ?? r0 = new int[frequenciesArr.length];
            for (int i = 0; i < frequenciesArr.length; i++) {
                r0[i] = frequenciesArr[i].counts;
            }
            return r0;
        }
    }

    /* loaded from: input_file:org/apache/mahout/classifier/df/tools/FrequenciesJob$FrequenciesMapper.class */
    /**
     * Mapper that emits, for every input line, the label code of that line keyed by
     * the key of the first record this mapper instance received.
     */
    private static class FrequenciesMapper extends Mapper<LongWritable, Text, LongWritable, IntWritable> {
        /** Key of the first record seen by this mapper; reused as the output key for all records. */
        private LongWritable firstId;
        /** Converts a text line into the instance representation understood by {@code dataset}. */
        private DataConverter converter;
        /** Dataset description used to look up the label of a converted instance. */
        private Dataset dataset;

        private FrequenciesMapper() {
        }

        /**
         * Loads the dataset through {@code Builder.loadDataset} and initializes this mapper with it.
         *
         * @param context task context supplying the job configuration
         * @throws IOException if the dataset cannot be loaded
         * @throws InterruptedException declared by the overridden method
         */
        @Override // org.apache.hadoop.mapreduce.Mapper
        public void setup(Mapper<LongWritable, Text, LongWritable, IntWritable>.Context context) throws IOException, InterruptedException {
            setup(Builder.loadDataset(context.getConfiguration()));
        }

        /**
         * Initializes this mapper from an already loaded dataset.
         *
         * <p>Both the dataset and the converter are set here, so this overload is
         * sufficient on its own before calling {@code map}.
         *
         * @param dataset dataset description to use
         */
        void setup(Dataset dataset) {
            this.dataset = dataset;
            this.converter = new DataConverter(dataset);
        }

        /**
         * Converts the line, reads its label and writes it under this mapper's first key.
         *
         * @param longWritable key of the current record
         * @param text current input line
         * @param context sink for the (first key, label code) pair
         * @throws IOException if writing the output fails
         * @throws InterruptedException if writing the output is interrupted
         */
        @Override // org.apache.hadoop.mapreduce.Mapper
        public void map(LongWritable longWritable, Text text, Mapper<LongWritable, Text, LongWritable, IntWritable>.Context context) throws IOException, InterruptedException {
            if (this.firstId == null) {
                // Copy the value: keep our own object rather than a reference to the caller's key.
                this.firstId = new LongWritable(longWritable.get());
            }
            int labelCode = (int) this.dataset.getLabel(this.converter.convert(text.toString()));
            context.write(this.firstId, new IntWritable(labelCode));
        }
    }

    /* loaded from: input_file:org/apache/mahout/classifier/df/tools/FrequenciesJob$FrequenciesReducer.class */
    /**
     * Reducer that tallies, for each key, how many times every label code occurs
     * among the values and emits the tally as a {@link Frequencies}.
     */
    private static class FrequenciesReducer extends Reducer<LongWritable, IntWritable, LongWritable, Frequencies> {
        /** Size of the counters array allocated for each key. */
        private int nblabels;

        private FrequenciesReducer() {
        }

        /**
         * Reads the number of labels from the dataset loaded through {@code Builder.loadDataset}.
         *
         * @param context task context supplying the job configuration
         * @throws IOException if the dataset cannot be loaded
         * @throws InterruptedException declared by the overridden method
         */
        @Override // org.apache.hadoop.mapreduce.Reducer
        public void setup(Reducer<LongWritable, IntWritable, LongWritable, Frequencies>.Context context) throws IOException, InterruptedException {
            Configuration conf = context.getConfiguration();
            Dataset loaded = Builder.loadDataset(conf);
            setup(loaded.nblabels());
        }

        /**
         * Sets the number of labels directly.
         *
         * @param i number of distinct label codes
         */
        void setup(int i) {
            this.nblabels = i;
        }

        /**
         * Counts the occurrences of each label code for one key and writes the result.
         *
         * @param longWritable key shared by all the values
         * @param iterable label codes, each used as an index into the counters
         * @param context sink for the (key, frequencies) pair
         * @throws IOException if writing the output fails
         * @throws InterruptedException if writing the output is interrupted
         */
        @Override // org.apache.hadoop.mapreduce.Reducer
        public void reduce(LongWritable longWritable, Iterable<IntWritable> iterable, Reducer<LongWritable, IntWritable, LongWritable, Frequencies>.Context context) throws IOException, InterruptedException {
            int[] tally = new int[this.nblabels];
            for (IntWritable labelCode : iterable) {
                tally[labelCode.get()]++;
            }
            Frequencies result = new Frequencies(longWritable.get(), tally);
            context.write(longWritable, result);
        }
    }

    /**
     * Creates a job description.
     *
     * @param path directory under which the job's {@code frequencies.output} path is placed
     * @param path2 path stored as the data path (used as the job input by {@code run})
     * @param path3 path stored as the dataset path (registered as a cache file by {@code run})
     */
    public FrequenciesJob(Path path, Path path2, Path path3) {
        this.datasetPath = path3;
        this.dataPath = path2;
        // The output always lives in a fixed child of the supplied base path.
        this.outputPath = new Path(path, "frequencies.output");
    }

    /**
     * Configures and runs the frequencies job, reads back its output and then deletes
     * the output path.
     *
     * @param configuration configuration the job is built from
     * @return the counters parsed from the job output by {@code parseOutput}
     * @throws IOException if the output path already exists or a file-system operation fails
     * @throws ClassNotFoundException propagated from job execution
     * @throws InterruptedException propagated from job execution
     * @throws IllegalStateException if the job does not complete successfully
     */
    public int[][] run(Configuration configuration) throws IOException, ClassNotFoundException, InterruptedException {
        // Refuse to overwrite the results of a previous run.
        boolean outputAlreadyThere = this.outputPath.getFileSystem(configuration).exists(this.outputPath);
        if (outputAlreadyThere) {
            throw new IOException("Output path already exists : " + this.outputPath);
        }

        // Register the dataset as a cache file before the Job is created from the configuration.
        // NOTE(review): presumably Job works on its own copy of the configuration, so this
        // ordering matters - confirm before moving this call.
        URI[] cacheFiles = {this.datasetPath.toUri()};
        DistributedCache.setCacheFiles(cacheFiles, configuration);

        Job frequenciesJob = new Job(configuration);
        frequenciesJob.setJarByClass(FrequenciesJob.class);

        // Input side: plain text lines read from the data path.
        frequenciesJob.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.setInputPaths(frequenciesJob, this.dataPath);

        // Map stage: (first key, label code).
        frequenciesJob.setMapperClass(FrequenciesMapper.class);
        frequenciesJob.setMapOutputKeyClass(LongWritable.class);
        frequenciesJob.setMapOutputValueClass(IntWritable.class);

        // Reduce stage: (first key, frequencies) written as a sequence file.
        frequenciesJob.setReducerClass(FrequenciesReducer.class);
        frequenciesJob.setOutputKeyClass(LongWritable.class);
        frequenciesJob.setOutputValueClass(Frequencies.class);
        frequenciesJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileOutputFormat.setOutputPath(frequenciesJob, this.outputPath);

        boolean succeeded = frequenciesJob.waitForCompletion(true);
        if (!succeeded) {
            throw new IllegalStateException("Job failed!");
        }

        int[][] counts = parseOutput(frequenciesJob);
        // The output is only an intermediate result; remove it once parsed.
        HadoopUtil.delete(configuration, this.outputPath);
        return counts;
    }

    /**
     * Reads every {@link Frequencies} value from the job's output files and returns
     * their counters ordered by ascending {@code firstId}.
     *
     * <p>The {@code mapred.map.tasks} setting is used as the expected minimum number of
     * values and as the initial buffer size. The buffer grows on demand, so an unset
     * (negative) setting or more values than expected no longer causes an
     * array-size or array-index failure.
     *
     * @param jobContext context of the completed job, supplying its configuration
     * @return one row of counters per {@link Frequencies} value found in the output
     * @throws IOException if the output files cannot be listed or read
     * @throws IllegalStateException if fewer values were found than {@code mapred.map.tasks}
     */
    int[][] parseOutput(JobContext jobContext) throws IOException {
        Configuration configuration = jobContext.getConfiguration();
        int numMaps = configuration.getInt("mapred.map.tasks", -1);
        log.info("mapred.map.tasks = {}", Integer.valueOf(numMaps));
        Path[] outputFiles = DFUtils.listOutputFiles(this.outputPath.getFileSystem(configuration), this.outputPath);

        // Start from the expected size, but never from a negative one.
        Frequencies[] collected = new Frequencies[Math.max(numMaps, 0)];
        int count = 0;
        for (Path file : outputFiles) {
            for (Frequencies value : new SequenceFileValueIterable<Frequencies>(file, configuration)) {
                if (count == collected.length) {
                    // More values than anticipated: grow instead of overflowing the array.
                    collected = Arrays.copyOf(collected, Math.max(1, 2 * collected.length));
                }
                collected[count++] = value;
            }
        }
        if (count < numMaps) {
            throw new IllegalStateException("number of output Frequencies (" + count + ") is lesser than the number of mappers!");
        }
        if (count != collected.length) {
            // Drop unused trailing slots so the sort never meets a null element.
            collected = Arrays.copyOf(collected, count);
        }
        Arrays.sort(collected);
        return Frequencies.extractCounts(collected);
    }
}
