package io.openlineage.spark.agent.lifecycle.plan;

import io.openlineage.client.OpenLineage;
import io.openlineage.spark.agent.util.PlanUtils;
import io.openlineage.spark.agent.util.ScalaConversionUtils;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Deque;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Set;
import java.util.Stack;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.spark.rdd.HadoopRDD;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;
import org.apache.spark.sql.execution.LogicalRDD;

/* loaded from: input_file:io/openlineage/spark/agent/lifecycle/plan/LogicalRDDVisitor.class */
/**
 * Query plan visitor for {@link LogicalRDD} nodes.
 *
 * <p>The visitor walks the dependency graph of the RDD wrapped by the {@link LogicalRDD}, collects
 * every {@link HadoopRDD} it reaches, and turns the input paths configured on those RDDs into
 * {@link OpenLineage.Dataset} instances.
 */
public class LogicalRDDVisitor extends QueryPlanVisitor<LogicalRDD, OpenLineage.Dataset> {

    /**
     * Reports whether this visitor can handle the given plan node.
     *
     * @param logicalPlan the plan node to test
     * @return {@code true} only when the node is a {@link LogicalRDD} and at least one
     *     {@link HadoopRDD} is reachable through the dependencies of its RDD
     */
    @Override // io.openlineage.spark.agent.lifecycle.plan.QueryPlanVisitor
    public boolean isDefinedAt(LogicalPlan logicalPlan) {
        return logicalPlan instanceof LogicalRDD
                && !findHadoopRdds((LogicalRDD) logicalPlan).isEmpty();
    }

    /**
     * Collects every {@link HadoopRDD} reachable from the RDD of the given node, including that
     * RDD itself.
     *
     * <p>The dependency graph is walked depth-first with an explicit LIFO work list. Each RDD
     * instance is expanded at most once, so an ancestor that is reachable through several
     * dependency paths is neither re-traversed nor reported twice.
     *
     * @param logicalRDD the plan node whose RDD lineage is inspected
     * @return the Hadoop RDDs found, in the order they were first visited; empty when none exist
     */
    private List<HadoopRDD<?, ?>> findHadoopRdds(LogicalRDD logicalRDD) {
        List<HadoopRDD<?, ?>> hadoopRdds = new ArrayList<>();
        // Identity semantics: two RDD objects are "the same node" only if they are the same instance.
        Set<RDD<?>> visited = Collections.newSetFromMap(new IdentityHashMap<>());
        Deque<RDD<?>> pending = new ArrayDeque<>();
        pending.addLast(logicalRDD.rdd());
        while (!pending.isEmpty()) {
            RDD<?> current = pending.removeLast();
            if (!visited.add(current)) {
                // Already expanded via another dependency path; its ancestors are covered.
                continue;
            }
            // Parents are appended in dependency order and taken from the tail (LIFO).
            ScalaConversionUtils.fromSeq(current.getDependencies())
                    .forEach(dependency -> pending.addLast(dependency.rdd()));
            if (current instanceof HadoopRDD) {
                hadoopRdds.add((HadoopRDD<?, ?>) current);
            }
        }
        return hadoopRdds;
    }

    /**
     * Builds the datasets read by the given {@link LogicalRDD}.
     *
     * <p>For every {@link HadoopRDD} in the lineage, the input paths registered on its job
     * configuration are passed through {@link PlanUtils#getDirectoryPath} together with the RDD's
     * Hadoop configuration. Duplicate resulting paths are dropped, and each remaining path is
     * converted to a dataset using the schema of the {@link LogicalRDD}.
     *
     * @param logicalPlan the plan node to convert; must be a {@link LogicalRDD}
     * @return one dataset per distinct resolved path; empty when no Hadoop RDD is found
     * @throws ClassCastException if {@code logicalPlan} is not a {@link LogicalRDD}
     */
    @Override
    public List<OpenLineage.Dataset> apply(LogicalPlan logicalPlan) {
        LogicalRDD logicalRDD = (LogicalRDD) logicalPlan;
        return findHadoopRdds(logicalRDD).stream()
                .flatMap(hadoopRDD -> {
                    Path[] inputPaths = FileInputFormat.getInputPaths(hadoopRDD.getJobConf());
                    Configuration conf = hadoopRDD.getConf();
                    return Arrays.stream(inputPaths)
                            .map(path -> PlanUtils.getDirectoryPath(path, conf));
                })
                .distinct()
                .map(path -> PlanUtils.getDataset(path.toUri(), logicalRDD.schema()))
                .collect(Collectors.toList());
    }
}
