/*
 * Decompiled with CFR 0.152.
 */
package cc.unitmesh.rag.splitter;

import cc.unitmesh.rag.document.Document;
import cc.unitmesh.rag.splitter.HeaderType;
import cc.unitmesh.rag.splitter.LineType;
import cc.unitmesh.rag.splitter.MarkdownHeaderTextSplitter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;

@Metadata(mv={1, 9, 0}, k=1, xi=48, d1={"\u0000H\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0000\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\b\u0002\n\u0002\u0010\u000b\n\u0002\b\u0002\n\u0002\u0010!\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0007\n\u0002\u0010$\n\u0000\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010%\n\u0002\b\u0004\u0018\u00002\u00020\u0001B!\b\u0016\u0012\u0018\u0010\u0002\u001a\u0014\u0012\u0010\u0012\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00050\u00040\u0003\u00a2\u0006\u0002\u0010\u0006B)\u0012\u001a\u0010\u0002\u001a\u0016\u0012\u0012\u0012\u0010\u0012\u0004\u0012\u00020\u0005\u0012\u0006\u0012\u0004\u0018\u00010\u00050\u00040\u0003\u0012\u0006\u0010\u0007\u001a\u00020\b\u00a2\u0006\u0002\u0010\tJ\u001c\u0010\n\u001a\b\u0012\u0004\u0012\u00020\f0\u000b2\f\u0010\r\u001a\b\u0012\u0004\u0012\u00020\u000e0\u0003H\u0002J\u0018\u0010\u000f\u001a\u00020\b2\u0006\u0010\u0010\u001a\u00020\u00052\u0006\u0010\u0011\u001a\u00020\u0005H\u0002Jb\u0010\u0012\u001a\u00020\b2\u0006\u0010\u0010\u001a\u00020\u00052\f\u0010\u0013\u001a\b\u0012\u0004\u0012\u00020\u000e0\u000b2\f\u0010\u0014\u001a\b\u0012\u0004\u0012\u00020\u00050\u000b2\u0012\u0010\u0015\u001a\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00010\u00162\f\u0010\u0017\u001a\b\u0012\u0004\u0012\u00020\u00180\u000b2\u0012\u0010\u0019\u001a\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00050\u001aH\u0002J>\u0010\u001b\u001a\b\u0012\u0004\u0012\u00020\f0\u00032\f\u0010\u0013\u001a\b\u0012\u0004\u0012\u00020\u000e0\u000b2\f\u0010\u0014\u001a\b\u0012\u0004\u0012\u00020\u00050\u00032\u0012\u0010\u0015\u001a\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00010\u0016H\u0002J\u0014\u0010\u001c\u001a\b\u0012\u0004\u0012\u00020\f0\u00032\u0006\u0010\u001d\u001a\u00020\u0005R\"\u0010\u0002\u001a\u0016\u0012\u0012\u0012\u0010\u0012\u0004\u0012\u00020\u0005\u0012\u0006\u0012\u0004\u0018\u00010\u00050\u00040\u0003X
\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u000e\u0010\u0007\u001a\u00020\bX\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u001e"}, d2={"Lcc/unitmesh/rag/splitter/MarkdownHeaderTextSplitter;", "", "headersToSplitOn", "", "Lkotlin/Pair;", "", "(Ljava/util/List;)V", "returnEachLine", "", "(Ljava/util/List;Z)V", "aggregateLinesToChunks", "", "Lcc/unitmesh/rag/document/Document;", "lines", "Lcc/unitmesh/rag/splitter/LineType;", "isHeaderToSplitOn", "strippedLine", "sep", "processLine", "linesWithMetadata", "currentContent", "currentMetadata", "", "headerStack", "Lcc/unitmesh/rag/splitter/HeaderType;", "initialMetadata", "", "processOutput", "splitText", "text", "cocoa-core"})
@SourceDebugExtension(value={"SMAP\nMarkdownHeaderTextSplitter.kt\nKotlin\n*S Kotlin\n*F\n+ 1 MarkdownHeaderTextSplitter.kt\ncc/unitmesh/rag/splitter/MarkdownHeaderTextSplitter\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n+ 3 ArraysJVM.kt\nkotlin/collections/ArraysKt__ArraysJVMKt\n+ 4 _Strings.kt\nkotlin/text/StringsKt___StringsKt\n*L\n1#1,192:1\n731#2,9:193\n37#3,2:202\n1099#4,3:204\n*S KotlinDebug\n*F\n+ 1 MarkdownHeaderTextSplitter.kt\ncc/unitmesh/rag/splitter/MarkdownHeaderTextSplitter\n*L\n94#1:193,9\n94#1:202,2\n134#1:204,3\n*E\n"})
/**
 * Splits Markdown text into {@link Document}s at configurable header prefixes.
 *
 * <p>Each configured header is a pair of (prefix, metadata key), for example
 * {@code ("##", "Header 2")}. While scanning the text line by line, the splitter keeps a stack of
 * the headers currently "open" and attaches their titles, keyed by metadata key, to the content
 * that follows them.
 *
 * <p>NOTE(review): this file is decompiler output. The expressions {@code 1.INSTANCE},
 * {@code aggregateLinesToChunks.1.INSTANCE} and {@code processOutput.1.INSTANCE} refer to synthetic
 * {@code Function1} singletons whose bodies are not part of this file; they are kept verbatim and
 * must be restored from the original source before this class can be compiled.
 */
public final class MarkdownHeaderTextSplitter {
    /** When {@code true}, every collected entry becomes its own document instead of being merged. */
    private final boolean returnEachLine;

    /** Header (prefix, metadata key) pairs in the order they are tried against each line. */
    @NotNull
    private final List<Pair<String, String>> headersToSplitOn;

    /**
     * Creates a splitter.
     *
     * @param headersToSplitOn (prefix, metadata key) pairs; a pair whose metadata key is
     *     {@code null} still acts as a split point but records no metadata
     * @param returnEachLine {@code true} to emit one document per collected entry, {@code false}
     *     to merge consecutive entries that share the same metadata
     */
    public MarkdownHeaderTextSplitter(@NotNull List<Pair<String, String>> headersToSplitOn, boolean returnEachLine) {
        Intrinsics.checkNotNullParameter(headersToSplitOn, "headersToSplitOn");
        this.returnEachLine = returnEachLine;
        // NOTE(review): the sort key is computed by the synthetic `1.INSTANCE` function, which is
        // not visible here; the headers are sorted by that key in descending order.
        this.headersToSplitOn = headersToSplitOn.stream()
                .sorted(Comparator.comparingInt(arg_0 -> MarkdownHeaderTextSplitter._init_$lambda$0(1.INSTANCE, arg_0)).reversed())
                .toList();
    }

    /**
     * Creates a splitter that merges consecutive entries sharing the same metadata
     * (equivalent to passing {@code returnEachLine = false}).
     *
     * @param headersToSplitOn (prefix, metadata key) pairs to split on
     */
    public MarkdownHeaderTextSplitter(@NotNull List<Pair<String, String>> headersToSplitOn) {
        // The delegated constructor performs the null check, so nothing may (or needs to) run
        // before the explicit constructor call.
        this(headersToSplitOn, false);
    }

    /**
     * Merges consecutive entries with equal metadata and converts the result to documents.
     *
     * <p>Merged content is joined with {@code "  \n"}. Merging mutates the first entry of each run
     * in place, so elements of {@code lines} may be modified.
     *
     * @param lines entries in document order
     * @return one document per run of entries with equal metadata
     */
    private final List<Document> aggregateLinesToChunks(List<LineType> lines) {
        List<LineType> aggregatedChunks = new ArrayList<>();
        for (LineType line : lines) {
            LineType lastChunk = aggregatedChunks.isEmpty() ? null : aggregatedChunks.get(aggregatedChunks.size() - 1);
            if (lastChunk != null && Intrinsics.areEqual(lastChunk.getMetadata(), line.getMetadata())) {
                lastChunk.setContent(lastChunk.getContent() + "  \n" + line.getContent());
            } else {
                aggregatedChunks.add(line);
            }
        }
        // NOTE(review): the LineType -> Document conversion lives in a synthetic function object
        // that is not visible in this file.
        return aggregatedChunks.stream()
                .map(arg_0 -> MarkdownHeaderTextSplitter.aggregateLinesToChunks$lambda$1(aggregateLinesToChunks.1.INSTANCE, arg_0))
                .toList();
    }

    /**
     * Splits {@code text} into documents at the configured headers.
     *
     * <p>The text is split on {@code "\n"}; trailing empty lines are ignored. Every line is
     * stripped of surrounding whitespace. Header lines and empty lines end the chunk collected so
     * far; all other lines are appended to the current chunk.
     *
     * @param text the Markdown text to split
     * @return the resulting documents, merged or per entry depending on {@code returnEachLine}
     */
    @NotNull
    public final List<Document> splitText(@NotNull String text) {
        Intrinsics.checkNotNullParameter(text, "text");
        List<LineType> linesWithMetadata = new ArrayList<>();
        List<String> currentContent = new ArrayList<>();
        Map<String, Object> currentMetadata = MapsKt.emptyMap();
        List<HeaderType> headerStack = new ArrayList<>();
        Map<String, String> initialMetadata = new LinkedHashMap<>();

        for (String line : MarkdownHeaderTextSplitter.splitDroppingTrailingEmptyLines(text)) {
            String strippedLine = line.strip();
            // processLine flushes pending content with the metadata that was active *before* this
            // line, and updates headerStack/initialMetadata if the line is a tracked header.
            boolean foundHeader = this.processLine(strippedLine, linesWithMetadata, currentContent, currentMetadata, headerStack, initialMetadata);
            if (!foundHeader && !strippedLine.isEmpty()) {
                currentContent.add(strippedLine);
            } else {
                // Empty line (or a header, for which processLine has already flushed).
                MarkdownHeaderTextSplitter.flushContent(linesWithMetadata, currentContent, currentMetadata);
            }
            // Snapshot the header metadata so later stack changes do not alter emitted entries.
            currentMetadata = new HashMap<>(initialMetadata);
        }
        return this.processOutput(linesWithMetadata, currentContent, currentMetadata);
    }

    /** Splits {@code text} on {@code "\n"} and drops the trailing run of empty strings. */
    private static List<String> splitDroppingTrailingEmptyLines(String text) {
        // Kotlin's Regex.split keeps trailing empty strings, so they are trimmed explicitly.
        List<String> rawLines = new Regex("\n").split(text, 0);
        int keep = rawLines.size();
        while (keep > 0 && rawLines.get(keep - 1).isEmpty()) {
            --keep;
        }
        return rawLines.subList(0, keep);
    }

    /** Moves any pending content lines into a new {@code "\n"}-joined entry and clears the buffer. */
    private static void flushContent(List<LineType> linesWithMetadata, List<String> currentContent, Map<String, ? extends Object> currentMetadata) {
        if (currentContent.isEmpty()) {
            return;
        }
        linesWithMetadata.add(new LineType(String.join("\n", currentContent), currentMetadata));
        currentContent.clear();
    }

    /**
     * Checks whether {@code strippedLine} is one of the configured headers and, if so, updates the
     * header state and flushes the content collected so far.
     *
     * @param strippedLine the current line without surrounding whitespace
     * @param linesWithMetadata output list that receives flushed entries
     * @param currentContent pending content lines; cleared when flushed
     * @param currentMetadata metadata to attach to the flushed content
     * @param headerStack currently open headers, innermost last; updated in place
     * @param initialMetadata metadata key to header title for the open headers; updated in place
     * @return {@code true} if the line matched a configured header
     */
    private final boolean processLine(String strippedLine, List<LineType> linesWithMetadata, List<String> currentContent, Map<String, ? extends Object> currentMetadata, List<HeaderType> headerStack, Map<String, String> initialMetadata) {
        for (Pair<String, String> pair : this.headersToSplitOn) {
            String sep = pair.getFirst();
            String name = pair.getSecond();
            if (!this.isHeaderToSplitOn(strippedLine, sep)) {
                continue;
            }
            if (name != null) {
                // The header level is the number of '#' characters in the prefix.
                int currentHeaderLevel = (int) sep.chars().filter(c -> c == '#').count();
                // Close every open header at the same or a deeper level.
                while (!headerStack.isEmpty() && headerStack.get(headerStack.size() - 1).getLevel() >= currentHeaderLevel) {
                    HeaderType poppedHeader = headerStack.remove(headerStack.size() - 1);
                    initialMetadata.remove(poppedHeader.getName());
                }
                String title = strippedLine.substring(sep.length()).strip();
                HeaderType header = new HeaderType(currentHeaderLevel, name, title);
                headerStack.add(header);
                initialMetadata.put(name, header.getData());
            }
            MarkdownHeaderTextSplitter.flushContent(linesWithMetadata, currentContent, currentMetadata);
            return true;
        }
        return false;
    }

    /**
     * Returns whether {@code strippedLine} starts with {@code sep} and {@code sep} is either the
     * whole line or is immediately followed by a space.
     */
    private final boolean isHeaderToSplitOn(String strippedLine, String sep) {
        if (!strippedLine.startsWith(sep)) {
            return false;
        }
        return strippedLine.length() == sep.length() || strippedLine.charAt(sep.length()) == ' ';
    }

    /**
     * Flushes any remaining content and converts the collected entries to documents.
     *
     * @param linesWithMetadata entries collected so far; the final entry is appended here
     * @param currentContent pending content lines not yet flushed
     * @param currentMetadata metadata to attach to the pending content
     * @return merged documents when {@code returnEachLine} is {@code false}, otherwise one
     *     document per entry
     */
    private final List<Document> processOutput(List<LineType> linesWithMetadata, List<String> currentContent, Map<String, ? extends Object> currentMetadata) {
        if (!currentContent.isEmpty()) {
            linesWithMetadata.add(new LineType(String.join("\n", currentContent), currentMetadata));
        }
        if (!this.returnEachLine) {
            return this.aggregateLinesToChunks(linesWithMetadata);
        }
        // NOTE(review): the LineType -> Document conversion lives in a synthetic function object
        // that is not visible in this file.
        List<Document> documents = linesWithMetadata.stream()
                .map(arg_0 -> MarkdownHeaderTextSplitter.processOutput$lambda$4(processOutput.1.INSTANCE, arg_0))
                .toList();
        return documents;
    }

    /** Adapter: invokes {@code $tmp0} on {@code p0} and unboxes the numeric result to {@code int}. */
    private static final int _init_$lambda$0(Function1 $tmp0, Object p0) {
        Intrinsics.checkNotNullParameter((Object)$tmp0, (String)"$tmp0");
        return ((Number)$tmp0.invoke(p0)).intValue();
    }

    /** Adapter: invokes {@code $tmp0} on {@code p0} and casts the result to {@link Document}. */
    private static final Document aggregateLinesToChunks$lambda$1(Function1 $tmp0, Object p0) {
        Intrinsics.checkNotNullParameter((Object)$tmp0, (String)"$tmp0");
        return (Document)$tmp0.invoke(p0);
    }

    /** Adapter: invokes {@code $tmp0} on {@code p0} and casts the result to {@link Document}. */
    private static final Document processOutput$lambda$4(Function1 $tmp0, Object p0) {
        Intrinsics.checkNotNullParameter((Object)$tmp0, (String)"$tmp0");
        return (Document)$tmp0.invoke(p0);
    }
}

