/*
 * Decompiled with CFR 0.152.
 */
package io.annot8.components.text.processors;

import io.annot8.api.annotations.Annotation;
import io.annot8.api.bounds.Bounds;
import io.annot8.api.capabilities.Capabilities;
import io.annot8.api.components.annotations.ComponentDescription;
import io.annot8.api.components.annotations.ComponentName;
import io.annot8.api.context.Context;
import io.annot8.api.settings.NoSettings;
import io.annot8.common.components.AbstractProcessorDescriptor;
import io.annot8.common.components.capabilities.SimpleCapabilities;
import io.annot8.common.data.bounds.SpanBounds;
import io.annot8.common.data.content.Text;
import io.annot8.components.base.text.processors.AbstractTextProcessor;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Descriptor for a processor that naively splits {@link Text} content into paragraphs.
 *
 * <p>A paragraph is a maximal run of non-empty lines in which consecutive lines are separated by
 * a single line break ({@code \r}, {@code \n} or {@code \r\n}). Two or more consecutive line
 * breaks therefore end a paragraph. A line containing only spaces or tabs is still "non-empty"
 * for this purpose and does not split a paragraph.
 *
 * <p>Every paragraph found is saved as a {@code grammar/paragraph} annotation bounded by a
 * {@link SpanBounds} built from the match's start and end offsets.
 */
@ComponentName(value="Naive Paragraph")
@ComponentDescription(value="Naively extract paragraphs by looking for multiple new line characters between lines")
public class NaiveParagraph
extends AbstractProcessorDescriptor<NaiveParagraph.Processor, NoSettings> {
    /**
     * Annotation type shared by the declared capabilities and the annotations actually created,
     * kept in one place so the two cannot drift apart.
     */
    private static final String PARAGRAPH_TYPE = "grammar/paragraph";

    /**
     * Creates the processor instance.
     *
     * @param context  unused; the processor needs no resources
     * @param settings unused; this component has no settings
     * @return a new {@link Processor}
     */
    protected Processor createComponent(Context context, NoSettings settings) {
        return new Processor();
    }

    /**
     * Declares what this component consumes and produces.
     *
     * @return capabilities stating that {@link Text} content is processed and that
     *         {@code grammar/paragraph} annotations with {@link SpanBounds} are created
     */
    public Capabilities capabilities() {
        return new SimpleCapabilities.Builder()
            .withProcessesContent(Text.class)
            .withCreatesAnnotations(PARAGRAPH_TYPE, SpanBounds.class)
            .build();
    }

    /** Text processor that annotates each paragraph matched by {@link #PARAGRAPH_REGEX}. */
    public static class Processor
    extends AbstractTextProcessor {
        /**
         * Matches one paragraph: a non-empty line followed by any number of
         * (single line break + non-empty line) pairs.
         *
         * <p>NOTE(review): java.util.regex may evaluate the repeated group recursively, so a
         * single paragraph spanning a very large number of lines could risk a
         * {@link StackOverflowError} - confirm against expected document sizes.
         */
        private static final Pattern PARAGRAPH_REGEX = Pattern.compile("[^\\r\\n]+((\\r|\\n|\\r\\n)[^\\r\\n]+)*");

        /**
         * Finds every paragraph in the content and saves one annotation per match.
         *
         * @param content the text content to scan; annotations are written to its annotation store
         */
        protected void process(Text content) {
            Matcher m = PARAGRAPH_REGEX.matcher((CharSequence)content.getData());
            while (m.find()) {
                // Span covers exactly the matched paragraph text; the line breaks that separate
                // paragraphs are not part of any match.
                Bounds paragraphBounds = (Bounds)new SpanBounds(m.start(), m.end());
                ((Annotation.Builder)content.getAnnotations().create().withType(PARAGRAPH_TYPE))
                    .withBounds(paragraphBounds)
                    .save();
            }
        }
    }
}

