package com.khala.extractor;

import com.aliasi.sentences.IndoEuropeanSentenceModel;
import com.aliasi.sentences.SentenceModel;
import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
import com.aliasi.tokenizer.TokenizerFactory;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:com/khala/extractor/ArticleUtil.class */
/**
 * Splits text into sentences using LingPipe's Indo-European tokenizer and sentence model.
 *
 * <p>Full-width (Chinese) punctuation is temporarily mapped to ASCII punctuation followed by a
 * space before tokenizing; the returned sentences are cut from the caller's original text, so
 * that temporary mapping never shows up in the result.
 */
public class ArticleUtil {
    static final TokenizerFactory TOKENIZER_FACTORY = IndoEuropeanTokenizerFactory.INSTANCE;
    static final SentenceModel SENTENCE_MODEL = new IndoEuropeanSentenceModel();

    /** Full-width punctuation marks; entry {@code i} is replaced by {@code EnglishInterpunction[i]}. */
    static String[] ChineseInterpunction = {"。", "，", "；", "：", "？", "！", "……", "—", "～", "（", "）", "《", "》"};

    /** ASCII-style stand-ins, index-aligned with {@link #ChineseInterpunction}. */
    static String[] EnglishInterpunction = {".", ",", ";", ":", "?", "!", "…", "-", "~", "(", ")", "<", ">"};

    /**
     * Demo entry point: splits a fixed sample paragraph and prints one sentence per line, or a
     * notice when no sentence was found.
     *
     * @param strArr command-line arguments (unused)
     */
    public static void main(String[] strArr) {
        List<String> sentences = getSentences("泼水节是世界上最重要节日之一，深受中国傣族和东南亚人民的喜爱。七百多年来，人们一直在庆祝这个节日，现在这个节日是促进国家间合作和交流的必要方式。");
        if (sentences.isEmpty()) {
            System.out.println("没有识别到句子");
        }
        for (String sentence : sentences) {
            System.out.println(sentence);
        }
    }

    /**
     * Splits {@code str} into sentences.
     *
     * <p>Each returned element is a substring of {@code str}: it starts at the first token of the
     * sentence and runs up to (not including) the first token of the next sentence, so any
     * whitespace that follows the sentence-final token in the input is kept. Tokens that come
     * after the last boundary reported by the sentence model are not part of any returned
     * sentence.
     *
     * @param str text to split; {@code null} or empty yields an empty list
     * @return a new mutable list of sentences in input order, possibly empty
     */
    public static List<String> getSentences(String str) {
        List<String> sentences = new ArrayList<String>();
        if (str == null || str.length() == 0) {
            return sentences;
        }

        // Widest expansion of one source position: a stand-in mark plus the injected space.
        int widest = 1;
        for (String standIn : EnglishInterpunction) {
            widest = Math.max(widest, standIn.length() + 1);
        }

        // Build the normalised text. origin[p] is the offset in str that normalised offset p
        // corresponds to; an injected space maps to the offset just past the mark it follows,
        // which keeps origin non-decreasing.
        int length = str.length();
        StringBuilder normalized = new StringBuilder(length * 2);
        int[] origin = new int[length * widest + 1];
        int cursor = 0;
        while (cursor < length) {
            int mark = interpunctionIndexAt(str, cursor);
            if (mark < 0) {
                origin[normalized.length()] = cursor;
                normalized.append(str.charAt(cursor));
                cursor++;
                continue;
            }
            String standIn = EnglishInterpunction[mark];
            for (int k = 0; k < standIn.length(); k++) {
                origin[normalized.length()] = cursor;
                normalized.append(standIn.charAt(k));
            }
            cursor += ChineseInterpunction[mark].length();
            // NOTE(review): the space is presumably needed because the sentence model only
            // breaks where whitespace follows the stop token - confirm against LingPipe docs.
            origin[normalized.length()] = cursor;
            normalized.append(' ');
        }
        int normalizedLength = normalized.length();
        origin[normalizedLength] = length;

        List<String> tokenList = new ArrayList<String>();
        List<String> whitespaceList = new ArrayList<String>();
        TOKENIZER_FACTORY.tokenizer(normalized.toString().toCharArray(), 0, normalizedLength)
                .tokenize(tokenList, whitespaceList);
        String[] tokens = tokenList.toArray(new String[tokenList.size()]);
        String[] whitespaces = whitespaceList.toArray(new String[whitespaceList.size()]);
        if (tokens.length == 0 || whitespaces.length <= tokens.length) {
            return sentences;
        }

        // Offsets of each token inside the normalised text.
        // Assumes whitespaces[0] + tokens[0] + whitespaces[1] + ... reproduces the tokenizer
        // input exactly - TODO confirm for the tokenizer in use; offsets are clamped below so a
        // mismatch cannot index out of bounds.
        int[] tokenStart = new int[tokens.length + 1];
        int offset = whitespaces[0].length();
        for (int t = 0; t < tokens.length; t++) {
            tokenStart[t] = Math.min(offset, normalizedLength);
            offset += tokens[t].length() + whitespaces[t + 1].length();
        }
        tokenStart[tokens.length] = normalizedLength;

        int firstToken = 0;
        for (int boundary : SENTENCE_MODEL.boundaryIndices(tokens, whitespaces)) {
            int from = origin[tokenStart[firstToken]];
            int to = origin[tokenStart[boundary + 1]];
            // Cut from the untouched input so ASCII punctuation the caller wrote is preserved
            // and the injected spaces are not returned.
            sentences.add(str.substring(from, to));
            firstToken = boundary + 1;
        }
        return sentences;
    }

    /**
     * Returns the index in {@link #ChineseInterpunction} of the mark that starts at
     * {@code position} in {@code text}, or {@code -1} when none does.
     */
    private static int interpunctionIndexAt(String text, int position) {
        for (int i = 0; i < ChineseInterpunction.length; i++) {
            String mark = ChineseInterpunction[i];
            if (mark.length() > 0 && text.startsWith(mark, position)) {
                return i;
            }
        }
        return -1;
    }
}
