package hepple.postag;

import gate.persist.DBHelper;
import gate.util.BomStrippingInputStreamReader;
import gnu.getopt.Getopt;
import gnu.getopt.LongOpt;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

/* loaded from: input_file:hepple/postag/POSTagger.class */
/**
 * Part-of-speech tagger that assigns each word an initial tag from a
 * {@link Lexicon} (or from spelling heuristics when the word is unknown) and
 * then lets context {@link Rule}s revise that tag while the word travels
 * through a seven-slot sliding window.
 *
 * <p>Tagging mutates the public window buffers of the instance without any
 * synchronisation, so concurrent calls on the same instance would interfere
 * with each other.
 */
public class POSTagger {
    /** Number of slots in the sliding window ({@link #wordBuff} and friends). */
    private static final int WINDOW_SIZE = 7;

    /** Index of the window slot whose first lexicon tag selects the rules to try. */
    private static final int FOCUS_INDEX = 3;

    /** Maps a rule's {@code from} value to the list of {@link Rule}s read for it, in file order. */
    protected Map rules;

    /** Word lookup consulted by {@link #classifyWord(String)}. */
    Lexicon lexicon;

    /** Character encoding passed to the lexicon and rules readers; {@code null} selects the readers' default. */
    private String encoding;

    /**
     * Sentinel that pads the window. It is recognised by reference identity
     * ({@code ==}), not by {@code equals}, so that an input token which merely
     * spells "STAART" is still treated as an ordinary word.
     */
    static final String staart = "STAART";

    private final String[] staartLex = {staart};
    private final String[] deflex_NNP = {"NNP"};
    private final String[] deflex_JJ = {"JJ"};
    private final String[] deflex_CD = {"CD"};
    private final String[] deflex_NNS = {"NNS"};
    private final String[] deflex_RB = {"RB"};
    private final String[] deflex_VBG = {"VBG"};
    private final String[] deflex_NN = {"NN"};

    /** Words currently in the window; slot 0 is the oldest, slot 6 the newest. */
    public String[] wordBuff = {staart, staart, staart, staart, staart, staart, staart};

    /** Current tag of each word in the window; rules are handed this tagger and may inspect or change it. */
    public String[] tagBuff = {staart, staart, staart, staart, staart, staart, staart};

    /** Candidate tags (as returned by {@link #classifyWord(String)}) of each word in the window. */
    public String[][] lexBuff = {
        staartLex, staartLex, staartLex, staartLex, staartLex, staartLex, staartLex
    };

    /**
     * Creates a tagger that reads its resources with the default encoding.
     *
     * @param url  location of the lexicon
     * @param url2 location of the rules file
     * @throws InvalidRuleException if a line of the rules file cannot be turned into a rule
     * @throws IOException if a resource cannot be read
     */
    public POSTagger(URL url, URL url2) throws InvalidRuleException, IOException {
        this(url, url2, null);
    }

    /**
     * Creates a tagger, loading the lexicon and then the rules.
     *
     * @param url  location of the lexicon
     * @param url2 location of the rules file
     * @param str  character encoding of both resources, or {@code null} for the default
     * @throws InvalidRuleException if a line of the rules file cannot be turned into a rule
     * @throws IOException if a resource cannot be read
     */
    public POSTagger(URL url, URL url2, String str) throws InvalidRuleException, IOException {
        this.encoding = str;
        this.lexicon = new Lexicon(url, str);
        this.rules = new HashMap();
        readRules(url2);
    }

    /**
     * Instantiates, by reflection, the class {@code hepple.postag.rules.Rule_<str>}.
     *
     * @param str rule identifier appended to the {@code Rule_} class-name prefix
     * @return a new, not yet initialised rule instance
     * @throws InvalidRuleException if the class cannot be loaded or instantiated
     */
    public Rule createNewRule(String str) throws InvalidRuleException {
        try {
            return (Rule) Class.forName("hepple.postag.rules.Rule_" + str).newInstance();
        } catch (Exception e) {
            throw new InvalidRuleException("Could not create rule " + str + "!\n" + e.toString());
        }
    }

    /**
     * Tags a list of sentences.
     *
     * @param list a list of sentences, each itself a list of {@code String} words
     * @return a list with one entry per input sentence; each entry is a list of
     *         two-element {@code String[]} arrays holding {@code {word, tag}}
     */
    public List runTagger(List list) {
        List taggedSentences = new ArrayList();
        for (Iterator sentenceIt = list.iterator(); sentenceIt.hasNext();) {
            List taggedSentence = new ArrayList();
            for (Iterator wordIt = ((List) sentenceIt.next()).iterator(); wordIt.hasNext();) {
                oneStep((String) wordIt.next(), taggedSentence);
            }
            // Push padding until the last real word has moved to slot 0 and been emitted.
            for (int i = 0; i < WINDOW_SIZE - 1; i++) {
                oneStep(staart, taggedSentence);
            }
            taggedSentences.add(taggedSentence);
        }
        return taggedSentences;
    }

    /**
     * Always fails: the encoding is fixed at construction time.
     *
     * @param str ignored
     * @throws IllegalStateException always
     */
    public void setEncoding(String str) {
        throw new IllegalStateException("Cannot change encoding once POS tagger has been constructed.  Use the three argument constructor to specify encoding.");
    }

    /**
     * Advances the window by one word: shifts every buffer one slot towards
     * index 0, places {@code str} in the last slot with its first candidate tag,
     * tries the rules registered for the focus slot, and emits the word that
     * has reached slot 0.
     *
     * @param str  the next word, or {@link #staart} to push padding
     * @param list receives a {@code {word, tag}} array when a real word reaches slot 0
     * @return {@code false} if slot 0 holds padding (nothing was emitted);
     *         otherwise {@code true} exactly when slot 1 holds padding
     */
    protected boolean oneStep(String str, List list) {
        final int last = WINDOW_SIZE - 1;
        System.arraycopy(this.wordBuff, 1, this.wordBuff, 0, last);
        System.arraycopy(this.tagBuff, 1, this.tagBuff, 0, last);
        System.arraycopy(this.lexBuff, 1, this.lexBuff, 0, last);

        this.wordBuff[last] = str;
        this.lexBuff[last] = classifyWord(str);
        this.tagBuff[last] = this.lexBuff[last][0];

        // Rules are looked up by the first lexicon tag of the focus word; the first one that applies wins.
        List candidates = (List) this.rules.get(this.lexBuff[FOCUS_INDEX][0]);
        if (candidates != null) {
            for (Iterator it = candidates.iterator(); it.hasNext();) {
                if (((Rule) it.next()).apply(this)) {
                    break;
                }
            }
        }

        String oldest = this.wordBuff[0];
        if (oldest == staart) { // identity comparison against the sentinel is intentional
            return false;
        }
        list.add(new String[]{oldest, this.tagBuff[0]});
        return this.wordBuff[1] == staart;
    }

    /**
     * Reads rules, one per line, and files each under its {@code from} value
     * in {@link #rules}. A line is split on whitespace; its third token names
     * the rule class (see {@link #createNewRule(String)}) and the full token
     * list is handed to {@code Rule.initialise}. The reader is closed before
     * this method returns, whether or not reading succeeded.
     *
     * @param url location of the rules file
     * @throws IOException if the resource cannot be opened or read
     * @throws InvalidRuleException if a line has fewer than three tokens (this
     *         includes blank lines) or its rule cannot be created
     */
    public void readRules(URL url) throws IOException, InvalidRuleException {
        BomStrippingInputStreamReader reader = this.encoding == null
                ? new BomStrippingInputStreamReader(url.openStream())
                : new BomStrippingInputStreamReader(url.openStream(), this.encoding);
        try {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                ArrayList<String> parts = tokenize(line);
                if (parts.size() < 3) {
                    throw new InvalidRuleException(line);
                }
                Rule rule = createNewRule(parts.get(2));
                rule.initialise(parts);
                List sameFrom = (List) this.rules.get(rule.from);
                if (sameFrom == null) {
                    sameFrom = new ArrayList();
                    this.rules.put(rule.from, sameFrom);
                }
                sameFrom.add(rule);
            }
        } finally {
            reader.close();
        }
    }

    /** Prints the rule map to standard output. */
    public void showRules() {
        System.out.println(this.rules);
    }

    /**
     * Returns the candidate tags for a word. Lexicon entries win; unknown
     * words are guessed from their spelling, in this order: leading ASCII
     * capital, interior hyphen, any ASCII digit, adjective-like suffix,
     * trailing "s", trailing "ly", trailing "ing", and finally a default.
     *
     * @param str the word to classify, or {@link #staart} (by identity) for padding
     * @return a non-empty array of tags; for unknown words a shared one-element array
     */
    protected String[] classifyWord(String str) {
        if (str == staart) { // identity comparison against the sentinel is intentional
            return this.staartLex;
        }
        List known = (List) this.lexicon.get(str);
        if (known != null) {
            return (String[]) known.toArray(new String[known.size()]);
        }
        if (str.length() == 0) {
            // None of the spelling heuristics can match an empty word; use the final default
            // instead of failing on charAt(0).
            return this.deflex_NN;
        }
        char first = str.charAt(0);
        if ('A' <= first && first <= 'Z') {
            return this.deflex_NNP;
        }
        if (hasInteriorHyphen(str)) {
            return this.deflex_JJ;
        }
        if (containsAsciiDigit(str)) {
            return this.deflex_CD;
        }
        if (str.endsWith("ed") || str.endsWith("us") || str.endsWith("ic")
                || str.endsWith("ble") || str.endsWith("ive") || str.endsWith("ary")
                || str.endsWith("ful") || str.endsWith("ical") || str.endsWith("less")) {
            return this.deflex_JJ;
        }
        if (str.endsWith("s")) {
            return this.deflex_NNS;
        }
        if (str.endsWith("ly")) {
            return this.deflex_RB;
        }
        if (str.endsWith("ing")) {
            return this.deflex_VBG;
        }
        return this.deflex_NN;
    }

    /** Tells whether {@code word} has a '-' that is neither its first nor its last character. */
    private static boolean hasInteriorHyphen(String word) {
        for (int i = 1; i < word.length() - 1; i++) {
            if (word.charAt(i) == '-') {
                return true;
            }
        }
        return false;
    }

    /** Tells whether {@code word} contains any character in the range '0'..'9'. */
    private static boolean containsAsciiDigit(String word) {
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            if ('0' <= c && c <= '9') {
                return true;
            }
        }
        return false;
    }

    /**
     * Command-line entry point: tags every file named after the options and
     * prints each input line as space-separated {@code word/tag} pairs.
     * With no arguments it only prints the usage text. Any exception is
     * reported with a stack trace on standard error.
     *
     * @param strArr command-line arguments; see {@code --help}
     */
    public static void main(String[] strArr) {
        if (strArr.length == 0) {
            help();
            return; // nothing to tag, so do not load the lexicon and rules
        }
        try {
            LongOpt[] longOptions = {
                new LongOpt("help", LongOpt.NO_ARGUMENT, (StringBuffer) null, 'h'),
                // These two take a file name, matching the "l:" and "r:" short options.
                new LongOpt("lexicon", LongOpt.REQUIRED_ARGUMENT, (StringBuffer) null, 'l'),
                new LongOpt("rules", LongOpt.REQUIRED_ARGUMENT, (StringBuffer) null, 'r')
            };
            Getopt getopt = new Getopt("HepTag", strArr, "hl:r:", longOptions);
            String lexiconPath = null;
            String rulesPath = null;
            int option;
            while ((option = getopt.getopt()) != -1) {
                switch (option) {
                    case 'h':
                        help();
                        System.exit(0);
                        break;
                    case 'l':
                        lexiconPath = getopt.getOptarg();
                        break;
                    case 'r':
                        rulesPath = getopt.getOptarg();
                        break;
                    default:
                        System.err.println("Invalid option " + strArr[getopt.getOptind() - 1] + "!");
                        System.exit(1);
                        break;
                }
            }

            URL lexiconUrl = lexiconPath == null
                    ? POSTagger.class.getResource("/hepple/resources/sample_lexicon")
                    : new File(lexiconPath).toURI().toURL();
            URL rulesUrl = rulesPath == null
                    ? POSTagger.class.getResource("/hepple/resources/sample_ruleset.big")
                    : new File(rulesPath).toURI().toURL();
            POSTagger tagger = new POSTagger(lexiconUrl, rulesUrl);

            // Everything after the options is an input file.
            for (int i = getopt.getOptind(); i < strArr.length; i++) {
                tagFile(tagger, strArr[i]);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Tags one file line by line, treating each line as a sentence, and prints the result. */
    private static void tagFile(POSTagger tagger, String path) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                List sentences = new ArrayList();
                sentences.add(tokenize(line));
                printTagged(tagger.runTagger(sentences));
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Prints each tagged sentence on its own line as {@code word/tag} pairs
     * separated by single spaces; a sentence without words prints nothing.
     */
    private static void printTagged(List taggedSentences) {
        for (Iterator sentenceIt = taggedSentences.iterator(); sentenceIt.hasNext();) {
            Iterator pairIt = ((List) sentenceIt.next()).iterator();
            while (pairIt.hasNext()) {
                String[] pair = (String[]) pairIt.next();
                System.out.print(pair[0] + "/" + pair[1]);
                if (pairIt.hasNext()) {
                    System.out.print(" ");
                } else {
                    System.out.println();
                }
            }
        }
    }

    /** Splits a line into tokens using {@link StringTokenizer}'s default delimiters. */
    private static ArrayList<String> tokenize(String line) {
        ArrayList<String> tokens = new ArrayList<String>();
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            tokens.add(tokenizer.nextToken());
        }
        return tokens;
    }

    /** Prints the command-line usage text to standard output. */
    private static void help() {
        System.out.println("NAME\nHepTag - a Part-of-Speech tagger\nsee http://www.dcs.shef.ac.uk/~hepple/papers/acl00/abstract.html \n\nSYNOPSIS\n\tjava hepple.postag.POSTagger [options] file1 [file2 ...]\n\nOPTIONS:\n-h, --help \n\tdisplays this message\n-l, --lexicon <lexicon file>\n\tuses specified lexicon\n-r, --rules <rules file>\n\tuses specified rules");
    }

    /**
     * Reads a whole file into a list of sentences, one per line, each a list
     * of whitespace-separated tokens. The file is closed before returning.
     *
     * @param str path of the file to read
     * @return a list of token lists, in line order
     * @throws IOException if the file cannot be opened or read
     */
    private static List readInput(String str) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(str));
        try {
            List sentences = new ArrayList();
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                sentences.add(tokenize(line));
            }
            return sentences;
        } finally {
            reader.close();
        }
    }
}
