package org.forester.io.parsers.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URL;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.biojava.nbio.structure.align.util.AtomCache;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
import org.forester.io.parsers.nhx.NHXParser;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
import org.forester.io.parsers.tol.TolParser;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.util.ForesterConstants;
import org.forester.util.ForesterUtil;

/* loaded from: input_file:org/forester/io/parsers/util/ParserUtils.class */
/**
 * Static helpers for choosing a {@link PhylogenyParser} for a file or URL,
 * opening readers on heterogeneous inputs, and extracting taxonomy
 * information (UniProt taxonomy ids, taxonomy codes, scientific names) from
 * node names by means of regular expressions.
 *
 * <p>The "TAXOMONY" spelling in the constant names is part of the public API
 * and is therefore kept as is.
 */
public final class ParserUtils {
    /**
     * Binomial fragment: an upper-case letter followed by 2-30 lower-case
     * letters, a '_' or ' ' separator, and 3-30 lower-case letters.
     */
    private static final String SN_BN = "[A-Z][a-z]{2,30}[_ ][a-z]{3,30}";

    /**
     * Taxonomy code fragment: five characters (first one A-Z or 9, then two
     * upper-case letters, then two upper-case letters or digits), or one of
     * the three-letter codes RAT, PIG, PEA.
     */
    public static final String TAX_CODE = "(?:[A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA";

    /** Letters-only variant of {@link #TAX_CODE}: five upper-case letters, or RAT, PIG, PEA. */
    public static final String TAX_CODE_LO = "(?:[A-Z]{5})|RAT|PIG|PEA";

    /** Taxonomy code delimited on both sides by a word boundary or '_'; group 1 is the code. */
    public static final Pattern TAXOMONY_CODE_PATTERN_A = Pattern.compile("(?:\\b|_)(" + TAX_CODE + ")(?:\\b|_)");

    /** Letters-only taxonomy code preceded by '_' and followed by a word boundary or '_'; group 1 is the code. */
    public static final Pattern TAXOMONY_CODE_PATTERN_A_LO = Pattern.compile("_(" + TAX_CODE_LO + ")(?:\\b|_)");

    /** Taxonomy code enclosed in square brackets; group 1 is the code. */
    public static final Pattern TAXOMONY_CODE_PATTERN_BRACKETED = Pattern.compile("\\[(" + TAX_CODE + ")\\]");

    /** At least three alphanumeric characters, '_', then a taxonomy code ending at a word boundary; group 1 is the code. */
    public static final Pattern TAXOMONY_CODE_PATTERN_PFR = Pattern.compile("(?:\\b|_)[a-zA-Z0-9]{3,}_(" + TAX_CODE + ")\\b");

    /** A single capitalised word of 3-31 letters; group 1 is the word. */
    public static final Pattern TAXOMONY_SN_PATTERN_GENUS = Pattern.compile("([A-Z][a-z]{2,30})");

    /** Binomial followed by end of input or by a separator, lower-case letters and an upper-case letter or digit; group 1 is the binomial. */
    public static final Pattern TAXOMONY_SN_PATTERN_SN = Pattern.compile("(?:\\b|_)(" + SN_BN + ")(?:(\\s*$)|([_ ][a-z]*[A-Z0-9]))");

    /** Three-part name (binomial plus one more lower-case word) followed by further text; group 1 is the name. */
    public static final Pattern TAXOMONY_SN_PATTERN_SNS = Pattern.compile("(?:\\b|_)(" + SN_BN + "[_ ][a-z]{3,30})[_ ][a-z]*[A-Z0-9]");

    /** Three-part name at the end of the input, preceded by other text; group 1 is the name. */
    public static final Pattern TAXOMONY_SN_PATTERN_SNS2 = Pattern.compile("[A-Z0-9][a-z]*[_ ](" + SN_BN + "[_ ][a-z]{3,30})\\s*$");

    /** Capitalised word followed by "sp" or "sp."; group 1 is the whole name. */
    public static final Pattern TAXOMONY_SN_PATTERN_SP = Pattern.compile("(?:\\b|_)([A-Z][a-z]{2,30}[_ ]sp\\.?)(?:\\b|_)?");

    /** Binomial followed by a str/subsp/ssp/var marker and a designation of 1-60 non-space characters; group 1 is the whole name. */
    public static final Pattern TAXOMONY_SN_PATTERN_STRAIN_1 = Pattern.compile("(?:\\b|_)(" + SN_BN + "[_ ](?:str|subsp|ssp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60})(?:\\b|_)");

    /** Like {@link #TAXOMONY_SN_PATTERN_STRAIN_1}, but with marker and designation enclosed in parentheses. */
    public static final Pattern TAXOMONY_SN_PATTERN_STRAIN_2 = Pattern.compile("(?:\\b|_)(" + SN_BN + "[_ ]\\((?:str|subsp|ssp|var)[a-z]{0,5}\\.?[_ ]\\S{1,60}\\))");

    /** Binomial followed by a "str" marker with designation and a "substr" marker with designation; group 1 is the whole name. */
    public static final Pattern TAXOMONY_SN_PATTERN_STRAIN_SUBSTRAIN = Pattern.compile("(?:\\b|_)(" + SN_BN + "[_ ]str[a-z]{0,3}\\.?[_ ]\\S{1,60}[_ ]substr[a-z]{0,3}\\.?[_ ]\\S{1,60})(?:\\b|_)");

    /** At least four upper-case alphanumerics, '_', taxonomy code, then "/digits-digits"; group 1 is the code. */
    private static final Pattern TAXOMONY_CODE_PATTERN_PFS = Pattern.compile("(?:\\b|_)[A-Z0-9]{4,}_(" + TAX_CODE + ")/\\d+-\\d+\\b");

    /** At least one upper-case alphanumeric, '_', then 1-7 digits ending at a word boundary; group 1 is the number. */
    private static final Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFR = Pattern.compile("(?:\\b|_)[A-Z0-9]{1,}_(\\d{1,7})\\b");

    /** At least four upper-case alphanumerics, '_', 1-7 digits, then "/digits-digits"; group 1 is the number. */
    private static final Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFS = Pattern.compile("(?:\\b|_)[A-Z0-9]{4,}_(\\d{1,7})/\\d+-\\d+\\b");

    /**
     * Chooses a parser by looking at the first line of {@code file}: a line
     * starting with "&lt;" selects the phyloXML parser, a line starting with
     * "nexus", "#nexus", "# nexus" or "begin" (case-insensitive) selects the
     * Nexus parser, anything else selects the NHX parser.
     *
     * @param file the file whose first line is inspected
     * @param z    whether a phyloXML parser should validate against the local
     *             XSD; if the XSD resource cannot be found, validation is
     *             silently left off
     * @return a new parser, never {@code null}
     * @throws FileNotFoundException declared for callers; raised by the first-line read
     * @throws IOException           if the first line cannot be read
     */
    public static final PhylogenyParser createParserDependingFileContents(File file, boolean z) throws FileNotFoundException, IOException {
        String firstLine = ForesterUtil.getFirstLine(file).trim().toLowerCase(Locale.ROOT);
        return createParserFromFirstLine(firstLine, z, false);
    }

    /**
     * Chooses a parser for {@code file}, first by file-name suffix and, if the
     * suffix is not recognised, by the file's first line. If the file path
     * ends with ".zip" (case-insensitive) and the parser is a phyloXML or ToL
     * parser, it is told to expect zipped input.
     *
     * @param file the file to be parsed
     * @param z    whether a phyloXML parser should validate against the local XSD
     * @return a new parser
     * @throws FileNotFoundException declared for callers; raised by the first-line read
     * @throws IOException           if the first line cannot be read
     */
    public static final PhylogenyParser createParserDependingOnFileType(File file, boolean z) throws FileNotFoundException, IOException {
        PhylogenyParser parser = createParserDependingOnSuffix(file.getName(), z);
        if (parser == null) {
            parser = createParserDependingFileContents(file, z);
        }
        // Locale.ROOT keeps ".ZIP" -> ".zip" even under locales with special casing rules for 'I'.
        if (file.toString().toLowerCase(Locale.ROOT).endsWith(".zip")) {
            flagZippedInput(parser);
        }
        return parser;
    }

    /**
     * Chooses a parser for {@code url}, first by the suffix of the URL's file
     * part and, if the suffix is not recognised, by the first line of the
     * content behind the URL. Unlike the file based variant, a missing local
     * XSD resource is an error when validation was requested and the parser
     * was selected from the content.
     *
     * @param url the location to be parsed
     * @param z   whether a phyloXML parser should validate against the local XSD
     * @return a new parser
     * @throws FileNotFoundException declared for callers; raised by the first-line read
     * @throws IOException           if the first line cannot be read
     * @throws RuntimeException      if validation is requested for content-detected
     *                               phyloXML and the XSD resource cannot be located
     */
    public static final PhylogenyParser createParserDependingOnUrlContents(URL url, boolean z) throws FileNotFoundException, IOException {
        String lowerCasePath = url.getFile().toLowerCase(Locale.ROOT);
        PhylogenyParser parser = createParserDependingOnSuffix(lowerCasePath, z);
        if (parser == null) {
            String firstLine = ForesterUtil.getFirstLine(url).trim().toLowerCase(Locale.ROOT);
            parser = createParserFromFirstLine(firstLine, z, true);
        }
        if (lowerCasePath.endsWith(".zip")) {
            flagZippedInput(parser);
        }
        return parser;
    }

    /**
     * Opens a buffered reader on the given source.
     *
     * <p>A {@link File} or a {@link String} (interpreted as a file path) is
     * read from disk after checking that it exists, is a regular file and is
     * readable; an {@link InputStream} is wrapped; the text of a
     * {@link StringBuffer} or {@link StringBuilder} is read directly. Files and
     * streams are decoded by {@link FileReader} / {@link InputStreamReader}
     * without an explicit charset.
     *
     * @param obj the source to read from
     * @return a new reader; closing it is the caller's responsibility
     * @throws IOException              if the file does not exist, is not a file, or is not readable
     * @throws FileNotFoundException    if the file cannot be opened
     * @throws IllegalArgumentException if {@code obj} is {@code null} or of an unsupported type
     */
    public static BufferedReader createReader(Object obj) throws IOException, FileNotFoundException {
        if ((obj instanceof File) || (obj instanceof String)) {
            File file = obj instanceof File ? (File) obj : new File((String) obj);
            if (!file.exists()) {
                throw new IOException("[" + file.getAbsolutePath() + "] does not exist");
            }
            if (!file.isFile()) {
                throw new IOException("[" + file.getAbsolutePath() + "] is not a file");
            }
            if (!file.canRead()) {
                throw new IOException("[" + file.getAbsolutePath() + "] is not a readable");
            }
            return new BufferedReader(new FileReader(file));
        }
        if (obj instanceof InputStream) {
            return new BufferedReader(new InputStreamReader((InputStream) obj));
        }
        if ((obj instanceof StringBuffer) || (obj instanceof StringBuilder)) {
            return new BufferedReader(new StringReader(obj.toString()));
        }
        // null fails every instanceof test above; report it like any other unsupported input
        // instead of tripping a NullPointerException on obj.getClass().
        String type = obj == null ? "null" : String.valueOf(obj.getClass());
        throw new IllegalArgumentException("attempt to parse object of type [" + type + "] (can only parse objects of type File/String, InputStream, StringBuffer, or StringBuilder)");
    }

    /**
     * Tries to find a scientific name inside a node name.
     *
     * <p>The patterns are tried from most to least specific: strain with
     * sub-strain, strain/subspecies/variety, the parenthesised form of the
     * latter, three-part names, plain binomials, and finally "Genus sp".
     * Underscores in the match are replaced by spaces, and rank markers
     * written without a trailing period ("str", "substr", "subsp", "var",
     * "sp") get one; "ssp" and "ssp." are rewritten to "subsp.".
     *
     * @param str the node name to search; must not be {@code null}
     * @return the extracted name, or {@code null} if no pattern matches
     */
    public static final String extractScientificNameFromNodeName(String str) {
        String name = firstGroupWithSpaces(TAXOMONY_SN_PATTERN_STRAIN_SUBSTRAIN, str);
        if (name != null) {
            // Both markers occur in this form, so each one is normalised independently.
            if (name.indexOf(" str ") > 4) {
                name = name.replaceFirst(" str ", " str. ");
            }
            if (name.indexOf(" substr ") > 4) {
                name = name.replaceFirst(" substr ", " substr. ");
            }
            return name;
        }
        name = firstGroupWithSpaces(TAXOMONY_SN_PATTERN_STRAIN_1, str);
        if (name != null) {
            return normalizeRankMarker(name, " ", " ");
        }
        name = firstGroupWithSpaces(TAXOMONY_SN_PATTERN_STRAIN_2, str);
        if (name != null) {
            // Same markers as above, but introduced by an opening parenthesis.
            return normalizeRankMarker(name, " (", " \\(");
        }
        name = firstGroupWithSpaces(TAXOMONY_SN_PATTERN_SNS, str);
        if (name != null) {
            return name;
        }
        name = firstGroupWithSpaces(TAXOMONY_SN_PATTERN_SNS2, str);
        if (name != null) {
            return name;
        }
        name = firstGroupWithSpaces(TAXOMONY_SN_PATTERN_SN, str);
        if (name != null) {
            return name;
        }
        name = firstGroupWithSpaces(TAXOMONY_SN_PATTERN_SP, str);
        if (name == null) {
            return null;
        }
        if (name.endsWith(" sp")) {
            // "Genus sp" -> "Genus sp."; a plain literal avoids borrowing a constant
            // from the unrelated BioJava AtomCache class just for a period.
            name = name + ".";
        }
        return name;
    }

    /**
     * Tries to find a taxonomy code inside a node name.
     *
     * <p>The strict Pfam-style pattern (identifier, '_', code, "/from-to") is
     * always tried. The relaxed pattern (identifier, '_', code) is tried only
     * for {@code PFAM_STYLE_RELAXED} and {@code AGGRESSIVE}; a free-standing
     * code is accepted only for {@code AGGRESSIVE}.
     *
     * @param str                 the node name to search; must not be {@code null}
     * @param taxonomy_extraction how permissive the search may be
     * @return the taxonomy code, or {@code null} if none was found
     */
    public static final String extractTaxonomyCodeFromNodeName(String str, NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction) {
        String code = firstGroup(TAXOMONY_CODE_PATTERN_PFS, str);
        if (code != null) {
            return code;
        }
        if (!isRelaxedOrAggressive(taxonomy_extraction)) {
            return null;
        }
        code = firstGroup(TAXOMONY_CODE_PATTERN_PFR, str);
        if (code != null) {
            return code;
        }
        if (taxonomy_extraction != NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE) {
            return null;
        }
        return firstGroup(TAXOMONY_CODE_PATTERN_A, str);
    }

    /**
     * Extracts taxonomy information from the name of {@code phylogenyNode} and
     * stores it in the node's taxonomy, creating the taxonomy if the node has
     * none yet.
     *
     * <p>In order of precedence: a UniProt taxonomy id is stored as the
     * taxonomy identifier; in {@code AGGRESSIVE} mode a letters-only taxonomy
     * code is looked for and, failing that, a scientific name, which is stored
     * as the scientific name; otherwise a taxonomy code found by
     * {@link #extractTaxonomyCodeFromNodeName} is stored as the taxonomy code.
     *
     * @param phylogenyNode       the node to read the name from and to annotate
     * @param taxonomy_extraction the extraction mode; must not be {@code NO}
     * @return the extracted id, scientific name or code, or {@code null} if
     *         nothing was found (the node is then left untouched)
     * @throws PhyloXmlDataFormatException declared for the taxonomy setters used here
     * @throws IllegalArgumentException    if {@code taxonomy_extraction} is {@code NO}
     */
    public static final String extractTaxonomyDataFromNodeName(PhylogenyNode phylogenyNode, NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction) throws PhyloXmlDataFormatException {
        if (taxonomy_extraction == NHXParser.TAXONOMY_EXTRACTION.NO) {
            throw new IllegalArgumentException("taxonomy extraction mode must not be " + NHXParser.TAXONOMY_EXTRACTION.NO);
        }
        String nodeName = phylogenyNode.getName();
        String uniprotId = extractUniprotTaxonomyIdFromNodeName(nodeName, taxonomy_extraction);
        if (!ForesterUtil.isEmpty(uniprotId)) {
            taxonomyOf(phylogenyNode).setIdentifier(new Identifier(uniprotId, PhyloXmlUtil.UNIPROT_TAX_PROVIDER));
            return uniprotId;
        }
        String code = null;
        if (taxonomy_extraction == NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE) {
            code = extractTaxonomyCodeFromNodeNameLettersOnly(nodeName);
            if (ForesterUtil.isEmpty(code)) {
                // Without a letters-only code, a scientific name takes precedence over the
                // more general taxonomy code patterns tried further down.
                String scientificName = extractScientificNameFromNodeName(nodeName);
                if (!ForesterUtil.isEmpty(scientificName)) {
                    taxonomyOf(phylogenyNode).setScientificName(scientificName);
                    return scientificName;
                }
            }
        }
        if (ForesterUtil.isEmpty(code)) {
            code = extractTaxonomyCodeFromNodeName(nodeName, taxonomy_extraction);
        }
        if (ForesterUtil.isEmpty(code)) {
            return null;
        }
        taxonomyOf(phylogenyNode).setTaxonomyCode(code);
        return code;
    }

    /**
     * Tries to find a numeric UniProt taxonomy id (1-7 digits) inside a node
     * name.
     *
     * <p>The strict Pfam-style pattern (identifier, '_', digits, "/from-to")
     * is always tried; the relaxed pattern (identifier, '_', digits) only for
     * {@code PFAM_STYLE_RELAXED} and {@code AGGRESSIVE}.
     *
     * @param str                 the node name to search; must not be {@code null}
     * @param taxonomy_extraction how permissive the search may be
     * @return the id as a string of digits, or {@code null} if none was found
     */
    public static final String extractUniprotTaxonomyIdFromNodeName(String str, NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction) {
        String id = firstGroup(TAXOMONY_UNIPROT_ID_PATTERN_PFS, str);
        if (id != null) {
            return id;
        }
        if (!isRelaxedOrAggressive(taxonomy_extraction)) {
            return null;
        }
        return firstGroup(TAXOMONY_UNIPROT_ID_PATTERN_PFR, str);
    }

    /**
     * Reads all phylogenies from {@code file}, selecting the parser via
     * {@link #createParserDependingOnFileType(File, boolean)} with XSD
     * validation requested.
     *
     * @param file the file to read
     * @return the phylogenies returned by {@code PhylogenyMethods.readPhylogenies}
     * @throws FileNotFoundException declared for callers; raised while selecting the parser or reading
     * @throws IOException           if reading fails
     */
    public static final Phylogeny[] readPhylogenies(File file) throws FileNotFoundException, IOException {
        PhylogenyParser parser = createParserDependingOnFileType(file, true);
        return PhylogenyMethods.readPhylogenies(parser, file);
    }

    /**
     * Reads all phylogenies from the file at the given path; see
     * {@link #readPhylogenies(File)}.
     *
     * @param str the path of the file to read
     * @return the phylogenies read from the file
     * @throws FileNotFoundException declared for callers; raised while selecting the parser or reading
     * @throws IOException           if reading fails
     */
    public static final Phylogeny[] readPhylogenies(String str) throws FileNotFoundException, IOException {
        return readPhylogenies(new File(str));
    }

    /**
     * Chooses a parser purely from a file-name suffix (case-insensitive):
     * ".tol", ".tolxml", ".tol.zip" select the ToL parser; the phyloXML
     * suffix, "phyloxml" or any other ".zip" select the phyloXML parser;
     * ".nexus", ".nex", ".nx" select the Nexus parser; ".nhx", ".nh",
     * ".newick", ".nwk" select the NHX parser.
     *
     * @param str a file name or path
     * @param z   whether a phyloXML parser should validate against the local
     *            XSD; a missing XSD resource silently leaves validation off
     * @return a new parser, or {@code null} if the suffix is not recognised
     */
    private static final PhylogenyParser createParserDependingOnSuffix(String str, boolean z) {
        String name = str.toLowerCase(Locale.ROOT);
        if (name.endsWith(".tol") || name.endsWith(".tolxml") || name.endsWith(".tol.zip")) {
            return new TolParser();
        }
        if (name.endsWith(ForesterConstants.PHYLO_XML_SUFFIX) || name.endsWith("phyloxml") || name.endsWith(".zip")) {
            return newPhyloXmlParser(z, false);
        }
        if (name.endsWith(".nexus") || name.endsWith(".nex") || name.endsWith(".nx")) {
            return new NexusPhylogeniesParser();
        }
        if (name.endsWith(".nhx") || name.endsWith(".nh") || name.endsWith(".newick") || name.endsWith(".nwk")) {
            return new NHXParser();
        }
        return null;
    }

    /**
     * Looks for a letters-only taxonomy code preceded by '_' in a node name.
     *
     * @param str the node name to search
     * @return the code, or {@code null} if none was found
     */
    private static final String extractTaxonomyCodeFromNodeNameLettersOnly(String str) {
        return firstGroup(TAXOMONY_CODE_PATTERN_A_LO, str);
    }

    /** Picks a parser from an already trimmed and lower-cased first line of input. */
    private static PhylogenyParser createParserFromFirstLine(String firstLine, boolean validateAgainstXsd, boolean failIfXsdMissing) {
        if (firstLine.startsWith("<")) {
            return newPhyloXmlParser(validateAgainstXsd, failIfXsdMissing);
        }
        boolean nexus = firstLine.startsWith("nexus") || firstLine.startsWith("#nexus") || firstLine.startsWith("# nexus") || firstLine.startsWith("begin");
        if (nexus) {
            return new NexusPhylogeniesParser();
        }
        return new NHXParser();
    }

    /**
     * Creates a phyloXML parser and, if requested, points it at the local XSD.
     * A missing XSD resource is either ignored or reported with a
     * {@link RuntimeException}, depending on {@code failIfXsdMissing}.
     */
    private static PhylogenyParser newPhyloXmlParser(boolean validateAgainstXsd, boolean failIfXsdMissing) {
        PhylogenyParser parser = PhyloXmlParser.createPhyloXmlParser();
        if (validateAgainstXsd) {
            URL xsd = PhyloXmlParser.class.getClassLoader().getResource(ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE);
            if (xsd != null) {
                ((PhyloXmlParser) parser).setValidateAgainstSchema(xsd.toString());
            } else if (failIfXsdMissing) {
                throw new RuntimeException("failed to get URL for phyloXML XSD from jar file from [resources/phyloxml.xsd]");
            }
        }
        return parser;
    }

    /** Tells phyloXML and ToL parsers to expect zipped input; other parsers (and {@code null}) are left alone. */
    private static void flagZippedInput(PhylogenyParser parser) {
        if (parser instanceof PhyloXmlParser) {
            ((PhyloXmlParser) parser).setZippedInputstream(true);
        } else if (parser instanceof TolParser) {
            ((TolParser) parser).setZippedInputstream(true);
        }
    }

    /** Returns whether the mode permits the relaxed Pfam-style patterns. */
    private static boolean isRelaxedOrAggressive(NHXParser.TAXONOMY_EXTRACTION mode) {
        return mode == NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED || mode == NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE;
    }

    /** Returns capture group 1 of the first match of {@code pattern} in {@code input}, or {@code null} without a match. */
    private static String firstGroup(Pattern pattern, String input) {
        Matcher matcher = pattern.matcher(input);
        return matcher.find() ? matcher.group(1) : null;
    }

    /** Like {@link #firstGroup}, with every '_' in the result replaced by a space. */
    private static String firstGroupWithSpaces(Pattern pattern, String input) {
        String group = firstGroup(pattern, input);
        return group == null ? null : group.replace('_', ' ');
    }

    /**
     * Adds the trailing period to the first rank marker found (tested in the
     * order str, subsp, ssp, "ssp.", var), spelling "ssp" out as "subsp.".
     * At most one marker is rewritten, and only if it starts after index 4.
     *
     * @param name        the name with spaces as separators
     * @param literalLead the text preceding the marker (" " or " (")
     * @param regexLead   {@code literalLead} written as a regular expression
     */
    private static String normalizeRankMarker(String name, String literalLead, String regexLead) {
        if (name.indexOf(literalLead + "str ") > 4) {
            return name.replaceFirst(regexLead + "str ", literalLead + "str. ");
        }
        if (name.indexOf(literalLead + "subsp ") > 4) {
            return name.replaceFirst(regexLead + "subsp ", literalLead + "subsp. ");
        }
        if (name.indexOf(literalLead + "ssp ") > 4) {
            return name.replaceFirst(regexLead + "ssp ", literalLead + "subsp. ");
        }
        if (name.indexOf(literalLead + "ssp. ") > 4) {
            // The '.' is deliberately left unescaped to keep the established matching behaviour.
            return name.replaceFirst(regexLead + "ssp. ", literalLead + "subsp. ");
        }
        if (name.indexOf(literalLead + "var ") > 4) {
            return name.replaceFirst(regexLead + "var ", literalLead + "var. ");
        }
        return name;
    }

    /** Returns the node's taxonomy, attaching a fresh one first if the node has none. */
    private static Taxonomy taxonomyOf(PhylogenyNode node) {
        if (!node.getNodeData().isHasTaxonomy()) {
            node.getNodeData().setTaxonomy(new Taxonomy());
        }
        return node.getNodeData().getTaxonomy();
    }
}
