package uk.ac.open.crc.intt;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/* JADX INFO: Access modifiers changed from: package-private */
/* loaded from: input_file:lib/intt.jar:uk/ac/open/crc/intt/NumericTokeniser.class */
/**
 * Splits strings that contain digits into tokens.
 *
 * <p>Two dictionaries obtained from the supplied {@code OracleSet} drive the
 * decisions: a dictionary of known digit-bearing abbreviations, which are kept
 * intact as single tokens, and an aggregated word dictionary, which is
 * consulted to decide whether a run of digits belongs with the text before it
 * or the text after it.
 *
 * <p>Instances hold no mutable tokenising state of their own; whether they may
 * be shared between threads depends on the dictionaries, which are not visible
 * from this class.
 */
public class NumericTokeniser {
    private static final Logger LOGGER = Logger.getLogger("uk.ac.open.crc.intt");

    // Precompiled once; each is used with matcher(..).matches(), i.e. against the whole input.
    private static final Pattern TRAILING_DIGITS = Pattern.compile("^.*[0-9]+$");
    private static final Pattern EMBEDDED_DIGITS = Pattern.compile("^.+[0-9]+.+$");
    private static final Pattern DIGITS_BEFORE_LETTER_SUFFIX = Pattern.compile("^.+[0-9]+[a-zA-Z]+$");
    private static final Pattern TWO_OR_FOUR_INFIX = Pattern.compile("^[a-zA-Z]+(2|4)[a-zA-Z]+$");
    private static final Pattern LETTERS_THEN_DIGITS = Pattern.compile("^[a-zA-Z]+[0-9]+$");
    private static final Pattern DIGITS_THEN_LETTERS = Pattern.compile("^[0-9]+[a-zA-Z]+$");
    private static final Pattern UPPER_DIGIT_LETTER = Pattern.compile("^[A-Z][0-9][A-Za-z]$");
    private static final Pattern CONTAINS_DIGITS = Pattern.compile("^.*[0-9]+.*$");

    private final AggregatedDictionary aggregatedDictionary;
    private final DigitAbbreviationDictionary numericAbbreviationDictionary;

    // NOTE(review): nothing in this class ever assigns this field, so the deprecated
    // split(..) throws NullPointerException whenever it needs to delegate to it.
    // Confirm whether an initialisation was lost in decompilation.
    private BasicTokeniser basicTokeniser;

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Creates a tokeniser backed by the dictionaries of the given oracle set.
     *
     * @param oracleSet source of the aggregated word dictionary and the
     *                  digit-abbreviation dictionary; must not be {@code null}
     */
    public NumericTokeniser(OracleSet oracleSet) {
        this.aggregatedDictionary = oracleSet.getAggregatedDictionary();
        this.numericAbbreviationDictionary = oracleSet.getDigitAbbreviationDictionary();
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Splits {@code str} into tokens, using {@code strArr} as its previously
     * computed fragments.
     *
     * <ul>
     *   <li>If the abbreviation dictionary reports known substrings, each one
     *       becomes a token and the text between them is passed to the basic
     *       tokeniser.</li>
     *   <li>Otherwise, if {@code str} ends in digits, the last fragment of
     *       {@code strArr} is re-split around its digit runs when its leading
     *       piece is a dictionary word.</li>
     *   <li>Otherwise, if {@code str} has digits in its interior, each fragment
     *       with embedded digits is split, attaching every digit run to the
     *       neighbouring piece chosen by dictionary lookups.</li>
     *   <li>Otherwise {@code str} is returned as the only token.</li>
     * </ul>
     *
     * @param str    the complete string being tokenised
     * @param strArr fragments of {@code str}; must be non-empty when {@code str}
     *               ends in digits
     * @return the tokens, in order
     * @deprecated {@link #tokenise(String, boolean)} looks like the replacement —
     *             TODO confirm with callers.
     */
    @Deprecated
    public String[] split(String str, String[] strArr) {
        ArrayList<String> words = new ArrayList<>();
        String[] abbreviations = this.numericAbbreviationDictionary.hasKnownSubstring(str);

        if (abbreviations.length > 0) {
            int consumed = 0;
            for (String abbreviation : abbreviations) {
                // Search from the consumed offset so that repeated abbreviations
                // never produce a start index behind the current position.
                int at = str.indexOf(abbreviation, consumed);
                if (at < 0) {
                    LOGGER.log(Level.FINE, "Abbreviation \"{0}\" not found in \"{1}\" from offset {2}; skipped",
                            new Object[] {abbreviation, str, consumed});
                    continue;
                }
                if (at > consumed) {
                    words.addAll(Arrays.asList(this.basicTokeniser.split(str.substring(consumed, at))));
                }
                words.add(abbreviation);
                consumed = at + abbreviation.length();
            }
            if (consumed < str.length()) {
                words.addAll(Arrays.asList(this.basicTokeniser.split(str.substring(consumed))));
            }
        } else if (TRAILING_DIGITS.matcher(str).matches()) {
            String lastFragment = strArr[strArr.length - 1];
            String[] pieces = lastFragment.split("[0-9]+");
            boolean leadingWordKnown = pieces.length > 0 && this.aggregatedDictionary.isWord(pieces[0]);
            words.addAll(Arrays.asList(strArr));
            if (leadingWordKnown) {
                // Replace the last fragment by its non-digit pieces followed by
                // everything after the leading piece.
                words.remove(words.size() - 1);
                words.addAll(Arrays.asList(pieces));
                words.add(lastFragment.substring(pieces[0].length()));
            }
        } else if (EMBEDDED_DIGITS.matcher(str).matches()) {
            for (String fragment : strArr) {
                if (DIGITS_BEFORE_LETTER_SUFFIX.matcher(fragment).matches()) {
                    words.addAll(splitFragmentAroundDigits(str, fragment));
                } else {
                    words.add(fragment);
                }
            }
            if (TWO_OR_FOUR_INFIX.matcher(str).matches()) {
                int at = str.indexOf("2");
                if (at == -1) {
                    at = str.indexOf("4");
                }
                if (at > 0) {
                    ArrayList<String> alternative = new ArrayList<>();
                    alternative.addAll(Arrays.asList(this.basicTokeniser.split(str.substring(0, at))));
                    alternative.add(str.substring(at, at + 1));
                    String[] tail = this.basicTokeniser.split(str.substring(at + 1, str.length()));
                    alternative.addAll(Arrays.asList(tail));
                    // NOTE(review): only the tail is scored here, whereas tokenise(..)
                    // scores the whole alternative; kept as found.
                    if (this.aggregatedDictionary.percentageKnown(tail)
                            >= this.aggregatedDictionary.percentageKnown(words.toArray(new String[0]))) {
                        words.clear();
                        words.addAll(alternative);
                    }
                }
            }
        } else {
            words.add(str);
        }
        return words.toArray(new String[0]);
    }

    /**
     * Splits one fragment with embedded digits, attaching each digit run to the
     * preceding or the following non-digit piece according to dictionary lookups.
     * Index errors raised while slicing are logged and the affected step skipped.
     */
    private ArrayList<String> splitFragmentAroundDigits(String name, String fragment) {
        String[] pieces = fragment.split("[0-9]+");
        ArrayList<String> tokens = new ArrayList<>();
        int offset = 0;
        for (int k = 0; k < pieces.length - 1; k++) {
            try {
                String digits = fragment.substring(
                        pieces[k].length() + offset,
                        fragment.indexOf(pieces[k + 1], pieces[k].length()));
                if (!this.aggregatedDictionary.isWord(pieces[k + 1])
                        && this.aggregatedDictionary.isWord(pieces[k])) {
                    // Only the left piece is a word: emit it alone and push the
                    // digits onto the front of the right piece.
                    tokens.add(pieces[k]);
                    offset += pieces[k].length();
                    pieces[k + 1] = digits + pieces[k + 1];
                } else {
                    tokens.add(pieces[k] + digits);
                    offset += tokens.get(k).length();
                }
            } catch (ArrayIndexOutOfBoundsException e) {
                LOGGER.log(Level.WARNING,
                        "NumericSplitter: Array bounds exception; name: \"{0}\", fragment: \"{1}\", pieces: {2}",
                        new Object[] {name, fragment, Arrays.toString(pieces)});
            } catch (StringIndexOutOfBoundsException e) {
                LOGGER.log(Level.WARNING,
                        "NumericSplitter: String bounds exception; name: \"{0}\", fragment: \"{1}\"",
                        new Object[] {name, fragment});
            }
            if (k == pieces.length - 2) {
                tokens.add(pieces[k + 1]);
            }
        }
        return tokens;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Splits a digit-bearing string into tokens.
     *
     * <ul>
     *   <li>Known digit abbreviations reported by the abbreviation dictionary are
     *       cut out (matched case-insensitively, original casing preserved) and
     *       the text between them is emitted unsplit.</li>
     *   <li>Letters followed by digits are split at the letter/digit boundary
     *       only when {@code z} is {@code true}; otherwise kept whole.</li>
     *   <li>Digits followed by letters are always split at the boundary.</li>
     *   <li>Strings with digits in the interior are kept whole when they are
     *       exactly upper-case letter, digit, letter; otherwise they are cut at
     *       an upper/lower case boundary when one exists after index 0 (each
     *       part is then tokenised recursively), or else split by
     *       dictionary-guided digit attachment. If the string is letters, a
     *       single {@code 2} or {@code 4}, letters, the three-way split around
     *       that digit replaces the result when the dictionary knows at least as
     *       large a share of its tokens.</li>
     *   <li>Anything else is returned as a single token.</li>
     * </ul>
     *
     * @param str the string to tokenise
     * @param z   whether a letters-then-digits string is split at the digits
     * @return the tokens, in order; never {@code null}
     */
    public ArrayList<String> tokenise(String str, boolean z) {
        ArrayList<String> tokens = new ArrayList<>();
        String[] abbreviations = this.numericAbbreviationDictionary.hasKnownSubstring(str);

        if (abbreviations.length > 0) {
            int consumed = 0;
            for (String abbreviation : abbreviations) {
                // Locale-independent, case-insensitive search starting at the
                // consumed offset; indices refer to the original string.
                int at = indexOfIgnoreCase(str, abbreviation, consumed);
                if (at < 0) {
                    LOGGER.log(Level.FINE, "Abbreviation \"{0}\" not found in \"{1}\" from offset {2}; skipped",
                            new Object[] {abbreviation, str, consumed});
                    continue;
                }
                if (at > consumed) {
                    tokens.add(str.substring(consumed, at));
                }
                tokens.add(str.substring(at, at + abbreviation.length()));
                consumed = at + abbreviation.length();
            }
            if (consumed < str.length()) {
                tokens.add(str.substring(consumed));
            }
        } else if (LETTERS_THEN_DIGITS.matcher(str).matches()) {
            if (z) {
                tokens.addAll(tokeniseOnDigits(str));
            } else {
                tokens.add(str);
            }
        } else if (DIGITS_THEN_LETTERS.matcher(str).matches()) {
            tokens.addAll(tokeniseOnDigits(str));
        } else if (EMBEDDED_DIGITS.matcher(str).matches()) {
            if (UPPER_DIGIT_LETTER.matcher(str).matches()) {
                tokens.add(str);
            } else {
                int boundary = getUcLcBoundary(str);
                if (boundary > 0) {
                    boolean cutBeforeUpper =
                            this.aggregatedDictionary.isWord(str.substring(0, boundary))
                            || this.aggregatedDictionary.isWord(str.substring(boundary));
                    int cut = cutBeforeUpper ? boundary : boundary + 1;
                    String head = str.substring(0, cut);
                    String tail = str.substring(cut);
                    if (CONTAINS_DIGITS.matcher(head).matches()) {
                        tokens.addAll(tokenise(head, false));
                        tokens.add(tail);
                    } else {
                        tokens.add(head);
                        tokens.addAll(tokenise(tail, z));
                    }
                } else {
                    tokens.addAll(splitMixedString(str));
                }
            }
            if (TWO_OR_FOUR_INFIX.matcher(str).matches()) {
                int at = str.indexOf("2");
                if (at == -1) {
                    at = str.indexOf("4");
                }
                if (at > 0) {
                    ArrayList<String> alternative = new ArrayList<>();
                    alternative.add(str.substring(0, at));
                    alternative.add(str.substring(at, at + 1));
                    alternative.add(str.substring(at + 1, str.length()));
                    if (this.aggregatedDictionary.percentageKnown(alternative.toArray(new String[0]))
                            >= this.aggregatedDictionary.percentageKnown(tokens.toArray(new String[0]))) {
                        tokens.clear();
                        tokens.addAll(alternative);
                    }
                }
            }
        } else {
            tokens.add(str);
        }
        return tokens;
    }

    /**
     * Returns the index of the first case-insensitive occurrence of
     * {@code target} in {@code text} at or after {@code from}, or -1 if there is
     * none. Comparison is per character and does not depend on the default locale.
     */
    private static int indexOfIgnoreCase(String text, String target, int from) {
        int lastStart = text.length() - target.length();
        for (int i = Math.max(from, 0); i <= lastStart; i++) {
            if (text.regionMatches(true, i, target, 0, target.length())) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Cuts {@code str} into maximal runs of digits and of non-digits, in order.
     * Iterates by code point so a supplementary-plane digit is never split
     * between its surrogates. An empty string yields an empty list.
     */
    private ArrayList<String> tokeniseOnDigits(String str) {
        ArrayList<String> runs = new ArrayList<>();
        int runStart = 0;
        boolean runIsDigits = false;
        int i = 0;
        while (i < str.length()) {
            int codePoint = str.codePointAt(i);
            boolean digit = Character.isDigit(codePoint);
            if (i > 0 && digit != runIsDigits) {
                runs.add(str.substring(runStart, i));
                runStart = i;
            }
            runIsDigits = digit;
            i += Character.charCount(codePoint);
        }
        if (runStart < str.length()) {
            runs.add(str.substring(runStart));
        }
        return runs;
    }

    /**
     * Finds the first index {@code i} where the character at {@code i} is upper
     * case, the one at {@code i + 1} is lower case, and {@code i} is either 0 or
     * preceded by an upper-case character.
     *
     * @return that index, or -1 when no such position exists
     */
    private int getUcLcBoundary(String str) {
        for (int i = 0; i < str.length() - 1; i++) {
            boolean upperHere = Character.isUpperCase(str.codePointAt(i));
            boolean lowerNext = Character.isLowerCase(str.codePointAt(i + 1));
            if (upperHere && lowerNext && (i == 0 || Character.isUpperCase(str.codePointAt(i - 1)))) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Splits a string whose digit and non-digit runs alternate, pairing each run
     * with a neighbour. Runs are normally joined in consecutive pairs. When the
     * run two positions ahead is not a dictionary word but the current run is,
     * the current run is emitted alone and all remaining runs are joined in
     * pairs starting from the next one.
     */
    private ArrayList<String> splitMixedString(String str) {
        ArrayList<String> tokens = new ArrayList<>();
        ArrayList<String> runs = tokeniseOnDigits(str);
        int count = runs.size();
        int i = 0;
        while (i < count) {
            if (i + 2 >= count) {
                // One or two runs left: join them if there are two.
                tokens.add(i < count - 1 ? runs.get(i) + runs.get(i + 1) : runs.get(i));
                i += 2;
            } else if (this.aggregatedDictionary.isWord(runs.get(i + 2))
                    || !this.aggregatedDictionary.isWord(runs.get(i))) {
                tokens.add(runs.get(i) + runs.get(i + 1));
                i += 2;
            } else {
                tokens.add(runs.get(i));
                tokens.add(runs.get(i + 1) + runs.get(i + 2));
                // Consume everything that remains in shifted pairs, then stop.
                // The decompiled loop here had no exit condition and never returned.
                for (int j = i + 3; j < count; j += 2) {
                    tokens.add(j + 1 < count ? runs.get(j) + runs.get(j + 1) : runs.get(j));
                }
                break;
            }
        }
        return tokens;
    }
}
