package com.zhidian.analysis;

import com.zhidian.analysis.utils.graph.TokenGraph;
import com.zhidian.analysis.utils.tree.BasicTree;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/zhidian/analysis/YgSegmenter.class */
/**
 * Dictionary-driven segmenter that walks a {@link Trie} over a character
 * array and emits {@link Token}s, either as a flat list
 * ({@link #getTokens(char[])}) or as a {@link TokenGraph}
 * ({@link #getTokenGraph(char[])}).
 *
 * <p>A dictionary must be supplied through {@link #setDictionary(Trie)} before
 * any segmentation method is called; otherwise a {@code NoDictException} is
 * thrown.
 */
public class YgSegmenter {
    private final Logger logger = LoggerFactory.getLogger(getClass());
    private Trie dictionary = null;
    private boolean enumerateAll = false;

    /** Fails fast when no dictionary has been configured. */
    private void checkDict() {
        if (this.dictionary == null) {
            throw new NoDictException("没有为此分词器设置词典");
        }
    }

    /**
     * Segments a string.
     *
     * @param str the text to segment
     * @return the tokens in the order they were produced, or {@code null} when
     *         {@code str} is {@code null} or empty
     */
    public List<Token> getTokens(String str) {
        if (str == null || str.isEmpty()) {
            return null;
        }
        return getTokens(str.toCharArray());
    }

    /**
     * Builds a token graph for the given text.
     *
     * <p>Each round starts at an offset, seeds the candidate list with the
     * single character at that offset (flagged as not-a-word), then follows the
     * dictionary as far as it matches, offering every dictionary hit to
     * {@link #handleTokenPerRound}. The surviving candidates are added to the
     * graph and the next round starts at the end offset of the first candidate.
     *
     * @param cArr the text to segment
     * @return the finished graph, or {@code null} when {@code cArr} is
     *         {@code null} or empty
     * @throws NoDictException if no dictionary has been set
     */
    public TokenGraph getTokenGraph(char[] cArr) {
        checkDict();
        if (cArr == null || cArr.length < 1) {
            return null;
        }
        int length = cArr.length;
        LinkedList<Token> candidates = new LinkedList<>();
        TokenGraph tokenGraph = new TokenGraph();
        int start = 0;
        while (start < length) {
            candidates.clear();
            // Fallback candidate: the single character at the round start.
            candidates.addLast(getToken(cArr, null, start, start, false));
            BasicTree.Node node = this.dictionary.getRoot();
            for (int pos = start; pos < length; pos++) {
                BasicTree.Node next = this.dictionary.find(node, cArr[pos]);
                if (next == null) {
                    break;
                }
                if (next.isTokenEnd()) {
                    handleTokenPerRound(candidates, getToken(cArr, next, start, pos, true));
                }
                node = next;
            }
            addToGraph(tokenGraph, candidates);
            // The list is never empty here, so every round advances by at least one character.
            start = candidates.getFirst().getEndOffset();
        }
        tokenGraph.finish();
        return tokenGraph;
    }

    /** Adds every token of {@code linkedList} to {@code tokenGraph}, in list order. */
    private void addToGraph(TokenGraph tokenGraph, LinkedList<Token> linkedList) {
        for (Token token : linkedList) {
            tokenGraph.add(token);
        }
    }

    /**
     * Merges a newly found token into the candidates of the current round.
     *
     * <p>A token spanning more than three characters is appended next to the
     * existing candidates; a token of three characters or fewer replaces all of
     * them. An empty list simply receives the token.
     */
    private void handleTokenPerRound(LinkedList<Token> linkedList, Token token) {
        boolean spansAtMostThree = token.getEndOffset() - token.getStartOffset() <= 3;
        if (!linkedList.isEmpty() && spansAtMostThree) {
            linkedList.clear();
        }
        linkedList.addLast(token);
    }

    /**
     * Segments a character array into a flat token list.
     *
     * <p>Text is processed in rounds. A round walks the dictionary from its
     * start offset up to a limit and collects dictionary hits; stretches of
     * text not covered by any hit are emitted as not-a-word tokens. The start
     * and limit of the following round come from {@link #nextRoundOffset}.
     *
     * @param cArr the text to segment
     * @return the tokens in the order they were produced, or {@code null} when
     *         {@code cArr} is {@code null} or empty
     * @throws NoDictException if no dictionary has been set
     */
    public List<Token> getTokens(char[] cArr) {
        checkDict();
        if (cArr == null || cArr.length < 1) {
            return null;
        }
        int length = cArr.length;
        LinkedList<Token> result = new LinkedList<>();
        LinkedList<Token> roundTokens = new LinkedList<>();
        int start = 0;          // first index of the current round
        int limit = length;     // exclusive upper bound of the current round
        int lastCovered = -1;   // last index (inclusive) already covered by an emitted token
        int rounds = 0;
        while (start < length) {
            BasicTree.Node node = this.dictionary.getRoot();
            roundTokens.clear();
            boolean insideAlnumRun = false;
            int pos = start;
            while (pos < limit) {
                BasicTree.Node next = this.dictionary.find(node, cArr[pos]);
                if (next == null) {
                    // The dictionary cannot continue: optionally widen the last hit over a
                    // trailing run of ASCII letters/digits, then end the round. Without this
                    // break neither pos nor node would change and the loop would never end.
                    if (tryExtractEnToken(roundTokens, start, pos, cArr) != -1) {
                        lastCovered = roundTokens.peekLast().getEndOffset() - 1;
                    }
                    break;
                }
                if (next.isTokenEnd()) {
                    if (ennAndItsPreOrNextAlsoEnn(start, pos, cArr)) {
                        // The hit is a fragment of a longer ASCII letter/digit run; drop it.
                        insideAlnumRun = true;
                        break;
                    }
                    if (start > lastCovered + 1) {
                        // Emit the uncovered gap before this round as a not-a-word token.
                        result.addLast(getToken(cArr, null, lastCovered + 1, start - 1, false));
                        lastCovered = pos;
                    }
                    handleTokenPerRound(roundTokens, getToken(cArr, next, start, pos, true));
                    if (pos > lastCovered) {
                        lastCovered = pos;
                    }
                }
                node = next;
                pos++;
            }
            int[] nextRound = nextRoundOffset(start, pos, roundTokens, lastCovered, length, insideAlnumRun);
            start = nextRound[0];
            limit = nextRound[1];
            if (roundTokens.size() > 0) {
                result.addAll(roundTokens);
            }
            rounds++;
            if (rounds > length) {
                // Safety valve: more rounds than characters means the offsets stopped advancing.
                this.logger.warn("what?提取词的次数比文本还长?当前文本是:{}", new String(cArr));
                break;
            }
        }
        roundTokens.clear();
        int lastIndex = length - 1;
        if (lastIndex > lastCovered) {
            // Trailing text that no token covered.
            result.addLast(getToken(cArr, null, lastCovered + 1, lastIndex, false));
        }
        return result;
    }

    /** Returns {@code true} for ASCII digits and ASCII upper/lower-case letters only. */
    private boolean isLetterOrNumber(char c) {
        return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }

    /**
     * Tries to widen the last token of the round over a following run of ASCII
     * letters/digits.
     *
     * <p>The attempt is made only when the list is non-empty, {@code i2} is a
     * valid index greater than zero, the last token starts at {@code i} and
     * ends at or before {@code i2}, and both {@code cArr[i2 - 1]} and
     * {@code cArr[i2]} are ASCII letters/digits. On success a word token
     * spanning {@code i} up to the end of the run is offered to
     * {@link #handleTokenPerRound}.
     *
     * @param linkedList tokens collected so far in the current round
     * @param i          start offset of the current round
     * @param i2         index at which the dictionary walk stopped
     * @param cArr       the text being segmented
     * @return the number of letter/digit characters counted from {@code i2}
     *         onwards, or {@code -1} when no token was extracted
     */
    private int tryExtractEnToken(LinkedList<Token> linkedList, int i, int i2, char[] cArr) {
        if (linkedList.size() < 1 || i2 < 1 || i2 >= cArr.length) {
            return -1;
        }
        Token last = linkedList.peekLast();
        if (last.getStartOffset() != i
                || last.getEndOffset() > i2
                || !isLetterOrNumber(cArr[i2])
                || !isLetterOrNumber(cArr[i2 - 1])) {
            return -1;
        }
        // cArr[i2] is already known to qualify; extend until the run or the text ends.
        int runLength = 1;
        int end = i2 + 1;
        while (end < cArr.length && isLetterOrNumber(cArr[end])) {
            runLength++;
            end++;
        }
        handleTokenPerRound(linkedList, getToken(cArr, null, i, end - 1, true));
        return runLength;
    }

    /**
     * Tells whether {@code cArr[i..i2]} consists solely of ASCII letters/digits
     * and is directly preceded or followed by another ASCII letter/digit.
     *
     * @param i    first index of the span (inclusive)
     * @param i2   last index of the span (inclusive)
     * @param cArr the text being segmented
     * @return {@code true} only when the span is non-empty, entirely
     *         alphanumeric, and has an alphanumeric neighbour on either side
     */
    private boolean ennAndItsPreOrNextAlsoEnn(int i, int i2, char[] cArr) {
        boolean allAlnum = false;
        for (int idx = i; idx <= i2; idx++) {
            allAlnum = isLetterOrNumber(cArr[idx]);
            if (!allAlnum) {
                return false;
            }
        }
        boolean alnumBefore = i != 0 && isLetterOrNumber(cArr[i - 1]);
        boolean alnumAfter = i2 != cArr.length - 1 && isLetterOrNumber(cArr[i2 + 1]);
        return allAlnum && (alnumBefore || alnumAfter);
    }

    /**
     * Computes where the next round starts and where it must stop.
     *
     * @param i          start offset of the round that just finished
     * @param i2         index at which that round's walk stopped
     * @param linkedList tokens collected in that round
     * @param i3         last index (inclusive) covered so far
     * @param i4         length of the text; the returned limit never exceeds it
     * @param z          whether the round was abandoned by the alphanumeric-run check
     * @return a two-element array: {@code [nextStart, nextLimit]}
     */
    private int[] nextRoundOffset(int i, int i2, LinkedList<Token> linkedList, int i3, int i4, boolean z) {
        int nextStart;
        int nextLimit = Integer.MAX_VALUE;
        if (z) {
            nextStart = i2 + 2;
        } else if (this.enumerateAll) {
            // Exhaustive mode: retry from every single offset.
            nextStart = i + 1;
        } else if (linkedList.size() > 0) {
            Token last = linkedList.getLast();
            Token first = linkedList.getFirst();
            boolean resumeAfterCovered = linkedList.size() == 1
                    || last.getEndOffset() <= i3
                    || last.getEndOffset() - first.getEndOffset() <= 1;
            if (resumeAfterCovered) {
                nextStart = i3 + 1;
            } else {
                // Re-scan the stretch between the end of the first and the end of the last candidate.
                nextStart = first.getEndOffset();
                nextLimit = last.getEndOffset();
            }
        } else {
            nextStart = i <= i3 ? i3 + 1 : i + 1;
        }
        return new int[]{nextStart, Math.min(nextLimit, i4)};
    }

    /**
     * Creates a token for {@code cArr[i..i2]} (both bounds inclusive).
     *
     * <p>The stored end offset is exclusive ({@code i2 + 1}). Not-a-word tokens
     * get the fixed not-a-word type; word tokens take their types from
     * {@code node} when one is supplied and are otherwise left without types
     * being set here.
     *
     * @param cArr the text being segmented
     * @param node the dictionary node the token ended on, may be {@code null}
     * @param i    first index of the token (inclusive)
     * @param i2   last index of the token (inclusive)
     * @param z    whether the token is a word
     * @return the populated token
     */
    private Token getToken(char[] cArr, BasicTree.Node node, int i, int i2, boolean z) {
        Token token = new Token();
        token.setValue(new String(cArr, i, (i2 - i) + 1));
        token.setStartOffset(i);
        token.setEndOffset(i2 + 1);
        token.setWord(z);
        if (!z) {
            token.setTypes(Constant.fixed_token_type_not_a_word);
        } else if (node != null) {
            token.setTypes(node.getTypes());
        }
        return token;
    }

    /** Returns the dictionary currently in use, or {@code null} if none was set. */
    public Trie getDictionary() {
        return this.dictionary;
    }

    /** Sets the dictionary used for all subsequent segmentation calls. */
    public void setDictionary(Trie trie) {
        this.dictionary = trie;
    }

    /** Returns whether {@link #getTokens(char[])} restarts a round at every offset. */
    public boolean isEnumerateAll() {
        return this.enumerateAll;
    }

    /** Enables or disables restarting a round at every offset in {@link #getTokens(char[])}. */
    public void setEnumerateAll(boolean z) {
        this.enumerateAll = z;
    }
}
