package com.zhidian.analysis;

import com.zhidian.analysis.utils.tree.BasicTree;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;

/* loaded from: input_file:com/zhidian/analysis/SentenceTextHandler.class */
/**
 * Text handler that cuts the input into "sentence" tokens and, while doing so, looks for
 * entries of the ambiguous-word dictionary ({@code FileDictionaryHandler.ambiguous}).
 *
 * <p>Characters are read one at a time, lower-cased and appended to the current token. A
 * sentence punctuation mark, whitespace or space character ends the token. If a dictionary
 * entry is completed while reading, scanning stops: the text preceding the entry (if any) is
 * emitted as a {@code "sentence"} token and the dictionary entry itself is emitted as a
 * separate word token typed with the types stored on the trie node.
 *
 * <p>Offsets are counted in characters read from {@code input} since the last {@link #reset()}.
 */
public class SentenceTextHandler extends TextHandler {
    /** Token type assigned to plain sentence fragments. */
    private static final String SENTENCE_TYPE = "sentence";

    /** Value of {@link #ambiguousStart} while no dictionary candidate is in progress. */
    private static final int NO_CANDIDATE = -1;

    /** Input offset of the first character of the token currently being built. */
    private int startOffset;

    /** Number of characters consumed from the input so far. */
    private int currentPosition;

    /** Dictionary of ambiguous words, walked character by character. */
    private final Trie ambiguousDict;

    /** Trie node reached by the dictionary candidate currently being matched. */
    private BasicTree.Node ambiguousParent;

    /** Index inside the current token buffer where the dictionary candidate starts. */
    private int ambiguousStart;

    /** Set once a dictionary entry was completed and still has to be emitted or split off. */
    private final AtomicBoolean everMatchAmbiguous;

    /** Text of the matched dictionary entry. */
    private String ambiguousToken;

    /** Types reported by the trie node that terminated the matched entry. */
    private String ambiguousTokenType;

    /**
     * Creates a handler that matches against {@code FileDictionaryHandler.ambiguous}.
     *
     * @param textHandlerSharedAttribute shared attribute object handed to the superclass
     */
    public SentenceTextHandler(TextHandlerSharedAttribute textHandlerSharedAttribute) {
        super(textHandlerSharedAttribute);
        this.startOffset = 0;
        this.currentPosition = 0;
        this.ambiguousDict = FileDictionaryHandler.ambiguous;
        this.everMatchAmbiguous = new AtomicBoolean(false);
        resetAmbiguousInfo();
    }

    /**
     * Advances to the next token.
     *
     * @return {@code true} if a token was produced, {@code false} when there is no input or
     *         no further token
     * @throws IOException if reading from the input fails
     */
    @Override // com.zhidian.analysis.TextHandler
    public boolean incrementToken() throws IOException {
        return hasSentence();
    }

    /**
     * Produces the next token into {@code attribute}.
     *
     * <p>A dictionary entry left over from the previous call is emitted first. Otherwise
     * characters are consumed until a delimiter, the end of the input, or a completed
     * dictionary entry is reached.
     *
     * @return {@code true} if {@code attribute} now holds a token
     * @throws IOException if reading from the input fails
     */
    private boolean hasSentence() throws IOException {
        if (this.input == null) {
            return false;
        }
        this.attribute.clearAttributes();
        if (this.everMatchAmbiguous.get()) {
            // The previous call emitted the text in front of a dictionary entry; now emit the entry.
            emitAmbiguousToken();
            return true;
        }

        this.startOffset = this.currentPosition;
        // Every token starts with a fresh dictionary walk. ambiguousStart indexes the token
        // buffer, which was just cleared, so a partial match carried over from the previous
        // token would point at characters that no longer exist.
        resetAmbiguousInfo();

        int read;
        while ((read = this.input.read()) != -1) {
            this.currentPosition++;
            if (isDelimiter(read)) {
                if (this.attribute.getTokenLength() > 0) {
                    break;
                }
                // Leading delimiters are skipped; the token starts after them.
                this.startOffset = this.currentPosition;
                continue;
            }
            char lowered = (char) Character.toLowerCase(read);
            this.attribute.tokenAppend(lowered);
            if (tryExtractAmbiguous(lowered)) {
                break;
            }
        }

        if (this.attribute.getTokenLength() <= 0) {
            return false;
        }

        if (!this.everMatchAmbiguous.get()) {
            int endOffset = this.startOffset + this.attribute.getTokenLength();
            this.attribute.setType(SENTENCE_TYPE);
            this.attribute.setStartOffset(this.startOffset);
            this.attribute.setEndOffset(endOffset);
            return true;
        }

        if (this.ambiguousStart == 0) {
            // The whole buffer is the dictionary entry: emit it directly.
            emitAmbiguousToken();
            return true;
        }

        // Emit only the text in front of the dictionary entry. The entry itself was captured
        // by tryExtractAmbiguous and is emitted by the next call.
        this.attribute.setType(SENTENCE_TYPE);
        this.attribute.setLength(this.ambiguousStart);
        this.attribute.setStartOffset(this.startOffset);
        this.attribute.setEndOffset(this.startOffset + this.ambiguousStart);
        return true;
    }

    /**
     * Resets the superclass state, the offsets and the dictionary matching state.
     *
     * @throws IOException if the superclass reset fails
     */
    @Override // com.zhidian.analysis.TextHandler
    public void reset() throws IOException {
        super.reset();
        this.currentPosition = 0;
        this.startOffset = 0;
        resetAmbiguousInfo();
    }

    /**
     * Tells whether {@code c} ends the current token: sentence punctuation, whitespace or a
     * Unicode space character.
     */
    private static boolean isDelimiter(int c) {
        return AnalysisUtil.isSentencePunctuation(c)
                || Character.isWhitespace(c)
                || Character.isSpaceChar(c);
    }

    /**
     * Advances the dictionary walk by one character that was just appended to the token.
     *
     * <p>NOTE(review): on a mismatch the walk restarts at the root without retrying
     * {@code c} from there, so an entry beginning at {@code c} is not detected. Confirm
     * whether this is intended before changing it.
     *
     * @param c the lower-cased character just appended to the token buffer
     * @return {@code true} if {@code c} completed a dictionary entry
     */
    private boolean tryExtractAmbiguous(char c) {
        BasicTree.Node next = this.ambiguousDict.find(this.ambiguousParent, c);
        if (next == null) {
            this.ambiguousParent = this.ambiguousDict.getRoot();
            this.ambiguousStart = NO_CANDIDATE;
            return false;
        }
        this.ambiguousParent = next;
        if (this.ambiguousStart == NO_CANDIDATE) {
            // Buffer index of the character just appended.
            this.ambiguousStart = (this.currentPosition - this.startOffset) - 1;
        }
        if (!next.isTokenEnd()) {
            return false;
        }
        this.everMatchAmbiguous.compareAndSet(false, true);
        this.ambiguousTokenType = next.getTypes();
        this.ambiguousToken = this.attribute.substring(this.ambiguousStart);
        return true;
    }

    /** Clears all dictionary matching state so the next character starts a new walk at the root. */
    private void resetAmbiguousInfo() {
        this.ambiguousParent = this.ambiguousDict.getRoot();
        this.ambiguousStart = NO_CANDIDATE;
        this.everMatchAmbiguous.set(false);
        this.ambiguousToken = null;
        this.ambiguousTokenType = null;
    }

    /** Emits the captured dictionary entry and clears the matching state. */
    private void emitAmbiguousToken() {
        fillAmbiguousAttr();
        resetAmbiguousInfo();
    }

    /**
     * Replaces the content of {@code attribute} with the captured dictionary entry, marked as
     * a word and typed with the types taken from the trie node.
     */
    private void fillAmbiguousAttr() {
        this.attribute.clearAttributes();
        this.attribute.tokenAppend(this.ambiguousToken);
        this.attribute.setStartOffset(this.startOffset + this.ambiguousStart);
        this.attribute.setEndOffset(this.currentPosition);
        this.attribute.setWord(true);
        this.attribute.setType(this.ambiguousTokenType);
    }
}
