package edu.stanford.nlp.trees.international.negra;

import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeNormalizer;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.international.french.FrenchTreeReader;
import edu.stanford.nlp.util.Filter;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

/* loaded from: input_file:stanford-parser.jar:edu/stanford/nlp/trees/international/negra/NegraPennTreeNormalizer.class */
/**
 * Tree normalizer that rewrites node labels and repairs the root of trees read
 * with a {@link TreebankLanguagePack}.
 *
 * <p>Whole-tree normalization performs, in order:
 * <ol>
 *   <li>making sure the tree is rooted in the language pack's start symbol;</li>
 *   <li>pruning trace preterminals (label starting with {@code *T} over a single leaf);</li>
 *   <li>splicing out unary nodes that carry the same label as their only child;</li>
 *   <li>optionally wrapping the non-adposition material of each PP in an NP;</li>
 *   <li>labelling phrasal nodes whose label is missing or empty.</li>
 * </ol>
 */
public class NegraPennTreeNormalizer extends TreeNormalizer {
    /** Label clean-up mode: 1 = category and function, 2 = basic category, anything else = unchanged. */
    private final int nodeCleanup;
    private static final String nonUnaryRoot = "NUR";
    protected final TreebankLanguagePack tlp;
    private boolean insertNPinPP;
    private final Filter<Tree> emptyFilter;
    private final Filter<Tree> aOverAFilter;
    private static final String junkCPP = "---CJ";
    private static final String cpp = "CPP";
    /** Tags skipped at the left edge of a PP when an NP is inserted. */
    private final Set<String> prepositionTags;
    /** Tags skipped at the right edge of a PP when an NP is inserted. */
    private final Set<String> postpositionTags;
    private static final long serialVersionUID = 8529514903815041064L;

    /** Labels starting with {@code *T}; compiled once instead of once per visited node. */
    private static final Pattern TRACE_LABEL = Pattern.compile("^\\*T.*$");

    /** Creates a normalizer backed by a fresh {@code NegraPennLanguagePack} and no label clean-up. */
    public NegraPennTreeNormalizer() {
        this(new NegraPennLanguagePack());
    }

    /**
     * Creates a normalizer that leaves nonterminal labels unchanged (clean-up mode 0).
     *
     * @param treebankLanguagePack language pack supplying the start symbol and category handling
     */
    public NegraPennTreeNormalizer(TreebankLanguagePack treebankLanguagePack) {
        this(treebankLanguagePack, 0);
    }

    /**
     * Creates a normalizer.
     *
     * @param treebankLanguagePack language pack supplying the start symbol and category handling
     * @param i label clean-up mode: 1 reduces labels to category and function, 2 reduces them to
     *          the basic category, any other value leaves them unchanged
     */
    public NegraPennTreeNormalizer(TreebankLanguagePack treebankLanguagePack, int i) {
        this.insertNPinPP = false;
        this.prepositionTags = new HashSet<String>(Arrays.asList("APPR", "APPRART"));
        this.postpositionTags = new HashSet<String>(Arrays.asList("APPO", "APZR"));
        this.tlp = treebankLanguagePack;
        this.nodeCleanup = i;
        // NOTE: keep the two anonymous classes in this order; their generated class names
        // depend on their position in the file.
        this.emptyFilter = new Filter<Tree>() {
            private static final long serialVersionUID = -606371737889816130L;

            /** Rejects only trace preterminals: a {@code *T...} label over exactly one leaf child. */
            @Override // edu.stanford.nlp.util.Filter
            public boolean accept(Tree tree) {
                Tree[] children = tree.children();
                Label label = tree.label();
                if (label == null || label.value() == null) {
                    return true;
                }
                boolean traceLabel = TRACE_LABEL.matcher(label.value()).matches();
                boolean singleLeafChild = !tree.isLeaf() && children.length == 1 && children[0].isLeaf();
                return !(traceLabel && singleLeafChild);
            }
        };
        this.aOverAFilter = new Filter<Tree>() {
            private static final long serialVersionUID = -606371737889816130L;

            /** Rejects only phrasal unary nodes whose label equals the label of their single child. */
            @Override // edu.stanford.nlp.util.Filter
            public boolean accept(Tree tree) {
                if (tree.isLeaf() || tree.isPreTerminal() || tree.children().length != 1) {
                    return true;
                }
                return tree.label() == null || !tree.label().equals(tree.children()[0].label());
            }
        };
    }

    /** @return the start symbol of the underlying language pack */
    public String rootSymbol() {
        return this.tlp.startSymbol();
    }

    /** @return the label given to an originally unlabeled root that had to be wrapped */
    public String nonUnaryRootSymbol() {
        return nonUnaryRoot;
    }

    /** @param z whether whole-tree normalization should insert an NP inside each PP */
    public void setInsertNPinPP(boolean z) {
        this.insertNPinPP = z;
    }

    /** @return whether whole-tree normalization inserts an NP inside each PP */
    public boolean getInsertNPinPP() {
        return this.insertNPinPP;
    }

    /**
     * Returns the interned terminal string; terminals are otherwise left untouched.
     *
     * @param str the leaf string, must not be null
     */
    @Override // edu.stanford.nlp.trees.TreeNormalizer
    public String normalizeTerminal(String str) {
        return str.intern();
    }

    /**
     * Maps the junk label {@code ---CJ} to {@code CPP}, applies the configured label clean-up
     * and interns the result.
     *
     * @param str the raw nonterminal label
     * @return the cleaned, interned label, or null if the clean-up produced null
     */
    @Override // edu.stanford.nlp.trees.TreeNormalizer
    public String normalizeNonterminal(String str) {
        String label = junkCPP.equals(str) ? cpp : str;
        String cleaned = cleanUpLabel(label);
        return cleaned == null ? null : cleaned.intern();
    }

    /**
     * Gives an unlabeled root the start symbol.
     *
     * <p>If the root has exactly two children, a phrasal node followed by sentence-final
     * punctuation, the punctuation is moved under the phrasal node and the root itself is
     * relabeled with the start symbol. Otherwise the root is labeled {@code NUR} and wrapped
     * in a new start-symbol node.
     */
    private Tree fixNonUnaryRoot(Tree tree, TreeFactory treeFactory) {
        List<Tree> rootKids = tree.getChildrenAsList();
        boolean phrasePlusFinalPunct = rootKids.size() == 2
                && tree.firstChild().isPhrasal()
                && this.tlp.isSentenceFinalPunctuationTag(tree.lastChild().value());
        if (!phrasePlusFinalPunct) {
            tree.setValue(nonUnaryRoot);
            return treeFactory.newTreeNode(this.tlp.startSymbol(), Collections.singletonList(tree));
        }
        // Re-attach the punctuation as the last child of the phrasal node.
        List<Tree> phraseKids = tree.firstChild().getChildrenAsList();
        phraseKids.add(tree.lastChild());
        tree.firstChild().setChildren(phraseKids);
        rootKids.remove(rootKids.size() - 1);
        tree.setChildren(rootKids);
        tree.setValue(this.tlp.startSymbol());
        return tree;
    }

    /**
     * Normalizes a complete tree: fixes the root, prunes trace preterminals, splices out
     * A-over-A unary nodes, optionally inserts NPs into PPs and labels unlabeled phrasal nodes.
     *
     * @param tree the tree to normalize; may be modified in place
     * @param treeFactory factory used for any newly created nodes
     * @return the normalized tree
     */
    @Override // edu.stanford.nlp.trees.TreeNormalizer
    public Tree normalizeWholeTree(Tree tree, TreeFactory treeFactory) {
        Tree rooted = tree;
        if (tree.value() == null) {
            rooted = fixNonUnaryRoot(tree, treeFactory);
        } else if (!tree.value().equals(this.tlp.startSymbol())) {
            rooted = treeFactory.newTreeNode(this.tlp.startSymbol(), Collections.singletonList(tree));
        }
        Tree normalized = rooted.prune(this.emptyFilter, treeFactory).spliceOut(this.aOverAFilter, treeFactory);
        if (this.insertNPinPP) {
            insertNPinPPall(normalized);
        }
        for (Tree node : normalized) {
            if (node.isLeaf() || node.isPreTerminal()) {
                continue;
            }
            String value = node.value();
            if (value != null && !value.isEmpty()) {
                continue;
            }
            // The DL correction has to be tried BEFORE the placeholder is assigned: once the
            // label is filled in, the node no longer looks unlabeled and the fix never fires.
            if (isUnlabeledDL(node)) {
                System.err.println("Correcting treebank error: giving phrase label DL to " + node);
                node.setValue("DL");
            } else {
                node.setValue(FrenchTreeReader.MISSING_POS);
            }
        }
        return normalized;
    }

    /** True for a three-child node whose first child is tagged {@code NN} and second {@code $.}. */
    private static boolean isUnlabeledDL(Tree node) {
        return node.numChildren() == 3
                && "NN".equals(node.firstChild().value())
                && "$.".equals(node.getChild(1).value());
    }

    /** Applies {@link #insertNPinPP(Tree)} bottom-up to every node of the tree. */
    private void insertNPinPPall(Tree tree) {
        for (Tree child : tree.children()) {
            insertNPinPPall(child);
        }
        insertNPinPP(tree);
    }

    /**
     * If the node's basic category is PP, wraps its children in a new NP node, leaving leading
     * preposition-tagged and trailing postposition-tagged children directly under the PP.
     * At least one child always ends up inside the NP; a node without children only triggers
     * a warning.
     */
    private void insertNPinPP(Tree tree) {
        // Constant-first comparison: a null category must not blow up here.
        if (!"PP".equals(this.tlp.basicCategory(tree.value()))) {
            return;
        }
        Tree[] kids = tree.children();
        int first = 0;
        int last = kids.length - 1;
        while (first < last && this.prepositionTags.contains(this.tlp.basicCategory(kids[first].value()))) {
            first++;
        }
        while (first < last && this.postpositionTags.contains(this.tlp.basicCategory(kids[last].value()))) {
            last--;
        }
        if (first > last) {
            // Only possible when the node has no children at all.
            System.err.println("##### Warning -- no NP material here!");
            return;
        }
        Tree[] npKids = Arrays.copyOfRange(kids, first, last + 1);
        Tree np = tree.treeFactory().newTreeNode(tree.label().labelFactory().newLabel("NP"), Arrays.asList(npKids));
        // New child list: [leading adpositions] NP [trailing adpositions]
        Tree[] ppKids = new Tree[(kids.length - npKids.length) + 1];
        System.arraycopy(kids, 0, ppKids, 0, first);
        ppKids[first] = np;
        System.arraycopy(kids, last + 1, ppKids, first + 1, (kids.length - last) - 1);
        tree.setChildren(ppKids);
        System.out.println("#### inserted NP in PP");
        tree.pennPrint();
    }

    /**
     * Reduces a label according to the clean-up mode chosen at construction time.
     *
     * @param str the raw label
     * @return category plus function (mode 1), basic category (mode 2), or {@code str} unchanged
     */
    protected String cleanUpLabel(String str) {
        switch (this.nodeCleanup) {
            case 1:
                return this.tlp.categoryAndFunction(str);
            case 2:
                return this.tlp.basicCategory(str);
            default:
                return str;
        }
    }
}
