package edu.stanford.nlp.international.french.pipeline;

import edu.stanford.nlp.process.treebank.AbstractDataset;
import edu.stanford.nlp.process.treebank.DefaultMapper;
import edu.stanford.nlp.process.treebank.StringMap;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.international.arabic.ATBTreeUtils;
import edu.stanford.nlp.trees.international.french.FrenchTreeReaderFactory;
import edu.stanford.nlp.trees.international.french.FrenchTreebankLanguagePack;
import edu.stanford.nlp.trees.tregex.TregexParseException;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Iterator;

/* loaded from: input_file:stanford-parser.jar:edu/stanford/nlp/international/french/pipeline/FTBDataset.class */
/**
 * Dataset that loads trees through a {@link FrenchTreeReaderFactory}, runs the
 * multi-word-expression preprocessing of {@link MWEPreprocessor} over them, and
 * writes the surviving trees to the configured output file (plus, optionally, a
 * flattened one-line-per-tree version).
 */
public class FTBDataset extends AbstractDataset {

    /** Character encoding used for every file this class writes. */
    private static final String OUTPUT_ENCODING = "UTF-8";

    /**
     * Creates an empty dataset backed by an in-memory treebank that reads files
     * with the {@code xml} extension.
     */
    public FTBDataset() {
        this.treebank = new MemoryTreebank(new FrenchTreeReaderFactory(), FrenchTreebankLanguagePack.FTB_ENCODING);
        this.treeFileExtension = "xml";
    }

    /**
     * Loads every configured data path into the treebank, preprocesses the
     * trees, and writes them out.
     *
     * <p>Trees matching one of the "punctuation only" patterns are reported on
     * {@code System.err} and skipped. Failures to open an output file, to use
     * the output encoding, or to compile the Tregex patterns are reported on
     * {@code System.err} and end the build early; they are not propagated to
     * the caller. Both writers are closed on every exit path.
     */
    @Override
    public void build() {
        for (File path : this.pathsToData) {
            int sizeBefore = this.treebank.size();
            if (this.splitFilter == null) {
                this.treebank.loadPath(path, this.treeFileExtension, false);
            } else {
                this.treebank.loadPath(path, this.splitFilter);
            }
            this.toStringBuffer.append(String.format(" Loaded %d trees from %s\n", this.treebank.size() - sizeBefore, path.getPath()));
        }

        PrintWriter treeWriter = null;
        PrintWriter flatWriter = null;
        // Tracks which file is being opened so that the error message below
        // names the file that actually failed rather than always the tree file.
        String fileBeingOpened = this.outFileName;
        try {
            treeWriter = openWriter(this.outFileName);
            if (this.makeFlatFile) {
                fileBeingOpened = this.flatFileName;
                flatWriter = openWriter(this.flatFileName);
            }

            this.outputFileList.add(this.outFileName);
            if (this.makeFlatFile) {
                this.outputFileList.add(this.flatFileName);
                this.toStringBuffer.append(" Made flat files\n");
            }

            preprocessMWEs();

            // Sentences consisting solely of one, two, or four punctuation
            // children are discarded instead of being written.
            TregexPattern[] punctuationOnly = {
                TregexPattern.compile("@SENT <: @PUNC"),
                TregexPattern.compile("@SENT <1 @PUNC <2 @PUNC !<3 __"),
                TregexPattern.compile("@SENT <1 @PUNC <2 @PUNC <3 @PUNC <4 @PUNC !<5 __"),
            };

            for (Tree tree : this.treebank) {
                if (matchesAny(punctuationOnly, tree)) {
                    System.err.println("Discarding tree: " + tree.toString());
                    continue;
                }
                if (this.customTreeVisitor != null) {
                    this.customTreeVisitor.visitTree(tree);
                }
                treeWriter.println(tree.toString());
                if (this.makeFlatFile) {
                    flatWriter.println(this.removeEscapeTokens ? ATBTreeUtils.unEscape(ATBTreeUtils.flattenTree(tree)) : ATBTreeUtils.flattenTree(tree));
                }
            }
        } catch (FileNotFoundException e) {
            System.err.printf("%s: Could not open %s for writing%n", getClass().getName(), fileBeingOpened);
        } catch (UnsupportedEncodingException e) {
            System.err.printf("%s: Filesystem does not support UTF-8 output%n", getClass().getName());
            e.printStackTrace();
        } catch (TregexParseException e) {
            System.err.printf("%s: Could not compile Tregex expressions%n", getClass().getName());
            e.printStackTrace();
        } finally {
            // Single cleanup point for normal completion, handled errors, and
            // anything else that escapes the try block.
            if (treeWriter != null) {
                treeWriter.close();
            }
            if (flatWriter != null) {
                flatWriter.close();
            }
        }
    }

    /**
     * Opens a buffered writer on {@code fileName} using {@link #OUTPUT_ENCODING}.
     *
     * @param fileName path of the file to create or truncate
     * @return a writer positioned at the start of the file
     * @throws FileNotFoundException if the file cannot be opened for writing
     * @throws UnsupportedEncodingException if the output encoding is unavailable
     */
    private static PrintWriter openWriter(String fileName) throws FileNotFoundException, UnsupportedEncodingException {
        return new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName), OUTPUT_ENCODING)));
    }

    /**
     * Tells whether at least one of {@code patterns} is found in {@code tree}.
     * Patterns are tried in order and evaluation stops at the first hit.
     *
     * @param patterns compiled patterns to try
     * @param tree tree to search
     * @return {@code true} if any pattern matches
     */
    private static boolean matchesAny(TregexPattern[] patterns, Tree tree) {
        for (TregexPattern pattern : patterns) {
            if (pattern.matcher(tree).find()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Runs the two-pass multi-word-expression preprocessing over the treebank:
     * the first pass fills five counters via
     * {@code MWEPreprocessor.countMWEStatistics}, the second pass hands two of
     * them to {@code MWEPreprocessor.traverseAndFix} for every tree.
     */
    private void preprocessMWEs() {
        // NOTE(review): the counters are left raw because the parameter types of
        // MWEPreprocessor are not visible from this file - add type arguments
        // once confirmed. Names reflect the argument position in
        // countMWEStatistics only.
        TwoDimensionalCounter firstStats = new TwoDimensionalCounter();
        TwoDimensionalCounter secondStats = new TwoDimensionalCounter();
        TwoDimensionalCounter thirdStats = new TwoDimensionalCounter();
        TwoDimensionalCounter fourthStats = new TwoDimensionalCounter();
        TwoDimensionalCounter fifthStats = new TwoDimensionalCounter();

        for (Tree tree : this.treebank) {
            MWEPreprocessor.countMWEStatistics(tree, firstStats, secondStats, thirdStats, fourthStats, fifthStats);
        }
        // The fixing pass must start only after every tree has been counted.
        for (Tree tree : this.treebank) {
            MWEPreprocessor.traverseAndFix(tree, thirdStats, firstStats);
        }
    }

    /**
     * Applies the inherited option handling, then installs default mappers
     * where none was configured: a {@link DefaultMapper} as lexical mapper
     * (set up with the comma-separated {@code lexMapOptions}), and, if any
     * mapping files were given, a {@link DefaultMapper} as POS mapper that is
     * set up once per mapping file.
     *
     * @param stringMap the options to apply
     * @return the result of the superclass option handling
     */
    @Override
    public boolean setOptions(StringMap stringMap) {
        boolean accepted = super.setOptions(stringMap);

        if (this.lexMapper == null) {
            this.lexMapper = new DefaultMapper();
            this.lexMapper.setup(null, this.lexMapOptions.split(","));
        }

        if (!this.pathsToMappings.isEmpty()) {
            if (this.posMapper == null) {
                this.posMapper = new DefaultMapper();
            }
            for (File mappingFile : this.pathsToMappings) {
                this.posMapper.setup(mappingFile, new String[0]);
            }
        }
        return accepted;
    }
}
