package edu.mit.csail.cgs.deepseq.discovery;

import edu.mit.csail.cgs.datasets.chipseq.ChipSeqLocator;
import edu.mit.csail.cgs.datasets.species.Genome;
import edu.mit.csail.cgs.datasets.species.Organism;
import edu.mit.csail.cgs.deepseq.DeepSeqExpt;
import edu.mit.csail.cgs.deepseq.analysis.ChIAPET_analysis;
import edu.mit.csail.cgs.deepseq.analysis.GPS_ReadDistribution;
import edu.mit.csail.cgs.deepseq.analysis.MotifScan;
import edu.mit.csail.cgs.deepseq.analysis.TFBS_SpaitialAnalysis;
import edu.mit.csail.cgs.deepseq.discovery.kmer.KMAC;
import edu.mit.csail.cgs.deepseq.discovery.kmer.KMAC0;
import edu.mit.csail.cgs.deepseq.utilities.BEDFileWriter;
import edu.mit.csail.cgs.deepseq.utilities.CommonUtils;
import edu.mit.csail.cgs.metagenes.MetaMaker;
import edu.mit.csail.cgs.tools.utils.Args;
import edu.mit.csail.cgs.utils.NotFoundException;
import edu.mit.csail.cgs.utils.Pair;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import org.apache.batik.util.SVGConstants;
import org.apache.batik.util.XMLConstants;

/* loaded from: input_file:edu/mit/csail/cgs/deepseq/discovery/GEM.class */
/**
 * Command-line driver for GEM ("Genome wide Event finding and Motif discovery").
 *
 * <p>{@link #main(String[])} either dispatches to one of the bundled sub-tools
 * (selected by the first argument) or builds a {@code GEM} instance, runs the
 * mixture model rounds via {@link #runMixtureModel(boolean)} and finally
 * calls {@link #close()}.
 *
 * <p>Fatal command-line problems are reported on {@code System.err} and
 * terminate the JVM through {@link System#exit(int)}.
 */
public class GEM {
    public static final String GEM_VERSION = "3.0";

    /** Padding added to chromosome lengths that are estimated from signal read files. */
    private static final int CHROM_LENGTH_PADDING = 1000;

    private static final String CITATION = "\nPlease cite: \nYuchun Guo, Shaun Mahony, David K. Gifford (2012) PLoS Computational Biology 8(8): e1002638. \nHigh Resolution Genome Wide Binding Event Finding and Motif Discovery Reveals Transcription Factor Spatial Binding Constraints. \ndoi:10.1371/journal.pcbi.1002638\n";

    private final String[] args;
    private Genome genome;
    private KPPMixture mixture;

    /**
     * Parses the command line, loads the signal/control experiments and
     * builds the {@link KPPMixture} used by {@link #runMixtureModel(boolean)}.
     *
     * <p>Prints help and exits when {@code --help} is given; prints an error
     * and exits when the read distribution file ({@code --d}) or the signal
     * experiments ({@code --expt...}) are missing or inconsistent.
     *
     * @param strArr raw command-line arguments
     * @throws NotFoundException propagated from argument/genome parsing
     * @throws IllegalStateException if the mixture model cannot be constructed
     *         (experiment loaders are closed before the exception is thrown)
     */
    public GEM(String[] strArr) throws NotFoundException {
        this.args = strArr;
        Set<String> flags = Args.parseFlags(strArr);
        if (flags.contains("help")) {
            printHelp();
            System.exit(1);
        }

        this.genome = resolveGenome();
        requireReadDistributionFile();

        ArrayList<String> exptTags = collectExperimentTags();
        if (exptTags.isEmpty()) {
            System.err.println("Error: No signal experiments provided.\nUse the --expt option.");
            printError();
            System.exit(1);
        }

        if (this.genome == null) {
            System.out.println("Estimating chromosome sizes from all read files (skip this step by adding a --g option)...\n");
            this.genome = estimateGenomeFromReads(exptTags);
        }

        ArrayList<Pair<DeepSeqExpt, DeepSeqExpt>> experiments = new ArrayList<Pair<DeepSeqExpt, DeepSeqExpt>>();
        ArrayList<String> conditionNames = new ArrayList<String>();
        loadExperiments(exptTags, flags.contains("nonunique"), experiments, conditionNames);

        try {
            this.mixture = new KPPMixture(this.genome, experiments, conditionNames, strArr);
        } catch (Exception e) {
            for (Pair<DeepSeqExpt, DeepSeqExpt> pair : experiments) {
                pair.car().closeLoaders();
                pair.cdr().closeLoaders();
            }
            // Fail fast: continuing with a null mixture would only surface later
            // as an unrelated NullPointerException in runMixtureModel()/close().
            throw new IllegalStateException("Failed to initialize the GEM mixture model", e);
        }
    }

    /**
     * Determines the genome from the command line: first via
     * {@code Args.parseGenome}, then via the {@code --g} chromosome-size file.
     *
     * @return the genome, or {@code null} when neither option is present
     */
    private Genome resolveGenome() throws NotFoundException {
        Pair<Organism, Genome> parsed = Args.parseGenome(this.args);
        if (parsed != null) {
            return parsed.cdr();
        }
        String chromSizesPath = Args.parseString(this.args, "g", null);
        if (chromSizesPath == null) {
            return null;
        }
        Genome fromFile = new Genome("Genome", new File(chromSizesPath), true);
        if (fromFile.getChromLengthMap().isEmpty()) {
            System.err.println("Your genome chromosome information in --g " + chromSizesPath + " is empty, or has wrong file format.\n");
            printError();
            System.exit(1);
        }
        return fromFile;
    }

    /** Exits with an error unless {@code --d} names an existing regular file. */
    private void requireReadDistributionFile() {
        String readDistPath = Args.parseString(this.args, "d", null);
        if (readDistPath == null) {
            System.err.println("The read distribution file is required. Use --d option.\n");
            printError();
            System.exit(1);
        } else if (!new File(readDistPath).isFile()) {
            System.err.println("\nCannot find read distribution file!");
            System.exit(1);
        }
    }

    /**
     * Collects the distinct experiment option tags (e.g. {@code --exptX},
     * {@code --rdbexptX}, {@code --rfexptX}) in command-line order.
     *
     * <p>Only tokens that start with {@code --} are considered, so an option
     * value that merely contains "expt" (for instance a file named
     * {@code expt1.bed}) is not mistaken for a condition tag.
     */
    private ArrayList<String> collectExperimentTags() {
        ArrayList<String> tags = new ArrayList<String>();
        for (String arg : this.args) {
            if (arg.startsWith("--") && arg.contains("expt") && !tags.contains(arg)) {
                tags.add(arg);
            }
        }
        return tags;
    }

    /**
     * Reads the {@code --f} read-file format (default {@code BED}), upper-cased;
     * {@code BAM} is mapped to {@code SAM}.
     */
    private String readFileFormat() {
        // Locale.ROOT keeps the mapping stable regardless of the JVM default locale.
        String format = Args.parseString(this.args, "f", "BED").toUpperCase(java.util.Locale.ROOT);
        return format.equals("BAM") ? "SAM" : format;
    }

    /**
     * Builds a genome whose chromosome lengths are the maxima observed over
     * the file-based signal and control experiments of every {@code --expt} tag.
     * Tags starting with {@code --rf} or {@code --rdb} are skipped.
     */
    private Genome estimateGenomeFromReads(List<String> exptTags) throws NotFoundException {
        HashMap<String, Integer> chromLengths = new HashMap<String, Integer>();
        for (String tag : exptTags) {
            if (tag.startsWith("--rf") || tag.startsWith("--rdb")) {
                continue;
            }
            String condition = tag.replaceFirst("--expt", "");
            List<File> signalFiles = Args.parseFileHandles(this.args, "expt" + condition);
            List<File> controlFiles = Args.parseFileHandles(this.args, "ctrl" + condition);
            String format = readFileFormat();
            if (signalFiles.size() > 0) {
                DeepSeqExpt signal = new DeepSeqExpt(signalFiles, format);
                DeepSeqExpt control = new DeepSeqExpt(controlFiles, format);
                mergeChromLengths(chromLengths, signal.getGenome().getChromLengthMap(), CHROM_LENGTH_PADDING);
                // NOTE(review): control lengths are merged without the padding that is
                // applied to signal lengths - kept as in the original; confirm intent.
                mergeChromLengths(chromLengths, control.getGenome().getChromLengthMap(), 0);
            }
        }
        return new Genome("Genome", chromLengths);
    }

    /** Raises each entry of {@code merged} to {@code observed + padding} when that is larger or the key is new. */
    private static void mergeChromLengths(HashMap<String, Integer> merged, Map<String, Integer> observed, int padding) {
        for (Map.Entry<String, Integer> entry : observed.entrySet()) {
            int candidate = entry.getValue().intValue() + padding;
            Integer current = merged.get(entry.getKey());
            if (current == null || current.intValue() < candidate) {
                merged.put(entry.getKey(), Integer.valueOf(candidate));
            }
        }
    }

    /**
     * Loads one signal/control pair per experiment tag and records the
     * condition name of every tag.
     *
     * <p>{@code --rf...} tags only contribute a condition name. For the other
     * tags exactly one source must be present: read files ({@code --expt})
     * or database locators ({@code --rdbexpt}); otherwise the program exits.
     *
     * @param exptTags       tags from {@link #collectExperimentTags()}
     * @param useNonUnique   value of the {@code nonunique} flag, passed to file-based experiments
     * @param experiments    receives the loaded signal/control pairs
     * @param conditionNames receives the condition name of each tag
     */
    private void loadExperiments(List<String> exptTags, boolean useNonUnique,
            ArrayList<Pair<DeepSeqExpt, DeepSeqExpt>> experiments, ArrayList<String> conditionNames)
            throws NotFoundException {
        for (String tag : exptTags) {
            if (tag.startsWith("--rf")) {
                conditionNames.add(tag.replaceFirst("--rfexpt", ""));
                continue;
            }
            System.out.println("Loading data...");
            String condition = tag.startsWith("--rdb")
                    ? tag.replaceFirst("--rdbexpt", "")
                    : tag.replaceFirst("--expt", "");
            conditionNames.add(condition);
            if (condition.length() > 0) {
                System.out.println("    loading condition: " + condition);
            }

            List<ChipSeqLocator> dbSignal = Args.parseChipSeq(this.args, "rdbexpt" + condition);
            List<ChipSeqLocator> dbControl = Args.parseChipSeq(this.args, "rdbctrl" + condition);
            List<File> signalFiles = Args.parseFileHandles(this.args, "expt" + condition);
            List<File> controlFiles = Args.parseFileHandles(this.args, "ctrl" + condition);
            String format = readFileFormat();

            boolean hasFiles = signalFiles.size() > 0;
            boolean hasDb = dbSignal.size() > 0;
            if (hasFiles && !hasDb) {
                experiments.add(new Pair<DeepSeqExpt, DeepSeqExpt>(
                        new DeepSeqExpt(this.genome, signalFiles, useNonUnique, format, -1),
                        new DeepSeqExpt(this.genome, controlFiles, useNonUnique, format, -1)));
            } else if (hasDb && !hasFiles) {
                if (this.genome == null) {
                    System.err.println("Error: the genome must be defined in order to use the Gifford Lab DB.");
                    System.exit(1);
                }
                experiments.add(new Pair<DeepSeqExpt, DeepSeqExpt>(
                        new DeepSeqExpt(this.genome, dbSignal, "readdb", -1),
                        new DeepSeqExpt(this.genome, dbControl, "readdb", -1)));
            } else {
                System.err.println("Must provide either an aligner output file or Gifford lab DB experiment name for the signal experiment (but not both)");
                printError();
                System.exit(1);
            }
        }
    }

    /**
     * Runs the GPS rounds and, when requested, the KMAC motif-discovery
     * rounds, then copies the final event files next to the output folder.
     *
     * <p>Options read here: {@code r_gps}, {@code r_gem}, {@code d_l},
     * {@code d_r}, {@code kf}, {@code k_win}, {@code k_win2} and the flags
     * {@code not_update_model}, {@code constant_model_range},
     * {@code process_all_regions}, {@code refine_regions}, {@code outNP}.
     *
     * @param z {@code true} to continue with motif discovery after the GPS
     *          rounds; {@code false} to stop after GPS
     */
    public void runMixtureModel(boolean z) {
        Set<String> flags = Args.parseFlags(this.args);
        int gpsRounds = Args.parseInteger(this.args, "r_gps", 2);
        int gemRounds = Args.parseInteger(this.args, "r_gem", 2);
        int dLeft = Args.parseInteger(this.args, "d_l", 300);
        int dRight = Args.parseInteger(this.args, "d_r", 200);
        String kfOption = Args.parseString(this.args, "kf", null);
        boolean modelFrozen = flags.contains("not_update_model");
        boolean withNarrowPeak = flags.contains("outNP");
        String outName = this.mixture.getOutName();
        String name = new File(outName).getName();
        int round = 0;

        // Round 0 only runs without a --kf option and while model updates are enabled.
        if (kfOption == null && !modelFrozen) {
            this.mixture.setOutName(outName + "_0");
            System.out.println("\n============================ Round 0 ============================");
            this.mixture.execute();
            if (flags.contains("constant_model_range")) {
                this.mixture.updateBindingModel(-this.mixture.getModel().getMin(), this.mixture.getModel().getMax(), outName + "_1");
            } else {
                Pair<Integer, Integer> newEnds = this.mixture.getModel().getNewEnds(dLeft, dRight);
                this.mixture.updateBindingModel(newEnds.car().intValue(), newEnds.cdr().intValue(), outName + "_1");
            }
            if (flags.contains("process_all_regions")) {
                this.mixture.printFeatures(0);
                this.mixture.printFilteredFeatures(0);
                this.mixture.printInsignificantFeatures(0);
                if (flags.contains("refine_regions")) {
                    this.mixture.refineRegions();
                }
            }
            this.mixture.releaseMemory();
        }

        while (round + 1 < gpsRounds) {
            round++;
            executeRound(round, outName, modelFrozen);
        }

        // Final outputs live two directory levels above the per-round prefix.
        String finalPrefix = new File(new File(outName).getParentFile().getParentFile(), name).getAbsolutePath();
        copyEventFiles(outName + "_" + round, finalPrefix, "GPS", withNarrowPeak);

        if (z) {
            int kmacStatus = this.mixture.initKMAC();
            if (kmacStatus < 0) {
                // Motif discovery cannot start: finish with the GPS results only.
                this.mixture.plotAllReadDistributions(this.mixture.allModels, this.mixture.outName);
                this.mixture.closeLogFile();
                if (kmacStatus == -1) {
                    System.out.println("\nMotif can not be found!\n\nGPS analysis results are printed to:");
                } else if (kmacStatus == -2) {
                    System.out.println("\nBinding event can not be found!\n\nGPS analysis results are printed to:");
                }
                // The page written below is "<prefix>.results.htm"; report that exact name.
                System.out.println(finalPrefix + ".GPS_events.txt\n" + finalPrefix + ".results.htm\n" + finalPrefix + "_outputs (folder with all other files)\n");
                writeRedirectPage(finalPrefix, name, round, "GPS");
                return;
            }
            for (int gemRound = 1; gemRound < gemRounds; gemRound++) {
                round++;
                executeRound(round, outName, modelFrozen);
                this.mixture.runKMAC(Args.parseInteger(this.args, "k_win", 61));
            }
            int secondWindow = Args.parseInteger(this.args, "k_win2", -1);
            if (secondWindow != -1) {
                System.out.println("\n============== Finding motif for " + name + "_" + (round + 1) + ", window size=" + secondWindow + " =============\n");
                this.mixture.setOutName(outName + "_" + (round + 1));
                this.mixture.runKMAC(secondWindow);
            }
        }

        this.mixture.plotAllReadDistributions(this.mixture.allModels, this.mixture.outName);
        this.mixture.closeLogFile();

        if (!z) {
            System.out.println("\nFinished! GPS analysis results are printed to:\n" + finalPrefix + ".GPS_events.txt\n" + finalPrefix + "_outputs (folder with all other files)\n");
            // Same copy as above, repeated after plotting as in the original flow.
            copyEventFiles(outName + "_" + round, finalPrefix, "GPS", withNarrowPeak);
            return;
        }

        System.out.println("\nFinished! GEM analysis results are printed to:\n" + finalPrefix + ".GEM_events.txt\n" + finalPrefix + ".results.htm\n" + finalPrefix + "_outputs (folder with all other files)\n");
        copyEventFiles(outName + "_" + round, finalPrefix, "GEM", withNarrowPeak);
        writeRedirectPage(finalPrefix, name, round, "GEM");
    }

    /**
     * Executes one numbered round: run the mixture, optionally update the
     * binding model for the next round, print the feature files and release memory.
     */
    private void executeRound(int round, String outName, boolean modelFrozen) {
        System.out.println("\n============================ Round " + round + " ============================");
        this.mixture.setOutName(outName + "_" + round);
        this.mixture.execute();
        if (!modelFrozen) {
            this.mixture.updateBindingModel(-this.mixture.getModel().getMin(), this.mixture.getModel().getMax(), outName + "_" + (round + 1));
        }
        this.mixture.printFeatures(round);
        this.mixture.printFilteredFeatures(round);
        this.mixture.printInsignificantFeatures(round);
        this.mixture.releaseMemory();
    }

    /**
     * Copies {@code <roundPrefix>.GEM_events.txt} (and, if requested, the
     * {@code .narrowPeak} file) to {@code <finalPrefix>.<label>_events.*}.
     */
    private static void copyEventFiles(String roundPrefix, String finalPrefix, String label, boolean withNarrowPeak) {
        CommonUtils.copyFile(roundPrefix + ".GEM_events.txt", finalPrefix + "." + label + "_events.txt");
        if (withNarrowPeak) {
            CommonUtils.copyFile(roundPrefix + ".GEM_events.narrowPeak", finalPrefix + "." + label + "_events.narrowPeak");
        }
    }

    /** Writes {@code <finalPrefix>.results.htm}, an HTML page redirecting to the given round's result page. */
    private static void writeRedirectPage(String finalPrefix, String name, int round, String label) {
        String target = name + "_outputs/" + name + "_" + round + ".results.htm";
        CommonUtils.writeFile(finalPrefix + ".results.htm", "<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.0 Transitional//EN'><html><head><title>Redirect</title><meta http-equiv='REFRESH' content='0;url=" + target + "'></HEAD><BODY>If your browser did not redirect, <a href='" + target + "'>click here for " + label + " Result</a>.</BODY></HTML>");
    }

    /**
     * Program entry point.
     *
     * <p>With no arguments the banner and help are printed and the JVM exits
     * with status -1. If the first argument names a sub-tool (KMAC0, KMAC,
     * KSM, CPC, RMD/RPD, RDIST, get_bed, line_plot; case-insensitive) the
     * call is delegated to that tool. Otherwise a full GEM/GPS run is started.
     *
     * @param strArr command-line arguments
     * @throws Exception propagated from the delegated tools or the GEM run
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr.length == 0) {
            printBanner();
            printHelp();
            System.exit(-1);
        }

        String tool = strArr[0];
        if (tool.equalsIgnoreCase("KMAC0")) {
            KMAC0.main(strArr);
            return;
        }
        if (tool.equalsIgnoreCase("KMAC")) {
            KMAC.main(strArr);
            return;
        }
        if (tool.equalsIgnoreCase("KSM")) {
            MotifScan.main(strArr);
            return;
        }
        if (tool.equalsIgnoreCase("CPC")) {
            ChIAPET_analysis.main(strArr);
            return;
        }
        if (tool.equalsIgnoreCase("RMD") || tool.equalsIgnoreCase("RPD")) {
            TFBS_SpaitialAnalysis.main(strArr);
            return;
        }
        if (tool.equalsIgnoreCase("RDIST")) {
            GPS_ReadDistribution.main(strArr);
            return;
        }
        if (tool.equalsIgnoreCase("get_bed")) {
            BEDFileWriter.main(strArr);
            return;
        }
        if (tool.equalsIgnoreCase("line_plot")) {
            MetaMaker.main(strArr);
            return;
        }

        long startMillis = System.currentTimeMillis();
        printBanner();
        SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss");
        System.out.println("----------------------------------\n\nStart time: " + timestampFormat.format(new Date()) + "\n");

        // Motif discovery is only attempted when at least one k-mer option is present.
        boolean motifOptionsMissing = Args.parseInteger(strArr, "k", -1) == -1
                && Args.parseInteger(strArr, "k_min", -1) == -1
                && Args.parseInteger(strArr, "kmin", -1) == -1
                && Args.parseString(strArr, "seed", null) == null;
        if (motifOptionsMissing) {
            System.err.println("Warning: GEM did not see options (--k, --k_min & --k_max, or --seed) to run motif discovery. It will run GPS and stop!");
        }

        GEM gem = new GEM(strArr);
        gem.runMixtureModel(!motifOptionsMissing);
        gem.close();
        System.out.println("----------------------------------\n\nEnd time: " + timestampFormat.format(new Date()));
        System.out.println("\nTotal running time: " + CommonUtils.timeElapsed(startMillis) + "\n");
    }

    /** Prints the version line, the citation and the lab URL to standard output. */
    private static void printBanner() {
        System.out.println("\nGEM (version " + GEM_VERSION + ")!");
        System.out.println(CITATION);
        System.out.println("Gifford Laboratory at MIT (http://cgs.csail.mit.edu/gem/).\n");
    }

    /** Prints the command-line usage summary to standard error. */
    public static void printHelp() {
        System.err.print("GEM command line options (see more options at our website)\n"
                + "   Required parameters:\n"
                + "      --d <read spatial distribution file>\n"
                + "      --exptX <aligned read file for expt (X is condition name)>\n"
                + "   Required GEM motif discovery parameters, optional for GPS-only analysis:\n"
                + "      --k <length of the k-mer for motif finding, use --k or (--kmin & --kmax)>\n"
                + "      --k_min <min value of k, e.g. 6>\n"
                + "      --k_max <max value of k, e.g. 13>\n"
                + "      --seed <exact k-mer string to jump start k-mer set motif discovery>\n"
                + "      --genome <the path to the genome sequence directory, for motif finding>\n"
                + "   Optional parameters:\n"
                + "      --ctrlX <aligned reads file for ctrl (for each condition, ctrlX should match exptX)>\n"
                + "      --g <genome chrom.sizes file with chr name/length pairs>\n"
                + "      --f <read file format, BED/SAM/BOWTIE/ELAND/NOVO (default BED)>\n"
                + "      --s <size of mappable genome in bp (default is estimated from genome chrom sizes)>\n"
                + "      --a <minimum alpha value for sparse prior (default is esitmated from the whole dataset coverage)>\n"
                + "      --q <significance level for q-value, specify as -log10(q-value) (default=2, q-value=0.01)>\n"
                + "      --t <maximum number of threads to run GEM in paralell (default=#CPU)>\n"
                + "      --out <output folder name and file name prefix>\n"
                + "      --k_seqs <number of binding events to use for motif discovery (default=5000)>\n"
                + "   Optional flags: \n"
                + "      --fa use a fixed user-specified alpha value for all the regions\n"
                + "      --help print this help information and exit\n"
                + "\nExample: java -Xmx10G -jar gem.jar --d Read_Distribution_default.txt --g mm8.chrom.sizes --genome your_path/mm8 --s 2000000000 --expt SRX000540_mES_CTCF.bed --ctrl SRX000543_mES_GFP.bed --f BED --out mouseCTCF --k_min 6 --k_max 13\n");
    }

    /**
     * Prints the help text followed by the arguments this instance was
     * started with; arguments containing a space are shown in double quotes.
     */
    public void printError() {
        printHelp();
        StringBuilder echoed = new StringBuilder("\nYour input options are:\n");
        for (String arg : this.args) {
            if (arg.trim().contains(" ")) {
                echoed.append("\"").append(arg).append("\" ");
            } else {
                echoed.append(arg).append(" ");
            }
        }
        System.err.println(echoed.toString() + "\n");
    }

    /** Releases the mixture model's resources by delegating to {@code KPPMixture.cleanup()}. */
    public void close() {
        this.mixture.cleanup();
    }
}
