package edu.mit.csail.cgs.tools.sgd2ucsc;

import edu.mit.csail.cgs.datasets.species.Genome;
import edu.mit.csail.cgs.datasets.species.Organism;
import edu.mit.csail.cgs.tools.sgd2ucsc.SGDGFFParser;
import edu.mit.csail.cgs.utils.NotFoundException;
import edu.mit.csail.cgs.utils.database.DatabaseFactory;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StringReader;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Vector;
import org.apache.batik.util.XMLConstants;
import org.biojava.bio.program.gff.GFFRecord;
import org.biojava.bio.seq.StrandedFeature;

/* loaded from: input_file:edu/mit/csail/cgs/tools/sgd2ucsc/UCSCTableAssembler.class */
public class UCSCTableAssembler {
    /** Parser whose {@code geneFeatures} and {@code otherRecords} are read by {@link #populateSGDLists()}. */
    private SGDGFFParser gffParser = new SGDGFFParser();
    /** Gene-name to UniProt lookup, built in the constructor from its second file argument. */
    private UniProtGeneMap uniprotMap;
    /** Rows destined for the {@code sgdOther} table. */
    private LinkedList<SGDOther> sgdOthers;
    /** Rows destined for the {@code sgdGene} table. */
    private LinkedList<SGDGene> sgdGenes;
    /** Roman-digit value table; starts out null, so any code that reads it must populate it first. */
    private static int[] romvals = null;
    /** Computes the {@code bin} column for {@link SGDOther} rows from a start/end range. */
    private static BinCalculator binCalc = new BinCalculator();
    /** Maps GFF feature types to the type labels written to {@code sgdOther}; filled by the static initializer. */
    private static Map<String, String> gfftype2ucsc = new HashMap<>();

    /* loaded from: input_file:edu/mit/csail/cgs/tools/sgd2ucsc/UCSCTableAssembler$SGDGene.class */
    /**
     * One row of the {@code sgdGene} table, assembled from a parsed gene record and its CDS
     * records. Coordinates are copied from the GFF records without adjustment.
     */
    public static class SGDGene {
        public String name;
        public String chrom;
        public boolean strand;
        public int txStart;
        public int txEnd;
        public int cdsStart;
        public int cdsEnd;
        public int exonCount;
        public Vector<Integer> exonStarts = new Vector<>();
        public Vector<Integer> exonEnds = new Vector<>();
        public String protID;

        /**
         * Builds a gene row.
         *
         * @param geneFeatures parsed gene record, its attributes and its CDS records
         * @param str          protein identifier stored in {@link #protID}
         */
        public SGDGene(SGDGFFParser.GeneFeatures geneFeatures, String str) {
            name = geneFeatures.geneAttrs.get("ID").get(0);
            chrom = UCSCTableAssembler.translateChromName(geneFeatures.gene.getSeqName());
            txStart = geneFeatures.gene.getStart();
            txEnd = geneFeatures.gene.getEnd();
            protID = str;
            strand = (StrandedFeature.POSITIVE == geneFeatures.gene.getStrand());
            exonCount = 0;

            if (geneFeatures.cds.size() > 0) {
                // Start from an inverted range so the first CDS record narrows it correctly.
                cdsStart = txEnd;
                cdsEnd = txStart;
                for (GFFRecord cdsRecord : geneFeatures.cds) {
                    int recordStart = cdsRecord.getStart();
                    int recordEnd = cdsRecord.getEnd();
                    exonStarts.add(recordStart);
                    exonEnds.add(recordEnd);
                    exonCount += 1;
                    if (recordStart < cdsStart) {
                        cdsStart = recordStart;
                    }
                    if (recordEnd > cdsEnd) {
                        cdsEnd = recordEnd;
                    }
                }
            } else {
                // No CDS records: the coding range falls back to the transcript range.
                cdsStart = txStart;
                cdsEnd = txEnd;
            }
        }

        /**
         * Binds this row to parameters 1-11 of the given statement and executes it.
         *
         * @param preparedStatement insert statement with eleven placeholders in the order
         *                          name, chrom, strand, txStart, txEnd, cdsStart, cdsEnd,
         *                          exonCount, exonStarts, exonEnds, proteinID
         * @throws SQLException if binding or execution fails
         */
        public void insertIntoDB(PreparedStatement preparedStatement) throws SQLException {
            preparedStatement.setString(1, name);
            preparedStatement.setString(2, chrom);
            preparedStatement.setString(3, UCSCTableAssembler.translateStrand(strand));
            preparedStatement.setInt(4, txStart);
            preparedStatement.setInt(5, txEnd);
            preparedStatement.setInt(6, cdsStart);
            preparedStatement.setInt(7, cdsEnd);
            preparedStatement.setInt(8, exonCount);

            String starts = getExonStartsString();
            preparedStatement.setCharacterStream(9, new StringReader(starts), starts.length());

            String ends = getExonEndsString();
            preparedStatement.setCharacterStream(10, new StringReader(ends), ends.length());

            preparedStatement.setString(11, protID);
            preparedStatement.executeUpdate();
        }

        /** @return the first {@link #exonCount} exon starts, each followed by a comma */
        public String getExonStartsString() {
            StringBuilder joined = new StringBuilder();
            int index = 0;
            while (index < exonCount) {
                joined.append(exonStarts.get(index)).append(',');
                index++;
            }
            return joined.toString();
        }

        /** @return the first {@link #exonCount} exon ends, each followed by a comma */
        public String getExonEndsString() {
            StringBuilder joined = new StringBuilder();
            int index = 0;
            while (index < exonCount) {
                joined.append(exonEnds.get(index)).append(',');
                index++;
            }
            return joined.toString();
        }

        /** @return a single-line, human-readable dump of this row */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("SGDGene");
            text.append(' ').append(chrom)
                .append(':').append(txStart)
                .append('-').append(txEnd)
                .append(' ').append(cdsStart)
                .append(':').append(cdsEnd);
            text.append(' ').append(name).append('/').append(protID);
            text.append(' ').append(UCSCTableAssembler.translateStrand(strand));
            text.append(' ').append(exonCount);
            for (int exon = 0; exon < exonCount; exon++) {
                text.append(' ').append(exonStarts.get(exon)).append('-').append(exonEnds.get(exon));
            }
            return text.toString();
        }
    }

    /* loaded from: input_file:edu/mit/csail/cgs/tools/sgd2ucsc/UCSCTableAssembler$SGDOther.class */
    /**
     * One row of the {@code sgdOther} table: either a gene-like feature that is not stored as an
     * {@link SGDGene}, or a non-gene GFF record whose feature type has a known UCSC label.
     */
    public static class SGDOther {
        public int bin;
        public String chrom;
        public int chromStart;
        public int chromEnd;
        public String name;
        public boolean strand;
        public String type;

        /**
         * Binds this row to parameters 1-7 of the given statement and executes it.
         *
         * @param preparedStatement insert statement with seven placeholders in the order
         *                          bin, chrom, chromStart, chromEnd, name, strand, type
         * @throws SQLException if binding or execution fails
         */
        public void insertIntoDB(PreparedStatement preparedStatement) throws SQLException {
            preparedStatement.setInt(1, this.bin);
            preparedStatement.setString(2, this.chrom);
            preparedStatement.setInt(3, this.chromStart);
            preparedStatement.setInt(4, this.chromEnd);
            preparedStatement.setString(5, this.name);
            preparedStatement.setString(6, UCSCTableAssembler.translateStrand(this.strand));
            preparedStatement.setString(7, this.type);
            preparedStatement.executeUpdate();
        }

        /**
         * Builds a row from a gene-like feature. The type is {@code "CDS:pseudogene"} for
         * {@code pseudogene} features and {@code "Dubious:CDS"} for everything else.
         *
         * @param geneFeatures parsed gene record and its attributes; the {@code ID} attribute
         *                     supplies the name
         */
        public SGDOther(SGDGFFParser.GeneFeatures geneFeatures) {
            if (geneFeatures.gene.getFeature().equals("pseudogene")) {
                this.type = "CDS:pseudogene";
            } else {
                this.type = "Dubious:CDS";
            }
            this.bin = UCSCTableAssembler.binCalc.getBinFromRange(geneFeatures.gene.getStart(), geneFeatures.gene.getEnd());
            this.chrom = UCSCTableAssembler.translateChromName(geneFeatures.gene.getSeqName());
            this.chromStart = geneFeatures.gene.getStart();
            this.chromEnd = geneFeatures.gene.getEnd();
            this.name = geneFeatures.geneAttrs.get("ID").get(0);
            this.strand = geneFeatures.gene.getStrand() == StrandedFeature.POSITIVE;
        }

        /**
         * Builds a row from a non-gene GFF record; the {@code Name} attribute supplies the name.
         *
         * @param gFFRecord record whose feature type must be a key of the GFF-to-UCSC type map
         * @throws IllegalArgumentException if the record's feature type has no UCSC label; the
         *                                  message is the feature type
         */
        public SGDOther(GFFRecord gFFRecord) {
            Map<String, List<String>> attributes = SGDGFFParser.decodeAttrMap(gFFRecord);
            this.bin = UCSCTableAssembler.binCalc.getBinFromRange(gFFRecord.getStart(), gFFRecord.getEnd());
            this.chrom = UCSCTableAssembler.translateChromName(gFFRecord.getSeqName());
            this.chromStart = gFFRecord.getStart();
            this.chromEnd = gFFRecord.getEnd();
            this.name = attributes.get("Name").get(0);
            this.strand = gFFRecord.getStrand() == StrandedFeature.POSITIVE;

            // The map never stores null values, so a null result means "unknown feature type".
            String feature = gFFRecord.getFeature();
            String ucscType = UCSCTableAssembler.gfftype2ucsc.get(feature);
            if (ucscType == null) {
                throw new IllegalArgumentException(feature);
            }
            this.type = ucscType;
        }

        /** @return a single-line, human-readable dump of this row with the type in double quotes */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("SGDOther");
            sb.append(" " + this.chrom + ":" + this.chromStart + "-" + this.chromEnd);
            sb.append(" " + this.name);
            sb.append(" " + UCSCTableAssembler.translateStrand(this.strand));
            // Plain quote literal: no need to reach into an XML library for a '"'.
            sb.append(" \"" + this.type + "\"");
            return sb.toString();
        }
    }

    /**
     * Command-line entry point: loads the GFF and dbxref files, then fills the UCSC tables and
     * the name table for genome {@code SGDv1}.
     *
     * @param strArr optional arguments: {@code [0]} is the GFF file, {@code [1]} is the dbxref
     *               file; each falls back to its built-in default path when absent
     */
    public static void main(String[] strArr) {
        File file = strArr.length > 0 ? new File(strArr[0]) : new File("C:\\Documents and Settings\\tdanford\\Desktop\\sacCer1.gff");
        // Guard on the index actually read: a single argument must not touch strArr[1].
        File file2 = strArr.length > 1 ? new File(strArr[1]) : new File("C:\\Documents and Settings\\tdanford\\Desktop\\dbxref.tab");
        try {
            Genome findGenome = Organism.findGenome("SGDv1");
            new UCSCTableAssembler(file, file2).insertIntoDB(findGenome);
            new SgdToNameTable(file).populateTable(findGenome);
        } catch (NotFoundException e) {
            e.printStackTrace();
        } catch (IOException e2) {
            e2.printStackTrace();
        } catch (SQLException e3) {
            e3.printStackTrace();
        }
    }

    /**
     * Loads both input files and builds the in-memory gene and other-feature lists.
     *
     * @param file  GFF file handed to the GFF parser
     * @param file2 file handed to the UniProt gene map
     * @throws IOException if either file cannot be read
     */
    public UCSCTableAssembler(File file, File file2) throws IOException {
        sgdGenes = new LinkedList<>();
        sgdOthers = new LinkedList<>();
        uniprotMap = new UniProtGeneMap(file2);
        gffParser.parseInputFile(file);
        populateSGDLists();
    }

    /**
     * Writes one line per row to the stream: all gene rows first, then all other rows.
     *
     * @param printStream destination for the text dump
     */
    public void printSGDLines(PrintStream printStream) {
        for (SGDGene gene : sgdGenes) {
            printStream.println(gene.toString());
        }
        for (SGDOther other : sgdOthers) {
            printStream.println(other.toString());
        }
    }

    /**
     * Writes all rows to the UCSC database of the given genome. Any {@link SQLException} is
     * printed to standard error rather than propagated.
     *
     * @param genome genome whose UCSC connection receives the rows
     */
    public void insertIntoDB(Genome genome) {
        try {
            Connection ucscConnection = genome.getUcscConnection();
            try {
                insertIntoDB(ucscConnection);
            } finally {
                // Hand the connection back even when the insert fails part-way.
                DatabaseFactory.freeConnection(ucscConnection);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Replaces the contents of {@code sgdGene} and {@code sgdOther} with the current lists.
     *
     * <p>Runs in three committed steps: clear both tables, insert the gene rows, insert the
     * other rows. If a step fails, its uncommitted work is rolled back; steps that were already
     * committed stay committed. Auto-commit is switched back on before returning, whether or not
     * the insert succeeded, and all statements are closed.
     *
     * @param connection open connection to the database holding both tables
     * @throws SQLException if any statement, commit or auto-commit change fails
     */
    public void insertIntoDB(Connection connection) throws SQLException {
        connection.setAutoCommit(false);
        try {
            try (Statement clearTables = connection.createStatement()) {
                clearTables.executeUpdate("delete from sgdGene");
                clearTables.executeUpdate("delete from sgdOther");
                connection.commit();
            }
            try (PreparedStatement geneInsert = prepareSGDGeneInsert(connection);
                 PreparedStatement otherInsert = prepareSGDOtherInsert(connection)) {
                for (SGDGene gene : this.sgdGenes) {
                    gene.insertIntoDB(geneInsert);
                }
                System.out.println("Inserted " + this.sgdGenes.size() + " sgdGene Entries.");
                connection.commit();

                for (SGDOther other : this.sgdOthers) {
                    other.insertIntoDB(otherInsert);
                }
                System.out.println("Inserted " + this.sgdOthers.size() + " sgdOther Entries.");
                connection.commit();
            }
        } catch (SQLException | RuntimeException e) {
            // Re-enabling auto-commit commits any open transaction, so discard the
            // half-finished step first.
            try {
                connection.rollback();
            } catch (SQLException rollbackFailure) {
                e.addSuppressed(rollbackFailure);
            }
            throw e;
        } finally {
            connection.setAutoCommit(true);
        }
    }

    /**
     * Rebuilds the gene and other-feature lists from the parser's contents and prints a short
     * summary to standard output.
     *
     * <p>Gene features become {@link SGDGene} rows when they are
     * {@code transposable_element_gene} features or have an {@code orf_classification} of
     * {@code Verified}; pseudogenes and all remaining gene features become {@link SGDOther}
     * rows. Non-gene records become {@link SGDOther} rows only when their feature type has a
     * known UCSC label; the rest are reported as unrecognized and skipped.
     */
    public void populateSGDLists() {
        this.sgdGenes.clear();
        this.sgdOthers.clear();

        int exonGeneCount = 0;
        for (SGDGFFParser.GeneFeatures features : this.gffParser.geneFeatures.values()) {
            if (isSgdGene(features)) {
                SGDGene gene = buildGene(features);
                if (gene.exonCount > 0) {
                    exonGeneCount++;
                }
                this.sgdGenes.addLast(gene);
            } else {
                this.sgdOthers.addLast(new SGDOther(features));
            }
        }

        HashSet<String> recognizedTypes = new HashSet<>();
        HashSet<String> unrecognizedTypes = new HashSet<>();
        for (GFFRecord record : this.gffParser.otherRecords) {
            String feature = record.getFeature();
            if (gfftype2ucsc.containsKey(feature)) {
                SGDOther other = new SGDOther(record);
                recognizedTypes.add(feature);
                this.sgdOthers.addLast(other);
            } else {
                unrecognizedTypes.add(feature);
            }
        }

        System.out.println("# Other records: " + this.gffParser.otherRecords.size());
        System.out.println("\tRecognized Other Types: " + recognizedTypes);
        System.out.println("\tUnrecognized Other Types: " + unrecognizedTypes);
        System.out.println("# Exon-Genes: " + exonGeneCount);
    }

    /** Decides whether a gene feature is stored as an {@link SGDGene} rather than an {@link SGDOther}. */
    private static boolean isSgdGene(SGDGFFParser.GeneFeatures features) {
        String feature = features.gene.getFeature();
        if (feature.equals("transposable_element_gene")) {
            return true;
        }
        if (feature.equals("pseudogene")) {
            return false;
        }
        // The classification attribute is only consulted for the remaining feature types.
        return features.geneAttrs.get("orf_classification").get(0).equals("Verified");
    }

    /** Builds the gene row, using the UniProt id for the gene's {@code ID} or {@code "n/a"} when unmapped. */
    private SGDGene buildGene(SGDGFFParser.GeneFeatures features) {
        String geneId = features.geneAttrs.get("ID").get(0);
        String proteinId = this.uniprotMap.containsGeneName(geneId) ? this.uniprotMap.getUniprot(geneId) : "n/a";
        return new SGDGene(features, proteinId);
    }

    /**
     * Normalizes a chromosome name to the form used after the {@code chr} prefix in UCSC tables.
     *
     * <ul>
     *   <li>A leading {@code chr} (any letter case) is dropped.</li>
     *   <li>{@code Mito} becomes {@code M}; {@code 2-micron} becomes {@code 2micron}.</li>
     *   <li>A Roman numeral made of {@code X}, {@code V} and {@code I}, optionally followed by
     *       {@code _random}, becomes its decimal value with the suffix kept
     *       (for example {@code XII_random} becomes {@code 12_random}).</li>
     *   <li>Names matching {@code [1234567890MUXY]+(_random)?[LRh]?} are returned unchanged.</li>
     * </ul>
     *
     * @param str chromosome name, with or without a {@code chr} prefix
     * @return the normalized name, without a {@code chr} prefix
     * @throws NumberFormatException if the name fits none of the recognized forms
     */
    public static String fixChrom(String str) {
        String name = str;
        if (name.matches("^[cC][hH][rR].*")) {
            name = name.substring(3);
        }
        if (name.matches("^Mito$")) {
            return "M";
        }
        if (name.matches("^2-micron$")) {
            return "2micron";
        }

        // Split off the suffix BEFORE the Roman-numeral check; otherwise a suffixed numeral can
        // never reach the conversion below.
        final String randomSuffix = "_random";
        boolean random = name.endsWith(randomSuffix);
        String numeral = random ? name.substring(0, name.length() - randomSuffix.length()) : name;

        if (!numeral.matches("^[XVI]+$")) {
            if (name.matches("^[1234567890MUXY]+(_random)?[LRh]?$")) {
                return name;
            }
            throw new NumberFormatException("Can't fix chrom name " + str + "," + name);
        }

        // Runs of the same digit accumulate in 'pending'. A digit larger than its predecessor
        // forms a subtractive pair (IV, IX) that is added to the total directly.
        int total = 0;
        char previousChar = numeral.charAt(0);
        int previousValue = romanDigitValue(previousChar);
        int pending = previousValue;
        for (int i = 1; i < numeral.length(); i++) {
            char currentChar = numeral.charAt(i);
            int currentValue = romanDigitValue(currentChar);
            if (currentValue > previousValue) {
                total += currentValue - previousValue;
                pending = 0;
            } else if (currentChar != previousChar) {
                total += pending;
                pending = currentValue;
            } else {
                pending += currentValue;
            }
            previousChar = currentChar;
            previousValue = currentValue;
        }
        total += pending;

        String decimal = Integer.toString(total);
        return random ? decimal + randomSuffix : decimal;
    }

    /** Returns the value of a single Roman digit; only X, V and I are supported. */
    private static int romanDigitValue(char digit) {
        switch (digit) {
            case 'X':
                return 10;
            case 'V':
                return 5;
            case 'I':
                return 1;
            default:
                throw new NumberFormatException("Unsupported Roman digit " + digit);
        }
    }

    /**
     * Converts a chromosome name to its UCSC form.
     *
     * @param str chromosome name accepted by {@link #fixChrom(String)}
     * @return {@code "chr"} followed by the normalized name
     */
    public static String translateChromName(String str) {
        String normalized = fixChrom(str);
        return "chr" + normalized;
    }

    /**
     * Renders a strand flag as the symbol stored in the tables.
     *
     * @param z {@code true} for the positive strand
     * @return {@code "+"} when {@code z} is true, otherwise {@code "-"}
     */
    public static String translateStrand(boolean z) {
        if (z) {
            return "+";
        }
        return "-";
    }

    /**
     * Prepares the insert for {@code sgdOther}. The statement has seven placeholders (bin, chrom,
     * chromStart, chromEnd, name, strand, type); {@code score} is fixed at 0.
     *
     * @param connection connection to prepare the statement on
     * @return the prepared insert statement
     * @throws SQLException if the statement cannot be prepared
     */
    public static PreparedStatement prepareSGDOtherInsert(Connection connection) throws SQLException {
        final String sql = "insert into sgdOther (bin, chrom, chromStart, chromEnd, name, score, strand, type) values (?, ?, ?, ?, ?, 0, ?, ?)";
        PreparedStatement insert = connection.prepareStatement(sql);
        return insert;
    }

    /**
     * Prepares the insert for {@code sgdGene}. The statement has eleven placeholders (name, chrom,
     * strand, txStart, txEnd, cdsStart, cdsEnd, exonCount, exonStarts, exonEnds, proteinID).
     *
     * @param connection connection to prepare the statement on
     * @return the prepared insert statement
     * @throws SQLException if the statement cannot be prepared
     */
    public static PreparedStatement prepareSGDGeneInsert(Connection connection) throws SQLException {
        final String sql = "insert into sgdGene (name, chrom, strand, txStart, txEnd, cdsStart, cdsEnd, exonCount, exonStarts, exonEnds, proteinID) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
        PreparedStatement insert = connection.prepareStatement(sql);
        return insert;
    }

    // Registers the GFF feature types that have a UCSC type label, as {gffType, ucscLabel} pairs.
    static {
        String[][] typeLabels = {
            {"telomere", "Telomeric Region"},
            {"tRNA", "tRNA"},
            {"snoRNA", "snoRNA"},
            {"snRNA", "snRNA"},
            {"rRNA", "rRNA"},
            {"ncRNA", "RNA"},
            {"centromere", "CEN"},
            {"transposable_element", "Transposon"},
        };
        for (String[] pair : typeLabels) {
            gfftype2ucsc.put(pair[0], pair[1]);
        }
    }
}
