package picard.sam.markduplicates;

import htsjdk.samtools.DuplicateScoringStrategy;
import htsjdk.samtools.ReservedTagConstants;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SortingCollection;
import htsjdk.samtools.util.SortingLongCollection;
import java.io.File;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Priority;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.programgroups.SamOrBam;
import picard.sam.DuplicationMetrics;
import picard.sam.markduplicates.util.AbstractMarkDuplicatesCommandLineProgram;
import picard.sam.markduplicates.util.DiskBasedReadEndsForMarkDuplicatesMap;
import picard.sam.markduplicates.util.LibraryIdGenerator;
import picard.sam.markduplicates.util.ReadEnds;
import picard.sam.markduplicates.util.ReadEndsForMarkDuplicates;
import picard.sam.markduplicates.util.ReadEndsForMarkDuplicatesCodec;

@CommandLineProgramProperties(usage = "Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. All records are then written to the output file with the duplicate records flagged.", usageShort = "Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules.", programGroup = SamOrBam.class)
/* loaded from: input_file:picard/sam/markduplicates/MarkDuplicates.class */
public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
    // Read-end records for matched pairs; filled by buildSortedReadEndLists() and
    // drained, cleaned up and nulled by generateDuplicateIndexes().
    private SortingCollection<ReadEndsForMarkDuplicates> pairSort;
    // Read-end records added once per mapped, non-secondary/supplementary read; same lifecycle as pairSort.
    private SortingCollection<ReadEndsForMarkDuplicates> fragSort;
    // File-order indexes of records to flag as duplicates; built by generateDuplicateIndexes(), consumed by doWork().
    private SortingLongCollection duplicateIndexes;
    private final Log log = Log.getInstance(MarkDuplicates.class);

    // NOTE(review): the default below is log4j's Priority.FATAL_INT (documented as 50000). This looks like a
    // decompiler substituting a named constant for a plain literal — confirm against the original source.
    // This field is not read anywhere in this class.
    @Option(shortName = "MAX_SEQS", doc = "This option is obsolete. ReadEnds will always be spilled to disk.")
    public int MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP = Priority.FATAL_INT;

    // Passed to the DiskBasedReadEndsForMarkDuplicatesMap constructor in buildSortedReadEndLists().
    @Option(shortName = "MAX_FILE_HANDLES", doc = "Maximum number of file handles to keep open when spilling read ends to disk. Set this number a little lower than the per-process maximum number of file that may be open. This number can be found by executing the 'ulimit -n' command on a Unix system.")
    public int MAX_FILE_HANDLES_FOR_READ_ENDS_MAP = 8000;

    // Scales the in-memory capacity of pairSort/fragSort in buildSortedReadEndLists().
    // It is NOT used when sizing duplicateIndexes, which uses a fixed 0.25 factor.
    @Option(doc = "This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections.  If you are running out of memory, try reducing this number.")
    public double SORTING_COLLECTION_SIZE_RATIO = 0.25d;
    // Count of indexes added through addIndexAsDuplicate(); reported in the log by doWork().
    private int numDuplicateIndices = 0;
    // Created lazily in buildSortedReadEndLists() from the input header if still null at that point.
    private LibraryIdGenerator libraryIdGenerator = null;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:picard/sam/markduplicates/MarkDuplicates$ReadEndsMDComparator.class */
    /**
     * Orders {@link ReadEndsForMarkDuplicates} records by, in priority order: library id,
     * read1 reference index, read1 coordinate, orientation, read2 reference index,
     * read2 coordinate, read1 index in file and finally read2 index in file.
     *
     * <p>Each key is compared with {@code Integer.compare}/{@code Long.compare} rather than by
     * subtraction. Subtracting two {@code long} file indexes and narrowing the result to
     * {@code int} can yield 0 (or the wrong sign) for distinct indexes whose difference does not
     * fit in 32 bits, which would violate the {@link Comparator} contract.
     */
    public static class ReadEndsMDComparator implements Comparator<ReadEndsForMarkDuplicates> {
        ReadEndsMDComparator() {
        }

        /**
         * @param readEndsForMarkDuplicates  left-hand record
         * @param readEndsForMarkDuplicates2 right-hand record
         * @return a negative, zero or positive value if the left-hand record sorts before,
         *         equal to, or after the right-hand record
         */
        @Override // java.util.Comparator
        public int compare(ReadEndsForMarkDuplicates readEndsForMarkDuplicates, ReadEndsForMarkDuplicates readEndsForMarkDuplicates2) {
            final ReadEndsForMarkDuplicates lhs = readEndsForMarkDuplicates;
            final ReadEndsForMarkDuplicates rhs = readEndsForMarkDuplicates2;

            int result = Integer.compare(lhs.libraryId, rhs.libraryId);
            if (result == 0) {
                result = Integer.compare(lhs.read1ReferenceIndex, rhs.read1ReferenceIndex);
            }
            if (result == 0) {
                result = Integer.compare(lhs.read1Coordinate, rhs.read1Coordinate);
            }
            if (result == 0) {
                result = Integer.compare(lhs.orientation, rhs.orientation);
            }
            if (result == 0) {
                result = Integer.compare(lhs.read2ReferenceIndex, rhs.read2ReferenceIndex);
            }
            if (result == 0) {
                result = Integer.compare(lhs.read2Coordinate, rhs.read2Coordinate);
            }
            // File indexes are longs: compare them as longs so no information is lost to an int cast.
            if (result == 0) {
                result = Long.compare(lhs.read1IndexInFile, rhs.read1IndexInFile);
            }
            if (result == 0) {
                result = Long.compare(lhs.read2IndexInFile, rhs.read2IndexInFile);
            }
            return result;
        }
    }

    /**
     * Creates the program with SUM_OF_BASE_QUALITIES as the initial duplicate scoring strategy.
     */
    public MarkDuplicates() {
        DUPLICATE_SCORING_STRATEGY = DuplicateScoringStrategy.ScoringStrategy.SUM_OF_BASE_QUALITIES;
    }

    /**
     * Command-line entry point: builds a fresh program instance and hands it the arguments.
     *
     * @param strArr the command-line arguments
     */
    public static void main(String[] strArr) {
        final MarkDuplicates program = new MarkDuplicates();
        program.instanceMainWithExit(strArr);
    }

    /**
     * Runs the whole duplicate-marking job: builds the sorted read-end lists, derives the
     * file-order indexes of duplicate records, then re-reads the input and writes every record
     * to the output with its duplicate flag set or cleared, accumulating per-library metrics
     * along the way.
     *
     * @return 0 once the output and metrics have been written
     */
    @Override // picard.cmdline.CommandLineProgram
    protected int doWork() {
        IOUtil.assertFilesAreReadable(this.INPUT);
        IOUtil.assertFileIsWritable(this.OUTPUT);
        IOUtil.assertFileIsWritable(this.METRICS_FILE);

        // First pass over the input: collect read ends, then work out which records are duplicates.
        reportMemoryStats("Start of doWork");
        this.log.info("Reading input file and constructing read end information.");
        buildSortedReadEndLists();
        reportMemoryStats("After buildSortedReadEndLists");
        generateDuplicateIndexes();
        reportMemoryStats("After generateDuplicateIndexes");
        this.log.info("Marking " + this.numDuplicateIndices + " records as duplicates.");

        if (this.READ_NAME_REGEX != null) {
            this.log.info("Found " + this.libraryIdGenerator.getNumberOfOpticalDuplicateClusters() + " optical duplicate clusters.");
        } else {
            this.log.warn("Skipped optical duplicate cluster discovery; library size estimation may be inaccurate!");
        }

        // Second pass: reopen the input and prepare the output header and writer.
        final AbstractMarkDuplicatesCommandLineProgram.SamHeaderAndIterator inputs = openInputs();
        final SAMFileHeader inputHeader = inputs.header;
        // NOTE(review): m860clone looks like a decompiler-renamed clone() — confirm against the library.
        final SAMFileHeader outputHeader = inputHeader.m860clone();
        outputHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        for (final String comment : this.COMMENT) {
            outputHeader.addComment(comment);
        }
        final Map<String, String> chainedPgIds = getChainedPgIds(outputHeader);
        final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader, true, this.OUTPUT);

        // recordIndex counts every record read; nextDuplicateIndex is the next index to flag (-1 when exhausted).
        long recordIndex = 0;
        long nextDuplicateIndex = this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : -1L;
        final ProgressLogger progress = new ProgressLogger(this.log, 10000000, "Written");
        final CloseableIterator<SAMRecord> records = inputs.iterator;

        while (records.hasNext()) {
            final SAMRecord rec = records.next();

            // Secondary/supplementary records are passed through without touching flags or metrics.
            if (!rec.isSecondaryOrSupplementary()) {
                final String library = LibraryIdGenerator.getLibraryName(inputHeader, rec);
                DuplicationMetrics metrics = this.libraryIdGenerator.getMetricsByLibrary(library);
                if (metrics == null) {
                    metrics = new DuplicationMetrics();
                    metrics.LIBRARY = library;
                    this.libraryIdGenerator.addMetricsByLibrary(library, metrics);
                }

                if (rec.getReadUnmappedFlag()) {
                    metrics.UNMAPPED_READS++;
                } else if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                    metrics.READ_PAIRS_EXAMINED++;
                } else {
                    metrics.UNPAIRED_READS_EXAMINED++;
                }

                if (recordIndex != nextDuplicateIndex) {
                    rec.setDuplicateReadFlag(false);
                } else {
                    rec.setDuplicateReadFlag(true);
                    if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                        metrics.READ_PAIR_DUPLICATES++;
                    } else {
                        metrics.UNPAIRED_READ_DUPLICATES++;
                    }
                    nextDuplicateIndex = this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : -1L;
                }
            }
            recordIndex++;

            // When removing duplicates, flagged records are dropped instead of written.
            if (this.REMOVE_DUPLICATES && rec.getDuplicateReadFlag()) {
                continue;
            }
            if (this.PROGRAM_RECORD_ID != null) {
                final String pgTag = SAMTag.PG.name();
                rec.setAttribute(pgTag, chainedPgIds.get(rec.getStringAttribute(pgTag)));
            }
            out.addAlignment(rec);
            progress.record(rec);
        }

        records.close();
        this.duplicateIndexes.cleanup();
        reportMemoryStats("Before output close");
        out.close();
        reportMemoryStats("After output close");
        finalizeAndWriteMetrics(this.libraryIdGenerator);
        return 0;
    }

    /**
     * @return the sum of the values in the library generator's optical-duplicates-by-library map,
     *         narrowed to a {@code long}
     */
    long numOpticalDuplicates() {
        final double total = this.libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap().getSumOfValues();
        return (long) total;
    }

    /**
     * Requests a garbage collection and then logs the JVM's free, total and max memory at info
     * level, prefixed with the given stage label.
     *
     * @param stage label placed at the start of the log line
     */
    private void reportMemoryStats(String stage) {
        System.gc();
        final Runtime jvm = Runtime.getRuntime();
        final long free = jvm.freeMemory();
        final long total = jvm.totalMemory();
        final long max = jvm.maxMemory();
        this.log.info(stage + " freeMemory: " + free + "; totalMemory: " + total + "; maxMemory: " + max);
    }

    /**
     * Makes the first pass over the input and fills {@code fragSort} and {@code pairSort}.
     *
     * <p>Every mapped, non-secondary/supplementary record contributes one entry to
     * {@code fragSort}. For paired records whose mate is mapped, the first end seen is parked in
     * a disk-backed map keyed by read group and read name; when the second end arrives the parked
     * entry is completed (ends ordered by reference index then coordinate, orientation and score
     * combined) and added to {@code pairSort}. Reading stops at the first unmapped record that
     * has reference index -1.
     */
    private void buildSortedReadEndLists() {
        // NOTE(review): 65.0 is presumably the per-record size estimate in bytes — confirm.
        final int maxInMemory = (int) ((Runtime.getRuntime().maxMemory() * this.SORTING_COLLECTION_SIZE_RATIO) / 65.0d);
        this.log.info("Will retain up to " + maxInMemory + " data points before spilling to disk.");

        this.pairSort = SortingCollection.newInstance(ReadEndsForMarkDuplicates.class, new ReadEndsForMarkDuplicatesCodec(), new ReadEndsMDComparator(), maxInMemory, this.TMP_DIR);
        this.fragSort = SortingCollection.newInstance(ReadEndsForMarkDuplicates.class, new ReadEndsForMarkDuplicatesCodec(), new ReadEndsMDComparator(), maxInMemory, this.TMP_DIR);

        final AbstractMarkDuplicatesCommandLineProgram.SamHeaderAndIterator inputs = openInputs();
        final SAMFileHeader header = inputs.header;
        final DiskBasedReadEndsForMarkDuplicatesMap unmatchedMates = new DiskBasedReadEndsForMarkDuplicatesMap(this.MAX_FILE_HANDLES_FOR_READ_ENDS_MAP);
        long index = 0;
        final ProgressLogger progress = new ProgressLogger(this.log, 1000000, "Read");
        final CloseableIterator<SAMRecord> records = inputs.iterator;

        if (this.libraryIdGenerator == null) {
            this.libraryIdGenerator = new LibraryIdGenerator(header);
        }

        while (records.hasNext()) {
            final SAMRecord rec = records.next();
            if (this.PROGRAM_RECORD_ID != null) {
                this.pgIdsSeen.add(rec.getStringAttribute(SAMTag.PG.name()));
            }

            final boolean unmapped = rec.getReadUnmappedFlag();
            if (unmapped && rec.getReferenceIndex().intValue() == -1) {
                // Stop here: neither the index counter nor the progress logger sees this record.
                break;
            }

            if (!unmapped && !rec.isSecondaryOrSupplementary()) {
                final ReadEndsForMarkDuplicates fragmentEnds = buildReadEnds(header, index, rec);
                this.fragSort.add(fragmentEnds);

                if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
                    final String key = rec.getAttribute(ReservedTagConstants.READ_GROUP_ID) + ":" + rec.getReadName();
                    final ReadEndsForMarkDuplicates pairedEnds = unmatchedMates.remove(rec.getReferenceIndex().intValue(), key);

                    if (pairedEnds != null) {
                        final int refIndex = fragmentEnds.read1ReferenceIndex;
                        final int coordinate = fragmentEnds.read1Coordinate;
                        final boolean recNegative = rec.getReadNegativeStrandFlag();
                        // The parked entry still carries the single-end orientation of the mate (1 == reverse).
                        final boolean mateNegative = pairedEnds.orientation == 1;

                        // Orientation for optical duplicates is expressed as (first of pair, second of pair).
                        pairedEnds.orientationForOpticalDuplicates = rec.getFirstOfPairFlag()
                                ? ReadEnds.getOrientationByte(recNegative, mateNegative)
                                : ReadEnds.getOrientationByte(mateNegative, recNegative);

                        final boolean recIsSecondEnd = refIndex > pairedEnds.read1ReferenceIndex
                                || (refIndex == pairedEnds.read1ReferenceIndex && coordinate >= pairedEnds.read1Coordinate);
                        if (recIsSecondEnd) {
                            pairedEnds.read2ReferenceIndex = refIndex;
                            pairedEnds.read2Coordinate = coordinate;
                            pairedEnds.read2IndexInFile = index;
                            pairedEnds.orientation = ReadEnds.getOrientationByte(mateNegative, recNegative);
                        } else {
                            // Current record sorts first: shift the parked end into the read2 slots.
                            pairedEnds.read2ReferenceIndex = pairedEnds.read1ReferenceIndex;
                            pairedEnds.read2Coordinate = pairedEnds.read1Coordinate;
                            pairedEnds.read2IndexInFile = pairedEnds.read1IndexInFile;
                            pairedEnds.read1ReferenceIndex = refIndex;
                            pairedEnds.read1Coordinate = coordinate;
                            pairedEnds.read1IndexInFile = index;
                            pairedEnds.orientation = ReadEnds.getOrientationByte(recNegative, mateNegative);
                        }
                        pairedEnds.score = (short) (pairedEnds.score + DuplicateScoringStrategy.computeDuplicateScore(rec, this.DUPLICATE_SCORING_STRATEGY));
                        this.pairSort.add(pairedEnds);
                    } else {
                        // First end of the pair: park a separate instance (the one in fragSort must not be mutated later).
                        final ReadEndsForMarkDuplicates firstSeen = buildReadEnds(header, index, rec);
                        unmatchedMates.put(firstSeen.read2ReferenceIndex, key, firstSeen);
                    }
                }
            }

            index++;
            if (progress.record(rec)) {
                this.log.info("Tracking " + unmatchedMates.size() + " as yet unmatched pairs. " + unmatchedMates.sizeInRam() + " records in RAM.");
            }
        }

        this.log.info("Read " + index + " records. " + unmatchedMates.size() + " pairs never matched.");
        records.close();
        this.pairSort.doneAdding();
        this.fragSort.doneAdding();
    }

    /**
     * Builds a single-end {@link ReadEndsForMarkDuplicates} for one record.
     *
     * <p>The coordinate is the unclipped end for reverse-strand reads and the unclipped start
     * otherwise; orientation is 1 for reverse strand and 0 for forward. If the optical duplicate
     * finder accepts the read name, the read group is set to the position of the record's RG id
     * in the header's read-group list (or the list size if no id matches).
     *
     * @param header header supplying the read-group list
     * @param index  position of the record in the input, stored as read1IndexInFile
     * @param rec    the record to summarise
     * @return the populated read ends
     */
    private ReadEndsForMarkDuplicates buildReadEnds(SAMFileHeader header, long index, SAMRecord rec) {
        final ReadEndsForMarkDuplicates ends = new ReadEndsForMarkDuplicates();
        ends.read1ReferenceIndex = rec.getReferenceIndex().intValue();

        final boolean negativeStrand = rec.getReadNegativeStrandFlag();
        if (negativeStrand) {
            ends.read1Coordinate = rec.getUnclippedEnd();
            ends.orientation = (byte) 1;
        } else {
            ends.read1Coordinate = rec.getUnclippedStart();
            ends.orientation = (byte) 0;
        }
        ends.read1IndexInFile = index;
        ends.score = DuplicateScoringStrategy.computeDuplicateScore(rec, this.DUPLICATE_SCORING_STRATEGY);

        // Record the mate's reference up front so an unmatched first end can be filed under it.
        final boolean hasMappedMate = rec.getReadPairedFlag() && !rec.getMateUnmappedFlag();
        if (hasMappedMate) {
            ends.read2ReferenceIndex = rec.getMateReferenceIndex().intValue();
        }

        ends.libraryId = this.libraryIdGenerator.getLibraryId(rec);

        if (this.opticalDuplicateFinder.addLocationInformation(rec.getReadName(), ends)) {
            short readGroupIndex = 0;
            final String readGroupId = (String) rec.getAttribute("RG");
            final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
            if (readGroupId != null && readGroups != null) {
                // Count the read groups that precede the matching one.
                for (final SAMReadGroupRecord readGroup : readGroups) {
                    if (readGroup.getReadGroupId().equals(readGroupId)) {
                        break;
                    }
                    readGroupIndex++;
                }
            }
            ends.readGroup = readGroupIndex;
        }
        return ends;
    }

    /**
     * Walks the sorted pair and fragment collections, groups consecutive entries that are
     * comparable for duplicate purposes, and records the file indexes of all duplicates in
     * {@code duplicateIndexes}, which is left ready for iteration.
     *
     * <p>Both source collections are cleaned up and released as soon as they have been traversed.
     *
     * <p>The chunk state (current chunk and its first element) is explicitly reset between the
     * pair pass and the fragment pass. Previously the last pair chunk and its first element
     * leaked into the fragment pass, so correctness depended on a pair record never comparing
     * equal to the first fragment record.
     */
    private void generateDuplicateIndexes() {
        // NOTE(review): 0.25 is a fixed ratio here (SORTING_COLLECTION_SIZE_RATIO is not consulted); 8.0 and
        // 2.147483642E9 are presumably the bytes-per-long and the collection's in-RAM item cap — confirm.
        final int maxInMemory = (int) Math.min((Runtime.getRuntime().maxMemory() * 0.25d) / 8.0d, 2.147483642E9d);
        this.log.info("Will retain up to " + maxInMemory + " duplicate indices before spilling to disk.");
        this.duplicateIndexes = new SortingLongCollection(maxInMemory, (File[]) this.TMP_DIR.toArray(new File[this.TMP_DIR.size()]));

        ReadEndsForMarkDuplicates firstOfChunk = null;
        final List<ReadEndsForMarkDuplicates> chunk = new ArrayList<ReadEndsForMarkDuplicates>(200);

        // Pass 1: pairs. A chunk is a run of entries matching on both ends.
        this.log.info("Traversing read pair information and detecting duplicates.");
        for (final ReadEndsForMarkDuplicates next : this.pairSort) {
            if (firstOfChunk != null && areComparableForDuplicates(firstOfChunk, next, true)) {
                chunk.add(next);
            } else {
                if (chunk.size() > 1) {
                    markDuplicatePairs(chunk);
                }
                chunk.clear();
                chunk.add(next);
                firstOfChunk = next;
            }
        }
        if (chunk.size() > 1) {
            markDuplicatePairs(chunk);
        }
        this.pairSort.cleanup();
        this.pairSort = null;

        // Start the fragment pass from a clean slate so no pair record can join a fragment chunk.
        chunk.clear();
        firstOfChunk = null;

        // Pass 2: fragments. A chunk is a run of entries matching on the first end only.
        this.log.info("Traversing fragment information and detecting duplicates.");
        boolean containsPairs = false;
        boolean containsFrags = false;
        for (final ReadEndsForMarkDuplicates next : this.fragSort) {
            if (firstOfChunk != null && areComparableForDuplicates(firstOfChunk, next, false)) {
                chunk.add(next);
                containsPairs = containsPairs || next.isPaired();
                containsFrags = containsFrags || !next.isPaired();
            } else {
                // Only chunks holding at least one unpaired fragment can yield fragment duplicates.
                if (chunk.size() > 1 && containsFrags) {
                    markDuplicateFragments(chunk, containsPairs);
                }
                chunk.clear();
                chunk.add(next);
                firstOfChunk = next;
                containsPairs = next.isPaired();
                containsFrags = !next.isPaired();
            }
        }
        // Flush the trailing chunk (a no-op for empty or single-element chunks).
        markDuplicateFragments(chunk, containsPairs);
        this.fragSort.cleanup();
        this.fragSort = null;

        this.log.info("Sorting list of duplicate records.");
        this.duplicateIndexes.doneAddingStartIteration();
    }

    /**
     * Tells whether two read-end records belong to the same duplicate set.
     *
     * @param lhs          first record
     * @param rhs          second record
     * @param compareRead2 when true, the read2 reference index and coordinate must match as well
     * @return true if library id, read1 reference index, read1 coordinate and orientation are
     *         all equal and, when requested, the read2 position is equal too
     */
    private boolean areComparableForDuplicates(ReadEndsForMarkDuplicates lhs, ReadEndsForMarkDuplicates rhs, boolean compareRead2) {
        if (lhs.libraryId != rhs.libraryId) {
            return false;
        }
        if (lhs.read1ReferenceIndex != rhs.read1ReferenceIndex) {
            return false;
        }
        if (lhs.read1Coordinate != rhs.read1Coordinate) {
            return false;
        }
        if (lhs.orientation != rhs.orientation) {
            return false;
        }
        if (!compareRead2) {
            return true;
        }
        return lhs.read2ReferenceIndex == rhs.read2ReferenceIndex
                && lhs.read2Coordinate == rhs.read2Coordinate;
    }

    /**
     * Records one file index as a duplicate and bumps the running duplicate count.
     *
     * @param index file-order index of the record to flag
     */
    private void addIndexAsDuplicate(long index) {
        this.duplicateIndexes.add(index);
        // Counted only after the add has gone through.
        this.numDuplicateIndices += 1;
    }

    /**
     * Given a set of pairs that are duplicates of one another, keeps the highest-scoring pair
     * (the earliest one on ties) and records both file indexes of every other pair as
     * duplicates. When a read-name regex is configured, the set is also passed on for
     * optical-duplicate tracking.
     *
     * @param list the pairs forming one duplicate set
     */
    private void markDuplicatePairs(List<ReadEndsForMarkDuplicates> list) {
        ReadEndsForMarkDuplicates best = null;
        for (final ReadEndsForMarkDuplicates candidate : list) {
            // Strictly greater, so the first of several equal scores wins.
            if (best == null || candidate.score > best.score) {
                best = candidate;
            }
        }

        for (final ReadEndsForMarkDuplicates candidate : list) {
            if (candidate == best) {
                continue;
            }
            addIndexAsDuplicate(candidate.read1IndexInFile);
            addIndexAsDuplicate(candidate.read2IndexInFile);
        }

        if (this.READ_NAME_REGEX != null) {
            AbstractMarkDuplicatesCommandLineProgram.trackOpticalDuplicates(list, this.opticalDuplicateFinder, this.libraryIdGenerator);
        }
    }

    /**
     * Records duplicates within a set of fragment entries sharing the same first end.
     *
     * <p>If the set contains any paired entry, every unpaired entry is recorded as a duplicate.
     * Otherwise the highest-scoring entry (the earliest one on ties) is kept and all others are
     * recorded as duplicates.
     *
     * @param list          the entries forming one set
     * @param containsPairs whether at least one entry in the set is part of a pair
     */
    private void markDuplicateFragments(List<ReadEndsForMarkDuplicates> list, boolean containsPairs) {
        if (!containsPairs) {
            ReadEndsForMarkDuplicates best = null;
            for (final ReadEndsForMarkDuplicates candidate : list) {
                // Strictly greater, so the first of several equal scores wins.
                if (best == null || candidate.score > best.score) {
                    best = candidate;
                }
            }
            for (final ReadEndsForMarkDuplicates candidate : list) {
                if (candidate != best) {
                    addIndexAsDuplicate(candidate.read1IndexInFile);
                }
            }
        } else {
            for (final ReadEndsForMarkDuplicates candidate : list) {
                if (candidate.isPaired()) {
                    continue;
                }
                addIndexAsDuplicate(candidate.read1IndexInFile);
            }
        }
    }
}
