import Plot from '@models/Plot';
import { AnalysisParameters, SeuratOverRepresentationAnalysisParameters } from '@models/AnalysisParameters';
import {
    PanglaoShortnames,
    SeuratOverRepresentationAnalysis,
} from '@/src/models/analysis/SeuratOverRepresentationAnalysis';
import React from 'react';
import MethodsSectionTitle from '@components/experiments/methods/MethodsSectionTitle';
import Bold from '@components/elements/Bold';
import CodeBlockInline from '@/src/components/CodeBlockInline';
import { formatStringToNumberWithSeparator } from '@util/StringUtil';
import DotPlotDisplayOption from '@/src/models/plotDisplayOption/DotPlotDisplayOption';
import Experiment from '@/src/models/Experiment';

/** Props accepted by the Seurat over-representation analysis methods section. */
type Props = {
    /** Plot whose `analysis` and `display` settings are described in the rendered text. */
    plot: Plot;
    /** Experiment the plot belongs to; its organism shortname is read to add organism-specific wording. */
    experiment: Experiment;
    /** Parameters used to look up gene set collections; may be `null`. */
    analysisParameters: AnalysisParameters | null;
};
/**
 * Methods text for a Seurat over-representation analysis (ORA) plot.
 *
 * Renders an "Over-representation analysis (ORA)" section built from the analysis settings
 * (`adj_pval_de`, `n_genes`, the first selected gene set collection), followed by a single
 * section describing the plot's display type (`dot_plot`, `heatmap` or `score_bar_plot`).
 * Any other display type renders only the ORA section.
 *
 * @param plot - Plot whose `analysis` and `display` are described. When `plot.analysis` is
 *   falsy a short "No analysis was found." placeholder is returned instead.
 * @param experiment - Experiment whose `organism.shortname` toggles the extra sentence for
 *   `eastern_spiny_mouse`.
 * @param analysisParameters - Source of the available gene set collections. When `null`, the
 *   MSigDB wording is used.
 * @returns The methods section element.
 */
const SeuratOverRepresentationAnalysisMethodsSection = ({ plot, experiment, analysisParameters }: Props) => {
    const analysis = plot.analysis as SeuratOverRepresentationAnalysis;
    if (!analysis) {
        return <div>No analysis was found.</div>;
    }

    const display = plot.display as DotPlotDisplayOption;
    // Keep `null` in the asserted type: the prop is declared nullable, and the optional chain
    // below relies on that being visible to the type checker.
    const parameters = analysisParameters as SeuratOverRepresentationAnalysisParameters | null;
    const organismShortname = experiment.organism.shortname;

    // Only the first selected gene set collection is compared; it decides which source
    // (PanglaoDB vs. MSigDB) the text cites. Missing parameters fall back to `false`.
    const usesPanglaoCollection =
        parameters?.gene_set_collections.some(
            (collection) =>
                analysis.gene_set_collections[0] === collection.id && PanglaoShortnames.includes(collection.shortname),
        ) ?? false;

    // The two wordings use different citation numbers because MSigDB carries extra references.
    const geneSetSourceSentence = usesPanglaoCollection ? (
        <>
            The gene sets collection from the PanglaoDB<sup>2</sup> were included in the analysis. False discovery
            rate (FDR) was calculated with Benjamini and Hochberg<sup>3</sup>.
        </>
    ) : (
        <>
            The gene sets collection from the Molecular Signatures Database (MSigDB)<sup>2,3</sup>, curated using
            the <CodeBlockInline>msigdbr</CodeBlockInline> R package<sup>4</sup>, were included in the analysis.
            False discovery rate (FDR) was calculated with Benjamini and Hochberg<sup>5</sup>.
        </>
    );

    // Shared by every display-type description below.
    const groupScope = display.max_gene_set_group_option === 'any' ? 'for all groups' : 'per group';

    return (
        <section>
            <section>
                <MethodsSectionTitle>Over-representation analysis (ORA)</MethodsSectionTitle>
                <p className="mb-6">
                    Over-representation analysis (ORA) was performed with the{' '}
                    <CodeBlockInline>clusterProfiler</CodeBlockInline> R package<sup>1</sup>, which determines the
                    significance of overlap between passing genes and gene sets using a hypergeometric test. Passing
                    genes were defined as those with an adjusted <i>p</i>-value of less than{' '}
                    <Bold>{formatStringToNumberWithSeparator(analysis.adj_pval_de)}</Bold> in the selected comparison.
                    For groups with more than <Bold>{formatStringToNumberWithSeparator(analysis.n_genes)}</Bold>{' '}
                    significant genes, the top <Bold>{formatStringToNumberWithSeparator(analysis.n_genes)}</Bold> genes
                    were chosen by largest fold change following the <i>p</i>-value filter. {geneSetSourceSentence}
                    {organismShortname === 'eastern_spiny_mouse' ? (
                        <>
                            {' '}
                            <i>Acomys dimidiatus</i> Ensembl gene identifiers were appended to <i>Mus musculus</i> gene
                            names of the MSigDB, which were used for ORA.
                        </>
                    ) : null}
                </p>
            </section>
            {display.display_type === 'dot_plot' ? (
                <section>
                    <MethodsSectionTitle>Dot plot</MethodsSectionTitle>
                    <p className="mb-6">
                        Dot plot showing the top <Bold>{display.max_gene_sets}</Bold> most significant gene sets{' '}
                        {groupScope} following ORA. Dots are colored by the -log10(FDR) and are sized by gene ratio
                        (the fraction of differentially expressed genes found in a given gene set).
                    </p>
                </section>
            ) : null}
            {display.display_type === 'heatmap' ? (
                <section>
                    <MethodsSectionTitle>Heatmap</MethodsSectionTitle>
                    <p className="mb-6">
                        Heatmap showing the -log10(FDR) for the top <Bold>{display.max_gene_sets}</Bold> most
                        significant gene sets {groupScope} following ORA.
                    </p>
                </section>
            ) : null}
            {display.display_type === 'score_bar_plot' ? (
                <section>
                    <MethodsSectionTitle>Score barplot</MethodsSectionTitle>
                    <p className="mb-6">
                        Score barplot showing the -log10(FDR) for the top <Bold>{display.max_gene_sets}</Bold> most
                        significant gene sets {groupScope} following ORA.
                    </p>
                </section>
            ) : null}
        </section>
    );
};

export default SeuratOverRepresentationAnalysisMethodsSection;
