Skip to content

Dynamic Factor Model

Module for Dynamic Factor ModelRunner

ModelRunner

A class for running dynamic factor models on batches of data.

Parameters: - ad (AnnData): The AnnData object containing the data. - outdir (Path, optional): The output directory for saving the results. Defaults to "./output". - batch (str, optional): The batch column in the AnnData object. Defaults to None.

Attributes: - ad (AnnData): The AnnData object containing the data. - outdir (Path): The output directory for saving the results. - batch (str): The batch column in the AnnData object. - batches (dict[str, AnnData]): A dictionary of batches extracted from the AnnData object. - results (list): A list to store the results of each model run. - failures (dict): A dictionary to store any failures that occur during model runs.

Source code in dfmdash/dfm.py
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
class ModelRunner:
    """
    A class for running dynamic factor models on batches of data.

    Parameters:
    - ad (AnnData): The AnnData object containing the data.
    - outdir (Path, optional): The output directory for saving the results. Defaults to "./output".
    - batch (str, optional): The batch column in the AnnData object. Defaults to None.

    Attributes:
    - ad (AnnData): The AnnData object containing the data.
    - outdir (Path): The output directory for saving the results.
    - batch (str): The batch column in the AnnData object.
    - batches (dict[str, AnnData]): A dictionary of batches extracted from the AnnData object.
    - results (list): A list to store the results of each model run.
    - failures (dict): A dictionary to store any failures that occur during model runs.
    """

    def __init__(self, ad: AnnData, outdir: Path = Path("./output"), batch: Optional[str] = None):
        self.ad = ad
        self.outdir = outdir
        self.batch = batch
        self.batches: dict[str, AnnData] = self.get_batches()
        self.results = []
        self.failures = {}

    def __repr__(self):
        return f"ModelRunner(ad={self.ad}, outdir={self.outdir}, batch={self.batch})"

    def run(self, maxiter=10_000, global_multiplier=1, columns: Optional[list[str]] = None) -> "ModelRunner":
        """
        Run the dynamic factor models on the batches of data.

        Parameters:
        - maxiter (int, optional): The maximum number of iterations for model fitting. Defaults to 10,000.
        - global_multiplier (int, optional): A global multiplier for the model. Defaults to 1.
        - columns (list[str], optional): The columns to include in the model. Defaults to None.

        Returns:
        - ModelRunner: The ModelRunner object.

        Raises:
        - Exception: If an error occurs during model fitting.
        """
        self.outdir.mkdir(exist_ok=True)
        print(f"{len(self.batches)} batches to run")
        for batch_name, batch in track(list(self.batches.items())):
            data = DataProcessor(batch, global_multiplier, maxiter).process(columns)
            data.write(self.outdir / batch_name) if batch_name else data.write(self.outdir)
            model = sm.tsa.DynamicFactorMQ(data.df, factors=data.factors, factor_multiplicities=data.multiplicities)
            try:
                res = model.fit(disp=10, maxiter=data.maxiter)
            except Exception as e:
                print(f"[bold red]FAILURE[/]{e}")
                self.failures[batch_name] = e
                continue
            filtered_factors = process_factors(res.factors["filtered"], data.raw, batch.obs)
            result = Result(batch_name, res, model, filtered_factors)
            result.write(self.outdir)
            # self.ad.uns["factors"] = result.factors.drop(columns="Time")
            # TODO: Fix this. Tests need this present but the dashboard doesn't
            try:
                self.ad.obs = self.ad.obs.drop(columns="Time")
                self.ad.write(self.outdir / batch_name / "data.h5ad")
            except:
                pass
            self.results.append(result)
        # TODO: Concat factors across batch variables
        print("All runs completed!")
        return self

    def write_failures(self):
        """
        Write the failures to a file.

        The failures are written to a file named "failed.txt" in the output directory.
        Each line in the file contains the batch name and the corresponding failure message.
        """
        for name, failure in self.failures.items():
            with open(self.outdir / "failed.txt", "a") as f:
                f.write(f"{name}\t{failure}\n")

    def get_batches(self) -> dict[str, AnnData]:
        """
        Get batches from AnnData object.

        Returns:
        - dict[str, AnnData]: A dictionary of batches extracted from the AnnData object.
        """
        if not self.batch:
            return {None: self.ad}  # Didn't know you could use None as a key, cool
        return {x: self.ad[self.ad.obs[self.batch] == x] for x in self.ad.obs[self.batch].unique()}

get_batches()

Get batches from AnnData object.

Returns: - dict[str, AnnData]: A dictionary of batches extracted from the AnnData object.

Source code in dfmdash/dfm.py
130
131
132
133
134
135
136
137
138
139
def get_batches(self) -> dict[str, AnnData]:
    """
    Get batches from AnnData object.

    Returns:
    - dict[str, AnnData]: A dictionary of batches extracted from the AnnData object.
    """
    if not self.batch:
        return {None: self.ad}  # Didn't know you could use None as a key, cool
    return {x: self.ad[self.ad.obs[self.batch] == x] for x in self.ad.obs[self.batch].unique()}

run(maxiter=10000, global_multiplier=1, columns=None)

Run the dynamic factor models on the batches of data.

Parameters: - maxiter (int, optional): The maximum number of iterations for model fitting. Defaults to 10,000. - global_multiplier (int, optional): A global multiplier for the model. Defaults to 1. - columns (list[str], optional): The columns to include in the model. Defaults to None.

Returns: - ModelRunner: The ModelRunner object.

Raises: - Exception: If an error occurs during model fitting.

Source code in dfmdash/dfm.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def run(self, maxiter=10_000, global_multiplier=1, columns: Optional[list[str]] = None) -> "ModelRunner":
    """
    Run the dynamic factor models on the batches of data.

    Parameters:
    - maxiter (int, optional): The maximum number of iterations for model fitting. Defaults to 10,000.
    - global_multiplier (int, optional): A global multiplier for the model. Defaults to 1.
    - columns (list[str], optional): The columns to include in the model. Defaults to None.

    Returns:
    - ModelRunner: The ModelRunner object.

    Raises:
    - Exception: If an error occurs during model fitting.
    """
    self.outdir.mkdir(exist_ok=True)
    print(f"{len(self.batches)} batches to run")
    for batch_name, batch in track(list(self.batches.items())):
        data = DataProcessor(batch, global_multiplier, maxiter).process(columns)
        data.write(self.outdir / batch_name) if batch_name else data.write(self.outdir)
        model = sm.tsa.DynamicFactorMQ(data.df, factors=data.factors, factor_multiplicities=data.multiplicities)
        try:
            res = model.fit(disp=10, maxiter=data.maxiter)
        except Exception as e:
            print(f"[bold red]FAILURE[/]{e}")
            self.failures[batch_name] = e
            continue
        filtered_factors = process_factors(res.factors["filtered"], data.raw, batch.obs)
        result = Result(batch_name, res, model, filtered_factors)
        result.write(self.outdir)
        # self.ad.uns["factors"] = result.factors.drop(columns="Time")
        # TODO: Fix this. Tests need this present but the dashboard doesn't
        try:
            self.ad.obs = self.ad.obs.drop(columns="Time")
            self.ad.write(self.outdir / batch_name / "data.h5ad")
        except:
            pass
        self.results.append(result)
    # TODO: Concat factors across batch variables
    print("All runs completed!")
    return self

write_failures()

Write the failures to a file.

The failures are written to a file named "failed.txt" in the output directory. Each line in the file contains the batch name and the corresponding failure message.

Source code in dfmdash/dfm.py
119
120
121
122
123
124
125
126
127
128
def write_failures(self):
    """
    Write the failures to a file.

    The failures are written to a file named "failed.txt" in the output directory.
    Each line in the file contains the batch name and the corresponding failure message.
    """
    for name, failure in self.failures.items():
        with open(self.outdir / "failed.txt", "a") as f:
            f.write(f"{name}\t{failure}\n")

Result dataclass

Represents the result of a dynamic factor model analysis.

Attributes:

Name Type Description
name Optional[str]

Name of the batch if batch is specified.

result DynamicFactor

The dynamic factor model result.

model DynamicFactorMQ

The dynamic factor model.

factors DataFrame

The factors obtained from the analysis.

Source code in dfmdash/dfm.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
@dataclass
class Result:
    """
    Represents the result of a dynamic factor model analysis.

    Attributes:
        name (Optional[str]): Name of the batch if batch is specified.
        result (sm.tsa.DynamicFactor): The dynamic factor model result.
        model (sm.tsa.DynamicFactorMQ): The dynamic factor model.
        factors (pd.DataFrame): The factors obtained from the analysis.
    """

    name: Optional[str]
    result: sm.tsa.DynamicFactor
    model: sm.tsa.DynamicFactorMQ
    factors: pd.DataFrame

    def write(self, outdir: Path):
        """
        Writes the model summary, result summary, and factors to CSV files.

        Args:
            outdir (Path): The output directory where the files will be written.
        """
        out = outdir / self.name if self.name else outdir
        with open(out / "model.csv", "w") as f:
            f.write(self.model.summary().as_csv())
        with open(out / "results.csv", "w") as f:
            f.write(self.result.summary().as_csv())
        self.factors.to_csv(out / "factors.csv")

write(outdir)

Writes the model summary, result summary, and factors to CSV files.

Parameters:

Name Type Description Default
outdir Path

The output directory where the files will be written.

required
Source code in dfmdash/dfm.py
33
34
35
36
37
38
39
40
41
42
43
44
45
def write(self, outdir: Path):
    """
    Writes the model summary, result summary, and factors to CSV files.

    Args:
        outdir (Path): The output directory where the files will be written.
    """
    out = outdir / self.name if self.name else outdir
    with open(out / "model.csv", "w") as f:
        f.write(self.model.summary().as_csv())
    with open(out / "results.csv", "w") as f:
        f.write(self.result.summary().as_csv())
    self.factors.to_csv(out / "factors.csv")

process_factors(factors, raw, obs)

Process factors by merging them with raw and obs dataframes.

Parameters:

Name Type Description Default
factors DataFrame

The factors dataframe.

required
raw DataFrame

The raw dataframe.

required
obs DataFrame

The obs dataframe.

required

Returns:

Type Description
DataFrame

pd.DataFrame: The merged factors dataframe.

Source code in dfmdash/dfm.py
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
def process_factors(factors: pd.DataFrame, raw: pd.DataFrame, obs: pd.DataFrame) -> pd.DataFrame:
    """
    Process factors by merging them with raw and obs dataframes.

    Args:
        factors (pd.DataFrame): The factors dataframe.
        raw (pd.DataFrame): The raw dataframe.
        obs (pd.DataFrame): The obs dataframe.

    Returns:
        pd.DataFrame: The merged factors dataframe.
    """
    factors.index = raw.index
    factors = factors.merge(raw, left_index=True, right_index=True)
    factors.columns = [f"Factor_{x}" for x in factors.columns]
    if not obs.empty:
        factors = factors.merge(obs, left_index=True, right_index=True)
    return factors