header='infer' in from_single_starsolo function
Is there a reason why the default is 'infer'
instead of None
? With 'infer', the first barcode is interpreted as a header, so one barcode is lost and the function from_quant
throws an error:
ValueError Traceback (most recent call last)
Cell In[5], line 2
1 if path_quant:
----> 2 adata = assembler.from_quant(path_quant, the_10X_yml, dtype='raw', velocity=False)
File <@beartype(sctoolbox.utils.assemblers.from_quant) at 0x7f1efbeecae0>:121, in from_quant(__beartype_get_violation, __beartype_conf, __beartype_object_139768936653696, __beartype_object_139771619770608, __beartype_object_139771649551024, __beartype_object_93835711688832, __beartype_func, *args, **kwargs)
File /mnt/workspace2/yalayou/envs/sctoolbox/lib/python3.11/site-packages/sctoolbox/utils/assemblers.py:220, in from_quant(path, configuration, use_samples, dtype, velocity)
218 logger.info(f"Assembling sample '{sample_name}'")
219 solo_dir = os.path.join(sample_dir, "solo")
--> 220 adata = from_single_starsolo(solo_dir, dtype=dtype, velocity=velocity)
222 # Make barcode index unique
223 adata.obs.index = adata.obs.index + "-" + sample_name
File <@beartype(sctoolbox.utils.assemblers.from_single_starsolo) at 0x7f1efac0d620>:121, in from_single_starsolo(__beartype_get_violation, __beartype_conf, __beartype_object_139771619770608, __beartype_object_139771649551024, __beartype_object_139768928484032, __beartype_object_139771611080624, __beartype_getrandbits, __beartype_object_93835711688832, __beartype_func, *args, **kwargs)
File /mnt/workspace2/yalayou/envs/sctoolbox/lib/python3.11/site-packages/sctoolbox/utils/assemblers.py:137, in from_single_starsolo(path, dtype, header, velocity)
135 # Setup main adata object from matrix/barcodes/genes
136 logger.info("Setting up adata from solo files")
--> 137 adata = from_single_mtx(matrix_f, barcodes_f, genes_f, header=header)
138 adata.var.columns = ["gene", "type"] # specific to the starsolo format
140 # Add in velocity information
File <@beartype(sctoolbox.utils.assemblers.from_single_mtx) at 0x7f1efad8a160>:199, in from_single_mtx(__beartype_object_139769163800896, __beartype_get_violation, __beartype_conf, __beartype_object_139768928603328, __beartype_object_139768928484032, __beartype_object_139771611080624, __beartype_getrandbits, __beartype_object_93835711688832, __beartype_func, *args, **kwargs)
File /mnt/workspace2/yalayou/envs/sctoolbox/lib/python3.11/site-packages/sctoolbox/utils/assemblers.py:314, in from_single_mtx(mtx, barcodes, variables, transpose, header, barcode_index, genes_index, delimiter)
311 raise ValueError("Genes index column does not contain unique values")
313 # Add tables to anndata object
--> 314 adata.obs = barcode_csv
315 if variables:
316 adata.var = var_csv
File /mnt/workspace2/yalayou/envs/sctoolbox/lib/python3.11/site-packages/anndata/_core/anndata.py:851, in AnnData.obs(self, value)
849 @obs.setter
850 def obs(self, value: pd.DataFrame):
--> 851 self._set_dim_df(value, "obs")
File /mnt/workspace2/yalayou/envs/sctoolbox/lib/python3.11/site-packages/anndata/_core/anndata.py:788, in AnnData._set_dim_df(self, value, attr)
786 if not isinstance(value, pd.DataFrame):
787 raise ValueError(f"Can only assign pd.DataFrame to {attr}.")
--> 788 value_idx = self._prep_dim_index(value.index, attr)
789 if self.is_view:
790 self._init_as_actual(self.copy())
File /mnt/workspace2/yalayou/envs/sctoolbox/lib/python3.11/site-packages/anndata/_core/anndata.py:802, in AnnData._prep_dim_index(self, value, attr)
797 """Prepares index to be uses as obs_names or var_names for AnnData object.AssertionError
798
799 If a pd.Index is passed, this will use a reference, otherwise a new index object is created.
800 """
801 if self.shape[attr == "var"] != len(value):
--> 802 raise ValueError(
803 f"Length of passed value for {attr}_names is {len(value)}, but this AnnData has shape: {self.shape}"
804 )
805 if isinstance(value, pd.Index) and not isinstance(
806 value.name, (str, type(None))
807 ):
808 raise ValueError(
809 f"AnnData expects .{attr}.index.name to be a string or None, "
810 f"but you passed a name of type {type(value.name).__name__!r}"
811 )
ValueError: Length of passed value for obs_names is 6794879, but this AnnData has shape: (6794880, 32057)
Passing header=None
as a parameter to the from_single_starsolo
call inside the from_quant
function, or changing the default to None, will fix the problem.