Source code for rush.convert

"""
Conversion utilities for molecular structure file formats.

Format parsing and writing are backed by the native libqdx Rust library.
"""

import json as std_json
from collections.abc import Sequence
from pathlib import Path
from typing import TypeGuard

import libqdx

from ..mol import TRC
from .json import from_json, to_dict


def from_pdb(pdb_content: str) -> TRC | list[TRC]:
    """Build TRC structures from the text of a PDB file.

    Parsing is delegated to ``libqdx.from_pdb``.

    Args:
        pdb_content: Raw PDB file text.

    Returns:
        The lone TRC when the parser yields exactly one structure (one
        model); otherwise the parser's full list of TRCs.
    """
    parsed = libqdx.from_pdb(pdb_content)
    if len(parsed) == 1:
        # Unwrap the common single-model case so callers get a bare TRC.
        return parsed[0]
    return parsed
def to_pdb(trc: TRC) -> str:
    """Serialise a TRC structure as PDB text.

    Writing is delegated to ``libqdx.to_pdb``.

    Args:
        trc: TRC structure to serialise.

    Returns:
        The PDB-formatted string, including the trailing END record.
    """
    pdb_text = libqdx.to_pdb(trc)
    return pdb_text
def from_mmcif(mmcif_content: str) -> TRC | list[TRC]:
    """Build TRC structures from the text of an mmCIF file.

    Parsing is delegated to ``libqdx.from_mmcif``.

    Args:
        mmcif_content: Raw mmCIF file text.

    Returns:
        The lone TRC when the parser yields exactly one structure (one
        model); otherwise the parser's full list of TRCs.
    """
    structures = libqdx.from_mmcif(mmcif_content)
    if len(structures) != 1:
        return structures
    # Exactly one structure: hand back the TRC itself, not a 1-element list.
    return structures[0]
def from_sdf(sdf_content: str) -> TRC | list[TRC]:
    """Build TRC structures from the text of an SDF / MOL file.

    Parsing is delegated to ``libqdx.from_sdf``.

    Args:
        sdf_content: Raw SDF / MOL file text.

    Returns:
        The lone TRC when the parser yields exactly one structure (one
        molecule); otherwise the parser's full list of TRCs.
    """
    molecules = libqdx.from_sdf(sdf_content)
    only_one = len(molecules) == 1
    return molecules[0] if only_one else molecules
def load_structure(file_path: str | Path) -> TRC | list[TRC]:
    """Load a molecular structure from a file.

    Supported formats: PDB (.pdb), mmCIF (.cif / .mmcif), SDF / MOL
    (.sdf / .mol), and TRC JSON (.json). The format is determined by the
    file extension; when the extension is unrecognised the content is
    inspected heuristically, in this order:

    1. text starting with ``[`` or ``{`` is treated as JSON;
    2. text containing both ``data_`` and ``_atom_site`` is treated as mmCIF;
    3. text containing a ``$$$$`` or ``M  END`` line is treated as SDF / MOL;
    4. anything else is handed to the PDB parser.

    Args:
        file_path: Path to the structure file.

    Returns:
        A single TRC when the file contains one model/molecule, otherwise a
        list of TRCs.
    """
    path = Path(file_path)
    suffix = path.suffix.lower()

    if suffix == ".json":
        # JSON files are passed to from_json by path, without reading here.
        return from_json(path)

    # Read with an explicit encoding so the result does not depend on the
    # platform's locale default.
    with path.open("r", encoding="utf-8") as f:
        content = f.read()

    if suffix in {".cif", ".mmcif"}:
        return from_mmcif(content)
    if suffix == ".pdb":
        return from_pdb(content)
    if suffix in {".sdf", ".mol"}:
        return from_sdf(content)

    # Unrecognised extension — try to guess from content.
    if content.strip().startswith(("[", "{")):
        return from_json(std_json.loads(content))

    content_lower = content.lower()
    if "data_" in content_lower and "_atom_site" in content_lower:
        return from_mmcif(content)

    # "M  END" terminates a molfile block and "$$$$" separates SDF records;
    # neither is a valid PDB record, so either one identifies SDF / MOL text.
    if any(line.rstrip() in {"$$$$", "M  END"} for line in content.splitlines()):
        return from_sdf(content)

    # Last resort: assume PDB.
    return from_pdb(content)
def save_structure(
    trcs: TRC | list[TRC], file_path: str | Path, format: str | None = None
) -> None:
    """Save TRC structures to a file.

    JSON output is ``to_dict(trcs)`` dumped with a two-space indent. PDB
    output is the ``to_pdb`` text for a single structure; several structures
    are written as one multi-model file, each wrapped in MODEL / ENDMDL
    records and followed by a single closing END record.

    Args:
        trcs: TRC structure or list of TRC structures to write.
        file_path: Output file path.
        format: Output format (``'pdb'`` or ``'json'``, case-insensitive).
            When *None* the format is inferred from the file extension.

    Raises:
        ValueError: If the format cannot be inferred or is unsupported, or
            if an empty list is given for PDB output.
    """
    path = Path(file_path)

    if format is None:
        suffix = path.suffix.lower()
        if suffix == ".json":
            format = "json"
        elif suffix == ".pdb":
            format = "pdb"
        else:
            raise ValueError(
                f"Cannot infer format from extension '{suffix}'; pass format= explicitly"
            )

    fmt = format.lower()

    if fmt == "json":
        with path.open("w", encoding="utf-8") as f:
            std_json.dump(to_dict(trcs), f, indent=2)
        return

    if fmt != "pdb":
        # Reject before touching the filesystem so no empty file is created.
        raise ValueError(f"Unsupported format: {format}")

    models = [trcs] if isinstance(trcs, TRC) else list(trcs)
    if not models:
        raise ValueError("No TRC structures to write")

    if len(models) == 1:
        content = to_pdb(models[0])
    else:
        # Multi-model PDB: wrap each TRC in MODEL/ENDMDL records.
        records: list[str] = []
        for serial, trc in enumerate(models, 1):
            # MODEL serial number is right-justified in columns 11-14.
            records.append(f"MODEL     {serial:>4}")
            # Drop per-model END records by whole line; a substring replace
            # would also damage any record whose text merely ends in "END".
            body = [
                line for line in to_pdb(trc).splitlines() if line.strip() != "END"
            ]
            # Trailing blank lines would otherwise end up before ENDMDL.
            while body and not body[-1].strip():
                body.pop()
            records.extend(body)
            records.append("ENDMDL")
        records.append("END")
        content = "\n".join(records) + "\n"

    with path.open("w", encoding="utf-8") as f:
        f.write(content)
# One TRC input: an in-memory TRC, or a path (str / Path) to a structure file.
TrcInput = TRC | str | Path
# A sequence of such inputs, accepted wherever a single input is.
TrcInputSeq = Sequence[TrcInput]


def _single_trc(trc: TRC | list[TRC], label: str | Path) -> TRC:
    """Return the one TRC held by *trc*, unwrapping a single-element list.

    Args:
        trc: A TRC, or a list expected to hold exactly one TRC.
        label: Name of the source, used only in the error message.

    Raises:
        ValueError: If *trc* is a list of TRCs whose length is not 1.
        TypeError: If *trc* is a list containing non-TRC elements.
    """
    if _is_trc_list(trc):
        if len(trc) != 1:
            raise ValueError(f"Expected 1 TRC in {label}, found {len(trc)}")
        return trc[0]
    if isinstance(trc, list):
        raise TypeError("Expected TRC list elements to be TRC objects")
    return trc


def _normalize_trc_inputs(inputs: tuple[TrcInput | TrcInputSeq, ...]) -> list[TrcInput]:
    """Flatten one level of nesting so every element is a single TRC input.

    Strings, paths and TRC objects are kept as-is even though they may
    satisfy ``Sequence``; any other sequence is expanded in place.
    """
    normalized: list[TrcInput] = []
    for item in inputs:
        if isinstance(item, Sequence) and not isinstance(item, (str, Path, TRC)):
            normalized.extend(item)
        else:
            normalized.append(item)
    return normalized


def _is_trc_list(value: object) -> TypeGuard[list[TRC]]:
    """Return True when *value* is a list whose elements are all TRCs.

    An empty list also qualifies, since ``all`` of nothing is True.
    """
    return isinstance(value, list) and all(isinstance(item, TRC) for item in value)


def _load_trc(trc: TrcInput) -> TRC:
    """Load TRC from TRC object or file path.

    A TRC object is returned unchanged. A path is read with
    ``load_structure``; when the file yields several TRCs they are combined
    left to right with ``TRC.extend``.

    Raises:
        ValueError: If the file yields no TRC structures at all.
        TypeError: If *trc* is neither a TRC nor a path, or the loaded list
            contains non-TRC elements.
    """
    if isinstance(trc, TRC):
        return trc
    if isinstance(trc, (str, Path)):
        loaded = load_structure(trc)
        if _is_trc_list(loaded):
            if not loaded:
                # An empty list passes _is_trc_list; fail with a clear
                # message instead of an IndexError on loaded[0].
                raise ValueError(f"No TRC structures found in {trc}")
            merged = loaded[0]
            for next_trc in loaded[1:]:
                merged = merged.extend(next_trc)
            return merged
        if isinstance(loaded, list):
            raise TypeError("Expected TRC list elements to be TRC objects")
        return loaded
    raise TypeError(f"TRC must be a TRC object or file path, got {type(trc)}")
def merge_trcs(
    *trcs: TrcInput | TrcInputSeq,
    output_file: str | Path | None = None,
    skip_validation: bool = False,
) -> TRC:
    """
    Combine one or more TRC objects (or structure file paths) into one TRC.

    Inputs are loaded one at a time and folded together left to right with
    ``TRC.extend``, which is expected to renumber atom, residue, and chain
    indices so they stay unique in the merged structure.

    Args:
        trcs: TRC objects or file paths. Lists/tuples of inputs are
            flattened, so a single list/tuple may carry the full set.
        output_file: Optional path to write the merged TRC as JSON.
        skip_validation: If *True*, skip ``trc.check()`` on the result.

    Returns:
        The merged TRC object.

    Raises:
        ValueError: If no inputs are provided or validation fails.
        FileNotFoundError: If a file path does not exist.
    """
    sources = _normalize_trc_inputs(trcs)
    if len(sources) == 0:
        raise ValueError("Expected at least one TRC input, found 0")

    # Load lazily: each input is read only when it is about to be merged.
    remaining = iter(sources)
    merged = _load_trc(next(remaining))
    for source in remaining:
        merged = merged.extend(_load_trc(source))

    if skip_validation is False or not skip_validation:
        merged.check()

    if output_file is None:
        return merged

    # Persist as a one-element list, the same shape to_dict gets elsewhere.
    with Path(output_file).open("w") as f:
        std_json.dump(to_dict([merged]), f, indent=2)
    return merged
# Public API of this module. ``from_json`` and ``to_dict`` are defined in the
# sibling ``.json`` module and re-exported here alongside the converters.
__all__ = [
    "from_json",
    "to_dict",
    "from_mmcif",
    "from_pdb",
    "to_pdb",
    "from_sdf",
    "load_structure",
    "save_structure",
    "merge_trcs",
]