Export from cherrytree to Obsidian

12 May 2026
#export, #cherrytree, #obsidian

Exportamos nuestra base de datos a formato .ctd sin contraseña.

Lanzamos el siguiente script:

NOTA: antes de ejecutarlo, cambiar las rutas de origen (INPUT_FILE) y de destino (OUTPUT_DIR) al principio del script.

#!/usr/bin/env python3
"""
cherrytree_to_obsidian.py
Convierte archivos CherryTree (.ctb SQLite / .ctd XML) a Markdown para Obsidian.
Mantiene la jerarquia de nodos como carpetas/archivos.
"""

import sys
import re
import sqlite3
import xml.etree.ElementTree as ET
from pathlib import Path

# ─── Configuration ─────────────────────────────────────────────────────────────
# Edit these two paths before running the script; main() reads both.
INPUT_FILE = r"C:\TMP\Cherry.ctd"  # CherryTree document to convert
OUTPUT_DIR = r"C:\TMP\obsidian"  # directory that receives the Markdown tree
# ───────────────────────────────────────────────────────────────────────────────


def sanitize_name(name: str) -> str:
    """Return *name* made safe for use as a single file or folder name.

    The characters backslash, slash, ``*``, ``?``, ``:``, ``"``, ``<``,
    ``>`` and ``|`` as well as ASCII control characters are replaced with
    ``_``.  Surrounding whitespace and trailing dots/spaces are removed.

    If nothing usable remains (empty name, or a name made only of dots such
    as ``..``), the placeholder ``"sin_nombre"`` is returned so that the
    result is never empty and can never refer to the parent directory.
    """
    # Strip first so that edge whitespace (including newlines/tabs) is dropped
    # rather than turned into "_" by the control-character replacement below.
    name = re.sub(r'[\\/*?:"<>|\x00-\x1f]', "_", name.strip())
    # Windows silently drops trailing dots and spaces from names, which would
    # make the created path differ from the one computed here.
    name = name.rstrip(". ")
    return name or "sin_nombre"


def write_node(node_id: int, name: str, content: str, parent_path: Path, is_folder: bool):
    """Write one node's content to a Markdown file and return that file's path.

    A folder node gets its own directory below *parent_path* and its content
    is stored there as ``_index.md``; any other node becomes ``<name>.md``
    directly inside *parent_path*.  Missing directories are created.  When
    the target file already exists, ``_<node_id>`` is appended to the file
    stem so the existing file is not overwritten.
    """
    clean = sanitize_name(name)

    target_dir = parent_path / clean if is_folder else parent_path
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / ("_index.md" if is_folder else f"{clean}.md")

    # Name collision: disambiguate with the node id instead of overwriting.
    if target.exists():
        target = target.with_name(f"{target.stem}_{node_id}{target.suffix}")

    target.write_text(content, encoding="utf-8")
    return target


def apply_rich_format(text: str, attrs: dict) -> str:
    """Wrap *text* in Markdown markup according to rich_text attributes.

    Recognised attributes (all optional), applied in this order so that
    later markers wrap earlier ones:

    * ``family == "monospace"``    -> inline code
    * ``weight == "heavy"``        -> bold
    * ``style == "italic"``        -> italic
    * ``strikethrough == "true"``  -> strikethrough
    * ``link`` (non-empty)         -> Markdown link

    Leading and trailing whitespace (including newlines) is kept outside the
    markers, because emphasis such as ``**bold\\n**`` is not rendered by
    Markdown.  Empty or whitespace-only text is returned unchanged.
    """
    if not text:
        return text

    core = text.strip()
    if not core:
        # Nothing visible to format; wrapping whitespace only adds stray markers.
        return text
    lead_len = len(text) - len(text.lstrip())
    lead = text[:lead_len]
    trail = text[lead_len + len(core):]

    weight        = attrs.get("weight", "")
    style         = attrs.get("style", "")
    strikethrough = attrs.get("strikethrough", "")
    family        = attrs.get("family", "")
    link          = attrs.get("link", "")

    if family == "monospace":
        core = f"`{core}`"
    if weight == "heavy":
        core = f"**{core}**"
    if style == "italic":
        core = f"*{core}*"
    if strikethrough == "true":
        core = f"~~{core}~~"
    if link:
        # The link value starts with a type tag ("webs <url>", "node <id>").
        # Drop only that leading tag; a blanket replace() would also remove
        # the same characters if they occur inside the target itself.
        kind, _, target = link.partition(" ")
        url = target if kind in ("webs", "node") and target else link
        core = f"[{core}]({url})"

    return f"{lead}{core}{trail}"


# ─── Parser SQLite (.ctb) ──────────────────────────────────────────────────────

def _rich_xml_to_markdown(raw_txt: str) -> str:
    """Convert a rich-text XML payload to Markdown; return it as-is if unusable."""
    # The payload may be a complete XML document (an XML declaration cannot be
    # nested inside a wrapper element) or a bare sequence of <rich_text>
    # fragments, so try it unchanged first and wrapped in a synthetic root next.
    candidates = ((raw_txt, False), (f"<root>{raw_txt}</root>", True))
    for candidate, wrapped in candidates:
        try:
            root_elem = ET.fromstring(candidate)
        except ET.ParseError:
            continue
        runs = list(root_elem.iter("rich_text"))
        if wrapped and not runs:
            # Plain text that merely happened to parse once wrapped: keep it
            # instead of silently producing an empty note.
            return raw_txt
        return "".join(apply_rich_format(run.text or "", run.attrib) for run in runs)
    return raw_txt


def parse_ctb(filepath: str, output_root: Path):
    """Export a CherryTree SQLite document (.ctb) as a tree of Markdown files.

    Reads every row of the ``node`` table and the parent links from the
    ``children`` table, then writes each node below *output_root* through
    ``write_node``.  Nodes that have children become folders; each node is
    placed under the sanitized names of its ancestors.

    Rich-text nodes are converted with ``apply_rich_format``; other nodes are
    emitted inside a fenced code block tagged with their syntax, except for
    ``plain-text`` nodes, which are written verbatim.

    Raises:
        sqlite3.Error: if the file cannot be read as the expected database.
    """
    conn = sqlite3.connect(filepath)
    try:
        cur = conn.cursor()

        cur.execute("SELECT node_id, name, txt, syntax, is_richtxt FROM node")
        nodes = {
            node_id: {
                "name": name or "",
                "txt": txt or "",
                "syntax": syntax,
                # NOTE(review): is_richtxt appears to be a bitfield in which
                # only bit 0 means "rich text" and higher bits carry title
                # styling -- confirm against the CherryTree storage code.
                "rich": bool((is_richtxt or 0) & 1),
            }
            for node_id, name, txt, syntax, is_richtxt in cur.fetchall()
        }

        cur.execute("SELECT node_id, father_id, sequence FROM children ORDER BY father_id, sequence")
        node_parent = {node_id: father_id for node_id, father_id, _ in cur.fetchall()}
    finally:
        # Close the connection even when a query fails (e.g. not a SQLite file).
        conn.close()

    # Every id that appears as a father has at least one child.
    fathers = set(node_parent.values())

    def get_path(node_id: int) -> Path:
        """Return the directory that should contain *node_id*'s file or folder."""
        ancestors = []
        seen = {node_id}
        current = node_parent.get(node_id, 0)
        # father_id 0 marks a top-level node.  Also stop at a parent that has
        # no row in `node` or that was already visited, so a damaged database
        # cannot raise KeyError or loop forever.
        while current != 0 and current in nodes and current not in seen:
            seen.add(current)
            ancestors.append(sanitize_name(nodes[current]["name"]))
            current = node_parent.get(current, 0)
        ancestors.reverse()
        return output_root.joinpath(*ancestors)

    for node_id, data in nodes.items():
        name    = data["name"]
        raw_txt = data["txt"]

        if data["rich"]:
            content = _rich_xml_to_markdown(raw_txt)
        else:
            syntax  = data["syntax"] or "text"
            lang    = "" if syntax == "plain-text" else syntax
            content = f"```{lang}\n{raw_txt}\n```" if lang else raw_txt

        write_node(node_id, name, content, get_path(node_id), is_folder=node_id in fathers)
        print(f"  OK  {name}")

    print(f"\nExportado en: {output_root.resolve()}")


# ─── Parser XML (.ctd / .ctx) ──────────────────────────────────────────────────

def parse_ctd(filepath: str, output_root: Path):
    """Export a CherryTree XML document as a tree of Markdown files.

    Walks the nested ``<node>`` elements depth-first.  Each node's direct
    ``<rich_text>`` children are converted with ``apply_rich_format`` and
    written through ``write_node``; a node that contains child nodes becomes
    a folder and its children are exported inside it.
    """
    document_root = ET.parse(filepath).getroot()

    def export_subtree(element, target_dir: Path):
        """Write *element* into *target_dir*, then recurse into its children."""
        title = element.get("name", "sin_nombre")
        uid = element.get("unique_id", "0")

        fragments = (
            apply_rich_format(run.text or "", run.attrib)
            for run in element.findall("rich_text")
        )
        body = "".join(piece for piece in fragments if piece)

        children = element.findall("node")
        is_parent = bool(children)

        write_node(int(uid), title, body, target_dir, is_folder=is_parent)
        print(f"  OK  {title}")

        if not is_parent:
            return

        # Children live in the folder that write_node created for this node.
        subdir = target_dir / sanitize_name(title)
        for child in children:
            export_subtree(child, subdir)

    output_root.mkdir(parents=True, exist_ok=True)
    for top_level in document_root.findall("node"):
        export_subtree(top_level, output_root)

    print(f"\nExportado en: {output_root.resolve()}")


# ─── Main ──────────────────────────────────────────────────────────────────────

def main():
    """Convert INPUT_FILE into a Markdown tree under OUTPUT_DIR.

    The parser is chosen from the file extension: ``.ctb`` (SQLite) or
    ``.ctd`` (XML).  The input is validated before anything is created on
    disk.  Exits with status 1 when the input file is missing, is a
    password-protected CherryTree document, or has an unknown extension.
    """
    input_path = Path(INPUT_FILE)
    output_path = Path(OUTPUT_DIR)

    if not input_path.exists():
        print(f"ERROR: Archivo no encontrado: {input_path}")
        sys.exit(1)

    ext = input_path.suffix.lower()

    # .ctx / .ctz are CherryTree's password-protected variants; they are
    # encrypted archives, so neither the SQLite nor the XML parser can read
    # them.  Report that clearly instead of failing inside a parser.
    if ext in (".ctx", ".ctz"):
        print(f"ERROR: Archivo protegido con contraseña: {input_path}")
        print("Guarde el documento sin contraseña (.ctb o .ctd) y vuelva a intentarlo.")
        sys.exit(1)

    if ext not in (".ctb", ".ctd"):
        print(f"ERROR: Formato no reconocido: {ext}")
        print("Formatos soportados: .ctb (SQLite), .ctd (XML)")
        sys.exit(1)

    # Create the destination only after the input has been validated.
    output_path.mkdir(parents=True, exist_ok=True)

    print(f"Procesando : {input_path}")
    print(f"Destino    : {output_path}\n")

    if ext == ".ctb":
        parse_ctb(str(input_path), output_path)
    else:
        parse_ctd(str(input_path), output_path)


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()


Check out more IT nuggets at www.ruizpelaez.com.



A bit (1,0) about me:

I solve IT problems. I make your Company better.