Skip to content

API Reference

This page provides comprehensive API documentation for memg-core.

Public API

The main interface for memg-core is through the public API module:

Public API for memg-core - designed for long-running servers.

Provides MemgClient for explicit initialization and module-level functions for environment-based usage.

_CLIENT = None module-attribute

DatabaseClients

DDL-only database setup - creates schemas and returns raw clients.

NO INTERFACES - pure schema creation only. Consumer must create interfaces separately using returned raw clients.

Attributes:

Name Type Description
qdrant_client QdrantClient | None

Pre-initialized QdrantClient.

kuzu_connection Connection | None

Pre-initialized Kuzu connection.

db_name

Database name.

qdrant_path

Path to Qdrant database.

kuzu_path

Path to Kuzu database.

yaml_translator

YAML translator instance.

Source code in src/memg_core/utils/db_clients.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
class DatabaseClients:
    """DDL-only database setup - creates schemas and returns raw clients.

    NO INTERFACES - pure schema creation only.
    Consumer must create interfaces separately using returned raw clients.

    Attributes:
        qdrant_client: Pre-initialized QdrantClient.
        kuzu_connection: Pre-initialized Kuzu connection.
        db_name: Database name.
        qdrant_path: Path to Qdrant database.
        kuzu_path: Path to Kuzu database.
        yaml_translator: YAML translator instance.
    """

    def __init__(self, yaml_path: str | None = None):
        """Create DDL-only database client wrapper.

        Args:
            yaml_path: Path to YAML schema file. User must provide - no defaults.
        """
        self.qdrant_client: QdrantClient | None = None
        self.kuzu_connection: kuzu.Connection | None = None
        self.db_name = "memg"
        self.qdrant_path = "qdrant"
        self.kuzu_path = "kuzu"

        self.yaml_translator = YamlTranslator(yaml_path) if yaml_path else None

    def init_dbs(self, db_path: str, db_name: str):
        """Initialize databases with structured paths.

        Args:
            db_path: Base database directory.
            db_name: Database name (used for collection and file names).
        """
        # Structure paths
        qdrant_path = os.path.join(db_path, "qdrant")
        kuzu_path = os.path.join(db_path, "kuzu", db_name)

        # Store paths and names
        self.qdrant_path = qdrant_path
        self.kuzu_path = kuzu_path
        self.db_name = db_name

        # Ensure directories exist
        os.makedirs(qdrant_path, exist_ok=True)
        os.makedirs(Path(kuzu_path).parent, exist_ok=True)

        # Create raw database clients directly
        qdrant_client = QdrantClient(path=qdrant_path)
        kuzu_db = kuzu.Database(kuzu_path)
        kuzu_conn = kuzu.Connection(kuzu_db)

        # Store raw clients for interface creation
        self.qdrant_client = qdrant_client
        self.kuzu_connection = kuzu_conn

        # DDL operations - create collection and tables
        self._setup_qdrant_collection(qdrant_client, self.db_name)
        self._setup_kuzu_tables_with_graph_register(kuzu_conn)

    def _setup_qdrant_collection(self, client: QdrantClient, collection_name: str) -> None:
        """Create Qdrant collection if it doesn't exist.

        Args:
            client: Qdrant client instance.
            collection_name: Name of the collection to create.

        Raises:
            DatabaseError: If collection creation fails.
        """
        try:
            config = get_config()
            vector_dimension = config.memg.vector_dimension

            collections = client.get_collections()
            if not any(col.name == collection_name for col in collections.collections):
                client.create_collection(
                    collection_name=collection_name,
                    vectors_config=VectorParams(size=vector_dimension, distance=Distance.COSINE),
                )
        except Exception as e:
            raise DatabaseError(
                "Failed to setup Qdrant collection",
                operation="_setup_qdrant_collection",
                original_error=e,
            ) from e

    def _setup_kuzu_tables_with_graph_register(self, conn: kuzu.Connection) -> None:
        """Create Kuzu tables using GraphRegister for DDL generation.

        Args:
            conn: Kuzu database connection.

        Raises:
            DatabaseError: If YAML translator not initialized or table creation fails.
        """
        if not self.yaml_translator:
            raise DatabaseError(
                "YAML translator not initialized. Provide yaml_path to constructor.",
                operation="_setup_kuzu_tables_with_graph_register",
            )

        try:
            # Create GraphRegister with YamlTranslator for complete DDL generation
            graph_register = GraphRegister(yaml_translator=self.yaml_translator)

            # Generate all DDL statements using GraphRegister
            ddl_statements = graph_register.generate_all_ddl()

            # Execute all DDL statements
            for ddl in ddl_statements:
                conn.execute(ddl)

        except Exception as e:
            raise DatabaseError(
                "Failed to setup Kuzu tables using GraphRegister",
                operation="_setup_kuzu_tables_with_graph_register",
                original_error=e,
            ) from e

    # ===== INTERFACE ACCESS METHODS =====
    # After DDL operations, provide access to CRUD interfaces

    def get_qdrant_interface(self) -> QdrantInterface:
        """Get Qdrant interface using the initialized client.

        Returns:
            QdrantInterface: Configured with the DDL-created client and collection.

        Raises:
            DatabaseError: If client not initialized (call init_dbs first).
        """
        if self.qdrant_client is None:
            raise DatabaseError(
                "Qdrant client not initialized. Call init_dbs() first.",
                operation="get_qdrant_interface",
            )
        return QdrantInterface(self.qdrant_client, self.db_name)

    def get_kuzu_interface(self) -> KuzuInterface:
        """Get Kuzu interface using the initialized connection.

        Returns:
            KuzuInterface: Configured with the DDL-created connection.

        Raises:
            DatabaseError: If connection not initialized (call init_dbs first).
        """
        if self.kuzu_connection is None:
            raise DatabaseError(
                "Kuzu connection not initialized. Call init_dbs() first.",
                operation="get_kuzu_interface",
            )
        return KuzuInterface(self.kuzu_connection, self.yaml_translator)

    def get_embedder(self) -> Embedder:
        """Get embedder instance.

        Returns:
            Embedder: Instance for generating vectors.
        """
        return Embedder()

    def get_yaml_translator(self) -> YamlTranslator:
        """Get the YAML translator used for schema operations.

        Returns:
            YamlTranslator: Instance used during DDL operations.

        Raises:
            DatabaseError: If YAML translator not initialized.
        """
        if self.yaml_translator is None:
            raise DatabaseError(
                "YAML translator not initialized. Provide yaml_path to constructor.",
                operation="get_yaml_translator",
            )
        return self.yaml_translator

    def close(self):
        """Close all database connections and cleanup resources.

        Should be called when database clients are no longer needed.
        """
        if self.qdrant_client is not None:
            with suppress(Exception):
                # Ignore cleanup errors - best effort
                self.qdrant_client.close()
            self.qdrant_client = None

        if self.kuzu_connection is not None:
            with suppress(Exception):
                # Ignore cleanup errors - best effort
                self.kuzu_connection.close()
            self.kuzu_connection = None

__init__(yaml_path=None)

Create DDL-only database client wrapper.

Parameters:

Name Type Description Default
yaml_path str | None

Path to YAML schema file. User must provide - no defaults.

None
Source code in src/memg_core/utils/db_clients.py
40
41
42
43
44
45
46
47
48
49
50
51
52
def __init__(self, yaml_path: str | None = None):
    """Create DDL-only database client wrapper.

    Args:
        yaml_path: Path to YAML schema file. User must provide - no defaults.
    """
    self.qdrant_client: QdrantClient | None = None
    self.kuzu_connection: kuzu.Connection | None = None
    self.db_name = "memg"
    self.qdrant_path = "qdrant"
    self.kuzu_path = "kuzu"

    self.yaml_translator = YamlTranslator(yaml_path) if yaml_path else None

close()

Close all database connections and cleanup resources.

Should be called when database clients are no longer needed.

Source code in src/memg_core/utils/db_clients.py
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
def close(self):
    """Close all database connections and cleanup resources.

    Should be called when database clients are no longer needed.
    """
    if self.qdrant_client is not None:
        with suppress(Exception):
            # Ignore cleanup errors - best effort
            self.qdrant_client.close()
        self.qdrant_client = None

    if self.kuzu_connection is not None:
        with suppress(Exception):
            # Ignore cleanup errors - best effort
            self.kuzu_connection.close()
        self.kuzu_connection = None

get_embedder()

Get embedder instance.

Returns:

Name Type Description
Embedder Embedder

Instance for generating vectors.

Source code in src/memg_core/utils/db_clients.py
182
183
184
185
186
187
188
def get_embedder(self) -> Embedder:
    """Get embedder instance.

    Returns:
        Embedder: Instance for generating vectors.
    """
    return Embedder()

get_kuzu_interface()

Get Kuzu interface using the initialized connection.

Returns:

Name Type Description
KuzuInterface KuzuInterface

Configured with the DDL-created connection.

Raises:

Type Description
DatabaseError

If connection not initialized (call init_dbs first).

Source code in src/memg_core/utils/db_clients.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def get_kuzu_interface(self) -> KuzuInterface:
    """Get Kuzu interface using the initialized connection.

    Returns:
        KuzuInterface: Configured with the DDL-created connection.

    Raises:
        DatabaseError: If connection not initialized (call init_dbs first).
    """
    if self.kuzu_connection is None:
        raise DatabaseError(
            "Kuzu connection not initialized. Call init_dbs() first.",
            operation="get_kuzu_interface",
        )
    return KuzuInterface(self.kuzu_connection, self.yaml_translator)

get_qdrant_interface()

Get Qdrant interface using the initialized client.

Returns:

Name Type Description
QdrantInterface QdrantInterface

Configured with the DDL-created client and collection.

Raises:

Type Description
DatabaseError

If client not initialized (call init_dbs first).

Source code in src/memg_core/utils/db_clients.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
def get_qdrant_interface(self) -> QdrantInterface:
    """Get Qdrant interface using the initialized client.

    Returns:
        QdrantInterface: Configured with the DDL-created client and collection.

    Raises:
        DatabaseError: If client not initialized (call init_dbs first).
    """
    if self.qdrant_client is None:
        raise DatabaseError(
            "Qdrant client not initialized. Call init_dbs() first.",
            operation="get_qdrant_interface",
        )
    return QdrantInterface(self.qdrant_client, self.db_name)

get_yaml_translator()

Get the YAML translator used for schema operations.

Returns:

Name Type Description
YamlTranslator YamlTranslator

Instance used during DDL operations.

Raises:

Type Description
DatabaseError

If YAML translator not initialized.

Source code in src/memg_core/utils/db_clients.py
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
def get_yaml_translator(self) -> YamlTranslator:
    """Get the YAML translator used for schema operations.

    Returns:
        YamlTranslator: Instance used during DDL operations.

    Raises:
        DatabaseError: If YAML translator not initialized.
    """
    if self.yaml_translator is None:
        raise DatabaseError(
            "YAML translator not initialized. Provide yaml_path to constructor.",
            operation="get_yaml_translator",
        )
    return self.yaml_translator

init_dbs(db_path, db_name)

Initialize databases with structured paths.

Parameters:

Name Type Description Default
db_path str

Base database directory.

required
db_name str

Database name (used for collection and file names).

required
Source code in src/memg_core/utils/db_clients.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def init_dbs(self, db_path: str, db_name: str):
    """Initialize databases with structured paths.

    Args:
        db_path: Base database directory.
        db_name: Database name (used for collection and file names).
    """
    # Structure paths
    qdrant_path = os.path.join(db_path, "qdrant")
    kuzu_path = os.path.join(db_path, "kuzu", db_name)

    # Store paths and names
    self.qdrant_path = qdrant_path
    self.kuzu_path = kuzu_path
    self.db_name = db_name

    # Ensure directories exist
    os.makedirs(qdrant_path, exist_ok=True)
    os.makedirs(Path(kuzu_path).parent, exist_ok=True)

    # Create raw database clients directly
    qdrant_client = QdrantClient(path=qdrant_path)
    kuzu_db = kuzu.Database(kuzu_path)
    kuzu_conn = kuzu.Connection(kuzu_db)

    # Store raw clients for interface creation
    self.qdrant_client = qdrant_client
    self.kuzu_connection = kuzu_conn

    # DDL operations - create collection and tables
    self._setup_qdrant_collection(qdrant_client, self.db_name)
    self._setup_kuzu_tables_with_graph_register(kuzu_conn)

MemgClient

Client for memg-core operations - initialize once, use throughout server lifetime.

Provides a clean interface for memory operations with explicit resource management.

Source code in src/memg_core/api/public.py
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
class MemgClient:
    """Client for memg-core operations - initialize once, use throughout server lifetime.

    Provides a clean interface for memory operations with explicit resource management.
    """

    def __init__(self, yaml_path: str, db_path: str):
        """Initialize client for long-running server usage.

        Args:
            yaml_path: Path to YAML schema configuration file.
            db_path: Base directory path for database storage.
        """
        self._db_clients = DatabaseClients(yaml_path=yaml_path)
        self._db_clients.init_dbs(db_path=db_path, db_name=self._db_clients.db_name)

        self._memory_service = create_memory_service(self._db_clients)
        self._search_service = create_search_service(self._db_clients)

        if not all([self._memory_service, self._search_service]):
            raise RuntimeError("Failed to initialize memg-core services")

    def add_memory(self, memory_type: str, payload: dict[str, Any], user_id: str) -> str:
        """Add memory and return HRID.

        Args:
            memory_type: Entity type from YAML schema (e.g., 'task', 'note').
            payload: Memory data conforming to YAML schema.
            user_id: Owner of the memory.

        Returns:
            str: Human-readable ID (HRID) for the created memory.
        """
        return self._memory_service.add_memory(memory_type, payload, user_id)

    def search(
        self,
        query: str,
        user_id: str,
        memory_type: str | None = None,
        limit: int = 10,
        score_threshold: float | None = None,
        decay_threshold: float | None = None,
        **kwargs,
    ) -> SearchResult:
        """Search memories with explicit seed/neighbor separation.

        Args:
            query: Text to search for.
            user_id: User ID for filtering results.
            memory_type: Optional memory type filter.
            limit: Maximum number of results to return.
            score_threshold: Minimum similarity score threshold (0.0-1.0).
            decay_threshold: Minimum neighbor relevance threshold (0.0-1.0).
            **kwargs: Additional search parameters.

        Returns:
            SearchResult: Search result with explicit seed/neighbor separation,
                including full payloads for seeds and relationships.
        """
        clean_query = query.strip() if query else ""
        return self._search_service.search(
            clean_query,
            user_id,
            memory_type=memory_type,
            limit=limit,
            score_threshold=score_threshold,
            decay_threshold=decay_threshold,
            **kwargs,
        )

    def delete_memory(self, hrid: str, user_id: str, memory_type: str | None = None) -> bool:
        """Delete memory by HRID.

        Args:
            hrid: Human-readable ID of the memory to delete.
            user_id: User ID for ownership verification.
            memory_type: Optional memory type hint.

        Returns:
            bool: True if deletion succeeded, False otherwise.
        """
        if memory_type is None:
            memory_type = hrid.split("_")[0].lower()
        return self._memory_service.delete_memory(hrid, memory_type, user_id)

    def update_memory(
        self,
        hrid: str,
        payload_updates: dict[str, Any],
        user_id: str,
        memory_type: str | None = None,
    ) -> bool:
        """Update memory with partial payload changes (patch-style update).

        Args:
            hrid: Memory HRID to update.
            payload_updates: Dictionary of fields to update (only changed fields).
            user_id: User ID for ownership verification.
            memory_type: Optional memory type hint (inferred from HRID if not provided).

        Returns:
            bool: True if update succeeded, False otherwise.
        """
        return self._memory_service.update_memory(hrid, payload_updates, user_id, memory_type)

    def add_relationship(
        self,
        from_memory_hrid: str,
        to_memory_hrid: str,
        relation_type: str,
        from_memory_type: str,
        to_memory_type: str,
        user_id: str,
        properties: dict[str, Any] | None = None,
    ) -> None:
        """Add relationship between memories.

        Args:
            from_memory_hrid: Source memory HRID.
            to_memory_hrid: Target memory HRID.
            relation_type: Relationship type from YAML schema.
            from_memory_type: Source memory entity type.
            to_memory_type: Target memory entity type.
            user_id: User ID for ownership verification.
            properties: Optional relationship properties.
        """
        self._memory_service.add_relationship(
            from_memory_hrid,
            to_memory_hrid,
            relation_type,
            from_memory_type,
            to_memory_type,
            user_id,
            properties,
        )

    def delete_relationship(
        self,
        from_memory_hrid: str,
        to_memory_hrid: str,
        relation_type: str,
        from_memory_type: str | None = None,
        to_memory_type: str | None = None,
        user_id: str | None = None,
    ) -> bool:
        """Delete relationship between memories.

        Args:
            from_memory_hrid: Source memory HRID.
            to_memory_hrid: Target memory HRID.
            relation_type: Relationship type from YAML schema.
            from_memory_type: Source memory entity type (inferred from HRID if not provided).
            to_memory_type: Target memory entity type (inferred from HRID if not provided).
            user_id: User ID for ownership verification (required).

        Returns:
            bool: True if deletion succeeded, False if relationship not found.
        """
        return self._memory_service.delete_relationship(
            from_memory_hrid,
            to_memory_hrid,
            relation_type,
            from_memory_type,
            to_memory_type,
            user_id,
        )

    def get_memory(
        self,
        hrid: str,
        user_id: str,
        memory_type: str | None = None,
    ) -> dict[str, Any] | None:
        """Get a single memory by HRID.

        Args:
            hrid: Human-readable identifier of the memory.
            user_id: User ID for ownership verification.
            memory_type: Optional memory type hint (inferred from HRID if not provided).

        Returns:
            dict[str, Any] | None: Memory data with full payload, or None if not found.
        """
        return self._search_service.get_memory(hrid, user_id, memory_type)

    def get_memories(
        self,
        user_id: str,
        memory_type: str | None = None,
        filters: dict[str, Any] | None = None,
        limit: int = 50,
        offset: int = 0,
        include_neighbors: bool = False,
        hops: int = 1,
    ) -> list[dict[str, Any]]:
        """Get multiple memories with filtering and optional graph expansion.

        Args:
            user_id: User ID for ownership verification.
            memory_type: Optional memory type filter (e.g., "task", "note").
            filters: Optional field filters (e.g., {"status": "open", "priority": "high"}).
            limit: Maximum number of memories to return (default 50).
            offset: Number of memories to skip for pagination (default 0).
            include_neighbors: Whether to include neighbor nodes via graph traversal.
            hops: Number of hops for neighbor expansion (default 1).

        Returns:
            list[dict[str, Any]]: List of memory data with full payloads.
        """
        return self._search_service.get_memories(
            user_id, memory_type, filters, limit, offset, include_neighbors, hops
        )

    def close(self):
        """Close client and cleanup resources.

        Should be called when the client is no longer needed to free database connections.
        """
        if hasattr(self, "_db_clients") and self._db_clients:
            self._db_clients.close()

__init__(yaml_path, db_path)

Initialize client for long-running server usage.

Parameters:

Name Type Description Default
yaml_path str

Path to YAML schema configuration file.

required
db_path str

Base directory path for database storage.

required
Source code in src/memg_core/api/public.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def __init__(self, yaml_path: str, db_path: str):
    """Initialize client for long-running server usage.

    Args:
        yaml_path: Path to YAML schema configuration file.
        db_path: Base directory path for database storage.
    """
    self._db_clients = DatabaseClients(yaml_path=yaml_path)
    self._db_clients.init_dbs(db_path=db_path, db_name=self._db_clients.db_name)

    self._memory_service = create_memory_service(self._db_clients)
    self._search_service = create_search_service(self._db_clients)

    if not all([self._memory_service, self._search_service]):
        raise RuntimeError("Failed to initialize memg-core services")

add_memory(memory_type, payload, user_id)

Add memory and return HRID.

Parameters:

Name Type Description Default
memory_type str

Entity type from YAML schema (e.g., 'task', 'note').

required
payload dict[str, Any]

Memory data conforming to YAML schema.

required
user_id str

Owner of the memory.

required

Returns:

Name Type Description
str str

Human-readable ID (HRID) for the created memory.

Source code in src/memg_core/api/public.py
41
42
43
44
45
46
47
48
49
50
51
52
def add_memory(self, memory_type: str, payload: dict[str, Any], user_id: str) -> str:
    """Add memory and return HRID.

    Args:
        memory_type: Entity type from YAML schema (e.g., 'task', 'note').
        payload: Memory data conforming to YAML schema.
        user_id: Owner of the memory.

    Returns:
        str: Human-readable ID (HRID) for the created memory.
    """
    return self._memory_service.add_memory(memory_type, payload, user_id)

add_relationship(from_memory_hrid, to_memory_hrid, relation_type, from_memory_type, to_memory_type, user_id, properties=None)

Add relationship between memories.

Parameters:

Name Type Description Default
from_memory_hrid str

Source memory HRID.

required
to_memory_hrid str

Target memory HRID.

required
relation_type str

Relationship type from YAML schema.

required
from_memory_type str

Source memory entity type.

required
to_memory_type str

Target memory entity type.

required
user_id str

User ID for ownership verification.

required
properties dict[str, Any] | None

Optional relationship properties.

None
Source code in src/memg_core/api/public.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
def add_relationship(
    self,
    from_memory_hrid: str,
    to_memory_hrid: str,
    relation_type: str,
    from_memory_type: str,
    to_memory_type: str,
    user_id: str,
    properties: dict[str, Any] | None = None,
) -> None:
    """Add relationship between memories.

    Args:
        from_memory_hrid: Source memory HRID.
        to_memory_hrid: Target memory HRID.
        relation_type: Relationship type from YAML schema.
        from_memory_type: Source memory entity type.
        to_memory_type: Target memory entity type.
        user_id: User ID for ownership verification.
        properties: Optional relationship properties.
    """
    self._memory_service.add_relationship(
        from_memory_hrid,
        to_memory_hrid,
        relation_type,
        from_memory_type,
        to_memory_type,
        user_id,
        properties,
    )

close()

Close client and cleanup resources.

Should be called when the client is no longer needed to free database connections.

Source code in src/memg_core/api/public.py
233
234
235
236
237
238
239
def close(self):
    """Close client and cleanup resources.

    Should be called when the client is no longer needed to free database connections.
    """
    if hasattr(self, "_db_clients") and self._db_clients:
        self._db_clients.close()

delete_memory(hrid, user_id, memory_type=None)

Delete memory by HRID.

Parameters:

Name Type Description Default
hrid str

Human-readable ID of the memory to delete.

required
user_id str

User ID for ownership verification.

required
memory_type str | None

Optional memory type hint.

None

Returns:

Name Type Description
bool bool

True if deletion succeeded, False otherwise.

Source code in src/memg_core/api/public.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def delete_memory(self, hrid: str, user_id: str, memory_type: str | None = None) -> bool:
    """Delete memory by HRID.

    Args:
        hrid: Human-readable ID of the memory to delete.
        user_id: User ID for ownership verification.
        memory_type: Optional memory type hint.

    Returns:
        bool: True if deletion succeeded, False otherwise.
    """
    if memory_type is None:
        memory_type = hrid.split("_")[0].lower()
    return self._memory_service.delete_memory(hrid, memory_type, user_id)

delete_relationship(from_memory_hrid, to_memory_hrid, relation_type, from_memory_type=None, to_memory_type=None, user_id=None)

Delete relationship between memories.

Parameters:

Name Type Description Default
from_memory_hrid str

Source memory HRID.

required
to_memory_hrid str

Target memory HRID.

required
relation_type str

Relationship type from YAML schema.

required
from_memory_type str | None

Source memory entity type (inferred from HRID if not provided).

None
to_memory_type str | None

Target memory entity type (inferred from HRID if not provided).

None
user_id str | None

User ID for ownership verification (required).

None

Returns:

Name Type Description
bool bool

True if deletion succeeded, False if relationship not found.

Source code in src/memg_core/api/public.py
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
def delete_relationship(
    self,
    from_memory_hrid: str,
    to_memory_hrid: str,
    relation_type: str,
    from_memory_type: str | None = None,
    to_memory_type: str | None = None,
    user_id: str | None = None,
) -> bool:
    """Delete relationship between memories.

    Args:
        from_memory_hrid: Source memory HRID.
        to_memory_hrid: Target memory HRID.
        relation_type: Relationship type from YAML schema.
        from_memory_type: Source memory entity type (inferred from HRID if not provided).
        to_memory_type: Target memory entity type (inferred from HRID if not provided).
        user_id: User ID for ownership verification (required).

    Returns:
        bool: True if deletion succeeded, False if relationship not found.
    """
    return self._memory_service.delete_relationship(
        from_memory_hrid,
        to_memory_hrid,
        relation_type,
        from_memory_type,
        to_memory_type,
        user_id,
    )

get_memories(user_id, memory_type=None, filters=None, limit=50, offset=0, include_neighbors=False, hops=1)

Get multiple memories with filtering and optional graph expansion.

Parameters:

Name Type Description Default
user_id str

User ID for ownership verification.

required
memory_type str | None

Optional memory type filter (e.g., "task", "note").

None
filters dict[str, Any] | None

Optional field filters (e.g., {"status": "open", "priority": "high"}).

None
limit int

Maximum number of memories to return (default 50).

50
offset int

Number of memories to skip for pagination (default 0).

0
include_neighbors bool

Whether to include neighbor nodes via graph traversal.

False
hops int

Number of hops for neighbor expansion (default 1).

1

Returns:

Type Description
list[dict[str, Any]]

list[dict[str, Any]]: List of memory data with full payloads.

Source code in src/memg_core/api/public.py
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
def get_memories(
    self,
    user_id: str,
    memory_type: str | None = None,
    filters: dict[str, Any] | None = None,
    limit: int = 50,
    offset: int = 0,
    include_neighbors: bool = False,
    hops: int = 1,
) -> list[dict[str, Any]]:
    """Get multiple memories with filtering and optional graph expansion.

    Args:
        user_id: User ID for ownership verification.
        memory_type: Optional memory type filter (e.g., "task", "note").
        filters: Optional field filters (e.g., {"status": "open", "priority": "high"}).
        limit: Maximum number of memories to return (default 50).
        offset: Number of memories to skip for pagination (default 0).
        include_neighbors: Whether to include neighbor nodes via graph traversal.
        hops: Number of hops for neighbor expansion (default 1).

    Returns:
        list[dict[str, Any]]: List of memory data with full payloads.
    """
    return self._search_service.get_memories(
        user_id, memory_type, filters, limit, offset, include_neighbors, hops
    )

get_memory(hrid, user_id, memory_type=None)

Get a single memory by HRID.

Parameters:

Name Type Description Default
hrid str

Human-readable identifier of the memory.

required
user_id str

User ID for ownership verification.

required
memory_type str | None

Optional memory type hint (inferred from HRID if not provided).

None

Returns:

Type Description
dict[str, Any] | None

dict[str, Any] | None: Memory data with full payload, or None if not found.

Source code in src/memg_core/api/public.py
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
def get_memory(
    self,
    hrid: str,
    user_id: str,
    memory_type: str | None = None,
) -> dict[str, Any] | None:
    """Get a single memory by HRID.

    Args:
        hrid: Human-readable identifier of the memory.
        user_id: User ID for ownership verification.
        memory_type: Optional memory type hint (inferred from HRID if not provided).

    Returns:
        dict[str, Any] | None: Memory data with full payload, or None if not found.
    """
    return self._search_service.get_memory(hrid, user_id, memory_type)

search(query, user_id, memory_type=None, limit=10, score_threshold=None, decay_threshold=None, **kwargs)

Search memories with explicit seed/neighbor separation.

Parameters:

Name Type Description Default
query str

Text to search for.

required
user_id str

User ID for filtering results.

required
memory_type str | None

Optional memory type filter.

None
limit int

Maximum number of results to return.

10
score_threshold float | None

Minimum similarity score threshold (0.0-1.0).

None
decay_threshold float | None

Minimum neighbor relevance threshold (0.0-1.0).

None
**kwargs

Additional search parameters.

{}

Returns:

Name Type Description
SearchResult SearchResult

Search result with explicit seed/neighbor separation, including full payloads for seeds and relationships.

Source code in src/memg_core/api/public.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def search(
    self,
    query: str,
    user_id: str,
    memory_type: str | None = None,
    limit: int = 10,
    score_threshold: float | None = None,
    decay_threshold: float | None = None,
    **kwargs,
) -> SearchResult:
    """Search memories with explicit seed/neighbor separation.

    Args:
        query: Text to search for.
        user_id: User ID for filtering results.
        memory_type: Optional memory type filter.
        limit: Maximum number of results to return.
        score_threshold: Minimum similarity score threshold (0.0-1.0).
        decay_threshold: Minimum neighbor relevance threshold (0.0-1.0).
        **kwargs: Additional search parameters.

    Returns:
        SearchResult: Search result with explicit seed/neighbor separation,
            including full payloads for seeds and relationships.
    """
    clean_query = query.strip() if query else ""
    return self._search_service.search(
        clean_query,
        user_id,
        memory_type=memory_type,
        limit=limit,
        score_threshold=score_threshold,
        decay_threshold=decay_threshold,
        **kwargs,
    )

update_memory(hrid, payload_updates, user_id, memory_type=None)

Update memory with partial payload changes (patch-style update).

Parameters:

Name Type Description Default
hrid str

Memory HRID to update.

required
payload_updates dict[str, Any]

Dictionary of fields to update (only changed fields).

required
user_id str

User ID for ownership verification.

required
memory_type str | None

Optional memory type hint (inferred from HRID if not provided).

None

Returns:

Name Type Description
bool bool

True if update succeeded, False otherwise.

Source code in src/memg_core/api/public.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
def update_memory(
    self,
    hrid: str,
    payload_updates: dict[str, Any],
    user_id: str,
    memory_type: str | None = None,
) -> bool:
    """Update memory with partial payload changes (patch-style update).

    Args:
        hrid: Memory HRID to update.
        payload_updates: Dictionary of fields to update (only changed fields).
        user_id: User ID for ownership verification.
        memory_type: Optional memory type hint (inferred from HRID if not provided).

    Returns:
        bool: True if update succeeded, False otherwise.
    """
    return self._memory_service.update_memory(hrid, payload_updates, user_id, memory_type)

MemoryService

Unified memory service - handles indexing, search, and deletion operations.

Provides a clean, class-based interface for all memory operations using DatabaseClients for both DDL initialization and CRUD interface access. Eliminates the need for scattered interface creation.

Attributes:

Name Type Description
qdrant

Qdrant interface instance.

kuzu

Kuzu interface instance.

embedder

Embedder instance.

yaml_translator

YAML translator instance.

hrid_tracker

HRID tracker instance.

Source code in src/memg_core/core/pipelines/indexer.py
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
class MemoryService:
    """Unified memory service - handles indexing, search, and deletion operations.

    Provides a clean, class-based interface for all memory operations using
    DatabaseClients for both DDL initialization and CRUD interface access.
    Eliminates the need for scattered interface creation.

    Attributes:
        qdrant: Qdrant interface instance.
        kuzu: Kuzu interface instance.
        embedder: Embedder instance.
        yaml_translator: YAML translator instance.
        hrid_tracker: HRID tracker instance.
    """

    def __init__(self, db_clients):
        """Initialize MemoryService with DatabaseClients.

        Args:
            db_clients: DatabaseClients instance (after init_dbs() called).
        """
        if not isinstance(db_clients, DatabaseClients):
            raise TypeError("db_clients must be a DatabaseClients instance")

        # Get interfaces from DatabaseClients (reuses DDL-created clients)
        self.qdrant = db_clients.get_qdrant_interface()
        self.kuzu = db_clients.get_kuzu_interface()
        self.embedder = db_clients.get_embedder()
        self.yaml_translator = db_clients.get_yaml_translator()
        self.hrid_tracker = HridTracker(self.kuzu)

    def add_memory(
        self,
        memory_type: str,
        payload: dict[str, Any],
        user_id: str,
        collection: str | None = None,
    ) -> str:
        """Add a memory to both graph and vector storage.

        Args:
            memory_type: Entity type from YAML schema (e.g., 'task', 'note').
            payload: Memory data conforming to YAML schema.
            user_id: Owner of the memory.
            collection: Optional Qdrant collection override.

        Returns:
            str: Memory HRID (Human-readable ID string).

        Raises:
            ProcessingError: If validation fails or storage operations fail.
        """
        try:
            # Create and validate memory from YAML schema using our instance
            memory = self.yaml_translator.create_memory_from_yaml(memory_type, payload, user_id)

            # Stamp timestamps
            now = datetime.now(UTC)
            if not memory.created_at:
                memory.created_at = now
            memory.updated_at = now

            # Generate HRID using tracker
            hrid = generate_hrid(memory_type, user_id, self.hrid_tracker)

            # Get anchor text from YAML-defined anchor field using our instance
            anchor_text = self.yaml_translator.build_anchor_text(memory)
            if not anchor_text:
                raise ProcessingError(
                    f"Empty anchor text for memory type '{memory_type}'",
                    operation="add_memory",
                    context={"memory_id": memory.id, "memory_type": memory_type},
                )

            # Generate embedding from anchor text
            vector = self.embedder.get_embedding(anchor_text)

            # Create complete flat payload for Qdrant (includes system fields for filtering)
            flat_payload = {
                "user_id": memory.user_id,  # Required for user filtering
                "memory_type": memory.memory_type,  # Required for type filtering
                "created_at": memory.created_at.isoformat(),  # Required for time filtering
                "updated_at": memory.updated_at.isoformat(),  # Required for time filtering
                "hrid": hrid,  # Include HRID for user-facing operations
                **memory.payload,  # Include all YAML-validated entity fields
            }

            # Add to Qdrant (vector storage) with complete payload
            success, _point_id = self.qdrant.add_point(
                vector=vector,
                payload=flat_payload,  # Complete flat payload with system + entity fields
                point_id=memory.id,
                collection=collection,
            )
            if not success:
                raise ProcessingError(
                    "Failed to add memory to vector storage",
                    operation="add_memory",
                    context={"memory_id": memory.id},
                )

            # Add to Kuzu (graph storage) - use entity-specific table
            kuzu_data = {
                "id": memory.id,
                "user_id": memory.user_id,
                "memory_type": memory.memory_type,
                "created_at": memory.created_at.isoformat(),
                "updated_at": memory.updated_at.isoformat(),
                **memory.payload,  # Include all YAML-validated fields
            }
            self.kuzu.add_node(memory_type, kuzu_data)

            # Create HRID mapping after successful storage
            self.hrid_tracker.create_mapping(hrid, memory.id, memory_type, user_id)

            return hrid  # Return HRID, not UUID

        except Exception as e:
            if isinstance(e, ProcessingError):
                raise
            raise ProcessingError(
                "Failed to add memory",
                operation="add_memory",
                context={"memory_type": memory_type, "user_id": user_id},
                original_error=e,
            ) from e

    def update_memory(
        self,
        hrid: str,
        payload_updates: dict[str, Any],
        user_id: str,
        memory_type: str | None = None,
        collection: str | None = None,
    ) -> bool:
        """Update memory with partial payload changes (patch-style update).

        Args:
            hrid: Memory HRID to update.
            payload_updates: Dictionary of fields to update (only changed fields).
            user_id: User ID for ownership verification.
            memory_type: Optional memory type hint (inferred from HRID if not provided).
            collection: Optional Qdrant collection override.

        Returns:
            bool: True if update succeeded.

        Raises:
            ProcessingError: If update fails or memory not found.
        """
        try:
            # Infer memory type from HRID if not provided
            if memory_type is None:
                memory_type = hrid.split("_")[0].lower()

            # Get existing UUID to preserve relationships
            uuid = self.hrid_tracker.get_uuid(hrid, user_id)

            # Get current memory data from Qdrant to merge with updates
            current_point = self.qdrant.get_point(uuid, collection)
            if not current_point:
                raise ProcessingError(
                    f"Memory not found for HRID {hrid}",
                    operation="update_memory",
                    context={"hrid": hrid, "user_id": user_id},
                )

            # Merge current payload with updates
            current_payload = current_point.get("payload", {})
            # Remove system fields from current payload to get user fields only
            user_fields = {
                k: v
                for k, v in current_payload.items()
                if k
                not in (
                    "id",
                    "user_id",
                    "memory_type",
                    "created_at",
                    "updated_at",
                    "hrid",
                )
            }

            # Merge updates into user fields
            updated_user_payload = {**user_fields, **payload_updates}

            # Validate merged payload against YAML schema
            memory = self.yaml_translator.create_memory_from_yaml(
                memory_type, updated_user_payload, user_id
            )
            memory.id = uuid  # Preserve existing UUID for relationships

            # Update timestamps - preserve created_at, update updated_at
            memory.created_at = datetime.fromisoformat(current_payload.get("created_at"))
            memory.updated_at = datetime.now(UTC)

            # Get anchor text for vector update
            anchor_text = self.yaml_translator.build_anchor_text(memory)
            if not anchor_text:
                raise ProcessingError(
                    f"Empty anchor text for memory type '{memory_type}'",
                    operation="update_memory",
                    context={"memory_id": memory.id, "memory_type": memory_type},
                )

            # Generate new embedding from updated anchor text
            vector = self.embedder.get_embedding(anchor_text)

            # Create updated flat payload for Qdrant
            flat_payload = {
                "user_id": memory.user_id,
                "memory_type": memory.memory_type,
                "created_at": memory.created_at.isoformat(),
                "updated_at": memory.updated_at.isoformat(),
                "hrid": hrid,  # Preserve HRID
                **memory.payload,  # Updated and validated payload
            }

            # Update Qdrant point (upsert with same UUID)
            success, _point_id = self.qdrant.add_point(
                vector=vector,
                payload=flat_payload,
                point_id=memory.id,  # Same UUID preserves relationships
                collection=collection,
            )
            if not success:
                raise ProcessingError(
                    "Failed to update memory in vector storage",
                    operation="update_memory",
                    context={"memory_id": memory.id, "hrid": hrid},
                )

            # Update Kuzu node (need to implement update_node method)
            kuzu_data = {
                "id": memory.id,
                "user_id": memory.user_id,
                "memory_type": memory.memory_type,
                "created_at": memory.created_at.isoformat(),
                "updated_at": memory.updated_at.isoformat(),
                **memory.payload,
            }

            # Update Kuzu node using efficient update_node method
            # This preserves relationships and is more efficient than delete+add
            success = self.kuzu.update_node(memory_type, uuid, kuzu_data, user_id)
            if not success:
                raise ProcessingError(
                    "Failed to update memory in graph storage - memory not found",
                    operation="update_memory",
                    context={"memory_id": uuid, "hrid": hrid, "user_id": user_id},
                )

            return True

        except Exception as e:
            if isinstance(e, ProcessingError):
                raise
            raise ProcessingError(
                "Failed to update memory",
                operation="update_memory",
                context={"hrid": hrid, "user_id": user_id, "memory_type": memory_type},
                original_error=e,
            ) from e

    def add_relationship(
        self,
        from_memory_hrid: str,
        to_memory_hrid: str,
        relation_type: str,
        from_memory_type: str,
        to_memory_type: str,
        user_id: str,
        properties: dict[str, Any] | None = None,
    ) -> None:
        """Add a relationship between two memories using HRIDs.

        Args:
            from_memory_hrid: Source memory HRID.
            to_memory_hrid: Target memory HRID.
            relation_type: Relationship type from YAML schema (e.g., 'ANNOTATES').
            from_memory_type: Source memory entity type.
            to_memory_type: Target memory entity type.
            user_id: User ID for ownership verification.
            properties: Optional relationship properties.

        Raises:
            ProcessingError: If relationship creation fails.
        """
        try:
            # Translate HRIDs to UUIDs
            from_uuid = self.hrid_tracker.get_uuid(from_memory_hrid, user_id)
            to_uuid = self.hrid_tracker.get_uuid(to_memory_hrid, user_id)

            self.kuzu.add_relationship(
                from_table=from_memory_type,
                to_table=to_memory_type,
                rel_type=relation_type,
                from_id=from_uuid,
                to_id=to_uuid,
                user_id=user_id,
                props=properties or {},
            )
        except Exception as e:
            raise ProcessingError(
                "Failed to add relationship",
                operation="add_relationship",
                context={
                    "from_hrid": from_memory_hrid,
                    "to_hrid": to_memory_hrid,
                    "relation_type": relation_type,
                },
                original_error=e,
            ) from e

    def delete_relationship(
        self,
        from_memory_hrid: str,
        to_memory_hrid: str,
        relation_type: str,
        from_memory_type: str | None = None,
        to_memory_type: str | None = None,
        user_id: str | None = None,
    ) -> bool:
        """Delete a relationship between two memories using HRIDs.

        Args:
            from_memory_hrid: Source memory HRID.
            to_memory_hrid: Target memory HRID.
            relation_type: Relationship type from YAML schema (e.g., 'ANNOTATES').
            from_memory_type: Source memory entity type (inferred from HRID if not provided).
            to_memory_type: Target memory entity type (inferred from HRID if not provided).
            user_id: User ID for ownership verification (required).

        Returns:
            bool: True if deletion succeeded, False if relationship not found.

        Raises:
            ProcessingError: If relationship deletion fails or parameters invalid.
        """
        try:
            # Validate required user_id
            if not user_id:
                raise ProcessingError(
                    "user_id is required for relationship deletion",
                    operation="delete_relationship",
                    context={"from_hrid": from_memory_hrid, "to_hrid": to_memory_hrid},
                )

            # Infer memory types from HRIDs if not provided
            if from_memory_type is None:
                from_memory_type = from_memory_hrid.split("_")[0].lower()
            if to_memory_type is None:
                to_memory_type = to_memory_hrid.split("_")[0].lower()

            # Translate HRIDs to UUIDs
            from_uuid = self.hrid_tracker.get_uuid(from_memory_hrid, user_id)
            to_uuid = self.hrid_tracker.get_uuid(to_memory_hrid, user_id)

            # Delete relationship using Kuzu interface
            return self.kuzu.delete_relationship(
                from_table=from_memory_type,
                to_table=to_memory_type,
                rel_type=relation_type,
                from_id=from_uuid,
                to_id=to_uuid,
                user_id=user_id,
            )

        except Exception as e:
            if isinstance(e, ProcessingError):
                raise
            raise ProcessingError(
                "Failed to delete relationship",
                operation="delete_relationship",
                context={
                    "from_hrid": from_memory_hrid,
                    "to_hrid": to_memory_hrid,
                    "relation_type": relation_type,
                },
                original_error=e,
            ) from e

    def delete_memory(self, memory_hrid: str, memory_type: str, user_id: str) -> bool:
        """Delete a memory from both storages using HRID.

        Args:
            memory_hrid: Memory HRID to delete.
            memory_type: Memory entity type.
            user_id: User ID for ownership verification.

        Returns:
            bool: True if deletion succeeded.
        """
        try:
            # Translate HRID to UUID
            uuid = self.hrid_tracker.get_uuid(memory_hrid, user_id)

            # Delete from Qdrant (with user ownership verification)
            qdrant_success = self.qdrant.delete_points([uuid], user_id)

            # Delete from Kuzu (with user_id verification)
            kuzu_success = self.kuzu.delete_node(memory_type, uuid, user_id)

            # Mark HRID as deleted (soft delete in mapping)
            if qdrant_success and kuzu_success:
                self.hrid_tracker.mark_deleted(memory_hrid)

            return qdrant_success and kuzu_success

        except Exception as e:
            raise ProcessingError(
                "Failed to delete memory",
                operation="delete_memory",
                context={"memory_hrid": memory_hrid, "memory_type": memory_type},
                original_error=e,
            ) from e

__init__(db_clients)

Initialize MemoryService with DatabaseClients.

Parameters:

Name Type Description Default
db_clients

DatabaseClients instance (after init_dbs() called).

required
Source code in src/memg_core/core/pipelines/indexer.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def __init__(self, db_clients):
    """Initialize MemoryService with DatabaseClients.

    Args:
        db_clients: DatabaseClients instance (after init_dbs() called).
    """
    if not isinstance(db_clients, DatabaseClients):
        raise TypeError("db_clients must be a DatabaseClients instance")

    # Get interfaces from DatabaseClients (reuses DDL-created clients)
    self.qdrant = db_clients.get_qdrant_interface()
    self.kuzu = db_clients.get_kuzu_interface()
    self.embedder = db_clients.get_embedder()
    self.yaml_translator = db_clients.get_yaml_translator()
    self.hrid_tracker = HridTracker(self.kuzu)

add_memory(memory_type, payload, user_id, collection=None)

Add a memory to both graph and vector storage.

Parameters:

Name Type Description Default
memory_type str

Entity type from YAML schema (e.g., 'task', 'note').

required
payload dict[str, Any]

Memory data conforming to YAML schema.

required
user_id str

Owner of the memory.

required
collection str | None

Optional Qdrant collection override.

None

Returns:

Name Type Description
str str

Memory HRID (Human-readable ID string).

Raises:

Type Description
ProcessingError

If validation fails or storage operations fail.

Source code in src/memg_core/core/pipelines/indexer.py
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
def add_memory(
    self,
    memory_type: str,
    payload: dict[str, Any],
    user_id: str,
    collection: str | None = None,
) -> str:
    """Add a memory to both graph and vector storage.

    Args:
        memory_type: Entity type from YAML schema (e.g., 'task', 'note').
        payload: Memory data conforming to YAML schema.
        user_id: Owner of the memory.
        collection: Optional Qdrant collection override.

    Returns:
        str: Memory HRID (Human-readable ID string).

    Raises:
        ProcessingError: If validation fails or storage operations fail.
    """
    try:
        # Create and validate memory from YAML schema using our instance
        memory = self.yaml_translator.create_memory_from_yaml(memory_type, payload, user_id)

        # Stamp timestamps
        now = datetime.now(UTC)
        if not memory.created_at:
            memory.created_at = now
        memory.updated_at = now

        # Generate HRID using tracker
        hrid = generate_hrid(memory_type, user_id, self.hrid_tracker)

        # Get anchor text from YAML-defined anchor field using our instance
        anchor_text = self.yaml_translator.build_anchor_text(memory)
        if not anchor_text:
            raise ProcessingError(
                f"Empty anchor text for memory type '{memory_type}'",
                operation="add_memory",
                context={"memory_id": memory.id, "memory_type": memory_type},
            )

        # Generate embedding from anchor text
        vector = self.embedder.get_embedding(anchor_text)

        # Create complete flat payload for Qdrant (includes system fields for filtering)
        flat_payload = {
            "user_id": memory.user_id,  # Required for user filtering
            "memory_type": memory.memory_type,  # Required for type filtering
            "created_at": memory.created_at.isoformat(),  # Required for time filtering
            "updated_at": memory.updated_at.isoformat(),  # Required for time filtering
            "hrid": hrid,  # Include HRID for user-facing operations
            **memory.payload,  # Include all YAML-validated entity fields
        }

        # Add to Qdrant (vector storage) with complete payload
        success, _point_id = self.qdrant.add_point(
            vector=vector,
            payload=flat_payload,  # Complete flat payload with system + entity fields
            point_id=memory.id,
            collection=collection,
        )
        if not success:
            raise ProcessingError(
                "Failed to add memory to vector storage",
                operation="add_memory",
                context={"memory_id": memory.id},
            )

        # Add to Kuzu (graph storage) - use entity-specific table
        kuzu_data = {
            "id": memory.id,
            "user_id": memory.user_id,
            "memory_type": memory.memory_type,
            "created_at": memory.created_at.isoformat(),
            "updated_at": memory.updated_at.isoformat(),
            **memory.payload,  # Include all YAML-validated fields
        }
        self.kuzu.add_node(memory_type, kuzu_data)

        # Create HRID mapping after successful storage
        self.hrid_tracker.create_mapping(hrid, memory.id, memory_type, user_id)

        return hrid  # Return HRID, not UUID

    except Exception as e:
        if isinstance(e, ProcessingError):
            raise
        raise ProcessingError(
            "Failed to add memory",
            operation="add_memory",
            context={"memory_type": memory_type, "user_id": user_id},
            original_error=e,
        ) from e

add_relationship(from_memory_hrid, to_memory_hrid, relation_type, from_memory_type, to_memory_type, user_id, properties=None)

Add a relationship between two memories using HRIDs.

Parameters:

Name Type Description Default
from_memory_hrid str

Source memory HRID.

required
to_memory_hrid str

Target memory HRID.

required
relation_type str

Relationship type from YAML schema (e.g., 'ANNOTATES').

required
from_memory_type str

Source memory entity type.

required
to_memory_type str

Target memory entity type.

required
user_id str

User ID for ownership verification.

required
properties dict[str, Any] | None

Optional relationship properties.

None

Raises:

Type Description
ProcessingError

If relationship creation fails.

Source code in src/memg_core/core/pipelines/indexer.py
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
def add_relationship(
    self,
    from_memory_hrid: str,
    to_memory_hrid: str,
    relation_type: str,
    from_memory_type: str,
    to_memory_type: str,
    user_id: str,
    properties: dict[str, Any] | None = None,
) -> None:
    """Add a relationship between two memories using HRIDs.

    Args:
        from_memory_hrid: Source memory HRID.
        to_memory_hrid: Target memory HRID.
        relation_type: Relationship type from YAML schema (e.g., 'ANNOTATES').
        from_memory_type: Source memory entity type.
        to_memory_type: Target memory entity type.
        user_id: User ID for ownership verification.
        properties: Optional relationship properties.

    Raises:
        ProcessingError: If relationship creation fails.
    """
    try:
        # Translate HRIDs to UUIDs
        from_uuid = self.hrid_tracker.get_uuid(from_memory_hrid, user_id)
        to_uuid = self.hrid_tracker.get_uuid(to_memory_hrid, user_id)

        self.kuzu.add_relationship(
            from_table=from_memory_type,
            to_table=to_memory_type,
            rel_type=relation_type,
            from_id=from_uuid,
            to_id=to_uuid,
            user_id=user_id,
            props=properties or {},
        )
    except Exception as e:
        raise ProcessingError(
            "Failed to add relationship",
            operation="add_relationship",
            context={
                "from_hrid": from_memory_hrid,
                "to_hrid": to_memory_hrid,
                "relation_type": relation_type,
            },
            original_error=e,
        ) from e

delete_memory(memory_hrid, memory_type, user_id)

Delete a memory from both storages using HRID.

Parameters:

Name Type Description Default
memory_hrid str

Memory HRID to delete.

required
memory_type str

Memory entity type.

required
user_id str

User ID for ownership verification.

required

Returns:

Name Type Description
bool bool

True if deletion succeeded.

Source code in src/memg_core/core/pipelines/indexer.py
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
def delete_memory(self, memory_hrid: str, memory_type: str, user_id: str) -> bool:
    """Delete a memory from both storages using HRID.

    Args:
        memory_hrid: Memory HRID to delete.
        memory_type: Memory entity type.
        user_id: User ID for ownership verification.

    Returns:
        bool: True if deletion succeeded.
    """
    try:
        # Translate HRID to UUID
        uuid = self.hrid_tracker.get_uuid(memory_hrid, user_id)

        # Delete from Qdrant (with user ownership verification)
        qdrant_success = self.qdrant.delete_points([uuid], user_id)

        # Delete from Kuzu (with user_id verification)
        kuzu_success = self.kuzu.delete_node(memory_type, uuid, user_id)

        # Mark HRID as deleted (soft delete in mapping)
        if qdrant_success and kuzu_success:
            self.hrid_tracker.mark_deleted(memory_hrid)

        return qdrant_success and kuzu_success

    except Exception as e:
        raise ProcessingError(
            "Failed to delete memory",
            operation="delete_memory",
            context={"memory_hrid": memory_hrid, "memory_type": memory_type},
            original_error=e,
        ) from e

delete_relationship(from_memory_hrid, to_memory_hrid, relation_type, from_memory_type=None, to_memory_type=None, user_id=None)

Delete a relationship between two memories using HRIDs.

Parameters:

Name Type Description Default
from_memory_hrid str

Source memory HRID.

required
to_memory_hrid str

Target memory HRID.

required
relation_type str

Relationship type from YAML schema (e.g., 'ANNOTATES').

required
from_memory_type str | None

Source memory entity type (inferred from HRID if not provided).

None
to_memory_type str | None

Target memory entity type (inferred from HRID if not provided).

None
user_id str | None

User ID for ownership verification (required).

None

Returns:

Name Type Description
bool bool

True if deletion succeeded, False if relationship not found.

Raises:

Type Description
ProcessingError

If relationship deletion fails or parameters invalid.

Source code in src/memg_core/core/pipelines/indexer.py
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
def delete_relationship(
    self,
    from_memory_hrid: str,
    to_memory_hrid: str,
    relation_type: str,
    from_memory_type: str | None = None,
    to_memory_type: str | None = None,
    user_id: str | None = None,
) -> bool:
    """Delete a relationship between two memories using HRIDs.

    Args:
        from_memory_hrid: Source memory HRID.
        to_memory_hrid: Target memory HRID.
        relation_type: Relationship type from YAML schema (e.g., 'ANNOTATES').
        from_memory_type: Source memory entity type (inferred from HRID if not provided).
        to_memory_type: Target memory entity type (inferred from HRID if not provided).
        user_id: User ID for ownership verification (required).

    Returns:
        bool: True if deletion succeeded, False if relationship not found.

    Raises:
        ProcessingError: If relationship deletion fails or parameters invalid.
    """
    try:
        # Validate required user_id
        if not user_id:
            raise ProcessingError(
                "user_id is required for relationship deletion",
                operation="delete_relationship",
                context={"from_hrid": from_memory_hrid, "to_hrid": to_memory_hrid},
            )

        # Infer memory types from HRIDs if not provided
        if from_memory_type is None:
            from_memory_type = from_memory_hrid.split("_")[0].lower()
        if to_memory_type is None:
            to_memory_type = to_memory_hrid.split("_")[0].lower()

        # Translate HRIDs to UUIDs
        from_uuid = self.hrid_tracker.get_uuid(from_memory_hrid, user_id)
        to_uuid = self.hrid_tracker.get_uuid(to_memory_hrid, user_id)

        # Delete relationship using Kuzu interface
        return self.kuzu.delete_relationship(
            from_table=from_memory_type,
            to_table=to_memory_type,
            rel_type=relation_type,
            from_id=from_uuid,
            to_id=to_uuid,
            user_id=user_id,
        )

    except Exception as e:
        if isinstance(e, ProcessingError):
            raise
        raise ProcessingError(
            "Failed to delete relationship",
            operation="delete_relationship",
            context={
                "from_hrid": from_memory_hrid,
                "to_hrid": to_memory_hrid,
                "relation_type": relation_type,
            },
            original_error=e,
        ) from e

update_memory(hrid, payload_updates, user_id, memory_type=None, collection=None)

Update memory with partial payload changes (patch-style update).

Parameters:

Name Type Description Default
hrid str

Memory HRID to update.

required
payload_updates dict[str, Any]

Dictionary of fields to update (only changed fields).

required
user_id str

User ID for ownership verification.

required
memory_type str | None

Optional memory type hint (inferred from HRID if not provided).

None
collection str | None

Optional Qdrant collection override.

None

Returns:

Name Type Description
bool bool

True if update succeeded.

Raises:

Type Description
ProcessingError

If update fails or memory not found.

Source code in src/memg_core/core/pipelines/indexer.py
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
def update_memory(
    self,
    hrid: str,
    payload_updates: dict[str, Any],
    user_id: str,
    memory_type: str | None = None,
    collection: str | None = None,
) -> bool:
    """Update memory with partial payload changes (patch-style update).

    Args:
        hrid: Memory HRID to update.
        payload_updates: Dictionary of fields to update (only changed fields).
        user_id: User ID for ownership verification.
        memory_type: Optional memory type hint (inferred from HRID if not provided).
        collection: Optional Qdrant collection override.

    Returns:
        bool: True if update succeeded.

    Raises:
        ProcessingError: If update fails or memory not found.
    """
    try:
        # Infer memory type from HRID if not provided
        if memory_type is None:
            memory_type = hrid.split("_")[0].lower()

        # Get existing UUID to preserve relationships
        uuid = self.hrid_tracker.get_uuid(hrid, user_id)

        # Get current memory data from Qdrant to merge with updates
        current_point = self.qdrant.get_point(uuid, collection)
        if not current_point:
            raise ProcessingError(
                f"Memory not found for HRID {hrid}",
                operation="update_memory",
                context={"hrid": hrid, "user_id": user_id},
            )

        # Merge current payload with updates
        current_payload = current_point.get("payload", {})
        # Remove system fields from current payload to get user fields only
        user_fields = {
            k: v
            for k, v in current_payload.items()
            if k
            not in (
                "id",
                "user_id",
                "memory_type",
                "created_at",
                "updated_at",
                "hrid",
            )
        }

        # Merge updates into user fields
        updated_user_payload = {**user_fields, **payload_updates}

        # Validate merged payload against YAML schema
        memory = self.yaml_translator.create_memory_from_yaml(
            memory_type, updated_user_payload, user_id
        )
        memory.id = uuid  # Preserve existing UUID for relationships

        # Update timestamps - preserve created_at, update updated_at
        memory.created_at = datetime.fromisoformat(current_payload.get("created_at"))
        memory.updated_at = datetime.now(UTC)

        # Get anchor text for vector update
        anchor_text = self.yaml_translator.build_anchor_text(memory)
        if not anchor_text:
            raise ProcessingError(
                f"Empty anchor text for memory type '{memory_type}'",
                operation="update_memory",
                context={"memory_id": memory.id, "memory_type": memory_type},
            )

        # Generate new embedding from updated anchor text
        vector = self.embedder.get_embedding(anchor_text)

        # Create updated flat payload for Qdrant
        flat_payload = {
            "user_id": memory.user_id,
            "memory_type": memory.memory_type,
            "created_at": memory.created_at.isoformat(),
            "updated_at": memory.updated_at.isoformat(),
            "hrid": hrid,  # Preserve HRID
            **memory.payload,  # Updated and validated payload
        }

        # Update Qdrant point (upsert with same UUID)
        success, _point_id = self.qdrant.add_point(
            vector=vector,
            payload=flat_payload,
            point_id=memory.id,  # Same UUID preserves relationships
            collection=collection,
        )
        if not success:
            raise ProcessingError(
                "Failed to update memory in vector storage",
                operation="update_memory",
                context={"memory_id": memory.id, "hrid": hrid},
            )

        # Update Kuzu node (need to implement update_node method)
        kuzu_data = {
            "id": memory.id,
            "user_id": memory.user_id,
            "memory_type": memory.memory_type,
            "created_at": memory.created_at.isoformat(),
            "updated_at": memory.updated_at.isoformat(),
            **memory.payload,
        }

        # Update Kuzu node using efficient update_node method
        # This preserves relationships and is more efficient than delete+add
        success = self.kuzu.update_node(memory_type, uuid, kuzu_data, user_id)
        if not success:
            raise ProcessingError(
                "Failed to update memory in graph storage - memory not found",
                operation="update_memory",
                context={"memory_id": uuid, "hrid": hrid, "user_id": user_id},
            )

        return True

    except Exception as e:
        if isinstance(e, ProcessingError):
            raise
        raise ProcessingError(
            "Failed to update memory",
            operation="update_memory",
            context={"hrid": hrid, "user_id": user_id, "memory_type": memory_type},
            original_error=e,
        ) from e

SearchResult

Bases: BaseModel

Search result with explicit seed/neighbor separation.

Attributes:

Name Type Description
memories list[MemorySeed]

List of memory seeds with full payloads and relationships.

neighbors list[MemoryNeighbor]

List of memory neighbors with anchor-only payloads.

Source code in src/memg_core/core/models.py
276
277
278
279
280
281
282
283
284
285
286
287
288
289
class SearchResult(BaseModel):
    """Search result with explicit seed/neighbor separation.

    Attributes:
        memories: List of memory seeds with full payloads and relationships.
        neighbors: List of memory neighbors with anchor-only payloads.
    """

    memories: list[MemorySeed] = Field(
        default_factory=list, description="Memory seeds with full payloads"
    )
    neighbors: list[MemoryNeighbor] = Field(
        default_factory=list, description="Memory neighbors with anchor payloads"
    )

SearchService

Unified search service - handles all search and retrieval operations.

Provides GraphRAG search functionality using DatabaseClients for interface access. Eliminates the need to pass interfaces as function parameters.

Attributes:

Name Type Description
qdrant

Qdrant interface instance.

kuzu

Kuzu interface instance.

embedder

Embedder instance.

yaml_translator

YAML translator instance.

hrid_tracker

HRID tracker instance.

Source code in src/memg_core/core/pipelines/retrieval.py
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
class SearchService:
    """Unified search service - handles all search and retrieval operations.

    Provides GraphRAG search functionality using DatabaseClients for interface access.
    Eliminates the need to pass interfaces as function parameters.

    Attributes:
        qdrant: Qdrant interface instance.
        kuzu: Kuzu interface instance.
        embedder: Embedder instance.
        yaml_translator: YAML translator instance.
        hrid_tracker: HRID tracker instance.
    """

    def __init__(self, db_clients):
        """Initialize SearchService with DatabaseClients.

        Args:
            db_clients: DatabaseClients instance (after init_dbs() called).
        """
        if not isinstance(db_clients, DatabaseClients):
            raise TypeError("db_clients must be a DatabaseClients instance")

        # Get interfaces from DatabaseClients (reuses DDL-created clients)
        self.qdrant = db_clients.get_qdrant_interface()
        self.kuzu = db_clients.get_kuzu_interface()
        self.embedder = db_clients.get_embedder()
        self.yaml_translator = db_clients.get_yaml_translator()
        self.hrid_tracker = HridTracker(self.kuzu)
        self.config = get_config()

    def search(
        self,
        query: str,
        user_id: str,
        limit: int = 5,
        *,
        memory_type: str | None = None,
        relation_names: list[str] | None = None,
        neighbor_limit: int = 5,
        hops: int = 1,
        include_semantic: bool = True,
        include_details: str = "self",
        modified_within_days: int | None = None,
        filters: dict[str, Any] | None = None,
        projection: dict[str, list[str]] | None = None,
        score_threshold: float | None = None,
        decay_threshold: float | None = None,
    ) -> SearchResult:
        """GraphRAG search: vector seeds → graph expansion → semantic enhancement.

        This method encapsulates the graph_rag_search logic as a class method,
        eliminating the need to pass interfaces as parameters.

        Args:
            query: Search query text (required).
            user_id: User ID for filtering (required).
            limit: Maximum results to return (default: 5).
            memory_type: Optional memory type filter.
            relation_names: Specific relations to expand (None = all relations).
            neighbor_limit: Max neighbors per seed (default: 5).
            hops: Number of graph hops to expand (default: 1).
            include_semantic: Enable semantic expansion via see_also (default: True).
            include_details: "self" (full payload) or "none" (anchor only) for seeds.
            modified_within_days: Filter by recency (e.g., last 7 days).
            filters: Custom field-based filtering (e.g., {"project": "memg-core"}).
            projection: Control which fields to return per memory type.
            score_threshold: Minimum similarity score threshold (0.0-1.0).
            decay_threshold: Minimum neighbor relevance threshold (0.0-1.0).

        Returns:
            SearchResult: Search result with explicit seed/neighbor separation.
        """
        if not query or not query.strip():
            return SearchResult()

        # 1. Get seeds from Qdrant vector search
        query_vector = self.embedder.get_embedding(query)

        # Build filters for Qdrant
        qdrant_filters = self._build_qdrant_filters(
            user_id=user_id,
            memory_type=memory_type,
            modified_within_days=modified_within_days,
            extra_filters=filters,
        )

        # Search Qdrant for vector seeds
        vector_points = self.qdrant.search_points(
            vector=query_vector,
            limit=limit,
            filters=qdrant_filters,  # user_id already included by _build_qdrant_filters
            score_threshold=score_threshold,
        )

        # Convert Qdrant points to SearchResult seeds
        seeds: list[MemorySeed] = []
        for point in vector_points:
            payload = point["payload"]
            point_id = point["id"]

            # Use centralized utility for Memory construction
            memory = build_memory_from_flat_payload(point_id, payload, self.hrid_tracker)

            # Project seed payload based on include_details and projection
            memory.payload = _project_payload(
                memory.memory_type,
                memory.payload,
                include_details=include_details,
                projection=projection,
                yaml_translator=self.yaml_translator,
            )

            seed_result = MemorySeed(
                hrid=memory.hrid or memory.id,
                memory_type=memory.memory_type,
                payload=memory.payload,
                score=float(point["score"]),
                relationships=[],  # Will be populated by graph expansion
            )

            seeds.append(seed_result)

        if not seeds:
            return SearchResult()

        # 2. Graph expansion (neighbors with anchor-only payloads)
        neighbors: list[MemoryNeighbor] = []
        if hops > 0:
            graph_result = _append_neighbors(
                seeds=seeds,
                kuzu=self.kuzu,
                user_id=user_id,
                relation_names=relation_names,
                neighbor_limit=neighbor_limit,
                hops=hops,
                projection=projection,
                hrid_tracker=self.hrid_tracker,
                yaml_translator=self.yaml_translator,
                decay_rate=self.config.memg.decay_rate,
            )
            # Seeds are now modified with relationships, neighbors are in graph_result.neighbors
            neighbors.extend(graph_result.neighbors)

        # 3. Semantic expansion (optional, type-specific "see also")
        if include_semantic:
            semantic_neighbors = _find_semantic_expansion(
                seeds=seeds,  # Only expand from original seeds, not neighbors
                qdrant=self.qdrant,
                embedder=self.embedder,
                user_id=user_id,
                projection=projection,
                hrid_tracker=self.hrid_tracker,
                yaml_translator=self.yaml_translator,
            )
            neighbors.extend(semantic_neighbors)

        # Compose final SearchResult with seeds and neighbors
        return SearchResult(
            memories=seeds,
            neighbors=neighbors,
        )

    def _build_qdrant_filters(
        self,
        user_id: str,
        memory_type: str | None,
        modified_within_days: int | None,
        extra_filters: dict[str, Any] | None,
    ) -> dict[str, Any]:
        """Build Qdrant filters from parameters with mandatory user isolation.

        Args:
            user_id: User ID for filtering (CRITICAL: included in filters dict).
            memory_type: Optional memory type filter.
            modified_within_days: Filter by recency (days).
            extra_filters: Additional custom filters.

        Returns:
            dict[str, Any]: Combined filters dictionary for Qdrant with user_id always included.

        Note:
            user_id is now included in filters dict for security validation.
        """
        # CRITICAL SECURITY: Always start with user_id
        filters: dict[str, Any] = {"user_id": user_id}

        # Add extra filters (user_id will be overridden if present, which is fine)
        if extra_filters:
            filters.update(extra_filters)

        # memory_type filter - use flat structure
        if memory_type:
            filters["memory_type"] = memory_type

        # Time-based filtering - use flat structure
        if modified_within_days and modified_within_days > 0:
            cutoff_date = datetime.now(UTC) - timedelta(days=modified_within_days)
            filters["updated_at_from"] = cutoff_date.isoformat()

        return filters

    def get_memory(
        self,
        hrid: str,
        user_id: str,
        memory_type: str | None = None,
        collection: str | None = None,
    ) -> dict[str, Any] | None:
        """Get a single memory by HRID.

        Args:
            hrid: Human-readable identifier of the memory.
            user_id: User ID for ownership verification.
            memory_type: Optional memory type hint (inferred from HRID if not provided).
            collection: Optional Qdrant collection override.

        Returns:
            dict[str, Any] | None: Memory data with full payload, or None if not found.
        """
        try:
            # Infer memory type from HRID if not provided
            if memory_type is None:
                memory_type = hrid.split("_")[0].lower()

            # Get UUID from HRID
            uuid = self.hrid_tracker.get_uuid(hrid, user_id)
            if not uuid:
                return None

            # Get memory data from Qdrant
            point_data = self.qdrant.get_point(uuid, collection)
            if not point_data:
                return None

            # Verify user ownership
            payload = point_data.get("payload", {})
            if payload.get("user_id") != user_id:
                return None

            # Build response with full memory information (HRID-only policy - no UUID exposure)
            memory_data = {
                "hrid": hrid,
                "memory_type": payload.get("memory_type", memory_type),
                "user_id": user_id,
                "created_at": payload.get("created_at"),
                "updated_at": payload.get("updated_at"),
                "payload": {
                    k: v
                    for k, v in payload.items()
                    if k
                    not in (
                        "id",
                        "user_id",
                        "memory_type",
                        "created_at",
                        "updated_at",
                        "hrid",
                    )
                },
            }

            return memory_data

        except (DatabaseError, ValueError, KeyError):
            return None

    def get_memories(
        self,
        user_id: str,
        memory_type: str | None = None,
        filters: dict[str, Any] | None = None,
        limit: int = 50,
        offset: int = 0,
        include_neighbors: bool = False,
        hops: int = 1,
    ) -> list[dict[str, Any]]:
        """Get multiple memories with filtering and optional graph expansion.

        Args:
            user_id: User ID for ownership verification.
            memory_type: Optional memory type filter (e.g., "task", "note").
            filters: Optional field filters (e.g., {"status": "open", "priority": "high"}).
            limit: Maximum number of memories to return (default 50).
            offset: Number of memories to skip for pagination (default 0).
            include_neighbors: Whether to include neighbor nodes via graph traversal.
            hops: Number of hops for neighbor expansion (default 1).

        Returns:
            list[dict[str, Any]]: List of memory data with full payloads.
        """
        try:
            # Use KuzuInterface to get nodes with filtering
            results = self.kuzu.get_nodes(
                user_id=user_id,
                node_type=memory_type,
                filters=filters,
                limit=limit,
                offset=offset,
            )

            # Convert Kuzu results directly to memory data
            memories = []
            for result in results:
                node_data = result.get("node", {})

                # Get HRID from UUID
                uuid = result.get("id")
                hrid = self.hrid_tracker.get_hrid(uuid, user_id) if uuid else None

                if not hrid:
                    continue

                # Filter out UUID fields from payload (consumers should only see HRIDs)
                filtered_payload = {
                    k: v for k, v in node_data.items() if k not in ["id", "_id", "uuid"]
                }

                # Build memory data directly
                memory_data = {
                    "hrid": hrid,
                    "memory_type": result.get("memory_type"),
                    "user_id": user_id,
                    "created_at": result.get("created_at"),
                    "updated_at": result.get("updated_at"),
                    "payload": filtered_payload,
                    "score": 1.0,  # Not from vector search
                    "source": "kuzu_query",
                    "metadata": {"query_type": "get_memories"},
                }

                memories.append(memory_data)

            # Apply graph expansion if requested
            if include_neighbors and memories:
                try:
                    # Convert to MemorySeed objects for graph expansion
                    memory_seeds = []
                    for memory_data in memories:
                        # Create MemorySeed directly from memory_data
                        hrid = memory_data["hrid"]

                        # Create MemorySeed
                        seed = MemorySeed(
                            hrid=hrid,
                            memory_type=memory_data["memory_type"],
                            payload=memory_data["payload"],
                            score=memory_data["score"],
                            relationships=[],  # Will be populated by graph expansion
                        )
                        memory_seeds.append(seed)

                    # Apply graph expansion
                    expanded_result = _append_neighbors(
                        seeds=memory_seeds,
                        kuzu=self.kuzu,
                        user_id=user_id,
                        relation_names=None,  # All relations
                        neighbor_limit=5,  # Default limit
                        hops=hops,
                        projection=None,
                        hrid_tracker=self.hrid_tracker,
                        yaml_translator=self.yaml_translator,
                        decay_rate=self.config.memg.decay_rate,
                    )

                    # Convert back to dict format for API compatibility
                    expanded_memories = []
                    for seed in expanded_result.memories:
                        memory_data = {
                            "hrid": seed.hrid,
                            "memory_type": seed.memory_type,
                            "user_id": user_id,  # Use the user_id parameter
                            "created_at": None,  # Not available in MemorySeed
                            "updated_at": None,  # Not available in MemorySeed
                            "payload": seed.payload,
                            "score": seed.score,
                            "source": "kuzu_query_expanded",
                            "metadata": {"query_type": "get_memories_expanded"},
                            "relationships": [
                                {
                                    "relation_type": rel.relation_type,
                                    "target_hrid": rel.target_hrid,
                                    "scores": rel.scores,
                                }
                                for rel in seed.relationships
                            ],
                        }
                        expanded_memories.append(memory_data)

                    return expanded_memories

                except Exception as e:
                    # If graph expansion fails, fall back to basic memories
                    # Log the actual error with full details for debugging
                    import logging

                    logging.error(
                        f"Graph expansion failed in get_memories(): {type(e).__name__}: {e}",
                        exc_info=True,
                    )
                    # Fall through to return basic memories

            return memories

        except (DatabaseError, ValueError, KeyError) as e:
            # Log the error instead of silently failing
            import logging

            logging.error(f"get_memories() failed: {type(e).__name__}: {e}", exc_info=True)
            return []

    def get_memory_neighbors(
        self,
        memory_id: str,
        memory_type: str,
        user_id: str,
        relation_types: list[str] | None = None,
        direction: str = "any",
        limit: int = 10,
    ) -> list[dict[str, Any]]:
        """Get related memories through graph relationships.

        Args:
            memory_id: Memory ID to find neighbors for
            memory_type: Memory entity type
            user_id: User ID for isolation
            relation_types: Filter by specific relationship types
            direction: "in", "out", or "any"
            limit: Maximum number of neighbors

        Returns:
            List of neighbor memories with relationship info
        """
        try:
            return self.kuzu.neighbors(
                node_label=memory_type,
                node_uuid=memory_id,
                user_id=user_id,
                rel_types=relation_types,
                direction=direction,
                limit=limit,
            )
        except Exception as e:
            raise ProcessingError(
                "Failed to get memory neighbors",
                operation="get_memory_neighbors",
                context={"memory_id": memory_id, "memory_type": memory_type},
                original_error=e,
            ) from e

__init__(db_clients)

Initialize SearchService with DatabaseClients.

Parameters:

Name Type Description Default
db_clients

DatabaseClients instance (after init_dbs() called).

required
Source code in src/memg_core/core/pipelines/retrieval.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def __init__(self, db_clients):
    """Initialize SearchService with DatabaseClients.

    Args:
        db_clients: DatabaseClients instance (after init_dbs() called).
    """
    if not isinstance(db_clients, DatabaseClients):
        raise TypeError("db_clients must be a DatabaseClients instance")

    # Get interfaces from DatabaseClients (reuses DDL-created clients)
    self.qdrant = db_clients.get_qdrant_interface()
    self.kuzu = db_clients.get_kuzu_interface()
    self.embedder = db_clients.get_embedder()
    self.yaml_translator = db_clients.get_yaml_translator()
    self.hrid_tracker = HridTracker(self.kuzu)
    self.config = get_config()

get_memories(user_id, memory_type=None, filters=None, limit=50, offset=0, include_neighbors=False, hops=1)

Get multiple memories with filtering and optional graph expansion.

Parameters:

Name Type Description Default
user_id str

User ID for ownership verification.

required
memory_type str | None

Optional memory type filter (e.g., "task", "note").

None
filters dict[str, Any] | None

Optional field filters (e.g., {"status": "open", "priority": "high"}).

None
limit int

Maximum number of memories to return (default 50).

50
offset int

Number of memories to skip for pagination (default 0).

0
include_neighbors bool

Whether to include neighbor nodes via graph traversal.

False
hops int

Number of hops for neighbor expansion (default 1).

1

Returns:

Type Description
list[dict[str, Any]]

list[dict[str, Any]]: List of memory data with full payloads.

Source code in src/memg_core/core/pipelines/retrieval.py
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
def get_memories(
    self,
    user_id: str,
    memory_type: str | None = None,
    filters: dict[str, Any] | None = None,
    limit: int = 50,
    offset: int = 0,
    include_neighbors: bool = False,
    hops: int = 1,
) -> list[dict[str, Any]]:
    """Get multiple memories with filtering and optional graph expansion.

    Args:
        user_id: User ID for ownership verification.
        memory_type: Optional memory type filter (e.g., "task", "note").
        filters: Optional field filters (e.g., {"status": "open", "priority": "high"}).
        limit: Maximum number of memories to return (default 50).
        offset: Number of memories to skip for pagination (default 0).
        include_neighbors: Whether to include neighbor nodes via graph traversal.
        hops: Number of hops for neighbor expansion (default 1).

    Returns:
        list[dict[str, Any]]: List of memory data with full payloads.
    """
    try:
        # Use KuzuInterface to get nodes with filtering
        results = self.kuzu.get_nodes(
            user_id=user_id,
            node_type=memory_type,
            filters=filters,
            limit=limit,
            offset=offset,
        )

        # Convert Kuzu results directly to memory data
        memories = []
        for result in results:
            node_data = result.get("node", {})

            # Get HRID from UUID
            uuid = result.get("id")
            hrid = self.hrid_tracker.get_hrid(uuid, user_id) if uuid else None

            if not hrid:
                continue

            # Filter out UUID fields from payload (consumers should only see HRIDs)
            filtered_payload = {
                k: v for k, v in node_data.items() if k not in ["id", "_id", "uuid"]
            }

            # Build memory data directly
            memory_data = {
                "hrid": hrid,
                "memory_type": result.get("memory_type"),
                "user_id": user_id,
                "created_at": result.get("created_at"),
                "updated_at": result.get("updated_at"),
                "payload": filtered_payload,
                "score": 1.0,  # Not from vector search
                "source": "kuzu_query",
                "metadata": {"query_type": "get_memories"},
            }

            memories.append(memory_data)

        # Apply graph expansion if requested
        if include_neighbors and memories:
            try:
                # Convert to MemorySeed objects for graph expansion
                memory_seeds = []
                for memory_data in memories:
                    # Create MemorySeed directly from memory_data
                    hrid = memory_data["hrid"]

                    # Create MemorySeed
                    seed = MemorySeed(
                        hrid=hrid,
                        memory_type=memory_data["memory_type"],
                        payload=memory_data["payload"],
                        score=memory_data["score"],
                        relationships=[],  # Will be populated by graph expansion
                    )
                    memory_seeds.append(seed)

                # Apply graph expansion
                expanded_result = _append_neighbors(
                    seeds=memory_seeds,
                    kuzu=self.kuzu,
                    user_id=user_id,
                    relation_names=None,  # All relations
                    neighbor_limit=5,  # Default limit
                    hops=hops,
                    projection=None,
                    hrid_tracker=self.hrid_tracker,
                    yaml_translator=self.yaml_translator,
                    decay_rate=self.config.memg.decay_rate,
                )

                # Convert back to dict format for API compatibility
                expanded_memories = []
                for seed in expanded_result.memories:
                    memory_data = {
                        "hrid": seed.hrid,
                        "memory_type": seed.memory_type,
                        "user_id": user_id,  # Use the user_id parameter
                        "created_at": None,  # Not available in MemorySeed
                        "updated_at": None,  # Not available in MemorySeed
                        "payload": seed.payload,
                        "score": seed.score,
                        "source": "kuzu_query_expanded",
                        "metadata": {"query_type": "get_memories_expanded"},
                        "relationships": [
                            {
                                "relation_type": rel.relation_type,
                                "target_hrid": rel.target_hrid,
                                "scores": rel.scores,
                            }
                            for rel in seed.relationships
                        ],
                    }
                    expanded_memories.append(memory_data)

                return expanded_memories

            except Exception as e:
                # If graph expansion fails, fall back to basic memories
                # Log the actual error with full details for debugging
                import logging

                logging.error(
                    f"Graph expansion failed in get_memories(): {type(e).__name__}: {e}",
                    exc_info=True,
                )
                # Fall through to return basic memories

        return memories

    except (DatabaseError, ValueError, KeyError) as e:
        # Log the error instead of silently failing
        import logging

        logging.error(f"get_memories() failed: {type(e).__name__}: {e}", exc_info=True)
        return []

get_memory(hrid, user_id, memory_type=None, collection=None)

Get a single memory by HRID.

Parameters:

Name Type Description Default
hrid str

Human-readable identifier of the memory.

required
user_id str

User ID for ownership verification.

required
memory_type str | None

Optional memory type hint (inferred from HRID if not provided).

None
collection str | None

Optional Qdrant collection override.

None

Returns:

Type Description
dict[str, Any] | None

dict[str, Any] | None: Memory data with full payload, or None if not found.

Source code in src/memg_core/core/pipelines/retrieval.py
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
def get_memory(
    self,
    hrid: str,
    user_id: str,
    memory_type: str | None = None,
    collection: str | None = None,
) -> dict[str, Any] | None:
    """Get a single memory by HRID.

    Args:
        hrid: Human-readable identifier of the memory.
        user_id: User ID for ownership verification.
        memory_type: Optional memory type hint (inferred from HRID if not provided).
        collection: Optional Qdrant collection override.

    Returns:
        dict[str, Any] | None: Memory data with full payload, or None if not found.
    """
    try:
        # Infer memory type from HRID if not provided
        if memory_type is None:
            memory_type = hrid.split("_")[0].lower()

        # Get UUID from HRID
        uuid = self.hrid_tracker.get_uuid(hrid, user_id)
        if not uuid:
            return None

        # Get memory data from Qdrant
        point_data = self.qdrant.get_point(uuid, collection)
        if not point_data:
            return None

        # Verify user ownership
        payload = point_data.get("payload", {})
        if payload.get("user_id") != user_id:
            return None

        # Build response with full memory information (HRID-only policy - no UUID exposure)
        memory_data = {
            "hrid": hrid,
            "memory_type": payload.get("memory_type", memory_type),
            "user_id": user_id,
            "created_at": payload.get("created_at"),
            "updated_at": payload.get("updated_at"),
            "payload": {
                k: v
                for k, v in payload.items()
                if k
                not in (
                    "id",
                    "user_id",
                    "memory_type",
                    "created_at",
                    "updated_at",
                    "hrid",
                )
            },
        }

        return memory_data

    except (DatabaseError, ValueError, KeyError):
        return None

get_memory_neighbors(memory_id, memory_type, user_id, relation_types=None, direction='any', limit=10)

Get related memories through graph relationships.

Parameters:

Name Type Description Default
memory_id str

Memory ID to find neighbors for

required
memory_type str

Memory entity type

required
user_id str

User ID for isolation

required
relation_types list[str] | None

Filter by specific relationship types

None
direction str

"in", "out", or "any"

'any'
limit int

Maximum number of neighbors

10

Returns:

Type Description
list[dict[str, Any]]

List of neighbor memories with relationship info

Source code in src/memg_core/core/pipelines/retrieval.py
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
def get_memory_neighbors(
    self,
    memory_id: str,
    memory_type: str,
    user_id: str,
    relation_types: list[str] | None = None,
    direction: str = "any",
    limit: int = 10,
) -> list[dict[str, Any]]:
    """Get related memories through graph relationships.

    Args:
        memory_id: Memory ID to find neighbors for
        memory_type: Memory entity type
        user_id: User ID for isolation
        relation_types: Filter by specific relationship types
        direction: "in", "out", or "any"
        limit: Maximum number of neighbors

    Returns:
        List of neighbor memories with relationship info
    """
    try:
        return self.kuzu.neighbors(
            node_label=memory_type,
            node_uuid=memory_id,
            user_id=user_id,
            rel_types=relation_types,
            direction=direction,
            limit=limit,
        )
    except Exception as e:
        raise ProcessingError(
            "Failed to get memory neighbors",
            operation="get_memory_neighbors",
            context={"memory_id": memory_id, "memory_type": memory_type},
            original_error=e,
        ) from e

search(query, user_id, limit=5, *, memory_type=None, relation_names=None, neighbor_limit=5, hops=1, include_semantic=True, include_details='self', modified_within_days=None, filters=None, projection=None, score_threshold=None, decay_threshold=None)

GraphRAG search: vector seeds → graph expansion → semantic enhancement.

This method encapsulates the graph_rag_search logic as a class method, eliminating the need to pass interfaces as parameters.

Parameters:

Name Type Description Default
query str

Search query text (required).

required
user_id str

User ID for filtering (required).

required
limit int

Maximum results to return (default: 5).

5
memory_type str | None

Optional memory type filter.

None
relation_names list[str] | None

Specific relations to expand (None = all relations).

None
neighbor_limit int

Max neighbors per seed (default: 5).

5
hops int

Number of graph hops to expand (default: 1).

1
include_semantic bool

Enable semantic expansion via see_also (default: True).

True
include_details str

"self" (full payload) or "none" (anchor only) for seeds.

'self'
modified_within_days int | None

Filter by recency (e.g., last 7 days).

None
filters dict[str, Any] | None

Custom field-based filtering (e.g., {"project": "memg-core"}).

None
projection dict[str, list[str]] | None

Control which fields to return per memory type.

None
score_threshold float | None

Minimum similarity score threshold (0.0-1.0).

None
decay_threshold float | None

Minimum neighbor relevance threshold (0.0-1.0).

None

Returns:

Name Type Description
SearchResult SearchResult

Search result with explicit seed/neighbor separation.

Source code in src/memg_core/core/pipelines/retrieval.py
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
def search(
    self,
    query: str,
    user_id: str,
    limit: int = 5,
    *,
    memory_type: str | None = None,
    relation_names: list[str] | None = None,
    neighbor_limit: int = 5,
    hops: int = 1,
    include_semantic: bool = True,
    include_details: str = "self",
    modified_within_days: int | None = None,
    filters: dict[str, Any] | None = None,
    projection: dict[str, list[str]] | None = None,
    score_threshold: float | None = None,
    decay_threshold: float | None = None,
) -> SearchResult:
    """GraphRAG search: vector seeds → graph expansion → semantic enhancement.

    This method encapsulates the graph_rag_search logic as a class method,
    eliminating the need to pass interfaces as parameters.

    Args:
        query: Search query text (required).
        user_id: User ID for filtering (required).
        limit: Maximum results to return (default: 5).
        memory_type: Optional memory type filter.
        relation_names: Specific relations to expand (None = all relations).
        neighbor_limit: Max neighbors per seed (default: 5).
        hops: Number of graph hops to expand (default: 1).
        include_semantic: Enable semantic expansion via see_also (default: True).
        include_details: "self" (full payload) or "none" (anchor only) for seeds.
        modified_within_days: Filter by recency (e.g., last 7 days).
        filters: Custom field-based filtering (e.g., {"project": "memg-core"}).
        projection: Control which fields to return per memory type.
        score_threshold: Minimum similarity score threshold (0.0-1.0).
        decay_threshold: Minimum neighbor relevance threshold (0.0-1.0).

    Returns:
        SearchResult: Search result with explicit seed/neighbor separation.
    """
    if not query or not query.strip():
        return SearchResult()

    # 1. Get seeds from Qdrant vector search
    query_vector = self.embedder.get_embedding(query)

    # Build filters for Qdrant
    qdrant_filters = self._build_qdrant_filters(
        user_id=user_id,
        memory_type=memory_type,
        modified_within_days=modified_within_days,
        extra_filters=filters,
    )

    # Search Qdrant for vector seeds
    vector_points = self.qdrant.search_points(
        vector=query_vector,
        limit=limit,
        filters=qdrant_filters,  # user_id already included by _build_qdrant_filters
        score_threshold=score_threshold,
    )

    # Convert Qdrant points to SearchResult seeds
    seeds: list[MemorySeed] = []
    for point in vector_points:
        payload = point["payload"]
        point_id = point["id"]

        # Use centralized utility for Memory construction
        memory = build_memory_from_flat_payload(point_id, payload, self.hrid_tracker)

        # Project seed payload based on include_details and projection
        memory.payload = _project_payload(
            memory.memory_type,
            memory.payload,
            include_details=include_details,
            projection=projection,
            yaml_translator=self.yaml_translator,
        )

        seed_result = MemorySeed(
            hrid=memory.hrid or memory.id,
            memory_type=memory.memory_type,
            payload=memory.payload,
            score=float(point["score"]),
            relationships=[],  # Will be populated by graph expansion
        )

        seeds.append(seed_result)

    if not seeds:
        return SearchResult()

    # 2. Graph expansion (neighbors with anchor-only payloads)
    neighbors: list[MemoryNeighbor] = []
    if hops > 0:
        graph_result = _append_neighbors(
            seeds=seeds,
            kuzu=self.kuzu,
            user_id=user_id,
            relation_names=relation_names,
            neighbor_limit=neighbor_limit,
            hops=hops,
            projection=projection,
            hrid_tracker=self.hrid_tracker,
            yaml_translator=self.yaml_translator,
            decay_rate=self.config.memg.decay_rate,
        )
        # Seeds are now modified with relationships, neighbors are in graph_result.neighbors
        neighbors.extend(graph_result.neighbors)

    # 3. Semantic expansion (optional, type-specific "see also")
    if include_semantic:
        semantic_neighbors = _find_semantic_expansion(
            seeds=seeds,  # Only expand from original seeds, not neighbors
            qdrant=self.qdrant,
            embedder=self.embedder,
            user_id=user_id,
            projection=projection,
            hrid_tracker=self.hrid_tracker,
            yaml_translator=self.yaml_translator,
        )
        neighbors.extend(semantic_neighbors)

    # Compose final SearchResult with seeds and neighbors
    return SearchResult(
        memories=seeds,
        neighbors=neighbors,
    )

YamlTranslator

Translates YAML schema definitions to Pydantic models for strict validation.

Attributes:

Name Type Description
yaml_path

Path to YAML schema file.

_schema dict[str, Any] | None

Cached schema dictionary.

Source code in src/memg_core/core/yaml_translator.py
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
class YamlTranslator:
    """Translates YAML schema definitions to Pydantic models for strict validation.

    Attributes:
        yaml_path: Path to YAML schema file.
        _schema: Cached schema dictionary.
    """

    def __init__(self, yaml_path: str | None = None) -> None:
        """Initialize YamlTranslator with YAML schema path.

        Args:
            yaml_path: Path to YAML schema file. If None, uses MEMG_YAML_SCHEMA env var.

        Raises:
            YamlTranslatorError: If YAML path not provided or TypeRegistry initialization fails.
        """
        # Require explicit YAML path - no silent defaults
        if yaml_path:
            self.yaml_path = yaml_path
        else:
            env_path = os.getenv("MEMG_YAML_SCHEMA")
            if not env_path:
                raise YamlTranslatorError(
                    "YAML schema path required. Set MEMG_YAML_SCHEMA environment variable "
                    "or provide yaml_path parameter. No defaults allowed."
                )
            self.yaml_path = env_path

        self._schema: dict[str, Any] | None = None
        # NO model cache - TypeRegistry handles all caching

        # Initialize TypeRegistry from YAML - crash early if invalid
        try:
            initialize_types_from_yaml(self.yaml_path)
        except Exception as e:
            raise YamlTranslatorError(f"Failed to initialize TypeRegistry from YAML: {e}") from e

    @property
    def schema(self) -> dict[str, Any]:
        """Get the loaded YAML schema, loading it if necessary."""
        if self._schema is not None:
            return self._schema

        # Load schema from the required path - no fallbacks
        if not self.yaml_path:
            raise YamlTranslatorError(
                "YAML schema path not set. This should not happen after __init__."
            )

        self._schema = self._load_schema()
        return self._schema

    def _load_schema(self) -> dict[str, Any]:
        """Load schema from the current yaml_path."""
        if not self.yaml_path:
            raise YamlTranslatorError("YAML path is None")
        path = Path(self.yaml_path)
        if not path.exists():
            raise YamlTranslatorError(f"YAML schema not found at {path}")
        try:
            with path.open(encoding="utf-8") as f:
                data = yaml.safe_load(f)
            if not data:
                raise YamlTranslatorError("Empty YAML schema")
            if not isinstance(data, dict):
                raise YamlTranslatorError("YAML schema root must be a mapping")
            return data
        except yaml.YAMLError as e:
            raise YamlTranslatorError(f"Invalid YAML syntax: {e}") from e

    def _entities_map(self) -> dict[str, dict[str, Any]]:
        sch = self.schema
        ents = sch.get("entities")
        if not ents:
            return {}
        if isinstance(ents, dict):
            # Normalize keys to lower
            return {str(k).lower(): v for k, v in ents.items()}
        # list form
        out: dict[str, dict[str, Any]] = {}
        for item in ents:
            if not isinstance(item, dict):
                continue
            key = (item.get("name") or item.get("type") or "").lower()
            if key:
                out[key] = item
        return out

    def get_entity_types(self) -> list[str]:
        """Get list of available entity types from YAML schema."""
        return list(self._entities_map().keys())

    # ================== RELATIONSHIP PARSING (TARGET-FIRST FORMAT) ==================

    def _get_relations_mapping_for_entity(
        self, entity_name: str
    ) -> dict[str, list[dict[str, Any]]]:
        """Return raw relations mapping for an entity in target-first schema format.

        The expected YAML shape under an entity is:
            relations:
              target_entity_name:
                - name: ...
                  description: ...
                  predicate: PREDICATE_NAME
                  directed: true|false

        Returns an empty dict when no relations are defined.
        """
        entity_spec = self._resolve_entity_with_inheritance(entity_name)
        relations_section = entity_spec.get("relations")
        if not relations_section or not isinstance(relations_section, dict):
            return {}

        # Normalize keys to lower for targets; keep items as-is
        normalized: dict[str, list[dict[str, Any]]] = {}
        for target_name, items in relations_section.items():
            if not isinstance(items, list):
                # Skip invalid shapes silently at this layer; validation is higher-level
                continue
            normalized[str(target_name).lower()] = [i for i in items if isinstance(i, dict)]
        return normalized

    def get_relations_for_source(self, entity_name: str) -> list[dict[str, Any]]:
        """Get normalized relation specs for a source entity in target-first schema.

        Returns list of dicts with keys:
            - source (str)
            - target (str)
            - name (str | None)
            - description (str | None)
            - predicate (str)
            - directed (bool)
        """
        if not entity_name:
            raise YamlTranslatorError("Empty entity name")

        source_l = entity_name.lower()
        relations_map = self._get_relations_mapping_for_entity(source_l)
        if not relations_map:
            return []

        out: list[dict[str, Any]] = []
        for target_l, items in relations_map.items():
            for item in items:
                predicate = item.get("predicate")
                if not predicate or not isinstance(predicate, str):
                    # Skip invalid entries - strict behavior can be added later
                    continue
                directed = bool(item.get("directed", True))
                out.append(
                    {
                        "source": source_l,
                        "target": target_l,
                        "name": item.get("name"),
                        "description": item.get("description"),
                        "predicate": predicate.upper(),
                        "directed": directed,
                    }
                )
        return out

    @staticmethod
    def relationship_table_name(
        source: str,
        predicate: str,
        target: str,
        *,
        directed: bool = True,  # noqa: unused-argument
    ) -> str:
        """Generate relationship table name.

        For now, table name does not encode direction; direction affects creation/query semantics.
        Canonicalization for undirected pairs can be added here later if decided.
        """
        return f"{str(source).upper()}_{str(predicate).upper()}_{str(target).upper()}"

    def get_labels_for_predicates(
        self,
        source_type: str,
        predicates: list[str] | None,
        neighbor_label: str | None = None,
    ) -> list[str]:
        """Expand predicate names to concrete relationship labels for a given source.

        Args:
            source_type: Source entity type name
            predicates: List of predicate names to include (case-insensitive). If None, include all.
            neighbor_label: Optional target entity type filter (case-insensitive)

        Returns:
            List of concrete relationship labels (table names) matching the filter.
        """
        if not source_type:
            raise YamlTranslatorError("Empty source_type")

        preds_u = set(p.upper() for p in predicates) if predicates else None
        neighbor_l = neighbor_label.lower() if neighbor_label else None

        labels: list[str] = []
        for spec in self.get_relations_for_source(source_type):
            if preds_u is not None and spec["predicate"].upper() not in preds_u:
                continue
            if neighbor_l is not None and spec["target"].lower() != neighbor_l:
                continue
            labels.append(
                self.relationship_table_name(
                    source=spec["source"],
                    predicate=spec["predicate"],
                    target=spec["target"],
                    directed=spec["directed"],
                )
            )
        return labels

    def debug_relation_map(self) -> dict[str, dict[str, list[dict[str, Any]]]]:
        """Return a nested relation map for debugging/printing.

        Structure:
        {
          source: {
            target: [ {name, predicate, directed, description} ... ]
          }
        }
        """
        out: dict[str, dict[str, list[dict[str, Any]]]] = {}
        for source in self.get_entity_types():
            specs = self.get_relations_for_source(source)
            if not specs:
                continue
            if source not in out:
                out[source] = {}
            for spec in specs:
                target = spec["target"]
                out[source].setdefault(target, [])
                out[source][target].append(
                    {
                        "name": spec.get("name"),
                        "predicate": spec.get("predicate"),
                        "directed": spec.get("directed", True),
                        "description": spec.get("description"),
                    }
                )
        return out

    def get_anchor_field(self, entity_name: str) -> str:
        """Get the anchor field name for the given entity type from YAML schema.

        Now reads from vector.anchored_to instead of separate anchor field.

        Args:
            entity_name: Name of the entity type.

        Returns:
            str: Anchor field name.

        Raises:
            YamlTranslatorError: If anchor field not found.
        """
        if not entity_name:
            raise YamlTranslatorError("Empty entity name")

        # Get entity spec with inheritance resolution
        entity_spec = self._resolve_entity_with_inheritance(entity_name)

        # Look for vector field with anchored_to
        fields = entity_spec.get("fields", {})
        for _field_name, field_def in fields.items():
            if isinstance(field_def, dict) and field_def.get("type") == "vector":
                anchored_to = field_def.get("anchored_to")
                if anchored_to:
                    return str(anchored_to)

        raise YamlTranslatorError(
            f"Entity '{entity_name}' has no vector field with 'anchored_to' property"
        )

    def _resolve_entity_with_inheritance(self, entity_name: str) -> dict[str, Any]:
        """Resolve entity specification with full inheritance chain."""
        name_l = entity_name.lower()
        emap = self._entities_map()
        spec_raw = emap.get(name_l)
        if not spec_raw:
            raise YamlTranslatorError(f"Entity '{entity_name}' not found in YAML schema")

        # If no parent, return as-is
        parent_name = spec_raw.get("parent")
        if not parent_name:
            return spec_raw

        # Recursively resolve parent and merge fields
        parent_spec = self._resolve_entity_with_inheritance(parent_name)

        # Merge parent fields with child fields (child overrides parent)
        merged_fields = parent_spec.get("fields", {}).copy()
        merged_fields.update(spec_raw.get("fields", {}))

        # Create merged spec
        merged_spec = spec_raw.copy()
        merged_spec["fields"] = merged_fields

        return merged_spec

    def get_see_also_config(self, entity_name: str) -> dict[str, Any] | None:
        """Get the see_also configuration for the given entity type from YAML schema.

        Returns:
            Dict with keys: enabled, threshold, limit, target_types
            None if see_also is not configured for this entity
        """
        if not entity_name:
            raise YamlTranslatorError("Empty entity name")
        name_l = entity_name.lower()
        emap = self._entities_map()
        spec_raw = emap.get(name_l)
        if not spec_raw:
            raise YamlTranslatorError(f"Entity '{entity_name}' not found in YAML schema")

        see_also = spec_raw.get("see_also")
        if not see_also or not isinstance(see_also, dict):
            return None

        # Validate required fields
        if not see_also.get("enabled", False):
            return None

        return {
            "enabled": see_also.get("enabled", False),
            "threshold": float(see_also.get("threshold", 0.7)),
            "limit": int(see_also.get("limit", 3)),
            "target_types": list(see_also.get("target_types", [])),
        }

    def build_anchor_text(self, memory) -> str:
        """Build anchor text for embedding from YAML-defined anchor field.

        NO hardcoded field names - reads anchor field from YAML schema.

        Args:
            memory: Memory object containing payload data.

        Returns:
            str: Anchor text for embedding.

        Raises:
            YamlTranslatorError: If anchor field is missing or invalid.
        """
        mem_type = getattr(memory, "memory_type", None)
        if not mem_type:
            raise YamlTranslatorError(
                "Memory object missing 'memory_type' field",
                operation="build_anchor_text",
            )

        # Get anchor field from YAML schema
        anchor_field = self.get_anchor_field(mem_type)

        # Try to get anchor text from the specified field
        anchor_text = None

        # First check if it's a core field on the Memory object
        if hasattr(memory, anchor_field):
            anchor_text = getattr(memory, anchor_field, None)
        # Otherwise check in the payload
        elif hasattr(memory, "payload") and isinstance(memory.payload, dict):
            anchor_text = memory.payload.get(anchor_field)

        if isinstance(anchor_text, str):
            stripped_text = anchor_text.strip()
            if stripped_text:
                return stripped_text

        # Anchor field missing, empty, or invalid
        raise YamlTranslatorError(
            f"Anchor field '{anchor_field}' is missing, empty, or invalid "
            f"for memory type '{mem_type}'",
            operation="build_anchor_text",
            context={
                "memory_type": mem_type,
                "anchor_field": anchor_field,
                "anchor_value": anchor_text,
            },
        )

    def _fields_contract(self, spec: dict[str, Any]) -> tuple[list[str], list[str]]:
        """Extract required and optional fields from entity specification.

        Supports either:
        - fields: {required:[...], optional:[...]} format
        - Individual field definitions with required flags

        Args:
            spec: Entity specification dictionary.

        Returns:
            tuple[list[str], list[str]]: (required_fields, optional_fields)
        """
        # supports either fields: {required:[...], optional:[...]} OR flat dict
        fields = spec.get("fields") or {}
        if "required" in fields or "optional" in fields:
            req = [str(x) for x in fields.get("required", [])]
            opt = [str(x) for x in fields.get("optional", [])]
            return req, opt

        # Resolve all fields including inherited ones
        all_fields = self._resolve_inherited_fields(spec)

        # Parse individual field definitions for required flag
        required_fields = []
        optional_fields = []

        for field_name, field_def in all_fields.items():
            if isinstance(field_def, dict) and field_def.get("required", False):
                # Skip system fields - they're handled by the system
                if not field_def.get("system", False):
                    required_fields.append(field_name)
                else:
                    optional_fields.append(field_name)
            else:
                optional_fields.append(field_name)

        return required_fields, optional_fields

    def _resolve_inherited_fields(self, spec: dict[str, Any]) -> dict[str, Any]:
        """Resolve all fields including inherited ones from parent entities.

        Args:
            spec: Entity specification dictionary.

        Returns:
            dict[str, Any]: Dictionary containing all fields (inherited + current).
        """
        all_fields = {}
        entities_map = self._entities_map()

        # If entity has a parent, resolve parent fields first
        parent_name = spec.get("parent")
        if parent_name:
            parent_spec = entities_map.get(parent_name.lower())
            if parent_spec:
                # Recursively resolve parent fields
                parent_fields = self._resolve_inherited_fields(parent_spec)
                all_fields.update(parent_fields)

        # Add/override with current entity's fields
        current_fields = spec.get("fields") or {}
        all_fields.update(current_fields)

        return all_fields

    def _get_system_fields(self, spec: dict[str, Any]) -> set[str]:
        """Extract system fields from YAML schema (fields marked with system: true).

        Args:
            spec: Entity specification dictionary.

        Returns:
            set[str]: Set of field names that are marked as system fields.
        """
        system_fields = set()
        all_fields = self._resolve_inherited_fields(spec)

        for field_name, field_def in all_fields.items():
            if isinstance(field_def, dict) and field_def.get("system", False):
                system_fields.add(field_name)

        return system_fields

    def _validate_enum_fields(self, memory_type: str, payload: dict[str, Any]) -> None:
        """Validate enum fields against YAML schema choices.

        Args:
            memory_type: Entity type from YAML schema.
            payload: Memory data to validate.

        Raises:
            YamlTranslatorError: If enum field has invalid value.
        """
        emap = self._entities_map()
        spec = emap.get(memory_type.lower())
        if not spec:
            return  # Entity validation happens elsewhere

        # Get field definitions for this entity type
        fields = spec.get("fields", {})

        # Check each field in the payload
        for field_name, field_value in payload.items():
            if field_name in fields:
                field_def = fields[field_name]

                # Check if this is an enum field
                if field_def.get("type") == "enum":
                    choices = field_def.get("choices", [])

                    # Validate the value against choices
                    if field_value is not None and field_value not in choices:
                        raise YamlTranslatorError(
                            f"Invalid {field_name} value '{field_value}'. Valid choices: {choices}",
                            context={
                                "memory_type": memory_type,
                                "field_name": field_name,
                                "invalid_value": field_value,
                                "valid_choices": choices,
                            },
                        )

    def validate_memory_against_yaml(
        self, memory_type: str, payload: dict[str, Any]
    ) -> dict[str, Any]:
        """Validate memory payload against YAML schema and return cleaned payload."""
        if not memory_type:
            raise YamlTranslatorError("memory_type is required")
        if payload is None:
            raise YamlTranslatorError("payload is required")

        # Strict validation - entity type MUST exist in YAML
        emap = self._entities_map()
        spec = emap.get(memory_type.lower())
        if not spec:
            raise YamlTranslatorError(
                f"Unknown entity type '{memory_type}'. All types must be defined in YAML schema.",
                context={
                    "memory_type": memory_type,
                    "available_types": list(emap.keys()),
                },
            )

        req, _opt = self._fields_contract(spec)
        missing = [k for k in req if not payload.get(k)]
        if missing:
            raise YamlTranslatorError(
                f"Missing required fields: {missing}",
                context={"memory_type": memory_type},
            )

        # Validate enum fields against YAML schema choices
        self._validate_enum_fields(memory_type, payload)

        # Validate that all fields are defined in YAML schema
        req, opt = self._fields_contract(spec)
        valid_fields = set(req + opt)
        system_fields = self._get_system_fields(spec)
        invalid_fields = set(payload.keys()) - valid_fields - system_fields
        if invalid_fields:
            raise YamlTranslatorError(
                f"Invalid fields not defined in schema: {sorted(invalid_fields)}",
                context={
                    "memory_type": memory_type,
                    "valid_fields": sorted(valid_fields),
                    "invalid_fields": sorted(invalid_fields),
                },
            )

        # Strip system-reserved fields if present
        cleaned = dict(payload)
        for syskey in system_fields:
            cleaned.pop(syskey, None)
        return cleaned

    def create_memory_from_yaml(self, memory_type: str, payload: dict[str, Any], user_id: str):
        """Create a Memory object from YAML-validated payload."""

        # Get anchor field from YAML schema
        anchor_field = self.get_anchor_field(memory_type)

        # Extract anchor text from payload
        anchor_text = payload.get(anchor_field)
        if not anchor_text or not isinstance(anchor_text, str):
            raise YamlTranslatorError(
                f"Missing or invalid anchor field '{anchor_field}' in payload "
                f"for memory type '{memory_type}'"
            )

        # Validate full payload against YAML schema
        validated_payload = self.validate_memory_against_yaml(memory_type, payload)

        # Construct Memory with YAML-defined payload only
        return Memory(
            memory_type=memory_type,
            payload=validated_payload,
            user_id=user_id,
        )

    def get_entity_model(self, entity_name: str):
        """Get Pydantic model from TypeRegistry - NO REDUNDANCY."""
        return get_entity_model(entity_name)

schema property

Get the loaded YAML schema, loading it if necessary.

__init__(yaml_path=None)

Initialize YamlTranslator with YAML schema path.

Parameters:

Name Type Description Default
yaml_path str | None

Path to YAML schema file. If None, uses MEMG_YAML_SCHEMA env var.

None

Raises:

Type Description
YamlTranslatorError

If YAML path not provided or TypeRegistry initialization fails.

Source code in src/memg_core/core/yaml_translator.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def __init__(self, yaml_path: str | None = None) -> None:
    """Initialize YamlTranslator with YAML schema path.

    Args:
        yaml_path: Path to YAML schema file. If None, uses MEMG_YAML_SCHEMA env var.

    Raises:
        YamlTranslatorError: If YAML path not provided or TypeRegistry initialization fails.
    """
    # Require explicit YAML path - no silent defaults
    if yaml_path:
        self.yaml_path = yaml_path
    else:
        env_path = os.getenv("MEMG_YAML_SCHEMA")
        if not env_path:
            raise YamlTranslatorError(
                "YAML schema path required. Set MEMG_YAML_SCHEMA environment variable "
                "or provide yaml_path parameter. No defaults allowed."
            )
        self.yaml_path = env_path

    self._schema: dict[str, Any] | None = None
    # NO model cache - TypeRegistry handles all caching

    # Initialize TypeRegistry from YAML - crash early if invalid
    try:
        initialize_types_from_yaml(self.yaml_path)
    except Exception as e:
        raise YamlTranslatorError(f"Failed to initialize TypeRegistry from YAML: {e}") from e

build_anchor_text(memory)

Build anchor text for embedding from YAML-defined anchor field.

NO hardcoded field names - reads anchor field from YAML schema.

Parameters:

Name Type Description Default
memory

Memory object containing payload data.

required

Returns:

Name Type Description
str str

Anchor text for embedding.

Raises:

Type Description
YamlTranslatorError

If anchor field is missing or invalid.

Source code in src/memg_core/core/yaml_translator.py
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
def build_anchor_text(self, memory) -> str:
    """Build anchor text for embedding from YAML-defined anchor field.

    NO hardcoded field names - reads anchor field from YAML schema.

    Args:
        memory: Memory object containing payload data.

    Returns:
        str: Anchor text for embedding.

    Raises:
        YamlTranslatorError: If anchor field is missing or invalid.
    """
    mem_type = getattr(memory, "memory_type", None)
    if not mem_type:
        raise YamlTranslatorError(
            "Memory object missing 'memory_type' field",
            operation="build_anchor_text",
        )

    # Get anchor field from YAML schema
    anchor_field = self.get_anchor_field(mem_type)

    # Try to get anchor text from the specified field
    anchor_text = None

    # First check if it's a core field on the Memory object
    if hasattr(memory, anchor_field):
        anchor_text = getattr(memory, anchor_field, None)
    # Otherwise check in the payload
    elif hasattr(memory, "payload") and isinstance(memory.payload, dict):
        anchor_text = memory.payload.get(anchor_field)

    if isinstance(anchor_text, str):
        stripped_text = anchor_text.strip()
        if stripped_text:
            return stripped_text

    # Anchor field missing, empty, or invalid
    raise YamlTranslatorError(
        f"Anchor field '{anchor_field}' is missing, empty, or invalid "
        f"for memory type '{mem_type}'",
        operation="build_anchor_text",
        context={
            "memory_type": mem_type,
            "anchor_field": anchor_field,
            "anchor_value": anchor_text,
        },
    )

create_memory_from_yaml(memory_type, payload, user_id)

Create a Memory object from YAML-validated payload.

Source code in src/memg_core/core/yaml_translator.py
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
def create_memory_from_yaml(self, memory_type: str, payload: dict[str, Any], user_id: str):
    """Create a Memory object from YAML-validated payload."""

    # Get anchor field from YAML schema
    anchor_field = self.get_anchor_field(memory_type)

    # Extract anchor text from payload
    anchor_text = payload.get(anchor_field)
    if not anchor_text or not isinstance(anchor_text, str):
        raise YamlTranslatorError(
            f"Missing or invalid anchor field '{anchor_field}' in payload "
            f"for memory type '{memory_type}'"
        )

    # Validate full payload against YAML schema
    validated_payload = self.validate_memory_against_yaml(memory_type, payload)

    # Construct Memory with YAML-defined payload only
    return Memory(
        memory_type=memory_type,
        payload=validated_payload,
        user_id=user_id,
    )

debug_relation_map()

Return a nested relation map for debugging/printing.

Structure: { source: { target: [ {name, predicate, directed, description} ... ] } }

Source code in src/memg_core/core/yaml_translator.py
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
def debug_relation_map(self) -> dict[str, dict[str, list[dict[str, Any]]]]:
    """Return a nested relation map for debugging/printing.

    Structure:
    {
      source: {
        target: [ {name, predicate, directed, description} ... ]
      }
    }
    """
    out: dict[str, dict[str, list[dict[str, Any]]]] = {}
    for source in self.get_entity_types():
        specs = self.get_relations_for_source(source)
        if not specs:
            continue
        if source not in out:
            out[source] = {}
        for spec in specs:
            target = spec["target"]
            out[source].setdefault(target, [])
            out[source][target].append(
                {
                    "name": spec.get("name"),
                    "predicate": spec.get("predicate"),
                    "directed": spec.get("directed", True),
                    "description": spec.get("description"),
                }
            )
    return out

get_anchor_field(entity_name)

Get the anchor field name for the given entity type from YAML schema.

Now reads from vector.anchored_to instead of separate anchor field.

Parameters:

Name Type Description Default
entity_name str

Name of the entity type.

required

Returns:

Name Type Description
str str

Anchor field name.

Raises:

Type Description
YamlTranslatorError

If anchor field not found.

Source code in src/memg_core/core/yaml_translator.py
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
def get_anchor_field(self, entity_name: str) -> str:
    """Get the anchor field name for the given entity type from YAML schema.

    Now reads from vector.anchored_to instead of separate anchor field.

    Args:
        entity_name: Name of the entity type.

    Returns:
        str: Anchor field name.

    Raises:
        YamlTranslatorError: If anchor field not found.
    """
    if not entity_name:
        raise YamlTranslatorError("Empty entity name")

    # Get entity spec with inheritance resolution
    entity_spec = self._resolve_entity_with_inheritance(entity_name)

    # Look for vector field with anchored_to
    fields = entity_spec.get("fields", {})
    for _field_name, field_def in fields.items():
        if isinstance(field_def, dict) and field_def.get("type") == "vector":
            anchored_to = field_def.get("anchored_to")
            if anchored_to:
                return str(anchored_to)

    raise YamlTranslatorError(
        f"Entity '{entity_name}' has no vector field with 'anchored_to' property"
    )

get_entity_model(entity_name)

Get Pydantic model from TypeRegistry - NO REDUNDANCY.

Source code in src/memg_core/core/yaml_translator.py
619
620
621
def get_entity_model(self, entity_name: str):
    """Get Pydantic model from TypeRegistry - NO REDUNDANCY."""
    return get_entity_model(entity_name)

get_entity_types()

Get list of available entity types from YAML schema.

Source code in src/memg_core/core/yaml_translator.py
123
124
125
def get_entity_types(self) -> list[str]:
    """Get list of available entity types from YAML schema."""
    return list(self._entities_map().keys())

get_labels_for_predicates(source_type, predicates, neighbor_label=None)

Expand predicate names to concrete relationship labels for a given source.

Parameters:

Name Type Description Default
source_type str

Source entity type name

required
predicates list[str] | None

List of predicate names to include (case-insensitive). If None, include all.

required
neighbor_label str | None

Optional target entity type filter (case-insensitive)

None

Returns:

Type Description
list[str]

List of concrete relationship labels (table names) matching the filter.

Source code in src/memg_core/core/yaml_translator.py
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
def get_labels_for_predicates(
    self,
    source_type: str,
    predicates: list[str] | None,
    neighbor_label: str | None = None,
) -> list[str]:
    """Expand predicate names to concrete relationship labels for a given source.

    Args:
        source_type: Source entity type name
        predicates: List of predicate names to include (case-insensitive). If None, include all.
        neighbor_label: Optional target entity type filter (case-insensitive)

    Returns:
        List of concrete relationship labels (table names) matching the filter.
    """
    if not source_type:
        raise YamlTranslatorError("Empty source_type")

    preds_u = set(p.upper() for p in predicates) if predicates else None
    neighbor_l = neighbor_label.lower() if neighbor_label else None

    labels: list[str] = []
    for spec in self.get_relations_for_source(source_type):
        if preds_u is not None and spec["predicate"].upper() not in preds_u:
            continue
        if neighbor_l is not None and spec["target"].lower() != neighbor_l:
            continue
        labels.append(
            self.relationship_table_name(
                source=spec["source"],
                predicate=spec["predicate"],
                target=spec["target"],
                directed=spec["directed"],
            )
        )
    return labels

get_relations_for_source(entity_name)

Get normalized relation specs for a source entity in target-first schema.

Returns list of dicts with keys
  • source (str)
  • target (str)
  • name (str | None)
  • description (str | None)
  • predicate (str)
  • directed (bool)
Source code in src/memg_core/core/yaml_translator.py
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
def get_relations_for_source(self, entity_name: str) -> list[dict[str, Any]]:
    """Get normalized relation specs for a source entity in target-first schema.

    Returns list of dicts with keys:
        - source (str)
        - target (str)
        - name (str | None)
        - description (str | None)
        - predicate (str)
        - directed (bool)
    """
    if not entity_name:
        raise YamlTranslatorError("Empty entity name")

    source_l = entity_name.lower()
    relations_map = self._get_relations_mapping_for_entity(source_l)
    if not relations_map:
        return []

    out: list[dict[str, Any]] = []
    for target_l, items in relations_map.items():
        for item in items:
            predicate = item.get("predicate")
            if not predicate or not isinstance(predicate, str):
                # Skip invalid entries - strict behavior can be added later
                continue
            directed = bool(item.get("directed", True))
            out.append(
                {
                    "source": source_l,
                    "target": target_l,
                    "name": item.get("name"),
                    "description": item.get("description"),
                    "predicate": predicate.upper(),
                    "directed": directed,
                }
            )
    return out

get_see_also_config(entity_name)

Get the see_also configuration for the given entity type from YAML schema.

Returns:

Type Description
dict[str, Any] | None

Dict with keys: enabled, threshold, limit, target_types

dict[str, Any] | None

None if see_also is not configured for this entity

Source code in src/memg_core/core/yaml_translator.py
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
def get_see_also_config(self, entity_name: str) -> dict[str, Any] | None:
    """Get the see_also configuration for the given entity type from YAML schema.

    Returns:
        Dict with keys: enabled, threshold, limit, target_types
        None if see_also is not configured for this entity
    """
    if not entity_name:
        raise YamlTranslatorError("Empty entity name")
    name_l = entity_name.lower()
    emap = self._entities_map()
    spec_raw = emap.get(name_l)
    if not spec_raw:
        raise YamlTranslatorError(f"Entity '{entity_name}' not found in YAML schema")

    see_also = spec_raw.get("see_also")
    if not see_also or not isinstance(see_also, dict):
        return None

    # Validate required fields
    if not see_also.get("enabled", False):
        return None

    return {
        "enabled": see_also.get("enabled", False),
        "threshold": float(see_also.get("threshold", 0.7)),
        "limit": int(see_also.get("limit", 3)),
        "target_types": list(see_also.get("target_types", [])),
    }

relationship_table_name(source, predicate, target, *, directed=True) staticmethod

Generate relationship table name.

For now, table name does not encode direction; direction affects creation/query semantics. Canonicalization for undirected pairs can be added here later if decided.

Source code in src/memg_core/core/yaml_translator.py
197
198
199
200
201
202
203
204
205
206
207
208
209
210
@staticmethod
def relationship_table_name(
    source: str,
    predicate: str,
    target: str,
    *,
    directed: bool = True,  # noqa: unused-argument
) -> str:
    """Generate relationship table name.

    For now, table name does not encode direction; direction affects creation/query semantics.
    Canonicalization for undirected pairs can be added here later if decided.
    """
    return f"{str(source).upper()}_{str(predicate).upper()}_{str(target).upper()}"

validate_memory_against_yaml(memory_type, payload)

Validate memory payload against YAML schema and return cleaned payload.

Source code in src/memg_core/core/yaml_translator.py
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
def validate_memory_against_yaml(
    self, memory_type: str, payload: dict[str, Any]
) -> dict[str, Any]:
    """Validate memory payload against YAML schema and return cleaned payload."""
    if not memory_type:
        raise YamlTranslatorError("memory_type is required")
    if payload is None:
        raise YamlTranslatorError("payload is required")

    # Strict validation - entity type MUST exist in YAML
    emap = self._entities_map()
    spec = emap.get(memory_type.lower())
    if not spec:
        raise YamlTranslatorError(
            f"Unknown entity type '{memory_type}'. All types must be defined in YAML schema.",
            context={
                "memory_type": memory_type,
                "available_types": list(emap.keys()),
            },
        )

    req, _opt = self._fields_contract(spec)
    missing = [k for k in req if not payload.get(k)]
    if missing:
        raise YamlTranslatorError(
            f"Missing required fields: {missing}",
            context={"memory_type": memory_type},
        )

    # Validate enum fields against YAML schema choices
    self._validate_enum_fields(memory_type, payload)

    # Validate that all fields are defined in YAML schema
    req, opt = self._fields_contract(spec)
    valid_fields = set(req + opt)
    system_fields = self._get_system_fields(spec)
    invalid_fields = set(payload.keys()) - valid_fields - system_fields
    if invalid_fields:
        raise YamlTranslatorError(
            f"Invalid fields not defined in schema: {sorted(invalid_fields)}",
            context={
                "memory_type": memory_type,
                "valid_fields": sorted(valid_fields),
                "invalid_fields": sorted(invalid_fields),
            },
        )

    # Strip system-reserved fields if present
    cleaned = dict(payload)
    for syskey in system_fields:
        cleaned.pop(syskey, None)
    return cleaned

_get_client()

Get or create singleton client from environment variables.

Returns:

Name Type Description
MemgClient MemgClient

Singleton client instance.

Raises:

Type Description
RuntimeError

If required environment variables are not set.

Source code in src/memg_core/api/public.py
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
def _get_client() -> MemgClient:
    """Get or create singleton client from environment variables.

    Returns:
        MemgClient: Singleton client instance.

    Raises:
        RuntimeError: If required environment variables are not set.
    """
    global _CLIENT
    if _CLIENT is None:
        yaml_path = os.environ.get("MEMG_YAML_PATH")
        db_path = os.environ.get("MEMG_DB_PATH")

        if not yaml_path or not db_path:
            raise RuntimeError("MEMG_YAML_PATH and MEMG_DB_PATH environment variables must be set")

        _CLIENT = MemgClient(yaml_path, db_path)
    return _CLIENT

add_memory(memory_type, payload, user_id)

Add memory using environment-based client.

Parameters:

Name Type Description Default
memory_type str

Entity type from YAML schema (e.g., 'task', 'note').

required
payload dict[str, Any]

Memory data conforming to YAML schema.

required
user_id str

Owner of the memory.

required

Returns:

Name Type Description
str str

Human-readable ID (HRID) for the created memory.

Source code in src/memg_core/api/public.py
268
269
270
271
272
273
274
275
276
277
278
279
def add_memory(memory_type: str, payload: dict[str, Any], user_id: str) -> str:
    """Add memory using environment-based client.

    Args:
        memory_type: Entity type from YAML schema (e.g., 'task', 'note').
        payload: Memory data conforming to YAML schema.
        user_id: Owner of the memory.

    Returns:
        str: Human-readable ID (HRID) for the created memory.
    """
    return _get_client().add_memory(memory_type, payload, user_id)

add_relationship(from_memory_hrid, to_memory_hrid, relation_type, from_memory_type, to_memory_type, user_id, properties=None)

Add relationship using environment-based client.

Parameters:

Name Type Description Default
from_memory_hrid str

Source memory HRID.

required
to_memory_hrid str

Target memory HRID.

required
relation_type str

Relationship type from YAML schema.

required
from_memory_type str

Source memory entity type.

required
to_memory_type str

Target memory entity type.

required
user_id str

User ID for ownership verification.

required
properties dict[str, Any] | None

Optional relationship properties.

None
Source code in src/memg_core/api/public.py
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
def add_relationship(
    from_memory_hrid: str,
    to_memory_hrid: str,
    relation_type: str,
    from_memory_type: str,
    to_memory_type: str,
    user_id: str,
    properties: dict[str, Any] | None = None,
) -> None:
    """Add relationship using environment-based client.

    Args:
        from_memory_hrid: Source memory HRID.
        to_memory_hrid: Target memory HRID.
        relation_type: Relationship type from YAML schema.
        from_memory_type: Source memory entity type.
        to_memory_type: Target memory entity type.
        user_id: User ID for ownership verification.
        properties: Optional relationship properties.
    """
    _get_client().add_relationship(
        from_memory_hrid,
        to_memory_hrid,
        relation_type,
        from_memory_type,
        to_memory_type,
        user_id,
        properties,
    )

create_memory_service(db_clients)

Factory function to create a MemoryService instance.

Parameters:

Name Type Description Default
db_clients

DatabaseClients instance (after init_dbs() called).

required

Returns:

Name Type Description
MemoryService MemoryService

Configured MemoryService instance.

Source code in src/memg_core/core/pipelines/indexer.py
437
438
439
440
441
442
443
444
445
446
def create_memory_service(db_clients) -> MemoryService:
    """Factory function to create a MemoryService instance.

    Args:
        db_clients: DatabaseClients instance (after init_dbs() called).

    Returns:
        MemoryService: Configured MemoryService instance.
    """
    return MemoryService(db_clients)

create_search_service(db_clients)

Factory function to create a SearchService instance.

Parameters:

Name Type Description Default
db_clients

DatabaseClients instance (after init_dbs() called).

required

Returns:

Name Type Description
SearchService SearchService

Configured SearchService instance.

Source code in src/memg_core/core/pipelines/retrieval.py
482
483
484
485
486
487
488
489
490
491
def create_search_service(db_clients) -> SearchService:
    """Factory function to create a SearchService instance.

    Args:
        db_clients: DatabaseClients instance (after init_dbs() called).

    Returns:
        SearchService: Configured SearchService instance.
    """
    return SearchService(db_clients)

delete_memory(hrid, user_id, memory_type=None)

Delete memory using environment-based client.

Parameters:

Name Type Description Default
hrid str

Human-readable ID of the memory to delete.

required
user_id str

User ID for ownership verification.

required
memory_type str | None

Optional memory type hint.

None

Returns:

Name Type Description
bool bool

True if deletion succeeded, False otherwise.

Source code in src/memg_core/api/public.py
317
318
319
320
321
322
323
324
325
326
327
328
def delete_memory(hrid: str, user_id: str, memory_type: str | None = None) -> bool:
    """Delete memory using environment-based client.

    Args:
        hrid: Human-readable ID of the memory to delete.
        user_id: User ID for ownership verification.
        memory_type: Optional memory type hint.

    Returns:
        bool: True if deletion succeeded, False otherwise.
    """
    return _get_client().delete_memory(hrid, user_id, memory_type)

delete_relationship(from_memory_hrid, to_memory_hrid, relation_type, from_memory_type=None, to_memory_type=None, user_id=None)

Delete relationship using environment-based client.

Parameters:

Name Type Description Default
from_memory_hrid str

Source memory HRID.

required
to_memory_hrid str

Target memory HRID.

required
relation_type str

Relationship type from YAML schema.

required
from_memory_type str | None

Source memory entity type (inferred from HRID if not provided).

None
to_memory_type str | None

Target memory entity type (inferred from HRID if not provided).

None
user_id str | None

User ID for ownership verification (required).

None

Returns:

Name Type Description
bool bool

True if deletion succeeded, False if relationship not found.

Source code in src/memg_core/api/public.py
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
def delete_relationship(
    from_memory_hrid: str,
    to_memory_hrid: str,
    relation_type: str,
    from_memory_type: str | None = None,
    to_memory_type: str | None = None,
    user_id: str | None = None,
) -> bool:
    """Delete relationship using environment-based client.

    Args:
        from_memory_hrid: Source memory HRID.
        to_memory_hrid: Target memory HRID.
        relation_type: Relationship type from YAML schema.
        from_memory_type: Source memory entity type (inferred from HRID if not provided).
        to_memory_type: Target memory entity type (inferred from HRID if not provided).
        user_id: User ID for ownership verification (required).

    Returns:
        bool: True if deletion succeeded, False if relationship not found.
    """
    return _get_client().delete_relationship(
        from_memory_hrid,
        to_memory_hrid,
        relation_type,
        from_memory_type,
        to_memory_type,
        user_id,
    )

get_memories(user_id, memory_type=None, filters=None, limit=50, offset=0, include_neighbors=False, hops=1)

Get memories using environment-based client.

Parameters:

Name Type Description Default
user_id str

User ID for ownership verification.

required
memory_type str | None

Optional memory type filter (e.g., "task", "note").

None
filters dict[str, Any] | None

Optional field filters (e.g., {"status": "open", "priority": "high"}).

None
limit int

Maximum number of memories to return (default 50).

50
offset int

Number of memories to skip for pagination (default 0).

0
include_neighbors bool

Whether to include neighbor nodes via graph traversal.

False
hops int

Number of hops for neighbor expansion (default 1).

1

Returns:

Type Description
list[dict[str, Any]]

list[dict[str, Any]]: List of memory data with full payloads.

Source code in src/memg_core/api/public.py
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
def get_memories(
    user_id: str,
    memory_type: str | None = None,
    filters: dict[str, Any] | None = None,
    limit: int = 50,
    offset: int = 0,
    include_neighbors: bool = False,
    hops: int = 1,
) -> list[dict[str, Any]]:
    """Get memories using environment-based client.

    Args:
        user_id: User ID for ownership verification.
        memory_type: Optional memory type filter (e.g., "task", "note").
        filters: Optional field filters (e.g., {"status": "open", "priority": "high"}).
        limit: Maximum number of memories to return (default 50).
        offset: Number of memories to skip for pagination (default 0).
        include_neighbors: Whether to include neighbor nodes via graph traversal.
        hops: Number of hops for neighbor expansion (default 1).

    Returns:
        list[dict[str, Any]]: List of memory data with full payloads.
    """
    return _get_client().get_memories(
        user_id, memory_type, filters, limit, offset, include_neighbors, hops
    )

get_memory(hrid, user_id, memory_type=None)

Get memory using environment-based client.

Parameters:

Name Type Description Default
hrid str

Human-readable identifier of the memory.

required
user_id str

User ID for ownership verification.

required
memory_type str | None

Optional memory type hint (inferred from HRID if not provided).

None

Returns:

Type Description
dict[str, Any] | None

dict[str, Any] | None: Memory data with full payload, or None if not found.

Source code in src/memg_core/api/public.py
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
def get_memory(
    hrid: str,
    user_id: str,
    memory_type: str | None = None,
) -> dict[str, Any] | None:
    """Get memory using environment-based client.

    Args:
        hrid: Human-readable identifier of the memory.
        user_id: User ID for ownership verification.
        memory_type: Optional memory type hint (inferred from HRID if not provided).

    Returns:
        dict[str, Any] | None: Memory data with full payload, or None if not found.
    """
    return _get_client().get_memory(hrid, user_id, memory_type)

get_services()

Get services from singleton client (MCP server compatibility).

Returns:

Type Description
tuple[MemoryService, SearchService, YamlTranslator]

tuple[MemoryService, SearchService, YamlTranslator]: Service instances for direct access.

Source code in src/memg_core/api/public.py
471
472
473
474
475
476
477
478
479
def get_services() -> tuple[MemoryService, SearchService, YamlTranslator]:
    """Get services from singleton client (MCP server compatibility).

    Returns:
        tuple[MemoryService, SearchService, YamlTranslator]: Service instances for direct access.
    """
    client = _get_client()
    yaml_translator = YamlTranslator(os.environ.get("MEMG_YAML_PATH"))
    return client._memory_service, client._search_service, yaml_translator

search(query, user_id, memory_type=None, limit=10, score_threshold=None, decay_threshold=None, **kwargs)

Search memories using environment-based client.

Parameters:

Name Type Description Default
query str

Text to search for.

required
user_id str

User ID for filtering results.

required
memory_type str | None

Optional memory type filter.

None
limit int

Maximum number of results to return.

10
score_threshold float | None

Minimum similarity score threshold (0.0-1.0).

None
decay_threshold float | None

Minimum neighbor relevance threshold (0.0-1.0).

None
**kwargs

Additional search parameters.

{}

Returns:

Name Type Description
SearchResult SearchResult

Search result with explicit seed/neighbor separation, including full payloads for seeds and relationships.

Source code in src/memg_core/api/public.py
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
def search(
    query: str,
    user_id: str,
    memory_type: str | None = None,
    limit: int = 10,
    score_threshold: float | None = None,
    decay_threshold: float | None = None,
    **kwargs,
) -> SearchResult:
    """Search memories using environment-based client.

    Args:
        query: Text to search for.
        user_id: User ID for filtering results.
        memory_type: Optional memory type filter.
        limit: Maximum number of results to return.
        score_threshold: Minimum similarity score threshold (0.0-1.0).
        decay_threshold: Minimum neighbor relevance threshold (0.0-1.0).
        **kwargs: Additional search parameters.

    Returns:
        SearchResult: Search result with explicit seed/neighbor separation,
            including full payloads for seeds and relationships.
    """
    return _get_client().search(
        query,
        user_id,
        memory_type,
        limit,
        score_threshold=score_threshold,
        decay_threshold=decay_threshold,
        **kwargs,
    )

shutdown_services()

Shutdown singleton client.

Closes database connections and cleans up resources.

Source code in src/memg_core/api/public.py
459
460
461
462
463
464
465
466
467
def shutdown_services():
    """Shutdown singleton client.

    Closes database connections and cleans up resources.
    """
    global _CLIENT
    if _CLIENT:
        _CLIENT.close()
        _CLIENT = None

update_memory(hrid, payload_updates, user_id, memory_type=None)

Update memory using environment-based client.

Parameters:

Name Type Description Default
hrid str

Memory HRID to update.

required
payload_updates dict[str, Any]

Dictionary of fields to update (only changed fields).

required
user_id str

User ID for ownership verification.

required
memory_type str | None

Optional memory type hint (inferred from HRID if not provided).

None

Returns:

Name Type Description
bool bool

True if update succeeded, False otherwise.

Source code in src/memg_core/api/public.py
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
def update_memory(
    hrid: str,
    payload_updates: dict[str, Any],
    user_id: str,
    memory_type: str | None = None,
) -> bool:
    """Update memory using environment-based client.

    Args:
        hrid: Memory HRID to update.
        payload_updates: Dictionary of fields to update (only changed fields).
        user_id: User ID for ownership verification.
        memory_type: Optional memory type hint (inferred from HRID if not provided).

    Returns:
        bool: True if update succeeded, False otherwise.
    """
    return _get_client().update_memory(hrid, payload_updates, user_id, memory_type)

Core Models

Core data models and types:

Core models for the memory system.

_MAX_SCORE_TOLERANCE = 1.001 module-attribute

Memory

Bases: BaseModel

Core memory model with YAML-driven payload validation.

Attributes:

Name Type Description
id str

Unique identifier (UUID or HRID).

user_id str

Owner of the memory.

memory_type str

Entity type from YAML schema.

payload dict[str, Any]

Entity-specific fields.

vector list[float] | None

Embedding vector.

created_at datetime

Creation timestamp.

updated_at datetime | None

Last update timestamp.

hrid str | None

Human-readable identifier.

Source code in src/memg_core/core/models.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
class Memory(BaseModel):
    """Core memory model with YAML-driven payload validation.

    Attributes:
        id: Unique identifier (UUID or HRID).
        user_id: Owner of the memory.
        memory_type: Entity type from YAML schema.
        payload: Entity-specific fields.
        vector: Embedding vector.
        created_at: Creation timestamp.
        updated_at: Last update timestamp.
        hrid: Human-readable identifier.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Core fields only - NO hardcoded entity-specific fields
    # NO DEFAULTS - crash early if required fields missing
    id: str = Field(default_factory=lambda: str(uuid4()))  # System-generated ID only
    user_id: str  # REQUIRED - no default
    memory_type: str  # REQUIRED - no default, must come from YAML
    payload: dict[str, Any] = Field(default_factory=dict)  # Entity fields container
    vector: list[float] | None = None  # System-generated vector
    created_at: datetime = Field(default_factory=lambda: datetime.now(UTC))  # System timestamp
    updated_at: datetime | None = None

    # Human-readable id (e.g., MEMO_AAA001)
    hrid: str | None = None

    @field_validator("memory_type")
    @classmethod
    def memory_type_not_empty(cls, v: str) -> str:
        """Validate that memory_type is not empty.

        Args:
            v: Memory type value.

        Returns:
            str: Stripped memory type.

        Raises:
            ValueError: If memory_type is empty or whitespace.
        """
        if not v or not v.strip():
            raise ValueError("memory_type cannot be empty")
        return v.strip()

    # (properties removed – dynamic __getattr__ handles field access)

    def to_qdrant_payload(self) -> dict[str, Any]:
        """DEPRECATED: Serializes to a strict {'core': ..., 'entity': ...} structure.

        This method is deprecated and will be removed in a future version.
        The current implementation uses flat payload structure directly in MemoryStore.

        Returns:
            dict[str, Any]: Deprecated payload structure.
        """
        warnings.warn(
            "Memory.to_qdrant_payload() is deprecated. Use flat payload structure directly.",
            DeprecationWarning,
            stacklevel=2,
        )

        core = {
            "id": self.id,
            "user_id": self.user_id,
            "memory_type": self.memory_type,
            "created_at": (
                self.created_at.isoformat()
                if hasattr(self.created_at, "isoformat")
                else str(self.created_at)
            ),
        }
        if self.updated_at:
            core["updated_at"] = (
                self.updated_at.isoformat()
                if hasattr(self.updated_at, "isoformat")
                else str(self.updated_at)
            )
        if self.hrid:
            core["hrid"] = self.hrid

        # Entity payload contains only YAML-defined fields
        entity = dict(self.payload)

        return {"core": core, "entity": entity}

    def to_kuzu_node(self) -> dict[str, Any]:
        """Export a minimal node for Kuzu, containing only core fields.

        NO hardcoded fields - only system fields stored in graph.

        Returns:
            dict[str, Any]: Node data for Kuzu storage.
        """
        node = {
            "id": self.id,
            "user_id": self.user_id,
            "memory_type": self.memory_type,
            "created_at": (
                self.created_at.isoformat()
                if hasattr(self.created_at, "isoformat")
                else str(self.created_at)
            ),
        }
        if self.updated_at:
            node["updated_at"] = (
                self.updated_at.isoformat()
                if hasattr(self.updated_at, "isoformat")
                else str(self.updated_at)
            )
        if self.hrid:
            node["hrid"] = self.hrid

        return node

    def __getattr__(self, item: str):
        """Dynamic attribute access for YAML-defined payload fields ONLY.

        No fallback logic, no backward compatibility. If the field is not
        in the payload dictionary, raises AttributeError immediately.
        This enforces strict YAML schema compliance.

        Args:
            item: Field name to access.

        Returns:
            Any: Field value from payload.

        Raises:
            AttributeError: If field is not in payload.
        """
        payload = self.__dict__.get("payload")
        if isinstance(payload, dict) and item in payload:
            return payload[item]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'")

    # ---------------------------------------------------------------------
    # YAML → Dynamic entity model projection helpers
    # ---------------------------------------------------------------------
    def to_entity_model(self):
        """Project this Memory into a dynamic Pydantic entity model.

        Returns an instance of the auto-generated model class that matches
        the entity type defined in the YAML schema. Only non-system fields
        are included.

        Returns:
            BaseModel: Dynamic Pydantic model instance.
        """
        model_cls = get_entity_model(self.memory_type)
        # Pass only fields that the model expects
        model_fields = {
            k: v for k, v in (self.payload or {}).items() if k in model_cls.model_fields
        }
        return model_cls(**model_fields)

__getattr__(item)

Dynamic attribute access for YAML-defined payload fields ONLY.

No fallback logic, no backward compatibility. If the field is not in the payload dictionary, raises AttributeError immediately. This enforces strict YAML schema compliance.

Parameters:

Name Type Description Default
item str

Field name to access.

required

Returns:

Name Type Description
Any

Field value from payload.

Raises:

Type Description
AttributeError

If field is not in payload.

Source code in src/memg_core/core/models.py
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
def __getattr__(self, item: str):
    """Dynamic attribute access for YAML-defined payload fields ONLY.

    No fallback logic, no backward compatibility. If the field is not
    in the payload dictionary, raises AttributeError immediately.
    This enforces strict YAML schema compliance.

    Args:
        item: Field name to access.

    Returns:
        Any: Field value from payload.

    Raises:
        AttributeError: If field is not in payload.
    """
    payload = self.__dict__.get("payload")
    if isinstance(payload, dict) and item in payload:
        return payload[item]
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'")

memory_type_not_empty(v) classmethod

Validate that memory_type is not empty.

Parameters:

Name Type Description Default
v str

Memory type value.

required

Returns:

Name Type Description
str str

Stripped memory type.

Raises:

Type Description
ValueError

If memory_type is empty or whitespace.

Source code in src/memg_core/core/models.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
@field_validator("memory_type")
@classmethod
def memory_type_not_empty(cls, v: str) -> str:
    """Validate that memory_type is not empty.

    Args:
        v: Memory type value.

    Returns:
        str: Stripped memory type.

    Raises:
        ValueError: If memory_type is empty or whitespace.
    """
    if not v or not v.strip():
        raise ValueError("memory_type cannot be empty")
    return v.strip()

to_entity_model()

Project this Memory into a dynamic Pydantic entity model.

Returns an instance of the auto-generated model class that matches the entity type defined in the YAML schema. Only non-system fields are included.

Returns:

Name Type Description
BaseModel

Dynamic Pydantic model instance.

Source code in src/memg_core/core/models.py
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
def to_entity_model(self):
    """Project this Memory into a dynamic Pydantic entity model.

    Returns an instance of the auto-generated model class that matches
    the entity type defined in the YAML schema. Only non-system fields
    are included.

    Returns:
        BaseModel: Dynamic Pydantic model instance.
    """
    model_cls = get_entity_model(self.memory_type)
    # Pass only fields that the model expects
    model_fields = {
        k: v for k, v in (self.payload or {}).items() if k in model_cls.model_fields
    }
    return model_cls(**model_fields)

to_kuzu_node()

Export a minimal node for Kuzu, containing only core fields.

NO hardcoded fields - only system fields stored in graph.

Returns:

Type Description
dict[str, Any]

dict[str, Any]: Node data for Kuzu storage.

Source code in src/memg_core/core/models.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
def to_kuzu_node(self) -> dict[str, Any]:
    """Export a minimal node for Kuzu, containing only core fields.

    NO hardcoded fields - only system fields stored in graph.

    Returns:
        dict[str, Any]: Node data for Kuzu storage.
    """
    node = {
        "id": self.id,
        "user_id": self.user_id,
        "memory_type": self.memory_type,
        "created_at": (
            self.created_at.isoformat()
            if hasattr(self.created_at, "isoformat")
            else str(self.created_at)
        ),
    }
    if self.updated_at:
        node["updated_at"] = (
            self.updated_at.isoformat()
            if hasattr(self.updated_at, "isoformat")
            else str(self.updated_at)
        )
    if self.hrid:
        node["hrid"] = self.hrid

    return node

to_qdrant_payload()

DEPRECATED: Serializes to a strict {'core': ..., 'entity': ...} structure.

This method is deprecated and will be removed in a future version. The current implementation uses flat payload structure directly in MemoryStore.

Returns:

Type Description
dict[str, Any]

dict[str, Any]: Deprecated payload structure.

Source code in src/memg_core/core/models.py
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def to_qdrant_payload(self) -> dict[str, Any]:
    """DEPRECATED: Serializes to a strict {'core': ..., 'entity': ...} structure.

    This method is deprecated and will be removed in a future version.
    The current implementation uses flat payload structure directly in MemoryStore.

    Returns:
        dict[str, Any]: Deprecated payload structure.
    """
    warnings.warn(
        "Memory.to_qdrant_payload() is deprecated. Use flat payload structure directly.",
        DeprecationWarning,
        stacklevel=2,
    )

    core = {
        "id": self.id,
        "user_id": self.user_id,
        "memory_type": self.memory_type,
        "created_at": (
            self.created_at.isoformat()
            if hasattr(self.created_at, "isoformat")
            else str(self.created_at)
        ),
    }
    if self.updated_at:
        core["updated_at"] = (
            self.updated_at.isoformat()
            if hasattr(self.updated_at, "isoformat")
            else str(self.updated_at)
        )
    if self.hrid:
        core["hrid"] = self.hrid

    # Entity payload contains only YAML-defined fields
    entity = dict(self.payload)

    return {"core": core, "entity": entity}

MemoryNeighbor

Bases: BaseModel

Memory neighbor with anchor-only payload.

Attributes:

Name Type Description
hrid str

Human-readable identifier.

memory_type str

Entity type from YAML schema.

payload dict[str, Any]

Anchor-only payload (statement field only).

Source code in src/memg_core/core/models.py
262
263
264
265
266
267
268
269
270
271
272
273
class MemoryNeighbor(BaseModel):
    """Memory neighbor with anchor-only payload.

    Attributes:
        hrid: Human-readable identifier.
        memory_type: Entity type from YAML schema.
        payload: Anchor-only payload (statement field only).
    """

    hrid: str = Field(..., description="Human-readable identifier")
    memory_type: str = Field(..., description="Entity type from YAML schema")
    payload: dict[str, Any] = Field(..., description="Anchor-only payload")

MemoryPoint

Bases: BaseModel

Memory with embedding vector for Qdrant.

Attributes:

Name Type Description
memory Memory

Memory instance.

vector list[float]

Embedding vector.

point_id str | None

Qdrant point ID.

Source code in src/memg_core/core/models.py
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
class MemoryPoint(BaseModel):
    """Memory with embedding vector for Qdrant.

    Attributes:
        memory: Memory instance.
        vector: Embedding vector.
        point_id: Qdrant point ID.
    """

    memory: Memory
    vector: list[float] = Field(..., description="Embedding vector")
    point_id: str | None = Field(None, description="Qdrant point ID")

    @field_validator("vector")
    @classmethod
    def vector_not_empty(cls, v):
        """Validate that vector is not empty.

        Args:
            v: Vector to validate.

        Returns:
            list[float]: Validated vector.

        Raises:
            ValueError: If vector is empty.
        """
        if not v:
            raise ValueError("Vector cannot be empty")
        return v

vector_not_empty(v) classmethod

Validate that vector is not empty.

Parameters:

Name Type Description Default
v

Vector to validate.

required

Returns:

Type Description

list[float]: Validated vector.

Raises:

Type Description
ValueError

If vector is empty.

Source code in src/memg_core/core/models.py
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
@field_validator("vector")
@classmethod
def vector_not_empty(cls, v):
    """Validate that vector is not empty.

    Args:
        v: Vector to validate.

    Returns:
        list[float]: Validated vector.

    Raises:
        ValueError: If vector is empty.
    """
    if not v:
        raise ValueError("Vector cannot be empty")
    return v

MemorySeed

Bases: BaseModel

Memory seed with full payload and explicit relationships.

Attributes:

Name Type Description
hrid str

Human-readable identifier.

memory_type str

Entity type from YAML schema.

payload dict[str, Any]

Full entity payload.

score float

Vector similarity score to query.

relationships list[RelationshipInfo]

List of relationships to other memories.

Source code in src/memg_core/core/models.py
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
class MemorySeed(BaseModel):
    """Memory seed with full payload and explicit relationships.

    Attributes:
        hrid: Human-readable identifier.
        memory_type: Entity type from YAML schema.
        payload: Full entity payload.
        score: Vector similarity score to query.
        relationships: List of relationships to other memories.
    """

    hrid: str = Field(..., description="Human-readable identifier")
    memory_type: str = Field(..., description="Entity type from YAML schema")
    payload: dict[str, Any] = Field(..., description="Full entity payload")
    score: float = Field(
        ..., ge=0.0, le=1.0 + _MAX_SCORE_TOLERANCE, description="Vector similarity score"
    )
    relationships: list[RelationshipInfo] = Field(
        default_factory=list, description="Relationships to other memories"
    )

    @field_validator("score")
    @classmethod
    def normalize_score(cls, v: float) -> float:
        """Normalize similarity scores to handle floating-point precision errors."""
        if v < 0.0:
            raise ValueError(f"Similarity score cannot be negative: {v}")
        if v > 1.001:
            raise ValueError(f"Similarity score too high (indicates calculation error): {v}")
        return min(v, 1.0)

normalize_score(v) classmethod

Normalize similarity scores to handle floating-point precision errors.

Source code in src/memg_core/core/models.py
251
252
253
254
255
256
257
258
259
@field_validator("score")
@classmethod
def normalize_score(cls, v: float) -> float:
    """Normalize similarity scores to handle floating-point precision errors."""
    if v < 0.0:
        raise ValueError(f"Similarity score cannot be negative: {v}")
    if v > 1.001:
        raise ValueError(f"Similarity score too high (indicates calculation error): {v}")
    return min(v, 1.0)

ProcessingResult

Bases: BaseModel

Result from memory processing pipelines - type-agnostic.

Attributes:

Name Type Description
success bool

Whether processing succeeded.

memories_created list[Memory]

List of created memories.

errors list[str]

List of error messages.

processing_time_ms float | None

Processing time in milliseconds.

Source code in src/memg_core/core/models.py
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
class ProcessingResult(BaseModel):
    """Result from memory processing pipelines - type-agnostic.

    Attributes:
        success: Whether processing succeeded.
        memories_created: List of created memories.
        errors: List of error messages.
        processing_time_ms: Processing time in milliseconds.
    """

    success: bool
    memories_created: list[Memory] = Field(default_factory=list)
    errors: list[str] = Field(default_factory=list)
    processing_time_ms: float | None = Field(None)

    @property
    def total_created(self) -> int:
        """Total memories created (all types).

        Returns:
            int: Number of memories created.
        """
        return len(self.memories_created)

total_created property

Total memories created (all types).

Returns:

Name Type Description
int int

Number of memories created.

RelationshipInfo

Bases: BaseModel

Relationship information between memories.

Attributes:

Name Type Description
relation_type str

Type of relationship (e.g., FIXES, ADDRESSES).

target_hrid str

HRID of the target memory.

scores dict[str, float]

Scoring information for the relationship.

Source code in src/memg_core/core/models.py
216
217
218
219
220
221
222
223
224
225
226
227
class RelationshipInfo(BaseModel):
    """Relationship information between memories.

    Attributes:
        relation_type: Type of relationship (e.g., FIXES, ADDRESSES).
        target_hrid: HRID of the target memory.
        scores: Scoring information for the relationship.
    """

    relation_type: str = Field(..., description="Relationship type from YAML schema")
    target_hrid: str = Field(..., description="HRID of target memory")
    scores: dict[str, float] = Field(default_factory=dict, description="Relationship scores")

SearchResult

Bases: BaseModel

Search result with explicit seed/neighbor separation.

Attributes:

Name Type Description
memories list[MemorySeed]

List of memory seeds with full payloads and relationships.

neighbors list[MemoryNeighbor]

List of memory neighbors with anchor-only payloads.

Source code in src/memg_core/core/models.py
276
277
278
279
280
281
282
283
284
285
286
287
288
289
class SearchResult(BaseModel):
    """Search result with explicit seed/neighbor separation.

    Attributes:
        memories: List of memory seeds with full payloads and relationships.
        neighbors: List of memory neighbors with anchor-only payloads.
    """

    memories: list[MemorySeed] = Field(
        default_factory=list, description="Memory seeds with full payloads"
    )
    neighbors: list[MemoryNeighbor] = Field(
        default_factory=list, description="Memory neighbors with anchor payloads"
    )

get_entity_model(entity_name)

Get Pydantic model for entity from global registry.

Parameters:

Name Type Description Default
entity_name str

Name of the entity.

required

Returns:

Type Description
type[BaseModel]

type[BaseModel]: Pydantic model class.

Source code in src/memg_core/core/types.py
421
422
423
424
425
426
427
428
429
430
def get_entity_model(entity_name: str) -> type[BaseModel]:
    """Get Pydantic model for entity from global registry.

    Args:
        entity_name: Name of the entity.

    Returns:
        type[BaseModel]: Pydantic model class.
    """
    return TypeRegistry.get_instance().get_entity_model(entity_name)

Configuration

Configuration management:

Memory System Configuration - minimal and essential settings

DEFAULT_MEMG_CONFIG = MemGConfig() module-attribute

DEFAULT_SYSTEM_CONFIG = MemorySystemConfig() module-attribute

MemGConfig dataclass

Core memory system configuration.

Attributes:

Name Type Description
similarity_threshold float

Threshold for conflict detection (0.0-1.0).

score_threshold float

Minimum score for search results (0.0-1.0).

high_similarity_threshold float

Threshold for duplicate detection (0.0-1.0).

decay_rate float

Graph traversal decay rate per hop (0.0-1.0).

decay_threshold float

Minimum neighbor relevance threshold (0.0-1.0).

enable_ai_type_verification bool

Enable AI-based type detection.

enable_temporal_reasoning bool

Enable temporal reasoning.

vector_dimension int

Embedding dimension size.

batch_processing_size int

Batch size for bulk operations.

embedder_model str

FastEmbed model name.

template_name str

Active template name.

qdrant_collection_name str

Qdrant collection name.

kuzu_database_path str

Kuzu database path.

Source code in src/memg_core/core/config.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
@dataclass
class MemGConfig:
    """Core memory system configuration.

    Attributes:
        similarity_threshold: Threshold for conflict detection (0.0-1.0).
        score_threshold: Minimum score for search results (0.0-1.0).
        high_similarity_threshold: Threshold for duplicate detection (0.0-1.0).
        decay_rate: Graph traversal decay rate per hop (0.0-1.0).
        decay_threshold: Minimum neighbor relevance threshold (0.0-1.0).
        enable_ai_type_verification: Enable AI-based type detection.
        enable_temporal_reasoning: Enable temporal reasoning.
        vector_dimension: Embedding dimension size.
        batch_processing_size: Batch size for bulk operations.
        embedder_model: FastEmbed model name.
        template_name: Active template name.
        qdrant_collection_name: Qdrant collection name.
        kuzu_database_path: Kuzu database path.
    """

    # Core similarity and scoring thresholds
    similarity_threshold: float = 0.7  # For conflict detection
    score_threshold: float = 0.3  # Minimum score for search results
    high_similarity_threshold: float = 0.9  # For duplicate detection
    decay_rate: float = 0.9  # Graph traversal decay rate per hop
    decay_threshold: float = 0.1  # Minimum neighbor relevance threshold

    # Processing settings
    enable_ai_type_verification: bool = True  # AI-based type detection
    enable_temporal_reasoning: bool = False  # Enable temporal reasoning

    # Performance settings
    vector_dimension: int = 384  # Embedding dimension
    batch_processing_size: int = 50  # Batch size for bulk operations
    embedder_model: str = "Snowflake/snowflake-arctic-embed-xs"  # FastEmbed model

    # Template settings
    template_name: str = "default"  # Active template name

    # Database settings
    qdrant_collection_name: str = "memories"
    kuzu_database_path: str = "kuzu_db"

    def __post_init__(self):
        """Validate configuration parameters.

        Raises:
            ValueError: If any threshold values are outside valid range [0.0, 1.0].
        """
        if not 0.0 <= self.similarity_threshold <= 1.0:
            raise ValueError("similarity_threshold must be between 0.0 and 1.0")
        if not 0.0 <= self.score_threshold <= 1.0:
            raise ValueError("score_threshold must be between 0.0 and 1.0")
        if not 0.0 <= self.high_similarity_threshold <= 1.0:
            raise ValueError("high_similarity_threshold must be between 0.0 and 1.0")
        if not 0.0 <= self.decay_rate <= 1.0:
            raise ValueError("decay_rate must be between 0.0 and 1.0")
        if not 0.0 <= self.decay_threshold <= 1.0:
            raise ValueError("decay_threshold must be between 0.0 and 1.0")

    def to_dict(self) -> dict[str, Any]:
        """Convert configuration to dictionary.

        Returns:
            dict[str, Any]: Dictionary representation of configuration.
        """
        return {
            "similarity_threshold": self.similarity_threshold,
            "score_threshold": self.score_threshold,
            "high_similarity_threshold": self.high_similarity_threshold,
            "decay_rate": self.decay_rate,
            "decay_threshold": self.decay_threshold,
            "enable_ai_type_verification": self.enable_ai_type_verification,
            "vector_dimension": self.vector_dimension,
            "batch_processing_size": self.batch_processing_size,
            "embedder_model": self.embedder_model,
            "template_name": self.template_name,
            "qdrant_collection_name": self.qdrant_collection_name,
            "kuzu_database_path": self.kuzu_database_path,
        }

    @classmethod
    def from_dict(cls, config_dict: dict[str, Any]) -> "MemGConfig":
        """Create configuration from dictionary.

        Args:
            config_dict: Dictionary containing configuration values.

        Returns:
            MemGConfig: Configuration instance.
        """
        return cls(**config_dict)

    @classmethod
    def from_env(cls) -> "MemGConfig":
        """Create configuration from environment variables.

        Each instance should use explicit environment variables for isolation.
        The core memory system doesn't know or care about server ports.

        Returns:
            MemGConfig: Configuration instance with environment-derived values.
        """
        return cls(
            similarity_threshold=float(os.getenv("MEMG_SIMILARITY_THRESHOLD", "0.7")),
            score_threshold=float(os.getenv("MEMG_SCORE_THRESHOLD", "0.3")),
            high_similarity_threshold=float(os.getenv("MEMG_HIGH_SIMILARITY_THRESHOLD", "0.9")),
            decay_rate=float(os.getenv("MEMG_DECAY_RATE", "0.9")),
            decay_threshold=float(os.getenv("MEMG_DECAY_THRESHOLD", "0.1")),
            enable_ai_type_verification=os.getenv(
                "MEMG_ENABLE_AI_TYPE_VERIFICATION", "true"
            ).lower()
            == "true",
            vector_dimension=int(os.getenv("EMBEDDING_DIMENSION_LEN", "384")),
            batch_processing_size=int(os.getenv("MEMG_BATCH_SIZE", "50")),
            embedder_model=os.getenv("EMBEDDER_MODEL", "Snowflake/snowflake-arctic-embed-xs"),
            template_name=os.getenv("MEMG_TEMPLATE", "default"),
            qdrant_collection_name=os.getenv("MEMG_QDRANT_COLLECTION", "memories"),
            kuzu_database_path=os.getenv("MEMG_KUZU_DB_PATH", "kuzu_db"),
        )

__post_init__()

Validate configuration parameters.

Raises:

Type Description
ValueError

If any threshold values are outside valid range [0.0, 1.0].

Source code in src/memg_core/core/config.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def __post_init__(self):
    """Validate configuration parameters.

    Raises:
        ValueError: If any threshold values are outside valid range [0.0, 1.0].
    """
    if not 0.0 <= self.similarity_threshold <= 1.0:
        raise ValueError("similarity_threshold must be between 0.0 and 1.0")
    if not 0.0 <= self.score_threshold <= 1.0:
        raise ValueError("score_threshold must be between 0.0 and 1.0")
    if not 0.0 <= self.high_similarity_threshold <= 1.0:
        raise ValueError("high_similarity_threshold must be between 0.0 and 1.0")
    if not 0.0 <= self.decay_rate <= 1.0:
        raise ValueError("decay_rate must be between 0.0 and 1.0")
    if not 0.0 <= self.decay_threshold <= 1.0:
        raise ValueError("decay_threshold must be between 0.0 and 1.0")

from_dict(config_dict) classmethod

Create configuration from dictionary.

Parameters:

Name Type Description Default
config_dict dict[str, Any]

Dictionary containing configuration values.

required

Returns:

Name Type Description
MemGConfig MemGConfig

Configuration instance.

Source code in src/memg_core/core/config.py
89
90
91
92
93
94
95
96
97
98
99
@classmethod
def from_dict(cls, config_dict: dict[str, Any]) -> "MemGConfig":
    """Create configuration from dictionary.

    Args:
        config_dict: Dictionary containing configuration values.

    Returns:
        MemGConfig: Configuration instance.
    """
    return cls(**config_dict)

from_env() classmethod

Create configuration from environment variables.

Each instance should use explicit environment variables for isolation. The core memory system doesn't know or care about server ports.

Returns:

Name Type Description
MemGConfig MemGConfig

Configuration instance with environment-derived values.

Source code in src/memg_core/core/config.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
@classmethod
def from_env(cls) -> "MemGConfig":
    """Create configuration from environment variables.

    Each instance should use explicit environment variables for isolation.
    The core memory system doesn't know or care about server ports.

    Returns:
        MemGConfig: Configuration instance with environment-derived values.
    """
    return cls(
        similarity_threshold=float(os.getenv("MEMG_SIMILARITY_THRESHOLD", "0.7")),
        score_threshold=float(os.getenv("MEMG_SCORE_THRESHOLD", "0.3")),
        high_similarity_threshold=float(os.getenv("MEMG_HIGH_SIMILARITY_THRESHOLD", "0.9")),
        decay_rate=float(os.getenv("MEMG_DECAY_RATE", "0.9")),
        decay_threshold=float(os.getenv("MEMG_DECAY_THRESHOLD", "0.1")),
        enable_ai_type_verification=os.getenv(
            "MEMG_ENABLE_AI_TYPE_VERIFICATION", "true"
        ).lower()
        == "true",
        vector_dimension=int(os.getenv("EMBEDDING_DIMENSION_LEN", "384")),
        batch_processing_size=int(os.getenv("MEMG_BATCH_SIZE", "50")),
        embedder_model=os.getenv("EMBEDDER_MODEL", "Snowflake/snowflake-arctic-embed-xs"),
        template_name=os.getenv("MEMG_TEMPLATE", "default"),
        qdrant_collection_name=os.getenv("MEMG_QDRANT_COLLECTION", "memories"),
        kuzu_database_path=os.getenv("MEMG_KUZU_DB_PATH", "kuzu_db"),
    )

to_dict()

Convert configuration to dictionary.

Returns:

Type Description
dict[str, Any]

dict[str, Any]: Dictionary representation of configuration.

Source code in src/memg_core/core/config.py
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def to_dict(self) -> dict[str, Any]:
    """Convert configuration to dictionary.

    Returns:
        dict[str, Any]: Dictionary representation of configuration.
    """
    return {
        "similarity_threshold": self.similarity_threshold,
        "score_threshold": self.score_threshold,
        "high_similarity_threshold": self.high_similarity_threshold,
        "decay_rate": self.decay_rate,
        "decay_threshold": self.decay_threshold,
        "enable_ai_type_verification": self.enable_ai_type_verification,
        "vector_dimension": self.vector_dimension,
        "batch_processing_size": self.batch_processing_size,
        "embedder_model": self.embedder_model,
        "template_name": self.template_name,
        "qdrant_collection_name": self.qdrant_collection_name,
        "kuzu_database_path": self.kuzu_database_path,
    }

MemorySystemConfig dataclass

Core memory system configuration - NO SERVER CONCERNS.

Attributes:

Name Type Description
memg MemGConfig

Core memory configuration instance.

debug_mode bool

Enable debug mode.

log_level str

Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL).

Source code in src/memg_core/core/config.py
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
@dataclass
class MemorySystemConfig:
    """Core memory system configuration - NO SERVER CONCERNS.

    Attributes:
        memg: Core memory configuration instance.
        debug_mode: Enable debug mode.
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
    """

    memg: MemGConfig = field(default_factory=MemGConfig)

    # Core system settings only
    debug_mode: bool = False
    log_level: str = "INFO"

    def __post_init__(self):
        """Validate core system configuration.

        Raises:
            ValueError: If log_level is not a valid logging level.
        """
        if self.log_level not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
            raise ValueError("log_level must be a valid logging level")

    @classmethod
    def from_env(cls) -> "MemorySystemConfig":
        """Create core memory system configuration from environment variables.

        Returns:
            MemorySystemConfig: Configuration instance with environment-derived values.
        """
        return cls(
            memg=MemGConfig.from_env(),
            debug_mode=os.getenv("MEMORY_SYSTEM_DEBUG", "false").lower() == "true",
            log_level=os.getenv("MEMORY_SYSTEM_LOG_LEVEL", "INFO").upper(),
        )

__post_init__()

Validate core system configuration.

Raises:

Type Description
ValueError

If log_level is not a valid logging level.

Source code in src/memg_core/core/config.py
146
147
148
149
150
151
152
153
def __post_init__(self):
    """Validate core system configuration.

    Raises:
        ValueError: If log_level is not a valid logging level.
    """
    if self.log_level not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
        raise ValueError("log_level must be a valid logging level")

from_env() classmethod

Create core memory system configuration from environment variables.

Returns:

Name Type Description
MemorySystemConfig MemorySystemConfig

Configuration instance with environment-derived values.

Source code in src/memg_core/core/config.py
155
156
157
158
159
160
161
162
163
164
165
166
@classmethod
def from_env(cls) -> "MemorySystemConfig":
    """Create core memory system configuration from environment variables.

    Returns:
        MemorySystemConfig: Configuration instance with environment-derived values.
    """
    return cls(
        memg=MemGConfig.from_env(),
        debug_mode=os.getenv("MEMORY_SYSTEM_DEBUG", "false").lower() == "true",
        log_level=os.getenv("MEMORY_SYSTEM_LOG_LEVEL", "INFO").upper(),
    )

get_config()

Get system configuration, preferring environment variables.

Returns:

Name Type Description
MemorySystemConfig MemorySystemConfig

System configuration instance.

Source code in src/memg_core/core/config.py
174
175
176
177
178
179
180
def get_config() -> MemorySystemConfig:
    """Get system configuration, preferring environment variables.

    Returns:
        MemorySystemConfig: System configuration instance.
    """
    return MemorySystemConfig.from_env()

Exceptions

Exception classes:

Custom exception hierarchy for the memory system - minimal set.

ConfigurationError

Bases: MemorySystemError

Configuration-related errors (env vars, validation).

Source code in src/memg_core/core/exceptions.py
38
39
class ConfigurationError(MemorySystemError):
    """Configuration-related errors (env vars, validation)."""

DatabaseError

Bases: MemorySystemError

Database operation failures (Qdrant, Kuzu).

Source code in src/memg_core/core/exceptions.py
42
43
class DatabaseError(MemorySystemError):
    """Database operation failures (Qdrant, Kuzu)."""

MemorySystemError

Bases: Exception

Base exception for all memory system errors.

Attributes:

Name Type Description
message

Error message.

operation

Operation that caused the error.

context

Additional context information.

original_error

Original exception that was wrapped.

Source code in src/memg_core/core/exceptions.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class MemorySystemError(Exception):
    """Base exception for all memory system errors.

    Attributes:
        message: Error message.
        operation: Operation that caused the error.
        context: Additional context information.
        original_error: Original exception that was wrapped.
    """

    def __init__(
        self,
        message: str,
        operation: str | None = None,
        context: dict[str, Any] | None = None,
        original_error: Exception | None = None,
    ):
        self.message = message
        self.operation = operation
        self.context = context or {}
        self.original_error = original_error

        # Build detailed error message
        full_message = message
        if operation:
            full_message = f"[{operation}] {message}"
        if original_error:
            full_message += f" (caused by: {original_error})"

        super().__init__(full_message)

ProcessingError

Bases: MemorySystemError

Memory processing operation failures (catch-all for processing).

Source code in src/memg_core/core/exceptions.py
50
51
class ProcessingError(MemorySystemError):
    """Memory processing operation failures (catch-all for processing)."""

ValidationError

Bases: MemorySystemError

Data validation failures (schema, input format).

Source code in src/memg_core/core/exceptions.py
46
47
class ValidationError(MemorySystemError):
    """Data validation failures (schema, input format)."""

handle_with_context(operation)

Decorator for consistent error handling with context.

Parameters:

Name Type Description Default
operation str

Operation name for error context.

required

Returns:

Name Type Description
callable

Decorated function with error handling.

Source code in src/memg_core/core/exceptions.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def handle_with_context(operation: str):
    """Decorator for consistent error handling with context.

    Args:
        operation: Operation name for error context.

    Returns:
        callable: Decorated function with error handling.
    """

    def decorator(func):
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except MemorySystemError:
                # Re-raise our own exceptions as-is
                raise
            except Exception as e:
                # Wrap unknown exceptions
                raise wrap_exception(e, operation, {"args": args, "kwargs": kwargs}) from e

        return wrapper

    return decorator

wrap_exception(original_error, operation, context=None)

Wrap a generic exception in an appropriate MemorySystemError subclass.

Parameters:

Name Type Description Default
original_error Exception

Original exception to wrap.

required
operation str

Operation that caused the error.

required
context dict[str, Any] | None

Additional context information.

None

Returns:

Name Type Description
MemorySystemError MemorySystemError

Wrapped exception with appropriate subclass.

Source code in src/memg_core/core/exceptions.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def wrap_exception(
    original_error: Exception, operation: str, context: dict[str, Any] | None = None
) -> MemorySystemError:
    """Wrap a generic exception in an appropriate MemorySystemError subclass.

    Args:
        original_error: Original exception to wrap.
        operation: Operation that caused the error.
        context: Additional context information.

    Returns:
        MemorySystemError: Wrapped exception with appropriate subclass.
    """
    error_message = str(original_error)

    # Map common exceptions to our hierarchy

    if isinstance(original_error, (FileNotFoundError, PermissionError)):
        return DatabaseError(
            f"Storage error: {error_message}",
            operation=operation,
            context=context,
            original_error=original_error,
        )

    if isinstance(original_error, ValueError):
        return ValidationError(
            f"Invalid value: {error_message}",
            operation=operation,
            context=context,
            original_error=original_error,
        )

    # Default to generic ProcessingError for unknown exceptions
    return ProcessingError(
        f"Unexpected error: {error_message}",
        operation=operation,
        context=context,
        original_error=original_error,
    )

YAML Schema

YAML schema handling:

YAML Translator: validates payloads using TypeRegistry and resolves anchor text.

STRICT YAML-FIRST: This module enforces the single-YAML-orchestrates-everything principle. NO flexibility, NO migration support, NO fallbacks.

Uses TypeRegistry as SINGLE SOURCE OF TRUTH for all entity definitions. All type building and validation delegated to TypeRegistry - zero redundancy.

Memory

Bases: BaseModel

Core memory model with YAML-driven payload validation.

Attributes:

Name Type Description
id str

Unique identifier (UUID or HRID).

user_id str

Owner of the memory.

memory_type str

Entity type from YAML schema.

payload dict[str, Any]

Entity-specific fields.

vector list[float] | None

Embedding vector.

created_at datetime

Creation timestamp.

updated_at datetime | None

Last update timestamp.

hrid str | None

Human-readable identifier.

Source code in src/memg_core/core/models.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
class Memory(BaseModel):
    """Core memory model with YAML-driven payload validation.

    Attributes:
        id: Unique identifier (UUID or HRID).
        user_id: Owner of the memory.
        memory_type: Entity type from YAML schema.
        payload: Entity-specific fields.
        vector: Embedding vector.
        created_at: Creation timestamp.
        updated_at: Last update timestamp.
        hrid: Human-readable identifier.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Core fields only - NO hardcoded entity-specific fields
    # NO DEFAULTS - crash early if required fields missing
    id: str = Field(default_factory=lambda: str(uuid4()))  # System-generated ID only
    user_id: str  # REQUIRED - no default
    memory_type: str  # REQUIRED - no default, must come from YAML
    payload: dict[str, Any] = Field(default_factory=dict)  # Entity fields container
    vector: list[float] | None = None  # System-generated vector
    created_at: datetime = Field(default_factory=lambda: datetime.now(UTC))  # System timestamp
    updated_at: datetime | None = None

    # Human-readable id (e.g., MEMO_AAA001)
    hrid: str | None = None

    @field_validator("memory_type")
    @classmethod
    def memory_type_not_empty(cls, v: str) -> str:
        """Validate that memory_type is not empty.

        Args:
            v: Memory type value.

        Returns:
            str: Stripped memory type.

        Raises:
            ValueError: If memory_type is empty or whitespace.
        """
        if not v or not v.strip():
            raise ValueError("memory_type cannot be empty")
        return v.strip()

    # (properties removed – dynamic __getattr__ handles field access)

    def to_qdrant_payload(self) -> dict[str, Any]:
        """DEPRECATED: Serializes to a strict {'core': ..., 'entity': ...} structure.

        This method is deprecated and will be removed in a future version.
        The current implementation uses flat payload structure directly in MemoryStore.

        Returns:
            dict[str, Any]: Deprecated payload structure.
        """
        warnings.warn(
            "Memory.to_qdrant_payload() is deprecated. Use flat payload structure directly.",
            DeprecationWarning,
            stacklevel=2,
        )

        core = {
            "id": self.id,
            "user_id": self.user_id,
            "memory_type": self.memory_type,
            "created_at": (
                self.created_at.isoformat()
                if hasattr(self.created_at, "isoformat")
                else str(self.created_at)
            ),
        }
        if self.updated_at:
            core["updated_at"] = (
                self.updated_at.isoformat()
                if hasattr(self.updated_at, "isoformat")
                else str(self.updated_at)
            )
        if self.hrid:
            core["hrid"] = self.hrid

        # Entity payload contains only YAML-defined fields
        entity = dict(self.payload)

        return {"core": core, "entity": entity}

    def to_kuzu_node(self) -> dict[str, Any]:
        """Export a minimal node for Kuzu, containing only core fields.

        NO hardcoded fields - only system fields stored in graph.

        Returns:
            dict[str, Any]: Node data for Kuzu storage.
        """
        node = {
            "id": self.id,
            "user_id": self.user_id,
            "memory_type": self.memory_type,
            "created_at": (
                self.created_at.isoformat()
                if hasattr(self.created_at, "isoformat")
                else str(self.created_at)
            ),
        }
        if self.updated_at:
            node["updated_at"] = (
                self.updated_at.isoformat()
                if hasattr(self.updated_at, "isoformat")
                else str(self.updated_at)
            )
        if self.hrid:
            node["hrid"] = self.hrid

        return node

    def __getattr__(self, item: str):
        """Dynamic attribute access for YAML-defined payload fields ONLY.

        No fallback logic, no backward compatibility. If the field is not
        in the payload dictionary, raises AttributeError immediately.
        This enforces strict YAML schema compliance.

        Args:
            item: Field name to access.

        Returns:
            Any: Field value from payload.

        Raises:
            AttributeError: If field is not in payload.
        """
        payload = self.__dict__.get("payload")
        if isinstance(payload, dict) and item in payload:
            return payload[item]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'")

    # ---------------------------------------------------------------------
    # YAML → Dynamic entity model projection helpers
    # ---------------------------------------------------------------------
    def to_entity_model(self):
        """Project this Memory into a dynamic Pydantic entity model.

        Returns an instance of the auto-generated model class that matches
        the entity type defined in the YAML schema. Only non-system fields
        are included.

        Returns:
            BaseModel: Dynamic Pydantic model instance.
        """
        model_cls = get_entity_model(self.memory_type)
        # Pass only fields that the model expects
        model_fields = {
            k: v for k, v in (self.payload or {}).items() if k in model_cls.model_fields
        }
        return model_cls(**model_fields)

__getattr__(item)

Dynamic attribute access for YAML-defined payload fields ONLY.

No fallback logic, no backward compatibility. If the field is not in the payload dictionary, raises AttributeError immediately. This enforces strict YAML schema compliance.

Parameters:

Name Type Description Default
item str

Field name to access.

required

Returns:

Name Type Description
Any

Field value from payload.

Raises:

Type Description
AttributeError

If field is not in payload.

Source code in src/memg_core/core/models.py
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
def __getattr__(self, item: str):
    """Dynamic attribute access for YAML-defined payload fields ONLY.

    No fallback logic, no backward compatibility. If the field is not
    in the payload dictionary, raises AttributeError immediately.
    This enforces strict YAML schema compliance.

    Args:
        item: Field name to access.

    Returns:
        Any: Field value from payload.

    Raises:
        AttributeError: If field is not in payload.
    """
    payload = self.__dict__.get("payload")
    if isinstance(payload, dict) and item in payload:
        return payload[item]
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'")

memory_type_not_empty(v) classmethod

Validate that memory_type is not empty.

Parameters:

Name Type Description Default
v str

Memory type value.

required

Returns:

Name Type Description
str str

Stripped memory type.

Raises:

Type Description
ValueError

If memory_type is empty or whitespace.

Source code in src/memg_core/core/models.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
@field_validator("memory_type")
@classmethod
def memory_type_not_empty(cls, v: str) -> str:
    """Validate that memory_type is not empty.

    Args:
        v: Memory type value.

    Returns:
        str: Stripped memory type.

    Raises:
        ValueError: If memory_type is empty or whitespace.
    """
    if not v or not v.strip():
        raise ValueError("memory_type cannot be empty")
    return v.strip()

to_entity_model()

Project this Memory into a dynamic Pydantic entity model.

Returns an instance of the auto-generated model class that matches the entity type defined in the YAML schema. Only non-system fields are included.

Returns:

Name Type Description
BaseModel

Dynamic Pydantic model instance.

Source code in src/memg_core/core/models.py
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
def to_entity_model(self):
    """Project this Memory into a dynamic Pydantic entity model.

    Returns an instance of the auto-generated model class that matches
    the entity type defined in the YAML schema. Only non-system fields
    are included.

    Returns:
        BaseModel: Dynamic Pydantic model instance.
    """
    model_cls = get_entity_model(self.memory_type)
    # Pass only fields that the model expects
    model_fields = {
        k: v for k, v in (self.payload or {}).items() if k in model_cls.model_fields
    }
    return model_cls(**model_fields)

to_kuzu_node()

Export a minimal node for Kuzu, containing only core fields.

NO hardcoded fields - only system fields stored in graph.

Returns:

Type Description
dict[str, Any]

dict[str, Any]: Node data for Kuzu storage.

Source code in src/memg_core/core/models.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
def to_kuzu_node(self) -> dict[str, Any]:
    """Export a minimal node for Kuzu, containing only core fields.

    NO hardcoded fields - only system fields stored in graph.

    Returns:
        dict[str, Any]: Node data for Kuzu storage.
    """
    node = {
        "id": self.id,
        "user_id": self.user_id,
        "memory_type": self.memory_type,
        "created_at": (
            self.created_at.isoformat()
            if hasattr(self.created_at, "isoformat")
            else str(self.created_at)
        ),
    }
    if self.updated_at:
        node["updated_at"] = (
            self.updated_at.isoformat()
            if hasattr(self.updated_at, "isoformat")
            else str(self.updated_at)
        )
    if self.hrid:
        node["hrid"] = self.hrid

    return node

to_qdrant_payload()

DEPRECATED: Serializes to a strict {'core': ..., 'entity': ...} structure.

This method is deprecated and will be removed in a future version. The current implementation uses flat payload structure directly in MemoryStore.

Returns:

Type Description
dict[str, Any]

dict[str, Any]: Deprecated payload structure.

Source code in src/memg_core/core/models.py
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def to_qdrant_payload(self) -> dict[str, Any]:
    """DEPRECATED: Serializes to a strict {'core': ..., 'entity': ...} structure.

    This method is deprecated and will be removed in a future version.
    The current implementation uses flat payload structure directly in MemoryStore.

    Returns:
        dict[str, Any]: Deprecated payload structure.
    """
    warnings.warn(
        "Memory.to_qdrant_payload() is deprecated. Use flat payload structure directly.",
        DeprecationWarning,
        stacklevel=2,
    )

    core = {
        "id": self.id,
        "user_id": self.user_id,
        "memory_type": self.memory_type,
        "created_at": (
            self.created_at.isoformat()
            if hasattr(self.created_at, "isoformat")
            else str(self.created_at)
        ),
    }
    if self.updated_at:
        core["updated_at"] = (
            self.updated_at.isoformat()
            if hasattr(self.updated_at, "isoformat")
            else str(self.updated_at)
        )
    if self.hrid:
        core["hrid"] = self.hrid

    # Entity payload contains only YAML-defined fields
    entity = dict(self.payload)

    return {"core": core, "entity": entity}

MemorySystemError

Bases: Exception

Base exception for all memory system errors.

Attributes:

Name Type Description
message

Error message.

operation

Operation that caused the error.

context

Additional context information.

original_error

Original exception that was wrapped.

Source code in src/memg_core/core/exceptions.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class MemorySystemError(Exception):
    """Base exception for all memory system errors.

    Attributes:
        message: Error message.
        operation: Operation that caused the error.
        context: Additional context information.
        original_error: Original exception that was wrapped.
    """

    def __init__(
        self,
        message: str,
        operation: str | None = None,
        context: dict[str, Any] | None = None,
        original_error: Exception | None = None,
    ):
        self.message = message
        self.operation = operation
        self.context = context or {}
        self.original_error = original_error

        # Build detailed error message
        full_message = message
        if operation:
            full_message = f"[{operation}] {message}"
        if original_error:
            full_message += f" (caused by: {original_error})"

        super().__init__(full_message)

YamlTranslator

Translates YAML schema definitions to Pydantic models for strict validation.

Attributes:

Name Type Description
yaml_path

Path to YAML schema file.

_schema dict[str, Any] | None

Cached schema dictionary.

Source code in src/memg_core/core/yaml_translator.py
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
class YamlTranslator:
    """Translates YAML schema definitions to Pydantic models for strict validation.

    Attributes:
        yaml_path: Path to YAML schema file.
        _schema: Cached schema dictionary.
    """

    def __init__(self, yaml_path: str | None = None) -> None:
        """Initialize YamlTranslator with YAML schema path.

        Args:
            yaml_path: Path to YAML schema file. If None, uses MEMG_YAML_SCHEMA env var.

        Raises:
            YamlTranslatorError: If YAML path not provided or TypeRegistry initialization fails.
        """
        # Require explicit YAML path - no silent defaults
        if yaml_path:
            self.yaml_path = yaml_path
        else:
            env_path = os.getenv("MEMG_YAML_SCHEMA")
            if not env_path:
                raise YamlTranslatorError(
                    "YAML schema path required. Set MEMG_YAML_SCHEMA environment variable "
                    "or provide yaml_path parameter. No defaults allowed."
                )
            self.yaml_path = env_path

        self._schema: dict[str, Any] | None = None
        # NO model cache - TypeRegistry handles all caching

        # Initialize TypeRegistry from YAML - crash early if invalid
        try:
            initialize_types_from_yaml(self.yaml_path)
        except Exception as e:
            raise YamlTranslatorError(f"Failed to initialize TypeRegistry from YAML: {e}") from e

    @property
    def schema(self) -> dict[str, Any]:
        """Get the loaded YAML schema, loading it if necessary."""
        if self._schema is not None:
            return self._schema

        # Load schema from the required path - no fallbacks
        if not self.yaml_path:
            raise YamlTranslatorError(
                "YAML schema path not set. This should not happen after __init__."
            )

        self._schema = self._load_schema()
        return self._schema

    def _load_schema(self) -> dict[str, Any]:
        """Load schema from the current yaml_path."""
        if not self.yaml_path:
            raise YamlTranslatorError("YAML path is None")
        path = Path(self.yaml_path)
        if not path.exists():
            raise YamlTranslatorError(f"YAML schema not found at {path}")
        try:
            with path.open(encoding="utf-8") as f:
                data = yaml.safe_load(f)
            if not data:
                raise YamlTranslatorError("Empty YAML schema")
            if not isinstance(data, dict):
                raise YamlTranslatorError("YAML schema root must be a mapping")
            return data
        except yaml.YAMLError as e:
            raise YamlTranslatorError(f"Invalid YAML syntax: {e}") from e

    def _entities_map(self) -> dict[str, dict[str, Any]]:
        sch = self.schema
        ents = sch.get("entities")
        if not ents:
            return {}
        if isinstance(ents, dict):
            # Normalize keys to lower
            return {str(k).lower(): v for k, v in ents.items()}
        # list form
        out: dict[str, dict[str, Any]] = {}
        for item in ents:
            if not isinstance(item, dict):
                continue
            key = (item.get("name") or item.get("type") or "").lower()
            if key:
                out[key] = item
        return out

    def get_entity_types(self) -> list[str]:
        """Get list of available entity types from YAML schema."""
        return list(self._entities_map().keys())

    # ================== RELATIONSHIP PARSING (TARGET-FIRST FORMAT) ==================

    def _get_relations_mapping_for_entity(
        self, entity_name: str
    ) -> dict[str, list[dict[str, Any]]]:
        """Return raw relations mapping for an entity in target-first schema format.

        The expected YAML shape under an entity is:
            relations:
              target_entity_name:
                - name: ...
                  description: ...
                  predicate: PREDICATE_NAME
                  directed: true|false

        Returns an empty dict when no relations are defined.
        """
        entity_spec = self._resolve_entity_with_inheritance(entity_name)
        relations_section = entity_spec.get("relations")
        if not relations_section or not isinstance(relations_section, dict):
            return {}

        # Normalize keys to lower for targets; keep items as-is
        normalized: dict[str, list[dict[str, Any]]] = {}
        for target_name, items in relations_section.items():
            if not isinstance(items, list):
                # Skip invalid shapes silently at this layer; validation is higher-level
                continue
            normalized[str(target_name).lower()] = [i for i in items if isinstance(i, dict)]
        return normalized

    def get_relations_for_source(self, entity_name: str) -> list[dict[str, Any]]:
        """Get normalized relation specs for a source entity in target-first schema.

        Returns list of dicts with keys:
            - source (str)
            - target (str)
            - name (str | None)
            - description (str | None)
            - predicate (str)
            - directed (bool)
        """
        if not entity_name:
            raise YamlTranslatorError("Empty entity name")

        source_l = entity_name.lower()
        relations_map = self._get_relations_mapping_for_entity(source_l)
        if not relations_map:
            return []

        out: list[dict[str, Any]] = []
        for target_l, items in relations_map.items():
            for item in items:
                predicate = item.get("predicate")
                if not predicate or not isinstance(predicate, str):
                    # Skip invalid entries - strict behavior can be added later
                    continue
                directed = bool(item.get("directed", True))
                out.append(
                    {
                        "source": source_l,
                        "target": target_l,
                        "name": item.get("name"),
                        "description": item.get("description"),
                        "predicate": predicate.upper(),
                        "directed": directed,
                    }
                )
        return out

    @staticmethod
    def relationship_table_name(
        source: str,
        predicate: str,
        target: str,
        *,
        directed: bool = True,  # noqa: unused-argument
    ) -> str:
        """Generate relationship table name.

        For now, table name does not encode direction; direction affects creation/query semantics.
        Canonicalization for undirected pairs can be added here later if decided.
        """
        return f"{str(source).upper()}_{str(predicate).upper()}_{str(target).upper()}"

    def get_labels_for_predicates(
        self,
        source_type: str,
        predicates: list[str] | None,
        neighbor_label: str | None = None,
    ) -> list[str]:
        """Expand predicate names to concrete relationship labels for a given source.

        Args:
            source_type: Source entity type name
            predicates: List of predicate names to include (case-insensitive). If None, include all.
            neighbor_label: Optional target entity type filter (case-insensitive)

        Returns:
            List of concrete relationship labels (table names) matching the filter.
        """
        if not source_type:
            raise YamlTranslatorError("Empty source_type")

        preds_u = set(p.upper() for p in predicates) if predicates else None
        neighbor_l = neighbor_label.lower() if neighbor_label else None

        labels: list[str] = []
        for spec in self.get_relations_for_source(source_type):
            if preds_u is not None and spec["predicate"].upper() not in preds_u:
                continue
            if neighbor_l is not None and spec["target"].lower() != neighbor_l:
                continue
            labels.append(
                self.relationship_table_name(
                    source=spec["source"],
                    predicate=spec["predicate"],
                    target=spec["target"],
                    directed=spec["directed"],
                )
            )
        return labels

    def debug_relation_map(self) -> dict[str, dict[str, list[dict[str, Any]]]]:
        """Return a nested relation map for debugging/printing.

        Structure:
        {
          source: {
            target: [ {name, predicate, directed, description} ... ]
          }
        }
        """
        out: dict[str, dict[str, list[dict[str, Any]]]] = {}
        for source in self.get_entity_types():
            specs = self.get_relations_for_source(source)
            if not specs:
                continue
            if source not in out:
                out[source] = {}
            for spec in specs:
                target = spec["target"]
                out[source].setdefault(target, [])
                out[source][target].append(
                    {
                        "name": spec.get("name"),
                        "predicate": spec.get("predicate"),
                        "directed": spec.get("directed", True),
                        "description": spec.get("description"),
                    }
                )
        return out

    def get_anchor_field(self, entity_name: str) -> str:
        """Get the anchor field name for the given entity type from YAML schema.

        Now reads from vector.anchored_to instead of separate anchor field.

        Args:
            entity_name: Name of the entity type.

        Returns:
            str: Anchor field name.

        Raises:
            YamlTranslatorError: If anchor field not found.
        """
        if not entity_name:
            raise YamlTranslatorError("Empty entity name")

        # Get entity spec with inheritance resolution
        entity_spec = self._resolve_entity_with_inheritance(entity_name)

        # Look for vector field with anchored_to
        fields = entity_spec.get("fields", {})
        for _field_name, field_def in fields.items():
            if isinstance(field_def, dict) and field_def.get("type") == "vector":
                anchored_to = field_def.get("anchored_to")
                if anchored_to:
                    return str(anchored_to)

        raise YamlTranslatorError(
            f"Entity '{entity_name}' has no vector field with 'anchored_to' property"
        )

    def _resolve_entity_with_inheritance(self, entity_name: str) -> dict[str, Any]:
        """Resolve entity specification with full inheritance chain."""
        name_l = entity_name.lower()
        emap = self._entities_map()
        spec_raw = emap.get(name_l)
        if not spec_raw:
            raise YamlTranslatorError(f"Entity '{entity_name}' not found in YAML schema")

        # If no parent, return as-is
        parent_name = spec_raw.get("parent")
        if not parent_name:
            return spec_raw

        # Recursively resolve parent and merge fields
        parent_spec = self._resolve_entity_with_inheritance(parent_name)

        # Merge parent fields with child fields (child overrides parent)
        merged_fields = parent_spec.get("fields", {}).copy()
        merged_fields.update(spec_raw.get("fields", {}))

        # Create merged spec
        merged_spec = spec_raw.copy()
        merged_spec["fields"] = merged_fields

        return merged_spec

    def get_see_also_config(self, entity_name: str) -> dict[str, Any] | None:
        """Get the see_also configuration for the given entity type from YAML schema.

        Returns:
            Dict with keys: enabled, threshold, limit, target_types
            None if see_also is not configured for this entity
        """
        if not entity_name:
            raise YamlTranslatorError("Empty entity name")
        name_l = entity_name.lower()
        emap = self._entities_map()
        spec_raw = emap.get(name_l)
        if not spec_raw:
            raise YamlTranslatorError(f"Entity '{entity_name}' not found in YAML schema")

        see_also = spec_raw.get("see_also")
        if not see_also or not isinstance(see_also, dict):
            return None

        # Validate required fields
        if not see_also.get("enabled", False):
            return None

        return {
            "enabled": see_also.get("enabled", False),
            "threshold": float(see_also.get("threshold", 0.7)),
            "limit": int(see_also.get("limit", 3)),
            "target_types": list(see_also.get("target_types", [])),
        }

    def build_anchor_text(self, memory) -> str:
        """Build anchor text for embedding from YAML-defined anchor field.

        NO hardcoded field names - reads anchor field from YAML schema.

        Args:
            memory: Memory object containing payload data.

        Returns:
            str: Anchor text for embedding.

        Raises:
            YamlTranslatorError: If anchor field is missing or invalid.
        """
        mem_type = getattr(memory, "memory_type", None)
        if not mem_type:
            raise YamlTranslatorError(
                "Memory object missing 'memory_type' field",
                operation="build_anchor_text",
            )

        # Get anchor field from YAML schema
        anchor_field = self.get_anchor_field(mem_type)

        # Try to get anchor text from the specified field
        anchor_text = None

        # First check if it's a core field on the Memory object
        if hasattr(memory, anchor_field):
            anchor_text = getattr(memory, anchor_field, None)
        # Otherwise check in the payload
        elif hasattr(memory, "payload") and isinstance(memory.payload, dict):
            anchor_text = memory.payload.get(anchor_field)

        if isinstance(anchor_text, str):
            stripped_text = anchor_text.strip()
            if stripped_text:
                return stripped_text

        # Anchor field missing, empty, or invalid
        raise YamlTranslatorError(
            f"Anchor field '{anchor_field}' is missing, empty, or invalid "
            f"for memory type '{mem_type}'",
            operation="build_anchor_text",
            context={
                "memory_type": mem_type,
                "anchor_field": anchor_field,
                "anchor_value": anchor_text,
            },
        )

    def _fields_contract(self, spec: dict[str, Any]) -> tuple[list[str], list[str]]:
        """Extract required and optional fields from entity specification.

        Supports either:
        - fields: {required:[...], optional:[...]} format
        - Individual field definitions with required flags

        Args:
            spec: Entity specification dictionary.

        Returns:
            tuple[list[str], list[str]]: (required_fields, optional_fields)
        """
        # supports either fields: {required:[...], optional:[...]} OR flat dict
        fields = spec.get("fields") or {}
        if "required" in fields or "optional" in fields:
            req = [str(x) for x in fields.get("required", [])]
            opt = [str(x) for x in fields.get("optional", [])]
            return req, opt

        # Resolve all fields including inherited ones
        all_fields = self._resolve_inherited_fields(spec)

        # Parse individual field definitions for required flag
        required_fields = []
        optional_fields = []

        for field_name, field_def in all_fields.items():
            if isinstance(field_def, dict) and field_def.get("required", False):
                # Skip system fields - they're handled by the system
                if not field_def.get("system", False):
                    required_fields.append(field_name)
                else:
                    optional_fields.append(field_name)
            else:
                optional_fields.append(field_name)

        return required_fields, optional_fields

    def _resolve_inherited_fields(self, spec: dict[str, Any]) -> dict[str, Any]:
        """Resolve all fields including inherited ones from parent entities.

        Args:
            spec: Entity specification dictionary.

        Returns:
            dict[str, Any]: Dictionary containing all fields (inherited + current).
        """
        all_fields = {}
        entities_map = self._entities_map()

        # If entity has a parent, resolve parent fields first
        parent_name = spec.get("parent")
        if parent_name:
            parent_spec = entities_map.get(parent_name.lower())
            if parent_spec:
                # Recursively resolve parent fields
                parent_fields = self._resolve_inherited_fields(parent_spec)
                all_fields.update(parent_fields)

        # Add/override with current entity's fields
        current_fields = spec.get("fields") or {}
        all_fields.update(current_fields)

        return all_fields

    def _get_system_fields(self, spec: dict[str, Any]) -> set[str]:
        """Extract system fields from YAML schema (fields marked with system: true).

        Args:
            spec: Entity specification dictionary.

        Returns:
            set[str]: Set of field names that are marked as system fields.
        """
        system_fields = set()
        all_fields = self._resolve_inherited_fields(spec)

        for field_name, field_def in all_fields.items():
            if isinstance(field_def, dict) and field_def.get("system", False):
                system_fields.add(field_name)

        return system_fields

    def _validate_enum_fields(self, memory_type: str, payload: dict[str, Any]) -> None:
        """Validate enum fields against YAML schema choices.

        Args:
            memory_type: Entity type from YAML schema.
            payload: Memory data to validate.

        Raises:
            YamlTranslatorError: If enum field has invalid value.
        """
        emap = self._entities_map()
        spec = emap.get(memory_type.lower())
        if not spec:
            return  # Entity validation happens elsewhere

        # Get field definitions for this entity type
        fields = spec.get("fields", {})

        # Check each field in the payload
        for field_name, field_value in payload.items():
            if field_name in fields:
                field_def = fields[field_name]

                # Check if this is an enum field
                if field_def.get("type") == "enum":
                    choices = field_def.get("choices", [])

                    # Validate the value against choices
                    if field_value is not None and field_value not in choices:
                        raise YamlTranslatorError(
                            f"Invalid {field_name} value '{field_value}'. Valid choices: {choices}",
                            context={
                                "memory_type": memory_type,
                                "field_name": field_name,
                                "invalid_value": field_value,
                                "valid_choices": choices,
                            },
                        )

    def validate_memory_against_yaml(
        self, memory_type: str, payload: dict[str, Any]
    ) -> dict[str, Any]:
        """Validate memory payload against YAML schema and return cleaned payload."""
        if not memory_type:
            raise YamlTranslatorError("memory_type is required")
        if payload is None:
            raise YamlTranslatorError("payload is required")

        # Strict validation - entity type MUST exist in YAML
        emap = self._entities_map()
        spec = emap.get(memory_type.lower())
        if not spec:
            raise YamlTranslatorError(
                f"Unknown entity type '{memory_type}'. All types must be defined in YAML schema.",
                context={
                    "memory_type": memory_type,
                    "available_types": list(emap.keys()),
                },
            )

        req, _opt = self._fields_contract(spec)
        missing = [k for k in req if not payload.get(k)]
        if missing:
            raise YamlTranslatorError(
                f"Missing required fields: {missing}",
                context={"memory_type": memory_type},
            )

        # Validate enum fields against YAML schema choices
        self._validate_enum_fields(memory_type, payload)

        # Validate that all fields are defined in YAML schema
        req, opt = self._fields_contract(spec)
        valid_fields = set(req + opt)
        system_fields = self._get_system_fields(spec)
        invalid_fields = set(payload.keys()) - valid_fields - system_fields
        if invalid_fields:
            raise YamlTranslatorError(
                f"Invalid fields not defined in schema: {sorted(invalid_fields)}",
                context={
                    "memory_type": memory_type,
                    "valid_fields": sorted(valid_fields),
                    "invalid_fields": sorted(invalid_fields),
                },
            )

        # Strip system-reserved fields if present
        cleaned = dict(payload)
        for syskey in system_fields:
            cleaned.pop(syskey, None)
        return cleaned

    def create_memory_from_yaml(self, memory_type: str, payload: dict[str, Any], user_id: str):
        """Create a Memory object from YAML-validated payload."""

        # Get anchor field from YAML schema
        anchor_field = self.get_anchor_field(memory_type)

        # Extract anchor text from payload
        anchor_text = payload.get(anchor_field)
        if not anchor_text or not isinstance(anchor_text, str):
            raise YamlTranslatorError(
                f"Missing or invalid anchor field '{anchor_field}' in payload "
                f"for memory type '{memory_type}'"
            )

        # Validate full payload against YAML schema
        validated_payload = self.validate_memory_against_yaml(memory_type, payload)

        # Construct Memory with YAML-defined payload only
        return Memory(
            memory_type=memory_type,
            payload=validated_payload,
            user_id=user_id,
        )

    def get_entity_model(self, entity_name: str):
        """Get Pydantic model from TypeRegistry - NO REDUNDANCY."""
        return get_entity_model(entity_name)

schema property

Get the loaded YAML schema, loading it if necessary.

__init__(yaml_path=None)

Initialize YamlTranslator with YAML schema path.

Parameters:

Name Type Description Default
yaml_path str | None

Path to YAML schema file. If None, uses MEMG_YAML_SCHEMA env var.

None

Raises:

Type Description
YamlTranslatorError

If YAML path not provided or TypeRegistry initialization fails.

Source code in src/memg_core/core/yaml_translator.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def __init__(self, yaml_path: str | None = None) -> None:
    """Initialize YamlTranslator with YAML schema path.

    Args:
        yaml_path: Path to YAML schema file. If None, uses MEMG_YAML_SCHEMA env var.

    Raises:
        YamlTranslatorError: If YAML path not provided or TypeRegistry initialization fails.
    """
    # Require explicit YAML path - no silent defaults
    if yaml_path:
        self.yaml_path = yaml_path
    else:
        env_path = os.getenv("MEMG_YAML_SCHEMA")
        if not env_path:
            raise YamlTranslatorError(
                "YAML schema path required. Set MEMG_YAML_SCHEMA environment variable "
                "or provide yaml_path parameter. No defaults allowed."
            )
        self.yaml_path = env_path

    self._schema: dict[str, Any] | None = None
    # NO model cache - TypeRegistry handles all caching

    # Initialize TypeRegistry from YAML - crash early if invalid
    try:
        initialize_types_from_yaml(self.yaml_path)
    except Exception as e:
        raise YamlTranslatorError(f"Failed to initialize TypeRegistry from YAML: {e}") from e

build_anchor_text(memory)

Build anchor text for embedding from YAML-defined anchor field.

NO hardcoded field names - reads anchor field from YAML schema.

Parameters:

Name Type Description Default
memory

Memory object containing payload data.

required

Returns:

Name Type Description
str str

Anchor text for embedding.

Raises:

Type Description
YamlTranslatorError

If anchor field is missing or invalid.

Source code in src/memg_core/core/yaml_translator.py
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
def build_anchor_text(self, memory) -> str:
    """Build anchor text for embedding from YAML-defined anchor field.

    NO hardcoded field names - reads anchor field from YAML schema.

    Args:
        memory: Memory object containing payload data.

    Returns:
        str: Anchor text for embedding.

    Raises:
        YamlTranslatorError: If anchor field is missing or invalid.
    """
    mem_type = getattr(memory, "memory_type", None)
    if not mem_type:
        raise YamlTranslatorError(
            "Memory object missing 'memory_type' field",
            operation="build_anchor_text",
        )

    # Get anchor field from YAML schema
    anchor_field = self.get_anchor_field(mem_type)

    # Try to get anchor text from the specified field
    anchor_text = None

    # First check if it's a core field on the Memory object
    if hasattr(memory, anchor_field):
        anchor_text = getattr(memory, anchor_field, None)
    # Otherwise check in the payload
    elif hasattr(memory, "payload") and isinstance(memory.payload, dict):
        anchor_text = memory.payload.get(anchor_field)

    if isinstance(anchor_text, str):
        stripped_text = anchor_text.strip()
        if stripped_text:
            return stripped_text

    # Anchor field missing, empty, or invalid
    raise YamlTranslatorError(
        f"Anchor field '{anchor_field}' is missing, empty, or invalid "
        f"for memory type '{mem_type}'",
        operation="build_anchor_text",
        context={
            "memory_type": mem_type,
            "anchor_field": anchor_field,
            "anchor_value": anchor_text,
        },
    )

create_memory_from_yaml(memory_type, payload, user_id)

Create a Memory object from YAML-validated payload.

Source code in src/memg_core/core/yaml_translator.py
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
def create_memory_from_yaml(self, memory_type: str, payload: dict[str, Any], user_id: str):
    """Create a Memory object from YAML-validated payload."""

    # Get anchor field from YAML schema
    anchor_field = self.get_anchor_field(memory_type)

    # Extract anchor text from payload
    anchor_text = payload.get(anchor_field)
    if not anchor_text or not isinstance(anchor_text, str):
        raise YamlTranslatorError(
            f"Missing or invalid anchor field '{anchor_field}' in payload "
            f"for memory type '{memory_type}'"
        )

    # Validate full payload against YAML schema
    validated_payload = self.validate_memory_against_yaml(memory_type, payload)

    # Construct Memory with YAML-defined payload only
    return Memory(
        memory_type=memory_type,
        payload=validated_payload,
        user_id=user_id,
    )

debug_relation_map()

Return a nested relation map for debugging/printing.

Structure: { source: { target: [ {name, predicate, directed, description} ... ] } }

Source code in src/memg_core/core/yaml_translator.py
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
def debug_relation_map(self) -> dict[str, dict[str, list[dict[str, Any]]]]:
    """Return a nested relation map for debugging/printing.

    Structure:
    {
      source: {
        target: [ {name, predicate, directed, description} ... ]
      }
    }
    """
    out: dict[str, dict[str, list[dict[str, Any]]]] = {}
    for source in self.get_entity_types():
        specs = self.get_relations_for_source(source)
        if not specs:
            continue
        if source not in out:
            out[source] = {}
        for spec in specs:
            target = spec["target"]
            out[source].setdefault(target, [])
            out[source][target].append(
                {
                    "name": spec.get("name"),
                    "predicate": spec.get("predicate"),
                    "directed": spec.get("directed", True),
                    "description": spec.get("description"),
                }
            )
    return out

get_anchor_field(entity_name)

Get the anchor field name for the given entity type from YAML schema.

Now reads from vector.anchored_to instead of separate anchor field.

Parameters:

Name Type Description Default
entity_name str

Name of the entity type.

required

Returns:

Name Type Description
str str

Anchor field name.

Raises:

Type Description
YamlTranslatorError

If anchor field not found.

Source code in src/memg_core/core/yaml_translator.py
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
def get_anchor_field(self, entity_name: str) -> str:
    """Get the anchor field name for the given entity type from YAML schema.

    Now reads from vector.anchored_to instead of separate anchor field.

    Args:
        entity_name: Name of the entity type.

    Returns:
        str: Anchor field name.

    Raises:
        YamlTranslatorError: If anchor field not found.
    """
    if not entity_name:
        raise YamlTranslatorError("Empty entity name")

    # Get entity spec with inheritance resolution
    entity_spec = self._resolve_entity_with_inheritance(entity_name)

    # Look for vector field with anchored_to
    fields = entity_spec.get("fields", {})
    for _field_name, field_def in fields.items():
        if isinstance(field_def, dict) and field_def.get("type") == "vector":
            anchored_to = field_def.get("anchored_to")
            if anchored_to:
                return str(anchored_to)

    raise YamlTranslatorError(
        f"Entity '{entity_name}' has no vector field with 'anchored_to' property"
    )

get_entity_model(entity_name)

Get Pydantic model from TypeRegistry - NO REDUNDANCY.

Source code in src/memg_core/core/yaml_translator.py
619
620
621
def get_entity_model(self, entity_name: str):
    """Get Pydantic model from TypeRegistry - NO REDUNDANCY."""
    return get_entity_model(entity_name)

get_entity_types()

Get list of available entity types from YAML schema.

Source code in src/memg_core/core/yaml_translator.py
123
124
125
def get_entity_types(self) -> list[str]:
    """Get list of available entity types from YAML schema."""
    return list(self._entities_map().keys())

get_labels_for_predicates(source_type, predicates, neighbor_label=None)

Expand predicate names to concrete relationship labels for a given source.

Parameters:

Name Type Description Default
source_type str

Source entity type name

required
predicates list[str] | None

List of predicate names to include (case-insensitive). If None, include all.

required
neighbor_label str | None

Optional target entity type filter (case-insensitive)

None

Returns:

Type Description
list[str]

List of concrete relationship labels (table names) matching the filter.

Source code in src/memg_core/core/yaml_translator.py
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
def get_labels_for_predicates(
    self,
    source_type: str,
    predicates: list[str] | None,
    neighbor_label: str | None = None,
) -> list[str]:
    """Expand predicate names to concrete relationship labels for a given source.

    Args:
        source_type: Source entity type name
        predicates: List of predicate names to include (case-insensitive). If None, include all.
        neighbor_label: Optional target entity type filter (case-insensitive)

    Returns:
        List of concrete relationship labels (table names) matching the filter.
    """
    if not source_type:
        raise YamlTranslatorError("Empty source_type")

    preds_u = set(p.upper() for p in predicates) if predicates else None
    neighbor_l = neighbor_label.lower() if neighbor_label else None

    labels: list[str] = []
    for spec in self.get_relations_for_source(source_type):
        if preds_u is not None and spec["predicate"].upper() not in preds_u:
            continue
        if neighbor_l is not None and spec["target"].lower() != neighbor_l:
            continue
        labels.append(
            self.relationship_table_name(
                source=spec["source"],
                predicate=spec["predicate"],
                target=spec["target"],
                directed=spec["directed"],
            )
        )
    return labels

get_relations_for_source(entity_name)

Get normalized relation specs for a source entity in target-first schema.

Returns list of dicts with keys
  • source (str)
  • target (str)
  • name (str | None)
  • description (str | None)
  • predicate (str)
  • directed (bool)
Source code in src/memg_core/core/yaml_translator.py
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
def get_relations_for_source(self, entity_name: str) -> list[dict[str, Any]]:
    """Get normalized relation specs for a source entity in target-first schema.

    Returns list of dicts with keys:
        - source (str)
        - target (str)
        - name (str | None)
        - description (str | None)
        - predicate (str)
        - directed (bool)
    """
    if not entity_name:
        raise YamlTranslatorError("Empty entity name")

    source_l = entity_name.lower()
    relations_map = self._get_relations_mapping_for_entity(source_l)
    if not relations_map:
        return []

    out: list[dict[str, Any]] = []
    for target_l, items in relations_map.items():
        for item in items:
            predicate = item.get("predicate")
            if not predicate or not isinstance(predicate, str):
                # Skip invalid entries - strict behavior can be added later
                continue
            directed = bool(item.get("directed", True))
            out.append(
                {
                    "source": source_l,
                    "target": target_l,
                    "name": item.get("name"),
                    "description": item.get("description"),
                    "predicate": predicate.upper(),
                    "directed": directed,
                }
            )
    return out

get_see_also_config(entity_name)

Get the see_also configuration for the given entity type from YAML schema.

Returns:

Type Description
dict[str, Any] | None

Dict with keys: enabled, threshold, limit, target_types

dict[str, Any] | None

None if see_also is not configured for this entity

Source code in src/memg_core/core/yaml_translator.py
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
def get_see_also_config(self, entity_name: str) -> dict[str, Any] | None:
    """Get the see_also configuration for the given entity type from YAML schema.

    Returns:
        Dict with keys: enabled, threshold, limit, target_types
        None if see_also is not configured for this entity
    """
    if not entity_name:
        raise YamlTranslatorError("Empty entity name")
    name_l = entity_name.lower()
    emap = self._entities_map()
    spec_raw = emap.get(name_l)
    if not spec_raw:
        raise YamlTranslatorError(f"Entity '{entity_name}' not found in YAML schema")

    see_also = spec_raw.get("see_also")
    if not see_also or not isinstance(see_also, dict):
        return None

    # Validate required fields
    if not see_also.get("enabled", False):
        return None

    return {
        "enabled": see_also.get("enabled", False),
        "threshold": float(see_also.get("threshold", 0.7)),
        "limit": int(see_also.get("limit", 3)),
        "target_types": list(see_also.get("target_types", [])),
    }

relationship_table_name(source, predicate, target, *, directed=True) staticmethod

Generate relationship table name.

For now, table name does not encode direction; direction affects creation/query semantics. Canonicalization for undirected pairs can be added here later if decided.

Source code in src/memg_core/core/yaml_translator.py
197
198
199
200
201
202
203
204
205
206
207
208
209
210
@staticmethod
def relationship_table_name(
    source: str,
    predicate: str,
    target: str,
    *,
    directed: bool = True,  # noqa: unused-argument
) -> str:
    """Generate relationship table name.

    For now, table name does not encode direction; direction affects creation/query semantics.
    Canonicalization for undirected pairs can be added here later if decided.
    """
    return f"{str(source).upper()}_{str(predicate).upper()}_{str(target).upper()}"

validate_memory_against_yaml(memory_type, payload)

Validate memory payload against YAML schema and return cleaned payload.

Source code in src/memg_core/core/yaml_translator.py
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
def validate_memory_against_yaml(
    self, memory_type: str, payload: dict[str, Any]
) -> dict[str, Any]:
    """Validate memory payload against YAML schema and return cleaned payload."""
    if not memory_type:
        raise YamlTranslatorError("memory_type is required")
    if payload is None:
        raise YamlTranslatorError("payload is required")

    # Strict validation - entity type MUST exist in YAML
    emap = self._entities_map()
    spec = emap.get(memory_type.lower())
    if not spec:
        raise YamlTranslatorError(
            f"Unknown entity type '{memory_type}'. All types must be defined in YAML schema.",
            context={
                "memory_type": memory_type,
                "available_types": list(emap.keys()),
            },
        )

    req, _opt = self._fields_contract(spec)
    missing = [k for k in req if not payload.get(k)]
    if missing:
        raise YamlTranslatorError(
            f"Missing required fields: {missing}",
            context={"memory_type": memory_type},
        )

    # Validate enum fields against YAML schema choices
    self._validate_enum_fields(memory_type, payload)

    # Validate that all fields are defined in YAML schema
    req, opt = self._fields_contract(spec)
    valid_fields = set(req + opt)
    system_fields = self._get_system_fields(spec)
    invalid_fields = set(payload.keys()) - valid_fields - system_fields
    if invalid_fields:
        raise YamlTranslatorError(
            f"Invalid fields not defined in schema: {sorted(invalid_fields)}",
            context={
                "memory_type": memory_type,
                "valid_fields": sorted(valid_fields),
                "invalid_fields": sorted(invalid_fields),
            },
        )

    # Strip system-reserved fields if present
    cleaned = dict(payload)
    for syskey in system_fields:
        cleaned.pop(syskey, None)
    return cleaned

YamlTranslatorError

Bases: MemorySystemError

Error in YAML schema translation or validation.

Attributes:

Name Type Description
message

Error message.

operation

Operation that caused the error.

context

Additional context information.

original_error

Original exception that was wrapped.

Source code in src/memg_core/core/yaml_translator.py
23
24
25
26
27
28
29
30
31
class YamlTranslatorError(MemorySystemError):
    """Error in YAML schema translation or validation.

    Attributes:
        message: Error message.
        operation: Operation that caused the error.
        context: Additional context information.
        original_error: Original exception that was wrapped.
    """

get_entity_model(entity_name)

Get Pydantic model for entity from global registry.

Parameters:

Name Type Description Default
entity_name str

Name of the entity.

required

Returns:

Type Description
type[BaseModel]

type[BaseModel]: Pydantic model class.

Source code in src/memg_core/core/types.py
421
422
423
424
425
426
427
428
429
430
def get_entity_model(entity_name: str) -> type[BaseModel]:
    """Get Pydantic model for entity from global registry.

    Args:
        entity_name: Name of the entity.

    Returns:
        type[BaseModel]: Pydantic model class.
    """
    return TypeRegistry.get_instance().get_entity_model(entity_name)

initialize_types_from_yaml(yaml_path)

Initialize global type registry from YAML - call once at startup.

Parameters:

Name Type Description Default
yaml_path str

Path to YAML schema file.

required
Source code in src/memg_core/core/types.py
457
458
459
460
461
462
463
def initialize_types_from_yaml(yaml_path: str) -> None:
    """Initialize global type registry from YAML - call once at startup.

    Args:
        yaml_path: Path to YAML schema file.
    """
    TypeRegistry.initialize_from_yaml(yaml_path)

Utilities

HRID Management

Human-readable ID utilities:

HRID generator and parser for MEMG Core.

Format: {TYPE_UPPER}_{AAA000} - TYPE: uppercase alphanumeric type name (no spaces) - AAA: base26 letters A–Z (wraps after ZZZ) - 000–999: numeric suffix

_COUNTERS = {} module-attribute

_HRID_RE = re.compile('^(?P<type>[A-Z0-9_]+)_(?P<alpha>[A-Z]{3})(?P<num>\\d{3})$') module-attribute

DatabaseError

Bases: MemorySystemError

Database operation failures (Qdrant, Kuzu).

Source code in src/memg_core/core/exceptions.py
42
43
class DatabaseError(MemorySystemError):
    """Database operation failures (Qdrant, Kuzu)."""

StorageQueryInterface

Bases: Protocol

Protocol for storage backends that can query for existing HRIDs.

Attributes:

Name Type Description
vector

Query embedding vector.

limit

Maximum number of results.

collection

Optional collection name.

user_id

User ID for filtering.

filters

Additional search filters.

Source code in src/memg_core/utils/hrid.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
class StorageQueryInterface(Protocol):
    """Protocol for storage backends that can query for existing HRIDs.

    Attributes:
        vector: Query embedding vector.
        limit: Maximum number of results.
        collection: Optional collection name.
        user_id: User ID for filtering.
        filters: Additional search filters.
    """

    def search_points(
        self,
        vector: list[float],
        limit: int = 5,
        collection: str | None = None,
        user_id: str | None = None,
        filters: dict[str, Any] | None = None,
    ) -> list[dict[str, Any]]:
        """Search for points with optional filtering.

        Args:
            vector: Query embedding vector.
            limit: Maximum number of results.
            collection: Optional collection name.
            user_id: User ID for filtering.
            filters: Additional search filters.

        Returns:
            list[dict[str, Any]]: List of search results.
        """
        raise NotImplementedError("Subclasses must implement search_points method")

search_points(vector, limit=5, collection=None, user_id=None, filters=None)

Search for points with optional filtering.

Parameters:

Name Type Description Default
vector list[float]

Query embedding vector.

required
limit int

Maximum number of results.

5
collection str | None

Optional collection name.

None
user_id str | None

User ID for filtering.

None
filters dict[str, Any] | None

Additional search filters.

None

Returns:

Type Description
list[dict[str, Any]]

list[dict[str, Any]]: List of search results.

Source code in src/memg_core/utils/hrid.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def search_points(
    self,
    vector: list[float],
    limit: int = 5,
    collection: str | None = None,
    user_id: str | None = None,
    filters: dict[str, Any] | None = None,
) -> list[dict[str, Any]]:
    """Search for points with optional filtering.

    Args:
        vector: Query embedding vector.
        limit: Maximum number of results.
        collection: Optional collection name.
        user_id: User ID for filtering.
        filters: Additional search filters.

    Returns:
        list[dict[str, Any]]: List of search results.
    """
    raise NotImplementedError("Subclasses must implement search_points method")

_alpha_to_idx(alpha)

Convert alpha string to index: AAA -> 0, AAB -> 1, ..., ZZZ -> 17575.

Parameters:

Name Type Description Default
alpha str

Three-letter alpha string (AAA-ZZZ).

required

Returns:

Name Type Description
int int

Numeric index.

Source code in src/memg_core/utils/hrid.py
58
59
60
61
62
63
64
65
66
67
68
69
70
def _alpha_to_idx(alpha: str) -> int:
    """Convert alpha string to index: AAA -> 0, AAB -> 1, ..., ZZZ -> 17575.

    Args:
        alpha: Three-letter alpha string (AAA-ZZZ).

    Returns:
        int: Numeric index.
    """
    idx = 0
    for char in alpha:
        idx = idx * 26 + (ord(char) - ord("A"))
    return idx

_idx_to_alpha(idx)

Convert index to alpha string: 0 -> AAA, 1 -> AAB, ..., 17575 -> ZZZ.

Parameters:

Name Type Description Default
idx int

Numeric index (0-17575).

required

Returns:

Name Type Description
str str

Three-letter alpha string.

Source code in src/memg_core/utils/hrid.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
def _idx_to_alpha(idx: int) -> str:
    """Convert index to alpha string: 0 -> AAA, 1 -> AAB, ..., 17575 -> ZZZ.

    Args:
        idx: Numeric index (0-17575).

    Returns:
        str: Three-letter alpha string.
    """
    chars = []
    for _ in range(3):
        chars.append(chr(ord("A") + idx % 26))
        idx //= 26
    return "".join(reversed(chars))

_initialize_counter_from_storage(type_name, user_id, storage=None)

Initialize counter by querying storage for highest existing HRID of this type.

Parameters:

Name Type Description Default
type_name str

The memory type to check (e.g., 'note', 'task')

required
user_id str

User ID for scoped HRID lookup

required
storage StorageQueryInterface | None

Storage interface to query for existing HRIDs

None

Returns:

Type Description
tuple[int, int]

tuple[int, int]: (alpha_idx, num) representing the next available counter position

Notes
  • If no storage provided or no existing HRIDs found, returns (0, -1) for fresh start
  • Queries storage for memories of the given type and finds the highest HRID
  • Parses the highest HRID to determine the next counter position
Source code in src/memg_core/utils/hrid.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def _initialize_counter_from_storage(
    type_name: str, user_id: str, storage: StorageQueryInterface | None = None
) -> tuple[int, int]:
    """Initialize counter by querying storage for highest existing HRID of this type.

    Args:
        type_name: The memory type to check (e.g., 'note', 'task')
        user_id: User ID for scoped HRID lookup
        storage: Storage interface to query for existing HRIDs

    Returns:
        tuple[int, int]: (alpha_idx, num) representing the next available counter position

    Notes:
        - If no storage provided or no existing HRIDs found, returns (0, -1) for fresh start
        - Queries storage for memories of the given type and finds the highest HRID
        - Parses the highest HRID to determine the next counter position
    """
    if not storage:
        return (0, -1)  # Fresh start if no storage interface

    try:
        # Query storage for memories of this type, with high limit to catch all
        # We use a dummy vector since we're filtering by memory_type, not doing vector search
        dummy_vector = [0.0] * 384  # Default embedding size

        # Search with memory_type and user_id filters to get all memories of this type for this user
        results = storage.search_points(
            vector=dummy_vector,
            limit=10000,  # High limit to get all existing memories of this type
            filters={
                "user_id": user_id,  # CRITICAL: Include user_id for proper isolation
                "memory_type": type_name.lower(),
            },
        )

        if not results:
            return (0, -1)  # No existing memories of this type

        # Find the highest HRID among results
        highest_hrid = None
        highest_alpha_idx = -1
        highest_num = -1

        for result in results:
            payload = result.get("payload", {})
            # Updated for flat payload structure
            hrid = payload.get("hrid")

            if not hrid or not isinstance(hrid, str):
                continue

            try:
                parsed_type, alpha, num = parse_hrid(hrid)
                if parsed_type.upper() != type_name.upper():
                    continue  # Skip HRIDs of different types

                alpha_idx = _alpha_to_idx(alpha)

                # Check if this is the highest HRID so far
                if alpha_idx > highest_alpha_idx or (
                    alpha_idx == highest_alpha_idx and num > highest_num
                ):
                    highest_alpha_idx = alpha_idx
                    highest_num = num
                    highest_hrid = hrid

            except ValueError as e:
                # Skip invalid HRIDs but log for transparency
                logger = get_logger("hrid")
                logger.debug(f"Skipping invalid HRID format '{hrid}': {e}")
                continue

        if highest_hrid is None:
            return (0, -1)  # No valid HRIDs found

        # Return the next position after the highest found
        next_num = highest_num + 1
        if next_num > 999:
            next_num = 0
            highest_alpha_idx += 1

        return (
            highest_alpha_idx,
            next_num - 1,
        )  # -1 because generate_hrid will increment

    except (DatabaseError, ConnectionError, TimeoutError) as e:
        # If storage query fails, fall back to fresh start but log the issue
        logger = get_logger("hrid")
        logger.warning(f"HRID storage query failed, falling back to fresh start: {e}")
        return (0, -1)

_initialize_counter_from_tracker(type_name, user_id, hrid_tracker)

Initialize counter by querying HridTracker for highest existing HRID.

Parameters:

Name Type Description Default
type_name str

The memory type to check (e.g., 'note', 'task')

required
user_id str

User ID for scoped HRID lookup

required
hrid_tracker

HridTracker instance to query

required

Returns:

Type Description
tuple[int, int]

tuple[int, int]: (alpha_idx, num) representing the next available counter position

Source code in src/memg_core/utils/hrid.py
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
def _initialize_counter_from_tracker(type_name: str, user_id: str, hrid_tracker) -> tuple[int, int]:
    """Initialize counter by querying HridTracker for highest existing HRID.

    Args:
        type_name: The memory type to check (e.g., 'note', 'task')
        user_id: User ID for scoped HRID lookup
        hrid_tracker: HridTracker instance to query

    Returns:
        tuple[int, int]: (alpha_idx, num) representing the next available counter position
    """
    try:
        highest = hrid_tracker.get_highest_hrid(type_name, user_id)

        if highest is None:
            return (0, -1)  # No existing HRIDs for this type

        _highest_hrid, highest_alpha_idx, highest_num = highest

        # Return the next position after the highest found
        next_num = highest_num + 1
        if next_num > 999:
            next_num = 0
            highest_alpha_idx += 1

        return (
            highest_alpha_idx,
            next_num - 1,
        )  # -1 because generate_hrid will increment

    except Exception as e:
        # DO NOT FALL BACK SILENTLY - this causes duplicate HRID bugs!
        # If we can't initialize from existing data, the system should fail fast
        raise DatabaseError(
            f"Failed to initialize HRID counter for type '{type_name}' from existing data. "
            f"This is critical - cannot generate HRIDs without knowing existing ones.",
            operation="initialize_counter_from_tracker",
            context={"type_name": type_name},
            original_error=e,
        ) from e

_type_key(t)

Deterministic numeric key for type names to enable cross-type ordering. Encodes up to the first 8 chars in base-37 (A–Z=1–26, 0–9=27–36).

Source code in src/memg_core/utils/hrid.py
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
def _type_key(t: str) -> int:
    """
    Deterministic numeric key for type names to enable cross-type ordering.
    Encodes up to the first 8 chars in base-37 (A–Z=1–26, 0–9=27–36).
    """
    t = t.upper()
    key = 0
    for c in t[:8]:
        if "A" <= c <= "Z":
            v = 1 + (ord(c) - ord("A"))
        elif "0" <= c <= "9":
            v = 27 + (ord(c) - ord("0"))
        else:
            v = 0
        key = key * 37 + v
    return key

generate_hrid(type_name, user_id, hrid_tracker=None)

Generate the next HRID for the given type.

Parameters:

Name Type Description Default
type_name str

The memory type (e.g., 'note', 'task').

required
user_id str

User ID for scoped HRID generation.

required
hrid_tracker

Optional HridTracker instance for querying existing HRIDs.

None

Returns:

Name Type Description
str str

The next HRID in format TYPE_AAA000.

Notes
  • Uses HridTracker to query HridMapping table for existing HRIDs.
  • Falls back to in-memory counter if no tracker provided.
  • Ensures no duplicates by checking complete HRID history.
Source code in src/memg_core/utils/hrid.py
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
def generate_hrid(type_name: str, user_id: str, hrid_tracker=None) -> str:
    """Generate the next HRID for the given type.

    Args:
        type_name: The memory type (e.g., 'note', 'task').
        user_id: User ID for scoped HRID generation.
        hrid_tracker: Optional HridTracker instance for querying existing HRIDs.

    Returns:
        str: The next HRID in format TYPE_AAA000.

    Notes:
        - Uses HridTracker to query HridMapping table for existing HRIDs.
        - Falls back to in-memory counter if no tracker provided.
        - Ensures no duplicates by checking complete HRID history.
    """
    t = type_name.strip().upper()

    # Initialize counter from HridTracker on first use of this type+user combination
    counter_key = (t, user_id)
    if counter_key not in _COUNTERS and hrid_tracker is not None:
        _COUNTERS[counter_key] = _initialize_counter_from_tracker(t, user_id, hrid_tracker)

    # Get current counter or default to fresh start
    alpha_idx, num = _COUNTERS.get(counter_key, (0, -1))
    num += 1
    if num > 999:
        num = 0
        alpha_idx += 1
        if alpha_idx > 26**3 - 1:
            raise ValueError(f"HRID space exhausted for type {t}")
    _COUNTERS[counter_key] = (alpha_idx, num)
    return f"{t}_{_idx_to_alpha(alpha_idx)}{num:03d}"

get_logger(component)

Get a logger for a component.

Parameters:

Name Type Description Default
component str

Component name.

required

Returns:

Type Description
Logger

logging.Logger: Logger instance for the component.

Source code in src/memg_core/core/logging.py
157
158
159
160
161
162
163
164
165
166
def get_logger(component: str) -> logging.Logger:
    """Get a logger for a component.

    Args:
        component: Component name.

    Returns:
        logging.Logger: Logger instance for the component.
    """
    return MemorySystemLogger.get_logger(component)

hrid_to_index(hrid)

Convert HRID into a single integer index for ordering across types.

Parameters:

Name Type Description Default
hrid str

HRID string to convert.

required

Returns:

Name Type Description
int int

Single integer index for cross-type ordering.

Source code in src/memg_core/utils/hrid.py
296
297
298
299
300
301
302
303
304
305
306
307
def hrid_to_index(hrid: str) -> int:
    """Convert HRID into a single integer index for ordering across types.

    Args:
        hrid: HRID string to convert.

    Returns:
        int: Single integer index for cross-type ordering.
    """
    type_, alpha, num = parse_hrid(hrid)
    intra = _alpha_to_idx(alpha) * 1000 + num  # 0 .. 17,575,999  (needs 25 bits)
    return (_type_key(type_) << 25) | intra

parse_hrid(hrid)

Parse HRID into (type, alpha, num).

Parameters:

Name Type Description Default
hrid str

HRID string to parse.

required

Returns:

Type Description
tuple[str, str, int]

tuple[str, str, int]: (type, alpha, num) components.

Raises:

Type Description
ValueError

If HRID format is invalid.

Source code in src/memg_core/utils/hrid.py
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
def parse_hrid(hrid: str) -> tuple[str, str, int]:
    """Parse HRID into (type, alpha, num).

    Args:
        hrid: HRID string to parse.

    Returns:
        tuple[str, str, int]: (type, alpha, num) components.

    Raises:
        ValueError: If HRID format is invalid.
    """
    m = _HRID_RE.match(hrid.strip().upper())
    if not m:
        raise ValueError(f"Invalid HRID format: {hrid}")
    return m.group("type"), m.group("alpha"), int(m.group("num"))

reset_counters()

Reset all in-memory HRID counters.

This is used for testing to simulate system restarts. In production, counters are automatically initialized from database.

Source code in src/memg_core/utils/hrid.py
310
311
312
313
314
315
316
317
def reset_counters():
    """Reset all in-memory HRID counters.

    This is used for testing to simulate system restarts.
    In production, counters are automatically initialized from database.
    """
    global _COUNTERS
    _COUNTERS.clear()

HRID Tracker: UUID ↔ HRID translation and lifecycle management.

Handles all HRID mapping operations using the existing KuzuInterface. Provides transparent translation between user-facing HRIDs and internal UUIDs.

DatabaseError

Bases: MemorySystemError

Database operation failures (Qdrant, Kuzu).

Source code in src/memg_core/core/exceptions.py
42
43
class DatabaseError(MemorySystemError):
    """Database operation failures (Qdrant, Kuzu)."""

HridTracker

Manages HRID ↔ UUID mappings using KuzuInterface.

Attributes:

Name Type Description
kuzu

Pre-configured Kuzu interface for database operations.

Source code in src/memg_core/utils/hrid_tracker.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
class HridTracker:
    """Manages HRID ↔ UUID mappings using KuzuInterface.

    Attributes:
        kuzu: Pre-configured Kuzu interface for database operations.
    """

    def __init__(self, kuzu_interface: KuzuInterface):
        """Initialize with existing KuzuInterface.

        Args:
            kuzu_interface: Pre-configured Kuzu interface for database operations.
        """
        self.kuzu = kuzu_interface

    def get_uuid(self, hrid: str, user_id: str) -> str:
        """Translate HRID to UUID.

        Args:
            hrid: Human-readable ID (e.g., 'TASK_AAA001').
            user_id: User ID for scoped lookup.

        Returns:
            str: UUID string for internal operations.

        Raises:
            DatabaseError: If HRID not found or is deleted.
        """
        try:
            query = """
            MATCH (m:HridMapping {hrid: $hrid, user_id: $user_id})
            WHERE m.deleted_at IS NULL
            RETURN m.uuid as uuid
            """
            results = self.kuzu.query(query, {"hrid": hrid, "user_id": user_id})

            if not results:
                raise DatabaseError(
                    f"HRID '{hrid}' not found or has been deleted",
                    operation="get_uuid",
                    context={"hrid": hrid},
                )

            return results[0]["uuid"]

        except Exception as e:
            if isinstance(e, DatabaseError):
                raise
            raise DatabaseError(
                f"Failed to lookup UUID for HRID '{hrid}'",
                operation="get_uuid",
                context={"hrid": hrid},
                original_error=e,
            ) from e

    def get_hrid(self, uuid: str, user_id: str) -> str:
        """Translate UUID to HRID with user verification.

        Args:
            uuid: Internal UUID.
            user_id: User ID for ownership verification.

        Returns:
            str: Human-readable ID string.

        Raises:
            DatabaseError: If UUID not found, deleted, or doesn't belong to user.
        """
        try:
            query = """
            MATCH (m:HridMapping {uuid: $uuid, user_id: $user_id})
            WHERE m.deleted_at IS NULL
            RETURN m.hrid as hrid
            """
            results = self.kuzu.query(query, {"uuid": uuid, "user_id": user_id})

            if not results:
                raise DatabaseError(
                    f"UUID '{uuid}' not found or has been deleted",
                    operation="get_hrid",
                    context={"uuid": uuid},
                )

            return results[0]["hrid"]

        except Exception as e:
            if isinstance(e, DatabaseError):
                raise
            raise DatabaseError(
                f"Failed to lookup HRID for UUID '{uuid}'",
                operation="get_hrid",
                context={"uuid": uuid},
                original_error=e,
            ) from e

    def create_mapping(self, hrid: str, uuid: str, memory_type: str, user_id: str) -> None:
        """Create new HRID ↔ UUID mapping.

        Args:
            hrid: Human-readable ID.
            uuid: Internal UUID.
            memory_type: Entity type (e.g., 'task', 'note').
            user_id: User ID for scoped mapping.

        Raises:
            DatabaseError: If mapping creation fails.
        """
        try:
            now = datetime.now(UTC).isoformat()

            mapping_data = {
                "hrid_user_key": f"{hrid}#{user_id}",  # Composite key
                "hrid": hrid,
                "uuid": uuid,
                "memory_type": memory_type,
                "user_id": user_id,
                "created_at": now,
                "deleted_at": None,  # NULL for active mappings
            }

            self.kuzu.add_node("HridMapping", mapping_data)

        except Exception as e:
            raise DatabaseError(
                f"Failed to create HRID mapping: {hrid}{uuid}",
                operation="create_mapping",
                context={"hrid": hrid, "uuid": uuid, "memory_type": memory_type},
                original_error=e,
            ) from e

    def mark_deleted(self, hrid: str) -> None:
        """Mark HRID mapping as deleted (soft delete)

        Args:
            hrid: Human-readable ID to mark as deleted

        Raises:
            DatabaseError: If marking as deleted fails
        """
        try:
            now = datetime.now(UTC).isoformat()

            query = """
            MATCH (m:HridMapping {hrid: $hrid})
            SET m.deleted_at = $deleted_at
            RETURN m.hrid as hrid
            """

            results = self.kuzu.query(query, {"hrid": hrid, "deleted_at": now})

            if not results:
                raise DatabaseError(
                    f"HRID '{hrid}' not found for deletion",
                    operation="mark_deleted",
                    context={"hrid": hrid},
                )

        except Exception as e:
            if isinstance(e, DatabaseError):
                raise
            raise DatabaseError(
                f"Failed to mark HRID '{hrid}' as deleted",
                operation="mark_deleted",
                context={"hrid": hrid},
                original_error=e,
            ) from e

    def get_highest_hrid(self, memory_type: str, user_id: str) -> tuple[str, int, int] | None:
        """Get highest HRID for a memory type (for generation).

        Args:
            memory_type: Entity type to check (case insensitive).
            user_id: User ID for scoped HRID lookup.

        Returns:
            tuple[str, int, int] | None: (hrid, alpha_idx, num) or None if no HRIDs exist.
        """
        try:
            # Normalize to lowercase for database query (YAML types are lowercase)
            normalized_type = memory_type.lower()

            query = """
            MATCH (m:HridMapping {memory_type: $memory_type, user_id: $user_id})
            RETURN m.hrid as hrid
            ORDER BY m.created_at DESC
            LIMIT 1000
            """

            results = self.kuzu.query(query, {"memory_type": normalized_type, "user_id": user_id})

            if not results:
                return None

            # Find the highest HRID by parsing all results
            highest_hrid = None
            highest_alpha_idx = -1
            highest_num = -1

            for result in results:
                hrid = result["hrid"]
                try:
                    # TODO: parsed_type available for future use (e.g., type validation)
                    _, alpha, num = parse_hrid(hrid)
                    alpha_idx = _alpha_to_idx(alpha)

                    if alpha_idx > highest_alpha_idx or (
                        alpha_idx == highest_alpha_idx and num > highest_num
                    ):
                        highest_alpha_idx = alpha_idx
                        highest_num = num
                        highest_hrid = hrid

                except ValueError:
                    continue  # Skip invalid HRIDs

            if highest_hrid is None:
                return None

            return (highest_hrid, highest_alpha_idx, highest_num)

        except Exception as e:
            raise DatabaseError(
                f"Failed to get highest HRID for type '{memory_type}' and user '{user_id}'",
                operation="get_highest_hrid",
                context={"memory_type": memory_type, "user_id": user_id},
                original_error=e,
            ) from e

    def exists(self, hrid: str) -> bool:
        """Check if HRID exists (active, not deleted).

        Args:
            hrid: Human-readable ID to check.

        Returns:
            bool: True if HRID exists and is active.
        """
        try:
            query = """
            MATCH (m:HridMapping {hrid: $hrid})
            WHERE m.deleted_at IS NULL
            RETURN COUNT(m) as count
            """
            results = self.kuzu.query(query, {"hrid": hrid})
            return results[0]["count"] > 0 if results else False

        except (DatabaseError, ValueError, KeyError):
            return False  # Assume doesn't exist on any error

__init__(kuzu_interface)

Initialize with existing KuzuInterface.

Parameters:

Name Type Description Default
kuzu_interface KuzuInterface

Pre-configured Kuzu interface for database operations.

required
Source code in src/memg_core/utils/hrid_tracker.py
23
24
25
26
27
28
29
def __init__(self, kuzu_interface: KuzuInterface):
    """Initialize with existing KuzuInterface.

    Args:
        kuzu_interface: Pre-configured Kuzu interface for database operations.
    """
    self.kuzu = kuzu_interface

create_mapping(hrid, uuid, memory_type, user_id)

Create new HRID ↔ UUID mapping.

Parameters:

Name Type Description Default
hrid str

Human-readable ID.

required
uuid str

Internal UUID.

required
memory_type str

Entity type (e.g., 'task', 'note').

required
user_id str

User ID for scoped mapping.

required

Raises:

Type Description
DatabaseError

If mapping creation fails.

Source code in src/memg_core/utils/hrid_tracker.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def create_mapping(self, hrid: str, uuid: str, memory_type: str, user_id: str) -> None:
    """Create new HRID ↔ UUID mapping.

    Args:
        hrid: Human-readable ID.
        uuid: Internal UUID.
        memory_type: Entity type (e.g., 'task', 'note').
        user_id: User ID for scoped mapping.

    Raises:
        DatabaseError: If mapping creation fails.
    """
    try:
        now = datetime.now(UTC).isoformat()

        mapping_data = {
            "hrid_user_key": f"{hrid}#{user_id}",  # Composite key
            "hrid": hrid,
            "uuid": uuid,
            "memory_type": memory_type,
            "user_id": user_id,
            "created_at": now,
            "deleted_at": None,  # NULL for active mappings
        }

        self.kuzu.add_node("HridMapping", mapping_data)

    except Exception as e:
        raise DatabaseError(
            f"Failed to create HRID mapping: {hrid}{uuid}",
            operation="create_mapping",
            context={"hrid": hrid, "uuid": uuid, "memory_type": memory_type},
            original_error=e,
        ) from e

exists(hrid)

Check if HRID exists (active, not deleted).

Parameters:

Name Type Description Default
hrid str

Human-readable ID to check.

required

Returns:

Name Type Description
bool bool

True if HRID exists and is active.

Source code in src/memg_core/utils/hrid_tracker.py
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
def exists(self, hrid: str) -> bool:
    """Check if HRID exists (active, not deleted).

    Args:
        hrid: Human-readable ID to check.

    Returns:
        bool: True if HRID exists and is active.
    """
    try:
        query = """
        MATCH (m:HridMapping {hrid: $hrid})
        WHERE m.deleted_at IS NULL
        RETURN COUNT(m) as count
        """
        results = self.kuzu.query(query, {"hrid": hrid})
        return results[0]["count"] > 0 if results else False

    except (DatabaseError, ValueError, KeyError):
        return False  # Assume doesn't exist on any error

get_highest_hrid(memory_type, user_id)

Get highest HRID for a memory type (for generation).

Parameters:

Name Type Description Default
memory_type str

Entity type to check (case insensitive).

required
user_id str

User ID for scoped HRID lookup.

required

Returns:

Type Description
tuple[str, int, int] | None

tuple[str, int, int] | None: (hrid, alpha_idx, num) or None if no HRIDs exist.

Source code in src/memg_core/utils/hrid_tracker.py
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
def get_highest_hrid(self, memory_type: str, user_id: str) -> tuple[str, int, int] | None:
    """Get highest HRID for a memory type (for generation).

    Args:
        memory_type: Entity type to check (case insensitive).
        user_id: User ID for scoped HRID lookup.

    Returns:
        tuple[str, int, int] | None: (hrid, alpha_idx, num) or None if no HRIDs exist.
    """
    try:
        # Normalize to lowercase for database query (YAML types are lowercase)
        normalized_type = memory_type.lower()

        query = """
        MATCH (m:HridMapping {memory_type: $memory_type, user_id: $user_id})
        RETURN m.hrid as hrid
        ORDER BY m.created_at DESC
        LIMIT 1000
        """

        results = self.kuzu.query(query, {"memory_type": normalized_type, "user_id": user_id})

        if not results:
            return None

        # Find the highest HRID by parsing all results
        highest_hrid = None
        highest_alpha_idx = -1
        highest_num = -1

        for result in results:
            hrid = result["hrid"]
            try:
                # TODO: parsed_type available for future use (e.g., type validation)
                _, alpha, num = parse_hrid(hrid)
                alpha_idx = _alpha_to_idx(alpha)

                if alpha_idx > highest_alpha_idx or (
                    alpha_idx == highest_alpha_idx and num > highest_num
                ):
                    highest_alpha_idx = alpha_idx
                    highest_num = num
                    highest_hrid = hrid

            except ValueError:
                continue  # Skip invalid HRIDs

        if highest_hrid is None:
            return None

        return (highest_hrid, highest_alpha_idx, highest_num)

    except Exception as e:
        raise DatabaseError(
            f"Failed to get highest HRID for type '{memory_type}' and user '{user_id}'",
            operation="get_highest_hrid",
            context={"memory_type": memory_type, "user_id": user_id},
            original_error=e,
        ) from e

get_hrid(uuid, user_id)

Translate UUID to HRID with user verification.

Parameters:

Name Type Description Default
uuid str

Internal UUID.

required
user_id str

User ID for ownership verification.

required

Returns:

Name Type Description
str str

Human-readable ID string.

Raises:

Type Description
DatabaseError

If UUID not found, deleted, or doesn't belong to user.

Source code in src/memg_core/utils/hrid_tracker.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def get_hrid(self, uuid: str, user_id: str) -> str:
    """Translate UUID to HRID with user verification.

    Args:
        uuid: Internal UUID.
        user_id: User ID for ownership verification.

    Returns:
        str: Human-readable ID string.

    Raises:
        DatabaseError: If UUID not found, deleted, or doesn't belong to user.
    """
    try:
        query = """
        MATCH (m:HridMapping {uuid: $uuid, user_id: $user_id})
        WHERE m.deleted_at IS NULL
        RETURN m.hrid as hrid
        """
        results = self.kuzu.query(query, {"uuid": uuid, "user_id": user_id})

        if not results:
            raise DatabaseError(
                f"UUID '{uuid}' not found or has been deleted",
                operation="get_hrid",
                context={"uuid": uuid},
            )

        return results[0]["hrid"]

    except Exception as e:
        if isinstance(e, DatabaseError):
            raise
        raise DatabaseError(
            f"Failed to lookup HRID for UUID '{uuid}'",
            operation="get_hrid",
            context={"uuid": uuid},
            original_error=e,
        ) from e

get_uuid(hrid, user_id)

Translate HRID to UUID.

Parameters:

Name Type Description Default
hrid str

Human-readable ID (e.g., 'TASK_AAA001').

required
user_id str

User ID for scoped lookup.

required

Returns:

Name Type Description
str str

UUID string for internal operations.

Raises:

Type Description
DatabaseError

If HRID not found or is deleted.

Source code in src/memg_core/utils/hrid_tracker.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def get_uuid(self, hrid: str, user_id: str) -> str:
    """Translate HRID to UUID.

    Args:
        hrid: Human-readable ID (e.g., 'TASK_AAA001').
        user_id: User ID for scoped lookup.

    Returns:
        str: UUID string for internal operations.

    Raises:
        DatabaseError: If HRID not found or is deleted.
    """
    try:
        query = """
        MATCH (m:HridMapping {hrid: $hrid, user_id: $user_id})
        WHERE m.deleted_at IS NULL
        RETURN m.uuid as uuid
        """
        results = self.kuzu.query(query, {"hrid": hrid, "user_id": user_id})

        if not results:
            raise DatabaseError(
                f"HRID '{hrid}' not found or has been deleted",
                operation="get_uuid",
                context={"hrid": hrid},
            )

        return results[0]["uuid"]

    except Exception as e:
        if isinstance(e, DatabaseError):
            raise
        raise DatabaseError(
            f"Failed to lookup UUID for HRID '{hrid}'",
            operation="get_uuid",
            context={"hrid": hrid},
            original_error=e,
        ) from e

mark_deleted(hrid)

Mark HRID mapping as deleted (soft delete)

Parameters:

Name Type Description Default
hrid str

Human-readable ID to mark as deleted

required

Raises:

Type Description
DatabaseError

If marking as deleted fails

Source code in src/memg_core/utils/hrid_tracker.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
def mark_deleted(self, hrid: str) -> None:
    """Mark HRID mapping as deleted (soft delete)

    Args:
        hrid: Human-readable ID to mark as deleted

    Raises:
        DatabaseError: If marking as deleted fails
    """
    try:
        now = datetime.now(UTC).isoformat()

        query = """
        MATCH (m:HridMapping {hrid: $hrid})
        SET m.deleted_at = $deleted_at
        RETURN m.hrid as hrid
        """

        results = self.kuzu.query(query, {"hrid": hrid, "deleted_at": now})

        if not results:
            raise DatabaseError(
                f"HRID '{hrid}' not found for deletion",
                operation="mark_deleted",
                context={"hrid": hrid},
            )

    except Exception as e:
        if isinstance(e, DatabaseError):
            raise
        raise DatabaseError(
            f"Failed to mark HRID '{hrid}' as deleted",
            operation="mark_deleted",
            context={"hrid": hrid},
            original_error=e,
        ) from e

KuzuInterface

Pure CRUD wrapper around Kuzu database - NO DDL operations.

Attributes:

Name Type Description
conn

Pre-initialized Kuzu connection.

yaml_translator

Optional YAML translator for relationship operations.

Source code in src/memg_core/core/interfaces/kuzu.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
class KuzuInterface:
    """Pure CRUD wrapper around Kuzu database - NO DDL operations.

    Attributes:
        conn: Pre-initialized Kuzu connection.
        yaml_translator: Optional YAML translator for relationship operations.
    """

    def __init__(self, connection: kuzu.Connection, yaml_translator=None):
        """Initialize with pre-created connection.

        Args:
            connection: Pre-initialized Kuzu connection from DatabaseClients.
            yaml_translator: Optional YamlTranslator for relationship operations.
        """
        self.conn = connection
        self.yaml_translator = yaml_translator

    def add_node(self, table: str, properties: dict[str, Any]) -> None:
        """Add a node to the graph - pure CRUD operation.

        Args:
            table: Node table name.
            properties: Node properties.

        Raises:
            DatabaseError: If node creation fails.
        """
        try:
            props = ", ".join([f"{k}: ${k}" for k in properties])
            query = f"CREATE (:{table} {{{props}}})"
            self.conn.execute(query, parameters=properties)
        except Exception as e:
            raise DatabaseError(
                f"Failed to add node to {table}",
                operation="add_node",
                context={"table": table, "properties": properties},
                original_error=e,
            ) from e

    def update_node(
        self, table: str, node_uuid: str, properties: dict[str, Any], user_id: str
    ) -> bool:
        """Update a node in the graph - pure CRUD operation.

        Args:
            table: Node table name.
            node_uuid: UUID of the node to update.
            properties: Node properties to update.
            user_id: User ID for ownership verification.

        Returns:
            bool: True if update succeeded, False if node not found.

        Raises:
            DatabaseError: If node update fails due to system error.
        """
        try:
            # CRITICAL: Check if node exists AND belongs to user
            check_query = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) RETURN n.id as id"
            check_result = self.query(check_query, {"uuid": node_uuid, "user_id": user_id})

            if not check_result:
                # Node doesn't exist for this user
                return False

            # Build SET clause for properties
            set_clauses = []
            params = {"uuid": node_uuid, "user_id": user_id}

            for key, value in properties.items():
                # Skip system fields that shouldn't be updated via this method
                if key in ("id", "user_id"):
                    continue

                param_name = f"prop_{key}"
                set_clauses.append(f"n.{key} = ${param_name}")
                params[param_name] = value

            if not set_clauses:
                # No properties to update (all were system fields)
                return True

            # Execute update query
            set_clause = ", ".join(set_clauses)
            update_query = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) SET {set_clause}"
            self.conn.execute(update_query, parameters=params)

            return True

        except Exception as e:
            raise DatabaseError(
                f"Failed to update node in {table}",
                operation="update_node",
                context={
                    "table": table,
                    "node_uuid": node_uuid,
                    "properties": properties,
                    "user_id": user_id,
                },
                original_error=e,
            ) from e

    def add_relationship(
        self,
        from_table: str,
        to_table: str,
        rel_type: str,
        from_id: str,
        to_id: str,
        user_id: str,
        props: dict[str, Any] | None = None,
    ) -> None:
        """Add relationship between nodes.

        Args:
            from_table: Source node table name.
            to_table: Target node table name.
            rel_type: Relationship type.
            from_id: Source node ID.
            to_id: Target node ID.
            user_id: User ID for ownership verification.
            props: Optional relationship properties.

        Raises:
            DatabaseError: If relationship creation fails.
        """
        try:
            props = props or {}

            # VALIDATE RELATIONSHIP AGAINST YAML SCHEMA - crash if invalid
            if not validate_relation_predicate(rel_type):
                raise ValueError(
                    f"Invalid relationship predicate: {rel_type}. Must be defined in YAML schema."
                )

            # Use relationship type as-is (predicates from YAML) - no sanitization
            # rel_type should already be a valid predicate (e.g., "REFERENCED_BY", "ANNOTATES")

            # CRITICAL: Verify both nodes belong to the user before creating relationship
            # First check if both nodes exist and belong to the user
            check_query = (
                f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}}), "
                f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
                f"RETURN a.id, b.id"
            )
            check_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}
            check_result = self.query(check_query, check_params)

            if not check_result:
                raise ValueError(
                    f"Cannot create relationship: one or both memories not found "
                    f"or don't belong to user {user_id}"
                )

            # Generate relationship table name using YamlTranslator
            if not self.yaml_translator:
                raise DatabaseError(
                    "YamlTranslator required for relationship operations",
                    operation="add_relationship",
                    context={
                        "from_table": from_table,
                        "to_table": to_table,
                        "rel_type": rel_type,
                    },
                )

            relationship_table_name = self.yaml_translator.relationship_table_name(
                source=from_table,
                predicate=rel_type,
                target=to_table,
                directed=True,  # Direction affects semantics but not table naming for now
            )

            # Now create the relationship using the unique table name
            prop_str = ", ".join([f"{k}: ${k}" for k in props.keys()]) if props else ""
            rel_props = f" {{{prop_str}}}" if prop_str else ""
            create_query = (
                f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}}), "
                f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
                f"CREATE (a)-[:{relationship_table_name}{rel_props}]->(b)"
            )
            create_params = {
                "from_id": from_id,
                "to_id": to_id,
                "user_id": user_id,
                **props,
            }
            self.conn.execute(create_query, parameters=create_params)
        except Exception as e:
            raise DatabaseError(
                f"Failed to add relationship {rel_type}",
                operation="add_relationship",
                context={
                    "from_table": from_table,
                    "to_table": to_table,
                    "rel_type": rel_type,
                    "from_id": from_id,
                    "to_id": to_id,
                },
                original_error=e,
            ) from e

    def delete_relationship(
        self,
        from_table: str,
        to_table: str,
        rel_type: str,
        from_id: str,
        to_id: str,
        user_id: str,
    ) -> bool:
        """Delete relationship between nodes.

        Args:
            from_table: Source node table name.
            to_table: Target node table name.
            rel_type: Relationship type.
            from_id: Source node ID.
            to_id: Target node ID.
            user_id: User ID for ownership verification.

        Returns:
            bool: True if deletion succeeded, False if relationship not found.

        Raises:
            DatabaseError: If relationship deletion fails due to system error.
        """
        try:
            # VALIDATE RELATIONSHIP AGAINST YAML SCHEMA - crash if invalid
            if not validate_relation_predicate(rel_type):
                raise ValueError(
                    f"Invalid relationship predicate: {rel_type}. Must be defined in YAML schema."
                )

            # CRITICAL: Verify both nodes belong to the user before deleting relationship
            # First check if both nodes exist and belong to the user
            check_query = (
                f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}}), "
                f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
                f"RETURN a.id, b.id"
            )
            check_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}
            check_result = self.query(check_query, check_params)

            if not check_result:
                # Nodes don't exist or don't belong to user - return False (not found)
                return False

            # Generate relationship table name using YamlTranslator
            if not self.yaml_translator:
                raise DatabaseError(
                    "YamlTranslator required for relationship operations",
                    operation="delete_relationship",
                    context={
                        "from_table": from_table,
                        "to_table": to_table,
                        "rel_type": rel_type,
                    },
                )

            relationship_table_name = self.yaml_translator.relationship_table_name(
                source=from_table,
                predicate=rel_type,
                target=to_table,
                directed=True,  # Direction affects semantics but not table naming for now
            )

            # First check if the relationship exists
            check_rel_query = (
                f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}})"
                f"-[r:{relationship_table_name}]->"
                f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
                f"RETURN r"
            )
            check_rel_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}

            # Check if relationship exists
            relationship_exists = self.query(check_rel_query, check_rel_params)
            if not relationship_exists:
                # Relationship doesn't exist - return False
                return False

            # Delete the specific relationship (we know it exists)
            delete_query = (
                f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}})"
                f"-[r:{relationship_table_name}]->"
                f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
                f"DELETE r"
            )
            delete_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}

            # Execute deletion
            self.conn.execute(delete_query, parameters=delete_params)

            # If we get here, deletion succeeded
            return True

        except Exception as e:
            if isinstance(e, ValueError):
                # Re-raise validation errors as-is
                raise
            raise DatabaseError(
                f"Failed to delete relationship {rel_type}",
                operation="delete_relationship",
                context={
                    "from_table": from_table,
                    "to_table": to_table,
                    "rel_type": rel_type,
                    "from_id": from_id,
                    "to_id": to_id,
                },
                original_error=e,
            ) from e

    def _extract_query_results(self, query_result) -> list[dict[str, Any]]:
        """Extract results from Kuzu QueryResult using raw iteration.

        Args:
            query_result: Kuzu QueryResult object.

        Returns:
            list[dict[str, Any]]: List of dictionaries containing query results.
        """
        # Type annotations disabled for QueryResult - dynamic interface from kuzu package
        qr = query_result  # type: ignore

        results = []
        column_names = qr.get_column_names()
        while qr.has_next():
            row = qr.get_next()
            result = {}
            for i, col_name in enumerate(column_names):
                result[col_name] = row[i] if i < len(row) else None
            results.append(result)
        return results

    def query(self, cypher: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
        """Execute Cypher query and return results.

        Args:
            cypher: Cypher query string.
            params: Query parameters.

        Returns:
            list[dict[str, Any]]: Query results.

        Raises:
            DatabaseError: If query execution fails.
        """
        try:
            qr = self.conn.execute(cypher, parameters=params or {})
            return self._extract_query_results(qr)
        except Exception as e:
            raise DatabaseError(
                "Failed to execute Kuzu query",
                operation="query",
                context={"cypher": cypher, "params": params},
                original_error=e,
            ) from e

    def neighbors(
        self,
        node_label: str,
        node_uuid: str,
        user_id: str,
        rel_types: list[str] | None = None,
        direction: str = "any",
        limit: int = 10,
        neighbor_label: str | None = None,
    ) -> list[dict[str, Any]]:
        """Fetch neighbors of a node by UUID only.

        Args:
            node_label: Node type/table name (e.g., "Memory", "bug") - NOT a UUID.
            node_uuid: UUID of the specific node to find neighbors for.
            user_id: User ID for isolation - only return neighbors belonging to this user.
            rel_types: List of relationship types to filter by.
            direction: "in", "out", or "any" for relationship direction.
            limit: Maximum number of neighbors to return.
            neighbor_label: Type of neighbor nodes to return.

        Returns:
            list[dict[str, Any]]: List of neighbor nodes with relationship info.

        Raises:
            ValueError: If node_label is a UUID or node_uuid is not a UUID.
            DatabaseError: If neighbor query fails.
        """
        # Validate parameters to prevent common bugs
        if self._is_uuid(node_label):
            raise ValueError(
                f"node_label must be a node type (e.g., 'Memory', 'bug'), not UUID: {node_label}. "
                f"UUIDs should be passed as node_uuid parameter."
            )

        if not self._is_uuid(node_uuid):
            raise ValueError(f"node_uuid must be a valid UUID format, got: {node_uuid}")

        try:
            # Use YamlTranslator to expand predicates to concrete relationship labels
            if not self.yaml_translator:
                raise DatabaseError(
                    "YamlTranslator required for neighbor operations",
                    operation="neighbors",
                    context={"node_label": node_label, "rel_types": rel_types},
                )

            # Get concrete relationship labels for this source and predicates
            if rel_types:
                relationship_labels = self.yaml_translator.get_labels_for_predicates(
                    source_type=node_label,
                    predicates=rel_types,
                    neighbor_label=neighbor_label,
                )
                if not relationship_labels:
                    # No matching relationships found - return empty
                    return []

                # Create relationship pattern with specific labels
                rel_filter = "|".join(relationship_labels)
                rel_part = f":{rel_filter}"
            else:
                # No filtering - match all relationships
                rel_part = ""

            # CRITICAL: User isolation - both source node and neighbors must belong to user
            node_condition = f"a:{node_label} {{id: $node_uuid, user_id: $user_id}}"
            neighbor = f":{neighbor_label}" if neighbor_label else ""
            neighbor_condition = f"n{neighbor} {{user_id: $user_id}}"

            # Build direction-aware pattern
            if direction == "out":
                pattern = f"({node_condition})-[r{rel_part}]->({neighbor_condition})"
            elif direction == "in":
                pattern = f"({node_condition})<-[r{rel_part}]-({neighbor_condition})"
            else:
                pattern = f"({node_condition})-[r{rel_part}]-({neighbor_condition})"

            # Return neighbors only if they belong to the same user
            cypher = f"""
            MATCH {pattern}
            RETURN DISTINCT n.id as id,
                            n.user_id as user_id,
                            n.memory_type as memory_type,
                            n.created_at as created_at,
                            label(r) as rel_type,
                            n as node
            LIMIT $limit
            """
            params = {"node_uuid": node_uuid, "user_id": user_id, "limit": limit}
            return self.query(cypher, params)
        except Exception as e:
            raise DatabaseError(
                "Failed to fetch neighbors",
                operation="neighbors",
                context={
                    "node_label": node_label,
                    "node_uuid": node_uuid,
                    "user_id": user_id,
                    "rel_types": rel_types,
                    "direction": direction,
                },
                original_error=e,
            ) from e

    def delete_node(self, table: str, node_uuid: str, user_id: str) -> bool:
        """Delete a single node by UUID"""
        try:
            # CRITICAL: Check if node exists AND belongs to user
            cypher_check = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) RETURN n.id as id"
            check_result = self.query(cypher_check, {"uuid": node_uuid, "user_id": user_id})

            if not check_result:
                # Node doesn't exist for this user, consider it successfully "deleted"
                return True

            # Delete the node - only if it belongs to the user
            cypher_delete_node = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) DELETE n"
            self.conn.execute(
                cypher_delete_node, parameters={"uuid": node_uuid, "user_id": user_id}
            )
            return True

        except Exception as e:
            error_msg = str(e).lower()
            if "delete undirected rel" in error_msg or "relationship" in error_msg:
                # Relationship constraint prevents deletion - this is a REAL FAILURE
                # Don't lie by returning True - raise explicit error
                raise DatabaseError(
                    f"Cannot delete node {node_uuid} from {table}: has existing relationships. "
                    f"Delete relationships first or use CASCADE delete if supported.",
                    operation="delete_node",
                    context={
                        "table": table,
                        "node_uuid": node_uuid,
                        "constraint_error": str(e),
                    },
                    original_error=e,
                ) from e
            # Other database error
            raise DatabaseError(
                f"Failed to delete node from {table}",
                operation="delete_node",
                context={"table": table, "node_uuid": node_uuid, "user_id": user_id},
                original_error=e,
            ) from e

    def get_nodes(
        self,
        user_id: str,
        node_type: str | None = None,
        filters: dict[str, Any] | None = None,
        limit: int = 50,
        offset: int = 0,
    ) -> list[dict[str, Any]]:
        """Get multiple nodes with filtering and pagination.

        Args:
            user_id: User ID for ownership verification.
            node_type: Optional node type filter (e.g., "task", "note").
            filters: Optional field filters (e.g., {"status": "open"}).
            limit: Maximum number of nodes to return.
            offset: Number of nodes to skip for pagination.

        Returns:
            list[dict[str, Any]]: List of node data from Kuzu.

        Raises:
            DatabaseError: If node retrieval fails.
        """
        try:
            filters = filters or {}

            # Build MATCH clause
            if node_type:
                match_clause = f"MATCH (n:{node_type} {{user_id: $user_id"
            else:
                match_clause = "MATCH (n {user_id: $user_id"

            # Add field filters
            params = {"user_id": user_id, "limit": limit, "offset": offset}
            for field_name, field_value in filters.items():
                param_name = f"filter_{field_name}"
                match_clause += f", {field_name}: ${param_name}"
                params[param_name] = field_value

            match_clause += "})"

            # Build complete query
            cypher_query = f"""
            {match_clause}
            RETURN n.id as id,
                   n.user_id as user_id,
                   n.memory_type as memory_type,
                   n.created_at as created_at,
                   n.updated_at as updated_at,
                   n as node
            ORDER BY n.created_at DESC
            SKIP $offset
            LIMIT $limit
            """

            return self.query(cypher_query, params)

        except Exception as e:
            raise DatabaseError(
                "Failed to get nodes from Kuzu",
                operation="get_nodes",
                context={
                    "user_id": user_id,
                    "node_type": node_type,
                    "filters": filters,
                    "limit": limit,
                    "offset": offset,
                },
                original_error=e,
            ) from e

    def _get_kuzu_type(self, key: str, value: Any) -> str:
        """Map Python types to Kuzu types with proper validation.

        Args:
            key: Property key name.
            value: Property value to type-check.

        Returns:
            str: Kuzu type name.

        Raises:
            DatabaseError: If the Python type is not supported by Kuzu.
        """
        if isinstance(value, bool):
            # Check bool first (bool is subclass of int in Python!)
            return "BOOLEAN"
        if isinstance(value, int):
            return "INT64"
        if isinstance(value, float):
            return "DOUBLE"
        if isinstance(value, str):
            return "STRING"
        if value is None:
            # None values need special handling - default to STRING for now
            return "STRING"
        # Unsupported type - fail explicitly instead of silent STRING conversion
        raise DatabaseError(
            f"Unsupported property type for key '{key}': {type(value).__name__}. "
            f"Supported types: str, int, float, bool. "
            f"Complex types must be serialized before storage.",
            operation="_get_kuzu_type",
            context={"key": key, "value": value, "type": type(value).__name__},
        )

    def _is_uuid(self, value: str) -> bool:
        """Check if string looks like a UUID (36 chars with hyphens in right positions).

        Args:
            value: String to check.

        Returns:
            bool: True if value matches UUID format (8-4-4-4-12 hex pattern), False otherwise.
        """
        if not isinstance(value, str) or len(value) != 36:
            return False

        # UUID format: 8-4-4-4-12 (e.g., 550e8400-e29b-41d4-a716-446655440000)
        uuid_pattern = r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
        return bool(re.match(uuid_pattern, value, re.IGNORECASE))

__init__(connection, yaml_translator=None)

Initialize with pre-created connection.

Parameters:

Name Type Description Default
connection Connection

Pre-initialized Kuzu connection from DatabaseClients.

required
yaml_translator

Optional YamlTranslator for relationship operations.

None
Source code in src/memg_core/core/interfaces/kuzu.py
20
21
22
23
24
25
26
27
28
def __init__(self, connection: kuzu.Connection, yaml_translator=None):
    """Initialize with pre-created connection.

    Args:
        connection: Pre-initialized Kuzu connection from DatabaseClients.
        yaml_translator: Optional YamlTranslator for relationship operations.
    """
    self.conn = connection
    self.yaml_translator = yaml_translator

add_node(table, properties)

Add a node to the graph - pure CRUD operation.

Parameters:

Name Type Description Default
table str

Node table name.

required
properties dict[str, Any]

Node properties.

required

Raises:

Type Description
DatabaseError

If node creation fails.

Source code in src/memg_core/core/interfaces/kuzu.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def add_node(self, table: str, properties: dict[str, Any]) -> None:
    """Add a node to the graph - pure CRUD operation.

    Args:
        table: Node table name.
        properties: Node properties.

    Raises:
        DatabaseError: If node creation fails.
    """
    try:
        props = ", ".join([f"{k}: ${k}" for k in properties])
        query = f"CREATE (:{table} {{{props}}})"
        self.conn.execute(query, parameters=properties)
    except Exception as e:
        raise DatabaseError(
            f"Failed to add node to {table}",
            operation="add_node",
            context={"table": table, "properties": properties},
            original_error=e,
        ) from e

add_relationship(from_table, to_table, rel_type, from_id, to_id, user_id, props=None)

Add relationship between nodes.

Parameters:

Name Type Description Default
from_table str

Source node table name.

required
to_table str

Target node table name.

required
rel_type str

Relationship type.

required
from_id str

Source node ID.

required
to_id str

Target node ID.

required
user_id str

User ID for ownership verification.

required
props dict[str, Any] | None

Optional relationship properties.

None

Raises:

Type Description
DatabaseError

If relationship creation fails.

Source code in src/memg_core/core/interfaces/kuzu.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
def add_relationship(
    self,
    from_table: str,
    to_table: str,
    rel_type: str,
    from_id: str,
    to_id: str,
    user_id: str,
    props: dict[str, Any] | None = None,
) -> None:
    """Add relationship between nodes.

    Args:
        from_table: Source node table name.
        to_table: Target node table name.
        rel_type: Relationship type.
        from_id: Source node ID.
        to_id: Target node ID.
        user_id: User ID for ownership verification.
        props: Optional relationship properties.

    Raises:
        DatabaseError: If relationship creation fails.
    """
    try:
        props = props or {}

        # VALIDATE RELATIONSHIP AGAINST YAML SCHEMA - crash if invalid
        if not validate_relation_predicate(rel_type):
            raise ValueError(
                f"Invalid relationship predicate: {rel_type}. Must be defined in YAML schema."
            )

        # Use relationship type as-is (predicates from YAML) - no sanitization
        # rel_type should already be a valid predicate (e.g., "REFERENCED_BY", "ANNOTATES")

        # CRITICAL: Verify both nodes belong to the user before creating relationship
        # First check if both nodes exist and belong to the user
        check_query = (
            f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}}), "
            f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
            f"RETURN a.id, b.id"
        )
        check_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}
        check_result = self.query(check_query, check_params)

        if not check_result:
            raise ValueError(
                f"Cannot create relationship: one or both memories not found "
                f"or don't belong to user {user_id}"
            )

        # Generate relationship table name using YamlTranslator
        if not self.yaml_translator:
            raise DatabaseError(
                "YamlTranslator required for relationship operations",
                operation="add_relationship",
                context={
                    "from_table": from_table,
                    "to_table": to_table,
                    "rel_type": rel_type,
                },
            )

        relationship_table_name = self.yaml_translator.relationship_table_name(
            source=from_table,
            predicate=rel_type,
            target=to_table,
            directed=True,  # Direction affects semantics but not table naming for now
        )

        # Now create the relationship using the unique table name
        prop_str = ", ".join([f"{k}: ${k}" for k in props.keys()]) if props else ""
        rel_props = f" {{{prop_str}}}" if prop_str else ""
        create_query = (
            f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}}), "
            f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
            f"CREATE (a)-[:{relationship_table_name}{rel_props}]->(b)"
        )
        create_params = {
            "from_id": from_id,
            "to_id": to_id,
            "user_id": user_id,
            **props,
        }
        self.conn.execute(create_query, parameters=create_params)
    except Exception as e:
        raise DatabaseError(
            f"Failed to add relationship {rel_type}",
            operation="add_relationship",
            context={
                "from_table": from_table,
                "to_table": to_table,
                "rel_type": rel_type,
                "from_id": from_id,
                "to_id": to_id,
            },
            original_error=e,
        ) from e

delete_node(table, node_uuid, user_id)

Delete a single node by UUID

Source code in src/memg_core/core/interfaces/kuzu.py
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
def delete_node(self, table: str, node_uuid: str, user_id: str) -> bool:
    """Delete a single node by UUID"""
    try:
        # CRITICAL: Check if node exists AND belongs to user
        cypher_check = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) RETURN n.id as id"
        check_result = self.query(cypher_check, {"uuid": node_uuid, "user_id": user_id})

        if not check_result:
            # Node doesn't exist for this user, consider it successfully "deleted"
            return True

        # Delete the node - only if it belongs to the user
        cypher_delete_node = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) DELETE n"
        self.conn.execute(
            cypher_delete_node, parameters={"uuid": node_uuid, "user_id": user_id}
        )
        return True

    except Exception as e:
        error_msg = str(e).lower()
        if "delete undirected rel" in error_msg or "relationship" in error_msg:
            # Relationship constraint prevents deletion - this is a REAL FAILURE
            # Don't lie by returning True - raise explicit error
            raise DatabaseError(
                f"Cannot delete node {node_uuid} from {table}: has existing relationships. "
                f"Delete relationships first or use CASCADE delete if supported.",
                operation="delete_node",
                context={
                    "table": table,
                    "node_uuid": node_uuid,
                    "constraint_error": str(e),
                },
                original_error=e,
            ) from e
        # Other database error
        raise DatabaseError(
            f"Failed to delete node from {table}",
            operation="delete_node",
            context={"table": table, "node_uuid": node_uuid, "user_id": user_id},
            original_error=e,
        ) from e

delete_relationship(from_table, to_table, rel_type, from_id, to_id, user_id)

Delete relationship between nodes.

Parameters:

Name Type Description Default
from_table str

Source node table name.

required
to_table str

Target node table name.

required
rel_type str

Relationship type.

required
from_id str

Source node ID.

required
to_id str

Target node ID.

required
user_id str

User ID for ownership verification.

required

Returns:

Name Type Description
bool bool

True if deletion succeeded, False if relationship not found.

Raises:

Type Description
DatabaseError

If relationship deletion fails due to system error.

Source code in src/memg_core/core/interfaces/kuzu.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
def delete_relationship(
    self,
    from_table: str,
    to_table: str,
    rel_type: str,
    from_id: str,
    to_id: str,
    user_id: str,
) -> bool:
    """Delete relationship between nodes.

    Args:
        from_table: Source node table name.
        to_table: Target node table name.
        rel_type: Relationship type.
        from_id: Source node ID.
        to_id: Target node ID.
        user_id: User ID for ownership verification.

    Returns:
        bool: True if deletion succeeded, False if relationship not found.

    Raises:
        DatabaseError: If relationship deletion fails due to system error.
    """
    try:
        # VALIDATE RELATIONSHIP AGAINST YAML SCHEMA - crash if invalid
        if not validate_relation_predicate(rel_type):
            raise ValueError(
                f"Invalid relationship predicate: {rel_type}. Must be defined in YAML schema."
            )

        # CRITICAL: Verify both nodes belong to the user before deleting relationship
        # First check if both nodes exist and belong to the user
        check_query = (
            f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}}), "
            f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
            f"RETURN a.id, b.id"
        )
        check_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}
        check_result = self.query(check_query, check_params)

        if not check_result:
            # Nodes don't exist or don't belong to user - return False (not found)
            return False

        # Generate relationship table name using YamlTranslator
        if not self.yaml_translator:
            raise DatabaseError(
                "YamlTranslator required for relationship operations",
                operation="delete_relationship",
                context={
                    "from_table": from_table,
                    "to_table": to_table,
                    "rel_type": rel_type,
                },
            )

        relationship_table_name = self.yaml_translator.relationship_table_name(
            source=from_table,
            predicate=rel_type,
            target=to_table,
            directed=True,  # Direction affects semantics but not table naming for now
        )

        # First check if the relationship exists
        check_rel_query = (
            f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}})"
            f"-[r:{relationship_table_name}]->"
            f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
            f"RETURN r"
        )
        check_rel_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}

        # Check if relationship exists
        relationship_exists = self.query(check_rel_query, check_rel_params)
        if not relationship_exists:
            # Relationship doesn't exist - return False
            return False

        # Delete the specific relationship (we know it exists)
        delete_query = (
            f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}})"
            f"-[r:{relationship_table_name}]->"
            f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
            f"DELETE r"
        )
        delete_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}

        # Execute deletion
        self.conn.execute(delete_query, parameters=delete_params)

        # If we get here, deletion succeeded
        return True

    except Exception as e:
        if isinstance(e, ValueError):
            # Re-raise validation errors as-is
            raise
        raise DatabaseError(
            f"Failed to delete relationship {rel_type}",
            operation="delete_relationship",
            context={
                "from_table": from_table,
                "to_table": to_table,
                "rel_type": rel_type,
                "from_id": from_id,
                "to_id": to_id,
            },
            original_error=e,
        ) from e

get_nodes(user_id, node_type=None, filters=None, limit=50, offset=0)

Get multiple nodes with filtering and pagination.

Parameters:

Name Type Description Default
user_id str

User ID for ownership verification.

required
node_type str | None

Optional node type filter (e.g., "task", "note").

None
filters dict[str, Any] | None

Optional field filters (e.g., {"status": "open"}).

None
limit int

Maximum number of nodes to return.

50
offset int

Number of nodes to skip for pagination.

0

Returns:

Type Description
list[dict[str, Any]]

list[dict[str, Any]]: List of node data from Kuzu.

Raises:

Type Description
DatabaseError

If node retrieval fails.

Source code in src/memg_core/core/interfaces/kuzu.py
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
def get_nodes(
    self,
    user_id: str,
    node_type: str | None = None,
    filters: dict[str, Any] | None = None,
    limit: int = 50,
    offset: int = 0,
) -> list[dict[str, Any]]:
    """Get multiple nodes with filtering and pagination.

    Args:
        user_id: User ID for ownership verification.
        node_type: Optional node type filter (e.g., "task", "note").
        filters: Optional field filters (e.g., {"status": "open"}).
        limit: Maximum number of nodes to return.
        offset: Number of nodes to skip for pagination.

    Returns:
        list[dict[str, Any]]: List of node data from Kuzu.

    Raises:
        DatabaseError: If node retrieval fails.
    """
    try:
        filters = filters or {}

        # Build MATCH clause
        if node_type:
            match_clause = f"MATCH (n:{node_type} {{user_id: $user_id"
        else:
            match_clause = "MATCH (n {user_id: $user_id"

        # Add field filters
        params = {"user_id": user_id, "limit": limit, "offset": offset}
        for field_name, field_value in filters.items():
            param_name = f"filter_{field_name}"
            match_clause += f", {field_name}: ${param_name}"
            params[param_name] = field_value

        match_clause += "})"

        # Build complete query
        cypher_query = f"""
        {match_clause}
        RETURN n.id as id,
               n.user_id as user_id,
               n.memory_type as memory_type,
               n.created_at as created_at,
               n.updated_at as updated_at,
               n as node
        ORDER BY n.created_at DESC
        SKIP $offset
        LIMIT $limit
        """

        return self.query(cypher_query, params)

    except Exception as e:
        raise DatabaseError(
            "Failed to get nodes from Kuzu",
            operation="get_nodes",
            context={
                "user_id": user_id,
                "node_type": node_type,
                "filters": filters,
                "limit": limit,
                "offset": offset,
            },
            original_error=e,
        ) from e

neighbors(node_label, node_uuid, user_id, rel_types=None, direction='any', limit=10, neighbor_label=None)

Fetch neighbors of a node by UUID only.

Parameters:

Name Type Description Default
node_label str

Node type/table name (e.g., "Memory", "bug") - NOT a UUID.

required
node_uuid str

UUID of the specific node to find neighbors for.

required
user_id str

User ID for isolation - only return neighbors belonging to this user.

required
rel_types list[str] | None

List of relationship types to filter by.

None
direction str

"in", "out", or "any" for relationship direction.

'any'
limit int

Maximum number of neighbors to return.

10
neighbor_label str | None

Type of neighbor nodes to return.

None

Returns:

Type Description
list[dict[str, Any]]

list[dict[str, Any]]: List of neighbor nodes with relationship info.

Raises:

Type Description
ValueError

If node_label is a UUID or node_uuid is not a UUID.

DatabaseError

If neighbor query fails.

Source code in src/memg_core/core/interfaces/kuzu.py
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
def neighbors(
    self,
    node_label: str,
    node_uuid: str,
    user_id: str,
    rel_types: list[str] | None = None,
    direction: str = "any",
    limit: int = 10,
    neighbor_label: str | None = None,
) -> list[dict[str, Any]]:
    """Fetch neighbors of a node by UUID only.

    Args:
        node_label: Node type/table name (e.g., "Memory", "bug") - NOT a UUID.
        node_uuid: UUID of the specific node to find neighbors for.
        user_id: User ID for isolation - only return neighbors belonging to this user.
        rel_types: List of relationship types to filter by.
        direction: "in", "out", or "any" for relationship direction.
        limit: Maximum number of neighbors to return.
        neighbor_label: Type of neighbor nodes to return.

    Returns:
        list[dict[str, Any]]: List of neighbor nodes with relationship info.

    Raises:
        ValueError: If node_label is a UUID or node_uuid is not a UUID.
        DatabaseError: If neighbor query fails.
    """
    # Validate parameters to prevent common bugs
    if self._is_uuid(node_label):
        raise ValueError(
            f"node_label must be a node type (e.g., 'Memory', 'bug'), not UUID: {node_label}. "
            f"UUIDs should be passed as node_uuid parameter."
        )

    if not self._is_uuid(node_uuid):
        raise ValueError(f"node_uuid must be a valid UUID format, got: {node_uuid}")

    try:
        # Use YamlTranslator to expand predicates to concrete relationship labels
        if not self.yaml_translator:
            raise DatabaseError(
                "YamlTranslator required for neighbor operations",
                operation="neighbors",
                context={"node_label": node_label, "rel_types": rel_types},
            )

        # Get concrete relationship labels for this source and predicates
        if rel_types:
            relationship_labels = self.yaml_translator.get_labels_for_predicates(
                source_type=node_label,
                predicates=rel_types,
                neighbor_label=neighbor_label,
            )
            if not relationship_labels:
                # No matching relationships found - return empty
                return []

            # Create relationship pattern with specific labels
            rel_filter = "|".join(relationship_labels)
            rel_part = f":{rel_filter}"
        else:
            # No filtering - match all relationships
            rel_part = ""

        # CRITICAL: User isolation - both source node and neighbors must belong to user
        node_condition = f"a:{node_label} {{id: $node_uuid, user_id: $user_id}}"
        neighbor = f":{neighbor_label}" if neighbor_label else ""
        neighbor_condition = f"n{neighbor} {{user_id: $user_id}}"

        # Build direction-aware pattern
        if direction == "out":
            pattern = f"({node_condition})-[r{rel_part}]->({neighbor_condition})"
        elif direction == "in":
            pattern = f"({node_condition})<-[r{rel_part}]-({neighbor_condition})"
        else:
            pattern = f"({node_condition})-[r{rel_part}]-({neighbor_condition})"

        # Return neighbors only if they belong to the same user
        cypher = f"""
        MATCH {pattern}
        RETURN DISTINCT n.id as id,
                        n.user_id as user_id,
                        n.memory_type as memory_type,
                        n.created_at as created_at,
                        label(r) as rel_type,
                        n as node
        LIMIT $limit
        """
        params = {"node_uuid": node_uuid, "user_id": user_id, "limit": limit}
        return self.query(cypher, params)
    except Exception as e:
        raise DatabaseError(
            "Failed to fetch neighbors",
            operation="neighbors",
            context={
                "node_label": node_label,
                "node_uuid": node_uuid,
                "user_id": user_id,
                "rel_types": rel_types,
                "direction": direction,
            },
            original_error=e,
        ) from e

query(cypher, params=None)

Execute Cypher query and return results.

Parameters:

Name Type Description Default
cypher str

Cypher query string.

required
params dict[str, Any] | None

Query parameters.

None

Returns:

Type Description
list[dict[str, Any]]

list[dict[str, Any]]: Query results.

Raises:

Type Description
DatabaseError

If query execution fails.

Source code in src/memg_core/core/interfaces/kuzu.py
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
def query(self, cypher: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
    """Execute Cypher query and return results.

    Args:
        cypher: Cypher query string.
        params: Query parameters.

    Returns:
        list[dict[str, Any]]: Query results.

    Raises:
        DatabaseError: If query execution fails.
    """
    try:
        qr = self.conn.execute(cypher, parameters=params or {})
        return self._extract_query_results(qr)
    except Exception as e:
        raise DatabaseError(
            "Failed to execute Kuzu query",
            operation="query",
            context={"cypher": cypher, "params": params},
            original_error=e,
        ) from e

update_node(table, node_uuid, properties, user_id)

Update a node in the graph - pure CRUD operation.

Parameters:

Name Type Description Default
table str

Node table name.

required
node_uuid str

UUID of the node to update.

required
properties dict[str, Any]

Node properties to update.

required
user_id str

User ID for ownership verification.

required

Returns:

Name Type Description
bool bool

True if update succeeded, False if node not found.

Raises:

Type Description
DatabaseError

If node update fails due to system error.

Source code in src/memg_core/core/interfaces/kuzu.py
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def update_node(
    self, table: str, node_uuid: str, properties: dict[str, Any], user_id: str
) -> bool:
    """Update a node in the graph - pure CRUD operation.

    Args:
        table: Node table name.
        node_uuid: UUID of the node to update.
        properties: Node properties to update.
        user_id: User ID for ownership verification.

    Returns:
        bool: True if update succeeded, False if node not found.

    Raises:
        DatabaseError: If node update fails due to system error.
    """
    try:
        # CRITICAL: Check if node exists AND belongs to user
        check_query = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) RETURN n.id as id"
        check_result = self.query(check_query, {"uuid": node_uuid, "user_id": user_id})

        if not check_result:
            # Node doesn't exist for this user
            return False

        # Build SET clause for properties
        set_clauses = []
        params = {"uuid": node_uuid, "user_id": user_id}

        for key, value in properties.items():
            # Skip system fields that shouldn't be updated via this method
            if key in ("id", "user_id"):
                continue

            param_name = f"prop_{key}"
            set_clauses.append(f"n.{key} = ${param_name}")
            params[param_name] = value

        if not set_clauses:
            # No properties to update (all were system fields)
            return True

        # Execute update query
        set_clause = ", ".join(set_clauses)
        update_query = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) SET {set_clause}"
        self.conn.execute(update_query, parameters=params)

        return True

    except Exception as e:
        raise DatabaseError(
            f"Failed to update node in {table}",
            operation="update_node",
            context={
                "table": table,
                "node_uuid": node_uuid,
                "properties": properties,
                "user_id": user_id,
            },
            original_error=e,
        ) from e

_alpha_to_idx(alpha)

Convert alpha string to index: AAA -> 0, AAB -> 1, ..., ZZZ -> 17575.

Parameters:

Name Type Description Default
alpha str

Three-letter alpha string (AAA-ZZZ).

required

Returns:

Name Type Description
int int

Numeric index.

Source code in src/memg_core/utils/hrid.py
58
59
60
61
62
63
64
65
66
67
68
69
70
def _alpha_to_idx(alpha: str) -> int:
    """Convert alpha string to index: AAA -> 0, AAB -> 1, ..., ZZZ -> 17575.

    Args:
        alpha: Three-letter alpha string (AAA-ZZZ).

    Returns:
        int: Numeric index.
    """
    idx = 0
    for char in alpha:
        idx = idx * 26 + (ord(char) - ord("A"))
    return idx

parse_hrid(hrid)

Parse HRID into (type, alpha, num).

Parameters:

Name Type Description Default
hrid str

HRID string to parse.

required

Returns:

Type Description
tuple[str, str, int]

tuple[str, str, int]: (type, alpha, num) components.

Raises:

Type Description
ValueError

If HRID format is invalid.

Source code in src/memg_core/utils/hrid.py
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
def parse_hrid(hrid: str) -> tuple[str, str, int]:
    """Parse HRID into (type, alpha, num).

    Args:
        hrid: HRID string to parse.

    Returns:
        tuple[str, str, int]: (type, alpha, num) components.

    Raises:
        ValueError: If HRID format is invalid.
    """
    m = _HRID_RE.match(hrid.strip().upper())
    if not m:
        raise ValueError(f"Invalid HRID format: {hrid}")
    return m.group("type"), m.group("alpha"), int(m.group("num"))

Database Clients

Database client utilities:

Database client management - thin layer for explicit database setup.

User controls database paths. No fallbacks. No automation.

DatabaseClients

DDL-only database setup - creates schemas and returns raw clients.

NO INTERFACES - pure schema creation only. Consumer must create interfaces separately using returned raw clients.

Attributes:

Name Type Description
qdrant_client QdrantClient | None

Pre-initialized QdrantClient.

kuzu_connection Connection | None

Pre-initialized Kuzu connection.

db_name

Database name.

qdrant_path

Path to Qdrant database.

kuzu_path

Path to Kuzu database.

yaml_translator

YAML translator instance.

Source code in src/memg_core/utils/db_clients.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
class DatabaseClients:
    """DDL-only database setup - creates schemas and returns raw clients.

    NO INTERFACES - pure schema creation only.
    Consumer must create interfaces separately using returned raw clients.

    Attributes:
        qdrant_client: Pre-initialized QdrantClient.
        kuzu_connection: Pre-initialized Kuzu connection.
        db_name: Database name.
        qdrant_path: Path to Qdrant database.
        kuzu_path: Path to Kuzu database.
        yaml_translator: YAML translator instance.
    """

    def __init__(self, yaml_path: str | None = None):
        """Create DDL-only database client wrapper.

        Args:
            yaml_path: Path to YAML schema file. User must provide - no defaults.
        """
        self.qdrant_client: QdrantClient | None = None
        self.kuzu_connection: kuzu.Connection | None = None
        self.db_name = "memg"
        self.qdrant_path = "qdrant"
        self.kuzu_path = "kuzu"

        self.yaml_translator = YamlTranslator(yaml_path) if yaml_path else None

    def init_dbs(self, db_path: str, db_name: str):
        """Initialize databases with structured paths.

        Args:
            db_path: Base database directory.
            db_name: Database name (used for collection and file names).
        """
        # Structure paths
        qdrant_path = os.path.join(db_path, "qdrant")
        kuzu_path = os.path.join(db_path, "kuzu", db_name)

        # Store paths and names
        self.qdrant_path = qdrant_path
        self.kuzu_path = kuzu_path
        self.db_name = db_name

        # Ensure directories exist
        os.makedirs(qdrant_path, exist_ok=True)
        os.makedirs(Path(kuzu_path).parent, exist_ok=True)

        # Create raw database clients directly
        qdrant_client = QdrantClient(path=qdrant_path)
        kuzu_db = kuzu.Database(kuzu_path)
        kuzu_conn = kuzu.Connection(kuzu_db)

        # Store raw clients for interface creation
        self.qdrant_client = qdrant_client
        self.kuzu_connection = kuzu_conn

        # DDL operations - create collection and tables
        self._setup_qdrant_collection(qdrant_client, self.db_name)
        self._setup_kuzu_tables_with_graph_register(kuzu_conn)

    def _setup_qdrant_collection(self, client: QdrantClient, collection_name: str) -> None:
        """Create Qdrant collection if it doesn't exist.

        Args:
            client: Qdrant client instance.
            collection_name: Name of the collection to create.

        Raises:
            DatabaseError: If collection creation fails.
        """
        try:
            config = get_config()
            vector_dimension = config.memg.vector_dimension

            collections = client.get_collections()
            if not any(col.name == collection_name for col in collections.collections):
                client.create_collection(
                    collection_name=collection_name,
                    vectors_config=VectorParams(size=vector_dimension, distance=Distance.COSINE),
                )
        except Exception as e:
            raise DatabaseError(
                "Failed to setup Qdrant collection",
                operation="_setup_qdrant_collection",
                original_error=e,
            ) from e

    def _setup_kuzu_tables_with_graph_register(self, conn: kuzu.Connection) -> None:
        """Create Kuzu tables using GraphRegister for DDL generation.

        Args:
            conn: Kuzu database connection.

        Raises:
            DatabaseError: If YAML translator not initialized or table creation fails.
        """
        if not self.yaml_translator:
            raise DatabaseError(
                "YAML translator not initialized. Provide yaml_path to constructor.",
                operation="_setup_kuzu_tables_with_graph_register",
            )

        try:
            # Create GraphRegister with YamlTranslator for complete DDL generation
            graph_register = GraphRegister(yaml_translator=self.yaml_translator)

            # Generate all DDL statements using GraphRegister
            ddl_statements = graph_register.generate_all_ddl()

            # Execute all DDL statements
            for ddl in ddl_statements:
                conn.execute(ddl)

        except Exception as e:
            raise DatabaseError(
                "Failed to setup Kuzu tables using GraphRegister",
                operation="_setup_kuzu_tables_with_graph_register",
                original_error=e,
            ) from e

    # ===== INTERFACE ACCESS METHODS =====
    # After DDL operations, provide access to CRUD interfaces

    def get_qdrant_interface(self) -> QdrantInterface:
        """Get Qdrant interface using the initialized client.

        Returns:
            QdrantInterface: Configured with the DDL-created client and collection.

        Raises:
            DatabaseError: If client not initialized (call init_dbs first).
        """
        if self.qdrant_client is None:
            raise DatabaseError(
                "Qdrant client not initialized. Call init_dbs() first.",
                operation="get_qdrant_interface",
            )
        return QdrantInterface(self.qdrant_client, self.db_name)

    def get_kuzu_interface(self) -> KuzuInterface:
        """Get Kuzu interface using the initialized connection.

        Returns:
            KuzuInterface: Configured with the DDL-created connection.

        Raises:
            DatabaseError: If connection not initialized (call init_dbs first).
        """
        if self.kuzu_connection is None:
            raise DatabaseError(
                "Kuzu connection not initialized. Call init_dbs() first.",
                operation="get_kuzu_interface",
            )
        return KuzuInterface(self.kuzu_connection, self.yaml_translator)

    def get_embedder(self) -> Embedder:
        """Get embedder instance.

        Returns:
            Embedder: Instance for generating vectors.
        """
        return Embedder()

    def get_yaml_translator(self) -> YamlTranslator:
        """Get the YAML translator used for schema operations.

        Returns:
            YamlTranslator: Instance used during DDL operations.

        Raises:
            DatabaseError: If YAML translator not initialized.
        """
        if self.yaml_translator is None:
            raise DatabaseError(
                "YAML translator not initialized. Provide yaml_path to constructor.",
                operation="get_yaml_translator",
            )
        return self.yaml_translator

    def close(self):
        """Close all database connections and cleanup resources.

        Should be called when database clients are no longer needed.
        """
        if self.qdrant_client is not None:
            with suppress(Exception):
                # Ignore cleanup errors - best effort
                self.qdrant_client.close()
            self.qdrant_client = None

        if self.kuzu_connection is not None:
            with suppress(Exception):
                # Ignore cleanup errors - best effort
                self.kuzu_connection.close()
            self.kuzu_connection = None

__init__(yaml_path=None)

Create DDL-only database client wrapper.

Parameters:

Name Type Description Default
yaml_path str | None

Path to YAML schema file. User must provide - no defaults.

None
Source code in src/memg_core/utils/db_clients.py
40
41
42
43
44
45
46
47
48
49
50
51
52
def __init__(self, yaml_path: str | None = None):
    """Create DDL-only database client wrapper.

    Args:
        yaml_path: Path to YAML schema file. User must provide - no defaults.
    """
    self.qdrant_client: QdrantClient | None = None
    self.kuzu_connection: kuzu.Connection | None = None
    self.db_name = "memg"
    self.qdrant_path = "qdrant"
    self.kuzu_path = "kuzu"

    self.yaml_translator = YamlTranslator(yaml_path) if yaml_path else None

close()

Close all database connections and cleanup resources.

Should be called when database clients are no longer needed.

Source code in src/memg_core/utils/db_clients.py
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
def close(self):
    """Close all database connections and cleanup resources.

    Should be called when database clients are no longer needed.
    """
    if self.qdrant_client is not None:
        with suppress(Exception):
            # Ignore cleanup errors - best effort
            self.qdrant_client.close()
        self.qdrant_client = None

    if self.kuzu_connection is not None:
        with suppress(Exception):
            # Ignore cleanup errors - best effort
            self.kuzu_connection.close()
        self.kuzu_connection = None

get_embedder()

Get embedder instance.

Returns:

Name Type Description
Embedder Embedder

Instance for generating vectors.

Source code in src/memg_core/utils/db_clients.py
182
183
184
185
186
187
188
def get_embedder(self) -> Embedder:
    """Get embedder instance.

    Returns:
        Embedder: Instance for generating vectors.
    """
    return Embedder()

get_kuzu_interface()

Get Kuzu interface using the initialized connection.

Returns:

Name Type Description
KuzuInterface KuzuInterface

Configured with the DDL-created connection.

Raises:

Type Description
DatabaseError

If connection not initialized (call init_dbs first).

Source code in src/memg_core/utils/db_clients.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def get_kuzu_interface(self) -> KuzuInterface:
    """Get Kuzu interface using the initialized connection.

    Returns:
        KuzuInterface: Configured with the DDL-created connection.

    Raises:
        DatabaseError: If connection not initialized (call init_dbs first).
    """
    if self.kuzu_connection is None:
        raise DatabaseError(
            "Kuzu connection not initialized. Call init_dbs() first.",
            operation="get_kuzu_interface",
        )
    return KuzuInterface(self.kuzu_connection, self.yaml_translator)

get_qdrant_interface()

Get Qdrant interface using the initialized client.

Returns:

Name Type Description
QdrantInterface QdrantInterface

Configured with the DDL-created client and collection.

Raises:

Type Description
DatabaseError

If client not initialized (call init_dbs first).

Source code in src/memg_core/utils/db_clients.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
def get_qdrant_interface(self) -> QdrantInterface:
    """Get Qdrant interface using the initialized client.

    Returns:
        QdrantInterface: Configured with the DDL-created client and collection.

    Raises:
        DatabaseError: If client not initialized (call init_dbs first).
    """
    if self.qdrant_client is None:
        raise DatabaseError(
            "Qdrant client not initialized. Call init_dbs() first.",
            operation="get_qdrant_interface",
        )
    return QdrantInterface(self.qdrant_client, self.db_name)

get_yaml_translator()

Get the YAML translator used for schema operations.

Returns:

Name Type Description
YamlTranslator YamlTranslator

Instance used during DDL operations.

Raises:

Type Description
DatabaseError

If YAML translator not initialized.

Source code in src/memg_core/utils/db_clients.py
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
def get_yaml_translator(self) -> YamlTranslator:
    """Get the YAML translator used for schema operations.

    Returns:
        YamlTranslator: Instance used during DDL operations.

    Raises:
        DatabaseError: If YAML translator not initialized.
    """
    if self.yaml_translator is None:
        raise DatabaseError(
            "YAML translator not initialized. Provide yaml_path to constructor.",
            operation="get_yaml_translator",
        )
    return self.yaml_translator

init_dbs(db_path, db_name)

Initialize databases with structured paths.

Parameters:

Name Type Description Default
db_path str

Base database directory.

required
db_name str

Database name (used for collection and file names).

required
Source code in src/memg_core/utils/db_clients.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def init_dbs(self, db_path: str, db_name: str):
    """Initialize databases with structured paths.

    Args:
        db_path: Base database directory.
        db_name: Database name (used for collection and file names).
    """
    # Structure paths
    qdrant_path = os.path.join(db_path, "qdrant")
    kuzu_path = os.path.join(db_path, "kuzu", db_name)

    # Store paths and names
    self.qdrant_path = qdrant_path
    self.kuzu_path = kuzu_path
    self.db_name = db_name

    # Ensure directories exist
    os.makedirs(qdrant_path, exist_ok=True)
    os.makedirs(Path(kuzu_path).parent, exist_ok=True)

    # Create raw database clients directly
    qdrant_client = QdrantClient(path=qdrant_path)
    kuzu_db = kuzu.Database(kuzu_path)
    kuzu_conn = kuzu.Connection(kuzu_db)

    # Store raw clients for interface creation
    self.qdrant_client = qdrant_client
    self.kuzu_connection = kuzu_conn

    # DDL operations - create collection and tables
    self._setup_qdrant_collection(qdrant_client, self.db_name)
    self._setup_kuzu_tables_with_graph_register(kuzu_conn)

DatabaseError

Bases: MemorySystemError

Database operation failures (Qdrant, Kuzu).

Source code in src/memg_core/core/exceptions.py
42
43
class DatabaseError(MemorySystemError):
    """Database operation failures (Qdrant, Kuzu)."""

Embedder

Local embedder using FastEmbed - no API keys required.

Attributes:

Name Type Description
model_name

Name of the embedding model being used.

model

FastEmbed TextEmbedding instance.

Source code in src/memg_core/core/interfaces/embedder.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
class Embedder:
    """Local embedder using FastEmbed - no API keys required.

    Attributes:
        model_name: Name of the embedding model being used.
        model: FastEmbed TextEmbedding instance.
    """

    def __init__(self, model_name: str | None = None):
        """Initialize the FastEmbed embedder.

        Args:
            model_name: Model to use. Defaults to config or snowflake-arctic-embed-xs.
        """

        # Get model name from config system (which reads env) or use provided override
        if model_name:
            self.model_name = model_name
        else:
            # Use config system which handles env variable EMBEDDER_MODEL
            config = get_config()
            self.model_name = config.memg.embedder_model

        self.model = TextEmbedding(model_name=self.model_name)

    def get_embedding(self, text: str) -> list[float]:
        """Get embedding for a single text.

        Args:
            text: Text to embed.

        Returns:
            list[float]: Embedding vector.

        Raises:
            RuntimeError: If FastEmbed returns empty embedding.
        """
        # FastEmbed returns a generator, so we need to extract the first result
        embeddings = list(self.model.embed([text]))
        if embeddings:
            return embeddings[0].tolist()
        raise RuntimeError("FastEmbed returned empty embedding")

    def get_embeddings(self, texts: list[str]) -> list[list[float]]:
        """Get embeddings for multiple texts.

        Args:
            texts: List of texts to embed.

        Returns:
            list[list[float]]: List of embedding vectors.
        """
        embeddings = list(self.model.embed(texts))
        return [emb.tolist() for emb in embeddings]

__init__(model_name=None)

Initialize the FastEmbed embedder.

Parameters:

Name Type Description Default
model_name str | None

Model to use. Defaults to config or snowflake-arctic-embed-xs.

None
Source code in src/memg_core/core/interfaces/embedder.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def __init__(self, model_name: str | None = None):
    """Initialize the FastEmbed embedder.

    Args:
        model_name: Model to use. Defaults to config or snowflake-arctic-embed-xs.
    """

    # Get model name from config system (which reads env) or use provided override
    if model_name:
        self.model_name = model_name
    else:
        # Use config system which handles env variable EMBEDDER_MODEL
        config = get_config()
        self.model_name = config.memg.embedder_model

    self.model = TextEmbedding(model_name=self.model_name)

get_embedding(text)

Get embedding for a single text.

Parameters:

Name Type Description Default
text str

Text to embed.

required

Returns:

Type Description
list[float]

list[float]: Embedding vector.

Raises:

Type Description
RuntimeError

If FastEmbed returns empty embedding.

Source code in src/memg_core/core/interfaces/embedder.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def get_embedding(self, text: str) -> list[float]:
    """Get embedding for a single text.

    Args:
        text: Text to embed.

    Returns:
        list[float]: Embedding vector.

    Raises:
        RuntimeError: If FastEmbed returns empty embedding.
    """
    # FastEmbed returns a generator, so we need to extract the first result
    embeddings = list(self.model.embed([text]))
    if embeddings:
        return embeddings[0].tolist()
    raise RuntimeError("FastEmbed returned empty embedding")

get_embeddings(texts)

Get embeddings for multiple texts.

Parameters:

Name Type Description Default
texts list[str]

List of texts to embed.

required

Returns:

Type Description
list[list[float]]

list[list[float]]: List of embedding vectors.

Source code in src/memg_core/core/interfaces/embedder.py
53
54
55
56
57
58
59
60
61
62
63
def get_embeddings(self, texts: list[str]) -> list[list[float]]:
    """Get embeddings for multiple texts.

    Args:
        texts: List of texts to embed.

    Returns:
        list[list[float]]: List of embedding vectors.
    """
    embeddings = list(self.model.embed(texts))
    return [emb.tolist() for emb in embeddings]

GraphRegister

Generates DDL statements for graph databases using TypeRegistry.

Database-agnostic design - generates DDL that can be adapted for: - Kuzu (current) - Neo4j (future) - ArangoDB (future) - Any graph database with node/relationship tables

Source code in src/memg_core/utils/graph_register.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
class GraphRegister:
    """Generates DDL statements for graph databases using TypeRegistry.

    Database-agnostic design - generates DDL that can be adapted for:
    - Kuzu (current)
    - Neo4j (future)
    - ArangoDB (future)
    - Any graph database with node/relationship tables
    """

    def __init__(
        self,
        type_registry: TypeRegistry | None = None,
        yaml_translator: YamlTranslator | None = None,
    ):
        """Initialize GraphRegister with TypeRegistry and YamlTranslator.

        Args:
            type_registry: TypeRegistry instance. If None, uses global singleton.
            yaml_translator: YamlTranslator for accessing full YAML schema. Optional.
        """
        self.type_registry = type_registry or TypeRegistry.get_instance()
        self.yaml_translator = yaml_translator

        # Validate TypeRegistry is properly initialized
        try:
            self.type_registry.get_valid_entity_names()
        except RuntimeError as e:
            raise DatabaseError(
                "TypeRegistry not initialized. Call initialize_types_from_yaml() first.",
                operation="graph_register_init",
                original_error=e,
            ) from e

    def generate_entity_table_ddl(self, entity_name: str) -> str:
        """Generate DDL for a single entity table.

        Args:
            entity_name: Name of entity type (e.g., 'task', 'bug')

        Returns:
            DDL string for creating the entity table

        Raises:
            DatabaseError: If entity not found in TypeRegistry
        """
        # Validate entity exists in TypeRegistry
        valid_entities = self.type_registry.get_valid_entity_names()
        if entity_name not in valid_entities:
            raise DatabaseError(
                f"Entity '{entity_name}' not found in TypeRegistry",
                operation="generate_entity_table_ddl",
                context={"entity_name": entity_name, "valid_entities": valid_entities},
            )

        # Get Pydantic model with all fields (inheritance already resolved)
        model = self.type_registry.get_entity_model(entity_name)

        # Build column definitions from Pydantic model fields
        columns = []
        system_field_names = {
            "id",
            "user_id",
            "memory_type",
            "created_at",
            "updated_at",
        }

        for field_name, _field_info in model.model_fields.items():
            # Skip system fields - they'll be added separately
            if field_name in system_field_names:
                continue
            # All user fields are STRING for now (Kuzu limitation)
            # TODO: Add proper type mapping when Kuzu supports more types
            columns.append(f"{field_name} STRING")

        # Add system fields (not in YAML schema)
        system_columns = [
            "id STRING",
            "user_id STRING",
            "memory_type STRING",
            "created_at STRING",
            "updated_at STRING",
        ]

        all_columns = system_columns + columns
        columns_sql = ",\n                ".join(all_columns)

        # Generate Kuzu-style DDL (adaptable for other graph DBs)
        ddl = f"""CREATE NODE TABLE IF NOT EXISTS {entity_name}(
                {columns_sql},
                PRIMARY KEY (id)
        )"""

        return ddl

    def generate_all_entity_tables_ddl(self) -> list[str]:
        """Generate DDL for all entity tables from TypeRegistry.

        Returns:
            List of DDL strings, one per entity table
        """
        ddl_statements = []

        for entity_name in self.type_registry.get_valid_entity_names():
            ddl = self.generate_entity_table_ddl(entity_name)
            ddl_statements.append(ddl)

        return ddl_statements

    def generate_relationship_tables_ddl(self) -> list[str]:
        """Generate DDL for relationship tables from YAML schema using YamlTranslator.

        Uses YamlTranslator to discover relations and centralized table naming.
        Handles directed/undirected semantics for table creation.

        Returns:
            List of DDL strings for relationship tables

        Raises:
            DatabaseError: If YamlTranslator not provided or schema access fails
        """
        if not self.yaml_translator:
            # Return empty list if no YamlTranslator - maintains compatibility
            return []

        ddl_statements = []
        created_tables = set()  # Track unique table names to avoid duplicates

        try:
            # Use YamlTranslator to discover all relations across all entities
            for entity_name in self.yaml_translator.get_entity_types():
                relation_specs = self.yaml_translator.get_relations_for_source(entity_name)

                for spec in relation_specs:
                    # Validate predicate against TypeRegistry
                    predicate = spec["predicate"]
                    if not self.type_registry.validate_relation_predicate(predicate):
                        raise DatabaseError(
                            f"Invalid predicate '{predicate}' not found in TypeRegistry",
                            operation="generate_relationship_tables_ddl",
                            context={"predicate": predicate, "spec": spec},
                        )

                    # Generate table name using centralized helper
                    table_name = self.yaml_translator.relationship_table_name(
                        source=spec["source"],
                        predicate=spec["predicate"],
                        target=spec["target"],
                        directed=spec["directed"],
                    )

                    # Skip if we've already created this table
                    if table_name in created_tables:
                        continue

                    created_tables.add(table_name)

                    # Create DDL - direction affects semantics but not table structure
                    ddl = f"""CREATE REL TABLE IF NOT EXISTS {table_name}(
                        FROM {spec["source"]} TO {spec["target"]}
                    )"""
                    ddl_statements.append(ddl)

        except Exception as e:
            if isinstance(e, DatabaseError):
                raise
            raise DatabaseError(
                "Failed to generate relationship tables DDL",
                operation="generate_relationship_tables_ddl",
                original_error=e,
            ) from e

        return ddl_statements

    def generate_hrid_mapping_table_ddl(self) -> str:
        """Generate DDL for HRID mapping table (system table).

        Returns:
            DDL string for HRID mapping table
        """
        ddl = """CREATE NODE TABLE IF NOT EXISTS HridMapping(
            hrid_user_key STRING,
            hrid STRING,
            uuid STRING,
            memory_type STRING,
            user_id STRING,
            created_at STRING,
            deleted_at STRING,
            PRIMARY KEY (hrid_user_key)
        )"""

        return ddl

    def generate_all_ddl(self) -> list[str]:
        """Generate all DDL statements for complete schema setup.

        Returns:
            List of all DDL statements needed for schema creation
        """
        ddl_statements = []

        # Entity tables
        ddl_statements.extend(self.generate_all_entity_tables_ddl())

        # Relationship tables
        ddl_statements.extend(self.generate_relationship_tables_ddl())

        # System tables
        ddl_statements.append(self.generate_hrid_mapping_table_ddl())

        return ddl_statements

__init__(type_registry=None, yaml_translator=None)

Initialize GraphRegister with TypeRegistry and YamlTranslator.

Parameters:

Name Type Description Default
type_registry TypeRegistry | None

TypeRegistry instance. If None, uses global singleton.

None
yaml_translator YamlTranslator | None

YamlTranslator for accessing full YAML schema. Optional.

None
Source code in src/memg_core/utils/graph_register.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(
    self,
    type_registry: TypeRegistry | None = None,
    yaml_translator: YamlTranslator | None = None,
):
    """Initialize GraphRegister with TypeRegistry and YamlTranslator.

    Args:
        type_registry: TypeRegistry instance. If None, uses global singleton.
        yaml_translator: YamlTranslator for accessing full YAML schema. Optional.
    """
    self.type_registry = type_registry or TypeRegistry.get_instance()
    self.yaml_translator = yaml_translator

    # Validate TypeRegistry is properly initialized
    try:
        self.type_registry.get_valid_entity_names()
    except RuntimeError as e:
        raise DatabaseError(
            "TypeRegistry not initialized. Call initialize_types_from_yaml() first.",
            operation="graph_register_init",
            original_error=e,
        ) from e

generate_all_ddl()

Generate all DDL statements for complete schema setup.

Returns:

Type Description
list[str]

List of all DDL statements needed for schema creation

Source code in src/memg_core/utils/graph_register.py
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
def generate_all_ddl(self) -> list[str]:
    """Generate all DDL statements for complete schema setup.

    Returns:
        List of all DDL statements needed for schema creation
    """
    ddl_statements = []

    # Entity tables
    ddl_statements.extend(self.generate_all_entity_tables_ddl())

    # Relationship tables
    ddl_statements.extend(self.generate_relationship_tables_ddl())

    # System tables
    ddl_statements.append(self.generate_hrid_mapping_table_ddl())

    return ddl_statements

generate_all_entity_tables_ddl()

Generate DDL for all entity tables from TypeRegistry.

Returns:

Type Description
list[str]

List of DDL strings, one per entity table

Source code in src/memg_core/utils/graph_register.py
110
111
112
113
114
115
116
117
118
119
120
121
122
def generate_all_entity_tables_ddl(self) -> list[str]:
    """Generate DDL for all entity tables from TypeRegistry.

    Returns:
        List of DDL strings, one per entity table
    """
    ddl_statements = []

    for entity_name in self.type_registry.get_valid_entity_names():
        ddl = self.generate_entity_table_ddl(entity_name)
        ddl_statements.append(ddl)

    return ddl_statements

generate_entity_table_ddl(entity_name)

Generate DDL for a single entity table.

Parameters:

Name Type Description Default
entity_name str

Name of entity type (e.g., 'task', 'bug')

required

Returns:

Type Description
str

DDL string for creating the entity table

Raises:

Type Description
DatabaseError

If entity not found in TypeRegistry

Source code in src/memg_core/utils/graph_register.py
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
def generate_entity_table_ddl(self, entity_name: str) -> str:
    """Generate DDL for a single entity table.

    Args:
        entity_name: Name of entity type (e.g., 'task', 'bug')

    Returns:
        DDL string for creating the entity table

    Raises:
        DatabaseError: If entity not found in TypeRegistry
    """
    # Validate entity exists in TypeRegistry
    valid_entities = self.type_registry.get_valid_entity_names()
    if entity_name not in valid_entities:
        raise DatabaseError(
            f"Entity '{entity_name}' not found in TypeRegistry",
            operation="generate_entity_table_ddl",
            context={"entity_name": entity_name, "valid_entities": valid_entities},
        )

    # Get Pydantic model with all fields (inheritance already resolved)
    model = self.type_registry.get_entity_model(entity_name)

    # Build column definitions from Pydantic model fields
    columns = []
    system_field_names = {
        "id",
        "user_id",
        "memory_type",
        "created_at",
        "updated_at",
    }

    for field_name, _field_info in model.model_fields.items():
        # Skip system fields - they'll be added separately
        if field_name in system_field_names:
            continue
        # All user fields are STRING for now (Kuzu limitation)
        # TODO: Add proper type mapping when Kuzu supports more types
        columns.append(f"{field_name} STRING")

    # Add system fields (not in YAML schema)
    system_columns = [
        "id STRING",
        "user_id STRING",
        "memory_type STRING",
        "created_at STRING",
        "updated_at STRING",
    ]

    all_columns = system_columns + columns
    columns_sql = ",\n                ".join(all_columns)

    # Generate Kuzu-style DDL (adaptable for other graph DBs)
    ddl = f"""CREATE NODE TABLE IF NOT EXISTS {entity_name}(
            {columns_sql},
            PRIMARY KEY (id)
    )"""

    return ddl

generate_hrid_mapping_table_ddl()

Generate DDL for HRID mapping table (system table).

Returns:

Type Description
str

DDL string for HRID mapping table

Source code in src/memg_core/utils/graph_register.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def generate_hrid_mapping_table_ddl(self) -> str:
    """Generate DDL for HRID mapping table (system table).

    Returns:
        DDL string for HRID mapping table
    """
    ddl = """CREATE NODE TABLE IF NOT EXISTS HridMapping(
        hrid_user_key STRING,
        hrid STRING,
        uuid STRING,
        memory_type STRING,
        user_id STRING,
        created_at STRING,
        deleted_at STRING,
        PRIMARY KEY (hrid_user_key)
    )"""

    return ddl

generate_relationship_tables_ddl()

Generate DDL for relationship tables from YAML schema using YamlTranslator.

Uses YamlTranslator to discover relations and centralized table naming. Handles directed/undirected semantics for table creation.

Returns:

Type Description
list[str]

List of DDL strings for relationship tables

Raises:

Type Description
DatabaseError

If YamlTranslator not provided or schema access fails

Source code in src/memg_core/utils/graph_register.py
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def generate_relationship_tables_ddl(self) -> list[str]:
    """Generate DDL for relationship tables from YAML schema using YamlTranslator.

    Uses YamlTranslator to discover relations and centralized table naming.
    Handles directed/undirected semantics for table creation.

    Returns:
        List of DDL strings for relationship tables

    Raises:
        DatabaseError: If YamlTranslator not provided or schema access fails
    """
    if not self.yaml_translator:
        # Return empty list if no YamlTranslator - maintains compatibility
        return []

    ddl_statements = []
    created_tables = set()  # Track unique table names to avoid duplicates

    try:
        # Use YamlTranslator to discover all relations across all entities
        for entity_name in self.yaml_translator.get_entity_types():
            relation_specs = self.yaml_translator.get_relations_for_source(entity_name)

            for spec in relation_specs:
                # Validate predicate against TypeRegistry
                predicate = spec["predicate"]
                if not self.type_registry.validate_relation_predicate(predicate):
                    raise DatabaseError(
                        f"Invalid predicate '{predicate}' not found in TypeRegistry",
                        operation="generate_relationship_tables_ddl",
                        context={"predicate": predicate, "spec": spec},
                    )

                # Generate table name using centralized helper
                table_name = self.yaml_translator.relationship_table_name(
                    source=spec["source"],
                    predicate=spec["predicate"],
                    target=spec["target"],
                    directed=spec["directed"],
                )

                # Skip if we've already created this table
                if table_name in created_tables:
                    continue

                created_tables.add(table_name)

                # Create DDL - direction affects semantics but not table structure
                ddl = f"""CREATE REL TABLE IF NOT EXISTS {table_name}(
                    FROM {spec["source"]} TO {spec["target"]}
                )"""
                ddl_statements.append(ddl)

    except Exception as e:
        if isinstance(e, DatabaseError):
            raise
        raise DatabaseError(
            "Failed to generate relationship tables DDL",
            operation="generate_relationship_tables_ddl",
            original_error=e,
        ) from e

    return ddl_statements

KuzuInterface

Pure CRUD wrapper around Kuzu database - NO DDL operations.

Attributes:

Name Type Description
conn

Pre-initialized Kuzu connection.

yaml_translator

Optional YAML translator for relationship operations.

Source code in src/memg_core/core/interfaces/kuzu.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
class KuzuInterface:
    """Pure CRUD wrapper around Kuzu database - NO DDL operations.

    Attributes:
        conn: Pre-initialized Kuzu connection.
        yaml_translator: Optional YAML translator for relationship operations.
    """

    def __init__(self, connection: kuzu.Connection, yaml_translator=None):
        """Initialize with pre-created connection.

        Args:
            connection: Pre-initialized Kuzu connection from DatabaseClients.
            yaml_translator: Optional YamlTranslator for relationship operations.
        """
        self.conn = connection
        self.yaml_translator = yaml_translator

    def add_node(self, table: str, properties: dict[str, Any]) -> None:
        """Add a node to the graph - pure CRUD operation.

        Args:
            table: Node table name.
            properties: Node properties.

        Raises:
            DatabaseError: If node creation fails.
        """
        try:
            props = ", ".join([f"{k}: ${k}" for k in properties])
            query = f"CREATE (:{table} {{{props}}})"
            self.conn.execute(query, parameters=properties)
        except Exception as e:
            raise DatabaseError(
                f"Failed to add node to {table}",
                operation="add_node",
                context={"table": table, "properties": properties},
                original_error=e,
            ) from e

    def update_node(
        self, table: str, node_uuid: str, properties: dict[str, Any], user_id: str
    ) -> bool:
        """Update a node in the graph - pure CRUD operation.

        Args:
            table: Node table name.
            node_uuid: UUID of the node to update.
            properties: Node properties to update.
            user_id: User ID for ownership verification.

        Returns:
            bool: True if update succeeded, False if node not found.

        Raises:
            DatabaseError: If node update fails due to system error.
        """
        try:
            # CRITICAL: Check if node exists AND belongs to user
            check_query = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) RETURN n.id as id"
            check_result = self.query(check_query, {"uuid": node_uuid, "user_id": user_id})

            if not check_result:
                # Node doesn't exist for this user
                return False

            # Build SET clause for properties
            set_clauses = []
            params = {"uuid": node_uuid, "user_id": user_id}

            for key, value in properties.items():
                # Skip system fields that shouldn't be updated via this method
                if key in ("id", "user_id"):
                    continue

                param_name = f"prop_{key}"
                set_clauses.append(f"n.{key} = ${param_name}")
                params[param_name] = value

            if not set_clauses:
                # No properties to update (all were system fields)
                return True

            # Execute update query
            set_clause = ", ".join(set_clauses)
            update_query = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) SET {set_clause}"
            self.conn.execute(update_query, parameters=params)

            return True

        except Exception as e:
            raise DatabaseError(
                f"Failed to update node in {table}",
                operation="update_node",
                context={
                    "table": table,
                    "node_uuid": node_uuid,
                    "properties": properties,
                    "user_id": user_id,
                },
                original_error=e,
            ) from e

    def add_relationship(
        self,
        from_table: str,
        to_table: str,
        rel_type: str,
        from_id: str,
        to_id: str,
        user_id: str,
        props: dict[str, Any] | None = None,
    ) -> None:
        """Add relationship between nodes.

        Args:
            from_table: Source node table name.
            to_table: Target node table name.
            rel_type: Relationship type.
            from_id: Source node ID.
            to_id: Target node ID.
            user_id: User ID for ownership verification.
            props: Optional relationship properties.

        Raises:
            DatabaseError: If relationship creation fails.
        """
        try:
            props = props or {}

            # VALIDATE RELATIONSHIP AGAINST YAML SCHEMA - crash if invalid
            if not validate_relation_predicate(rel_type):
                raise ValueError(
                    f"Invalid relationship predicate: {rel_type}. Must be defined in YAML schema."
                )

            # Use relationship type as-is (predicates from YAML) - no sanitization
            # rel_type should already be a valid predicate (e.g., "REFERENCED_BY", "ANNOTATES")

            # CRITICAL: Verify both nodes belong to the user before creating relationship
            # First check if both nodes exist and belong to the user
            check_query = (
                f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}}), "
                f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
                f"RETURN a.id, b.id"
            )
            check_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}
            check_result = self.query(check_query, check_params)

            if not check_result:
                raise ValueError(
                    f"Cannot create relationship: one or both memories not found "
                    f"or don't belong to user {user_id}"
                )

            # Generate relationship table name using YamlTranslator
            if not self.yaml_translator:
                raise DatabaseError(
                    "YamlTranslator required for relationship operations",
                    operation="add_relationship",
                    context={
                        "from_table": from_table,
                        "to_table": to_table,
                        "rel_type": rel_type,
                    },
                )

            relationship_table_name = self.yaml_translator.relationship_table_name(
                source=from_table,
                predicate=rel_type,
                target=to_table,
                directed=True,  # Direction affects semantics but not table naming for now
            )

            # Now create the relationship using the unique table name
            prop_str = ", ".join([f"{k}: ${k}" for k in props.keys()]) if props else ""
            rel_props = f" {{{prop_str}}}" if prop_str else ""
            create_query = (
                f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}}), "
                f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
                f"CREATE (a)-[:{relationship_table_name}{rel_props}]->(b)"
            )
            create_params = {
                "from_id": from_id,
                "to_id": to_id,
                "user_id": user_id,
                **props,
            }
            self.conn.execute(create_query, parameters=create_params)
        except Exception as e:
            raise DatabaseError(
                f"Failed to add relationship {rel_type}",
                operation="add_relationship",
                context={
                    "from_table": from_table,
                    "to_table": to_table,
                    "rel_type": rel_type,
                    "from_id": from_id,
                    "to_id": to_id,
                },
                original_error=e,
            ) from e

    def delete_relationship(
        self,
        from_table: str,
        to_table: str,
        rel_type: str,
        from_id: str,
        to_id: str,
        user_id: str,
    ) -> bool:
        """Delete relationship between nodes.

        Args:
            from_table: Source node table name.
            to_table: Target node table name.
            rel_type: Relationship type.
            from_id: Source node ID.
            to_id: Target node ID.
            user_id: User ID for ownership verification.

        Returns:
            bool: True if deletion succeeded, False if relationship not found.

        Raises:
            DatabaseError: If relationship deletion fails due to system error.
        """
        try:
            # VALIDATE RELATIONSHIP AGAINST YAML SCHEMA - crash if invalid
            if not validate_relation_predicate(rel_type):
                raise ValueError(
                    f"Invalid relationship predicate: {rel_type}. Must be defined in YAML schema."
                )

            # CRITICAL: Verify both nodes belong to the user before deleting relationship
            # First check if both nodes exist and belong to the user
            check_query = (
                f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}}), "
                f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
                f"RETURN a.id, b.id"
            )
            check_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}
            check_result = self.query(check_query, check_params)

            if not check_result:
                # Nodes don't exist or don't belong to user - return False (not found)
                return False

            # Generate relationship table name using YamlTranslator
            if not self.yaml_translator:
                raise DatabaseError(
                    "YamlTranslator required for relationship operations",
                    operation="delete_relationship",
                    context={
                        "from_table": from_table,
                        "to_table": to_table,
                        "rel_type": rel_type,
                    },
                )

            relationship_table_name = self.yaml_translator.relationship_table_name(
                source=from_table,
                predicate=rel_type,
                target=to_table,
                directed=True,  # Direction affects semantics but not table naming for now
            )

            # First check if the relationship exists
            check_rel_query = (
                f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}})"
                f"-[r:{relationship_table_name}]->"
                f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
                f"RETURN r"
            )
            check_rel_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}

            # Check if relationship exists
            relationship_exists = self.query(check_rel_query, check_rel_params)
            if not relationship_exists:
                # Relationship doesn't exist - return False
                return False

            # Delete the specific relationship (we know it exists)
            delete_query = (
                f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}})"
                f"-[r:{relationship_table_name}]->"
                f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
                f"DELETE r"
            )
            delete_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}

            # Execute deletion
            self.conn.execute(delete_query, parameters=delete_params)

            # If we get here, deletion succeeded
            return True

        except Exception as e:
            if isinstance(e, ValueError):
                # Re-raise validation errors as-is
                raise
            raise DatabaseError(
                f"Failed to delete relationship {rel_type}",
                operation="delete_relationship",
                context={
                    "from_table": from_table,
                    "to_table": to_table,
                    "rel_type": rel_type,
                    "from_id": from_id,
                    "to_id": to_id,
                },
                original_error=e,
            ) from e

    def _extract_query_results(self, query_result) -> list[dict[str, Any]]:
        """Extract results from Kuzu QueryResult using raw iteration.

        Args:
            query_result: Kuzu QueryResult object.

        Returns:
            list[dict[str, Any]]: List of dictionaries containing query results.
        """
        # Type annotations disabled for QueryResult - dynamic interface from kuzu package
        qr = query_result  # type: ignore

        results = []
        column_names = qr.get_column_names()
        while qr.has_next():
            row = qr.get_next()
            result = {}
            for i, col_name in enumerate(column_names):
                result[col_name] = row[i] if i < len(row) else None
            results.append(result)
        return results

    def query(self, cypher: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
        """Execute Cypher query and return results.

        Args:
            cypher: Cypher query string.
            params: Query parameters.

        Returns:
            list[dict[str, Any]]: Query results.

        Raises:
            DatabaseError: If query execution fails.
        """
        try:
            qr = self.conn.execute(cypher, parameters=params or {})
            return self._extract_query_results(qr)
        except Exception as e:
            raise DatabaseError(
                "Failed to execute Kuzu query",
                operation="query",
                context={"cypher": cypher, "params": params},
                original_error=e,
            ) from e

    def neighbors(
        self,
        node_label: str,
        node_uuid: str,
        user_id: str,
        rel_types: list[str] | None = None,
        direction: str = "any",
        limit: int = 10,
        neighbor_label: str | None = None,
    ) -> list[dict[str, Any]]:
        """Fetch neighbors of a node by UUID only.

        Args:
            node_label: Node type/table name (e.g., "Memory", "bug") - NOT a UUID.
            node_uuid: UUID of the specific node to find neighbors for.
            user_id: User ID for isolation - only return neighbors belonging to this user.
            rel_types: List of relationship types to filter by.
            direction: "in", "out", or "any" for relationship direction.
            limit: Maximum number of neighbors to return.
            neighbor_label: Type of neighbor nodes to return.

        Returns:
            list[dict[str, Any]]: List of neighbor nodes with relationship info.

        Raises:
            ValueError: If node_label is a UUID or node_uuid is not a UUID.
            DatabaseError: If neighbor query fails.
        """
        # Validate parameters to prevent common bugs
        if self._is_uuid(node_label):
            raise ValueError(
                f"node_label must be a node type (e.g., 'Memory', 'bug'), not UUID: {node_label}. "
                f"UUIDs should be passed as node_uuid parameter."
            )

        if not self._is_uuid(node_uuid):
            raise ValueError(f"node_uuid must be a valid UUID format, got: {node_uuid}")

        try:
            # Use YamlTranslator to expand predicates to concrete relationship labels
            if not self.yaml_translator:
                raise DatabaseError(
                    "YamlTranslator required for neighbor operations",
                    operation="neighbors",
                    context={"node_label": node_label, "rel_types": rel_types},
                )

            # Get concrete relationship labels for this source and predicates
            if rel_types:
                relationship_labels = self.yaml_translator.get_labels_for_predicates(
                    source_type=node_label,
                    predicates=rel_types,
                    neighbor_label=neighbor_label,
                )
                if not relationship_labels:
                    # No matching relationships found - return empty
                    return []

                # Create relationship pattern with specific labels
                rel_filter = "|".join(relationship_labels)
                rel_part = f":{rel_filter}"
            else:
                # No filtering - match all relationships
                rel_part = ""

            # CRITICAL: User isolation - both source node and neighbors must belong to user
            node_condition = f"a:{node_label} {{id: $node_uuid, user_id: $user_id}}"
            neighbor = f":{neighbor_label}" if neighbor_label else ""
            neighbor_condition = f"n{neighbor} {{user_id: $user_id}}"

            # Build direction-aware pattern
            if direction == "out":
                pattern = f"({node_condition})-[r{rel_part}]->({neighbor_condition})"
            elif direction == "in":
                pattern = f"({node_condition})<-[r{rel_part}]-({neighbor_condition})"
            else:
                pattern = f"({node_condition})-[r{rel_part}]-({neighbor_condition})"

            # Return neighbors only if they belong to the same user
            cypher = f"""
            MATCH {pattern}
            RETURN DISTINCT n.id as id,
                            n.user_id as user_id,
                            n.memory_type as memory_type,
                            n.created_at as created_at,
                            label(r) as rel_type,
                            n as node
            LIMIT $limit
            """
            params = {"node_uuid": node_uuid, "user_id": user_id, "limit": limit}
            return self.query(cypher, params)
        except Exception as e:
            raise DatabaseError(
                "Failed to fetch neighbors",
                operation="neighbors",
                context={
                    "node_label": node_label,
                    "node_uuid": node_uuid,
                    "user_id": user_id,
                    "rel_types": rel_types,
                    "direction": direction,
                },
                original_error=e,
            ) from e

    def delete_node(self, table: str, node_uuid: str, user_id: str) -> bool:
        """Delete a single node by UUID"""
        try:
            # CRITICAL: Check if node exists AND belongs to user
            cypher_check = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) RETURN n.id as id"
            check_result = self.query(cypher_check, {"uuid": node_uuid, "user_id": user_id})

            if not check_result:
                # Node doesn't exist for this user, consider it successfully "deleted"
                return True

            # Delete the node - only if it belongs to the user
            cypher_delete_node = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) DELETE n"
            self.conn.execute(
                cypher_delete_node, parameters={"uuid": node_uuid, "user_id": user_id}
            )
            return True

        except Exception as e:
            error_msg = str(e).lower()
            if "delete undirected rel" in error_msg or "relationship" in error_msg:
                # Relationship constraint prevents deletion - this is a REAL FAILURE
                # Don't lie by returning True - raise explicit error
                raise DatabaseError(
                    f"Cannot delete node {node_uuid} from {table}: has existing relationships. "
                    f"Delete relationships first or use CASCADE delete if supported.",
                    operation="delete_node",
                    context={
                        "table": table,
                        "node_uuid": node_uuid,
                        "constraint_error": str(e),
                    },
                    original_error=e,
                ) from e
            # Other database error
            raise DatabaseError(
                f"Failed to delete node from {table}",
                operation="delete_node",
                context={"table": table, "node_uuid": node_uuid, "user_id": user_id},
                original_error=e,
            ) from e

    def get_nodes(
        self,
        user_id: str,
        node_type: str | None = None,
        filters: dict[str, Any] | None = None,
        limit: int = 50,
        offset: int = 0,
    ) -> list[dict[str, Any]]:
        """Get multiple nodes with filtering and pagination.

        Args:
            user_id: User ID for ownership verification.
            node_type: Optional node type filter (e.g., "task", "note").
            filters: Optional field filters (e.g., {"status": "open"}).
            limit: Maximum number of nodes to return.
            offset: Number of nodes to skip for pagination.

        Returns:
            list[dict[str, Any]]: List of node data from Kuzu.

        Raises:
            DatabaseError: If node retrieval fails.
        """
        try:
            filters = filters or {}

            # Build MATCH clause
            if node_type:
                match_clause = f"MATCH (n:{node_type} {{user_id: $user_id"
            else:
                match_clause = "MATCH (n {user_id: $user_id"

            # Add field filters
            params = {"user_id": user_id, "limit": limit, "offset": offset}
            for field_name, field_value in filters.items():
                param_name = f"filter_{field_name}"
                match_clause += f", {field_name}: ${param_name}"
                params[param_name] = field_value

            match_clause += "})"

            # Build complete query
            cypher_query = f"""
            {match_clause}
            RETURN n.id as id,
                   n.user_id as user_id,
                   n.memory_type as memory_type,
                   n.created_at as created_at,
                   n.updated_at as updated_at,
                   n as node
            ORDER BY n.created_at DESC
            SKIP $offset
            LIMIT $limit
            """

            return self.query(cypher_query, params)

        except Exception as e:
            raise DatabaseError(
                "Failed to get nodes from Kuzu",
                operation="get_nodes",
                context={
                    "user_id": user_id,
                    "node_type": node_type,
                    "filters": filters,
                    "limit": limit,
                    "offset": offset,
                },
                original_error=e,
            ) from e

    def _get_kuzu_type(self, key: str, value: Any) -> str:
        """Map Python types to Kuzu types with proper validation.

        Args:
            key: Property key name.
            value: Property value to type-check.

        Returns:
            str: Kuzu type name.

        Raises:
            DatabaseError: If the Python type is not supported by Kuzu.
        """
        if isinstance(value, bool):
            # Check bool first (bool is subclass of int in Python!)
            return "BOOLEAN"
        if isinstance(value, int):
            return "INT64"
        if isinstance(value, float):
            return "DOUBLE"
        if isinstance(value, str):
            return "STRING"
        if value is None:
            # None values need special handling - default to STRING for now
            return "STRING"
        # Unsupported type - fail explicitly instead of silent STRING conversion
        raise DatabaseError(
            f"Unsupported property type for key '{key}': {type(value).__name__}. "
            f"Supported types: str, int, float, bool. "
            f"Complex types must be serialized before storage.",
            operation="_get_kuzu_type",
            context={"key": key, "value": value, "type": type(value).__name__},
        )

    def _is_uuid(self, value: str) -> bool:
        """Check if string looks like a UUID (36 chars with hyphens in right positions).

        Args:
            value: String to check.

        Returns:
            bool: True if value matches UUID format (8-4-4-4-12 hex pattern), False otherwise.
        """
        if not isinstance(value, str) or len(value) != 36:
            return False

        # UUID format: 8-4-4-4-12 (e.g., 550e8400-e29b-41d4-a716-446655440000)
        uuid_pattern = r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
        return bool(re.match(uuid_pattern, value, re.IGNORECASE))

__init__(connection, yaml_translator=None)

Initialize with pre-created connection.

Parameters:

Name Type Description Default
connection Connection

Pre-initialized Kuzu connection from DatabaseClients.

required
yaml_translator

Optional YamlTranslator for relationship operations.

None
Source code in src/memg_core/core/interfaces/kuzu.py
20
21
22
23
24
25
26
27
28
def __init__(self, connection: kuzu.Connection, yaml_translator=None):
    """Initialize with pre-created connection.

    Args:
        connection: Pre-initialized Kuzu connection from DatabaseClients.
        yaml_translator: Optional YamlTranslator for relationship operations.
    """
    self.conn = connection
    self.yaml_translator = yaml_translator

add_node(table, properties)

Add a node to the graph - pure CRUD operation.

Parameters:

Name Type Description Default
table str

Node table name.

required
properties dict[str, Any]

Node properties.

required

Raises:

Type Description
DatabaseError

If node creation fails.

Source code in src/memg_core/core/interfaces/kuzu.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def add_node(self, table: str, properties: dict[str, Any]) -> None:
    """Add a node to the graph - pure CRUD operation.

    Args:
        table: Node table name.
        properties: Node properties.

    Raises:
        DatabaseError: If node creation fails.
    """
    try:
        props = ", ".join([f"{k}: ${k}" for k in properties])
        query = f"CREATE (:{table} {{{props}}})"
        self.conn.execute(query, parameters=properties)
    except Exception as e:
        raise DatabaseError(
            f"Failed to add node to {table}",
            operation="add_node",
            context={"table": table, "properties": properties},
            original_error=e,
        ) from e

add_relationship(from_table, to_table, rel_type, from_id, to_id, user_id, props=None)

Add relationship between nodes.

Parameters:

Name Type Description Default
from_table str

Source node table name.

required
to_table str

Target node table name.

required
rel_type str

Relationship type.

required
from_id str

Source node ID.

required
to_id str

Target node ID.

required
user_id str

User ID for ownership verification.

required
props dict[str, Any] | None

Optional relationship properties.

None

Raises:

Type Description
DatabaseError

If relationship creation fails.

Source code in src/memg_core/core/interfaces/kuzu.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
def add_relationship(
    self,
    from_table: str,
    to_table: str,
    rel_type: str,
    from_id: str,
    to_id: str,
    user_id: str,
    props: dict[str, Any] | None = None,
) -> None:
    """Add relationship between nodes.

    Args:
        from_table: Source node table name.
        to_table: Target node table name.
        rel_type: Relationship type.
        from_id: Source node ID.
        to_id: Target node ID.
        user_id: User ID for ownership verification.
        props: Optional relationship properties.

    Raises:
        DatabaseError: If relationship creation fails.
    """
    try:
        props = props or {}

        # VALIDATE RELATIONSHIP AGAINST YAML SCHEMA - crash if invalid
        if not validate_relation_predicate(rel_type):
            raise ValueError(
                f"Invalid relationship predicate: {rel_type}. Must be defined in YAML schema."
            )

        # Use relationship type as-is (predicates from YAML) - no sanitization
        # rel_type should already be a valid predicate (e.g., "REFERENCED_BY", "ANNOTATES")

        # CRITICAL: Verify both nodes belong to the user before creating relationship
        # First check if both nodes exist and belong to the user
        check_query = (
            f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}}), "
            f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
            f"RETURN a.id, b.id"
        )
        check_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}
        check_result = self.query(check_query, check_params)

        if not check_result:
            raise ValueError(
                f"Cannot create relationship: one or both memories not found "
                f"or don't belong to user {user_id}"
            )

        # Generate relationship table name using YamlTranslator
        if not self.yaml_translator:
            raise DatabaseError(
                "YamlTranslator required for relationship operations",
                operation="add_relationship",
                context={
                    "from_table": from_table,
                    "to_table": to_table,
                    "rel_type": rel_type,
                },
            )

        relationship_table_name = self.yaml_translator.relationship_table_name(
            source=from_table,
            predicate=rel_type,
            target=to_table,
            directed=True,  # Direction affects semantics but not table naming for now
        )

        # Now create the relationship using the unique table name
        prop_str = ", ".join([f"{k}: ${k}" for k in props.keys()]) if props else ""
        rel_props = f" {{{prop_str}}}" if prop_str else ""
        create_query = (
            f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}}), "
            f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
            f"CREATE (a)-[:{relationship_table_name}{rel_props}]->(b)"
        )
        create_params = {
            "from_id": from_id,
            "to_id": to_id,
            "user_id": user_id,
            **props,
        }
        self.conn.execute(create_query, parameters=create_params)
    except Exception as e:
        raise DatabaseError(
            f"Failed to add relationship {rel_type}",
            operation="add_relationship",
            context={
                "from_table": from_table,
                "to_table": to_table,
                "rel_type": rel_type,
                "from_id": from_id,
                "to_id": to_id,
            },
            original_error=e,
        ) from e

delete_node(table, node_uuid, user_id)

Delete a single node by UUID

Source code in src/memg_core/core/interfaces/kuzu.py
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
def delete_node(self, table: str, node_uuid: str, user_id: str) -> bool:
    """Delete a single node by UUID"""
    try:
        # CRITICAL: Check if node exists AND belongs to user
        cypher_check = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) RETURN n.id as id"
        check_result = self.query(cypher_check, {"uuid": node_uuid, "user_id": user_id})

        if not check_result:
            # Node doesn't exist for this user, consider it successfully "deleted"
            return True

        # Delete the node - only if it belongs to the user
        cypher_delete_node = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) DELETE n"
        self.conn.execute(
            cypher_delete_node, parameters={"uuid": node_uuid, "user_id": user_id}
        )
        return True

    except Exception as e:
        error_msg = str(e).lower()
        if "delete undirected rel" in error_msg or "relationship" in error_msg:
            # Relationship constraint prevents deletion - this is a REAL FAILURE
            # Don't lie by returning True - raise explicit error
            raise DatabaseError(
                f"Cannot delete node {node_uuid} from {table}: has existing relationships. "
                f"Delete relationships first or use CASCADE delete if supported.",
                operation="delete_node",
                context={
                    "table": table,
                    "node_uuid": node_uuid,
                    "constraint_error": str(e),
                },
                original_error=e,
            ) from e
        # Other database error
        raise DatabaseError(
            f"Failed to delete node from {table}",
            operation="delete_node",
            context={"table": table, "node_uuid": node_uuid, "user_id": user_id},
            original_error=e,
        ) from e

delete_relationship(from_table, to_table, rel_type, from_id, to_id, user_id)

Delete relationship between nodes.

Parameters:

Name Type Description Default
from_table str

Source node table name.

required
to_table str

Target node table name.

required
rel_type str

Relationship type.

required
from_id str

Source node ID.

required
to_id str

Target node ID.

required
user_id str

User ID for ownership verification.

required

Returns:

Name Type Description
bool bool

True if deletion succeeded, False if relationship not found.

Raises:

Type Description
DatabaseError

If relationship deletion fails due to system error.

Source code in src/memg_core/core/interfaces/kuzu.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
def delete_relationship(
    self,
    from_table: str,
    to_table: str,
    rel_type: str,
    from_id: str,
    to_id: str,
    user_id: str,
) -> bool:
    """Delete relationship between nodes.

    Args:
        from_table: Source node table name.
        to_table: Target node table name.
        rel_type: Relationship type.
        from_id: Source node ID.
        to_id: Target node ID.
        user_id: User ID for ownership verification.

    Returns:
        bool: True if deletion succeeded, False if relationship not found.

    Raises:
        DatabaseError: If relationship deletion fails due to system error.
    """
    try:
        # VALIDATE RELATIONSHIP AGAINST YAML SCHEMA - crash if invalid
        if not validate_relation_predicate(rel_type):
            raise ValueError(
                f"Invalid relationship predicate: {rel_type}. Must be defined in YAML schema."
            )

        # CRITICAL: Verify both nodes belong to the user before deleting relationship
        # First check if both nodes exist and belong to the user
        check_query = (
            f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}}), "
            f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
            f"RETURN a.id, b.id"
        )
        check_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}
        check_result = self.query(check_query, check_params)

        if not check_result:
            # Nodes don't exist or don't belong to user - return False (not found)
            return False

        # Generate relationship table name using YamlTranslator
        if not self.yaml_translator:
            raise DatabaseError(
                "YamlTranslator required for relationship operations",
                operation="delete_relationship",
                context={
                    "from_table": from_table,
                    "to_table": to_table,
                    "rel_type": rel_type,
                },
            )

        relationship_table_name = self.yaml_translator.relationship_table_name(
            source=from_table,
            predicate=rel_type,
            target=to_table,
            directed=True,  # Direction affects semantics but not table naming for now
        )

        # First check if the relationship exists
        check_rel_query = (
            f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}})"
            f"-[r:{relationship_table_name}]->"
            f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
            f"RETURN r"
        )
        check_rel_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}

        # Check if relationship exists
        relationship_exists = self.query(check_rel_query, check_rel_params)
        if not relationship_exists:
            # Relationship doesn't exist - return False
            return False

        # Delete the specific relationship (we know it exists)
        delete_query = (
            f"MATCH (a:{from_table} {{id: $from_id, user_id: $user_id}})"
            f"-[r:{relationship_table_name}]->"
            f"(b:{to_table} {{id: $to_id, user_id: $user_id}}) "
            f"DELETE r"
        )
        delete_params = {"from_id": from_id, "to_id": to_id, "user_id": user_id}

        # Execute deletion
        self.conn.execute(delete_query, parameters=delete_params)

        # If we get here, deletion succeeded
        return True

    except Exception as e:
        if isinstance(e, ValueError):
            # Re-raise validation errors as-is
            raise
        raise DatabaseError(
            f"Failed to delete relationship {rel_type}",
            operation="delete_relationship",
            context={
                "from_table": from_table,
                "to_table": to_table,
                "rel_type": rel_type,
                "from_id": from_id,
                "to_id": to_id,
            },
            original_error=e,
        ) from e

get_nodes(user_id, node_type=None, filters=None, limit=50, offset=0)

Get multiple nodes with filtering and pagination.

Parameters:

Name Type Description Default
user_id str

User ID for ownership verification.

required
node_type str | None

Optional node type filter (e.g., "task", "note").

None
filters dict[str, Any] | None

Optional field filters (e.g., {"status": "open"}).

None
limit int

Maximum number of nodes to return.

50
offset int

Number of nodes to skip for pagination.

0

Returns:

Type Description
list[dict[str, Any]]

list[dict[str, Any]]: List of node data from Kuzu.

Raises:

Type Description
DatabaseError

If node retrieval fails.

Source code in src/memg_core/core/interfaces/kuzu.py
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
def get_nodes(
    self,
    user_id: str,
    node_type: str | None = None,
    filters: dict[str, Any] | None = None,
    limit: int = 50,
    offset: int = 0,
) -> list[dict[str, Any]]:
    """Get multiple nodes with filtering and pagination.

    Args:
        user_id: User ID for ownership verification.
        node_type: Optional node type filter (e.g., "task", "note").
        filters: Optional field filters (e.g., {"status": "open"}).
        limit: Maximum number of nodes to return.
        offset: Number of nodes to skip for pagination.

    Returns:
        list[dict[str, Any]]: List of node data from Kuzu.

    Raises:
        DatabaseError: If node retrieval fails.
    """
    try:
        filters = filters or {}

        # Build MATCH clause
        if node_type:
            match_clause = f"MATCH (n:{node_type} {{user_id: $user_id"
        else:
            match_clause = "MATCH (n {user_id: $user_id"

        # Add field filters
        params = {"user_id": user_id, "limit": limit, "offset": offset}
        for field_name, field_value in filters.items():
            param_name = f"filter_{field_name}"
            match_clause += f", {field_name}: ${param_name}"
            params[param_name] = field_value

        match_clause += "})"

        # Build complete query
        cypher_query = f"""
        {match_clause}
        RETURN n.id as id,
               n.user_id as user_id,
               n.memory_type as memory_type,
               n.created_at as created_at,
               n.updated_at as updated_at,
               n as node
        ORDER BY n.created_at DESC
        SKIP $offset
        LIMIT $limit
        """

        return self.query(cypher_query, params)

    except Exception as e:
        raise DatabaseError(
            "Failed to get nodes from Kuzu",
            operation="get_nodes",
            context={
                "user_id": user_id,
                "node_type": node_type,
                "filters": filters,
                "limit": limit,
                "offset": offset,
            },
            original_error=e,
        ) from e

neighbors(node_label, node_uuid, user_id, rel_types=None, direction='any', limit=10, neighbor_label=None)

Fetch neighbors of a node by UUID only.

Parameters:

Name Type Description Default
node_label str

Node type/table name (e.g., "Memory", "bug") - NOT a UUID.

required
node_uuid str

UUID of the specific node to find neighbors for.

required
user_id str

User ID for isolation - only return neighbors belonging to this user.

required
rel_types list[str] | None

List of relationship types to filter by.

None
direction str

"in", "out", or "any" for relationship direction.

'any'
limit int

Maximum number of neighbors to return.

10
neighbor_label str | None

Type of neighbor nodes to return.

None

Returns:

Type Description
list[dict[str, Any]]

list[dict[str, Any]]: List of neighbor nodes with relationship info.

Raises:

Type Description
ValueError

If node_label is a UUID or node_uuid is not a UUID.

DatabaseError

If neighbor query fails.

Source code in src/memg_core/core/interfaces/kuzu.py
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
def neighbors(
    self,
    node_label: str,
    node_uuid: str,
    user_id: str,
    rel_types: list[str] | None = None,
    direction: str = "any",
    limit: int = 10,
    neighbor_label: str | None = None,
) -> list[dict[str, Any]]:
    """Fetch neighbors of a node by UUID only.

    Args:
        node_label: Node type/table name (e.g., "Memory", "bug") - NOT a UUID.
        node_uuid: UUID of the specific node to find neighbors for.
        user_id: User ID for isolation - only return neighbors belonging to this user.
        rel_types: List of relationship types to filter by.
        direction: "in", "out", or "any" for relationship direction.
        limit: Maximum number of neighbors to return.
        neighbor_label: Type of neighbor nodes to return.

    Returns:
        list[dict[str, Any]]: List of neighbor nodes with relationship info.

    Raises:
        ValueError: If node_label is a UUID or node_uuid is not a UUID.
        DatabaseError: If neighbor query fails.
    """
    # Validate parameters to prevent common bugs
    if self._is_uuid(node_label):
        raise ValueError(
            f"node_label must be a node type (e.g., 'Memory', 'bug'), not UUID: {node_label}. "
            f"UUIDs should be passed as node_uuid parameter."
        )

    if not self._is_uuid(node_uuid):
        raise ValueError(f"node_uuid must be a valid UUID format, got: {node_uuid}")

    try:
        # Use YamlTranslator to expand predicates to concrete relationship labels
        if not self.yaml_translator:
            raise DatabaseError(
                "YamlTranslator required for neighbor operations",
                operation="neighbors",
                context={"node_label": node_label, "rel_types": rel_types},
            )

        # Get concrete relationship labels for this source and predicates
        if rel_types:
            relationship_labels = self.yaml_translator.get_labels_for_predicates(
                source_type=node_label,
                predicates=rel_types,
                neighbor_label=neighbor_label,
            )
            if not relationship_labels:
                # No matching relationships found - return empty
                return []

            # Create relationship pattern with specific labels
            rel_filter = "|".join(relationship_labels)
            rel_part = f":{rel_filter}"
        else:
            # No filtering - match all relationships
            rel_part = ""

        # CRITICAL: User isolation - both source node and neighbors must belong to user
        node_condition = f"a:{node_label} {{id: $node_uuid, user_id: $user_id}}"
        neighbor = f":{neighbor_label}" if neighbor_label else ""
        neighbor_condition = f"n{neighbor} {{user_id: $user_id}}"

        # Build direction-aware pattern
        if direction == "out":
            pattern = f"({node_condition})-[r{rel_part}]->({neighbor_condition})"
        elif direction == "in":
            pattern = f"({node_condition})<-[r{rel_part}]-({neighbor_condition})"
        else:
            pattern = f"({node_condition})-[r{rel_part}]-({neighbor_condition})"

        # Return neighbors only if they belong to the same user
        cypher = f"""
        MATCH {pattern}
        RETURN DISTINCT n.id as id,
                        n.user_id as user_id,
                        n.memory_type as memory_type,
                        n.created_at as created_at,
                        label(r) as rel_type,
                        n as node
        LIMIT $limit
        """
        params = {"node_uuid": node_uuid, "user_id": user_id, "limit": limit}
        return self.query(cypher, params)
    except Exception as e:
        raise DatabaseError(
            "Failed to fetch neighbors",
            operation="neighbors",
            context={
                "node_label": node_label,
                "node_uuid": node_uuid,
                "user_id": user_id,
                "rel_types": rel_types,
                "direction": direction,
            },
            original_error=e,
        ) from e

query(cypher, params=None)

Execute Cypher query and return results.

Parameters:

Name Type Description Default
cypher str

Cypher query string.

required
params dict[str, Any] | None

Query parameters.

None

Returns:

Type Description
list[dict[str, Any]]

list[dict[str, Any]]: Query results.

Raises:

Type Description
DatabaseError

If query execution fails.

Source code in src/memg_core/core/interfaces/kuzu.py
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
def query(self, cypher: str, params: dict[str, Any] | None = None) -> list[dict[str, Any]]:
    """Execute Cypher query and return results.

    Args:
        cypher: Cypher query string.
        params: Query parameters.

    Returns:
        list[dict[str, Any]]: Query results.

    Raises:
        DatabaseError: If query execution fails.
    """
    try:
        qr = self.conn.execute(cypher, parameters=params or {})
        return self._extract_query_results(qr)
    except Exception as e:
        raise DatabaseError(
            "Failed to execute Kuzu query",
            operation="query",
            context={"cypher": cypher, "params": params},
            original_error=e,
        ) from e

update_node(table, node_uuid, properties, user_id)

Update a node in the graph - pure CRUD operation.

Parameters:

Name Type Description Default
table str

Node table name.

required
node_uuid str

UUID of the node to update.

required
properties dict[str, Any]

Node properties to update.

required
user_id str

User ID for ownership verification.

required

Returns:

Name Type Description
bool bool

True if update succeeded, False if node not found.

Raises:

Type Description
DatabaseError

If node update fails due to system error.

Source code in src/memg_core/core/interfaces/kuzu.py
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def update_node(
    self, table: str, node_uuid: str, properties: dict[str, Any], user_id: str
) -> bool:
    """Update a node in the graph - pure CRUD operation.

    Args:
        table: Node table name.
        node_uuid: UUID of the node to update.
        properties: Node properties to update.
        user_id: User ID for ownership verification.

    Returns:
        bool: True if update succeeded, False if node not found.

    Raises:
        DatabaseError: If node update fails due to system error.
    """
    try:
        # CRITICAL: Check if node exists AND belongs to user
        check_query = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) RETURN n.id as id"
        check_result = self.query(check_query, {"uuid": node_uuid, "user_id": user_id})

        if not check_result:
            # Node doesn't exist for this user
            return False

        # Build SET clause for properties
        set_clauses = []
        params = {"uuid": node_uuid, "user_id": user_id}

        for key, value in properties.items():
            # Skip system fields that shouldn't be updated via this method
            if key in ("id", "user_id"):
                continue

            param_name = f"prop_{key}"
            set_clauses.append(f"n.{key} = ${param_name}")
            params[param_name] = value

        if not set_clauses:
            # No properties to update (all were system fields)
            return True

        # Execute update query
        set_clause = ", ".join(set_clauses)
        update_query = f"MATCH (n:{table} {{id: $uuid, user_id: $user_id}}) SET {set_clause}"
        self.conn.execute(update_query, parameters=params)

        return True

    except Exception as e:
        raise DatabaseError(
            f"Failed to update node in {table}",
            operation="update_node",
            context={
                "table": table,
                "node_uuid": node_uuid,
                "properties": properties,
                "user_id": user_id,
            },
            original_error=e,
        ) from e

QdrantInterface

Pure CRUD wrapper around QdrantClient - NO DDL operations.

Attributes:

Name Type Description
client

Pre-initialized QdrantClient.

collection_name

Name of the Qdrant collection.

Source code in src/memg_core/core/interfaces/qdrant.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
class QdrantInterface:
    """Pure CRUD wrapper around QdrantClient - NO DDL operations.

    Attributes:
        client: Pre-initialized QdrantClient.
        collection_name: Name of the Qdrant collection.
    """

    def __init__(self, client: QdrantClient, collection_name: str):
        """Initialize with pre-created client and collection.

        Args:
            client: Pre-initialized QdrantClient from DatabaseClients.
            collection_name: Name of pre-created collection.
        """
        self.client = client
        self.collection_name = collection_name

    def add_point(
        self,
        vector: list[float],
        payload: dict[str, Any],
        point_id: str | None = None,
        collection: str | None = None,
    ) -> tuple[bool, str]:
        """Add a single point to collection - pure CRUD operation.

        Args:
            vector: Embedding vector.
            payload: Point payload data.
            point_id: Optional point ID (auto-generated if None).
            collection: Optional collection name override.

        Returns:
            tuple[bool, str]: (success, point_id) where success indicates if operation succeeded.

        Raises:
            DatabaseError: If point addition fails.
        """
        try:
            collection = collection or self.collection_name

            if point_id is None:
                point_id = str(uuid.uuid4())

            # Create PointStruct with VectorStruct
            point = PointStruct(id=point_id, vector=vector, payload=payload)
            result = self.client.upsert(collection_name=collection, points=[point])

            # print("Qdrant result", result)

            # Determine success from returned UpdateResult status
            success = True
            status = getattr(result, "status", None)
            if status is not None:
                status_str = (
                    getattr(status, "value", None) or getattr(status, "name", None) or str(status)
                )
                status_str = str(status_str).lower()
                success = status_str in ("acknowledged", "completed")

            return success, point_id

        except Exception as e:
            raise DatabaseError(
                "Qdrant add_point error",
                operation="add_point",
                original_error=e,
            ) from e

    def search_points(
        self,
        vector: list[float],
        limit: int = 5,
        collection: str | None = None,
        filters: dict[str, Any] | None = None,
        score_threshold: float | None = None,
    ) -> list[dict[str, Any]]:
        """Search for similar points with mandatory user isolation - pure CRUD operation.

        Args:
            vector: Query embedding vector.
            limit: Maximum number of results.
            collection: Optional collection name override.
            filters: Search filters (must include user_id for security).
            score_threshold: Minimum similarity score threshold (0.0-1.0).

        Returns:
            list[dict[str, Any]]: List of search results with id, score, and payload.

        Raises:
            DatabaseError: If search fails or user_id is missing from filters.
        """
        try:
            # CRITICAL SECURITY: Validate user_id is present in filters
            if not filters or "user_id" not in filters:
                raise DatabaseError(
                    "user_id is mandatory in filters for data isolation",
                    operation="search_points_validation",
                    context={"filters": filters},
                )

            user_id = filters["user_id"]
            if not user_id or not isinstance(user_id, str) or not user_id.strip():
                raise DatabaseError(
                    "user_id must be a non-empty string",
                    operation="search_points_validation",
                    context={"user_id": user_id},
                )

            collection = collection or self.collection_name

            # Build query filter
            query_filter = None
            filter_conditions = []

            # Add user_id filter - flat payload structure (always required)
            filter_conditions.append(FieldCondition(key="user_id", match=MatchValue(value=user_id)))

            # Add additional filters (skip user_id since it's already added)
            for key, value in filters.items():
                if key == "user_id" or value is None:
                    continue
                # Handle range filters
                if isinstance(value, dict):
                    range_kwargs = {}
                    for bound_key in ("gt", "gte", "lt", "lte"):
                        if bound_key in value and value[bound_key] is not None:
                            range_kwargs[bound_key] = value[bound_key]
                    if range_kwargs:
                        filter_conditions.append(
                            FieldCondition(key=key, range=Range(**range_kwargs))
                        )
                        continue
                # Handle list values
                if isinstance(value, list):
                    filter_conditions.append(FieldCondition(key=key, match=MatchAny(any=value)))
                elif not isinstance(value, dict):  # Skip dict values that weren't handled as ranges
                    filter_conditions.append(FieldCondition(key=key, match=MatchValue(value=value)))

            if filter_conditions:
                # Use type ignore for the Filter argument type mismatch
                query_filter = Filter(must=filter_conditions)  # type: ignore

            # Search using modern API
            results = self.client.query_points(
                collection_name=collection,
                query=vector,
                limit=limit,
                query_filter=query_filter,
                score_threshold=score_threshold,
            ).points

            # Convert to simplified results (score_threshold already applied by Qdrant)
            return [
                {
                    "id": str(result.id),
                    "score": result.score,
                    "payload": result.payload,
                }
                for result in results
            ]

        except Exception as e:
            raise DatabaseError(
                "Qdrant search_points error",
                operation="search_points",
                original_error=e,
            ) from e

    def get_point(self, point_id: str, collection: str | None = None) -> dict[str, Any] | None:
        """Get a single point by ID - pure CRUD operation.

        Args:
            point_id: ID of the point to retrieve.
            collection: Optional collection name override.

        Returns:
            dict[str, Any] | None: Point data including id, vector, and payload, or None if not found.

        Raises:
            DatabaseError: If retrieval fails.
        """
        try:
            collection = collection or self.collection_name

            result = self.client.retrieve(
                collection_name=collection,
                ids=[point_id],
            )

            if result:
                point = result[0]
                return {
                    "id": str(point.id),
                    "vector": point.vector,
                    "payload": point.payload,
                }
            return None
        except Exception as e:
            raise DatabaseError(
                "Qdrant get_point error",
                operation="get_point",
                original_error=e,
            ) from e

    def delete_points(
        self, point_ids: list[str], user_id: str, collection: str | None = None
    ) -> bool:
        """Delete points by IDs with user ownership verification.

        Args:
            point_ids: List of point IDs to delete.
            user_id: User ID for ownership verification.
            collection: Optional collection name override.

        Returns:
            bool: True if deletion succeeded.

        Raises:
            DatabaseError: If points not found or don't belong to user, or deletion fails.
        """
        try:
            collection = collection or self.collection_name

            # CRITICAL: Verify user ownership before deletion
            # First, verify all points belong to the user
            for point_id in point_ids:
                points = self.client.retrieve(
                    collection_name=collection, ids=[point_id], with_payload=True
                )

                if not points or not (
                    points[0].payload and points[0].payload.get("user_id") == user_id
                ):
                    raise DatabaseError(
                        f"Point {point_id} not found or doesn't belong to user {user_id}",
                        operation="delete_points",
                        context={"point_id": point_id, "user_id": user_id},
                    )

            # If all points belong to user, proceed with deletion
            self.client.delete(
                collection_name=collection,
                points_selector=PointIdsList(points=[str(pid) for pid in point_ids]),
            )
            return True
        except Exception as e:
            raise DatabaseError(
                "Qdrant delete_points error",
                operation="delete_points",
                original_error=e,
            ) from e

    def get_collection_info(self, collection: str | None = None) -> dict[str, Any]:
        """Get collection information - pure read operation.

        Args:
            collection: Optional collection name override.

        Returns:
            dict[str, Any]: Collection information including existence, vector count, point count, and config.

        Raises:
            DatabaseError: If collection info retrieval fails.
        """
        try:
            collection = collection or self.collection_name

            info = self.client.get_collection(collection_name=collection)
            # Handle different types of vector params
            vector_size = None
            vector_distance = None

            vectors_param = info.config.params.vectors
            if vectors_param is not None:
                if hasattr(vectors_param, "size"):
                    vector_size = vectors_param.size  # type: ignore
                    vector_distance = vectors_param.distance  # type: ignore
                elif isinstance(vectors_param, dict):
                    # For multi-vector collections, use the first vector's params
                    if vectors_param:
                        vector_values = list(vectors_param.values())
                        if vector_values:
                            first_vector = vector_values[0]
                            vector_size = first_vector.size
                            vector_distance = first_vector.distance

            return {
                "exists": True,
                "vectors_count": info.vectors_count,
                "points_count": info.points_count,
                "config": {
                    "vector_size": vector_size,
                    "distance": vector_distance,
                },
            }
        except Exception as e:
            raise DatabaseError(
                "Qdrant get_collection_info error",
                operation="get_collection_info",
                original_error=e,
            ) from e

__init__(client, collection_name)

Initialize with pre-created client and collection.

Parameters:

Name Type Description Default
client QdrantClient

Pre-initialized QdrantClient from DatabaseClients.

required
collection_name str

Name of pre-created collection.

required
Source code in src/memg_core/core/interfaces/qdrant.py
28
29
30
31
32
33
34
35
36
def __init__(self, client: QdrantClient, collection_name: str):
    """Initialize with pre-created client and collection.

    Args:
        client: Pre-initialized QdrantClient from DatabaseClients.
        collection_name: Name of pre-created collection.
    """
    self.client = client
    self.collection_name = collection_name

add_point(vector, payload, point_id=None, collection=None)

Add a single point to collection - pure CRUD operation.

Parameters:

Name Type Description Default
vector list[float]

Embedding vector.

required
payload dict[str, Any]

Point payload data.

required
point_id str | None

Optional point ID (auto-generated if None).

None
collection str | None

Optional collection name override.

None

Returns:

Type Description
tuple[bool, str]

tuple[bool, str]: (success, point_id) where success indicates if operation succeeded.

Raises:

Type Description
DatabaseError

If point addition fails.

Source code in src/memg_core/core/interfaces/qdrant.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def add_point(
    self,
    vector: list[float],
    payload: dict[str, Any],
    point_id: str | None = None,
    collection: str | None = None,
) -> tuple[bool, str]:
    """Add a single point to collection - pure CRUD operation.

    Args:
        vector: Embedding vector.
        payload: Point payload data.
        point_id: Optional point ID (auto-generated if None).
        collection: Optional collection name override.

    Returns:
        tuple[bool, str]: (success, point_id) where success indicates if operation succeeded.

    Raises:
        DatabaseError: If point addition fails.
    """
    try:
        collection = collection or self.collection_name

        if point_id is None:
            point_id = str(uuid.uuid4())

        # Create PointStruct with VectorStruct
        point = PointStruct(id=point_id, vector=vector, payload=payload)
        result = self.client.upsert(collection_name=collection, points=[point])

        # print("Qdrant result", result)

        # Determine success from returned UpdateResult status
        success = True
        status = getattr(result, "status", None)
        if status is not None:
            status_str = (
                getattr(status, "value", None) or getattr(status, "name", None) or str(status)
            )
            status_str = str(status_str).lower()
            success = status_str in ("acknowledged", "completed")

        return success, point_id

    except Exception as e:
        raise DatabaseError(
            "Qdrant add_point error",
            operation="add_point",
            original_error=e,
        ) from e

delete_points(point_ids, user_id, collection=None)

Delete points by IDs with user ownership verification.

Parameters:

Name Type Description Default
point_ids list[str]

List of point IDs to delete.

required
user_id str

User ID for ownership verification.

required
collection str | None

Optional collection name override.

None

Returns:

Name Type Description
bool bool

True if deletion succeeded.

Raises:

Type Description
DatabaseError

If points not found or don't belong to user, or deletion fails.

Source code in src/memg_core/core/interfaces/qdrant.py
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
def delete_points(
    self, point_ids: list[str], user_id: str, collection: str | None = None
) -> bool:
    """Delete points by IDs with user ownership verification.

    Args:
        point_ids: List of point IDs to delete.
        user_id: User ID for ownership verification.
        collection: Optional collection name override.

    Returns:
        bool: True if deletion succeeded.

    Raises:
        DatabaseError: If points not found or don't belong to user, or deletion fails.
    """
    try:
        collection = collection or self.collection_name

        # CRITICAL: Verify user ownership before deletion
        # First, verify all points belong to the user
        for point_id in point_ids:
            points = self.client.retrieve(
                collection_name=collection, ids=[point_id], with_payload=True
            )

            if not points or not (
                points[0].payload and points[0].payload.get("user_id") == user_id
            ):
                raise DatabaseError(
                    f"Point {point_id} not found or doesn't belong to user {user_id}",
                    operation="delete_points",
                    context={"point_id": point_id, "user_id": user_id},
                )

        # If all points belong to user, proceed with deletion
        self.client.delete(
            collection_name=collection,
            points_selector=PointIdsList(points=[str(pid) for pid in point_ids]),
        )
        return True
    except Exception as e:
        raise DatabaseError(
            "Qdrant delete_points error",
            operation="delete_points",
            original_error=e,
        ) from e

get_collection_info(collection=None)

Get collection information - pure read operation.

Parameters:

Name Type Description Default
collection str | None

Optional collection name override.

None

Returns:

Type Description
dict[str, Any]

dict[str, Any]: Collection information including existence, vector count, point count, and config.

Raises:

Type Description
DatabaseError

If collection info retrieval fails.

Source code in src/memg_core/core/interfaces/qdrant.py
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
def get_collection_info(self, collection: str | None = None) -> dict[str, Any]:
    """Get collection information - pure read operation.

    Args:
        collection: Optional collection name override.

    Returns:
        dict[str, Any]: Collection information including existence, vector count, point count, and config.

    Raises:
        DatabaseError: If collection info retrieval fails.
    """
    try:
        collection = collection or self.collection_name

        info = self.client.get_collection(collection_name=collection)
        # Handle different types of vector params
        vector_size = None
        vector_distance = None

        vectors_param = info.config.params.vectors
        if vectors_param is not None:
            if hasattr(vectors_param, "size"):
                vector_size = vectors_param.size  # type: ignore
                vector_distance = vectors_param.distance  # type: ignore
            elif isinstance(vectors_param, dict):
                # For multi-vector collections, use the first vector's params
                if vectors_param:
                    vector_values = list(vectors_param.values())
                    if vector_values:
                        first_vector = vector_values[0]
                        vector_size = first_vector.size
                        vector_distance = first_vector.distance

        return {
            "exists": True,
            "vectors_count": info.vectors_count,
            "points_count": info.points_count,
            "config": {
                "vector_size": vector_size,
                "distance": vector_distance,
            },
        }
    except Exception as e:
        raise DatabaseError(
            "Qdrant get_collection_info error",
            operation="get_collection_info",
            original_error=e,
        ) from e

get_point(point_id, collection=None)

Get a single point by ID - pure CRUD operation.

Parameters:

Name Type Description Default
point_id str

ID of the point to retrieve.

required
collection str | None

Optional collection name override.

None

Returns:

Type Description
dict[str, Any] | None

dict[str, Any] | None: Point data including id, vector, and payload, or None if not found.

Raises:

Type Description
DatabaseError

If retrieval fails.

Source code in src/memg_core/core/interfaces/qdrant.py
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
def get_point(self, point_id: str, collection: str | None = None) -> dict[str, Any] | None:
    """Get a single point by ID - pure CRUD operation.

    Args:
        point_id: ID of the point to retrieve.
        collection: Optional collection name override.

    Returns:
        dict[str, Any] | None: Point data including id, vector, and payload, or None if not found.

    Raises:
        DatabaseError: If retrieval fails.
    """
    try:
        collection = collection or self.collection_name

        result = self.client.retrieve(
            collection_name=collection,
            ids=[point_id],
        )

        if result:
            point = result[0]
            return {
                "id": str(point.id),
                "vector": point.vector,
                "payload": point.payload,
            }
        return None
    except Exception as e:
        raise DatabaseError(
            "Qdrant get_point error",
            operation="get_point",
            original_error=e,
        ) from e

search_points(vector, limit=5, collection=None, filters=None, score_threshold=None)

Search for similar points with mandatory user isolation - pure CRUD operation.

Parameters:

Name Type Description Default
vector list[float]

Query embedding vector.

required
limit int

Maximum number of results.

5
collection str | None

Optional collection name override.

None
filters dict[str, Any] | None

Search filters (must include user_id for security).

None
score_threshold float | None

Minimum similarity score threshold (0.0-1.0).

None

Returns:

Type Description
list[dict[str, Any]]

list[dict[str, Any]]: List of search results with id, score, and payload.

Raises:

Type Description
DatabaseError

If search fails or user_id is missing from filters.

Source code in src/memg_core/core/interfaces/qdrant.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
def search_points(
    self,
    vector: list[float],
    limit: int = 5,
    collection: str | None = None,
    filters: dict[str, Any] | None = None,
    score_threshold: float | None = None,
) -> list[dict[str, Any]]:
    """Search for similar points with mandatory user isolation - pure CRUD operation.

    Args:
        vector: Query embedding vector.
        limit: Maximum number of results.
        collection: Optional collection name override.
        filters: Search filters (must include user_id for security).
        score_threshold: Minimum similarity score threshold (0.0-1.0).

    Returns:
        list[dict[str, Any]]: List of search results with id, score, and payload.

    Raises:
        DatabaseError: If search fails or user_id is missing from filters.
    """
    try:
        # CRITICAL SECURITY: Validate user_id is present in filters
        if not filters or "user_id" not in filters:
            raise DatabaseError(
                "user_id is mandatory in filters for data isolation",
                operation="search_points_validation",
                context={"filters": filters},
            )

        user_id = filters["user_id"]
        if not user_id or not isinstance(user_id, str) or not user_id.strip():
            raise DatabaseError(
                "user_id must be a non-empty string",
                operation="search_points_validation",
                context={"user_id": user_id},
            )

        collection = collection or self.collection_name

        # Build query filter
        query_filter = None
        filter_conditions = []

        # Add user_id filter - flat payload structure (always required)
        filter_conditions.append(FieldCondition(key="user_id", match=MatchValue(value=user_id)))

        # Add additional filters (skip user_id since it's already added)
        for key, value in filters.items():
            if key == "user_id" or value is None:
                continue
            # Handle range filters
            if isinstance(value, dict):
                range_kwargs = {}
                for bound_key in ("gt", "gte", "lt", "lte"):
                    if bound_key in value and value[bound_key] is not None:
                        range_kwargs[bound_key] = value[bound_key]
                if range_kwargs:
                    filter_conditions.append(
                        FieldCondition(key=key, range=Range(**range_kwargs))
                    )
                    continue
            # Handle list values
            if isinstance(value, list):
                filter_conditions.append(FieldCondition(key=key, match=MatchAny(any=value)))
            elif not isinstance(value, dict):  # Skip dict values that weren't handled as ranges
                filter_conditions.append(FieldCondition(key=key, match=MatchValue(value=value)))

        if filter_conditions:
            # Use type ignore for the Filter argument type mismatch
            query_filter = Filter(must=filter_conditions)  # type: ignore

        # Search using modern API
        results = self.client.query_points(
            collection_name=collection,
            query=vector,
            limit=limit,
            query_filter=query_filter,
            score_threshold=score_threshold,
        ).points

        # Convert to simplified results (score_threshold already applied by Qdrant)
        return [
            {
                "id": str(result.id),
                "score": result.score,
                "payload": result.payload,
            }
            for result in results
        ]

    except Exception as e:
        raise DatabaseError(
            "Qdrant search_points error",
            operation="search_points",
            original_error=e,
        ) from e

YamlTranslator

Translates YAML schema definitions to Pydantic models for strict validation.

Attributes:

Name Type Description
yaml_path

Path to YAML schema file.

_schema dict[str, Any] | None

Cached schema dictionary.

Source code in src/memg_core/core/yaml_translator.py
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
class YamlTranslator:
    """Translates YAML schema definitions to Pydantic models for strict validation.

    Attributes:
        yaml_path: Path to YAML schema file.
        _schema: Cached schema dictionary.
    """

    def __init__(self, yaml_path: str | None = None) -> None:
        """Initialize YamlTranslator with YAML schema path.

        Args:
            yaml_path: Path to YAML schema file. If None, uses MEMG_YAML_SCHEMA env var.

        Raises:
            YamlTranslatorError: If YAML path not provided or TypeRegistry initialization fails.
        """
        # Require explicit YAML path - no silent defaults
        if yaml_path:
            self.yaml_path = yaml_path
        else:
            env_path = os.getenv("MEMG_YAML_SCHEMA")
            if not env_path:
                raise YamlTranslatorError(
                    "YAML schema path required. Set MEMG_YAML_SCHEMA environment variable "
                    "or provide yaml_path parameter. No defaults allowed."
                )
            self.yaml_path = env_path

        self._schema: dict[str, Any] | None = None
        # NO model cache - TypeRegistry handles all caching

        # Initialize TypeRegistry from YAML - crash early if invalid
        try:
            initialize_types_from_yaml(self.yaml_path)
        except Exception as e:
            raise YamlTranslatorError(f"Failed to initialize TypeRegistry from YAML: {e}") from e

    @property
    def schema(self) -> dict[str, Any]:
        """Get the loaded YAML schema, loading it if necessary."""
        if self._schema is not None:
            return self._schema

        # Load schema from the required path - no fallbacks
        if not self.yaml_path:
            raise YamlTranslatorError(
                "YAML schema path not set. This should not happen after __init__."
            )

        self._schema = self._load_schema()
        return self._schema

    def _load_schema(self) -> dict[str, Any]:
        """Load schema from the current yaml_path."""
        if not self.yaml_path:
            raise YamlTranslatorError("YAML path is None")
        path = Path(self.yaml_path)
        if not path.exists():
            raise YamlTranslatorError(f"YAML schema not found at {path}")
        try:
            with path.open(encoding="utf-8") as f:
                data = yaml.safe_load(f)
            if not data:
                raise YamlTranslatorError("Empty YAML schema")
            if not isinstance(data, dict):
                raise YamlTranslatorError("YAML schema root must be a mapping")
            return data
        except yaml.YAMLError as e:
            raise YamlTranslatorError(f"Invalid YAML syntax: {e}") from e

    def _entities_map(self) -> dict[str, dict[str, Any]]:
        sch = self.schema
        ents = sch.get("entities")
        if not ents:
            return {}
        if isinstance(ents, dict):
            # Normalize keys to lower
            return {str(k).lower(): v for k, v in ents.items()}
        # list form
        out: dict[str, dict[str, Any]] = {}
        for item in ents:
            if not isinstance(item, dict):
                continue
            key = (item.get("name") or item.get("type") or "").lower()
            if key:
                out[key] = item
        return out

    def get_entity_types(self) -> list[str]:
        """Get list of available entity types from YAML schema."""
        return list(self._entities_map().keys())

    # ================== RELATIONSHIP PARSING (TARGET-FIRST FORMAT) ==================

    def _get_relations_mapping_for_entity(
        self, entity_name: str
    ) -> dict[str, list[dict[str, Any]]]:
        """Return raw relations mapping for an entity in target-first schema format.

        The expected YAML shape under an entity is:
            relations:
              target_entity_name:
                - name: ...
                  description: ...
                  predicate: PREDICATE_NAME
                  directed: true|false

        Returns an empty dict when no relations are defined.
        """
        entity_spec = self._resolve_entity_with_inheritance(entity_name)
        relations_section = entity_spec.get("relations")
        if not relations_section or not isinstance(relations_section, dict):
            return {}

        # Normalize keys to lower for targets; keep items as-is
        normalized: dict[str, list[dict[str, Any]]] = {}
        for target_name, items in relations_section.items():
            if not isinstance(items, list):
                # Skip invalid shapes silently at this layer; validation is higher-level
                continue
            normalized[str(target_name).lower()] = [i for i in items if isinstance(i, dict)]
        return normalized

    def get_relations_for_source(self, entity_name: str) -> list[dict[str, Any]]:
        """Get normalized relation specs for a source entity in target-first schema.

        Returns list of dicts with keys:
            - source (str)
            - target (str)
            - name (str | None)
            - description (str | None)
            - predicate (str)
            - directed (bool)
        """
        if not entity_name:
            raise YamlTranslatorError("Empty entity name")

        source_l = entity_name.lower()
        relations_map = self._get_relations_mapping_for_entity(source_l)
        if not relations_map:
            return []

        out: list[dict[str, Any]] = []
        for target_l, items in relations_map.items():
            for item in items:
                predicate = item.get("predicate")
                if not predicate or not isinstance(predicate, str):
                    # Skip invalid entries - strict behavior can be added later
                    continue
                directed = bool(item.get("directed", True))
                out.append(
                    {
                        "source": source_l,
                        "target": target_l,
                        "name": item.get("name"),
                        "description": item.get("description"),
                        "predicate": predicate.upper(),
                        "directed": directed,
                    }
                )
        return out

    @staticmethod
    def relationship_table_name(
        source: str,
        predicate: str,
        target: str,
        *,
        directed: bool = True,  # noqa: unused-argument
    ) -> str:
        """Generate relationship table name.

        For now, table name does not encode direction; direction affects creation/query semantics.
        Canonicalization for undirected pairs can be added here later if decided.
        """
        return f"{str(source).upper()}_{str(predicate).upper()}_{str(target).upper()}"

    def get_labels_for_predicates(
        self,
        source_type: str,
        predicates: list[str] | None,
        neighbor_label: str | None = None,
    ) -> list[str]:
        """Expand predicate names to concrete relationship labels for a given source.

        Args:
            source_type: Source entity type name
            predicates: List of predicate names to include (case-insensitive). If None, include all.
            neighbor_label: Optional target entity type filter (case-insensitive)

        Returns:
            List of concrete relationship labels (table names) matching the filter.
        """
        if not source_type:
            raise YamlTranslatorError("Empty source_type")

        preds_u = set(p.upper() for p in predicates) if predicates else None
        neighbor_l = neighbor_label.lower() if neighbor_label else None

        labels: list[str] = []
        for spec in self.get_relations_for_source(source_type):
            if preds_u is not None and spec["predicate"].upper() not in preds_u:
                continue
            if neighbor_l is not None and spec["target"].lower() != neighbor_l:
                continue
            labels.append(
                self.relationship_table_name(
                    source=spec["source"],
                    predicate=spec["predicate"],
                    target=spec["target"],
                    directed=spec["directed"],
                )
            )
        return labels

    def debug_relation_map(self) -> dict[str, dict[str, list[dict[str, Any]]]]:
        """Return a nested relation map for debugging/printing.

        Structure:
        {
          source: {
            target: [ {name, predicate, directed, description} ... ]
          }
        }
        """
        out: dict[str, dict[str, list[dict[str, Any]]]] = {}
        for source in self.get_entity_types():
            specs = self.get_relations_for_source(source)
            if not specs:
                continue
            if source not in out:
                out[source] = {}
            for spec in specs:
                target = spec["target"]
                out[source].setdefault(target, [])
                out[source][target].append(
                    {
                        "name": spec.get("name"),
                        "predicate": spec.get("predicate"),
                        "directed": spec.get("directed", True),
                        "description": spec.get("description"),
                    }
                )
        return out

    def get_anchor_field(self, entity_name: str) -> str:
        """Get the anchor field name for the given entity type from YAML schema.

        Now reads from vector.anchored_to instead of separate anchor field.

        Args:
            entity_name: Name of the entity type.

        Returns:
            str: Anchor field name.

        Raises:
            YamlTranslatorError: If anchor field not found.
        """
        if not entity_name:
            raise YamlTranslatorError("Empty entity name")

        # Get entity spec with inheritance resolution
        entity_spec = self._resolve_entity_with_inheritance(entity_name)

        # Look for vector field with anchored_to
        fields = entity_spec.get("fields", {})
        for _field_name, field_def in fields.items():
            if isinstance(field_def, dict) and field_def.get("type") == "vector":
                anchored_to = field_def.get("anchored_to")
                if anchored_to:
                    return str(anchored_to)

        raise YamlTranslatorError(
            f"Entity '{entity_name}' has no vector field with 'anchored_to' property"
        )

    def _resolve_entity_with_inheritance(self, entity_name: str) -> dict[str, Any]:
        """Resolve entity specification with full inheritance chain."""
        name_l = entity_name.lower()
        emap = self._entities_map()
        spec_raw = emap.get(name_l)
        if not spec_raw:
            raise YamlTranslatorError(f"Entity '{entity_name}' not found in YAML schema")

        # If no parent, return as-is
        parent_name = spec_raw.get("parent")
        if not parent_name:
            return spec_raw

        # Recursively resolve parent and merge fields
        parent_spec = self._resolve_entity_with_inheritance(parent_name)

        # Merge parent fields with child fields (child overrides parent)
        merged_fields = parent_spec.get("fields", {}).copy()
        merged_fields.update(spec_raw.get("fields", {}))

        # Create merged spec
        merged_spec = spec_raw.copy()
        merged_spec["fields"] = merged_fields

        return merged_spec

    def get_see_also_config(self, entity_name: str) -> dict[str, Any] | None:
        """Get the see_also configuration for the given entity type from YAML schema.

        Returns:
            Dict with keys: enabled, threshold, limit, target_types
            None if see_also is not configured for this entity
        """
        if not entity_name:
            raise YamlTranslatorError("Empty entity name")
        name_l = entity_name.lower()
        emap = self._entities_map()
        spec_raw = emap.get(name_l)
        if not spec_raw:
            raise YamlTranslatorError(f"Entity '{entity_name}' not found in YAML schema")

        see_also = spec_raw.get("see_also")
        if not see_also or not isinstance(see_also, dict):
            return None

        # Validate required fields
        if not see_also.get("enabled", False):
            return None

        return {
            "enabled": see_also.get("enabled", False),
            "threshold": float(see_also.get("threshold", 0.7)),
            "limit": int(see_also.get("limit", 3)),
            "target_types": list(see_also.get("target_types", [])),
        }

    def build_anchor_text(self, memory) -> str:
        """Build anchor text for embedding from YAML-defined anchor field.

        NO hardcoded field names - reads anchor field from YAML schema.

        Args:
            memory: Memory object containing payload data.

        Returns:
            str: Anchor text for embedding.

        Raises:
            YamlTranslatorError: If anchor field is missing or invalid.
        """
        mem_type = getattr(memory, "memory_type", None)
        if not mem_type:
            raise YamlTranslatorError(
                "Memory object missing 'memory_type' field",
                operation="build_anchor_text",
            )

        # Get anchor field from YAML schema
        anchor_field = self.get_anchor_field(mem_type)

        # Try to get anchor text from the specified field
        anchor_text = None

        # First check if it's a core field on the Memory object
        if hasattr(memory, anchor_field):
            anchor_text = getattr(memory, anchor_field, None)
        # Otherwise check in the payload
        elif hasattr(memory, "payload") and isinstance(memory.payload, dict):
            anchor_text = memory.payload.get(anchor_field)

        if isinstance(anchor_text, str):
            stripped_text = anchor_text.strip()
            if stripped_text:
                return stripped_text

        # Anchor field missing, empty, or invalid
        raise YamlTranslatorError(
            f"Anchor field '{anchor_field}' is missing, empty, or invalid "
            f"for memory type '{mem_type}'",
            operation="build_anchor_text",
            context={
                "memory_type": mem_type,
                "anchor_field": anchor_field,
                "anchor_value": anchor_text,
            },
        )

    def _fields_contract(self, spec: dict[str, Any]) -> tuple[list[str], list[str]]:
        """Extract required and optional fields from entity specification.

        Supports either:
        - fields: {required:[...], optional:[...]} format
        - Individual field definitions with required flags

        Args:
            spec: Entity specification dictionary.

        Returns:
            tuple[list[str], list[str]]: (required_fields, optional_fields)
        """
        # supports either fields: {required:[...], optional:[...]} OR flat dict
        fields = spec.get("fields") or {}
        if "required" in fields or "optional" in fields:
            req = [str(x) for x in fields.get("required", [])]
            opt = [str(x) for x in fields.get("optional", [])]
            return req, opt

        # Resolve all fields including inherited ones
        all_fields = self._resolve_inherited_fields(spec)

        # Parse individual field definitions for required flag
        required_fields = []
        optional_fields = []

        for field_name, field_def in all_fields.items():
            if isinstance(field_def, dict) and field_def.get("required", False):
                # Skip system fields - they're handled by the system
                if not field_def.get("system", False):
                    required_fields.append(field_name)
                else:
                    optional_fields.append(field_name)
            else:
                optional_fields.append(field_name)

        return required_fields, optional_fields

    def _resolve_inherited_fields(self, spec: dict[str, Any]) -> dict[str, Any]:
        """Resolve all fields including inherited ones from parent entities.

        Args:
            spec: Entity specification dictionary.

        Returns:
            dict[str, Any]: Dictionary containing all fields (inherited + current).
        """
        all_fields = {}
        entities_map = self._entities_map()

        # If entity has a parent, resolve parent fields first
        parent_name = spec.get("parent")
        if parent_name:
            parent_spec = entities_map.get(parent_name.lower())
            if parent_spec:
                # Recursively resolve parent fields
                parent_fields = self._resolve_inherited_fields(parent_spec)
                all_fields.update(parent_fields)

        # Add/override with current entity's fields
        current_fields = spec.get("fields") or {}
        all_fields.update(current_fields)

        return all_fields

    def _get_system_fields(self, spec: dict[str, Any]) -> set[str]:
        """Extract system fields from YAML schema (fields marked with system: true).

        Args:
            spec: Entity specification dictionary.

        Returns:
            set[str]: Set of field names that are marked as system fields.
        """
        system_fields = set()
        all_fields = self._resolve_inherited_fields(spec)

        for field_name, field_def in all_fields.items():
            if isinstance(field_def, dict) and field_def.get("system", False):
                system_fields.add(field_name)

        return system_fields

    def _validate_enum_fields(self, memory_type: str, payload: dict[str, Any]) -> None:
        """Validate enum fields against YAML schema choices.

        Args:
            memory_type: Entity type from YAML schema.
            payload: Memory data to validate.

        Raises:
            YamlTranslatorError: If enum field has invalid value.
        """
        emap = self._entities_map()
        spec = emap.get(memory_type.lower())
        if not spec:
            return  # Entity validation happens elsewhere

        # Get field definitions for this entity type
        fields = spec.get("fields", {})

        # Check each field in the payload
        for field_name, field_value in payload.items():
            if field_name in fields:
                field_def = fields[field_name]

                # Check if this is an enum field
                if field_def.get("type") == "enum":
                    choices = field_def.get("choices", [])

                    # Validate the value against choices
                    if field_value is not None and field_value not in choices:
                        raise YamlTranslatorError(
                            f"Invalid {field_name} value '{field_value}'. Valid choices: {choices}",
                            context={
                                "memory_type": memory_type,
                                "field_name": field_name,
                                "invalid_value": field_value,
                                "valid_choices": choices,
                            },
                        )

    def validate_memory_against_yaml(
        self, memory_type: str, payload: dict[str, Any]
    ) -> dict[str, Any]:
        """Validate memory payload against YAML schema and return cleaned payload."""
        if not memory_type:
            raise YamlTranslatorError("memory_type is required")
        if payload is None:
            raise YamlTranslatorError("payload is required")

        # Strict validation - entity type MUST exist in YAML
        emap = self._entities_map()
        spec = emap.get(memory_type.lower())
        if not spec:
            raise YamlTranslatorError(
                f"Unknown entity type '{memory_type}'. All types must be defined in YAML schema.",
                context={
                    "memory_type": memory_type,
                    "available_types": list(emap.keys()),
                },
            )

        req, _opt = self._fields_contract(spec)
        missing = [k for k in req if not payload.get(k)]
        if missing:
            raise YamlTranslatorError(
                f"Missing required fields: {missing}",
                context={"memory_type": memory_type},
            )

        # Validate enum fields against YAML schema choices
        self._validate_enum_fields(memory_type, payload)

        # Validate that all fields are defined in YAML schema
        req, opt = self._fields_contract(spec)
        valid_fields = set(req + opt)
        system_fields = self._get_system_fields(spec)
        invalid_fields = set(payload.keys()) - valid_fields - system_fields
        if invalid_fields:
            raise YamlTranslatorError(
                f"Invalid fields not defined in schema: {sorted(invalid_fields)}",
                context={
                    "memory_type": memory_type,
                    "valid_fields": sorted(valid_fields),
                    "invalid_fields": sorted(invalid_fields),
                },
            )

        # Strip system-reserved fields if present
        cleaned = dict(payload)
        for syskey in system_fields:
            cleaned.pop(syskey, None)
        return cleaned

    def create_memory_from_yaml(self, memory_type: str, payload: dict[str, Any], user_id: str):
        """Create a Memory object from YAML-validated payload."""

        # Get anchor field from YAML schema
        anchor_field = self.get_anchor_field(memory_type)

        # Extract anchor text from payload
        anchor_text = payload.get(anchor_field)
        if not anchor_text or not isinstance(anchor_text, str):
            raise YamlTranslatorError(
                f"Missing or invalid anchor field '{anchor_field}' in payload "
                f"for memory type '{memory_type}'"
            )

        # Validate full payload against YAML schema
        validated_payload = self.validate_memory_against_yaml(memory_type, payload)

        # Construct Memory with YAML-defined payload only
        return Memory(
            memory_type=memory_type,
            payload=validated_payload,
            user_id=user_id,
        )

    def get_entity_model(self, entity_name: str):
        """Get Pydantic model from TypeRegistry - NO REDUNDANCY."""
        return get_entity_model(entity_name)

schema property

Get the loaded YAML schema, loading it if necessary.

__init__(yaml_path=None)

Initialize YamlTranslator with YAML schema path.

Parameters:

Name Type Description Default
yaml_path str | None

Path to YAML schema file. If None, uses MEMG_YAML_SCHEMA env var.

None

Raises:

Type Description
YamlTranslatorError

If YAML path not provided or TypeRegistry initialization fails.

Source code in src/memg_core/core/yaml_translator.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def __init__(self, yaml_path: str | None = None) -> None:
    """Initialize YamlTranslator with YAML schema path.

    Args:
        yaml_path: Path to YAML schema file. If None, uses MEMG_YAML_SCHEMA env var.

    Raises:
        YamlTranslatorError: If YAML path not provided or TypeRegistry initialization fails.
    """
    # Require explicit YAML path - no silent defaults
    if yaml_path:
        self.yaml_path = yaml_path
    else:
        env_path = os.getenv("MEMG_YAML_SCHEMA")
        if not env_path:
            raise YamlTranslatorError(
                "YAML schema path required. Set MEMG_YAML_SCHEMA environment variable "
                "or provide yaml_path parameter. No defaults allowed."
            )
        self.yaml_path = env_path

    self._schema: dict[str, Any] | None = None
    # NO model cache - TypeRegistry handles all caching

    # Initialize TypeRegistry from YAML - crash early if invalid
    try:
        initialize_types_from_yaml(self.yaml_path)
    except Exception as e:
        raise YamlTranslatorError(f"Failed to initialize TypeRegistry from YAML: {e}") from e

build_anchor_text(memory)

Build anchor text for embedding from YAML-defined anchor field.

NO hardcoded field names - reads anchor field from YAML schema.

Parameters:

Name Type Description Default
memory

Memory object containing payload data.

required

Returns:

Name Type Description
str str

Anchor text for embedding.

Raises:

Type Description
YamlTranslatorError

If anchor field is missing or invalid.

Source code in src/memg_core/core/yaml_translator.py
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
def build_anchor_text(self, memory) -> str:
    """Build anchor text for embedding from YAML-defined anchor field.

    NO hardcoded field names - reads anchor field from YAML schema.

    Args:
        memory: Memory object containing payload data.

    Returns:
        str: Anchor text for embedding.

    Raises:
        YamlTranslatorError: If anchor field is missing or invalid.
    """
    mem_type = getattr(memory, "memory_type", None)
    if not mem_type:
        raise YamlTranslatorError(
            "Memory object missing 'memory_type' field",
            operation="build_anchor_text",
        )

    # Get anchor field from YAML schema
    anchor_field = self.get_anchor_field(mem_type)

    # Try to get anchor text from the specified field
    anchor_text = None

    # First check if it's a core field on the Memory object
    if hasattr(memory, anchor_field):
        anchor_text = getattr(memory, anchor_field, None)
    # Otherwise check in the payload
    elif hasattr(memory, "payload") and isinstance(memory.payload, dict):
        anchor_text = memory.payload.get(anchor_field)

    if isinstance(anchor_text, str):
        stripped_text = anchor_text.strip()
        if stripped_text:
            return stripped_text

    # Anchor field missing, empty, or invalid
    raise YamlTranslatorError(
        f"Anchor field '{anchor_field}' is missing, empty, or invalid "
        f"for memory type '{mem_type}'",
        operation="build_anchor_text",
        context={
            "memory_type": mem_type,
            "anchor_field": anchor_field,
            "anchor_value": anchor_text,
        },
    )

create_memory_from_yaml(memory_type, payload, user_id)

Create a Memory object from YAML-validated payload.

Source code in src/memg_core/core/yaml_translator.py
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
def create_memory_from_yaml(self, memory_type: str, payload: dict[str, Any], user_id: str):
    """Create a Memory object from YAML-validated payload."""

    # Get anchor field from YAML schema
    anchor_field = self.get_anchor_field(memory_type)

    # Extract anchor text from payload
    anchor_text = payload.get(anchor_field)
    if not anchor_text or not isinstance(anchor_text, str):
        raise YamlTranslatorError(
            f"Missing or invalid anchor field '{anchor_field}' in payload "
            f"for memory type '{memory_type}'"
        )

    # Validate full payload against YAML schema
    validated_payload = self.validate_memory_against_yaml(memory_type, payload)

    # Construct Memory with YAML-defined payload only
    return Memory(
        memory_type=memory_type,
        payload=validated_payload,
        user_id=user_id,
    )

debug_relation_map()

Return a nested relation map for debugging/printing.

Structure: { source: { target: [ {name, predicate, directed, description} ... ] } }

Source code in src/memg_core/core/yaml_translator.py
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
def debug_relation_map(self) -> dict[str, dict[str, list[dict[str, Any]]]]:
    """Return a nested relation map for debugging/printing.

    Structure:
    {
      source: {
        target: [ {name, predicate, directed, description} ... ]
      }
    }
    """
    out: dict[str, dict[str, list[dict[str, Any]]]] = {}
    for source in self.get_entity_types():
        specs = self.get_relations_for_source(source)
        if not specs:
            continue
        if source not in out:
            out[source] = {}
        for spec in specs:
            target = spec["target"]
            out[source].setdefault(target, [])
            out[source][target].append(
                {
                    "name": spec.get("name"),
                    "predicate": spec.get("predicate"),
                    "directed": spec.get("directed", True),
                    "description": spec.get("description"),
                }
            )
    return out

get_anchor_field(entity_name)

Get the anchor field name for the given entity type from YAML schema.

Now reads from vector.anchored_to instead of separate anchor field.

Parameters:

Name Type Description Default
entity_name str

Name of the entity type.

required

Returns:

Name Type Description
str str

Anchor field name.

Raises:

Type Description
YamlTranslatorError

If anchor field not found.

Source code in src/memg_core/core/yaml_translator.py
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
def get_anchor_field(self, entity_name: str) -> str:
    """Get the anchor field name for the given entity type from YAML schema.

    Now reads from vector.anchored_to instead of separate anchor field.

    Args:
        entity_name: Name of the entity type.

    Returns:
        str: Anchor field name.

    Raises:
        YamlTranslatorError: If anchor field not found.
    """
    if not entity_name:
        raise YamlTranslatorError("Empty entity name")

    # Get entity spec with inheritance resolution
    entity_spec = self._resolve_entity_with_inheritance(entity_name)

    # Look for vector field with anchored_to
    fields = entity_spec.get("fields", {})
    for _field_name, field_def in fields.items():
        if isinstance(field_def, dict) and field_def.get("type") == "vector":
            anchored_to = field_def.get("anchored_to")
            if anchored_to:
                return str(anchored_to)

    raise YamlTranslatorError(
        f"Entity '{entity_name}' has no vector field with 'anchored_to' property"
    )

get_entity_model(entity_name)

Get Pydantic model from TypeRegistry - NO REDUNDANCY.

Source code in src/memg_core/core/yaml_translator.py
619
620
621
def get_entity_model(self, entity_name: str):
    """Get Pydantic model from TypeRegistry - NO REDUNDANCY."""
    return get_entity_model(entity_name)

get_entity_types()

Get list of available entity types from YAML schema.

Source code in src/memg_core/core/yaml_translator.py
123
124
125
def get_entity_types(self) -> list[str]:
    """Get list of available entity types from YAML schema."""
    return list(self._entities_map().keys())

get_labels_for_predicates(source_type, predicates, neighbor_label=None)

Expand predicate names to concrete relationship labels for a given source.

Parameters:

Name Type Description Default
source_type str

Source entity type name

required
predicates list[str] | None

List of predicate names to include (case-insensitive). If None, include all.

required
neighbor_label str | None

Optional target entity type filter (case-insensitive)

None

Returns:

Type Description
list[str]

List of concrete relationship labels (table names) matching the filter.

Source code in src/memg_core/core/yaml_translator.py
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
def get_labels_for_predicates(
    self,
    source_type: str,
    predicates: list[str] | None,
    neighbor_label: str | None = None,
) -> list[str]:
    """Expand predicate names to concrete relationship labels for a given source.

    Args:
        source_type: Source entity type name
        predicates: List of predicate names to include (case-insensitive). If None, include all.
        neighbor_label: Optional target entity type filter (case-insensitive)

    Returns:
        List of concrete relationship labels (table names) matching the filter.
    """
    if not source_type:
        raise YamlTranslatorError("Empty source_type")

    preds_u = set(p.upper() for p in predicates) if predicates else None
    neighbor_l = neighbor_label.lower() if neighbor_label else None

    labels: list[str] = []
    for spec in self.get_relations_for_source(source_type):
        if preds_u is not None and spec["predicate"].upper() not in preds_u:
            continue
        if neighbor_l is not None and spec["target"].lower() != neighbor_l:
            continue
        labels.append(
            self.relationship_table_name(
                source=spec["source"],
                predicate=spec["predicate"],
                target=spec["target"],
                directed=spec["directed"],
            )
        )
    return labels

get_relations_for_source(entity_name)

Get normalized relation specs for a source entity in target-first schema.

Returns list of dicts with keys
  • source (str)
  • target (str)
  • name (str | None)
  • description (str | None)
  • predicate (str)
  • directed (bool)
Source code in src/memg_core/core/yaml_translator.py
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
def get_relations_for_source(self, entity_name: str) -> list[dict[str, Any]]:
    """Get normalized relation specs for a source entity in target-first schema.

    Returns list of dicts with keys:
        - source (str)
        - target (str)
        - name (str | None)
        - description (str | None)
        - predicate (str)
        - directed (bool)
    """
    if not entity_name:
        raise YamlTranslatorError("Empty entity name")

    source_l = entity_name.lower()
    relations_map = self._get_relations_mapping_for_entity(source_l)
    if not relations_map:
        return []

    out: list[dict[str, Any]] = []
    for target_l, items in relations_map.items():
        for item in items:
            predicate = item.get("predicate")
            if not predicate or not isinstance(predicate, str):
                # Skip invalid entries - strict behavior can be added later
                continue
            directed = bool(item.get("directed", True))
            out.append(
                {
                    "source": source_l,
                    "target": target_l,
                    "name": item.get("name"),
                    "description": item.get("description"),
                    "predicate": predicate.upper(),
                    "directed": directed,
                }
            )
    return out

get_see_also_config(entity_name)

Get the see_also configuration for the given entity type from YAML schema.

Returns:

Type Description
dict[str, Any] | None

Dict with keys: enabled, threshold, limit, target_types

dict[str, Any] | None

None if see_also is not configured for this entity

Source code in src/memg_core/core/yaml_translator.py
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
def get_see_also_config(self, entity_name: str) -> dict[str, Any] | None:
    """Get the see_also configuration for the given entity type from YAML schema.

    Returns:
        Dict with keys: enabled, threshold, limit, target_types
        None if see_also is not configured for this entity
    """
    if not entity_name:
        raise YamlTranslatorError("Empty entity name")
    name_l = entity_name.lower()
    emap = self._entities_map()
    spec_raw = emap.get(name_l)
    if not spec_raw:
        raise YamlTranslatorError(f"Entity '{entity_name}' not found in YAML schema")

    see_also = spec_raw.get("see_also")
    if not see_also or not isinstance(see_also, dict):
        return None

    # Validate required fields
    if not see_also.get("enabled", False):
        return None

    return {
        "enabled": see_also.get("enabled", False),
        "threshold": float(see_also.get("threshold", 0.7)),
        "limit": int(see_also.get("limit", 3)),
        "target_types": list(see_also.get("target_types", [])),
    }

relationship_table_name(source, predicate, target, *, directed=True) staticmethod

Generate relationship table name.

For now, table name does not encode direction; direction affects creation/query semantics. Canonicalization for undirected pairs can be added here later if decided.

Source code in src/memg_core/core/yaml_translator.py
197
198
199
200
201
202
203
204
205
206
207
208
209
210
@staticmethod
def relationship_table_name(
    source: str,
    predicate: str,
    target: str,
    *,
    directed: bool = True,  # noqa: unused-argument
) -> str:
    """Generate relationship table name.

    For now, table name does not encode direction; direction affects creation/query semantics.
    Canonicalization for undirected pairs can be added here later if decided.
    """
    return f"{str(source).upper()}_{str(predicate).upper()}_{str(target).upper()}"

validate_memory_against_yaml(memory_type, payload)

Validate memory payload against YAML schema and return cleaned payload.

Source code in src/memg_core/core/yaml_translator.py
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
def validate_memory_against_yaml(
    self, memory_type: str, payload: dict[str, Any]
) -> dict[str, Any]:
    """Validate memory payload against YAML schema and return cleaned payload."""
    if not memory_type:
        raise YamlTranslatorError("memory_type is required")
    if payload is None:
        raise YamlTranslatorError("payload is required")

    # Strict validation - entity type MUST exist in YAML
    emap = self._entities_map()
    spec = emap.get(memory_type.lower())
    if not spec:
        raise YamlTranslatorError(
            f"Unknown entity type '{memory_type}'. All types must be defined in YAML schema.",
            context={
                "memory_type": memory_type,
                "available_types": list(emap.keys()),
            },
        )

    req, _opt = self._fields_contract(spec)
    missing = [k for k in req if not payload.get(k)]
    if missing:
        raise YamlTranslatorError(
            f"Missing required fields: {missing}",
            context={"memory_type": memory_type},
        )

    # Validate enum fields against YAML schema choices
    self._validate_enum_fields(memory_type, payload)

    # Validate that all fields are defined in YAML schema
    req, opt = self._fields_contract(spec)
    valid_fields = set(req + opt)
    system_fields = self._get_system_fields(spec)
    invalid_fields = set(payload.keys()) - valid_fields - system_fields
    if invalid_fields:
        raise YamlTranslatorError(
            f"Invalid fields not defined in schema: {sorted(invalid_fields)}",
            context={
                "memory_type": memory_type,
                "valid_fields": sorted(valid_fields),
                "invalid_fields": sorted(invalid_fields),
            },
        )

    # Strip system-reserved fields if present
    cleaned = dict(payload)
    for syskey in system_fields:
        cleaned.pop(syskey, None)
    return cleaned

get_config()

Get system configuration, preferring environment variables.

Returns:

Name Type Description
MemorySystemConfig MemorySystemConfig

System configuration instance.

Source code in src/memg_core/core/config.py
174
175
176
177
178
179
180
def get_config() -> MemorySystemConfig:
    """Get system configuration, preferring environment variables.

    Returns:
        MemorySystemConfig: System configuration instance.
    """
    return MemorySystemConfig.from_env()