diff --git a/LICENSE b/LICENSE
index b1d4446..8c75c7b 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
- Copyright (c) 2025 TigerGraph, Inc.
+ Copyright (c) 2024-2026 TigerGraph, Inc.
Apache License
Version 2.0, January 2004
diff --git a/common/chunkers/html_chunker.py b/common/chunkers/html_chunker.py
index ba84666..e598605 100644
--- a/common/chunkers/html_chunker.py
+++ b/common/chunkers/html_chunker.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/common/chunkers/markdown_chunker.py b/common/chunkers/markdown_chunker.py
index 7799399..87481e7 100644
--- a/common/chunkers/markdown_chunker.py
+++ b/common/chunkers/markdown_chunker.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/common/chunkers/recursive_chunker.py b/common/chunkers/recursive_chunker.py
index 6b262e1..8fd3bc5 100644
--- a/common/chunkers/recursive_chunker.py
+++ b/common/chunkers/recursive_chunker.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/common/chunkers/semantic_chunker.py b/common/chunkers/semantic_chunker.py
index 6ba0474..00d34be 100644
--- a/common/chunkers/semantic_chunker.py
+++ b/common/chunkers/semantic_chunker.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/common/config.py b/common/config.py
index 703d3f8..18a4288 100644
--- a/common/config.py
+++ b/common/config.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -86,9 +86,17 @@
completion_config = llm_config.get("completion_service")
if completion_config is None:
raise Exception("completion_service is not found in llm_config")
+if "llm_service" not in completion_config:
+ raise Exception("llm_service is not found in completion_service")
+if "llm_model" not in completion_config:
+ raise Exception("llm_model is not found in completion_service")
embedding_config = llm_config.get("embedding_service")
if embedding_config is None:
raise Exception("embedding_service is not found in llm_config")
+if "embedding_model_service" not in embedding_config:
+ raise Exception("embedding_model_service is not found in embedding_service")
+if "model_name" not in embedding_config:
+ raise Exception("model_name is not found in embedding_service")
embedding_dimension = embedding_config.get("dimensions", 1536)
# Get context window size from llm_config
@@ -144,8 +152,7 @@
reuse_embedding = graphrag_config.get("reuse_embedding", True)
doc_process_switch = graphrag_config.get("doc_process_switch", True)
entity_extraction_switch = graphrag_config.get("entity_extraction_switch", doc_process_switch)
-entity_resolution_switch = graphrag_config.get("entity_resolution_switch", entity_extraction_switch)
-community_detection_switch = graphrag_config.get("community_detection_switch", entity_resolution_switch)
+community_detection_switch = graphrag_config.get("community_detection_switch", entity_extraction_switch)
if "model_name" not in llm_config or "model_name" not in llm_config["embedding_service"]:
if "model_name" not in llm_config:
@@ -192,24 +199,41 @@ def get_llm_service(llm_config) -> LLM_Model:
else:
raise Exception("LLM Completion Service Not Supported")
+DEFAULT_MULTIMODAL_MODELS = {
+ "openai": "gpt-4o-mini",
+ "azure": "gpt-4o-mini",
+    "genai": "gemini-2.5-flash",
+    "vertexai": "gemini-2.5-flash",
+ "bedrock": "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
+}
+
def get_multimodal_service() -> LLM_Model:
"""
Get the multimodal/vision LLM service for image description tasks.
- Uses multimodal_service if configured, otherwise falls back to completion_service.
- Currently supports: OpenAI, Azure, GenAI, VertexAI
+ Priority:
+ 1. Explicit multimodal_service config
+ 2. Auto-derived from completion_service with a default vision model
+    Currently supports: OpenAI, Azure, GenAI, VertexAI, Bedrock; returns None for unsupported providers.
"""
- # Use multimodal_service if available, otherwise fallback to completion_service
- service_config = multimodal_config if multimodal_config else completion_config
-
- # Make a copy to avoid modifying the original config
- config_copy = service_config.copy()
-
- # Add default prompt_path if not present (required by LLM service classes but not used for multimodal)
+ config_copy = completion_config.copy()
+
+ if multimodal_config:
+ config_copy.update(multimodal_config)
+
+ service_type = config_copy.get("llm_service", "").lower()
+
+ if not multimodal_config or "llm_model" not in multimodal_config:
+ default_model = DEFAULT_MULTIMODAL_MODELS.get(service_type)
+ if default_model:
+ config_copy["llm_model"] = default_model
+ LogWriter.info(
+ f"Using default vision model '{default_model}' "
+ f"for provider '{service_type}'"
+ )
+
if "prompt_path" not in config_copy:
config_copy["prompt_path"] = "./common/prompts/openai_gpt4/"
-
- service_type = config_copy["llm_service"].lower()
-
+
if service_type == "openai":
return OpenAI(config_copy)
elif service_type == "azure":
@@ -218,11 +242,14 @@ def get_multimodal_service() -> LLM_Model:
return GoogleGenAI(config_copy)
elif service_type == "vertexai":
return GoogleVertexAI(config_copy)
+ elif service_type == "bedrock":
+ return AWSBedrock(config_copy)
else:
- raise Exception(
- f"Multimodal service '{service_type}' not supported. "
- "Only OpenAI, Azure, GenAI, and VertexAI are currently supported for vision tasks."
+ LogWriter.warning(
+ f"Multimodal/vision not supported for provider '{service_type}'. "
+ "Image descriptions will be skipped."
)
+ return None
if os.getenv("INIT_EMBED_STORE", "true") == "true":
conn = TigerGraphConnection(
diff --git a/common/db/connections.py b/common/db/connections.py
index 746669b..4cf21d9 100644
--- a/common/db/connections.py
+++ b/common/db/connections.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/common/embeddings/tigergraph_embedding_store.py b/common/embeddings/tigergraph_embedding_store.py
index 41c5d2a..748f166 100644
--- a/common/embeddings/tigergraph_embedding_store.py
+++ b/common/embeddings/tigergraph_embedding_store.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -39,10 +39,13 @@ def __init__(
self,
conn: TigerGraphConnection,
embedding_service: EmbeddingModel,
- support_ai_instance: bool = False
+ support_ai_instance: bool = False,
+ default_vector_attribute: str = "embedding",
):
self.embedding_service = embedding_service
self.support_ai_instance = support_ai_instance
+ self.default_vector_attribute = default_vector_attribute
+ self.vector_attr_cache = {}
if isinstance(conn.apiToken, tuple):
token = conn.apiToken[0]
@@ -73,7 +76,7 @@ def __init__(
logger.info(f"Done installing GDS library with status {q_res}")
if self.conn.graphname and not self.conn.graphname == "MyGraph":
current_schema = self.conn.gsql(f"USE GRAPH {self.conn.graphname}\n ls")
- if "- embedding(Dimension=" in current_schema:
+ if "(Dimension=" in current_schema:
self.install_vector_queries()
logger.info(f"TigerGraph embedding store is initialized with graph {self.conn.graphname}")
else:
@@ -96,31 +99,31 @@ def install_vector_queries(self):
with open(f"common/gsql/vector/{q_name}.gsql", "r") as f:
q_body = f.read()
q_res = self.conn.gsql(
- """USE GRAPH {}\nBEGIN\n{}\nEND\ninstall query {}\n""".format(
- self.conn.graphname, q_body, q_name
+ """USE GRAPH {}\nBEGIN\n{}\nEND\n""".format(
+ self.conn.graphname, q_body
)
)
need_install = True
logger.info(f"Done creating vector query {q_name} with status {q_res}")
- #TBD
if need_install:
- logger.info(f"Installing supportai queries all together")
+ logger.info(f"Installing vector queries all together")
query_res = self.conn.gsql(
"""USE GRAPH {}\nINSTALL QUERY ALL\n""".format(
self.conn.graphname
)
)
- logger.info(f"Done installing supportai query all with status {query_res}")
+ logger.info(f"Done installing vector queries with status {query_res}")
else:
- logger.info(f"Query installation is not needed for supportai")
+ logger.info(f"All vector queries already installed, skipping.")
def set_graphname(self, graphname):
self.conn.graphname = graphname
+ self.vector_attr_cache = {}
if self.conn.apiToken or self.conn.jwtToken:
self.conn.getToken()
if self.conn.graphname and not self.conn.graphname == "MyGraph":
current_schema = self.conn.gsql(f"USE GRAPH {self.conn.graphname}\n ls")
- if "- embedding(Dimension=" in current_schema:
+ if "(Dimension=" in current_schema:
self.install_vector_queries()
def set_connection(self, conn):
@@ -143,10 +146,63 @@ def set_connection(self, conn):
apiToken = token,
)
+ self.vector_attr_cache = {}
self.install_vector_queries()
+ def refreshvector_attr_cache(self):
+ """Parse the graph schema to discover which vertex types have which
+ vector attributes. Populates ``self.vector_attr_cache`` as
+ ``{vertex_type: {attr_name, ...}, ...}``.
+
+ The ``ls`` output contains a section like::
+
+ Vector Embeddings:
+ - Person:
+ - embedding(Dimension=1536, ...)
+ - DocumentChunk:
+ - embedding(Dimension=1536, ...)
+ - summary_vec(Dimension=768, ...)
+ """
+ self.vector_attr_cache = {}
+ try:
+ schema = self.conn.gsql(
+ f"USE GRAPH {self.conn.graphname}\n ls"
+ )
+ in_vector_section = False
+ current_vtype = None
+ for line in schema.splitlines():
+ stripped = line.strip()
+ if stripped.startswith("Vector Embeddings:"):
+ in_vector_section = True
+ continue
+ if in_vector_section:
+ if stripped == "" or (
+ not stripped.startswith("-") and ":" not in stripped
+ ):
+ break
+ if stripped.startswith("- ") and stripped.endswith(":"):
+ current_vtype = stripped[2:-1].strip()
+ self.vector_attr_cache.setdefault(current_vtype, set())
+ elif current_vtype and "(" in stripped:
+ attr_name = stripped.lstrip("- ").split("(")[0].strip()
+ if attr_name:
+ self.vector_attr_cache[current_vtype].add(attr_name)
+ except Exception as e:
+ logger.warning(f"Failed to refresh vector attribute cache: {e}")
+
+ def has_vector_attribute(self, vertex_type: str, vector_attribute: str) -> bool:
+ """Check whether *vertex_type* has a vector attribute named
+ *vector_attribute* according to the cached schema. The cache is
+ refreshed automatically on the first call or after
+ ``set_graphname`` / ``set_connection``."""
+ if not self.vector_attr_cache:
+ self.refreshvector_attr_cache()
+ attrs = self.vector_attr_cache.get(vertex_type)
+ if attrs is None:
+ return False
+ return vector_attribute in attrs
+
def map_attrs(self, attributes: Iterable[Tuple[str, List[float]]]):
- # map attrs
attrs = {}
for (k, v) in attributes:
attrs[k] = {"value": v}
@@ -163,6 +219,7 @@ def add_embeddings(
embeddings (Iterable[Tuple[str, List[float]]]):
Iterable of content and embedding of the document.
"""
+ batch = None
try:
LogWriter.info(
f"request_id={req_id_cv.get()} TigerGraph ENTRY add_embeddings()"
@@ -173,15 +230,34 @@ def add_embeddings(
"vertices": defaultdict(dict[str, any]),
}
+ skipped = []
+ vec_attrs_used = set()
for i, (text, _) in enumerate(embeddings):
(v_id, v_type) = metadatas[i].get("vertex_id")
+ vec_attr = metadatas[i].get(
+ "vector_attribute", self.default_vector_attribute
+ )
+ if not self.has_vector_attribute(v_type, vec_attr):
+ if (v_type, vec_attr) not in skipped:
+ LogWriter.warning(
+ f"Skipping vertex type '{v_type}': "
+ f"no vector attribute '{vec_attr}' in schema."
+ )
+ skipped.append((v_type, vec_attr))
+ continue
+ vec_attrs_used.add(vec_attr)
try:
embedding = self.embedding_service.embed_query(text)
except Exception as e:
LogWriter.error(f"Failed to embed {v_id}: {e}")
return
- attr = self.map_attrs([("embedding", embedding)])
+ attr = self.map_attrs([(vec_attr, embedding)])
batch["vertices"][v_type][v_id] = attr
+
+ if not any(batch["vertices"].values()):
+ LogWriter.warning("No embeddings to upsert after vector attribute checks.")
+ return
+
data = json.dumps(batch)
added = self.conn.upsertData(data)
@@ -189,7 +265,6 @@ def add_embeddings(
LogWriter.info(f"request_id={req_id_cv.get()} TigerGraph EXIT add_embeddings()")
- # Check if registration was successful
if added:
success_message = f"Document registered with id: {added[0]}"
LogWriter.info(success_message)
@@ -200,7 +275,14 @@ def add_embeddings(
raise Exception(error_message)
except Exception as e:
- error_message = f"An error occurred while registering document: {str(e)}"
+ v_types = list(batch["vertices"].keys()) if batch else "unknown"
+ vec_names = vec_attrs_used if vec_attrs_used else {self.default_vector_attribute}
+ error_message = (
+ f"An error occurred while registering document: {str(e)}. "
+ f"Vertex type(s) in batch: {v_types}. "
+ f"Vector attribute(s) used: {vec_names}. "
+ f"Ensure these vertex types have the expected vector attribute."
+ )
LogWriter.error(error_message)
async def aadd_embeddings(
@@ -214,6 +296,7 @@ async def aadd_embeddings(
embeddings (Iterable[Tuple[str, List[float]]]):
Iterable of content and embedding of the document.
"""
+ batch = None
try:
LogWriter.info(
f"request_id={req_id_cv.get()} TigerGraph ENTRY aadd_embeddings()"
@@ -224,15 +307,34 @@ async def aadd_embeddings(
"vertices": defaultdict(dict[str, any]),
}
+ skipped = []
+ vec_attrs_used = set()
for i, (text, _) in enumerate(embeddings):
(v_id, v_type) = metadatas[i].get("vertex_id")
+ vec_attr = metadatas[i].get(
+ "vector_attribute", self.default_vector_attribute
+ )
+ if not self.has_vector_attribute(v_type, vec_attr):
+ if (v_type, vec_attr) not in skipped:
+ LogWriter.warning(
+ f"Skipping vertex type '{v_type}': "
+ f"no vector attribute '{vec_attr}' in schema."
+ )
+ skipped.append((v_type, vec_attr))
+ continue
+ vec_attrs_used.add(vec_attr)
try:
embedding = await self.embedding_service.aembed_query(text)
except Exception as e:
LogWriter.error(f"Failed to embed {v_id}: {e}")
return
- attr = self.map_attrs([("embedding", embedding)])
+ attr = self.map_attrs([(vec_attr, embedding)])
batch["vertices"][v_type][v_id] = attr
+
+ if not any(batch["vertices"].values()):
+ LogWriter.warning("No embeddings to upsert after vector attribute checks.")
+ return
+
data = json.dumps(batch)
added = self.conn.upsertData(data)
@@ -240,7 +342,6 @@ async def aadd_embeddings(
LogWriter.info(f"request_id={req_id_cv.get()} TigerGraph EXIT aadd_embeddings()")
- # Check if registration was successful
if added:
success_message = f"Document {metadatas} registered with status: {added}"
LogWriter.info(success_message)
@@ -251,7 +352,14 @@ async def aadd_embeddings(
raise Exception(error_message)
except Exception as e:
- error_message = f"An error occurred while registering document: {str(e)}"
+ v_types = list(batch["vertices"].keys()) if batch else "unknown"
+ vec_names = vec_attrs_used if vec_attrs_used else {self.default_vector_attribute}
+ error_message = (
+ f"An error occurred while registering document: {str(e)}. "
+ f"Vertex type(s) in batch: {v_types}. "
+ f"Vector attribute(s) used: {vec_names}. "
+ f"Ensure these vertex types have the expected vector attribute."
+ )
LogWriter.error(error_message)
def has_embeddings(
@@ -261,6 +369,12 @@ def has_embeddings(
ret = True
try:
for (v_id, v_type) in v_ids:
+ if not self.has_vector_attribute(v_type, self.default_vector_attribute):
+ logger.info(
+ f"Vertex type '{v_type}' has no vector attribute "
+ f"'{self.default_vector_attribute}', treating as no embedding."
+ )
+ return False
res = self.conn.runInstalledQuery(
"check_embedding_exists",
params={
@@ -285,6 +399,12 @@ def check_embedding_rebuilt(
self,
v_type: str
):
+ if not self.has_vector_attribute(v_type, self.default_vector_attribute):
+ logger.info(
+ f"Vertex type '{v_type}' has no vector attribute "
+ f"'{self.default_vector_attribute}', skipping rebuild check."
+ )
+ return False
try:
res = self.conn.runInstalledQuery(
"vertices_have_embedding",
diff --git a/common/extractors/LLMEntityRelationshipExtractor.py b/common/extractors/LLMEntityRelationshipExtractor.py
index 1251ce9..b81a769 100644
--- a/common/extractors/LLMEntityRelationshipExtractor.py
+++ b/common/extractors/LLMEntityRelationshipExtractor.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/common/gsql/concept_curation/concept_creation/Build_Community_Concepts.gsql b/common/gsql/concept_curation/concept_creation/Build_Community_Concepts.gsql
deleted file mode 100644
index b302624..0000000
--- a/common/gsql/concept_curation/concept_creation/Build_Community_Concepts.gsql
+++ /dev/null
@@ -1,47 +0,0 @@
-CREATE OR REPLACE DISTRIBUTED QUERY Build_Community_Concepts (SET v_type_set, SET e_type_set, INT min_comm_size, INT max_comm_size) SYNTAX V1 {
-MinAccum @min_cc_id = 0; //each vertex's tentative component id
-MapAccum @@comp_sizes_map;
-MapAccum> @@comp_group_by_size_map;
-SumAccum @@num_concepts_created;
-
-Start = {v_type_set};
-
-# Initialize: Label each vertex with its own internal ID
-S = SELECT x
- FROM Start:x
- POST-ACCUM x.@min_cc_id = getvid(x);
-
-# Propagate smaller internal IDs until no more ID changes can be Done
-WHILE (S.size()>0) DO
- S = SELECT t
- FROM S:s -(e_type_set:e)- v_type_set:t
- ACCUM t.@min_cc_id += s.@min_cc_id // If s has smaller id than t, copy the id to t
- HAVING t.@min_cc_id != t.@min_cc_id';
-END;
-
-Start = {v_type_set};
-Start = SELECT s
- FROM Start:s
- POST-ACCUM
- @@comp_sizes_map += (s.@min_cc_id -> 1);
-
-FOREACH (compId,size) IN @@comp_sizes_map DO
- IF size >= min_comm_size AND size <= max_comm_size THEN
- @@num_concepts_created += 1;
- v = SELECT s FROM Start:s WHERE s.@min_cc_id == compId
- POST-ACCUM
- IF s.type == "Relationship" THEN
- INSERT INTO Concept VALUES("InCommunity"+to_string(compId), _, _, "COMMUNITY_LEAF", now(), FALSE),
- INSERT INTO DESCRIBES_RELATIONSHIP VALUES("InCommunity"+to_string(compId), s)
- ELSE IF s.type == "Entity" THEN
- INSERT INTO Concept VALUES("InCommunity"+to_string(compId), _, _, "COMMUNITY_LEAF", now(), FALSE),
- INSERT INTO DESCRIBES_ENTITY VALUES("InCommunity"+to_string(compId), s)
- ELSE IF s.type == "Concept" THEN
- INSERT INTO Concept VALUES("InCommunity"+to_string(compId), _, _, "AND", now(), FALSE),
- INSERT INTO IS_CHILD_OF VALUES(s, "InCommunity"+to_string(compId))
- END;
- END;
-END;
-
-PRINT @@num_concepts_created;
-}
\ No newline at end of file
diff --git a/common/gsql/concept_curation/concept_creation/Build_Concept_Tree.gsql b/common/gsql/concept_curation/concept_creation/Build_Concept_Tree.gsql
deleted file mode 100644
index 28ab737..0000000
--- a/common/gsql/concept_curation/concept_creation/Build_Concept_Tree.gsql
+++ /dev/null
@@ -1,34 +0,0 @@
-CREATE OR REPLACE DISTRIBUTED QUERY Build_Concept_Tree(INT min_cooccurence=10) {
- MapAccum> @concept_cooccurence;
- SetAccum> @@unmerged_concepts;
- SetAccum @source_concepts;
- UINT cooccurences;
- concepts = {Concept.*};
-
- concepts_no_parents = SELECT c FROM concepts:c WHERE c.outdegree("IS_CHILD_OF") == 0 POST-ACCUM @@unmerged_concepts += c;
-
- res = SELECT c FROM concepts_no_parents:c
- POST-ACCUM
- FOREACH cpt IN @@unmerged_concepts DO
- IF c.concept_type == cpt.concept_type AND c != cpt THEN
- c.@concept_cooccurence += (cpt.id -> getEntityRelationshipConceptCooccurrence(c, cpt))
- END
- END;
-
- res = SELECT c FROM res:c
- POST-ACCUM
- FOREACH (conc, cnt) IN c.@concept_cooccurence DO
- IF cnt >= min_cooccurence THEN
- IF c.id <= conc THEN
- INSERT INTO Concept VALUES (c.id+"_AND_"+conc, _, _, "AND", now(), FALSE),
- INSERT INTO IS_CHILD_OF VALUES (c, c.id+"_AND_"+conc),
- INSERT INTO IS_CHILD_OF VALUES (conc, c.id+"_AND_"+conc)
- ELSE
- INSERT INTO Concept VALUES (conc+"_AND_"+c.id, _, _, "AND", now(), FALSE),
- INSERT INTO IS_CHILD_OF VALUES (c, conc+"_AND_"+c.id),
- INSERT INTO IS_CHILD_OF VALUES (conc, conc+"_AND_"+c.id)
- END
- END
- END;
- PRINT res.size();
-}
\ No newline at end of file
diff --git a/common/gsql/concept_curation/concept_creation/Build_Entity_Concepts.gsql b/common/gsql/concept_curation/concept_creation/Build_Entity_Concepts.gsql
deleted file mode 100644
index b599308..0000000
--- a/common/gsql/concept_curation/concept_creation/Build_Entity_Concepts.gsql
+++ /dev/null
@@ -1,28 +0,0 @@
-CREATE OR REPLACE DISTRIBUTED QUERY Build_Entity_Concepts(/* Parameters here */) {
- ListAccum @rel_concepts;
- cpts = {Concept.*};
-
- relationship_cpts = SELECT c FROM cpts:c WHERE c.concept_type == "RELATIONSHIP_LEAF";
-
- rels = SELECT r FROM relationship_cpts:rc -(DESCRIBES_RELATIONSHIP)-> Relationship:r
- ACCUM r.@rel_concepts += rc.id
- POST-ACCUM
- INSERT INTO Concept VALUES("EntitiesAreHeadOf"+rc.id, _, _, "ENTITY_LEAF", now(), FALSE),
- INSERT INTO Concept VALUES("EntitiesAreTailOf"+rc.id, _, _, "ENTITY_LEAF", now(), FALSE),
- INSERT INTO HAS_RELATIONSHIP VALUES("EntitiesAreHeadOf"+rc.id, rc, "IS_HEAD_OF"),
- INSERT INTO HAS_RELATIONSHIP VALUES(rc, "EntitiesAreTailOf"+rc.id, "HAS_TAIL");
-
- head_entities = SELECT he FROM rels:r -(reverse_IS_HEAD_OF)-> Entity:he
- ACCUM
- FOREACH rel IN r.@rel_concepts DO
- INSERT INTO DESCRIBES_ENTITY VALUES("EntitiesAreHeadOf"+rel, he)
- END;
-
- tail_entities = SELECT t FROM rels:r -(HAS_TAIL)-> Entity:t
- ACCUM
- FOREACH rel IN r.@rel_concepts DO
- INSERT INTO DESCRIBES_ENTITY VALUES("EntitiesAreTailOf"+rel, t)
- END;
-
- PRINT relationship_cpts;
-}
\ No newline at end of file
diff --git a/common/gsql/concept_curation/concept_creation/Build_Relationship_Concepts.gsql b/common/gsql/concept_curation/concept_creation/Build_Relationship_Concepts.gsql
deleted file mode 100644
index 36f9a06..0000000
--- a/common/gsql/concept_curation/concept_creation/Build_Relationship_Concepts.gsql
+++ /dev/null
@@ -1,25 +0,0 @@
-CREATE OR REPLACE DISTRIBUTED QUERY Build_Relationship_Concepts(INT occurence_min=5) {
- MapAccum> @@relationship_count;
- SetAccum @@impt_relationships;
- SetAccum @@created_concepts;
- rels = {Relationship.*};
- res = SELECT s FROM rels:s ACCUM @@relationship_count += (lower(s.short_name) -> 1);
-
- FOREACH (key, val) IN @@relationship_count DO
- IF val >= occurence_min THEN
- @@impt_relationships += key;
- END;
- END;
-
- res = SELECT ir FROM rels:ir
- WHERE lower(ir.short_name) IN @@impt_relationships
- POST-ACCUM
- STRING tmp = lower(ir.short_name),
- IF tmp != "" THEN
- @@created_concepts += tmp,
- INSERT INTO Concept VALUES (tmp, _, _, "RELATIONSHIP_LEAF", now(), FALSE),
- INSERT INTO DESCRIBES_RELATIONSHIP VALUES (tmp, ir)
- END;
-
- PRINT @@created_concepts;
-}
\ No newline at end of file
diff --git a/common/gsql/concept_curation/concept_creation/getEntityRelationshipConceptCooccurrence.gsql b/common/gsql/concept_curation/concept_creation/getEntityRelationshipConceptCooccurrence.gsql
deleted file mode 100644
index 11cda35..0000000
--- a/common/gsql/concept_curation/concept_creation/getEntityRelationshipConceptCooccurrence.gsql
+++ /dev/null
@@ -1,140 +0,0 @@
-CREATE QUERY getEntityRelationshipConceptCooccurrence(VERTEX c1, VERTEX c2) RETURNS(INT){
- SetAccum> @@initialCo, @@coLeafs, @@tempCo;
- OrAccum @visited, @excludeCo, @@exclude;
- MapAccum>> @@coItrMap;
- ArrayAccum>> @@arrayCo[];
- MinAccum @parentType;
- MapAccum @@debugLogs;
-
- INT iter = 0;
- //Include & Exclude Logic
-
- includeConcept = SELECT c FROM Concept:c WHERE c == c1 OR c == c2;
-
- #includeConcept = {includedConcepts};
- excludeConcept = {};
-
- DOC_INC(ANY) = {};
- DOC_EXC(ANY) = {};
- DOC_TEMP(ANY) = {};
-
- IF (includeConcept.size() > 0 OR excludeConcept.size() > 0) THEN
-
- seedIncludeHCC = SELECT s FROM includeConcept:s
- ACCUM s.@excludeCo = FALSE, @@initialCo += s;
-
- // Resize array accum with no of input HCC
- @@arrayCo.reallocate(@@initialCo.size());
-
- FOREACH cItem IN @@initialCo DO
-
- @@coLeafs = cItem;
- startCo = {cItem};
-
- WHILE startCo.size() !=0 DO
-
- startCo = SELECT t FROM startCo:s -( s)
- ELSE IF (s.@parentType == "OR") THEN
- @@coItrMap += ("OR" -> s)
- ELSE
- @@coItrMap += ("OTHER" -> s)
- END;
-
- @@exclude = FALSE;
- DOC_TEMP = {};
-
- FOREACH (Key, Value) IN @@coItrMap DO
- @@tempCo = Value;
- FOREACH itm IN @@tempCo DO
-
- seed = {itm};
- CoEach = SELECT s FROM seed:s
- ACCUM @@exclude += s.@excludeCo;
-
- # can make this simpler by combining entity and relationship in one query
- DOC_2HOP_1 = SELECT d FROM CoEach:s -(_>)- (Entity|Relationship):d; # -(_>)- Document:d;
- #DOC_2HOP_2 = SELECT d FROM CoEach:s -(_>)- Relationship -(_>)- Document:d;
-
- IF (DOC_TEMP.size() == 0) THEN
- DOC_TEMP = DOC_2HOP_1;
- END;
-
- IF (DOC_2HOP_1.size() > 0) THEN
- IF (Key == "AND") THEN
- DOC_TEMP = DOC_TEMP INTERSECT DOC_2HOP_1;
- ELSE
- DOC_TEMP = DOC_TEMP UNION DOC_2HOP_1;
- END;
- /*
- ELSE IF (DOC_2HOP_2.size() > 0) THEN
- IF (Key == "AND") THEN
- DOC_TEMP = DOC_TEMP INTERSECT DOC2_HOP_2;
- ELSE
- DOC_TEMP = DOC_TEMP UNION DOC2_HOP_2;
- END;
- END;
- */
- END;
- END;
- IF (@@exclude != TRUE) THEN
- IF(DOC_INC.size() == 0) THEN
- DOC_INC = DOC_TEMP;
- END;
- DOC_INC = DOC_INC INTERSECT DOC_TEMP;
- ELSE
- IF(DOC_EXC.size() == 0) THEN
- DOC_EXC = DOC_TEMP;
- END;
- DOC_EXC = DOC_EXC INTERSECT DOC_TEMP;
- END;
-
- @@coItrMap.clear();
-
- END; // End of Array Loop
-
- MYDOCS = DOC_INC;
-
- END;
-
- IF(includeConcept.size() == 0) THEN
-
- MyEntities = {Entity.*};
- MyRels = {Relationship.*};
- MYDOCS = MyEntities UNION MyRels;
-
- END;
-
- //Remove docs from exclude list
-
- MYDOCS = MYDOCS MINUS DOC_EXC;
-
- #PRINT MYDOCS.size() as numberOfMatchingDocs;
- #PRINT MYDOCS as RetrievedDocs;
- PRINT MYDOCS.size();
- RETURN MYDOCS.size();
-}
\ No newline at end of file
diff --git a/common/gsql/graphrag/ResolveRelationships.gsql b/common/gsql/graphrag/ResolveRelationships.gsql
deleted file mode 100644
index 493746f..0000000
--- a/common/gsql/graphrag/ResolveRelationships.gsql
+++ /dev/null
@@ -1,26 +0,0 @@
-CREATE OR REPLACE DISTRIBUTED QUERY ResolveRelationships(BOOL printResults=FALSE) SYNTAX V2 {
- /*
- * RE1 <- entity -RELATES-> entity -> RE2
- * to
- * RE1 -resolved-> RE
- *
- * Combines all of a Resolved entity's children's relationships into
- * RESOLVED_RELATIONSHIP
- */
- REs = {ResolvedEntity.*};
-
-
- REs = SELECT re1 FROM REs:re1 -(:rel)- Entity:e_tgt -(RESOLVES_TO>:r)- ResolvedEntity:re2
- // Connect the The first RE to the second RE
- ACCUM
- INSERT INTO RESOLVED_RELATIONSHIP(FROM,TO, relation_type) VALUES(re1, re2, rel.relation_type);
-
-
- IF printResults THEN
- // show which entities didn't get resolved
- Ents = {Entity.*};
- rEnts = SELECT e FROM Ents:e -(RESOLVES_TO>)- _;
- ents = Ents minus rEnts;
- PRINT ents;
- END;
-}
diff --git a/common/gsql/graphrag/StreamDocIds.gsql b/common/gsql/graphrag/StreamDocIds.gsql
deleted file mode 100644
index 996d4c8..0000000
--- a/common/gsql/graphrag/StreamDocIds.gsql
+++ /dev/null
@@ -1,16 +0,0 @@
-CREATE OR REPLACE DISTRIBUTED QUERY StreamDocIds(INT current_batch, INT ttl_batches) {
- /*
- * Get the IDs of documents that have not already been processed (one
- * batch at a time)
- */
- ListAccum @@doc_ids;
- Docs = {Document.*};
-
- Docs = SELECT d FROM Docs:d
- WHERE vertex_to_int(d) % ttl_batches == current_batch
- AND d.epoch_processed == 0
- ACCUM @@doc_ids += d.id
- POST-ACCUM d.epoch_processed = datetime_to_epoch(now()); // set the processing time
-
- PRINT @@doc_ids;
-}
diff --git a/common/gsql/graphrag/entities_have_resolution.gsql b/common/gsql/graphrag/entities_have_resolution.gsql
deleted file mode 100644
index 4e50af7..0000000
--- a/common/gsql/graphrag/entities_have_resolution.gsql
+++ /dev/null
@@ -1,10 +0,0 @@
-CREATE OR REPLACE DISTRIBUTED QUERY entities_have_resolution() SYNTAX V2{
- SumAccum @@resolved;
- Ents = {Entity.*};
- Ents = SELECT s FROM Ents:s -(RESOLVES_TO>)- ResolvedEntity
- POST-ACCUM(s) @@resolved += 1;
-
-
- PRINT (@@resolved >= Ents.size()) as all_resolved;
- PRINT @@resolved, Ents.size();
-}
diff --git a/common/gsql/graphrag/get_community_children.gsql b/common/gsql/graphrag/get_community_children.gsql
index a55bf0b..0c49493 100644
--- a/common/gsql/graphrag/get_community_children.gsql
+++ b/common/gsql/graphrag/get_community_children.gsql
@@ -5,7 +5,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY get_community_children(Vertex com
Comms = SELECT t FROM Comms:c -()- ResolvedEntity -(_>)- Entity:t;
+ Ents = SELECT t FROM Comms:c -( community, STRING ext_vid> Move;
+    TYPEDEF TUPLE <DOUBLE delta_q, VERTEX<Entity> community, STRING ext_vid> Move;
     SumAccum<DOUBLE> @@m; // the sum of the weights of all the links in the network
-    MinAccum<VERTEX<ResolvedEntity>> @community_id; // the community ID of the node
+    MinAccum<VERTEX<Entity>> @community_id; // the community ID of the node
     MinAccum<STRING> @community_vid; // the community ID of the node
     SumAccum<DOUBLE> @k; // the sum of the weights of the links incident to the node
     SumAccum<DOUBLE> @k_in; // the sum of the weights of the links inside the previous community of the node
     SumAccum<DOUBLE> @k_self_loop; // the weight of the self-loop link
-    MapAccum<VERTEX<ResolvedEntity>, SumAccum<DOUBLE>> @community_k_in_map; // the community of the neighbors of the nodes -> the sum of the weights of the links inside the community
-    MapAccum<VERTEX<ResolvedEntity>, SumAccum<DOUBLE>> @@community_sum_total_map; // community ID C -> the sum of the weights of the links incident to nodes in C
+    MapAccum<VERTEX<Entity>, SumAccum<DOUBLE>> @community_k_in_map; // the community of the neighbors of the nodes -> the sum of the weights of the links inside the community
+    MapAccum<VERTEX<Entity>, SumAccum<DOUBLE>> @@community_sum_total_map; // community ID C -> the sum of the weights of the links incident to nodes in C
     SumAccum<DOUBLE> @community_sum_total; // the sum of the weights of the links incident to nodes in the community of the node
-    MapAccum<VERTEX<ResolvedEntity>, SumAccum<DOUBLE>> @@community_sum_in_map; // community ID -> the sum of the weights of the links inside the community
+    MapAccum<VERTEX<Entity>, SumAccum<DOUBLE>> @@community_sum_in_map; // community ID -> the sum of the weights of the links inside the community
     MapAccum<STRING, MapAccum<STRING, SumAccum<DOUBLE>>> @@source_target_k_in_map; // source community ID -> (target community ID -> the sum of the weights of the links from the source community to the target community)
     SumAccum<DOUBLE> @delta_Q_remove; // delta Q to remove the node from the previous community
     MaxAccum<Move> @best_move; // best move of the node with the highest delta Q to move the isolated node into the new community
@@ -22,7 +22,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN
SumAccum @batch_id;
MinAccum @vid;
- AllNodes = {ResolvedEntity.*};
+ AllNodes = {Entity.*};
DOUBLE wt = 1.0;
// prevent multiple init runs
@@ -39,7 +39,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN
s.@community_vid = s.id, // external id
s.@vid = getvid(s), // internal id (used in batching)
s.@batch_id = s.@vid % n_batches; // get batch number
- z = SELECT s FROM AllNodes:s -(_)-> ResolvedEntity:t
+ z = SELECT s FROM AllNodes:s -(_)-> Entity:t
ACCUM s.@k += wt,
@@m += 1;
@@ -52,7 +52,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN
WHILE Candidates.size() > 0 AND hop < max_hop DO
hop += 1;
IF hop == 1 THEN // first iteration
- ChangedNodes = SELECT s FROM Candidates:s -(_:e)-> ResolvedEntity:t
+ ChangedNodes = SELECT s FROM Candidates:s -(_:e)-> Entity:t
WHERE s.@community_id != t.@community_id // can't move within the same community
ACCUM
DOUBLE dq = 1 - s.@k * t.@k / (2 * @@m),
@@ -79,7 +79,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN
// process nodes in batch
FOREACH batch_id IN RANGE[0, n_batches-1] DO
- Nodes = SELECT s FROM Candidates:s -(_:e)-> ResolvedEntity:t
+ Nodes = SELECT s FROM Candidates:s -(_:e)-> Entity:t
WHERE s.@batch_id == batch_id
ACCUM
IF s.@community_id == t.@community_id THEN
@@ -96,7 +96,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN
s.@best_move = Move(@@min_double, s, to_string(s.id)); // reset best move
// find the best move
- Nodes = SELECT s FROM Nodes:s -(_:e)-> ResolvedEntity:t
+ Nodes = SELECT s FROM Nodes:s -(_:e)-> Entity:t
WHERE s.@community_id != t.@community_id
ACCUM
DOUBLE dq = 2 * s.@community_k_in_map.get(t.@community_id) - s.@k * t.@community_sum_total / @@m,
@@ -113,7 +113,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN
END;
END;
// If two nodes swap, only change the community of one of them
- SwapNodes = SELECT s FROM ChangedNodes:s -(_:e)-> ResolvedEntity:t
+ SwapNodes = SELECT s FROM ChangedNodes:s -(_:e)-> Entity:t
WHERE s.@best_move.community == t.@community_id
AND t.@to_change_community
AND t.@best_move.community == s.@community_id
@@ -143,14 +143,14 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN
@@move_cnt += ChangedNodes.size();
// Get all neighbours of the changed node that do not belong to the node’s new community
- Candidates = SELECT t FROM ChangedNodes:s -(_:e)-> ResolvedEntity:t
+ Candidates = SELECT t FROM ChangedNodes:s -(_:e)-> Entity:t
WHERE t.@community_id != s.@community_id;
END;
// Coarsening
UINT new_layer = 0;
@@community_sum_total_map.clear();
- Tmp = SELECT s FROM AllNodes:s -(_:e)-> ResolvedEntity:t
+ Tmp = SELECT s FROM AllNodes:s -(_:e)-> Entity:t
ACCUM
s.@has_relations += TRUE,
t.@has_relations += TRUE,
@@ -178,7 +178,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN
@@community_sum_total_map.clear();
// link communities
- Tmp = SELECT s FROM AllNodes:s -(_:e)-> ResolvedEntity:t
+ Tmp = SELECT s FROM AllNodes:s -(_:e)-> Entity:t
WHERE s.@community_vid != t.@community_vid
ACCUM
DOUBLE w = @@source_target_k_in_map.get(s.@community_vid).get(t.@community_vid),
diff --git a/common/gsql/graphrag/louvain/modularity.gsql b/common/gsql/graphrag/louvain/modularity.gsql
index c9fa1be..cab6255 100644
--- a/common/gsql/graphrag/louvain/modularity.gsql
+++ b/common/gsql/graphrag/louvain/modularity.gsql
@@ -19,16 +19,16 @@ CREATE OR REPLACE DISTRIBUTED QUERY modularity(UINT iteration=1) SYNTAX V2 {
WHERE c.iteration == i
ACCUM t.@parent = c.@parent;
END;
- Entities = SELECT t FROM Comms:c -(_>)- ResolvedEntity:t
+ Entities = SELECT t FROM Comms:c -(_>)- Entity:t
ACCUM t.@community_id = c.@parent;
ELSE
- Entities = SELECT t FROM Comms:c -(_>)- ResolvedEntity:t
+ Entities = SELECT t FROM Comms:c -(_>)- Entity:t
WHERE c.iteration == iteration
ACCUM t.@community_id = c.id;
END;
- Nodes = SELECT s FROM Entities:s -(_>:e)- ResolvedEntity:t
+ Nodes = SELECT s FROM Entities:s -(_>:e)- Entity:t
ACCUM
IF s.@community_id == t.@community_id THEN
@@community_in_weight_map += (s.@community_id -> wt)
diff --git a/common/gsql/supportai/Scan_For_Updates.gsql b/common/gsql/supportai/Scan_For_Updates.gsql
index c9af981..9f065c9 100644
--- a/common/gsql/supportai/Scan_For_Updates.gsql
+++ b/common/gsql/supportai/Scan_For_Updates.gsql
@@ -25,15 +25,11 @@ CREATE OR REPLACE DISTRIBUTED QUERY Scan_For_Updates(STRING v_type = "Document",
res = SELECT s FROM start:s -(HAS_CONTENT)-> Content:c
ACCUM @@v_and_text += (s.id -> TextInfo(c.ctype, c.text))
POST-ACCUM s.epoch_processing = datetime_to_epoch(now());
- // ELSE IF v_type == "Concept" THEN
- // res = SELECT s FROM start:s
- // POST-ACCUM @@v_and_text += (s.id -> s.description),
- // s.epoch_processing = datetime_to_epoch(now());
ELSE IF v_type == "Entity" THEN
res = SELECT s FROM start:s
POST-ACCUM @@v_and_text += (s.id -> TextInfo("", s.definition)),
s.epoch_processing = datetime_to_epoch(now());
- ELSE IF v_type == "Relationship" THEN
+ ELSE IF v_type == "RelationshipType" THEN
res = SELECT s FROM start:s
POST-ACCUM @@v_and_text += (s.id -> TextInfo("", s.definition)),
s.epoch_processing = datetime_to_epoch(now());
diff --git a/common/gsql/supportai/Selected_Set_Display.gsql b/common/gsql/supportai/Selected_Set_Display.gsql
index c61b52a..2f94864 100644
--- a/common/gsql/supportai/Selected_Set_Display.gsql
+++ b/common/gsql/supportai/Selected_Set_Display.gsql
@@ -18,7 +18,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY Selected_Set_Display(STRING json_list_vts) {
res = SELECT s FROM start:s
POST-ACCUM
- IF s.type == "Relationship" THEN
+ IF s.type == "RelationshipType" THEN
s.@context += s.definition
ELSE IF s.type == "Entity" THEN
STRING tmp_dsc = s.id + " " + s.definition,
diff --git a/common/gsql/supportai/SupportAI_IndexCreation.gsql b/common/gsql/supportai/SupportAI_IndexCreation.gsql
index 439ba18..fa2d768 100644
--- a/common/gsql/supportai/SupportAI_IndexCreation.gsql
+++ b/common/gsql/supportai/SupportAI_IndexCreation.gsql
@@ -5,7 +5,4 @@ CREATE SCHEMA_CHANGE JOB add_supportai_indexes {
ALTER VERTEX DocumentChunk ADD INDEX doc_chunk_epoch_added_index ON (epoch_added);
ALTER VERTEX DocumentChunk ADD INDEX doc_chunk_epoch_processing_index ON (epoch_processing);
ALTER VERTEX DocumentChunk ADD INDEX doc_chunk_epoch_processed_index ON (epoch_processed);
- ALTER VERTEX Concept ADD INDEX concept_epoch_added_index ON (epoch_added);
- ALTER VERTEX Concept ADD INDEX concept_epoch_processing_index ON (epoch_processing);
- ALTER VERTEX Concept ADD INDEX concept_epoch_processed_index ON (epoch_processed);
}
\ No newline at end of file
diff --git a/common/gsql/supportai/SupportAI_Schema.gsql b/common/gsql/supportai/SupportAI_Schema.gsql
index 94fcc9e..c756fd3 100644
--- a/common/gsql/supportai/SupportAI_Schema.gsql
+++ b/common/gsql/supportai/SupportAI_Schema.gsql
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025 TigerGraph, Inc.
+ * Copyright (c) 2024-2026 TigerGraph, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,36 +17,26 @@
CREATE SCHEMA_CHANGE JOB add_supportai_schema {
ADD VERTEX DocumentChunk(PRIMARY_ID id STRING, idx INT, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true";
ADD VERTEX Document(PRIMARY_ID id STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true";
- ADD VERTEX Concept(PRIMARY_ID id STRING, description STRING, concept_type STRING, human_curated BOOL, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true";
ADD VERTEX Entity(PRIMARY_ID id STRING, definition STRING, description SET<STRING>, entity_type STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true";
- ADD VERTEX Relationship(PRIMARY_ID id STRING, definition STRING, short_name STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true";
- ADD VERTEX DocumentCollection(PRIMARY_ID id STRING, epoch_added UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true";
+ ADD VERTEX RelationshipType(PRIMARY_ID id STRING, definition STRING, short_name STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true";
ADD VERTEX Content(PRIMARY_ID id STRING, ctype STRING, text STRING, epoch_added UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true";
ADD VERTEX EntityType(PRIMARY_ID id STRING, description STRING, epoch_added UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true";
ADD DIRECTED EDGE HAS_CONTENT(FROM Document, TO Content|FROM DocumentChunk, TO Content) WITH REVERSE_EDGE="reverse_HAS_CONTENT";
- ADD DIRECTED EDGE IS_CHILD_OF(FROM Concept, TO Concept) WITH REVERSE_EDGE="reverse_IS_CHILD_OF";
- ADD DIRECTED EDGE IS_HEAD_OF(FROM Entity, TO Relationship) WITH REVERSE_EDGE="reverse_IS_HEAD_OF";
- ADD DIRECTED EDGE HAS_TAIL(FROM Relationship, TO Entity) WITH REVERSE_EDGE="reverse_HAS_TAIL";
- ADD DIRECTED EDGE DESCRIBES_RELATIONSHIP(FROM Concept, TO Relationship) WITH REVERSE_EDGE="reverse_DESCRIBES_RELATIONSHIP";
- ADD DIRECTED EDGE DESCRIBES_ENTITY(FROM Concept, TO Entity) WITH REVERSE_EDGE="reverse_DESCRIBES_ENTITY";
+ ADD DIRECTED EDGE IS_HEAD_OF(FROM Entity, TO RelationshipType) WITH REVERSE_EDGE="reverse_IS_HEAD_OF";
+ ADD DIRECTED EDGE HAS_TAIL(FROM RelationshipType, TO Entity) WITH REVERSE_EDGE="reverse_HAS_TAIL";
ADD DIRECTED EDGE CONTAINS_ENTITY(FROM DocumentChunk, TO Entity|FROM Document, TO Entity) WITH REVERSE_EDGE="reverse_CONTAINS_ENTITY";
- ADD DIRECTED EDGE MENTIONS_RELATIONSHIP(FROM DocumentChunk, TO Relationship|FROM Document, TO Relationship) WITH REVERSE_EDGE="reverse_MENTIONS_RELATIONSHIP";
+ ADD DIRECTED EDGE MENTIONS_RELATIONSHIP(FROM DocumentChunk, TO RelationshipType|FROM Document, TO RelationshipType) WITH REVERSE_EDGE="reverse_MENTIONS_RELATIONSHIP";
ADD DIRECTED EDGE IS_AFTER(FROM DocumentChunk, TO DocumentChunk) WITH REVERSE_EDGE="reverse_IS_AFTER";
ADD DIRECTED EDGE HAS_CHILD(FROM Document, TO DocumentChunk) WITH REVERSE_EDGE="reverse_HAS_CHILD";
- ADD DIRECTED EDGE HAS_RELATIONSHIP(FROM Concept, TO Concept, relation_type STRING) WITH REVERSE_EDGE="reverse_HAS_RELATIONSHIP";
- ADD DIRECTED EDGE CONTAINS_DOCUMENT(FROM DocumentCollection, TO Document) WITH REVERSE_EDGE="reverse_CONTAINS_DOCUMENT";
ADD DIRECTED EDGE ENTITY_HAS_TYPE(FROM Entity, TO EntityType) WITH REVERSE_EDGE="reverse_ENTITY_HAS_TYPE";
ADD DIRECTED EDGE RELATIONSHIP_TYPE(FROM EntityType, TO EntityType, DISCRIMINATOR(relation_type STRING), frequency INT) WITH REVERSE_EDGE="reverse_RELATIONSHIP_TYPE";
// GraphRAG
ADD VERTEX Community (PRIMARY_ID id STRING, iteration UINT, description STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true";
- ADD VERTEX ResolvedEntity(PRIMARY_ID id STRING, entity_type STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true";
ADD DIRECTED EDGE RELATIONSHIP(FROM Entity, TO Entity, relation_type STRING) WITH REVERSE_EDGE="reverse_RELATIONSHIP";
- ADD DIRECTED EDGE RESOLVES_TO(FROM Entity, TO ResolvedEntity, relation_type STRING) WITH REVERSE_EDGE="reverse_RESOLVES_TO"; // Connect ResolvedEntities with their children entities
- ADD DIRECTED EDGE RESOLVED_RELATIONSHIP(FROM ResolvedEntity, TO ResolvedEntity, relation_type STRING) WITH REVERSE_EDGE="reverse_RESOLVED_RELATIONSHIP"; // store edges between entities after they're resolved
- ADD DIRECTED EDGE IN_COMMUNITY(FROM ResolvedEntity, TO Community) WITH REVERSE_EDGE="reverse_IN_COMMUNITY";
+ ADD DIRECTED EDGE IN_COMMUNITY(FROM Entity, TO Community) WITH REVERSE_EDGE="reverse_IN_COMMUNITY";
ADD DIRECTED EDGE LINKS_TO (from Community, to Community, weight DOUBLE) WITH REVERSE_EDGE="reverse_LINKS_TO";
ADD DIRECTED EDGE HAS_PARENT (from Community, to Community) WITH REVERSE_EDGE="reverse_HAS_PARENT";
}
diff --git a/common/gsql/supportai/SupportAI_Schema_Images.gsql b/common/gsql/supportai/SupportAI_Schema_Images.gsql
index a05ffd6..e6f2e5c 100644
--- a/common/gsql/supportai/SupportAI_Schema_Images.gsql
+++ b/common/gsql/supportai/SupportAI_Schema_Images.gsql
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025 TigerGraph, Inc.
+ * Copyright (c) 2024-2026 TigerGraph, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/common/gsql/supportai/SupportAI_Schema_Native_Vector.gsql b/common/gsql/supportai/SupportAI_Schema_Native_Vector.gsql
index f778caa..4db81a6 100644
--- a/common/gsql/supportai/SupportAI_Schema_Native_Vector.gsql
+++ b/common/gsql/supportai/SupportAI_Schema_Native_Vector.gsql
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025 TigerGraph, Inc.
+ * Copyright (c) 2024-2026 TigerGraph, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,14 +14,7 @@
* limitations under the License.
*/
-CREATE SCHEMA_CHANGE JOB add_supportai_vector {
+CREATE SCHEMA_CHANGE JOB add_graphrag_vector {
ALTER VERTEX DocumentChunk ADD VECTOR ATTRIBUTE embedding(dimension=1536, metric="cosine");
- ALTER VERTEX Document ADD VECTOR ATTRIBUTE embedding(dimension=1536, metric="cosine");
- ALTER VERTEX Concept ADD VECTOR ATTRIBUTE embedding(dimension=1536, metric="cosine");
- ALTER VERTEX Entity ADD VECTOR ATTRIBUTE embedding(dimension=1536, metric="cosine");
- ALTER VERTEX Relationship ADD VECTOR ATTRIBUTE embedding(dimension=1536, metric="cosine");
-
- // GraphRAG
ALTER VERTEX Community ADD VECTOR ATTRIBUTE embedding(dimension=1536, metric="cosine");
- ALTER VERTEX ResolvedEntity ADD VECTOR ATTRIBUTE embedding(dimension=1536, metric="cosine");
}
diff --git a/common/gsql/supportai/retrievers/Chunk_Sibling_Search.gsql b/common/gsql/supportai/retrievers/Chunk_Sibling_Search.gsql
index f64dae2..d98051a 100644
--- a/common/gsql/supportai/retrievers/Chunk_Sibling_Search.gsql
+++ b/common/gsql/supportai/retrievers/Chunk_Sibling_Search.gsql
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025 TigerGraph, Inc.
+ * Copyright (c) 2024-2026 TigerGraph, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/common/gsql/supportai/retrievers/Chunk_Sibling_Vector_Search.gsql b/common/gsql/supportai/retrievers/Chunk_Sibling_Vector_Search.gsql
index dc8d520..a09b045 100644
--- a/common/gsql/supportai/retrievers/Chunk_Sibling_Vector_Search.gsql
+++ b/common/gsql/supportai/retrievers/Chunk_Sibling_Vector_Search.gsql
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025 TigerGraph, Inc.
+ * Copyright (c) 2024-2026 TigerGraph, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/common/gsql/supportai/retrievers/Content_Similarity_Search.gsql b/common/gsql/supportai/retrievers/Content_Similarity_Search.gsql
index 269ba9f..ed917e0 100644
--- a/common/gsql/supportai/retrievers/Content_Similarity_Search.gsql
+++ b/common/gsql/supportai/retrievers/Content_Similarity_Search.gsql
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025 TigerGraph, Inc.
+ * Copyright (c) 2024-2026 TigerGraph, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -40,7 +40,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY Content_Similarity_Search(STRING json_list_v
@@final_retrieval += (s.id -> tgt.text)
END
POST-ACCUM
- IF s.type == "Relationship" OR s.type == "Entity" OR s.type == "Concept" THEN
+ IF s.type == "RelationshipType" OR s.type == "Entity" THEN
@@final_retrieval += (s.id -> s.definition)
ELSE IF s.type == "Community" THEN
@@final_retrieval += (s.id -> s.description)
diff --git a/common/gsql/supportai/retrievers/Content_Similarity_Vector_Search.gsql b/common/gsql/supportai/retrievers/Content_Similarity_Vector_Search.gsql
index d9bb0de..24648d9 100644
--- a/common/gsql/supportai/retrievers/Content_Similarity_Vector_Search.gsql
+++ b/common/gsql/supportai/retrievers/Content_Similarity_Vector_Search.gsql
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025 TigerGraph, Inc.
+ * Copyright (c) 2024-2026 TigerGraph, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -38,7 +38,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY Content_Similarity_Vector_Search(STRING v_ty
@@final_retrieval += (s.id -> tgt.text)
END
POST-ACCUM
- IF s.type == "Relationship" OR s.type == "Entity" OR s.type == "Concept" THEN
+ IF s.type == "RelationshipType" OR s.type == "Entity" THEN
@@final_retrieval += (s.id -> s.definition)
ELSE IF s.type == "Community" THEN
@@final_retrieval += (s.id -> s.description)
diff --git a/common/gsql/supportai/retrievers/Entity_Relationship_Retrieval.gsql b/common/gsql/supportai/retrievers/Entity_Relationship_Retrieval.gsql
index fd0c954..7e38a52 100644
--- a/common/gsql/supportai/retrievers/Entity_Relationship_Retrieval.gsql
+++ b/common/gsql/supportai/retrievers/Entity_Relationship_Retrieval.gsql
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025 TigerGraph, Inc.
+ * Copyright (c) 2024-2026 TigerGraph, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -21,7 +21,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY Entity_Relationship_Retrieval(SET en
SetAccum<VERTEX> @@starting_rels;
ents = {Entity.*};
- rels = {Relationship.*};
+ rels = {RelationshipType.*};
FOREACH ent IN entities DO
STRING search_param = lower(ent);
diff --git a/common/gsql/supportai/retrievers/GraphRAG_Community_Search.gsql b/common/gsql/supportai/retrievers/GraphRAG_Community_Search.gsql
index 58f92e9..099a453 100644
--- a/common/gsql/supportai/retrievers/GraphRAG_Community_Search.gsql
+++ b/common/gsql/supportai/retrievers/GraphRAG_Community_Search.gsql
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025 TigerGraph, Inc.
+ * Copyright (c) 2024-2026 TigerGraph, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -45,11 +45,11 @@ CREATE OR REPLACE DISTRIBUTED QUERY GraphRAG_Community_Search(STRING json_list_v
POST-ACCUM @@verbose_info += ("community_level_"+to_string(i-1) -> s.@children);
END;
IF with_doc THEN
- related_chunks = SELECT c FROM Content:c -()- DocumentChunk:dc -(CONTAINS_ENTITY>)- Entity:v -(RESOLVES_TO>)- ResolvedEntity:r -(IN_COMMUNITY>)- selected_comms:m
+ related_chunks = SELECT c FROM Content:c -()- DocumentChunk:dc -(CONTAINS_ENTITY>)- Entity:v -(IN_COMMUNITY>)- selected_comms:m
ACCUM m.@context += c.text, m.@children += d
POST-ACCUM @@verbose_info += ("related_chunks" -> m.@children);
ELSE
- related_chunks = SELECT c FROM Content:c -()- Entity:v -(RESOLVES_TO>)- ResolvedEntity:r -(IN_COMMUNITY>)- selected_comms:m
+ related_chunks = SELECT c FROM Content:c -()- Entity:v -(IN_COMMUNITY>)- selected_comms:m
ACCUM m.@context += c.text, m.@children += d
POST-ACCUM @@verbose_info += ("related_chunks" -> m.@children);
END;
diff --git a/common/gsql/supportai/retrievers/GraphRAG_Community_Search_Display.gsql b/common/gsql/supportai/retrievers/GraphRAG_Community_Search_Display.gsql
index bdfef52..939890b 100644
--- a/common/gsql/supportai/retrievers/GraphRAG_Community_Search_Display.gsql
+++ b/common/gsql/supportai/retrievers/GraphRAG_Community_Search_Display.gsql
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025 TigerGraph, Inc.
+ * Copyright (c) 2024-2026 TigerGraph, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -41,8 +41,8 @@ CREATE OR REPLACE DISTRIBUTED QUERY GraphRAG_Community_Search_Display(STRING jso
WHERE s.iteration == i
ACCUM s.@children += c, @@edges += e;
END;
- related_chunks = SELECT c FROM Content:c -(:e2)- Entity:v -(RESOLVES_TO>:e3)- ResolvedEntity:r -(IN_COMMUNITY>:e4)- selected_comms:m
- ACCUM m.@context += c.text, m.@children += d, @@edges += e1, @@edges += e2, @@edges += e3, @@edges += e4;
+ related_chunks = SELECT c FROM Content:c -(:e2)- Entity:v -(IN_COMMUNITY>:e3)- selected_comms:m
+ ACCUM m.@context += c.text, m.@children += d, @@edges += e1, @@edges += e2, @@edges += e3;
END;
selected_comms = SELECT c FROM selected_comms:c WHERE c.type == "Community"
diff --git a/common/gsql/supportai/retrievers/GraphRAG_Community_Vector_Search.gsql b/common/gsql/supportai/retrievers/GraphRAG_Community_Vector_Search.gsql
index 7777bca..08af49b 100644
--- a/common/gsql/supportai/retrievers/GraphRAG_Community_Vector_Search.gsql
+++ b/common/gsql/supportai/retrievers/GraphRAG_Community_Vector_Search.gsql
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025 TigerGraph, Inc.
+ * Copyright (c) 2024-2026 TigerGraph, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -40,15 +40,15 @@ CREATE OR REPLACE DISTRIBUTED QUERY GraphRAG_Community_Vector_Search(LIST
POST-ACCUM @@verbose_info += ("community_level_"+to_string(i-1) -> s.@children);
END;
start_chunks = vectorSearch({DocumentChunk.embedding}, query_vector, top_k);
- extra_selected_comms = SELECT m FROM start_chunks:dc -(CONTAINS_ENTITY>)- Entity:v -(RESOLVES_TO>)- ResolvedEntity:r -(IN_COMMUNITY>)- Community:m;
+ extra_selected_comms = SELECT m FROM start_chunks:dc -(CONTAINS_ENTITY>)- Entity:v -(IN_COMMUNITY>)- Community:m;
selected_comms = selected_comms UNION extra_selected_comms;
IF with_doc THEN
- related_chunks = SELECT c FROM Content:c -()- DocumentChunk:dc -(CONTAINS_ENTITY>)- Entity:v -(RESOLVES_TO>)- ResolvedEntity:r -(IN_COMMUNITY>)- selected_comms:m
+ related_chunks = SELECT c FROM Content:c -()- DocumentChunk:dc -(CONTAINS_ENTITY>)- Entity:v -(IN_COMMUNITY>)- selected_comms:m
ACCUM m.@context += c.text, m.@children += d, @@edges += EdgeTypes(m, d)
POST-ACCUM @@verbose_info += ("related_chunks" -> m.@children);
ELSE
- related_chunks = SELECT c FROM Content:c -()- Entity:v -(RESOLVES_TO>)- ResolvedEntity:r -(IN_COMMUNITY>)- selected_comms:m
+ related_chunks = SELECT c FROM Content:c -()- Entity:v -(IN_COMMUNITY>)- selected_comms:m
ACCUM m.@context += c.text, m.@children += d, @@edges += EdgeTypes(m, d)
POST-ACCUM @@verbose_info += ("related_chunks" -> m.@children);
END;
diff --git a/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search.gsql b/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search.gsql
index ea8fa29..9d68c00 100644
--- a/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search.gsql
+++ b/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search.gsql
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025 TigerGraph, Inc.
+ * Copyright (c) 2024-2026 TigerGraph, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -66,7 +66,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY GraphRAG_Hybrid_Search(STRING json_list_vts
res = SELECT s FROM start:s WHERE s.@num_times_seen >= num_seen_min AND s.type != "Document"
ACCUM
- IF s.type == "Relationship" THEN
+ IF s.type == "RelationshipType" THEN
s.@context += s.definition
ELSE IF s.type == "Entity" THEN
STRING tmp_dsc = "Entity: " + replace(s.id, "_", " ") + ", Description: " + s.definition,
diff --git a/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search_Display.gsql b/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search_Display.gsql
index 6eec379..6a6f9b9 100644
--- a/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search_Display.gsql
+++ b/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search_Display.gsql
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025 TigerGraph, Inc.
+ * Copyright (c) 2024-2026 TigerGraph, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -70,7 +70,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY GraphRAG_Hybrid_Search_Display(STRING json_l
res = SELECT s FROM start:s WHERE s.@num_times_seen >= num_seen_min AND s.type != "Document"
ACCUM
- IF s.type == "Relationship" THEN
+ IF s.type == "RelationshipType" THEN
s.@context += s.definition
ELSE IF s.type == "Entity" THEN
STRING tmp_dsc = s.id + " " + s.definition,
diff --git a/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Vector_Search.gsql b/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Vector_Search.gsql
index 971e367..2816156 100644
--- a/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Vector_Search.gsql
+++ b/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Vector_Search.gsql
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025 TigerGraph, Inc.
+ * Copyright (c) 2024-2026 TigerGraph, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -72,7 +72,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY GraphRAG_Hybrid_Vector_Search(Set v_
res = SELECT s FROM start:s WHERE s.@num_times_seen >= num_seen_min AND s.type != "Document"
ACCUM
- IF s.type == "Relationship" THEN
+ IF s.type == "RelationshipType" THEN
s.@context += s.definition
ELSE IF s.type == "Entity" THEN
STRING tmp_dsc = "Entity: " + replace(s.id, "_", " ") + ", Description: " + s.definition,
diff --git a/common/gsql/supportai/retrievers/Keyword_Search.gsql b/common/gsql/supportai/retrievers/Keyword_Search.gsql
index 9f1763f..fdfdb3f 100644
--- a/common/gsql/supportai/retrievers/Keyword_Search.gsql
+++ b/common/gsql/supportai/retrievers/Keyword_Search.gsql
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2025 TigerGraph, Inc.
+ * Copyright (c) 2024-2026 TigerGraph, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
diff --git a/common/llm_services/aws_bedrock_service.py b/common/llm_services/aws_bedrock_service.py
index a4eb05f..ba1b114 100644
--- a/common/llm_services/aws_bedrock_service.py
+++ b/common/llm_services/aws_bedrock_service.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/common/llm_services/base_llm.py b/common/llm_services/base_llm.py
index 93ec0cf..bf159fb 100644
--- a/common/llm_services/base_llm.py
+++ b/common/llm_services/base_llm.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -160,6 +160,20 @@ def graphrag_scoring_prompt(self):
"""Property to get the prompt for the GraphRAG Scoring response."""
return """You are a helpful assistant responsible for generating an answer to the question below using the data provided.\nInclude a quality score for the answer, based on how well it answers the question. The quality score should be between 0 (poor) and 100 (excellent).\n\nQuestion: {question}\nContext: {context}\n\n{format_instructions}\n"""
+ @property
+ def contextualize_question_prompt(self):
+ """Property to get the prompt for contextualizing a follow-up question
+ into a standalone search query using conversation history."""
+ return (
+ "Given the following conversation history and a follow-up "
+ "question, rewrite the follow-up question into a standalone, "
+ "self-contained question suitable for searching a knowledge "
+ "graph. Do NOT answer the question; only rewrite it.\n\n"
+ "Conversation history:\n{history}\n\n"
+ "Follow-up question: {question}\n\n"
+ "Standalone question:"
+ )
+
@property
def model(self):
"""Property to get the external LLM model."""
diff --git a/common/llm_services/google_genai_service.py b/common/llm_services/google_genai_service.py
index 6ac91a2..c544978 100644
--- a/common/llm_services/google_genai_service.py
+++ b/common/llm_services/google_genai_service.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/common/llm_services/openai_service.py b/common/llm_services/openai_service.py
index 8903b58..f23e81b 100644
--- a/common/llm_services/openai_service.py
+++ b/common/llm_services/openai_service.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/common/py_schemas/schemas.py b/common/py_schemas/schemas.py
index c5aee80..cd46fa6 100644
--- a/common/py_schemas/schemas.py
+++ b/common/py_schemas/schemas.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/common/py_schemas/tool_io_schemas.py b/common/py_schemas/tool_io_schemas.py
index 6a9f6bb..474212f 100644
--- a/common/py_schemas/tool_io_schemas.py
+++ b/common/py_schemas/tool_io_schemas.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/common/utils/image_data_extractor.py b/common/utils/image_data_extractor.py
index 4e8036a..19da86e 100644
--- a/common/utils/image_data_extractor.py
+++ b/common/utils/image_data_extractor.py
@@ -6,6 +6,14 @@
logger = logging.getLogger(__name__)
+_multimodal_client = None
+
+def _get_client():
+ global _multimodal_client
+ if _multimodal_client is None:
+ _multimodal_client = get_multimodal_service()
+ return _multimodal_client
+
def describe_image_with_llm(file_path):
"""
Read image file and convert to base64 to send to LLM.
@@ -13,9 +21,9 @@ def describe_image_with_llm(file_path):
try:
from PIL import Image as PILImage
- client = get_multimodal_service()
+ client = _get_client()
if not client:
- return "[Image: Failed to create multimodal LLM client]"
+ return "Image: Failed to create multimodal LLM client"
# Read image and convert to base64
pil_image = PILImage.open(file_path)
buffer = io.BytesIO()
@@ -51,5 +59,4 @@ def describe_image_with_llm(file_path):
return response.content if hasattr(response, "content") else str(response)
except Exception as e:
logger.error(f"Failed to describe image with LLM: {str(e)}")
- return "[Image: Error processing image description]"
-
+ return "Image: Error processing image description"
diff --git a/common/utils/text_extractors.py b/common/utils/text_extractors.py
index e0be6b2..e2bd6df 100644
--- a/common/utils/text_extractors.py
+++ b/common/utils/text_extractors.py
@@ -40,12 +40,14 @@ def insert_description_by_id(md_text, image_id, description):
"""
Replace the description for an image whose basename == image_id.
"""
+ safe_desc = description.replace("[", "(").replace("]", ")")
+
def repl(m):
old_path = m.group(2)
candidate_id = os.path.splitext(os.path.basename(old_path))[0]
if candidate_id == image_id:
- return f''
+ return f''
return m.group(0)
return _md_pattern.sub(repl, md_text)
@@ -384,7 +386,7 @@ def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None):
image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
image_counter += 1
- image_doc_id = f"{base_doc_id}_image_{image_counter}"
+ image_doc_id = f"{base_doc_id}_image_{image_counter}".lower()
# Replace file path with tg:// protocol reference in markdown
markdown_content = replace_path_with_tg_protocol(
@@ -408,6 +410,13 @@ def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None):
except Exception as img_error:
logger.warning(f"Failed to process image {img_ref.get('path')}: {img_error}")
+ failed_path = img_ref.get("path", "")
+ if failed_path:
+ markdown_content = re.sub(
+ r'!\[.*?\]\(' + re.escape(failed_path) + r'\)',
+ "",
+ markdown_content,
+ )
# FINAL CLEANUP — delete folder after processing everything
if image_output_folder.exists() and image_output_folder.is_dir():
@@ -463,8 +472,7 @@ def _extract_standalone_image_as_doc(file_path, base_doc_id, graphname=None):
pil_image.save(buffer, format="JPEG", quality=95)
image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
- image_id = f"{base_doc_id}_image_1"
- # Put description as text, then markdown image reference with short alt text
+ image_id = f"{base_doc_id}_image_1".lower()
content = f""
return [
{
diff --git a/common/utils/token_calculator.py b/common/utils/token_calculator.py
index 2157515..762e824 100644
--- a/common/utils/token_calculator.py
+++ b/common/utils/token_calculator.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/docs/tutorials/answer_question.py b/docs/tutorials/answer_question.py
index 5f494c2..d29bf65 100644
--- a/docs/tutorials/answer_question.py
+++ b/docs/tutorials/answer_question.py
@@ -27,7 +27,7 @@
query,
method="hybrid",
method_parameters = {
- "indices": ["Document", "DocumentChunk", "Entity", "Relationship"],
+ "indices": ["DocumentChunk", "Community"],
"top_k": 2,
"num_hops": 2,
"num_seen_min": 2,
diff --git a/ecc/app/eventual_consistency_checker.py b/ecc/app/eventual_consistency_checker.py
index dad7d7d..499bdc7 100644
--- a/ecc/app/eventual_consistency_checker.py
+++ b/ecc/app/eventual_consistency_checker.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -131,26 +131,6 @@ def _upsert_entities(self, src_id, src_type, entities):
for x in entities
],
)
- self.conn.upsertVertices(
- "Concept",
- [
- (
- x["type"],
- {
- "description": "",
- "concept_type": "EntityType",
- "epoch_added": date_added,
- },
- )
- for x in entities
- ],
- )
- self.conn.upsertEdges(
- "Concept",
- "DESCRIBES_ENTITY",
- "Entity",
- [(x["type"], x["id"], {}) for x in entities],
- )
self.conn.upsertEdges(
src_type,
"CONTAINS_ENTITY",
@@ -162,7 +142,7 @@ def _upsert_entities(self, src_id, src_type, entities):
def _upsert_rels(self, src_id, src_type, relationships):
date_added = int(time.time())
self.conn.upsertVertices(
- "Relationship",
+ "RelationshipType",
[
(
x["source"] + ":" + x["type"] + ":" + x["target"],
@@ -178,14 +158,14 @@ def _upsert_rels(self, src_id, src_type, relationships):
self.conn.upsertEdges(
"Entity",
"IS_HEAD_OF",
- "Relationship",
+ "RelationshipType",
[
(x["source"], x["source"] + ":" + x["type"] + ":" + x["target"], {})
for x in relationships
],
)
self.conn.upsertEdges(
- "Relationship",
+ "RelationshipType",
"HAS_TAIL",
"Entity",
[
@@ -196,7 +176,7 @@ def _upsert_rels(self, src_id, src_type, relationships):
self.conn.upsertEdges(
src_type,
"MENTIONS_RELATIONSHIP",
- "Relationship",
+ "RelationshipType",
[
(src_id, x["source"] + ":" + x["type"] + ":" + x["target"], {})
for x in relationships
diff --git a/ecc/app/graphrag/community_summarizer.py b/ecc/app/graphrag/community_summarizer.py
index 64c7053..0bab35b 100644
--- a/ecc/app/graphrag/community_summarizer.py
+++ b/ecc/app/graphrag/community_summarizer.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/ecc/app/graphrag/graph_rag.py b/ecc/app/graphrag/graph_rag.py
index cfbab3d..d702407 100644
--- a/ecc/app/graphrag/graph_rag.py
+++ b/ecc/app/graphrag/graph_rag.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -23,9 +23,7 @@
from aiochannel import Channel, ChannelClosed
from graphrag import workers
from graphrag.util import (
- check_all_ents_resolved,
check_vertex_has_desc,
- check_embedding_rebuilt,
http_timeout,
init,
load_q,
@@ -38,7 +36,7 @@
)
from pyTigerGraph import AsyncTigerGraphConnection
-from common.config import embedding_service, graphrag_config, entity_extraction_switch, entity_resolution_switch, community_detection_switch, doc_process_switch
+from common.config import embedding_service, graphrag_config, entity_extraction_switch, community_detection_switch, doc_process_switch
from common.embeddings.base_embedding_store import EmbeddingStore
from common.extractors.BaseExtractor import BaseExtractor
@@ -306,7 +304,7 @@ async def extract(
else:
if entity_extraction_switch:
grp.create_task(
- workers.extract(upsert_chan, embed_chan, extractor, conn, *item)
+ workers.extract(upsert_chan, extractor, conn, *item)
)
except ChannelClosed:
break
@@ -321,81 +319,11 @@ async def extract(
embed_chan.close()
-async def stream_entities(
- conn: AsyncTigerGraphConnection,
- entity_chan: Channel,
- ttl_batches: int = 50,
-):
- """
- Streams entity IDs from the grpah
- """
- logger.info("Entity Streaming Start")
- for i in range(ttl_batches):
- ids = await stream_ids(conn, "Entity", i, ttl_batches)
- if ids["error"]:
- logger.info(f"""Error streaming batch {i}: got {ids["error"]}""")
- # continue to the next batch.
- continue
-
- for i in ids["ids"]:
- if len(i) > 0:
- await entity_chan.put((i, "Entity"))
-
- logger.info("Entity Streaming End")
- # close the docs chan -- this function is the only sender
- logger.info("closing entities chan")
- entity_chan.close()
-
-
-async def resolve_entities(
- conn: AsyncTigerGraphConnection,
- emb_store: EmbeddingStore,
- entity_chan: Channel,
- upsert_chan: Channel,
-):
- """
- Merges entities into their ResolvedEntity form
- Groups what should be the same entity into a resolved entity (e.g. V_type and VType should be merged)
-
- Copies edges between entities to their respective ResolvedEntities
- """
- logger.info("Entity Resolving Start")
- async with asyncio.TaskGroup() as grp:
- # for every entity
- while True:
- try:
- entity_id = await entity_chan.get()
- grp.create_task(
- workers.resolve_entity(conn, upsert_chan, emb_store, entity_id)
- )
- logger.debug(f"Added Entity to resolve: {entity_id}")
- except ChannelClosed:
- break
- except Exception:
- raise
- logger.info("Entity Resolving End")
- logger.info("closing upsert_chan")
- upsert_chan.close()
- logger.info("resolve_entities done")
-
-async def resolve_relationships(
- conn: AsyncTigerGraphConnection
-):
- """
- Copy RELATIONSHIP edges to RESOLVED_RELATIONSHIP
- """
- logger.info("Relationship Resolving Start")
- async with tg_sem:
- res = await conn.runInstalledQuery(
- "ResolveRelationships"
- )
- logger.info("Relationship Resolving End")
-
async def communities(conn: AsyncTigerGraphConnection, comm_process_chan: Channel):
"""
Run louvain
"""
- # first pass: Group ResolvedEntities into Communities
+ # first pass: Group Entities into Communities
logger.info("Initializing Communities (first louvain pass)")
async with tg_sem:
@@ -423,7 +351,7 @@ async def communities(conn: AsyncTigerGraphConnection, comm_process_chan: Channe
logger.info(f"****mod pass 1: {mod}")
await stream_communities(conn, 1, comm_process_chan)
- # nth pass: Iterate on Resolved Entities until modularity stops increasing
+ # nth pass: Iterate on Communities until modularity stops increasing
prev_mod = -10
i = 0
while abs(prev_mod - mod) > 0.0000001 and prev_mod != 0:
@@ -527,10 +455,9 @@ async def run(graphname: str, conn: AsyncTigerGraphConnection):
- Process the documents into:
- chunks
- embeddings
- - entities/relationships (and their embeddings)
+ - entities/relationships
- upsert everything to the graph
- - Resolve Entities
- Ex: "Vincent van Gogh" and "van Gogh" should be resolved to "Vincent van Gogh"
+ - Detect communities and summarize them
"""
extractor, embedding_store = await init(conn)
@@ -586,40 +513,6 @@ async def run(graphname: str, conn: AsyncTigerGraphConnection):
logger.info("Type Processing End")
type_end = time.perf_counter()
- # Entity Resolution
- entity_start = time.perf_counter()
- if entity_resolution_switch:
- logger.info("Entity Processing Start")
- while not await check_embedding_rebuilt(conn, "Entity"):
- logger.info(f"Waiting for embedding to finish rebuilding")
- await asyncio.sleep(1)
- entities_chan = Channel()
- upsert_chan = Channel()
- load_q.reopen()
- async with asyncio.TaskGroup() as grp:
- grp.create_task(stream_entities(conn, entities_chan, 50))
- grp.create_task(
- resolve_entities(
- conn,
- embedding_store,
- entities_chan,
- upsert_chan,
- )
- )
- grp.create_task(upsert(upsert_chan))
- grp.create_task(load(conn))
- logger.info("Join entities_chan")
- await entities_chan.join()
- logger.info("Join upsert_chan")
- await upsert_chan.join()
- #Resolve relationsihps
- await resolve_relationships(conn)
- while not await check_all_ents_resolved(conn):
- logger.info(f"Waiting for resolved entites to finish loading")
- await asyncio.sleep(1)
- entity_end = time.perf_counter()
- logger.info("Entity Processing End")
-
# Community Detection
community_start = time.perf_counter()
if community_detection_switch:
@@ -648,10 +541,8 @@ async def run(graphname: str, conn: AsyncTigerGraphConnection):
community_end = time.perf_counter()
logger.info("Community Processing End")
- # Community Summarization
end = time.perf_counter()
logger.info(f"DONE. graphrag system initializer dT: {init_end-init_start}")
- logger.info(f"DONE. graphrag entity resolution dT: {entity_end-entity_start}")
logger.info(f"DONE. graphrag type creation dT: {type_end-type_start}")
logger.info(
f"DONE. graphrag community initializer dT: {community_end-community_start}"
diff --git a/ecc/app/graphrag/util.py b/ecc/app/graphrag/util.py
index 77e2860..f581057 100644
--- a/ecc/app/graphrag/util.py
+++ b/ecc/app/graphrag/util.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
import asyncio
import base64
+import json
import logging
import re
import traceback
@@ -39,7 +40,7 @@
http_timeout = httpx.Timeout(15.0)
-tg_sem = asyncio.Semaphore(2)
+tg_sem = asyncio.Semaphore(graphrag_config.get("tg_concurrency", 10))
load_q = reusable_channel.ReuseableChannel()
# will pause workers until the event is false
@@ -50,44 +51,72 @@ async def install_queries(
requried_queries: list[str],
conn: AsyncTigerGraphConnection,
):
- # queries that are currently installed
installed_queries = [q.split("/")[-1] for q in await conn.getEndpoints(dynamic=True) if f"/{conn.graphname}/" in q]
- # doesn't need to be parallel since tg only does it one at a time
+ required_names = set()
for q in requried_queries:
- # only install n queries at a time (n=n_workers)
q_name = q.split("/")[-1]
- # if the query is not installed, install it
+ required_names.add(q_name)
if q_name not in installed_queries:
res = await workers.install_query(conn, q, False)
- # stop system if a required query doesn't install
if res["error"]:
raise Exception(res["message"])
logger.info(f"Successfully created query '{q_name}'.")
- query = f"""\
-USE GRAPH {conn.graphname}
-INSTALL QUERY ALL
-"""
+
+ if required_names.issubset(set(installed_queries)):
+ logger.info("All required queries already installed, skipping INSTALL QUERY ALL.")
+ return
+
+ logger.info("Submitting INSTALL QUERY ALL ...")
+ query = f"USE GRAPH {conn.graphname}\nINSTALL QUERY ALL\n"
async with tg_sem:
res = await conn.gsql(query)
- if "error" in res:
+ logger.info(f"INSTALL QUERY ALL returned: {str(res)[:200]}")
+ if isinstance(res, str) and "error" in res.lower():
raise Exception(res)
- logger.info("Finished processing all required queries.")
+ max_wait = 600 # seconds
+ poll_interval = 10
+ elapsed = 0
+ while elapsed < max_wait:
+ ready = [
+ q.split("/")[-1]
+ for q in await conn.getEndpoints(dynamic=True)
+ if f"/{conn.graphname}/" in q
+ ]
+ missing = required_names - set(ready)
+ if not missing:
+ break
+ logger.info(
+ f"Waiting for query installation to finish "
+ f"({len(missing)} remaining: {', '.join(sorted(missing))})"
+ )
+ await asyncio.sleep(poll_interval)
+ elapsed += poll_interval
+ else:
+ raise Exception(
+ f"Query installation timed out after {max_wait}s. "
+ f"Still missing: {', '.join(sorted(missing))}"
+ )
+
+ logger.info("All required queries installed and verified.")
async def init(
conn: AsyncTigerGraphConnection,
) -> tuple[BaseExtractor, dict[str, EmbeddingStore]]:
+ """Initialize extractors and embedding store.
+
+ Returns:
+ (extractor, embedding_store)
+ """
# install requried queries
requried_queries = [
"common/gsql/graphrag/StreamIds",
"common/gsql/graphrag/StreamDocContent",
"common/gsql/graphrag/StreamChunkContent",
"common/gsql/graphrag/SetEpochProcessing",
- "common/gsql/graphrag/ResolveRelationships",
"common/gsql/graphrag/get_community_children",
- "common/gsql/graphrag/entities_have_resolution",
"common/gsql/graphrag/communities_have_desc",
"common/gsql/graphrag/get_vertices_or_remove",
"common/gsql/graphrag/louvain/graphrag_louvain_init",
@@ -280,20 +309,6 @@ async def get_commuinty_children(conn, i: int, c: str):
return descrs
-async def check_all_ents_resolved(conn):
- try:
- async with tg_sem:
- resp = await conn.runInstalledQuery(
- "entities_have_resolution"
- )
- except Exception as e:
- logger.error(f"Check Vert Desc err:\n{e}")
-
- res = resp[0]["all_resolved"]
- logger.info(resp)
-
- return res
-
async def add_rels_between_types(conn):
try:
async with tg_sem:
diff --git a/ecc/app/graphrag/workers.py b/ecc/app/graphrag/workers.py
index a5dbbca..8bcdd78 100644
--- a/ecc/app/graphrag/workers.py
+++ b/ecc/app/graphrag/workers.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -17,7 +17,6 @@
import logging
import time
import json
-import traceback
from urllib.parse import quote_plus
from typing import Iterable, List, Optional, Tuple
@@ -226,7 +225,6 @@ async def get_vert_desc(conn, v_id, node: Node):
async def extract(
upsert_chan: Channel,
- embed_chan: Channel,
extractor: BaseExtractor,
conn: AsyncTigerGraphConnection,
chunk: str,
@@ -256,14 +254,9 @@ async def extract(
continue
desc = await get_vert_desc(conn, v_id, node)
- # embed the entity
- # embed with the v_id if the description is blank
if len(desc[0]) == 0:
desc[0] = str(node.id)
- # (v_id, content, index_name)
- await embed_chan.put((v_id, desc[0], "Entity"))
-
await upsert_chan.put(
(
util.upsert_vertex, # func to call
@@ -357,10 +350,7 @@ async def extract(
continue
desc = await get_vert_desc(conn, v_id, edge.source)
if len(desc[0]) == 0:
- await embed_chan.put((v_id, v_id, "Entity"))
- else:
- # (v_id, content, index_name)
- await embed_chan.put((v_id, desc[0], "Entity"))
+ desc[0] = edge.source.id
await upsert_chan.put(
(
util.upsert_vertex, # func to call
@@ -380,10 +370,7 @@ async def extract(
continue
desc = await get_vert_desc(conn, v_id, edge.target)
if len(desc[0]) == 0:
- await embed_chan.put((v_id, v_id, "Entity"))
- else:
- # (v_id, content, index_name)
- await embed_chan.put((v_id, desc[0], "Entity"))
+ desc[0] = edge.target.id
await upsert_chan.put(
(
util.upsert_vertex, # func to call
@@ -414,96 +401,11 @@ async def extract(
),
)
)
- # embed "Relationship",
+ # embed "RelationshipType",
# (v_id, content, index_name)
# right now, we're not embedding relationships in graphrag
-resolve_sem = asyncio.Semaphore(20)
-
-
-async def resolve_entity(
- conn: AsyncTigerGraphConnection,
- upsert_chan: Channel,
- embed_store: EmbeddingStore,
- entity_id: str | Tuple[str, str],
-):
- """
- get all vectors of E (one name can have multiple discriptions)
- get ents close to E
- for e in ents:
- if e is 95% similar to E and edit_dist(E,e) <=3:
- merge
- mark e as processed
-
- mark as processed
- """
-
- # if loader is running, wait until it's done
- if not util.loading_event.is_set():
- logger.info("Entity Resolution worker waiting for loading event to finish")
- await util.loading_event.wait()
-
- async with resolve_sem:
- try:
- logger.info(f"Resolving Entity {entity_id}")
- results = await embed_store.aget_k_closest(entity_id)
- logger.info(f"Resolving Entity {entity_id} to {results}")
-
- except Exception:
- err = traceback.format_exc()
- logger.error(err)
- return
-
- if len(results) == 0:
- logger.error(
- f"aget_k_closest should, minimally, return the entity itself.\n{results}"
- )
- raise Exception()
-
- # merge all entities into the ResolvedEntity vertex
- # use the longest v_id as the resolved entity's v_id
- if isinstance(entity_id, tuple):
- resolved_entity_id = entity_id[0]
- else:
- resolved_entity_id = entity_id
- for v in results:
- if len(v) > len(resolved_entity_id):
- resolved_entity_id = v
-
- logger.debug(f"Merging {results} to ResolvedEntity {resolved_entity_id}")
- # upsert the resolved entity
- await upsert_chan.put(
- (
- util.upsert_vertex, # func to call
- (
- conn,
- "ResolvedEntity", # v_type
- resolved_entity_id, # v_id
- { # attrs
- },
- ),
- )
- )
-
- # create RESOLVES_TO edges from each entity to the ResolvedEntity
- for v in results:
- await upsert_chan.put(
- (
- util.upsert_edge,
- (
- conn,
- "Entity", # src_type
- v, # src_id
- "RESOLVES_TO", # edge_type
- "ResolvedEntity", # tgt_type
- resolved_entity_id, # tgt_id
- None, # attributes
- ),
- )
- )
-
-
comm_sem = asyncio.Semaphore(20)
diff --git a/ecc/app/main.py b/ecc/app/main.py
index 5f64981..5468391 100644
--- a/ecc/app/main.py
+++ b/ecc/app/main.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -99,7 +99,7 @@ def initialize_eventual_consistency_checker(
)
index_names = graphrag_config.get(
"indexes",
- ["Document", "DocumentChunk", "Entity", "Relationship", "Concept"],
+ ["DocumentChunk", "Community"],
)
if graphrag_config.get("extractor") == "llm":
diff --git a/ecc/app/supportai/supportai_init.py b/ecc/app/supportai/supportai_init.py
index 25dc78b..8d59a5b 100644
--- a/ecc/app/supportai/supportai_init.py
+++ b/ecc/app/supportai/supportai_init.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -168,7 +168,7 @@ async def extract(
async for item in extract_chan:
if entity_extraction_switch:
sp.create_task(
- workers.extract(upsert_chan, embed_chan, extractor, conn, *item)
+ workers.extract(upsert_chan, extractor, conn, *item)
)
logger.info(f"extract done")
diff --git a/ecc/app/supportai/util.py b/ecc/app/supportai/util.py
index e9c1f29..d3906ca 100644
--- a/ecc/app/supportai/util.py
+++ b/ecc/app/supportai/util.py
@@ -32,40 +32,64 @@ async def install_queries(
requried_queries: list[str],
conn: TigerGraphConnection,
):
- # queries that are currently installed
installed_queries = [q.split("/")[-1] for q in await conn.getEndpoints(dynamic=True) if f"/{conn.graphname}/" in q]
- # doesn't need to be parallel since tg only does it one at a time
+ required_names = set()
for q in requried_queries:
- # only install n queries at a time (n=n_workers)
q_name = q.split("/")[-1]
- # if the query is not installed, install it
+ required_names.add(q_name)
if q_name not in installed_queries:
- logger.info(f"Query '{q_name}' not found in installed queries. Attempting to install...")
+ logger.info(f"Query '{q_name}' not found in installed queries. Attempting to create...")
try:
res = await workers.install_query(conn, q, False)
- # stop system if a required query doesn't install
if res["error"]:
logger.error(f"Failed to create query '{q_name}'. Error: {res['message']}")
- raise Exception(f"Installation of query '{q_name}' failed with message: {res['message']}")
+ raise Exception(f"Creation of query '{q_name}' failed with message: {res['message']}")
else:
logger.info(f"Successfully created query '{q_name}'.")
-
except Exception as e:
- logger.critical(f"Critical error during installation of query '{q_name}': {e}")
+ logger.critical(f"Critical error during creation of query '{q_name}': {e}")
raise e
else:
logger.info(f"Query '{q_name}' is already installed.")
- query = f"""\
-USE GRAPH {conn.graphname}
-INSTALL QUERY ALL
-"""
+
+ if required_names.issubset(set(installed_queries)):
+ logger.info("All required queries already installed, skipping INSTALL QUERY ALL.")
+ return
+
+ logger.info("Submitting INSTALL QUERY ALL ...")
+ query = f"USE GRAPH {conn.graphname}\nINSTALL QUERY ALL\n"
async with tg_sem:
res = await conn.gsql(query)
- if "error" in res:
+ logger.info(f"INSTALL QUERY ALL returned: {str(res)[:200]}")
+ if isinstance(res, str) and "error" in res.lower():
raise Exception(res)
-
- logger.info("Finished processing all required queries.")
+
+ max_wait = 300 # seconds
+ poll_interval = 5
+ elapsed = 0
+ while elapsed < max_wait:
+ ready = [
+ q.split("/")[-1]
+ for q in await conn.getEndpoints(dynamic=True)
+ if f"/{conn.graphname}/" in q
+ ]
+ missing = required_names - set(ready)
+ if not missing:
+ break
+ logger.info(
+ f"Waiting for query installation to finish "
+ f"({len(missing)} remaining: {', '.join(sorted(missing))})"
+ )
+ await asyncio.sleep(poll_interval)
+ elapsed += poll_interval
+ else:
+ raise Exception(
+ f"Query installation timed out after {max_wait}s. "
+ f"Still missing: {', '.join(sorted(missing))}"
+ )
+
+ logger.info("All required queries installed and verified.")
diff --git a/ecc/app/supportai/workers.py b/ecc/app/supportai/workers.py
index 300bee4..30fc9ab 100644
--- a/ecc/app/supportai/workers.py
+++ b/ecc/app/supportai/workers.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2025 TigerGraph, Inc.
+# Copyright (c) 2024-2026 TigerGraph, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -177,7 +177,6 @@ async def get_vert_desc(conn, v_id, node: Node):
async def extract(
upsert_chan: Channel,
- embed_chan: Channel,
extractor: BaseExtractor,
conn: TigerGraphConnection,
chunk: str,
@@ -194,12 +193,8 @@ async def extract(
continue
desc = await get_vert_desc(conn, v_id, node)
- # embed the entity
- # embed with the v_id if the description is blank
- if len(desc[0]):
- await embed_chan.put((v_id, v_id, "Entity"))
- else:
- await embed_chan.put((v_id, desc[0], "Entity"))
+ if len(desc[0]) == 0:
+ desc[0] = str(node.id)
await upsert_chan.put(
(
@@ -238,15 +233,13 @@ async def extract(
v_id = edge.type
if len(v_id) == 0:
continue
- # embed "Relationship"
- await embed_chan.put((v_id, v_id, "Relationship"))
await upsert_chan.put(
(
util.upsert_vertex, # func to call
(
conn,
- "Relationship", # v_type
+ "RelationshipType", # v_type
v_id,
{ # attrs
"epoch_added": int(time.time()),
@@ -300,7 +293,7 @@ async def extract(
"Entity", # src_type
util.process_id(edge.source.id), # src_id
"IS_HEAD_OF", # edgeType
- "Relationship", # tgt_type
+ "RelationshipType", # tgt_type
edge.type, # tgt_id
),
)
@@ -310,7 +303,7 @@ async def extract(
util.upsert_edge,
(
conn,
- "Relationship", # src_type
+ "RelationshipType", # src_type
edge.type, # src_id
"HAS_TAIL", # edgeType
"Entity", # tgt_type
@@ -329,7 +322,7 @@ async def extract(
"DocumentChunk", # src_type
chunk_id, # src_id
"MENTIONS_RELATIONSHIP", # edge_type
- "Relationship", # tgt_type
+ "RelationshipType", # tgt_type
edge.type, # tgt_id
),
)
diff --git a/graphrag-ui/src/components/Bot.tsx b/graphrag-ui/src/components/Bot.tsx
index 3d31aca..1f4e4e6 100644
--- a/graphrag-ui/src/components/Bot.tsx
+++ b/graphrag-ui/src/components/Bot.tsx
@@ -38,11 +38,18 @@ const Bot = ({ layout, getConversationId }: { layout?: string | undefined, getCo
// Initial load
const parseStore = loadStore();
- // Set default selectedGraph to first graph if no value in localStorage
- if (!localStorage.getItem("selectedGraph") && parseStore?.graphs?.length > 0) {
- const firstGraph = parseStore.graphs[0];
- setSelectedGraph(firstGraph);
- localStorage.setItem("selectedGraph", firstGraph);
+ // Validate selectedGraph against the current graph list
+ const storedGraph = localStorage.getItem("selectedGraph");
+ const availableGraphs = parseStore?.graphs || [];
+ if (!storedGraph || !availableGraphs.includes(storedGraph)) {
+ if (availableGraphs.length > 0) {
+ const firstGraph = availableGraphs[0];
+ setSelectedGraph(firstGraph);
+ localStorage.setItem("selectedGraph", firstGraph);
+ } else {
+ setSelectedGraph('');
+ localStorage.removeItem("selectedGraph");
+ }
}
// Set default ragPattern if no value in localStorage
@@ -128,7 +135,7 @@ const Bot = ({ layout, getConversationId }: { layout?: string | undefined, getCo
className="!h-[48px] !outline-b !outline-gray-300 dark:!outline-[#3D3D3D] h-[70px] flex justify-end items-center bg-white dark:bg-background z-50 rounded-tr-lg"
>
- {selectedGraph}
+ {selectedGraph || "No Knowledge Graph"}
@@ -136,11 +143,19 @@ const Bot = ({ layout, getConversationId }: { layout?: string | undefined, getCo
Select a KnowledgeGraph
- {store?.graphs.map((f, i) => (
- handleSelect(f)}>
- {f}
+ {store?.graphs?.length > 0 ? (
+ store.graphs.map((f, i) => (
+ handleSelect(f)}>
+ {f}
+
+ ))
+ ) : (
+
+
+ Please create a Knowledge Graph in Setup first
+
- ))}
+ )}
diff --git a/graphrag-ui/src/pages/Setup.tsx b/graphrag-ui/src/pages/Setup.tsx
index 0523cc6..3ec977d 100644
--- a/graphrag-ui/src/pages/Setup.tsx
+++ b/graphrag-ui/src/pages/Setup.tsx
@@ -1,4 +1,4 @@
-import React, { useState, useEffect } from "react";
+import React, { useState, useEffect, useRef } from "react";
import { useNavigate } from "react-router-dom";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
@@ -54,6 +54,7 @@ const Setup = () => {
const [uploadedFiles, setUploadedFiles] = useState([]);
const [isUploading, setIsUploading] = useState(false);
const [uploadMessage, setUploadMessage] = useState("");
+ const [isProcessingFiles, setIsProcessingFiles] = useState(false);
const [isIngesting, setIsIngesting] = useState(false);
const [ingestMessage, setIngestMessage] = useState("");
// Ingestion job data state
@@ -66,6 +67,7 @@ const Setup = () => {
const [refreshMessage, setRefreshMessage] = useState("");
const [refreshGraphName, setRefreshGraphName] = useState("");
const [isRebuildRunning, setIsRebuildRunning] = useState(false);
+ const isRebuildRunningRef = useRef(false);
const [isCheckingStatus, setIsCheckingStatus] = useState(false);
// S3 state
@@ -174,10 +176,11 @@ const [activeTab, setActiveTab] = useState("upload");
// Step 2: Call create_ingest to process uploaded files in background
console.log("Calling handleCreateIngestAfterUpload from main upload...");
+ setIsProcessingFiles(true);
handleCreateIngestAfterUpload("uploaded", uploadedCount).catch((err) => {
console.error("Error in background processing:", err);
setUploadMessage(`❌ Processing error: ${err.message}`);
- });
+ }).finally(() => setIsProcessingFiles(false));
} else {
setUploadMessage(`⚠️ ${data.message}`);
setIsUploading(false);
@@ -249,8 +252,13 @@ const [activeTab, setActiveTab] = useState("upload");
// Step 2: Call create_ingest to process uploaded files
console.log("Calling handleCreateIngestAfterUpload...");
- await handleCreateIngestAfterUpload("uploaded", uploadedCount);
- console.log("handleCreateIngestAfterUpload completed");
+ setIsProcessingFiles(true);
+ try {
+ await handleCreateIngestAfterUpload("uploaded", uploadedCount);
+ console.log("handleCreateIngestAfterUpload completed");
+ } finally {
+ setIsProcessingFiles(false);
+ }
} catch (error: any) {
console.error("Upload error:", error);
setUploadMessage(`❌ Batch upload error: ${error.message}`);
@@ -404,10 +412,11 @@ const [activeTab, setActiveTab] = useState("upload");
await fetchDownloadedFiles();
setIsDownloading(false);
// Step 2: Call create_ingest to process downloaded files in background
+ setIsProcessingFiles(true);
handleCreateIngestAfterUpload("downloaded", downloadCount).catch((err) => {
console.error("Error in background processing:", err);
setDownloadMessage(`❌ Processing error: ${err.message}`);
- });
+ }).finally(() => setIsProcessingFiles(false));
} else if (data.status === "warning") {
setDownloadMessage(`⚠️ ${data.message}`);
setIsDownloading(false);
@@ -860,14 +869,12 @@ const [activeTab, setActiveTab] = useState("upload");
if (statusResponse.ok) {
const statusData = await statusResponse.json();
- const wasRunning = isRebuildRunning;
+ const wasRunning = isRebuildRunningRef.current;
const isCurrentlyRunning = statusData.is_running || false;
setIsRebuildRunning(isCurrentlyRunning);
+ isRebuildRunningRef.current = isCurrentlyRunning;
- if (statusData.status === "error" || statusData.status === "unknown") {
- return;
- }
if (isCurrentlyRunning) {
const startTime = statusData.started_at ? new Date(statusData.started_at * 1000).toLocaleString() : "unknown time";
setRefreshMessage(`⚠️ A rebuild is already in progress for "${graphName}" (started at ${startTime}). Please wait for it to complete.`);
@@ -875,14 +882,21 @@ const [activeTab, setActiveTab] = useState("upload");
setRefreshMessage(`✅ Rebuild completed successfully for "${graphName}".`);
} else if (statusData.status === "failed") {
setRefreshMessage(`❌ Previous rebuild failed: ${statusData.error || "Unknown error"}`);
- } else if (statusData.status === "idle") {
+ } else if (statusData.status === "error") {
+ setRefreshMessage(`❌ Failed to check rebuild status: ${statusData.error || "Unknown error"}`);
+ } else if (statusData.status === "unknown") {
+ setRefreshMessage(`⚠️ ECC service returned unknown status. It may be unavailable.`);
+ } else {
setRefreshMessage("");
}
+ } else {
+ setRefreshMessage(`❌ Failed to check rebuild status (HTTP ${statusResponse.status}).`);
}
} catch (error: any) {
console.error("Error checking rebuild status:", error);
- // On error, don't change state - keep existing message
- // Don't clear existing messages on error
+ if (showLoadingMessage) {
+ setRefreshMessage(`❌ Unable to reach ECC service: ${error.message || "Connection failed"}`);
+ }
} finally {
setIsCheckingStatus(false);
}
@@ -901,17 +915,18 @@ const [activeTab, setActiveTab] = useState("upload");
return;
}
+ setIsRefreshing(true);
+
// Ask user to confirm before proceeding with refresh
const shouldRefresh = await confirm(
`Are you sure you want to refresh the knowledge graph "${refreshGraphName}"? This will rebuild the graph content.`
);
if (!shouldRefresh) {
setRefreshMessage("Operation cancelled by user.");
+ setIsRefreshing(false);
return;
}
- // Check status one final time RIGHT before submitting (to catch any race conditions)
- setIsRefreshing(true);
setRefreshMessage("Verifying rebuild status...");
try {
@@ -930,6 +945,7 @@ const [activeTab, setActiveTab] = useState("upload");
if (statusData.is_running) {
setRefreshMessage(`⚠️ A rebuild is already in progress for "${refreshGraphName}". Please wait for it to complete.`);
setIsRebuildRunning(true);
+ isRebuildRunningRef.current = true;
setIsRefreshing(false);
return;
}
@@ -960,6 +976,7 @@ const [activeTab, setActiveTab] = useState("upload");
setRefreshMessage(`✅ Refresh submitted successfully! The knowledge graph "${refreshGraphName}" is being rebuilt.`);
setIsRebuildRunning(true);
+ isRebuildRunningRef.current = true;
} catch (error: any) {
console.error("Error refreshing graph:", error);
setRefreshMessage(`❌ Error: ${error.message}`);
@@ -1441,19 +1458,6 @@ const [activeTab, setActiveTab] = useState("upload");
)}
- {/* Ingestion Status Message */}
- {ingestMessage && (
-
- {ingestMessage}
-
- )}
-
{/* Uploaded Files List */}
{uploadedFiles.length > 0 && (
@@ -1494,7 +1498,7 @@ const [activeTab, setActiveTab] = useState("upload");
+ {ingestMessage && (
+
+ {ingestMessage}
+
+ )}
)}
@@ -1798,7 +1818,7 @@ const [activeTab, setActiveTab] = useState("upload");