From ba5f7c20012f12fd2cc7df8cbe1a490aeab9ef59 Mon Sep 17 00:00:00 2001 From: Chengbiao Jin Date: Fri, 27 Feb 2026 20:37:48 -0800 Subject: [PATCH] Bug fixes --- LICENSE | 2 +- common/chunkers/html_chunker.py | 2 +- common/chunkers/markdown_chunker.py | 2 +- common/chunkers/recursive_chunker.py | 2 +- common/chunkers/semantic_chunker.py | 2 +- common/config.py | 63 +++++-- common/db/connections.py | 2 +- .../embeddings/tigergraph_embedding_store.py | 154 ++++++++++++++++-- .../LLMEntityRelationshipExtractor.py | 2 +- .../Build_Community_Concepts.gsql | 47 ------ .../concept_creation/Build_Concept_Tree.gsql | 34 ---- .../Build_Entity_Concepts.gsql | 28 ---- .../Build_Relationship_Concepts.gsql | 25 --- ...EntityRelationshipConceptCooccurrence.gsql | 140 ---------------- .../gsql/graphrag/ResolveRelationships.gsql | 26 --- common/gsql/graphrag/StreamDocIds.gsql | 16 -- .../graphrag/entities_have_resolution.gsql | 10 -- .../gsql/graphrag/get_community_children.gsql | 2 +- .../louvain/graphrag_louvain_init.gsql | 28 ++-- common/gsql/graphrag/louvain/modularity.gsql | 6 +- common/gsql/supportai/Scan_For_Updates.gsql | 6 +- .../gsql/supportai/Selected_Set_Display.gsql | 2 +- .../supportai/SupportAI_IndexCreation.gsql | 3 - common/gsql/supportai/SupportAI_Schema.gsql | 22 +-- .../supportai/SupportAI_Schema_Images.gsql | 2 +- .../SupportAI_Schema_Native_Vector.gsql | 11 +- .../retrievers/Chunk_Sibling_Search.gsql | 2 +- .../Chunk_Sibling_Vector_Search.gsql | 2 +- .../retrievers/Content_Similarity_Search.gsql | 4 +- .../Content_Similarity_Vector_Search.gsql | 4 +- .../Entity_Relationship_Retrieval.gsql | 4 +- .../retrievers/GraphRAG_Community_Search.gsql | 6 +- .../GraphRAG_Community_Search_Display.gsql | 6 +- .../GraphRAG_Community_Vector_Search.gsql | 8 +- .../retrievers/GraphRAG_Hybrid_Search.gsql | 4 +- .../GraphRAG_Hybrid_Search_Display.gsql | 4 +- .../GraphRAG_Hybrid_Vector_Search.gsql | 4 +- .../supportai/retrievers/Keyword_Search.gsql | 2 +- common/llm_services/aws_bedrock_service.py | 2 +- common/llm_services/base_llm.py | 16 +- common/llm_services/google_genai_service.py | 2 +- common/llm_services/openai_service.py | 2 +- common/py_schemas/schemas.py | 2 +- common/py_schemas/tool_io_schemas.py | 2 +- common/utils/image_data_extractor.py | 15 +- common/utils/text_extractors.py | 16 +- common/utils/token_calculator.py | 2 +- docs/tutorials/answer_question.py | 2 +- ecc/app/eventual_consistency_checker.py | 30 +--- ecc/app/graphrag/community_summarizer.py | 2 +- ecc/app/graphrag/graph_rag.py | 123 +------------- ecc/app/graphrag/util.py | 73 +++++---- ecc/app/graphrag/workers.py | 106 +----------- ecc/app/main.py | 4 +- ecc/app/supportai/supportai_init.py | 4 +- ecc/app/supportai/util.py | 56 +++++-- ecc/app/supportai/workers.py | 21 +-- graphrag-ui/src/components/Bot.tsx | 35 ++-- graphrag-ui/src/pages/Setup.tsx | 83 ++++++---- graphrag/app/agent.py | 2 +- graphrag/app/agent/agent_generation.py | 2 +- graphrag/app/agent/agent_graph.py | 109 ++++++++++++- graphrag/app/agent/agent_router.py | 2 +- graphrag/app/main.py | 2 +- graphrag/app/routers/inquiryai.py | 20 ++- graphrag/app/routers/supportai.py | 25 +-- graphrag/app/routers/ui.py | 10 +- .../concept_management/create_concepts.py | 103 ------------ graphrag/app/supportai/supportai.py | 4 +- graphrag/app/supportai/supportai_ingest.py | 88 +--------- graphrag/app/tools/generate_cypher.py | 2 +- graphrag/app/tools/generate_function.py | 2 +- graphrag/app/tools/generate_gsql.py | 2 +- graphrag/app/tools/map_question_to_schema.py | 2 +- graphrag/tests/test_supportai.py | 6 - 75 files changed, 620 insertions(+), 1048 deletions(-) delete mode 100644 common/gsql/concept_curation/concept_creation/Build_Community_Concepts.gsql delete mode 100644 common/gsql/concept_curation/concept_creation/Build_Concept_Tree.gsql delete mode 100644 common/gsql/concept_curation/concept_creation/Build_Entity_Concepts.gsql delete mode 100644 common/gsql/concept_curation/concept_creation/Build_Relationship_Concepts.gsql delete mode 100644 common/gsql/concept_curation/concept_creation/getEntityRelationshipConceptCooccurrence.gsql delete mode 100644 common/gsql/graphrag/ResolveRelationships.gsql delete mode 100644 common/gsql/graphrag/StreamDocIds.gsql delete mode 100644 common/gsql/graphrag/entities_have_resolution.gsql delete mode 100644 graphrag/app/supportai/concept_management/create_concepts.py diff --git a/LICENSE b/LICENSE index b1d4446..8c75c7b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,4 @@ - Copyright (c) 2025 TigerGraph, Inc. + Copyright (c) 2024-2026 TigerGraph, Inc. Apache License Version 2.0, January 2004 diff --git a/common/chunkers/html_chunker.py b/common/chunkers/html_chunker.py index ba84666..e598605 100644 --- a/common/chunkers/html_chunker.py +++ b/common/chunkers/html_chunker.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/common/chunkers/markdown_chunker.py b/common/chunkers/markdown_chunker.py index 7799399..87481e7 100644 --- a/common/chunkers/markdown_chunker.py +++ b/common/chunkers/markdown_chunker.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/common/chunkers/recursive_chunker.py b/common/chunkers/recursive_chunker.py index 6b262e1..8fd3bc5 100644 --- a/common/chunkers/recursive_chunker.py +++ b/common/chunkers/recursive_chunker.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/common/chunkers/semantic_chunker.py b/common/chunkers/semantic_chunker.py index 6ba0474..00d34be 100644 --- a/common/chunkers/semantic_chunker.py +++ b/common/chunkers/semantic_chunker.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/common/config.py b/common/config.py index 703d3f8..18a4288 100644 --- a/common/config.py +++ b/common/config.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -86,9 +86,17 @@ completion_config = llm_config.get("completion_service") if completion_config is None: raise Exception("completion_service is not found in llm_config") +if "llm_service" not in completion_config: + raise Exception("llm_service is not found in completion_service") +if "llm_model" not in completion_config: + raise Exception("llm_model is not found in completion_service") embedding_config = llm_config.get("embedding_service") if embedding_config is None: raise Exception("embedding_service is not found in llm_config") +if "embedding_model_service" not in embedding_config: + raise Exception("embedding_model_service is not found in embedding_service") +if "model_name" not in embedding_config: + raise Exception("model_name is not found in embedding_service") embedding_dimension = embedding_config.get("dimensions", 1536) # Get context window size from llm_config @@ -144,8 +152,7 @@ reuse_embedding = graphrag_config.get("reuse_embedding", True) doc_process_switch = graphrag_config.get("doc_process_switch", True) entity_extraction_switch = graphrag_config.get("entity_extraction_switch", doc_process_switch) -entity_resolution_switch = graphrag_config.get("entity_resolution_switch", entity_extraction_switch) -community_detection_switch = graphrag_config.get("community_detection_switch", entity_resolution_switch) +community_detection_switch = graphrag_config.get("community_detection_switch", entity_extraction_switch) if "model_name" not in llm_config or "model_name" not in llm_config["embedding_service"]: if "model_name" not in llm_config: @@ -192,24 +199,41 @@ def get_llm_service(llm_config) -> LLM_Model: else: raise Exception("LLM Completion Service Not Supported") +DEFAULT_MULTIMODAL_MODELS = { + "openai": "gpt-4o-mini", + "azure": "gpt-4o-mini", + "genai": "gemini-3.5-flash", + "vertexai": "gemini-3.5-flash", + "bedrock": "us.anthropic.claude-sonnet-4-5-20250929-v1:0", +} + def get_multimodal_service() -> LLM_Model: """ Get the multimodal/vision LLM service for image description tasks. - Uses multimodal_service if configured, otherwise falls back to completion_service. - Currently supports: OpenAI, Azure, GenAI, VertexAI + Priority: + 1. Explicit multimodal_service config + 2. Auto-derived from completion_service with a default vision model + Currently supports: OpenAI, Azure, GenAI, VertexAI, Bedrock """ - # Use multimodal_service if available, otherwise fallback to completion_service - service_config = multimodal_config if multimodal_config else completion_config - - # Make a copy to avoid modifying the original config - config_copy = service_config.copy() - - # Add default prompt_path if not present (required by LLM service classes but not used for multimodal) + config_copy = completion_config.copy() + + if multimodal_config: + config_copy.update(multimodal_config) + + service_type = config_copy.get("llm_service", "").lower() + + if not multimodal_config or "llm_model" not in multimodal_config: + default_model = DEFAULT_MULTIMODAL_MODELS.get(service_type) + if default_model: + config_copy["llm_model"] = default_model + LogWriter.info( + f"Using default vision model '{default_model}' " + f"for provider '{service_type}'" + ) + if "prompt_path" not in config_copy: config_copy["prompt_path"] = "./common/prompts/openai_gpt4/" - - service_type = config_copy["llm_service"].lower() - + if service_type == "openai": return OpenAI(config_copy) elif service_type == "azure": @@ -218,11 +242,14 @@ def get_multimodal_service() -> LLM_Model: return GoogleGenAI(config_copy) elif service_type == "vertexai": return GoogleVertexAI(config_copy) + elif service_type == "bedrock": + return AWSBedrock(config_copy) else: - raise Exception( - f"Multimodal service '{service_type}' not supported. " - "Only OpenAI, Azure, GenAI, and VertexAI are currently supported for vision tasks." + LogWriter.warning( + f"Multimodal/vision not supported for provider '{service_type}'. " + "Image descriptions will be skipped." ) + return None if os.getenv("INIT_EMBED_STORE", "true") == "true": conn = TigerGraphConnection( diff --git a/common/db/connections.py b/common/db/connections.py index 746669b..4cf21d9 100644 --- a/common/db/connections.py +++ b/common/db/connections.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/common/embeddings/tigergraph_embedding_store.py b/common/embeddings/tigergraph_embedding_store.py index 41c5d2a..748f166 100644 --- a/common/embeddings/tigergraph_embedding_store.py +++ b/common/embeddings/tigergraph_embedding_store.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -39,10 +39,13 @@ def __init__( self, conn: TigerGraphConnection, embedding_service: EmbeddingModel, - support_ai_instance: bool = False + support_ai_instance: bool = False, + default_vector_attribute: str = "embedding", ): self.embedding_service = embedding_service self.support_ai_instance = support_ai_instance + self.default_vector_attribute = default_vector_attribute + self.vector_attr_cache = {} if isinstance(conn.apiToken, tuple): token = conn.apiToken[0] @@ -73,7 +76,7 @@ def __init__( logger.info(f"Done installing GDS library with status {q_res}") if self.conn.graphname and not self.conn.graphname == "MyGraph": current_schema = self.conn.gsql(f"USE GRAPH {self.conn.graphname}\n ls") - if "- embedding(Dimension=" in current_schema: + if "(Dimension=" in current_schema: self.install_vector_queries() logger.info(f"TigerGraph embedding store is initialized with graph {self.conn.graphname}") else: @@ -96,31 +99,31 @@ def install_vector_queries(self): with open(f"common/gsql/vector/{q_name}.gsql", "r") as f: q_body = f.read() q_res = self.conn.gsql( - """USE GRAPH {}\nBEGIN\n{}\nEND\ninstall query {}\n""".format( - self.conn.graphname, q_body, q_name + """USE GRAPH {}\nBEGIN\n{}\nEND\n""".format( + self.conn.graphname, q_body ) ) need_install = True logger.info(f"Done creating vector query {q_name} with status {q_res}") - #TBD if need_install: - logger.info(f"Installing supportai queries all together") + logger.info(f"Installing vector queries all together") query_res = self.conn.gsql( """USE GRAPH {}\nINSTALL QUERY ALL\n""".format( self.conn.graphname ) ) - logger.info(f"Done installing supportai query all with status {query_res}") + logger.info(f"Done installing vector queries with status {query_res}") else: - logger.info(f"Query installation is not needed for supportai") + logger.info(f"All vector queries already installed, skipping.") def set_graphname(self, graphname): self.conn.graphname = graphname + self.vector_attr_cache = {} if self.conn.apiToken or self.conn.jwtToken: self.conn.getToken() if self.conn.graphname and not self.conn.graphname == "MyGraph": current_schema = self.conn.gsql(f"USE GRAPH {self.conn.graphname}\n ls") - if "- embedding(Dimension=" in current_schema: + if "(Dimension=" in current_schema: self.install_vector_queries() def set_connection(self, conn): @@ -143,10 +146,63 @@ def set_connection(self, conn): apiToken = token, ) + self.vector_attr_cache = {} self.install_vector_queries() + def refreshvector_attr_cache(self): + """Parse the graph schema to discover which vertex types have which + vector attributes. Populates ``self.vector_attr_cache`` as + ``{vertex_type: {attr_name, ...}, ...}``. + + The ``ls`` output contains a section like:: + + Vector Embeddings: + - Person: + - embedding(Dimension=1536, ...) + - DocumentChunk: + - embedding(Dimension=1536, ...) + - summary_vec(Dimension=768, ...) + """ + self.vector_attr_cache = {} + try: + schema = self.conn.gsql( + f"USE GRAPH {self.conn.graphname}\n ls" + ) + in_vector_section = False + current_vtype = None + for line in schema.splitlines(): + stripped = line.strip() + if stripped.startswith("Vector Embeddings:"): + in_vector_section = True + continue + if in_vector_section: + if stripped == "" or ( + not stripped.startswith("-") and ":" not in stripped + ): + break + if stripped.startswith("- ") and stripped.endswith(":"): + current_vtype = stripped[2:-1].strip() + self.vector_attr_cache.setdefault(current_vtype, set()) + elif current_vtype and "(" in stripped: + attr_name = stripped.lstrip("- ").split("(")[0].strip() + if attr_name: + self.vector_attr_cache[current_vtype].add(attr_name) + except Exception as e: + logger.warning(f"Failed to refresh vector attribute cache: {e}") + + def has_vector_attribute(self, vertex_type: str, vector_attribute: str) -> bool: + """Check whether *vertex_type* has a vector attribute named + *vector_attribute* according to the cached schema. The cache is + refreshed automatically on the first call or after + ``set_graphname`` / ``set_connection``.""" + if not self.vector_attr_cache: + self.refreshvector_attr_cache() + attrs = self.vector_attr_cache.get(vertex_type) + if attrs is None: + return False + return vector_attribute in attrs + def map_attrs(self, attributes: Iterable[Tuple[str, List[float]]]): - # map attrs attrs = {} for (k, v) in attributes: attrs[k] = {"value": v} @@ -163,6 +219,7 @@ def add_embeddings( embeddings (Iterable[Tuple[str, List[float]]]): Iterable of content and embedding of the document. """ + batch = None try: LogWriter.info( f"request_id={req_id_cv.get()} TigerGraph ENTRY add_embeddings()" @@ -173,15 +230,34 @@ def add_embeddings( "vertices": defaultdict(dict[str, any]), } + skipped = [] + vec_attrs_used = set() for i, (text, _) in enumerate(embeddings): (v_id, v_type) = metadatas[i].get("vertex_id") + vec_attr = metadatas[i].get( + "vector_attribute", self.default_vector_attribute + ) + if not self.has_vector_attribute(v_type, vec_attr): + if (v_type, vec_attr) not in skipped: + LogWriter.warning( + f"Skipping vertex type '{v_type}': " + f"no vector attribute '{vec_attr}' in schema." + ) + skipped.append((v_type, vec_attr)) + continue + vec_attrs_used.add(vec_attr) try: embedding = self.embedding_service.embed_query(text) except Exception as e: LogWriter.error(f"Failed to embed {v_id}: {e}") return - attr = self.map_attrs([("embedding", embedding)]) + attr = self.map_attrs([(vec_attr, embedding)]) batch["vertices"][v_type][v_id] = attr + + if not any(batch["vertices"].values()): + LogWriter.warning("No embeddings to upsert after vector attribute checks.") + return + data = json.dumps(batch) added = self.conn.upsertData(data) @@ -189,7 +265,6 @@ def add_embeddings( LogWriter.info(f"request_id={req_id_cv.get()} TigerGraph EXIT add_embeddings()") - # Check if registration was successful if added: success_message = f"Document registered with id: {added[0]}" LogWriter.info(success_message) @@ -200,7 +275,14 @@ def add_embeddings( raise Exception(error_message) except Exception as e: - error_message = f"An error occurred while registering document: {str(e)}" + v_types = list(batch["vertices"].keys()) if batch else "unknown" + vec_names = vec_attrs_used if vec_attrs_used else {self.default_vector_attribute} + error_message = ( + f"An error occurred while registering document: {str(e)}. " + f"Vertex type(s) in batch: {v_types}. " + f"Vector attribute(s) used: {vec_names}. " + f"Ensure these vertex types have the expected vector attribute." + ) LogWriter.error(error_message) async def aadd_embeddings( @@ -214,6 +296,7 @@ async def aadd_embeddings( embeddings (Iterable[Tuple[str, List[float]]]): Iterable of content and embedding of the document. """ + batch = None try: LogWriter.info( f"request_id={req_id_cv.get()} TigerGraph ENTRY aadd_embeddings()" @@ -224,15 +307,34 @@ async def aadd_embeddings( "vertices": defaultdict(dict[str, any]), } + skipped = [] + vec_attrs_used = set() for i, (text, _) in enumerate(embeddings): (v_id, v_type) = metadatas[i].get("vertex_id") + vec_attr = metadatas[i].get( + "vector_attribute", self.default_vector_attribute + ) + if not self.has_vector_attribute(v_type, vec_attr): + if (v_type, vec_attr) not in skipped: + LogWriter.warning( + f"Skipping vertex type '{v_type}': " + f"no vector attribute '{vec_attr}' in schema." + ) + skipped.append((v_type, vec_attr)) + continue + vec_attrs_used.add(vec_attr) try: embedding = await self.embedding_service.aembed_query(text) except Exception as e: LogWriter.error(f"Failed to embed {v_id}: {e}") return - attr = self.map_attrs([("embedding", embedding)]) + attr = self.map_attrs([(vec_attr, embedding)]) batch["vertices"][v_type][v_id] = attr + + if not any(batch["vertices"].values()): + LogWriter.warning("No embeddings to upsert after vector attribute checks.") + return + data = json.dumps(batch) added = self.conn.upsertData(data) @@ -240,7 +342,6 @@ async def aadd_embeddings( LogWriter.info(f"request_id={req_id_cv.get()} TigerGraph EXIT aadd_embeddings()") - # Check if registration was successful if added: success_message = f"Document {metadatas} registered with status: {added}" LogWriter.info(success_message) @@ -251,7 +352,14 @@ async def aadd_embeddings( raise Exception(error_message) except Exception as e: - error_message = f"An error occurred while registering document: {str(e)}" + v_types = list(batch["vertices"].keys()) if batch else "unknown" + vec_names = vec_attrs_used if vec_attrs_used else {self.default_vector_attribute} + error_message = ( + f"An error occurred while registering document: {str(e)}. " + f"Vertex type(s) in batch: {v_types}. " + f"Vector attribute(s) used: {vec_names}. " + f"Ensure these vertex types have the expected vector attribute." + ) LogWriter.error(error_message) def has_embeddings( @@ -261,6 +369,12 @@ def has_embeddings( ret = True try: for (v_id, v_type) in v_ids: + if not self.has_vector_attribute(v_type, self.default_vector_attribute): + logger.info( + f"Vertex type '{v_type}' has no vector attribute " + f"'{self.default_vector_attribute}', treating as no embedding." + ) + return False res = self.conn.runInstalledQuery( "check_embedding_exists", params={ @@ -285,6 +399,12 @@ def check_embedding_rebuilt( self, v_type: str ): + if not self.has_vector_attribute(v_type, self.default_vector_attribute): + logger.info( + f"Vertex type '{v_type}' has no vector attribute " + f"'{self.default_vector_attribute}', skipping rebuild check." + ) + return False try: res = self.conn.runInstalledQuery( "vertices_have_embedding", diff --git a/common/extractors/LLMEntityRelationshipExtractor.py b/common/extractors/LLMEntityRelationshipExtractor.py index 1251ce9..b81a769 100644 --- a/common/extractors/LLMEntityRelationshipExtractor.py +++ b/common/extractors/LLMEntityRelationshipExtractor.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/common/gsql/concept_curation/concept_creation/Build_Community_Concepts.gsql b/common/gsql/concept_curation/concept_creation/Build_Community_Concepts.gsql deleted file mode 100644 index b302624..0000000 --- a/common/gsql/concept_curation/concept_creation/Build_Community_Concepts.gsql +++ /dev/null @@ -1,47 +0,0 @@ -CREATE OR REPLACE DISTRIBUTED QUERY Build_Community_Concepts (SET v_type_set, SET e_type_set, INT min_comm_size, INT max_comm_size) SYNTAX V1 { -MinAccum @min_cc_id = 0; //each vertex's tentative component id -MapAccum @@comp_sizes_map; -MapAccum> @@comp_group_by_size_map; -SumAccum @@num_concepts_created; - -Start = {v_type_set}; - -# Initialize: Label each vertex with its own internal ID -S = SELECT x - FROM Start:x - POST-ACCUM x.@min_cc_id = getvid(x); - -# Propagate smaller internal IDs until no more ID changes can be Done -WHILE (S.size()>0) DO - S = SELECT t - FROM S:s -(e_type_set:e)- v_type_set:t - ACCUM t.@min_cc_id += s.@min_cc_id // If s has smaller id than t, copy the id to t - HAVING t.@min_cc_id != t.@min_cc_id'; -END; - -Start = {v_type_set}; -Start = SELECT s - FROM Start:s - POST-ACCUM - @@comp_sizes_map += (s.@min_cc_id -> 1); - -FOREACH (compId,size) IN @@comp_sizes_map DO - IF size >= min_comm_size AND size <= max_comm_size THEN - @@num_concepts_created += 1; - v = SELECT s FROM Start:s WHERE s.@min_cc_id == compId - POST-ACCUM - IF s.type == "Relationship" THEN - INSERT INTO Concept VALUES("InCommunity"+to_string(compId), _, _, "COMMUNITY_LEAF", now(), FALSE), - INSERT INTO DESCRIBES_RELATIONSHIP VALUES("InCommunity"+to_string(compId), s) - ELSE IF s.type == "Entity" THEN - INSERT INTO Concept VALUES("InCommunity"+to_string(compId), _, _, "COMMUNITY_LEAF", now(), FALSE), - INSERT INTO DESCRIBES_ENTITY VALUES("InCommunity"+to_string(compId), s) - ELSE IF s.type == "Concept" THEN - INSERT INTO Concept VALUES("InCommunity"+to_string(compId), _, _, "AND", now(), FALSE), - INSERT INTO IS_CHILD_OF VALUES(s, "InCommunity"+to_string(compId)) - END; - END; -END; - -PRINT @@num_concepts_created; -} \ No newline at end of file diff --git a/common/gsql/concept_curation/concept_creation/Build_Concept_Tree.gsql b/common/gsql/concept_curation/concept_creation/Build_Concept_Tree.gsql deleted file mode 100644 index 28ab737..0000000 --- a/common/gsql/concept_curation/concept_creation/Build_Concept_Tree.gsql +++ /dev/null @@ -1,34 +0,0 @@ -CREATE OR REPLACE DISTRIBUTED QUERY Build_Concept_Tree(INT min_cooccurence=10) { - MapAccum> @concept_cooccurence; - SetAccum> @@unmerged_concepts; - SetAccum @source_concepts; - UINT cooccurences; - concepts = {Concept.*}; - - concepts_no_parents = SELECT c FROM concepts:c WHERE c.outdegree("IS_CHILD_OF") == 0 POST-ACCUM @@unmerged_concepts += c; - - res = SELECT c FROM concepts_no_parents:c - POST-ACCUM - FOREACH cpt IN @@unmerged_concepts DO - IF c.concept_type == cpt.concept_type AND c != cpt THEN - c.@concept_cooccurence += (cpt.id -> getEntityRelationshipConceptCooccurrence(c, cpt)) - END - END; - - res = SELECT c FROM res:c - POST-ACCUM - FOREACH (conc, cnt) IN c.@concept_cooccurence DO - IF cnt >= min_cooccurence THEN - IF c.id <= conc THEN - INSERT INTO Concept VALUES (c.id+"_AND_"+conc, _, _, "AND", now(), FALSE), - INSERT INTO IS_CHILD_OF VALUES (c, c.id+"_AND_"+conc), - INSERT INTO IS_CHILD_OF VALUES (conc, c.id+"_AND_"+conc) - ELSE - INSERT INTO Concept VALUES (conc+"_AND_"+c.id, _, _, "AND", now(), FALSE), - INSERT INTO IS_CHILD_OF VALUES (c, conc+"_AND_"+c.id), - INSERT INTO IS_CHILD_OF VALUES (conc, conc+"_AND_"+c.id) - END - END - END; - PRINT res.size(); -} \ No newline at end of file diff --git a/common/gsql/concept_curation/concept_creation/Build_Entity_Concepts.gsql b/common/gsql/concept_curation/concept_creation/Build_Entity_Concepts.gsql deleted file mode 100644 index b599308..0000000 --- a/common/gsql/concept_curation/concept_creation/Build_Entity_Concepts.gsql +++ /dev/null @@ -1,28 +0,0 @@ -CREATE OR REPLACE DISTRIBUTED QUERY Build_Entity_Concepts(/* Parameters here */) { - ListAccum @rel_concepts; - cpts = {Concept.*}; - - relationship_cpts = SELECT c FROM cpts:c WHERE c.concept_type == "RELATIONSHIP_LEAF"; - - rels = SELECT r FROM relationship_cpts:rc -(DESCRIBES_RELATIONSHIP)-> Relationship:r - ACCUM r.@rel_concepts += rc.id - POST-ACCUM - INSERT INTO Concept VALUES("EntitiesAreHeadOf"+rc.id, _, _, "ENTITY_LEAF", now(), FALSE), - INSERT INTO Concept VALUES("EntitiesAreTailOf"+rc.id, _, _, "ENTITY_LEAF", now(), FALSE), - INSERT INTO HAS_RELATIONSHIP VALUES("EntitiesAreHeadOf"+rc.id, rc, "IS_HEAD_OF"), - INSERT INTO HAS_RELATIONSHIP VALUES(rc, "EntitiesAreTailOf"+rc.id, "HAS_TAIL"); - - head_entities = SELECT he FROM rels:r -(reverse_IS_HEAD_OF)-> Entity:he - ACCUM - FOREACH rel IN r.@rel_concepts DO - INSERT INTO DESCRIBES_ENTITY VALUES("EntitiesAreHeadOf"+rel, he) - END; - - tail_entities = SELECT t FROM rels:r -(HAS_TAIL)-> Entity:t - ACCUM - FOREACH rel IN r.@rel_concepts DO - INSERT INTO DESCRIBES_ENTITY VALUES("EntitiesAreTailOf"+rel, t) - END; - - PRINT relationship_cpts; -} \ No newline at end of file diff --git a/common/gsql/concept_curation/concept_creation/Build_Relationship_Concepts.gsql b/common/gsql/concept_curation/concept_creation/Build_Relationship_Concepts.gsql deleted file mode 100644 index 36f9a06..0000000 --- a/common/gsql/concept_curation/concept_creation/Build_Relationship_Concepts.gsql +++ /dev/null @@ -1,25 +0,0 @@ -CREATE OR REPLACE DISTRIBUTED QUERY Build_Relationship_Concepts(INT occurence_min=5) { - MapAccum> @@relationship_count; - SetAccum @@impt_relationships; - SetAccum @@created_concepts; - rels = {Relationship.*}; - res = SELECT s FROM rels:s ACCUM @@relationship_count += (lower(s.short_name) -> 1); - - FOREACH (key, val) IN @@relationship_count DO - IF val >= occurence_min THEN - @@impt_relationships += key; - END; - END; - - res = SELECT ir FROM rels:ir - WHERE lower(ir.short_name) IN @@impt_relationships - POST-ACCUM - STRING tmp = lower(ir.short_name), - IF tmp != "" THEN - @@created_concepts += tmp, - INSERT INTO Concept VALUES (tmp, _, _, "RELATIONSHIP_LEAF", now(), FALSE), - INSERT INTO DESCRIBES_RELATIONSHIP VALUES (tmp, ir) - END; - - PRINT @@created_concepts; -} \ No newline at end of file diff --git a/common/gsql/concept_curation/concept_creation/getEntityRelationshipConceptCooccurrence.gsql b/common/gsql/concept_curation/concept_creation/getEntityRelationshipConceptCooccurrence.gsql deleted file mode 100644 index 11cda35..0000000 --- a/common/gsql/concept_curation/concept_creation/getEntityRelationshipConceptCooccurrence.gsql +++ /dev/null @@ -1,140 +0,0 @@ -CREATE QUERY getEntityRelationshipConceptCooccurrence(VERTEX c1, VERTEX c2) RETURNS(INT){ - SetAccum> @@initialCo, @@coLeafs, @@tempCo; - OrAccum @visited, @excludeCo, @@exclude; - MapAccum>> @@coItrMap; - ArrayAccum>> @@arrayCo[]; - MinAccum @parentType; - MapAccum @@debugLogs; - - INT iter = 0; - //Include & Exclude Logic - - includeConcept = SELECT c FROM Concept:c WHERE c == c1 OR c == c2; - - #includeConcept = {includedConcepts}; - excludeConcept = {}; - - DOC_INC(ANY) = {}; - DOC_EXC(ANY) = {}; - DOC_TEMP(ANY) = {}; - - IF (includeConcept.size() > 0 OR excludeConcept.size() > 0) THEN - - seedIncludeHCC = SELECT s FROM includeConcept:s - ACCUM s.@excludeCo = FALSE, @@initialCo += s; - - // Resize array accum with no of input HCC - @@arrayCo.reallocate(@@initialCo.size()); - - FOREACH cItem IN @@initialCo DO - - @@coLeafs = cItem; - startCo = {cItem}; - - WHILE startCo.size() !=0 DO - - startCo = SELECT t FROM startCo:s -( s) - ELSE IF (s.@parentType == "OR") THEN - @@coItrMap += ("OR" -> s) - ELSE - @@coItrMap += ("OTHER" -> s) - END; - - @@exclude = FALSE; - DOC_TEMP = {}; - - FOREACH (Key, Value) IN @@coItrMap DO - @@tempCo = Value; - FOREACH itm IN @@tempCo DO - - seed = {itm}; - CoEach = SELECT s FROM seed:s - ACCUM @@exclude += s.@excludeCo; - - # can make this simpler by combining entity and relationship in one query - DOC_2HOP_1 = SELECT d FROM CoEach:s -(_>)- (Entity|Relationship):d; # -(_>)- Document:d; - #DOC_2HOP_2 = SELECT d FROM CoEach:s -(_>)- Relationship -(_>)- Document:d; - - IF (DOC_TEMP.size() == 0) THEN - DOC_TEMP = DOC_2HOP_1; - END; - - IF (DOC_2HOP_1.size() > 0) THEN - IF (Key == "AND") THEN - DOC_TEMP = DOC_TEMP INTERSECT DOC_2HOP_1; - ELSE - DOC_TEMP = DOC_TEMP UNION DOC_2HOP_1; - END; - /* - ELSE IF (DOC_2HOP_2.size() > 0) THEN - IF (Key == "AND") THEN - DOC_TEMP = DOC_TEMP INTERSECT DOC2_HOP_2; - ELSE - DOC_TEMP = DOC_TEMP UNION DOC2_HOP_2; - END; - END; - */ - END; - END; - IF (@@exclude != TRUE) THEN - IF(DOC_INC.size() == 0) THEN - DOC_INC = DOC_TEMP; - END; - DOC_INC = DOC_INC INTERSECT DOC_TEMP; - ELSE - IF(DOC_EXC.size() == 0) THEN - DOC_EXC = DOC_TEMP; - END; - DOC_EXC = DOC_EXC INTERSECT DOC_TEMP; - END; - - @@coItrMap.clear(); - - END; // End of Array Loop - - MYDOCS = DOC_INC; - - END; - - IF(includeConcept.size() == 0) THEN - - MyEntities = {Entity.*}; - MyRels = {Relationship.*}; - MYDOCS = MyEntities UNION MyRels; - - END; - - //Remove docs from exclude list - - MYDOCS = MYDOCS MINUS DOC_EXC; - - #PRINT MYDOCS.size() as numberOfMatchingDocs; - #PRINT MYDOCS as RetrievedDocs; - PRINT MYDOCS.size(); - RETURN MYDOCS.size(); -} \ No newline at end of file diff --git a/common/gsql/graphrag/ResolveRelationships.gsql b/common/gsql/graphrag/ResolveRelationships.gsql deleted file mode 100644 index 493746f..0000000 --- a/common/gsql/graphrag/ResolveRelationships.gsql +++ /dev/null @@ -1,26 +0,0 @@ -CREATE OR REPLACE DISTRIBUTED QUERY ResolveRelationships(BOOL printResults=FALSE) SYNTAX V2 { - /* - * RE1 <- entity -RELATES-> entity -> RE2 - * to - * RE1 -resolved-> RE - * - * Combines all of a Resolved entity's children's relationships into - * RESOLVED_RELATIONSHIP - */ - REs = {ResolvedEntity.*}; - - - REs = SELECT re1 FROM REs:re1 -(:rel)- Entity:e_tgt -(RESOLVES_TO>:r)- ResolvedEntity:re2 - // Connect the The first RE to the second RE - ACCUM - INSERT INTO RESOLVED_RELATIONSHIP(FROM,TO, relation_type) VALUES(re1, re2, rel.relation_type); - - - IF printResults THEN - // show which entities didn't get resolved - Ents = {Entity.*}; - rEnts = SELECT e FROM Ents:e -(RESOLVES_TO>)- _; - ents = Ents minus rEnts; - PRINT ents; - END; -} diff --git a/common/gsql/graphrag/StreamDocIds.gsql b/common/gsql/graphrag/StreamDocIds.gsql deleted file mode 100644 index 996d4c8..0000000 --- a/common/gsql/graphrag/StreamDocIds.gsql +++ /dev/null @@ -1,16 +0,0 @@ -CREATE OR REPLACE DISTRIBUTED QUERY StreamDocIds(INT current_batch, INT ttl_batches) { - /* - * Get the IDs of documents that have not already been processed (one - * batch at a time) - */ - ListAccum @@doc_ids; - Docs = {Document.*}; - - Docs = SELECT d FROM Docs:d - WHERE vertex_to_int(d) % ttl_batches == current_batch - AND d.epoch_processed == 0 - ACCUM @@doc_ids += d.id - POST-ACCUM d.epoch_processed = datetime_to_epoch(now()); // set the processing time - - PRINT @@doc_ids; -} diff --git a/common/gsql/graphrag/entities_have_resolution.gsql b/common/gsql/graphrag/entities_have_resolution.gsql deleted file mode 100644 index 4e50af7..0000000 --- a/common/gsql/graphrag/entities_have_resolution.gsql +++ /dev/null @@ -1,10 +0,0 @@ -CREATE OR REPLACE DISTRIBUTED QUERY entities_have_resolution() SYNTAX V2{ - SumAccum @@resolved; - Ents = {Entity.*}; - Ents = SELECT s FROM Ents:s -(RESOLVES_TO>)- ResolvedEntity - POST-ACCUM(s) @@resolved += 1; - - - PRINT (@@resolved >= Ents.size()) as all_resolved; - PRINT @@resolved, Ents.size(); -} diff --git a/common/gsql/graphrag/get_community_children.gsql b/common/gsql/graphrag/get_community_children.gsql index a55bf0b..0c49493 100644 --- a/common/gsql/graphrag/get_community_children.gsql +++ b/common/gsql/graphrag/get_community_children.gsql @@ -5,7 +5,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY get_community_children(Vertex com Comms = SELECT t FROM Comms:c -()- ResolvedEntity -(_>)- Entity:t; + Ents = SELECT t FROM Comms:c -( community, STRING ext_vid> Move; + TYPEDEF TUPLE community, STRING ext_vid> Move; SumAccum @@m; // the sum of the weights of all the links in the network - MinAccum> @community_id; // the community ID of the node + MinAccum> @community_id; // the community ID of the node MinAccum @community_vid; // the community ID of the node SumAccum @k; // the sum of the weights of the links incident to the node SumAccum @k_in; // the sum of the weights of the links inside the previous community of the node SumAccum @k_self_loop; // the weight of the self-loop link - MapAccum, SumAccum> @community_k_in_map; // the community of the neighbors of the nodes -> the sum of the weights of the links inside the community - MapAccum, SumAccum> @@community_sum_total_map; // community ID C -> the sum of the weights of the links incident to nodes in C + MapAccum, SumAccum> @community_k_in_map; // the community of the neighbors of the nodes -> the sum of the weights of the links inside the community + MapAccum, SumAccum> @@community_sum_total_map; // community ID C -> the sum of the weights of the links incident to nodes in C SumAccum @community_sum_total; // the sum of the weights of the links incident to nodes in the community of the node - MapAccum, SumAccum> @@community_sum_in_map; // community ID -> the sum of the weights of the links inside the community + MapAccum, SumAccum> @@community_sum_in_map; // community ID -> the sum of the weights of the links inside the community MapAccum>> @@source_target_k_in_map; // source community ID -> (target community ID -> the sum of the weights of the links from the source community to the target community) SumAccum @delta_Q_remove; // delta Q to remove the node from the previous community MaxAccum @best_move; // best move of the node with the highest delta Q to move the isolated node into the new community @@ -22,7 +22,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN SumAccum @batch_id; MinAccum @vid; - AllNodes = {ResolvedEntity.*}; + AllNodes = {Entity.*}; DOUBLE wt = 1.0; // prevent multiple init runs @@ -39,7 +39,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN s.@community_vid = s.id, // external id s.@vid = getvid(s), // internal id (used in batching) s.@batch_id = s.@vid % n_batches; // get batch number - z = SELECT s FROM AllNodes:s -(_)-> ResolvedEntity:t + z = SELECT s FROM AllNodes:s -(_)-> Entity:t ACCUM s.@k += wt, @@m += 1; @@ -52,7 +52,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN WHILE Candidates.size() > 0 AND hop < max_hop DO hop += 1; IF hop == 1 THEN // first iteration - ChangedNodes = SELECT s FROM Candidates:s -(_:e)-> ResolvedEntity:t + ChangedNodes = SELECT s FROM Candidates:s -(_:e)-> Entity:t WHERE s.@community_id != t.@community_id // can't move within the same community ACCUM DOUBLE dq = 1 - s.@k * t.@k / (2 * @@m), @@ -79,7 +79,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN // process nodes in batch FOREACH batch_id IN RANGE[0, n_batches-1] DO - Nodes = SELECT s FROM Candidates:s -(_:e)-> ResolvedEntity:t + Nodes = SELECT s FROM Candidates:s -(_:e)-> Entity:t WHERE s.@batch_id == batch_id ACCUM IF s.@community_id == t.@community_id THEN @@ -96,7 +96,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN s.@best_move = Move(@@min_double, s, to_string(s.id)); // reset best move // find the best move - Nodes = SELECT s FROM Nodes:s -(_:e)-> ResolvedEntity:t + Nodes = SELECT s FROM Nodes:s -(_:e)-> Entity:t WHERE s.@community_id != t.@community_id ACCUM DOUBLE dq = 2 * s.@community_k_in_map.get(t.@community_id) - s.@k * t.@community_sum_total / @@m, @@ -113,7 +113,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN END; END; // If two nodes swap, only change the community of one of them - SwapNodes = SELECT s FROM ChangedNodes:s -(_:e)-> ResolvedEntity:t + SwapNodes = SELECT s FROM ChangedNodes:s -(_:e)-> Entity:t WHERE s.@best_move.community == t.@community_id AND t.@to_change_community AND t.@best_move.community == s.@community_id @@ -143,14 +143,14 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN @@move_cnt += ChangedNodes.size(); // Get all neighbours of the changed node that do not belong to the node’s new community - Candidates = SELECT t FROM ChangedNodes:s -(_:e)-> ResolvedEntity:t + Candidates = SELECT t FROM ChangedNodes:s -(_:e)-> Entity:t WHERE t.@community_id != s.@community_id; END; // Coarsening UINT new_layer = 0; @@community_sum_total_map.clear(); - Tmp = SELECT s FROM AllNodes:s -(_:e)-> ResolvedEntity:t + Tmp = SELECT s FROM AllNodes:s -(_:e)-> Entity:t ACCUM s.@has_relations += TRUE, t.@has_relations += TRUE, @@ -178,7 +178,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY graphrag_louvain_init(UINT max_hop = 10, UIN @@community_sum_total_map.clear(); // link communities - Tmp = SELECT s FROM AllNodes:s -(_:e)-> ResolvedEntity:t + Tmp = SELECT s FROM AllNodes:s -(_:e)-> Entity:t WHERE s.@community_vid != t.@community_vid ACCUM DOUBLE w = @@source_target_k_in_map.get(s.@community_vid).get(t.@community_vid), diff --git a/common/gsql/graphrag/louvain/modularity.gsql b/common/gsql/graphrag/louvain/modularity.gsql index c9fa1be..cab6255 100644 --- a/common/gsql/graphrag/louvain/modularity.gsql +++ b/common/gsql/graphrag/louvain/modularity.gsql @@ -19,16 +19,16 @@ CREATE OR REPLACE DISTRIBUTED QUERY modularity(UINT iteration=1) SYNTAX V2 { WHERE c.iteration == i ACCUM t.@parent = c.@parent; END; - Entities = SELECT t FROM Comms:c -(_>)- ResolvedEntity:t + Entities = SELECT t FROM Comms:c -(_>)- Entity:t ACCUM t.@community_id = c.@parent; ELSE - Entities = SELECT t FROM Comms:c -(_>)- ResolvedEntity:t + Entities = SELECT t FROM Comms:c -(_>)- Entity:t WHERE c.iteration == iteration ACCUM t.@community_id = c.id; END; - Nodes = SELECT s FROM Entities:s -(_>:e)- ResolvedEntity:t + Nodes = SELECT s FROM Entities:s -(_>:e)- Entity:t ACCUM IF s.@community_id == t.@community_id THEN @@community_in_weight_map += (s.@community_id -> wt) diff --git a/common/gsql/supportai/Scan_For_Updates.gsql b/common/gsql/supportai/Scan_For_Updates.gsql index c9af981..9f065c9 100644 --- a/common/gsql/supportai/Scan_For_Updates.gsql +++ b/common/gsql/supportai/Scan_For_Updates.gsql @@ -25,15 +25,11 @@ CREATE OR REPLACE DISTRIBUTED QUERY Scan_For_Updates(STRING v_type = "Document", res = SELECT s FROM start:s -(HAS_CONTENT)-> Content:c ACCUM @@v_and_text += (s.id -> TextInfo(c.ctype, c.text)) POST-ACCUM s.epoch_processing = datetime_to_epoch(now()); - // ELSE IF v_type == "Concept" THEN - // res = SELECT s FROM start:s - // POST-ACCUM @@v_and_text += (s.id -> s.description), - // s.epoch_processing = datetime_to_epoch(now()); ELSE IF v_type == "Entity" THEN res = SELECT s FROM start:s POST-ACCUM @@v_and_text += (s.id -> TextInfo("", s.definition)), s.epoch_processing = datetime_to_epoch(now()); - ELSE IF v_type == "Relationship" THEN + ELSE IF v_type == "RelationshipType" THEN res = SELECT s FROM start:s POST-ACCUM @@v_and_text += (s.id -> TextInfo("", s.definition)), s.epoch_processing = datetime_to_epoch(now()); diff --git a/common/gsql/supportai/Selected_Set_Display.gsql b/common/gsql/supportai/Selected_Set_Display.gsql index c61b52a..2f94864 100644 --- a/common/gsql/supportai/Selected_Set_Display.gsql +++ b/common/gsql/supportai/Selected_Set_Display.gsql @@ -18,7 +18,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY Selected_Set_Display(STRING json_list_vts) { res = SELECT s FROM start:s POST-ACCUM - IF s.type == "Relationship" THEN + IF s.type == "RelationshipType" THEN s.@context += s.definition ELSE IF s.type == "Entity" THEN STRING tmp_dsc = s.id + " " + s.definition, diff --git a/common/gsql/supportai/SupportAI_IndexCreation.gsql b/common/gsql/supportai/SupportAI_IndexCreation.gsql index 439ba18..fa2d768 100644 --- a/common/gsql/supportai/SupportAI_IndexCreation.gsql +++ b/common/gsql/supportai/SupportAI_IndexCreation.gsql @@ -5,7 +5,4 @@ CREATE SCHEMA_CHANGE JOB add_supportai_indexes { ALTER VERTEX DocumentChunk ADD INDEX doc_chunk_epoch_added_index ON (epoch_added); ALTER VERTEX DocumentChunk ADD INDEX doc_chunk_epoch_processing_index ON (epoch_processing); ALTER VERTEX DocumentChunk ADD INDEX doc_chunk_epoch_processed_index ON (epoch_processed); - ALTER VERTEX Concept ADD INDEX concept_epoch_added_index ON (epoch_added); - ALTER VERTEX Concept ADD INDEX concept_epoch_processing_index ON (epoch_processing); - ALTER VERTEX Concept ADD INDEX concept_epoch_processed_index ON (epoch_processed); } \ No newline at end of file diff --git a/common/gsql/supportai/SupportAI_Schema.gsql b/common/gsql/supportai/SupportAI_Schema.gsql index 94fcc9e..c756fd3 100644 --- a/common/gsql/supportai/SupportAI_Schema.gsql +++ b/common/gsql/supportai/SupportAI_Schema.gsql @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025 TigerGraph, Inc. + * Copyright (c) 2024-2026 TigerGraph, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,36 +17,26 @@ CREATE SCHEMA_CHANGE JOB add_supportai_schema { ADD VERTEX DocumentChunk(PRIMARY_ID id STRING, idx INT, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX Document(PRIMARY_ID id STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; - ADD VERTEX Concept(PRIMARY_ID id STRING, description STRING, concept_type STRING, human_curated BOOL, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX Entity(PRIMARY_ID id STRING, definition STRING, description SET, entity_type STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; - ADD VERTEX Relationship(PRIMARY_ID id STRING, definition STRING, short_name STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; - ADD VERTEX DocumentCollection(PRIMARY_ID id STRING, epoch_added UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; + ADD VERTEX RelationshipType(PRIMARY_ID id STRING, definition STRING, short_name STRING, epoch_added UINT, epoch_processing UINT, epoch_processed UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX Content(PRIMARY_ID id STRING, ctype STRING, text STRING, epoch_added UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD VERTEX EntityType(PRIMARY_ID id STRING, description STRING, epoch_added UINT) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD DIRECTED EDGE HAS_CONTENT(FROM Document, TO Content|FROM DocumentChunk, TO Content) WITH REVERSE_EDGE="reverse_HAS_CONTENT"; - ADD DIRECTED EDGE IS_CHILD_OF(FROM Concept, TO Concept) WITH REVERSE_EDGE="reverse_IS_CHILD_OF"; - ADD DIRECTED EDGE IS_HEAD_OF(FROM Entity, TO Relationship) WITH REVERSE_EDGE="reverse_IS_HEAD_OF"; - ADD DIRECTED EDGE HAS_TAIL(FROM Relationship, TO Entity) WITH REVERSE_EDGE="reverse_HAS_TAIL"; - ADD DIRECTED EDGE DESCRIBES_RELATIONSHIP(FROM Concept, TO Relationship) WITH REVERSE_EDGE="reverse_DESCRIBES_RELATIONSHIP"; - ADD DIRECTED EDGE DESCRIBES_ENTITY(FROM Concept, TO Entity) WITH REVERSE_EDGE="reverse_DESCRIBES_ENTITY"; + ADD DIRECTED EDGE IS_HEAD_OF(FROM Entity, TO RelationshipType) WITH REVERSE_EDGE="reverse_IS_HEAD_OF"; + ADD DIRECTED EDGE HAS_TAIL(FROM RelationshipType, TO Entity) WITH REVERSE_EDGE="reverse_HAS_TAIL"; ADD DIRECTED EDGE CONTAINS_ENTITY(FROM DocumentChunk, TO Entity|FROM Document, TO Entity) WITH REVERSE_EDGE="reverse_CONTAINS_ENTITY"; - ADD DIRECTED EDGE MENTIONS_RELATIONSHIP(FROM DocumentChunk, TO Relationship|FROM Document, TO Relationship) WITH REVERSE_EDGE="reverse_MENTIONS_RELATIONSHIP"; + ADD DIRECTED EDGE MENTIONS_RELATIONSHIP(FROM DocumentChunk, TO RelationshipType|FROM Document, TO RelationshipType) WITH REVERSE_EDGE="reverse_MENTIONS_RELATIONSHIP"; ADD DIRECTED EDGE IS_AFTER(FROM DocumentChunk, TO DocumentChunk) WITH REVERSE_EDGE="reverse_IS_AFTER"; ADD DIRECTED EDGE HAS_CHILD(FROM Document, TO DocumentChunk) WITH REVERSE_EDGE="reverse_HAS_CHILD"; - ADD DIRECTED EDGE HAS_RELATIONSHIP(FROM Concept, TO Concept, relation_type STRING) WITH REVERSE_EDGE="reverse_HAS_RELATIONSHIP"; - ADD DIRECTED EDGE CONTAINS_DOCUMENT(FROM DocumentCollection, TO Document) WITH REVERSE_EDGE="reverse_CONTAINS_DOCUMENT"; ADD DIRECTED EDGE ENTITY_HAS_TYPE(FROM Entity, TO EntityType) WITH REVERSE_EDGE="reverse_ENTITY_HAS_TYPE"; ADD DIRECTED EDGE RELATIONSHIP_TYPE(FROM EntityType, TO EntityType, DISCRIMINATOR(relation_type STRING), frequency INT) WITH REVERSE_EDGE="reverse_RELATIONSHIP_TYPE"; // GraphRAG ADD VERTEX Community (PRIMARY_ID id STRING, iteration UINT, description STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; - ADD VERTEX ResolvedEntity(PRIMARY_ID id STRING, entity_type STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"; ADD DIRECTED EDGE RELATIONSHIP(FROM Entity, TO Entity, relation_type STRING) WITH REVERSE_EDGE="reverse_RELATIONSHIP"; - ADD DIRECTED EDGE RESOLVES_TO(FROM Entity, TO ResolvedEntity, relation_type STRING) WITH REVERSE_EDGE="reverse_RESOLVES_TO"; // Connect ResolvedEntities with their children entities - ADD DIRECTED EDGE RESOLVED_RELATIONSHIP(FROM ResolvedEntity, TO ResolvedEntity, relation_type STRING) WITH REVERSE_EDGE="reverse_RESOLVED_RELATIONSHIP"; // store edges between entities after they're resolved - ADD DIRECTED EDGE IN_COMMUNITY(FROM ResolvedEntity, TO Community) WITH REVERSE_EDGE="reverse_IN_COMMUNITY"; + ADD DIRECTED EDGE IN_COMMUNITY(FROM Entity, TO Community) WITH REVERSE_EDGE="reverse_IN_COMMUNITY"; ADD DIRECTED EDGE LINKS_TO (from Community, to Community, weight DOUBLE) WITH REVERSE_EDGE="reverse_LINKS_TO"; ADD DIRECTED EDGE HAS_PARENT (from Community, to Community) WITH REVERSE_EDGE="reverse_HAS_PARENT"; } diff --git a/common/gsql/supportai/SupportAI_Schema_Images.gsql b/common/gsql/supportai/SupportAI_Schema_Images.gsql index a05ffd6..e6f2e5c 100644 --- a/common/gsql/supportai/SupportAI_Schema_Images.gsql +++ b/common/gsql/supportai/SupportAI_Schema_Images.gsql @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025 TigerGraph, Inc. + * Copyright (c) 2024-2026 TigerGraph, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/common/gsql/supportai/SupportAI_Schema_Native_Vector.gsql b/common/gsql/supportai/SupportAI_Schema_Native_Vector.gsql index f778caa..4db81a6 100644 --- a/common/gsql/supportai/SupportAI_Schema_Native_Vector.gsql +++ b/common/gsql/supportai/SupportAI_Schema_Native_Vector.gsql @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025 TigerGraph, Inc. + * Copyright (c) 2024-2026 TigerGraph, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,14 +14,7 @@ * limitations under the License. */ -CREATE SCHEMA_CHANGE JOB add_supportai_vector { +CREATE SCHEMA_CHANGE JOB add_graphrag_vector { ALTER VERTEX DocumentChunk ADD VECTOR ATTRIBUTE embedding(dimension=1536, metric="cosine"); - ALTER VERTEX Document ADD VECTOR ATTRIBUTE embedding(dimension=1536, metric="cosine"); - ALTER VERTEX Concept ADD VECTOR ATTRIBUTE embedding(dimension=1536, metric="cosine"); - ALTER VERTEX Entity ADD VECTOR ATTRIBUTE embedding(dimension=1536, metric="cosine"); - ALTER VERTEX Relationship ADD VECTOR ATTRIBUTE embedding(dimension=1536, metric="cosine"); - - // GraphRAG ALTER VERTEX Community ADD VECTOR ATTRIBUTE embedding(dimension=1536, metric="cosine"); - ALTER VERTEX ResolvedEntity ADD VECTOR ATTRIBUTE embedding(dimension=1536, metric="cosine"); } diff --git a/common/gsql/supportai/retrievers/Chunk_Sibling_Search.gsql b/common/gsql/supportai/retrievers/Chunk_Sibling_Search.gsql index f64dae2..d98051a 100644 --- a/common/gsql/supportai/retrievers/Chunk_Sibling_Search.gsql +++ b/common/gsql/supportai/retrievers/Chunk_Sibling_Search.gsql @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025 TigerGraph, Inc. + * Copyright (c) 2024-2026 TigerGraph, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/common/gsql/supportai/retrievers/Chunk_Sibling_Vector_Search.gsql b/common/gsql/supportai/retrievers/Chunk_Sibling_Vector_Search.gsql index dc8d520..a09b045 100644 --- a/common/gsql/supportai/retrievers/Chunk_Sibling_Vector_Search.gsql +++ b/common/gsql/supportai/retrievers/Chunk_Sibling_Vector_Search.gsql @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025 TigerGraph, Inc. + * Copyright (c) 2024-2026 TigerGraph, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/common/gsql/supportai/retrievers/Content_Similarity_Search.gsql b/common/gsql/supportai/retrievers/Content_Similarity_Search.gsql index 269ba9f..ed917e0 100644 --- a/common/gsql/supportai/retrievers/Content_Similarity_Search.gsql +++ b/common/gsql/supportai/retrievers/Content_Similarity_Search.gsql @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025 TigerGraph, Inc. + * Copyright (c) 2024-2026 TigerGraph, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY Content_Similarity_Search(STRING json_list_v @@final_retrieval += (s.id -> tgt.text) END POST-ACCUM - IF s.type == "Relationship" OR s.type == "Entity" OR s.type == "Concept" THEN + IF s.type == "RelationshipType" OR s.type == "Entity" THEN @@final_retrieval += (s.id -> s.definition) ELSE IF s.type == "Community" THEN @@final_retrieval += (s.id -> s.description) diff --git a/common/gsql/supportai/retrievers/Content_Similarity_Vector_Search.gsql b/common/gsql/supportai/retrievers/Content_Similarity_Vector_Search.gsql index d9bb0de..24648d9 100644 --- a/common/gsql/supportai/retrievers/Content_Similarity_Vector_Search.gsql +++ b/common/gsql/supportai/retrievers/Content_Similarity_Vector_Search.gsql @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025 TigerGraph, Inc. + * Copyright (c) 2024-2026 TigerGraph, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,7 +38,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY Content_Similarity_Vector_Search(STRING v_ty @@final_retrieval += (s.id -> tgt.text) END POST-ACCUM - IF s.type == "Relationship" OR s.type == "Entity" OR s.type == "Concept" THEN + IF s.type == "RelationshipType" OR s.type == "Entity" THEN @@final_retrieval += (s.id -> s.definition) ELSE IF s.type == "Community" THEN @@final_retrieval += (s.id -> s.description) diff --git a/common/gsql/supportai/retrievers/Entity_Relationship_Retrieval.gsql b/common/gsql/supportai/retrievers/Entity_Relationship_Retrieval.gsql index fd0c954..7e38a52 100644 --- a/common/gsql/supportai/retrievers/Entity_Relationship_Retrieval.gsql +++ b/common/gsql/supportai/retrievers/Entity_Relationship_Retrieval.gsql @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025 TigerGraph, Inc. + * Copyright (c) 2024-2026 TigerGraph, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY Entity_Relationship_Retrieval(SET en SetAccum @@starting_rels; ents = {Entity.*}; - rels = {Relationship.*}; + rels = {RelationshipType.*}; FOREACH ent IN entities DO STRING search_param = lower(ent); diff --git a/common/gsql/supportai/retrievers/GraphRAG_Community_Search.gsql b/common/gsql/supportai/retrievers/GraphRAG_Community_Search.gsql index 58f92e9..099a453 100644 --- a/common/gsql/supportai/retrievers/GraphRAG_Community_Search.gsql +++ b/common/gsql/supportai/retrievers/GraphRAG_Community_Search.gsql @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025 TigerGraph, Inc. + * Copyright (c) 2024-2026 TigerGraph, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,11 +45,11 @@ CREATE OR REPLACE DISTRIBUTED QUERY GraphRAG_Community_Search(STRING json_list_v POST-ACCUM @@verbose_info += ("community_level_"+to_string(i-1) -> s.@children); END; IF with_doc THEN - related_chunks = SELECT c FROM Content:c -()- DocumentChunk:dc -(CONTAINS_ENTITY>)- Entity:v -(RESOLVES_TO>)- ResolvedEntity:r -(IN_COMMUNITY>)- selected_comms:m + related_chunks = SELECT c FROM Content:c -()- DocumentChunk:dc -(CONTAINS_ENTITY>)- Entity:v -(IN_COMMUNITY>)- selected_comms:m ACCUM m.@context += c.text, m.@children += d POST-ACCUM @@verbose_info += ("related_chunks" -> m.@children); ELSE - related_chunks = SELECT c FROM Content:c -()- Entity:v -(RESOLVES_TO>)- ResolvedEntity:r -(IN_COMMUNITY>)- selected_comms:m + related_chunks = SELECT c FROM Content:c -()- Entity:v -(IN_COMMUNITY>)- selected_comms:m ACCUM m.@context += c.text, m.@children += d POST-ACCUM @@verbose_info += ("related_chunks" -> m.@children); END; diff --git a/common/gsql/supportai/retrievers/GraphRAG_Community_Search_Display.gsql b/common/gsql/supportai/retrievers/GraphRAG_Community_Search_Display.gsql index bdfef52..939890b 100644 --- a/common/gsql/supportai/retrievers/GraphRAG_Community_Search_Display.gsql +++ b/common/gsql/supportai/retrievers/GraphRAG_Community_Search_Display.gsql @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025 TigerGraph, Inc. + * Copyright (c) 2024-2026 TigerGraph, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,8 +41,8 @@ CREATE OR REPLACE DISTRIBUTED QUERY GraphRAG_Community_Search_Display(STRING jso WHERE s.iteration == i ACCUM s.@children += c, @@edges += e; END; - related_chunks = SELECT c FROM Content:c -(:e2)- Entity:v -(RESOLVES_TO>:e3)- ResolvedEntity:r -(IN_COMMUNITY>:e4)- selected_comms:m - ACCUM m.@context += c.text, m.@children += d, @@edges += e1, @@edges += e2, @@edges += e3, @@edges += e4; + related_chunks = SELECT c FROM Content:c -(:e2)- Entity:v -(IN_COMMUNITY>:e3)- selected_comms:m + ACCUM m.@context += c.text, m.@children += d, @@edges += e1, @@edges += e2, @@edges += e3; END; selected_comms = SELECT c FROM selected_comms:c WHERE c.type == "Community" diff --git a/common/gsql/supportai/retrievers/GraphRAG_Community_Vector_Search.gsql b/common/gsql/supportai/retrievers/GraphRAG_Community_Vector_Search.gsql index 7777bca..08af49b 100644 --- a/common/gsql/supportai/retrievers/GraphRAG_Community_Vector_Search.gsql +++ b/common/gsql/supportai/retrievers/GraphRAG_Community_Vector_Search.gsql @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025 TigerGraph, Inc. + * Copyright (c) 2024-2026 TigerGraph, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,15 +40,15 @@ CREATE OR REPLACE DISTRIBUTED QUERY GraphRAG_Community_Vector_Search(LIST POST-ACCUM @@verbose_info += ("community_level_"+to_string(i-1) -> s.@children); END; start_chunks = vectorSearch({DocumentChunk.embedding}, query_vector, top_k); - extra_selected_comms = SELECT m FROM start_chunks:dc -(CONTAINS_ENTITY>)- Entity:v -(RESOLVES_TO>)- ResolvedEntity:r -(IN_COMMUNITY>)- Community:m; + extra_selected_comms = SELECT m FROM start_chunks:dc -(CONTAINS_ENTITY>)- Entity:v -(IN_COMMUNITY>)- Community:m; selected_comms = selected_comms UNION extra_selected_comms; IF with_doc THEN - related_chunks = SELECT c FROM Content:c -()- DocumentChunk:dc -(CONTAINS_ENTITY>)- Entity:v -(RESOLVES_TO>)- ResolvedEntity:r -(IN_COMMUNITY>)- selected_comms:m + related_chunks = SELECT c FROM Content:c -()- DocumentChunk:dc -(CONTAINS_ENTITY>)- Entity:v -(IN_COMMUNITY>)- selected_comms:m ACCUM m.@context += c.text, m.@children += d, @@edges += EdgeTypes(m, d) POST-ACCUM @@verbose_info += ("related_chunks" -> m.@children); ELSE - related_chunks = SELECT c FROM Content:c -()- Entity:v -(RESOLVES_TO>)- ResolvedEntity:r -(IN_COMMUNITY>)- selected_comms:m + related_chunks = SELECT c FROM Content:c -()- Entity:v -(IN_COMMUNITY>)- selected_comms:m ACCUM m.@context += c.text, m.@children += d, @@edges += EdgeTypes(m, d) POST-ACCUM @@verbose_info += ("related_chunks" -> m.@children); END; diff --git a/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search.gsql b/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search.gsql index ea8fa29..9d68c00 100644 --- a/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search.gsql +++ b/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search.gsql @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025 TigerGraph, Inc. + * Copyright (c) 2024-2026 TigerGraph, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -66,7 +66,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY GraphRAG_Hybrid_Search(STRING json_list_vts res = SELECT s FROM start:s WHERE s.@num_times_seen >= num_seen_min AND s.type != "Document" ACCUM - IF s.type == "Relationship" THEN + IF s.type == "RelationshipType" THEN s.@context += s.definition ELSE IF s.type == "Entity" THEN STRING tmp_dsc = "Entity: " + replace(s.id, "_", " ") + ", Description: " + s.definition, diff --git a/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search_Display.gsql b/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search_Display.gsql index 6eec379..6a6f9b9 100644 --- a/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search_Display.gsql +++ b/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Search_Display.gsql @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025 TigerGraph, Inc. + * Copyright (c) 2024-2026 TigerGraph, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -70,7 +70,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY GraphRAG_Hybrid_Search_Display(STRING json_l res = SELECT s FROM start:s WHERE s.@num_times_seen >= num_seen_min AND s.type != "Document" ACCUM - IF s.type == "Relationship" THEN + IF s.type == "RelationshipType" THEN s.@context += s.definition ELSE IF s.type == "Entity" THEN STRING tmp_dsc = s.id + " " + s.definition, diff --git a/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Vector_Search.gsql b/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Vector_Search.gsql index 971e367..2816156 100644 --- a/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Vector_Search.gsql +++ b/common/gsql/supportai/retrievers/GraphRAG_Hybrid_Vector_Search.gsql @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025 TigerGraph, Inc. + * Copyright (c) 2024-2026 TigerGraph, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -72,7 +72,7 @@ CREATE OR REPLACE DISTRIBUTED QUERY GraphRAG_Hybrid_Vector_Search(Set v_ res = SELECT s FROM start:s WHERE s.@num_times_seen >= num_seen_min AND s.type != "Document" ACCUM - IF s.type == "Relationship" THEN + IF s.type == "RelationshipType" THEN s.@context += s.definition ELSE IF s.type == "Entity" THEN STRING tmp_dsc = "Entity: " + replace(s.id, "_", " ") + ", Description: " + s.definition, diff --git a/common/gsql/supportai/retrievers/Keyword_Search.gsql b/common/gsql/supportai/retrievers/Keyword_Search.gsql index 9f1763f..fdfdb3f 100644 --- a/common/gsql/supportai/retrievers/Keyword_Search.gsql +++ b/common/gsql/supportai/retrievers/Keyword_Search.gsql @@ -1,5 +1,5 @@ /* - * Copyright (c) 2025 TigerGraph, Inc. + * Copyright (c) 2024-2026 TigerGraph, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/common/llm_services/aws_bedrock_service.py b/common/llm_services/aws_bedrock_service.py index a4eb05f..ba1b114 100644 --- a/common/llm_services/aws_bedrock_service.py +++ b/common/llm_services/aws_bedrock_service.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/common/llm_services/base_llm.py b/common/llm_services/base_llm.py index 93ec0cf..bf159fb 100644 --- a/common/llm_services/base_llm.py +++ b/common/llm_services/base_llm.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -160,6 +160,20 @@ def graphrag_scoring_prompt(self): """Property to get the prompt for the GraphRAG Scoring response.""" return """You are a helpful assistant responsible for generating an answer to the question below using the data provided.\nInclude a quality score for the answer, based on how well it answers the question. The quality score should be between 0 (poor) and 100 (excellent).\n\nQuestion: {question}\nContext: {context}\n\n{format_instructions}\n""" + @property + def contextualize_question_prompt(self): + """Property to get the prompt for contextualizing a follow-up question + into a standalone search query using conversation history.""" + return ( + "Given the following conversation history and a follow-up " + "question, rewrite the follow-up question into a standalone, " + "self-contained question suitable for searching a knowledge " + "graph. Do NOT answer the question; only rewrite it.\n\n" + "Conversation history:\n{history}\n\n" + "Follow-up question: {question}\n\n" + "Standalone question:" + ) + @property def model(self): """Property to get the external LLM model.""" diff --git a/common/llm_services/google_genai_service.py b/common/llm_services/google_genai_service.py index 6ac91a2..c544978 100644 --- a/common/llm_services/google_genai_service.py +++ b/common/llm_services/google_genai_service.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/common/llm_services/openai_service.py b/common/llm_services/openai_service.py index 8903b58..f23e81b 100644 --- a/common/llm_services/openai_service.py +++ b/common/llm_services/openai_service.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/common/py_schemas/schemas.py b/common/py_schemas/schemas.py index c5aee80..cd46fa6 100644 --- a/common/py_schemas/schemas.py +++ b/common/py_schemas/schemas.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/common/py_schemas/tool_io_schemas.py b/common/py_schemas/tool_io_schemas.py index 6a9f6bb..474212f 100644 --- a/common/py_schemas/tool_io_schemas.py +++ b/common/py_schemas/tool_io_schemas.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/common/utils/image_data_extractor.py b/common/utils/image_data_extractor.py index 4e8036a..19da86e 100644 --- a/common/utils/image_data_extractor.py +++ b/common/utils/image_data_extractor.py @@ -6,6 +6,14 @@ logger = logging.getLogger(__name__) +_multimodal_client = None + +def _get_client(): + global _multimodal_client + if _multimodal_client is None: + _multimodal_client = get_multimodal_service() + return _multimodal_client + def describe_image_with_llm(file_path): """ Read image file and convert to base64 to send to LLM. @@ -13,9 +21,9 @@ def describe_image_with_llm(file_path): try: from PIL import Image as PILImage - client = get_multimodal_service() + client = _get_client() if not client: - return "[Image: Failed to create multimodal LLM client]" + return "Image: Failed to create multimodal LLM client" # Read image and convert to base64 pil_image = PILImage.open(file_path) buffer = io.BytesIO() @@ -51,5 +59,4 @@ def describe_image_with_llm(file_path): return response.content if hasattr(response, "content") else str(response) except Exception as e: logger.error(f"Failed to describe image with LLM: {str(e)}") - return "[Image: Error processing image description]" - + return "Image: Error processing image description" diff --git a/common/utils/text_extractors.py b/common/utils/text_extractors.py index e0be6b2..e2bd6df 100644 --- a/common/utils/text_extractors.py +++ b/common/utils/text_extractors.py @@ -40,12 +40,14 @@ def insert_description_by_id(md_text, image_id, description): """ Replace the description for an image whose basename == image_id. """ + safe_desc = description.replace("[", "(").replace("]", ")") + def repl(m): old_path = m.group(2) candidate_id = os.path.splitext(os.path.basename(old_path))[0] if candidate_id == image_id: - return f'![{description}]({old_path})' + return f'![{safe_desc}]({old_path})' return m.group(0) return _md_pattern.sub(repl, md_text) @@ -384,7 +386,7 @@ def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None): image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8") image_counter += 1 - image_doc_id = f"{base_doc_id}_image_{image_counter}" + image_doc_id = f"{base_doc_id}_image_{image_counter}".lower() # Replace file path with tg:// protocol reference in markdown markdown_content = replace_path_with_tg_protocol( @@ -408,6 +410,13 @@ def _extract_pdf_with_images_as_docs(file_path, base_doc_id, graphname=None): except Exception as img_error: logger.warning(f"Failed to process image {img_ref.get('path')}: {img_error}") + failed_path = img_ref.get("path", "") + if failed_path: + markdown_content = re.sub( + r'!\[.*?\]\(' + re.escape(failed_path) + r'\)', + "", + markdown_content, + ) # FINAL CLEANUP — delete folder after processing everything if image_output_folder.exists() and image_output_folder.is_dir(): @@ -463,8 +472,7 @@ def _extract_standalone_image_as_doc(file_path, base_doc_id, graphname=None): pil_image.save(buffer, format="JPEG", quality=95) image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8') - image_id = f"{base_doc_id}_image_1" - # Put description as text, then markdown image reference with short alt text + image_id = f"{base_doc_id}_image_1".lower() content = f"![{description}](tg://{image_id})" return [ { diff --git a/common/utils/token_calculator.py b/common/utils/token_calculator.py index 2157515..762e824 100644 --- a/common/utils/token_calculator.py +++ b/common/utils/token_calculator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/docs/tutorials/answer_question.py b/docs/tutorials/answer_question.py index 5f494c2..d29bf65 100644 --- a/docs/tutorials/answer_question.py +++ b/docs/tutorials/answer_question.py @@ -27,7 +27,7 @@ query, method="hybrid", method_parameters = { - "indices": ["Document", "DocumentChunk", "Entity", "Relationship"], + "indices": ["DocumentChunk", "Community"], "top_k": 2, "num_hops": 2, "num_seen_min": 2, diff --git a/ecc/app/eventual_consistency_checker.py b/ecc/app/eventual_consistency_checker.py index dad7d7d..499bdc7 100644 --- a/ecc/app/eventual_consistency_checker.py +++ b/ecc/app/eventual_consistency_checker.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -131,26 +131,6 @@ def _upsert_entities(self, src_id, src_type, entities): for x in entities ], ) - self.conn.upsertVertices( - "Concept", - [ - ( - x["type"], - { - "description": "", - "concept_type": "EntityType", - "epoch_added": date_added, - }, - ) - for x in entities - ], - ) - self.conn.upsertEdges( - "Concept", - "DESCRIBES_ENTITY", - "Entity", - [(x["type"], x["id"], {}) for x in entities], - ) self.conn.upsertEdges( src_type, "CONTAINS_ENTITY", @@ -162,7 +142,7 @@ def _upsert_entities(self, src_id, src_type, entities): def _upsert_rels(self, src_id, src_type, relationships): date_added = int(time.time()) self.conn.upsertVertices( - "Relationship", + "RelationshipType", [ ( x["source"] + ":" + x["type"] + ":" + x["target"], @@ -178,14 +158,14 @@ def _upsert_rels(self, src_id, src_type, relationships): self.conn.upsertEdges( "Entity", "IS_HEAD_OF", - "Relationship", + "RelationshipType", [ (x["source"], x["source"] + ":" + x["type"] + ":" + x["target"], {}) for x in relationships ], ) self.conn.upsertEdges( - "Relationship", + "RelationshipType", "HAS_TAIL", "Entity", [ @@ -196,7 +176,7 @@ def _upsert_rels(self, src_id, src_type, relationships): self.conn.upsertEdges( src_type, "MENTIONS_RELATIONSHIP", - "Relationship", + "RelationshipType", [ (src_id, x["source"] + ":" + x["type"] + ":" + x["target"], {}) for x in relationships diff --git a/ecc/app/graphrag/community_summarizer.py b/ecc/app/graphrag/community_summarizer.py index 64c7053..0bab35b 100644 --- a/ecc/app/graphrag/community_summarizer.py +++ b/ecc/app/graphrag/community_summarizer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/ecc/app/graphrag/graph_rag.py b/ecc/app/graphrag/graph_rag.py index cfbab3d..d702407 100644 --- a/ecc/app/graphrag/graph_rag.py +++ b/ecc/app/graphrag/graph_rag.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -23,9 +23,7 @@ from aiochannel import Channel, ChannelClosed from graphrag import workers from graphrag.util import ( - check_all_ents_resolved, check_vertex_has_desc, - check_embedding_rebuilt, http_timeout, init, load_q, @@ -38,7 +36,7 @@ ) from pyTigerGraph import AsyncTigerGraphConnection -from common.config import embedding_service, graphrag_config, entity_extraction_switch, entity_resolution_switch, community_detection_switch, doc_process_switch +from common.config import embedding_service, graphrag_config, entity_extraction_switch, community_detection_switch, doc_process_switch from common.embeddings.base_embedding_store import EmbeddingStore from common.extractors.BaseExtractor import BaseExtractor @@ -306,7 +304,7 @@ async def extract( else: if entity_extraction_switch: grp.create_task( - workers.extract(upsert_chan, embed_chan, extractor, conn, *item) + workers.extract(upsert_chan, extractor, conn, *item) ) except ChannelClosed: break @@ -321,81 +319,11 @@ async def extract( embed_chan.close() -async def stream_entities( - conn: AsyncTigerGraphConnection, - entity_chan: Channel, - ttl_batches: int = 50, -): - """ - Streams entity IDs from the grpah - """ - logger.info("Entity Streaming Start") - for i in range(ttl_batches): - ids = await stream_ids(conn, "Entity", i, ttl_batches) - if ids["error"]: - logger.info(f"""Error streaming batch {i}: got {ids["error"]}""") - # continue to the next batch. - continue - - for i in ids["ids"]: - if len(i) > 0: - await entity_chan.put((i, "Entity")) - - logger.info("Entity Streaming End") - # close the docs chan -- this function is the only sender - logger.info("closing entities chan") - entity_chan.close() - - -async def resolve_entities( - conn: AsyncTigerGraphConnection, - emb_store: EmbeddingStore, - entity_chan: Channel, - upsert_chan: Channel, -): - """ - Merges entities into their ResolvedEntity form - Groups what should be the same entity into a resolved entity (e.g. V_type and VType should be merged) - - Copies edges between entities to their respective ResolvedEntities - """ - logger.info("Entity Resolving Start") - async with asyncio.TaskGroup() as grp: - # for every entity - while True: - try: - entity_id = await entity_chan.get() - grp.create_task( - workers.resolve_entity(conn, upsert_chan, emb_store, entity_id) - ) - logger.debug(f"Added Entity to resolve: {entity_id}") - except ChannelClosed: - break - except Exception: - raise - logger.info("Entity Resolving End") - logger.info("closing upsert_chan") - upsert_chan.close() - logger.info("resolve_entities done") - -async def resolve_relationships( - conn: AsyncTigerGraphConnection -): - """ - Copy RELATIONSHIP edges to RESOLVED_RELATIONSHIP - """ - logger.info("Relationship Resolving Start") - async with tg_sem: - res = await conn.runInstalledQuery( - "ResolveRelationships" - ) - logger.info("Relationship Resolving End") - async def communities(conn: AsyncTigerGraphConnection, comm_process_chan: Channel): """ Run louvain """ - # first pass: Group ResolvedEntities into Communities + # first pass: Group Entities into Communities logger.info("Initializing Communities (first louvain pass)") async with tg_sem: @@ -423,7 +351,7 @@ async def communities(conn: AsyncTigerGraphConnection, comm_process_chan: Channe logger.info(f"****mod pass 1: {mod}") await stream_communities(conn, 1, comm_process_chan) - # nth pass: Iterate on Resolved Entities until modularity stops increasing + # nth pass: Iterate on Communities until modularity stops increasing prev_mod = -10 i = 0 while abs(prev_mod - mod) > 0.0000001 and prev_mod != 0: @@ -527,10 +455,9 @@ async def run(graphname: str, conn: AsyncTigerGraphConnection): - Process the documents into: - chunks - embeddings - - entities/relationships (and their embeddings) + - entities/relationships - upsert everything to the graph - - Resolve Entities - Ex: "Vincent van Gogh" and "van Gogh" should be resolved to "Vincent van Gogh" + - Detect communities and summarize them """ extractor, embedding_store = await init(conn) @@ -586,40 +513,6 @@ async def run(graphname: str, conn: AsyncTigerGraphConnection): logger.info("Type Processing End") type_end = time.perf_counter() - # Entity Resolution - entity_start = time.perf_counter() - if entity_resolution_switch: - logger.info("Entity Processing Start") - while not await check_embedding_rebuilt(conn, "Entity"): - logger.info(f"Waiting for embedding to finish rebuilding") - await asyncio.sleep(1) - entities_chan = Channel() - upsert_chan = Channel() - load_q.reopen() - async with asyncio.TaskGroup() as grp: - grp.create_task(stream_entities(conn, entities_chan, 50)) - grp.create_task( - resolve_entities( - conn, - embedding_store, - entities_chan, - upsert_chan, - ) - ) - grp.create_task(upsert(upsert_chan)) - grp.create_task(load(conn)) - logger.info("Join entities_chan") - await entities_chan.join() - logger.info("Join upsert_chan") - await upsert_chan.join() - #Resolve relationsihps - await resolve_relationships(conn) - while not await check_all_ents_resolved(conn): - logger.info(f"Waiting for resolved entites to finish loading") - await asyncio.sleep(1) - entity_end = time.perf_counter() - logger.info("Entity Processing End") - # Community Detection community_start = time.perf_counter() if community_detection_switch: @@ -648,10 +541,8 @@ async def run(graphname: str, conn: AsyncTigerGraphConnection): community_end = time.perf_counter() logger.info("Community Processing End") - # Community Summarization end = time.perf_counter() logger.info(f"DONE. graphrag system initializer dT: {init_end-init_start}") - logger.info(f"DONE. graphrag entity resolution dT: {entity_end-entity_start}") logger.info(f"DONE. graphrag type creation dT: {type_end-type_start}") logger.info( f"DONE. graphrag community initializer dT: {community_end-community_start}" diff --git a/ecc/app/graphrag/util.py b/ecc/app/graphrag/util.py index 77e2860..f581057 100644 --- a/ecc/app/graphrag/util.py +++ b/ecc/app/graphrag/util.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ import asyncio import base64 +import json import logging import re import traceback @@ -39,7 +40,7 @@ http_timeout = httpx.Timeout(15.0) -tg_sem = asyncio.Semaphore(2) +tg_sem = asyncio.Semaphore(graphrag_config.get("tg_concurrency", 10)) load_q = reusable_channel.ReuseableChannel() # will pause workers until the event is false @@ -50,44 +51,72 @@ async def install_queries( requried_queries: list[str], conn: AsyncTigerGraphConnection, ): - # queries that are currently installed installed_queries = [q.split("/")[-1] for q in await conn.getEndpoints(dynamic=True) if f"/{conn.graphname}/" in q] - # doesn't need to be parallel since tg only does it one at a time + required_names = set() for q in requried_queries: - # only install n queries at a time (n=n_workers) q_name = q.split("/")[-1] - # if the query is not installed, install it + required_names.add(q_name) if q_name not in installed_queries: res = await workers.install_query(conn, q, False) - # stop system if a required query doesn't install if res["error"]: raise Exception(res["message"]) logger.info(f"Successfully created query '{q_name}'.") - query = f"""\ -USE GRAPH {conn.graphname} -INSTALL QUERY ALL -""" + + if required_names.issubset(set(installed_queries)): + logger.info("All required queries already installed, skipping INSTALL QUERY ALL.") + return + + logger.info("Submitting INSTALL QUERY ALL ...") + query = f"USE GRAPH {conn.graphname}\nINSTALL QUERY ALL\n" async with tg_sem: res = await conn.gsql(query) - if "error" in res: + logger.info(f"INSTALL QUERY ALL returned: {str(res)[:200]}") + if isinstance(res, str) and "error" in res.lower(): raise Exception(res) - logger.info("Finished processing all required queries.") + max_wait = 600 # seconds + poll_interval = 10 + elapsed = 0 + while elapsed < max_wait: + ready = [ + q.split("/")[-1] + for q in await conn.getEndpoints(dynamic=True) + if f"/{conn.graphname}/" in q + ] + missing = required_names - set(ready) + if not missing: + break + logger.info( + f"Waiting for query installation to finish " + f"({len(missing)} remaining: {', '.join(sorted(missing))})" + ) + await asyncio.sleep(poll_interval) + elapsed += poll_interval + else: + raise Exception( + f"Query installation timed out after {max_wait}s. " + f"Still missing: {', '.join(sorted(missing))}" + ) + + logger.info("All required queries installed and verified.") async def init( conn: AsyncTigerGraphConnection, ) -> tuple[BaseExtractor, dict[str, EmbeddingStore]]: + """Initialize extractors and embedding store. + + Returns: + (extractor, embedding_store) + """ # install requried queries requried_queries = [ "common/gsql/graphrag/StreamIds", "common/gsql/graphrag/StreamDocContent", "common/gsql/graphrag/StreamChunkContent", "common/gsql/graphrag/SetEpochProcessing", - "common/gsql/graphrag/ResolveRelationships", "common/gsql/graphrag/get_community_children", - "common/gsql/graphrag/entities_have_resolution", "common/gsql/graphrag/communities_have_desc", "common/gsql/graphrag/get_vertices_or_remove", "common/gsql/graphrag/louvain/graphrag_louvain_init", @@ -280,20 +309,6 @@ async def get_commuinty_children(conn, i: int, c: str): return descrs -async def check_all_ents_resolved(conn): - try: - async with tg_sem: - resp = await conn.runInstalledQuery( - "entities_have_resolution" - ) - except Exception as e: - logger.error(f"Check Vert Desc err:\n{e}") - - res = resp[0]["all_resolved"] - logger.info(resp) - - return res - async def add_rels_between_types(conn): try: async with tg_sem: diff --git a/ecc/app/graphrag/workers.py b/ecc/app/graphrag/workers.py index a5dbbca..8bcdd78 100644 --- a/ecc/app/graphrag/workers.py +++ b/ecc/app/graphrag/workers.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ import logging import time import json -import traceback from urllib.parse import quote_plus from typing import Iterable, List, Optional, Tuple @@ -226,7 +225,6 @@ async def get_vert_desc(conn, v_id, node: Node): async def extract( upsert_chan: Channel, - embed_chan: Channel, extractor: BaseExtractor, conn: AsyncTigerGraphConnection, chunk: str, @@ -256,14 +254,9 @@ async def extract( continue desc = await get_vert_desc(conn, v_id, node) - # embed the entity - # embed with the v_id if the description is blank if len(desc[0]) == 0: desc[0] = str(node.id) - # (v_id, content, index_name) - await embed_chan.put((v_id, desc[0], "Entity")) - await upsert_chan.put( ( util.upsert_vertex, # func to call @@ -357,10 +350,7 @@ async def extract( continue desc = await get_vert_desc(conn, v_id, edge.source) if len(desc[0]) == 0: - await embed_chan.put((v_id, v_id, "Entity")) - else: - # (v_id, content, index_name) - await embed_chan.put((v_id, desc[0], "Entity")) + desc[0] = edge.source.id await upsert_chan.put( ( util.upsert_vertex, # func to call @@ -380,10 +370,7 @@ async def extract( continue desc = await get_vert_desc(conn, v_id, edge.target) if len(desc[0]) == 0: - await embed_chan.put((v_id, v_id, "Entity")) - else: - # (v_id, content, index_name) - await embed_chan.put((v_id, desc[0], "Entity")) + desc[0] = edge.target.id await upsert_chan.put( ( util.upsert_vertex, # func to call @@ -414,96 +401,11 @@ async def extract( ), ) ) - # embed "Relationship", + # embed "RelationshipType", # (v_id, content, index_name) # right now, we're not embedding relationships in graphrag -resolve_sem = asyncio.Semaphore(20) - - -async def resolve_entity( - conn: AsyncTigerGraphConnection, - upsert_chan: Channel, - embed_store: EmbeddingStore, - entity_id: str | Tuple[str, str], -): - """ - get all vectors of E (one name can have multiple discriptions) - get ents close to E - for e in ents: - if e is 95% similar to E and edit_dist(E,e) <=3: - merge - mark e as processed - - mark as processed - """ - - # if loader is running, wait until it's done - if not util.loading_event.is_set(): - logger.info("Entity Resolution worker waiting for loading event to finish") - await util.loading_event.wait() - - async with resolve_sem: - try: - logger.info(f"Resolving Entity {entity_id}") - results = await embed_store.aget_k_closest(entity_id) - logger.info(f"Resolving Entity {entity_id} to {results}") - - except Exception: - err = traceback.format_exc() - logger.error(err) - return - - if len(results) == 0: - logger.error( - f"aget_k_closest should, minimally, return the entity itself.\n{results}" - ) - raise Exception() - - # merge all entities into the ResolvedEntity vertex - # use the longest v_id as the resolved entity's v_id - if isinstance(entity_id, tuple): - resolved_entity_id = entity_id[0] - else: - resolved_entity_id = entity_id - for v in results: - if len(v) > len(resolved_entity_id): - resolved_entity_id = v - - logger.debug(f"Merging {results} to ResolvedEntity {resolved_entity_id}") - # upsert the resolved entity - await upsert_chan.put( - ( - util.upsert_vertex, # func to call - ( - conn, - "ResolvedEntity", # v_type - resolved_entity_id, # v_id - { # attrs - }, - ), - ) - ) - - # create RESOLVES_TO edges from each entity to the ResolvedEntity - for v in results: - await upsert_chan.put( - ( - util.upsert_edge, - ( - conn, - "Entity", # src_type - v, # src_id - "RESOLVES_TO", # edge_type - "ResolvedEntity", # tgt_type - resolved_entity_id, # tgt_id - None, # attributes - ), - ) - ) - - comm_sem = asyncio.Semaphore(20) diff --git a/ecc/app/main.py b/ecc/app/main.py index 5f64981..5468391 100644 --- a/ecc/app/main.py +++ b/ecc/app/main.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -99,7 +99,7 @@ def initialize_eventual_consistency_checker( ) index_names = graphrag_config.get( "indexes", - ["Document", "DocumentChunk", "Entity", "Relationship", "Concept"], + ["DocumentChunk", "Community"], ) if graphrag_config.get("extractor") == "llm": diff --git a/ecc/app/supportai/supportai_init.py b/ecc/app/supportai/supportai_init.py index 25dc78b..8d59a5b 100644 --- a/ecc/app/supportai/supportai_init.py +++ b/ecc/app/supportai/supportai_init.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -168,7 +168,7 @@ async def extract( async for item in extract_chan: if entity_extraction_switch: sp.create_task( - workers.extract(upsert_chan, embed_chan, extractor, conn, *item) + workers.extract(upsert_chan, extractor, conn, *item) ) logger.info(f"extract done") diff --git a/ecc/app/supportai/util.py b/ecc/app/supportai/util.py index e9c1f29..d3906ca 100644 --- a/ecc/app/supportai/util.py +++ b/ecc/app/supportai/util.py @@ -32,40 +32,64 @@ async def install_queries( requried_queries: list[str], conn: TigerGraphConnection, ): - # queries that are currently installed installed_queries = [q.split("/")[-1] for q in await conn.getEndpoints(dynamic=True) if f"/{conn.graphname}/" in q] - # doesn't need to be parallel since tg only does it one at a time + required_names = set() for q in requried_queries: - # only install n queries at a time (n=n_workers) q_name = q.split("/")[-1] - # if the query is not installed, install it + required_names.add(q_name) if q_name not in installed_queries: - logger.info(f"Query '{q_name}' not found in installed queries. Attempting to install...") + logger.info(f"Query '{q_name}' not found in installed queries. Attempting to create...") try: res = await workers.install_query(conn, q, False) - # stop system if a required query doesn't install if res["error"]: logger.error(f"Failed to create query '{q_name}'. Error: {res['message']}") - raise Exception(f"Installation of query '{q_name}' failed with message: {res['message']}") + raise Exception(f"Creation of query '{q_name}' failed with message: {res['message']}") else: logger.info(f"Successfully created query '{q_name}'.") - except Exception as e: - logger.critical(f"Critical error during installation of query '{q_name}': {e}") + logger.critical(f"Critical error during creation of query '{q_name}': {e}") raise e else: logger.info(f"Query '{q_name}' is already installed.") - query = f"""\ -USE GRAPH {conn.graphname} -INSTALL QUERY ALL -""" + + if required_names.issubset(set(installed_queries)): + logger.info("All required queries already installed, skipping INSTALL QUERY ALL.") + return + + logger.info("Submitting INSTALL QUERY ALL ...") + query = f"USE GRAPH {conn.graphname}\nINSTALL QUERY ALL\n" async with tg_sem: res = await conn.gsql(query) - if "error" in res: + logger.info(f"INSTALL QUERY ALL returned: {str(res)[:200]}") + if isinstance(res, str) and "error" in res.lower(): raise Exception(res) - - logger.info("Finished processing all required queries.") + + max_wait = 300 # seconds + poll_interval = 5 + elapsed = 0 + while elapsed < max_wait: + ready = [ + q.split("/")[-1] + for q in await conn.getEndpoints(dynamic=True) + if f"/{conn.graphname}/" in q + ] + missing = required_names - set(ready) + if not missing: + break + logger.info( + f"Waiting for query installation to finish " + f"({len(missing)} remaining: {', '.join(sorted(missing))})" + ) + await asyncio.sleep(poll_interval) + elapsed += poll_interval + else: + raise Exception( + f"Query installation timed out after {max_wait}s. " + f"Still missing: {', '.join(sorted(missing))}" + ) + + logger.info("All required queries installed and verified.") diff --git a/ecc/app/supportai/workers.py b/ecc/app/supportai/workers.py index 300bee4..30fc9ab 100644 --- a/ecc/app/supportai/workers.py +++ b/ecc/app/supportai/workers.py @@ -1,4 +1,4 @@ -# Copyright (c) 2025 TigerGraph, Inc. +# Copyright (c) 2024-2026 TigerGraph, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -177,7 +177,6 @@ async def get_vert_desc(conn, v_id, node: Node): async def extract( upsert_chan: Channel, - embed_chan: Channel, extractor: BaseExtractor, conn: TigerGraphConnection, chunk: str, @@ -194,12 +193,8 @@ async def extract( continue desc = await get_vert_desc(conn, v_id, node) - # embed the entity - # embed with the v_id if the description is blank - if len(desc[0]): - await embed_chan.put((v_id, v_id, "Entity")) - else: - await embed_chan.put((v_id, desc[0], "Entity")) + if len(desc[0]) == 0: + desc[0] = str(node.id) await upsert_chan.put( ( @@ -238,15 +233,13 @@ async def extract( v_id = edge.type if len(v_id) == 0: continue - # embed "Relationship" - await embed_chan.put((v_id, v_id, "Relationship")) await upsert_chan.put( ( util.upsert_vertex, # func to call ( conn, - "Relationship", # v_type + "RelationshipType", # v_type v_id, { # attrs "epoch_added": int(time.time()), @@ -300,7 +293,7 @@ async def extract( "Entity", # src_type util.process_id(edge.source.id), # src_id "IS_HEAD_OF", # edgeType - "Relationship", # tgt_type + "RelationshipType", # tgt_type edge.type, # tgt_id ), ) @@ -310,7 +303,7 @@ async def extract( util.upsert_edge, ( conn, - "Relationship", # src_type + "RelationshipType", # src_type edge.type, # src_id "HAS_TAIL", # edgeType "Entity", # tgt_type @@ -329,7 +322,7 @@ async def extract( "DocumentChunk", # src_type chunk_id, # src_id "MENTIONS_RELATIONSHIP", # edge_type - "Relationship", # tgt_type + "RelationshipType", # tgt_type edge.type, # tgt_id ), ) diff --git a/graphrag-ui/src/components/Bot.tsx b/graphrag-ui/src/components/Bot.tsx index 3d31aca..1f4e4e6 100644 --- a/graphrag-ui/src/components/Bot.tsx +++ b/graphrag-ui/src/components/Bot.tsx @@ -38,11 +38,18 @@ const Bot = ({ layout, getConversationId }: { layout?: string | undefined, getCo // Initial load const parseStore = loadStore(); - // Set default selectedGraph to first graph if no value in localStorage - if (!localStorage.getItem("selectedGraph") && parseStore?.graphs?.length > 0) { - const firstGraph = parseStore.graphs[0]; - setSelectedGraph(firstGraph); - localStorage.setItem("selectedGraph", firstGraph); + // Validate selectedGraph against the current graph list + const storedGraph = localStorage.getItem("selectedGraph"); + const availableGraphs = parseStore?.graphs || []; + if (!storedGraph || !availableGraphs.includes(storedGraph)) { + if (availableGraphs.length > 0) { + const firstGraph = availableGraphs[0]; + setSelectedGraph(firstGraph); + localStorage.setItem("selectedGraph", firstGraph); + } else { + setSelectedGraph(''); + localStorage.removeItem("selectedGraph"); + } } // Set default ragPattern if no value in localStorage @@ -128,7 +135,7 @@ const Bot = ({ layout, getConversationId }: { layout?: string | undefined, getCo className="!h-[48px] !outline-b !outline-gray-300 dark:!outline-[#3D3D3D] h-[70px] flex justify-end items-center bg-white dark:bg-background z-50 rounded-tr-lg" > - {selectedGraph} + {selectedGraph || No Knowledge Graph} @@ -136,11 +143,19 @@ const Bot = ({ layout, getConversationId }: { layout?: string | undefined, getCo Select a KnowledgeGraph - {store?.graphs.map((f, i) => ( - handleSelect(f)}> - {f} + {store?.graphs?.length > 0 ? ( + store.graphs.map((f, i) => ( + handleSelect(f)}> + {f} + + )) + ) : ( + + + Please create a Knowledge Graph in Setup first + - ))} + )} diff --git a/graphrag-ui/src/pages/Setup.tsx b/graphrag-ui/src/pages/Setup.tsx index 0523cc6..3ec977d 100644 --- a/graphrag-ui/src/pages/Setup.tsx +++ b/graphrag-ui/src/pages/Setup.tsx @@ -1,4 +1,4 @@ -import React, { useState, useEffect } from "react"; +import React, { useState, useEffect, useRef } from "react"; import { useNavigate } from "react-router-dom"; import { Button } from "@/components/ui/button"; import { Input } from "@/components/ui/input"; @@ -54,6 +54,7 @@ const Setup = () => { const [uploadedFiles, setUploadedFiles] = useState([]); const [isUploading, setIsUploading] = useState(false); const [uploadMessage, setUploadMessage] = useState(""); + const [isProcessingFiles, setIsProcessingFiles] = useState(false); const [isIngesting, setIsIngesting] = useState(false); const [ingestMessage, setIngestMessage] = useState(""); // Ingestion job data state @@ -66,6 +67,7 @@ const Setup = () => { const [refreshMessage, setRefreshMessage] = useState(""); const [refreshGraphName, setRefreshGraphName] = useState(""); const [isRebuildRunning, setIsRebuildRunning] = useState(false); + const isRebuildRunningRef = useRef(false); const [isCheckingStatus, setIsCheckingStatus] = useState(false); // S3 state @@ -174,10 +176,11 @@ const [activeTab, setActiveTab] = useState("upload"); // Step 2: Call create_ingest to process uploaded files in background console.log("Calling handleCreateIngestAfterUpload from main upload..."); + setIsProcessingFiles(true); handleCreateIngestAfterUpload("uploaded", uploadedCount).catch((err) => { console.error("Error in background processing:", err); setUploadMessage(`❌ Processing error: ${err.message}`); - }); + }).finally(() => setIsProcessingFiles(false)); } else { setUploadMessage(`⚠️ ${data.message}`); setIsUploading(false); @@ -249,8 +252,13 @@ const [activeTab, setActiveTab] = useState("upload"); // Step 2: Call create_ingest to process uploaded files console.log("Calling handleCreateIngestAfterUpload..."); - await handleCreateIngestAfterUpload("uploaded", uploadedCount); - console.log("handleCreateIngestAfterUpload completed"); + setIsProcessingFiles(true); + try { + await handleCreateIngestAfterUpload("uploaded", uploadedCount); + console.log("handleCreateIngestAfterUpload completed"); + } finally { + setIsProcessingFiles(false); + } } catch (error: any) { console.error("Upload error:", error); setUploadMessage(`❌ Batch upload error: ${error.message}`); @@ -404,10 +412,11 @@ const [activeTab, setActiveTab] = useState("upload"); await fetchDownloadedFiles(); setIsDownloading(false); // Step 2: Call create_ingest to process downloaded files in background + setIsProcessingFiles(true); handleCreateIngestAfterUpload("downloaded", downloadCount).catch((err) => { console.error("Error in background processing:", err); setDownloadMessage(`❌ Processing error: ${err.message}`); - }); + }).finally(() => setIsProcessingFiles(false)); } else if (data.status === "warning") { setDownloadMessage(`⚠️ ${data.message}`); setIsDownloading(false); @@ -860,14 +869,12 @@ const [activeTab, setActiveTab] = useState("upload"); if (statusResponse.ok) { const statusData = await statusResponse.json(); - const wasRunning = isRebuildRunning; + const wasRunning = isRebuildRunningRef.current; const isCurrentlyRunning = statusData.is_running || false; setIsRebuildRunning(isCurrentlyRunning); + isRebuildRunningRef.current = isCurrentlyRunning; - if (statusData.status === "error" || statusData.status === "unknown") { - return; - } if (isCurrentlyRunning) { const startTime = statusData.started_at ? new Date(statusData.started_at * 1000).toLocaleString() : "unknown time"; setRefreshMessage(`⚠️ A rebuild is already in progress for "${graphName}" (started at ${startTime}). Please wait for it to complete.`); @@ -875,14 +882,21 @@ const [activeTab, setActiveTab] = useState("upload"); setRefreshMessage(`✅ Rebuild completed successfully for "${graphName}".`); } else if (statusData.status === "failed") { setRefreshMessage(`❌ Previous rebuild failed: ${statusData.error || "Unknown error"}`); - } else if (statusData.status === "idle") { + } else if (statusData.status === "error") { + setRefreshMessage(`❌ Failed to check rebuild status: ${statusData.error || "Unknown error"}`); + } else if (statusData.status === "unknown") { + setRefreshMessage(`⚠️ ECC service returned unknown status. It may be unavailable.`); + } else { setRefreshMessage(""); } + } else { + setRefreshMessage(`❌ Failed to check rebuild status (HTTP ${statusResponse.status}).`); } } catch (error: any) { console.error("Error checking rebuild status:", error); - // On error, don't change state - keep existing message - // Don't clear existing messages on error + if (showLoadingMessage) { + setRefreshMessage(`❌ Unable to reach ECC service: ${error.message || "Connection failed"}`); + } } finally { setIsCheckingStatus(false); } @@ -901,17 +915,18 @@ const [activeTab, setActiveTab] = useState("upload"); return; } + setIsRefreshing(true); + // Ask user to confirm before proceeding with refresh const shouldRefresh = await confirm( `Are you sure you want to refresh the knowledge graph "${refreshGraphName}"? This will rebuild the graph content.` ); if (!shouldRefresh) { setRefreshMessage("Operation cancelled by user."); + setIsRefreshing(false); return; } - // Check status one final time RIGHT before submitting (to catch any race conditions) - setIsRefreshing(true); setRefreshMessage("Verifying rebuild status..."); try { @@ -930,6 +945,7 @@ const [activeTab, setActiveTab] = useState("upload"); if (statusData.is_running) { setRefreshMessage(`⚠️ A rebuild is already in progress for "${refreshGraphName}". Please wait for it to complete.`); setIsRebuildRunning(true); + isRebuildRunningRef.current = true; setIsRefreshing(false); return; } @@ -960,6 +976,7 @@ const [activeTab, setActiveTab] = useState("upload"); setRefreshMessage(`✅ Refresh submitted successfully! The knowledge graph "${refreshGraphName}" is being rebuilt.`); setIsRebuildRunning(true); + isRebuildRunningRef.current = true; } catch (error: any) { console.error("Error refreshing graph:", error); setRefreshMessage(`❌ Error: ${error.message}`); @@ -1441,19 +1458,6 @@ const [activeTab, setActiveTab] = useState("upload"); )} - {/* Ingestion Status Message */} - {ingestMessage && ( -
- {ingestMessage} -
- )} - {/* Uploaded Files List */} {uploadedFiles.length > 0 && (
@@ -1494,7 +1498,7 @@ const [activeTab, setActiveTab] = useState("upload");

+ {ingestMessage && ( +
+ {ingestMessage} +
+ )}
)} @@ -1798,7 +1818,7 @@ const [activeTab, setActiveTab] = useState("upload");