Last updated

GraphQL Analysis

Query your knowledge graph with GraphQL for structured insights and analytics.

Overview

While natural language search is perfect for finding relevant context, GraphQL provides structured querying capabilities for:

  • Complex relationship analysis
  • Data aggregations and statistics
  • Multi-hop graph traversals
  • Analytics dashboards
  • Extracting structured insights for AI reasoning

Why GraphQL

Aggregate Data: Sum revenues, count interactions, calculate averages across entities

Analyze Relationships: Query multi-hop connections between entities

Build Dashboards: Extract structured data for visualization

Extract Insights: Get structured information for LLM consumption

Complex Queries: Joins, filters, and nested queries across your knowledge graph

Authentication

GraphQL uses the same authentication as the REST API:

# Using API Key: the key is passed once, when the client is constructed
# (same client setup as the "Basic Customer Query" example in this document).
import os

from papr_memory import Papr

client = Papr(x_api_key=os.environ.get("PAPR_MEMORY_API_KEY"))

# GraphQL calls made through this client need no extra authentication arguments.
response = client.graphql.query(
    query="{ ... }",
    variables={}
)

Authentication headers:

  • X-API-Key: Your API key
  • Authorization: Bearer token
  • X-Session-Token: Session token

Basic Query Structure

GraphQL queries follow this pattern:

query QueryName($variable: Type!) {
  entityType(filter: condition) {
    property1
    property2
    relatedEntity {
      property1
    }
  }
}

Python Examples

Basic Customer Query

from papr_memory import Papr
import os

client = Papr(x_api_key=os.environ.get("PAPR_MEMORY_API_KEY"))

response = client.graphql.query(
    query="""
    query GetCustomer($customerId: ID!) {
      customer(id: $customerId) {
        name
        email
        tier
        created_at
      }
    }
    """,
    variables={"customerId": "cust_123"}
)

customer = response.data['customer']
print(f"Customer: {customer['name']} ({customer['tier']})")

Query with Relationships

response = client.graphql.query(
    query="""
    query GetCustomerPurchases($customerId: ID!) {
      customer(id: $customerId) {
        name
        email
        purchases {
          date
          amount
          product {
            name
            category
            price
          }
        }
      }
    }
    """,
    variables={"customerId": "cust_123"}
)

customer = response.data['customer']
print(f"{customer['name']} has made {len(customer['purchases'])} purchases")
for purchase in customer['purchases']:
    print(f"  - {purchase['product']['name']}: ${purchase['amount']}")

Aggregations

response = client.graphql.query(
    query="""
    query CustomerSpending($customerId: ID!) {
      customer(id: $customerId) {
        name
        total_spent: purchases_aggregate {
          sum {
            amount
          }
        }
        purchase_count: purchases_aggregate {
          count
        }
        average_order: purchases_aggregate {
          avg {
            amount
          }
        }
      }
    }
    """,
    variables={"customerId": "cust_123"}
)

data = response.data['customer']
print(f"{data['name']} statistics:")
print(f"  Total spent: ${data['total_spent']['sum']['amount']}")
print(f"  Number of orders: {data['purchase_count']['count']}")
print(f"  Average order: ${data['average_order']['avg']['amount']}")

TypeScript Examples

Basic Query

import Papr from '@papr/memory';

const client = new Papr({
  xAPIKey: process.env.PAPR_MEMORY_API_KEY
});

const response = await client.graphql.query({
  query: `
    query GetCustomer($customerId: ID!) {
      customer(id: $customerId) {
        name
        email
        tier
      }
    }
  `,
  variables: { customerId: "cust_123" }
});

const customer = response.data.customer;
console.log(`Customer: ${customer.name} (${customer.tier})`);

Complex Analytics

const response = await client.graphql.query({
  query: `
    query ProductAnalytics {
      products {
        name
        category
        price
        revenue: purchases_aggregate {
          sum {
            amount
          }
        }
        customers: purchases_aggregate {
          count(distinct: true, columns: customer_id)
        }
        average_rating: reviews_aggregate {
          avg {
            rating
          }
        }
      }
    }
  `
});

const products = response.data.products;
products.forEach(product => {
  console.log(`${product.name}:`);
  console.log(`  Revenue: $${product.revenue.sum.amount}`);
  console.log(`  Customers: ${product.customers.count}`);
  console.log(`  Avg Rating: ${product.average_rating.avg.rating}/5`);
});

Advanced Queries

Filtering and Sorting

response = client.graphql.query(
    query="""
    query TopCustomers($minSpending: Float!) {
      customers(
        where: {
          total_spending: { _gte: $minSpending }
          tier: { _in: ["gold", "platinum"] }
        }
        order_by: { total_spending: desc }
        limit: 10
      ) {
        name
        email
        tier
        total_spending
        last_purchase_date
      }
    }
    """,
    variables={"minSpending": 1000.0}
)

top_customers = response.data['customers']
for i, customer in enumerate(top_customers, 1):
    print(f"{i}. {customer['name']}: ${customer['total_spending']}")

Multi-Hop Relationships

# Find customers who purchased products in same category as a specific product
response = client.graphql.query(
    query="""
    query RelatedCustomers($productId: ID!) {
      product(id: $productId) {
        name
        category
        related_customers: category {
          other_products {
            purchasers {
              customer {
                name
                email
              }
            }
          }
        }
      }
    }
    """,
    variables={"productId": "prod_123"}
)

Date Range Filtering

response = client.graphql.query(
    query="""
    query RecentActivity($startDate: DateTime!, $endDate: DateTime!) {
      memories(
        where: {
          created_at: { _gte: $startDate, _lte: $endDate }
          topics: { _contains: "customer_service" }
        }
        order_by: { created_at: desc }
      ) {
        id
        content
        created_at
        metadata
      }
    }
    """,
    variables={
        "startDate": "2024-03-01T00:00:00Z",
        "endDate": "2024-03-31T23:59:59Z"
    }
)

Common Use Cases

1. Agent Reasoning: Project Blockers

# Agent queries to understand current project status
response = client.graphql.query(
    query="""
    query ProjectBlockers($projectId: ID!) {
      project(id: $projectId) {
        name
        status
        tasks {
          title
          status
          priority
          blockers {
            description
            severity
            created_at
          }
          assignee {
            name
            workload: assigned_tasks_aggregate {
              count
            }
          }
        }
      }
    }
    """,
    variables={"projectId": "proj_123"}
)

# Agent uses this structured data to reason about blockers
project = response.data['project']
blocked_tasks = [t for t in project['tasks'] if t['blockers']]
print(f"Project {project['name']} has {len(blocked_tasks)} blocked tasks")

2. Analytics: Revenue by Customer Tier

# GraphQL has no "loop over a list of values" syntax, so each tier is requested
# as its own aliased aggregate; the alias becomes the key in response.data.
# NOTE(review): assumes `customers_aggregate` accepts a `where` filter and
# exposes sum/avg over `total_spending` -- confirm with schema introspection.
response = client.graphql.query(
    query="""
    query RevenueByTier {
      bronze: customers_aggregate(where: { tier: { _eq: "bronze" } }) {
        count
        sum { total_spending }
        avg { total_spending }
      }
      silver: customers_aggregate(where: { tier: { _eq: "silver" } }) {
        count
        sum { total_spending }
        avg { total_spending }
      }
      gold: customers_aggregate(where: { tier: { _eq: "gold" } }) {
        count
        sum { total_spending }
        avg { total_spending }
      }
      platinum: customers_aggregate(where: { tier: { _eq: "platinum" } }) {
        count
        sum { total_spending }
        avg { total_spending }
      }
    }
    """
)

# Perfect for building dashboards
for tier_name in ("bronze", "silver", "gold", "platinum"):
    stats = response.data[tier_name]
    print(f"{tier_name}: {stats['count']} customers")
    print(f"  Total revenue: ${stats['sum']['total_spending']}")
    print(f"  Avg per customer: ${stats['avg']['total_spending']}")

3. Relationship Analysis: Customer Connections

# Find customers with similar interests.
# Every variable used in a query must be declared in the operation signature
# and supplied in `variables`; otherwise the request fails validation.
# NOTE(review): `[String!]!` assumes product categories are plain strings --
# adjust the type if your schema models category as an enum or ID.
response = client.graphql.query(
    query="""
    query CustomerNetwork($customerId: ID!, $originalCategories: [String!]!) {
      customer(id: $customerId) {
        name
        purchased_products {
          product {
            name
            also_purchased_by {
              customer {
                name
                shared_interests: purchased_products(
                  where: {
                    product: {
                      category: { _in: $originalCategories }
                    }
                  }
                ) {
                  product {
                    name
                  }
                }
              }
            }
          }
        }
      }
    }
    """,
    variables={
        "customerId": "cust_123",
        # Example values: the categories the original customer bought from.
        "originalCategories": ["electronics", "books"],
    }
)

4. Data Validation: Orphaned Nodes

response = client.graphql.query(
    query="""
    query OrphanedEntities {
      products(where: { purchases: { _eq: null } }) {
        id
        name
        created_at
      }
      customers(where: { purchases: { _eq: null } }) {
        id
        name
        created_at
      }
    }
    """
)

orphaned_products = response.data['products']
orphaned_customers = response.data['customers']
print(f"Found {len(orphaned_products)} products with no purchases")
print(f"Found {len(orphaned_customers)} customers with no purchases")

5. Insights Extraction for LLMs

import json

# Extract structured data for LLM consumption.
# A GraphQL alias must be followed by a real field (`alias: field`), so fields
# cannot be wrapped in ad-hoc groups like `profile: { ... }`. They are selected
# flat here, with GraphQL comments marking the logical sections.
response = client.graphql.query(
    query="""
    query CustomerInsightsForAI($customerId: ID!) {
      customer(id: $customerId) {
        # --- Profile ---
        name
        email
        tier
        join_date

        # --- Behavior ---
        total_purchases: purchases_aggregate { count }
        total_spent: purchases_aggregate { sum { amount } }
        favorite_category: purchases(
          group_by: product__category
          order_by: { count: desc }
          limit: 1
        ) {
          category
          count
        }
        last_purchase: purchases(
          order_by: { date: desc }
          limit: 1
        ) {
          date
          product { name }
          amount
        }

        # --- Interactions ---
        support_tickets: interactions(
          where: { type: "support" }
        ) {
          subject
          status
          created_at
        }
        satisfaction_score: reviews_aggregate {
          avg { rating }
        }
      }
    }
    """,
    variables={"customerId": "cust_123"}
)

# Pass this structured data to LLM for reasoning
customer_data = response.data['customer']
llm_prompt = f"""
Analyze this customer:
{json.dumps(customer_data, indent=2)}

What personalized recommendations would you make?
"""

Query Optimization

1. Request Only Needed Fields

# Bad: Requesting everything
query = """
{
  customers {
    id
    name
    email
    created_at
    updated_at
    metadata
    purchases { ... }  # Lots of data
  }
}
"""

# Good: Request only what you need
query = """
{
  customers {
    name
    email
  }
}
"""

2. Use Pagination for Large Results

response = client.graphql.query(
    query="""
    query PaginatedCustomers($limit: Int!, $offset: Int!) {
      customers(limit: $limit, offset: $offset) {
        name
        email
      }
      customers_aggregate {
        count
      }
    }
    """,
    variables={"limit": 50, "offset": 0}
)

total = response.data['customers_aggregate']['count']
print(f"Showing {len(response.data['customers'])} of {total} customers")

3. Use Aggregates Instead of Fetching All Data

# Bad: Fetch all purchases to count
query = """
{
  customer(id: "123") {
    purchases {
      id
    }
  }
}
"""
count = len(response.data['customer']['purchases'])

# Good: Use aggregate
query = """
{
  customer(id: "123") {
    purchase_count: purchases_aggregate {
      count
    }
  }
}
"""
count = response.data['customer']['purchase_count']['count']

Error Handling

try:
    response = client.graphql.query(
        query=query,
        variables=variables
    )
    
    if response.errors:
        print("GraphQL errors:")
        for error in response.errors:
            print(f"  - {error['message']}")
    else:
        # Process data
        data = response.data
        
except Exception as e:
    print(f"Request failed: {e}")

GraphQL Schema Introspection

Query the schema to understand available types and fields:

# In GraphQL introspection, NON_NULL and LIST types are wrappers whose `name`
# is null; the wrapped type is under `ofType`. Three levels of `ofType` are
# requested, which is enough to resolve a type such as `[String!]!`.
response = client.graphql.query(
    query="""
    {
      __schema {
        types {
          name
          kind
          fields {
            name
            type {
              name
              kind
              ofType {
                name
                kind
                ofType {
                  name
                  kind
                  ofType {
                    name
                    kind
                  }
                }
              }
            }
          }
        }
      }
    }
    """
)


def format_type(type_ref):
    """Render an introspection type reference in GraphQL notation.

    Args:
        type_ref: A dict with `kind`, `name` and optionally `ofType`, as
            returned by the introspection query above, or None.

    Returns:
        A string such as "String", "String!" or "[String!]!". Nesting deeper
        than the query requested is rendered as "...".
    """
    if not type_ref:
        return "..."
    kind = type_ref.get("kind")
    if kind == "NON_NULL":
        return f"{format_type(type_ref.get('ofType'))}!"
    if kind == "LIST":
        return f"[{format_type(type_ref.get('ofType'))}]"
    return type_ref["name"]


# See all available types
for type_info in response.data['__schema']['types']:
    # Names starting with "__" are the introspection system's own types.
    if not type_info['name'].startswith('__'):
        print(f"Type: {type_info['name']}")
        # `fields` is null for kinds that have no fields (e.g. scalars, enums).
        if type_info['fields']:
            for field in type_info['fields']:
                print(f"  - {field['name']}: {format_type(field['type'])}")

Best Practices

1. Use Variables for Dynamic Values

# Good: Use variables
response = client.graphql.query(
    query="""
    query GetCustomer($id: ID!) {
      customer(id: $id) { name }
    }
    """,
    variables={"id": customer_id}
)

# Bad: String interpolation (security risk)
query = f"""
{{
  customer(id: "{customer_id}") {{ name }}
}}
"""

2. Name Your Queries

# Good: Named query
query = """
query GetCustomerDetails($id: ID!) {
  customer(id: $id) { ... }
}
"""

# Bad: Anonymous query
query = """
{
  customer(id: "123") { ... }
}
"""

3. Use Fragments for Reusable Fields

query = """
fragment CustomerFields on Customer {
  name
  email
  tier
  created_at
}

query GetCustomers {
  vip_customers: customers(where: { tier: "gold" }) {
    ...CustomerFields
  }
  new_customers: customers(
    where: { created_at: { _gte: "2024-01-01" } }
  ) {
    ...CustomerFields
  }
}
"""

4. Cache Results When Appropriate

import time

# Cache for 5 minutes
# Maps cache key -> (response data, time.monotonic() value when it was stored).
cache = {}
cache_ttl = 300


def get_customer_with_cache(customer_id):
    """Return GraphQL data for a customer, reusing a recent response if cached.

    Args:
        customer_id: Value sent as the `customerId` query variable; also used
            to build the cache key.

    Returns:
        The `data` attribute of the GraphQL response, either freshly fetched
        or taken from `cache` when the stored entry is younger than
        `cache_ttl` seconds.
    """
    cache_key = f"customer_{customer_id}"

    entry = cache.get(cache_key)
    if entry is not None:
        data, stored_at = entry
        # Monotonic time is unaffected by system clock adjustments, so the
        # measured age of an entry cannot jump forwards or backwards.
        if time.monotonic() - stored_at < cache_ttl:
            return data
        # Expired: evict so stale data does not linger if the refresh fails.
        del cache[cache_key]

    response = client.graphql.query(
        query="...",
        variables={"customerId": customer_id}
    )

    # Do not cache responses that carry GraphQL errors; otherwise a transient
    # failure would be served to every caller for the full TTL.
    if not getattr(response, "errors", None):
        cache[cache_key] = (response.data, time.monotonic())
    return response.data

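Combining Search with GraphQL

Use natural language search and GraphQL together: search finds the relevant memories, then GraphQL returns structured data about the entities they mention: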

# 1. Find relevant memories with natural language search
search_response = client.memory.search(
    query="customer complaints about shipping delays",
    max_memories=20
)

# 2. Extract customer IDs from memories
customer_ids = [
    m.metadata.get('customer_id') 
    for m in search_response.data.memories 
    if m.metadata.get('customer_id')
]

# 3. Use GraphQL to get structured customer data
graphql_response = client.graphql.query(
    query="""
    query GetCustomerDetails($ids: [ID!]!) {
      customers(where: { id: { _in: $ids } }) {
        name
        email
        tier
        order_count: purchases_aggregate { count }
        shipping_issues: interactions(
          where: { topic: "shipping" }
        ) {
          created_at
          resolved
        }
      }
    }
    """,
    variables={"ids": customer_ids}
)

# Now you have both semantic context and structured data

Troubleshooting

Query Syntax Errors

If you get syntax errors, check:

  • All braces are matched
  • Field names are spelled correctly
  • Variables are declared and used correctly
  • String values use double quotes (GraphQL does not accept single-quoted strings)

Performance Issues

If queries are slow:

  • Add a limit argument to constrain the number of results
  • Remove unnecessary fields
  • Use aggregates instead of fetching all data
  • Check if you need all relationship levels

Schema Mismatch

If fields don't exist:

  • Use introspection to see available fields
  • Check that your custom schema is active
  • Verify node/relationship type names

Next Steps