Last updated

Building an Enterprise SaaS Knowledge Base

This tutorial demonstrates how to build an enterprise knowledge base system that stores product documentation and internal knowledge in Papr Memory for easy search and retrieval.

Prerequisites

Before you begin, you'll need:

  • A Papr Memory API key
  • An OpenAI API key
  • Node.js installed

Implementation

1. Project Setup

Set up your project:

mkdir enterprise-kb
cd enterprise-kb
npm init -y
npm install express dotenv @papr/sdk openai multer cors marked

Create a .env file:

PAPR_API_KEY=your_papr_api_key_here
OPENAI_API_KEY=your_openai_api_key_here

2. Knowledge Base System

Create app.js:

import express from 'express';
import multer from 'multer';
import { OpenAI } from 'openai';
import { Papr } from '@papr/sdk';
import fs from 'fs';
import { marked } from 'marked';
import path from 'path';
import cors from 'cors';
import dotenv from 'dotenv';

// Pull settings from the .env file into process.env before anything reads them
dotenv.config();

// Express application and its global middleware, registered in this order:
// JSON body parsing, CORS, then static files from the public directory
const app = express();
for (const middleware of [express.json(), cors(), express.static('public')]) {
  app.use(middleware);
}

// Multer instance that stores incoming uploads under uploads/
const upload = multer({ dest: 'uploads/' });

// OpenAI and Papr clients, each configured with its API key from the environment
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
const paprClient = new Papr({ apiKey: process.env.PAPR_API_KEY });

/**
 * Split text into consecutive chunks of roughly `chunkSize` UTF-16 code units.
 *
 * Chunks are contiguous and non-overlapping; joining them reproduces `text`.
 * A chunk boundary is never placed between the two halves of a surrogate
 * pair, so a chunk may be one code unit shorter or longer than `chunkSize`
 * when an astral character (e.g. an emoji) straddles the boundary.
 *
 * @param {string} text - Text to split.
 * @param {number} [chunkSize=1500] - Target chunk length; must be >= 1.
 * @returns {string[]} The chunks in order; empty array for empty text.
 * @throws {RangeError} If `chunkSize` is not a number >= 1 (such a value
 *   could otherwise never advance through the text).
 */
function chunkText(text, chunkSize = 1500) {
  if (typeof chunkSize !== 'number' || !(chunkSize >= 1)) {
    throw new RangeError(`chunkSize must be a number >= 1, got ${chunkSize}`);
  }

  const isHighSurrogate = (code) => code >= 0xd800 && code <= 0xdbff;
  const isLowSurrogate = (code) => code >= 0xdc00 && code <= 0xdfff;

  const chunks = [];
  let start = 0;
  while (start < text.length) {
    let end = Math.min(start + chunkSize, text.length);

    // Cutting between a high and a low surrogate would leave two lone
    // surrogates, which turn into U+FFFD once the chunk is encoded as UTF-8.
    const splitsPair =
      end < text.length &&
      isHighSurrogate(text.charCodeAt(end - 1)) &&
      isLowSurrogate(text.charCodeAt(end));
    if (splitsPair) {
      // Prefer a shorter chunk; grow instead if shrinking would empty it.
      end = end - 1 > start ? end - 1 : end + 1;
    }

    chunks.push(text.slice(start, end));
    start = end;
  }
  return chunks;
}

/**
 * Store one piece of text in Papr Memory.
 *
 * @param {string} content - Text to store.
 * @param {object} metadata - Metadata passed through unchanged with the content.
 * @returns {Promise<*>} Whatever `paprClient.memory.add` resolves with.
 * @throws {Error} With message "Failed to add to memory: <reason>"; the
 *   underlying error is attached as `cause` so its stack is not lost.
 */
async function addToMemory(content, metadata) {
  try {
    // `return await` keeps a rejection inside this try block.
    return await paprClient.memory.add({
      content,
      type: 'text',
      metadata
    });
  } catch (error) {
    throw new Error(`Failed to add to memory: ${error.message}`, { cause: error });
  }
}

/**
 * Search Papr Memory for content relevant to an employee's question.
 *
 * The raw question is wrapped in a longer natural-language prompt before
 * being sent as the search query.
 *
 * @param {string} query - The question as typed by the user.
 * @returns {Promise<*>} Whatever `paprClient.memory.search` resolves with.
 * @throws {Error} With message "Failed to search memory: <reason>"; the
 *   underlying error is attached as `cause` so its stack is not lost.
 */
async function searchMemory(query) {
  const enhancedQuery = `Find information in our knowledge base that answers the following question from an employee: "${query}". I need comprehensive and accurate information from our documentation and guides.`;

  try {
    // `return await` keeps a rejection inside this try block.
    return await paprClient.memory.search({
      query: enhancedQuery
    });
  } catch (error) {
    throw new Error(`Failed to search memory: ${error.message}`, { cause: error });
  }
}

/**
 * Convert Markdown source to plain text suitable for indexing.
 *
 * The Markdown is rendered to HTML, tags are stripped, and the basic HTML
 * entities are decoded so the stored text reads "you'll", not "you&#39;ll".
 * `&amp;` is decoded last so that text such as "&amp;lt;" is not decoded twice.
 *
 * @param {string} markdown - Raw Markdown text.
 * @returns {string} Plain-text rendering of the document.
 */
function markdownToPlainText(markdown) {
  return marked
    .parse(markdown)
    .replace(/<[^>]*>?/gm, '')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    .replace(/&amp;/g, '&');
}

/**
 * POST /api/docs/upload — ingest one uploaded document.
 *
 * Expects a multipart form with a `file` field and optional `category`,
 * `product` and `version` fields. The file is read as UTF-8, Markdown files
 * are flattened to plain text, and the content is chunked and stored in
 * memory, one entry per chunk, all sharing a generated document ID.
 *
 * Responses: 200 `{ success, documentId, message }`, 400 when no file was
 * sent, 500 `{ error }` when reading or storing fails.
 */
app.post('/api/docs/upload', upload.single('file'), async (req, res) => {
  if (!req.file) {
    return res.status(400).json({ error: 'No file uploaded' });
  }

  const { originalname, path: filePath } = req.file;
  const { category, product, version } = req.body ?? {};

  try {
    // Read asynchronously so a large upload does not block other requests
    let fileContent = await fs.promises.readFile(filePath, 'utf-8');

    // Markdown is detected by extension, ignoring case (.md, .MD, ...)
    if (path.extname(originalname).toLowerCase() === '.md') {
      fileContent = markdownToPlainText(fileContent);
    }

    const chunks = chunkText(fileContent);
    const docId = `doc-${Date.now()}`;
    // One timestamp for the whole document so every chunk agrees
    const timestamp = new Date().toISOString();

    // Store all chunks concurrently; the first failure rejects the batch
    await Promise.all(
      chunks.map((chunk, index) =>
        addToMemory(chunk, {
          document_id: docId,
          document_name: originalname,
          category: category || 'uncategorized',
          product: product || 'general',
          version: version || 'latest',
          chunk_index: index,
          total_chunks: chunks.length,
          timestamp
        })
      )
    );

    res.json({
      success: true,
      documentId: docId,
      message: `Document "${originalname}" processed and added to knowledge base`
    });
  } catch (error) {
    console.error('Error processing document:', error);
    res.status(500).json({ error: error.message });
  } finally {
    // Always remove the temporary upload, including when processing failed;
    // a cleanup failure is logged but must not change the response.
    try {
      await fs.promises.unlink(filePath);
    } catch (cleanupError) {
      console.error('Error removing temporary upload:', cleanupError);
    }
  }
});

/**
 * POST /api/search — answer a question from the knowledge base.
 *
 * Expects a JSON body `{ query }`. Relevant memories are retrieved, their
 * content is handed to the chat model together with the question, and the
 * generated answer is returned with the list of documents it was drawn from.
 *
 * Responses: 200 `{ answer, sources }` (with a fixed fallback answer and an
 * empty source list when nothing relevant is found), 400 when `query` is
 * missing, not a string, or blank, 500 `{ error }` on search or model failure.
 */
app.post('/api/search', async (req, res) => {
  try {
    const { query } = req.body ?? {};

    // Reject non-string and whitespace-only queries before spending API calls
    if (typeof query !== 'string' || query.trim() === '') {
      return res.status(400).json({ error: 'Query is required' });
    }

    // Search memory for relevant content
    const searchResults = await searchMemory(query);
    const memories = searchResults?.data?.memories ?? [];

    // Concatenate the retrieved chunks, each labelled with its source document
    const searchContent = memories
      .map(memory => {
        const meta = memory.metadata || {};
        return `Source: ${meta.document_name || 'Unknown'}\n${memory.content}`;
      })
      .join('\n\n');

    // If no search results found
    if (!searchContent) {
      return res.json({
        answer: "I couldn't find specific information about that in our knowledge base.",
        sources: []
      });
    }

    // Use OpenAI to generate a comprehensive answer
    const completion = await openai.chat.completions.create({
      model: "gpt-3.5-turbo",
      messages: [
        {
          role: "system",
          content: "You are a knowledge base assistant for an enterprise SaaS company. Your job is to provide accurate information based on the company's documentation. Only use the information provided to you - don't make up details or reference things not in the provided content."
        },
        { role: "user", content: query },
        {
          role: "system",
          content: `Here is relevant information from our knowledge base:\n\n${searchContent}`
        }
      ]
    });

    const answer = completion.choices[0].message.content;

    // Build the citation list. Several chunks usually come from the same
    // document, so identical citations are collapsed into one entry.
    const seenSources = new Set();
    const sources = [];
    for (const memory of memories) {
      const source = {
        title: memory.metadata?.document_name || 'Unknown document',
        category: memory.metadata?.category || 'Uncategorized',
        product: memory.metadata?.product || 'General'
      };
      const key = JSON.stringify([source.title, source.category, source.product]);
      if (!seenSources.has(key)) {
        seenSources.add(key);
        sources.push(source);
      }
    }

    res.json({
      answer,
      sources
    });

  } catch (error) {
    console.error('Error searching knowledge base:', error);
    res.status(500).json({ error: error.message });
  }
});

/**
 * Generate the static frontend served from the public directory.
 *
 * Creates `public/` if it does not exist and writes `index.html`,
 * `style.css` and `script.js` into it, overwriting any existing copies.
 * All calls are synchronous, so the files exist when this function returns.
 *
 * The generated `script.js` checks the HTTP status of search and sample-upload
 * requests, so server-side failures are reported to the user instead of
 * showing an empty answer or a false success message.
 */
function setupFrontend() {
  fs.mkdirSync('public', { recursive: true });
  
  // Create HTML file
  fs.writeFileSync('public/index.html', `
<!DOCTYPE html>
<html>
<head>
  <title>Enterprise Knowledge Base</title>
  <link rel="stylesheet" href="style.css">
</head>
<body>
  <header>
    <h1>Enterprise Knowledge Base</h1>
  </header>
  
  <div class="container">
    <div class="sidebar">
      <h2>Upload Documentation</h2>
      <form id="upload-form">
        <div class="form-group">
          <label for="file">Document File (text, MD)</label>
          <input type="file" id="file" name="file" accept=".txt,.md" required>
        </div>
        <div class="form-group">
          <label for="category">Category</label>
          <select id="category" name="category">
            <option value="product">Product Documentation</option>
            <option value="api">API Reference</option>
            <option value="guide">User Guide</option>
            <option value="internal">Internal Process</option>
          </select>
        </div>
        <div class="form-group">
          <label for="product">Product</label>
          <input type="text" id="product" name="product" placeholder="e.g. Analytics Pro">
        </div>
        <div class="form-group">
          <label for="version">Version</label>
          <input type="text" id="version" name="version" placeholder="e.g. 2.1.0">
        </div>
        <button type="submit">Upload Document</button>
      </form>
      <div id="upload-status"></div>
      
      <div class="sample-data">
        <h3>Add Sample Documentation</h3>
        <button id="add-sample">Add Sample Docs</button>
      </div>
    </div>
    
    <div class="main-content">
      <h2>Search Knowledge Base</h2>
      <div class="search-container">
        <input type="text" id="search-input" placeholder="Ask a question about our products, APIs, or processes">
        <button id="search-button">Search</button>
      </div>
      
      <div class="results-container">
        <div id="loading" class="hidden">Searching knowledge base...</div>
        <div id="answer-box" class="hidden">
          <h3>Answer</h3>
          <div id="answer-content"></div>
          <div id="sources">
            <h4>Sources</h4>
            <ul id="sources-list"></ul>
          </div>
        </div>
      </div>
    </div>
  </div>
  
  <script src="script.js"></script>
</body>
</html>
  `);
  
  // Create CSS file
  fs.writeFileSync('public/style.css', `
* {
  box-sizing: border-box;
}

body {
  font-family: Arial, sans-serif;
  margin: 0;
  padding: 0;
  background-color: #f5f5f5;
  color: #333;
}

header {
  background-color: #2c3e50;
  color: white;
  padding: 1rem;
  text-align: center;
}

.container {
  display: flex;
  max-width: 1200px;
  margin: 20px auto;
  gap: 20px;
}

.sidebar {
  flex: 1;
  background: white;
  padding: 20px;
  border-radius: 8px;
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}

.main-content {
  flex: 2;
  background: white;
  padding: 20px;
  border-radius: 8px;
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}

.form-group {
  margin-bottom: 15px;
}

label {
  display: block;
  margin-bottom: 5px;
  font-weight: bold;
}

input, select, textarea {
  width: 100%;
  padding: 8px;
  border: 1px solid #ddd;
  border-radius: 4px;
}

button {
  background-color: #3498db;
  color: white;
  border: none;
  padding: 10px 15px;
  border-radius: 4px;
  cursor: pointer;
  font-size: 14px;
}

button:hover {
  background-color: #2980b9;
}

.search-container {
  display: flex;
  gap: 10px;
  margin-bottom: 20px;
}

.search-container input {
  flex: 1;
  padding: 10px;
  font-size: 16px;
}

#upload-status {
  margin-top: 15px;
  padding: 10px;
  border-radius: 4px;
}

.success {
  background-color: #d4edda;
  color: #155724;
}

.error {
  background-color: #f8d7da;
  color: #721c24;
}

.hidden {
  display: none;
}

#loading {
  text-align: center;
  padding: 20px;
  font-style: italic;
  color: #666;
}

#answer-box {
  border: 1px solid #ddd;
  border-radius: 8px;
  padding: 20px;
  margin-top: 20px;
}

#answer-content {
  line-height: 1.6;
  margin-bottom: 20px;
}

#sources {
  border-top: 1px solid #eee;
  padding-top: 15px;
}

#sources-list {
  padding-left: 20px;
}

.sample-data {
  margin-top: 30px;
  padding-top: 20px;
  border-top: 1px solid #eee;
}
  `);
  
  // Create JavaScript file. Backticks and \${...} inside this template are
  // escaped so they reach the browser script verbatim instead of being
  // interpolated here.
  fs.writeFileSync('public/script.js', `
// Handle document upload
document.getElementById('upload-form').addEventListener('submit', async (e) => {
  e.preventDefault();
  
  const formData = new FormData();
  const fileInput = document.getElementById('file');
  const category = document.getElementById('category').value;
  const product = document.getElementById('product').value;
  const version = document.getElementById('version').value;
  
  formData.append('file', fileInput.files[0]);
  formData.append('category', category);
  formData.append('product', product);
  formData.append('version', version);
  
  const statusDiv = document.getElementById('upload-status');
  statusDiv.textContent = 'Uploading and processing document...';
  statusDiv.className = '';
  
  try {
    const response = await fetch('/api/docs/upload', {
      method: 'POST',
      body: formData
    });
    
    const result = await response.json();
    
    if (result.success) {
      statusDiv.textContent = result.message;
      statusDiv.className = 'success';
      document.getElementById('upload-form').reset();
    } else {
      statusDiv.textContent = result.error || 'Error uploading document';
      statusDiv.className = 'error';
    }
  } catch (error) {
    statusDiv.textContent = 'Error: ' + error.message;
    statusDiv.className = 'error';
  }
});

// Handle search
document.getElementById('search-button').addEventListener('click', performSearch);
document.getElementById('search-input').addEventListener('keypress', (e) => {
  if (e.key === 'Enter') {
    performSearch();
  }
});

async function performSearch() {
  const query = document.getElementById('search-input').value.trim();
  
  if (!query) return;
  
  // Show loading, hide answer
  document.getElementById('loading').classList.remove('hidden');
  document.getElementById('answer-box').classList.add('hidden');
  
  try {
    const response = await fetch('/api/search', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({ query })
    });
    
    const result = await response.json();
    
    // fetch() resolves on HTTP errors too; report them instead of
    // rendering an empty answer
    if (!response.ok) {
      throw new Error(result.error || 'Search request failed');
    }
    
    // Hide loading
    document.getElementById('loading').classList.add('hidden');
    
    // Display answer
    const answerBox = document.getElementById('answer-box');
    document.getElementById('answer-content').textContent = result.answer;
    
    // Display sources
    const sourcesList = document.getElementById('sources-list');
    sourcesList.innerHTML = '';
    
    if (result.sources && result.sources.length > 0) {
      result.sources.forEach(source => {
        const listItem = document.createElement('li');
        listItem.textContent = \`\${source.title} (\${source.category} - \${source.product})\`;
        sourcesList.appendChild(listItem);
      });
      
      answerBox.classList.remove('hidden');
    } else {
      const listItem = document.createElement('li');
      listItem.textContent = 'No specific sources available';
      sourcesList.appendChild(listItem);
      
      answerBox.classList.remove('hidden');
    }
    
  } catch (error) {
    document.getElementById('loading').classList.add('hidden');
    alert('Error searching knowledge base: ' + error.message);
  }
}

// Sample documentation
const sampleDocs = [
  {
    content: \`# API Authentication Guide
    
Our API uses OAuth 2.0 for authentication. To authenticate your requests, you'll need to obtain an access token.

## Getting an Access Token

1. Register your application in the Developer Portal to get a client ID and secret.
2. Make a POST request to https://api.example.com/oauth/token with your client credentials.
3. Use the returned access token in the Authorization header of your API requests.

## Token Expiration

Access tokens expire after 24 hours. Your application should handle token refresh automatically when needed.

## Security Best Practices

- Never store client secrets in client-side code
- Implement proper token storage on your servers
- Use HTTPS for all API requests
- Implement token rotation for production applications\`,
    metadata: {
      category: 'api',
      product: 'Platform API',
      version: '2.0'
    }
  },
  {
    content: \`# Analytics Dashboard User Guide
    
The Analytics Dashboard provides real-time insights into your application performance and user engagement.

## Key Features

### Real-time Metrics
- View active users, session duration, and conversion rates in real-time
- Data refreshes automatically every 30 seconds

### Custom Reports
- Create custom reports by selecting metrics and dimensions
- Save report configurations for quick access
- Export reports in CSV, Excel, or PDF formats

### User Segments
- Create custom user segments based on behavior or attributes
- Compare performance across different user segments
- Set up automatic alerts for segment performance changes

## Getting Started

1. Log in to your account and navigate to the Analytics section
2. Select the property you want to analyze from the dropdown menu
3. Choose a default dashboard or create a custom view
4. Use the date picker to select your desired time period\`,
    metadata: {
      category: 'guide',
      product: 'Analytics Pro',
      version: '3.2'
    }
  },
  {
    content: \`# Internal Release Process
    
This document outlines the standard release process for all software products.

## Release Cycle

We follow a two-week sprint cycle with releases scheduled for every other Friday.

## Pre-release Checklist

1. All critical and high-priority bugs must be resolved
2. QA approval is required for all feature branches
3. Documentation must be updated and approved
4. Performance testing must show no regressions
5. Security scan must be completed with no critical findings

## Deployment Process

1. Release branch is created from develop branch
2. Final QA verification is performed on the release branch
3. Release notes are compiled and distributed
4. Deployment is scheduled during low-traffic window (typically 10pm-2am)
5. Automated and manual smoke tests are run after deployment
6. On-call engineer monitors system health for 24 hours post-release

## Rollback Procedure

In case of critical issues, the release manager may initiate a rollback:

1. Alert the engineering and support teams
2. Execute the rollback script
3. Verify system stability after rollback
4. Schedule emergency fix if needed\`,
    metadata: {
      category: 'internal',
      product: 'Development Process',
      version: '1.0'
    }
  }
];

// Add sample documentation
document.getElementById('add-sample').addEventListener('click', async () => {
  const statusDiv = document.getElementById('upload-status');
  statusDiv.textContent = 'Adding sample documentation...';
  statusDiv.className = '';
  
  try {
    // Create temporary files from sample content
    for (let i = 0; i < sampleDocs.length; i++) {
      const doc = sampleDocs[i];
      const fileName = \`sample-doc-\${i+1}.md\`;
      
      // Create a temporary file
      const file = new File([doc.content], fileName, { type: 'text/markdown' });
      
      // Create FormData
      const formData = new FormData();
      formData.append('file', file);
      formData.append('category', doc.metadata.category);
      formData.append('product', doc.metadata.product);
      formData.append('version', doc.metadata.version);
      
      // Upload to API
      const response = await fetch('/api/docs/upload', {
        method: 'POST',
        body: formData
      });
      
      // fetch() resolves on HTTP errors too; stop and report the failure
      // rather than announcing success
      if (!response.ok) {
        const failure = await response.json().catch(() => ({}));
        throw new Error(failure.error || 'Upload failed for ' + fileName);
      }
    }
    
    statusDiv.textContent = 'Sample documentation added successfully!';
    statusDiv.className = 'success';
  } catch (error) {
    statusDiv.textContent = 'Error adding sample documentation: ' + error.message;
    statusDiv.className = 'error';
  }
});
  `);
}

// Write the static frontend files to disk before the server accepts requests
setupFrontend();

// Listen on the port named by the PORT environment variable, defaulting to 3000
const PORT = process.env.PORT || 3000;
const announceReady = () => {
  console.log(`Server running on http://localhost:${PORT}`);
};
app.listen(PORT, announceReady);

3. Update package.json

Update your package.json to include:

{
  "name": "enterprise-kb",
  "version": "1.0.0",
  "description": "Enterprise knowledge base with Papr Memory and OpenAI",
  "main": "app.js",
  "type": "module",
  "scripts": {
    "start": "node app.js"
  },
  "dependencies": {
    "@papr/sdk": "^1.0.0",
    "cors": "^2.8.5",
    "dotenv": "^16.3.1",
    "express": "^4.18.2",
    "marked": "^9.0.3",
    "multer": "^1.4.5-lts.1",
    "openai": "^4.0.0"
  }
}

Usage

  1. Start the server:
npm start
  2. Open your browser to http://localhost:3000

  3. You can either:

    • Upload your own documentation files (text or markdown)
    • Click "Add Sample Docs" to populate the system with example content
  4. Use the search interface to ask questions about the documentation

How It Works

  1. Document Processing:

    • Documents are uploaded through the web interface
    • Text is chunked into manageable sections
    • Each chunk is stored in Papr Memory with metadata
    • Metadata includes document ID, category, product, and version
  2. Knowledge Retrieval:

    • Users enter questions in natural language
    • System searches Papr Memory for relevant content
    • OpenAI processes the search results and the user's question
    • A comprehensive answer is generated based on the retrieved content
    • Source documents are cited for transparency
  3. Key Benefits:

    • Enterprise knowledge is centralized and easily accessible
    • Employees get instant answers with relevant sources
    • Documentation remains up-to-date and searchable
    • The system handles different document types and categories

Next Steps

  • Add authentication and role-based access
  • Implement document versioning and updates
  • Add analytics to track common queries
  • Support more document formats (PDF, DOCX, etc.)
  • Integrate with existing documentation systems