diff --git a/mongo_lookup_patterns.png b/mongo_lookup_patterns.png
new file mode 100644
index 0000000..bbeea2c
Binary files /dev/null and b/mongo_lookup_patterns.png differ
diff --git a/mongo_lookup_patterns.py b/mongo_lookup_patterns.py
new file mode 100644
index 0000000..069b5a9
--- /dev/null
+++ b/mongo_lookup_patterns.py
@@ -0,0 +1,236 @@
+import matplotlib.pyplot as plt
+from matplotlib.patches import FancyBboxPatch, ConnectionPatch
+
+# Create figure with subplots
+fig = plt.figure(figsize=(16, 12))
+gs = fig.add_gridspec(3, 2, height_ratios=[1, 1, 0.3], hspace=0.3, wspace=0.2)
+
+# Color scheme
+colors = {
+    'collection_a': '#FF6B6B',
+    'collection_b': '#4ECDC4',
+    'aggregation': '#45B7D1',
+    'driver': '#96CEB4',
+    'lookup': '#FFEAA7',
+    'bad': '#FF7675',
+    'good': '#00B894'
+}
+
+def draw_collection(ax, x, y, width, height, name, doc_count, color):
+    """Draw a collection box with a few sample documents inside."""
+    # Collection box
+    rect = FancyBboxPatch((x, y), width, height,
+                          boxstyle="round,pad=0.02",
+                          facecolor=color,
+                          edgecolor='black',
+                          linewidth=2)
+    ax.add_patch(rect)
+
+    # Collection title
+    ax.text(x + width/2, y + height - 0.1, name,
+            ha='center', va='center', fontsize=12, fontweight='bold')
+
+    # Document count
+    ax.text(x + width/2, y + height - 0.25, f'{doc_count:,} documents',
+            ha='center', va='center', fontsize=10)
+
+    # Draw sample documents (at most 5, scaled roughly with collection size)
+    doc_height = 0.08
+    docs_to_show = min(5, doc_count // 100000 + 1)
+    for i in range(docs_to_show):
+        doc_y = y + 0.1 + i * (doc_height + 0.02)
+        doc_rect = plt.Rectangle((x + 0.1, doc_y), width - 0.2, doc_height,
+                                 facecolor='white', edgecolor='gray', linewidth=1)
+        ax.add_patch(doc_rect)
+
+def draw_arrow(ax, start, end, label, color='black', style='-'):
+    """Draw an arrow between two data points with an optional label."""
+    arrow = ConnectionPatch(start, end, "data", "data",
+                            arrowstyle="->", shrinkA=5, shrinkB=5,
+                            mutation_scale=20, fc=color, ec=color, linestyle=style)
+    ax.add_patch(arrow)
+
+    # Add label at the arrow midpoint
+    mid_x, mid_y = (start[0] + end[0]) / 2, (start[1] + end[1]) / 2
+    ax.text(mid_x, mid_y + 0.05, label, ha='center', va='bottom',
+            fontsize=9, fontweight='bold', color=color)
+
+# Pattern 1: $lookup with aggregation (top row)
+ax1 = fig.add_subplot(gs[0, 0])
+ax1.set_xlim(0, 10)
+ax1.set_ylim(-1, 5)  # extend below 0 so the result arrow is not clipped
+ax1.set_title('Pattern 1: $lookup with Aggregation\n(Poor Performance)',
+              fontsize=14, fontweight='bold', color=colors['bad'])
+
+# Collections
+draw_collection(ax1, 0.5, 2, 2.5, 2, 'Collection A\n(Orders)', 500000, colors['collection_a'])
+draw_collection(ax1, 6.5, 2, 2.5, 2, 'Collection B\n(Users)', 10000, colors['collection_b'])
+
+# Aggregation pipeline
+agg_rect = FancyBboxPatch((3.5, 0.5), 2.5, 1,
+                          boxstyle="round,pad=0.1",
+                          facecolor=colors['aggregation'],
+                          edgecolor='black', linewidth=2)
+ax1.add_patch(agg_rect)
+ax1.text(4.75, 1, 'Aggregation Pipeline\nwith $lookup',
+         ha='center', va='center', fontsize=10, fontweight='bold')
+
+# Arrows
+draw_arrow(ax1, (3, 3), (3.5, 1.2), '500K docs', colors['bad'])
+draw_arrow(ax1, (6.5, 3), (6, 1.2), '10K docs', colors['bad'])
+draw_arrow(ax1, (4.75, 0.5), (4.75, -0.5), 'Joined Result\n500K docs', colors['bad'])
+
+ax1.axis('off')
+
+# Pattern 2: Aggregation + Driver lookup (top right)
+ax2 = fig.add_subplot(gs[0, 1])
+ax2.set_xlim(0, 10)
+ax2.set_ylim(0, 5)
+ax2.set_title('Pattern 2: Aggregation + Driver Lookup\n(Good Performance)',
+              fontsize=14, fontweight='bold', color=colors['good'])
+# Collections
+draw_collection(ax2, 0.5, 2, 2.5, 2, 'Collection A\n(Orders)', 500000, colors['collection_a'])
+draw_collection(ax2, 6.5, 2, 2.5, 2, 'Collection B\n(Users)', 10000, colors['collection_b'])
+
+# Aggregation pipeline (only on A)
+agg_rect = FancyBboxPatch((1, 0.2), 2, 0.8,
+                          boxstyle="round,pad=0.1",
+                          facecolor=colors['aggregation'],
+                          edgecolor='black', linewidth=2)
+ax2.add_patch(agg_rect)
+ax2.text(2, 0.6, 'Aggregation\n(group by ref)',
+         ha='center', va='center', fontsize=9, fontweight='bold')
+
+# Driver lookup
+driver_rect = FancyBboxPatch((6, 0.2), 2.5, 0.8,
+                             boxstyle="round,pad=0.1",
+                             facecolor=colors['driver'],
+                             edgecolor='black', linewidth=2)
+ax2.add_patch(driver_rect)
+ax2.text(7.25, 0.6, 'Driver Lookup\n(5 unique refs)',
+         ha='center', va='center', fontsize=9, fontweight='bold')
+
+# Arrows
+draw_arrow(ax2, (2, 2), (2, 1), '500K docs', colors['good'])
+draw_arrow(ax2, (3, 0.6), (6, 0.6), '5 grouped\nresults', colors['good'])
+draw_arrow(ax2, (7.25, 1), (7.25, 2), '5 lookups\nonly', colors['good'])
+
+ax2.axis('off')
+
+# Detailed flow diagrams (bottom row)
+ax3 = fig.add_subplot(gs[1, :])
+ax3.set_xlim(0, 20)
+ax3.set_ylim(0, 8)
+ax3.set_title('Detailed Performance Comparison', fontsize=14, fontweight='bold')
+
+# Pattern 1 detailed flow
+ax3.text(2.5, 7.5, 'Pattern 1: $lookup Aggregation', ha='center', va='center',
+         fontsize=12, fontweight='bold', color=colors['bad'])
+
+steps1 = [
+    (1, 6.5, '500K docs\nfrom Collection A'),
+    (1, 5, 'MongoDB performs\n$lookup operation'),
+    (1, 3.5, 'Joins EVERY doc\nwith Collection B'),
+    (1, 2, 'Returns 500K\njoined documents'),
+    (1, 0.5, 'Cost: 500K lookups +\n500K-doc result set')
+]
+
+for i, (x, y, text) in enumerate(steps1):
+    rect = FancyBboxPatch((x-0.8, y-0.4), 1.6, 0.8,
+                          boxstyle="round,pad=0.05",
+                          facecolor=colors['bad'] if i == len(steps1)-1 else colors['lookup'],
+                          edgecolor='black', linewidth=1)
+    ax3.add_patch(rect)
+    ax3.text(x, y, text, ha='center', va='center', fontsize=9)
+
+    if i < len(steps1) - 1:
+        draw_arrow(ax3, (x, y-0.4), (x, steps1[i+1][1]+0.4), '', colors['bad'])
+
+# Pattern 2 detailed flow
+ax3.text(12.5, 7.5, 'Pattern 2: Aggregation + Driver Lookup', ha='center', va='center',
+         fontsize=12, fontweight='bold', color=colors['good'])
+
+steps2 = [
+    (11, 6.5, '500K docs\nfrom Collection A'),
+    (11, 5, 'Aggregation groups\nby ObjectID references'),
+    (11, 3.5, 'Returns only 5\nunique references'),
+    (14, 3.5, 'Driver makes 5\ntargeted lookups'),
+    (14, 2, 'Fetches only 5 docs\nfrom Collection B'),
+    (12.5, 0.5, 'Performance: O(N) + O(K)\nwhere N=500K, K=5')
+]
+
+for i, (x, y, text) in enumerate(steps2):
+    color = colors['good'] if i == len(steps2)-1 else (colors['driver'] if x > 12 else colors['aggregation'])
+    rect = FancyBboxPatch((x-0.8, y-0.4), 1.6, 0.8,
+                          boxstyle="round,pad=0.05",
+                          facecolor=color,
+                          edgecolor='black', linewidth=1)
+    ax3.add_patch(rect)
+    ax3.text(x, y, text, ha='center', va='center', fontsize=9)
+
+    if i < len(steps2) - 1:
+        if i == 2:  # Arrow from aggregation column to driver column
+            draw_arrow(ax3, (x+0.8, y), (steps2[i+1][0]-0.8, steps2[i+1][1]), '', colors['good'])
+        elif i == 4:  # Arrow to the final result box
+            draw_arrow(ax3, (x-1, y-0.4), (steps2[i+1][0], steps2[i+1][1]+0.4), '', colors['good'])
+        else:
+            draw_arrow(ax3, (x, y-0.4), (x, steps2[i+1][1]+0.4), '', colors['good'])
+
+# Dividing line
+ax3.axvline(x=10, color='gray', linestyle='--', alpha=0.5)
+
+ax3.axis('off')
+
+# Performance comparison table
+ax4 = fig.add_subplot(gs[2, :])
+ax4.set_xlim(0, 10)
+ax4.set_ylim(0, 2)
+# Table headers
+headers = ['Aspect', 'Pattern 1: $lookup', 'Pattern 2: Aggregation + Driver']
+col_widths = [2, 4, 4]
+col_positions = [1, 3.5, 6.5]
+
+for i, (header, pos, width) in enumerate(zip(headers, col_positions, col_widths)):
+    rect = FancyBboxPatch((pos-width/2, 1.5), width, 0.4,
+                          boxstyle="round,pad=0.02",
+                          facecolor='lightgray',
+                          edgecolor='black', linewidth=1)
+    ax4.add_patch(rect)
+    ax4.text(pos, 1.7, header, ha='center', va='center', fontsize=10, fontweight='bold')
+
+# Table rows
+rows = [
+    ['Performance', '500K per-doc lookups - Slow', 'One O(N) pass + 5 fetches - Fast'],
+    ['Memory Usage', 'High (500K joined docs)', 'Low (5 unique refs)'],
+    ['Network Traffic', 'Heavy (large result set)', 'Light (small aggregation result)'],
+    ['Best Use Case', 'Small collections only', 'Large collections with grouped refs']
+]
+
+for row_idx, row in enumerate(rows):
+    y_pos = 1.1 - row_idx * 0.3
+    for col_idx, (text, pos, width) in enumerate(zip(row, col_positions, col_widths)):
+        color = colors['bad'] if col_idx == 1 else (colors['good'] if col_idx == 2 else 'white')
+        rect = FancyBboxPatch((pos-width/2, y_pos-0.1), width, 0.2,
+                              boxstyle="round,pad=0.02",
+                              facecolor=color,
+                              edgecolor='black', linewidth=1, alpha=0.3)
+        ax4.add_patch(rect)
+        ax4.text(pos, y_pos, text, ha='center', va='center', fontsize=9)
+
+ax4.axis('off')
+
+plt.suptitle('MongoDB Collection Lookup Patterns: Performance Comparison',
+             fontsize=16, fontweight='bold', y=0.98)
+
+plt.tight_layout()
+plt.savefig('mongo_lookup_patterns.png', dpi=300, bbox_inches='tight')
+plt.show()
+
+print("Diagram saved as 'mongo_lookup_patterns.png'")
+print("\nSummary:")
+print("Pattern 1 ($lookup): Joins all 500K documents at database level - Poor performance")
+print("Pattern 2 (Aggregation + Driver): Groups first, then looks up only unique refs - Good performance")
diff --git a/mongodb_lookup_patterns_diagram.txt b/mongodb_lookup_patterns_diagram.txt
new file mode 100644
index 0000000..be7a2fd
--- /dev/null
+++ b/mongodb_lookup_patterns_diagram.txt
@@ -0,0 +1,196 @@
+MONGODB LOOKUP PATTERNS: Performance Comparison
+==================================================
+
+SCENARIO: Collection A (500,000 docs) references Collection B (5 docs).
+Each document in Collection A has an ObjectID field pointing to a document
+in Collection B.
+
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                       PATTERN 1: $lookup Aggregation                        │
+│                            (POOR PERFORMANCE)                               │
+└─────────────────────────────────────────────────────────────────────────────┘
+
+┌─────────────────┐     $lookup     ┌─────────────────┐
+│  Collection A   │ ◄─────────────► │  Collection B   │
+│  500,000 docs   │                 │     5 docs      │
+│                 │                 │                 │
+│ {_id: 1,        │                 │ {_id: "b1",     │
+│  ref: "b1",     │                 │  name: "Item1"} │
+│  data: "..."}   │                 │                 │
+│                 │                 │ {_id: "b2",     │
+│ {_id: 2,        │                 │  name: "Item2"} │
+│  ref: "b2",     │                 │  ...            │
+│  data: "..."}   │                 │                 │
+│  ...            │                 │                 │
+└─────────────────┘                 └─────────────────┘
+         │
+         ▼
+┌─────────────────────────────────────────────────────┐
+│             MongoDB Aggregation Engine              │
+│                                                     │
+│  db.collectionA.aggregate([                         │
+│    {                                                │
+│      $lookup: {                                     │
+│        from: "collectionB",                         │
+│        localField: "ref",                           │
+│        foreignField: "_id",                         │
+│        as: "resolved"                               │
+│      }                                              │
+│    }                                                │
+│  ])                                                 │
+│                                                     │
+│  ⚠️ PERFORMANCE ISSUE:                              │
+│  • Processes ALL 500,000 docs from Collection A     │
+│  • Performs one lookup into Collection B per doc    │
+│    (an index probe on _id): 500,000 probes total    │
+│  • Materializes and returns a 500,000-doc joined    │
+│    result set - memory and network intensive        │
+└─────────────────────────────────────────────────────┘
+         │
+         ▼
+┌─────────────────────────────────────────────────────┐
+│                  Result to Driver                   │
+│              500,000 enriched documents             │
+└─────────────────────────────────────────────────────┘
+
+═══════════════════════════════════════════════════════════════════════════════
+
+┌─────────────────────────────────────────────────────────────────────────────┐
+│             PATTERN 2: Aggregation + Driver-Side Resolution                 │
+│                            (GOOD PERFORMANCE)                               │
+└─────────────────────────────────────────────────────────────────────────────┘
+
+STEP 1: Aggregate on Collection A (group by reference)
+┌─────────────────┐
+│  Collection A   │
+│  500,000 docs   │
+│                 │
+│ {_id: 1,        │
+│  ref: "b1",     │
+│  data: "..."}   │
+│                 │
+│ {_id: 2,        │
+│  ref: "b2",     │
+│  data: "..."}   │
+│  ...            │
+└─────────────────┘
+         │
+         ▼
+┌─────────────────────────────────────────────────────┐
+│             MongoDB Aggregation Engine              │
+│                                                     │
+│  db.collectionA.aggregate([                         │
+│    {                                                │
+│      $group: {                                      │
+│        _id: "$ref",                                 │
+│        docs: { $push: "$$ROOT" },                   │
+│        count: { $sum: 1 }                           │
+│      }                                              │
+│    }                                                │
+│  ])                                                 │
+│                                                     │
+│  ✅ PERFORMANCE BENEFIT:                            │
+│  • Groups 500,000 docs by ref field in one pass     │
+│  • Results in only 5 groups (one per ref)           │
+│  • No cross-collection joins during aggregation     │
+│                                                     │
+│  ⚠️ CAUTION: $push: "$$ROOT" keeps every source     │
+│  doc inside its group; at ~100K docs per group      │
+│  this exceeds the 16 MB document limit. In          │
+│  practice, $project only the needed fields first,   │
+│  or keep counts/sums instead of whole documents.    │
+└─────────────────────────────────────────────────────┘
+         │
+         ▼
+┌─────────────────────────────────────────────────────┐
+│                 Aggregation Result                  │
+│                   5 grouped docs                    │
+│                                                     │
+│  {_id: "b1", docs: [...], count: 100000}            │
+│  {_id: "b2", docs: [...], count: 150000}            │
+│  {_id: "b3", docs: [...], count: 120000}            │
+│  {_id: "b4", docs: [...], count: 80000}             │
+│  {_id: "b5", docs: [...], count: 50000}             │
+└─────────────────────────────────────────────────────┘
+         │
+         ▼
+┌─────────────────────────────────────────────────────┐
+│                 Driver Application                  │
+│                                                     │
+│  // Extract unique ref IDs from aggregation result  │
+│  refIds = ["b1", "b2", "b3", "b4", "b5"]            │
+│                                                     │
+│  // Single query to Collection B                    │
+│  referencedDocs = db.collectionB.find({             │
+│    _id: { $in: refIds }                             │
+│  })                                                 │
+│                                                     │
+│  ✅ PERFORMANCE BENEFIT:                            │
+│  • Only 1 query to Collection B                     │
+│  • Fetches only 5 documents                         │
+│  • Driver merges results in memory                  │
+└─────────────────────────────────────────────────────┘
+         │
+         ▼
+┌─────────────────┐    Single Query    ┌─────────────────┐
+│     Driver      │ ─────────────────► │  Collection B   │
+│                 │ ◄───────────────── │     5 docs      │
+│  refIds =       │       5 docs       │                 │
+│  ["b1","b2",    │                    │ {_id: "b1",     │
+│   "b3","b4",    │                    │  name: "Item1"} │
+│   "b5"]         │                    │                 │
+└─────────────────┘                    │ {_id: "b2",     │
+         │                             │  name: "Item2"} │
+         ▼                             │  ...            │
+                                       └─────────────────┘
+┌─────────────────────────────────────────────────────────┐
+│                  Final Enriched Result                  │
+│                                                         │
+│  // Driver combines aggregation + lookup results        │
+│  for each group in aggregationResult:                   │
+│    resolvedDoc = referencedDocs[group._id]              │
+│    group.resolved = resolvedDoc                         │
+│                                                         │
+│  Final result: 5 groups with resolved references        │
+└─────────────────────────────────────────────────────────┘
+
+═══════════════════════════════════════════════════════════════════════════════
+
+PERFORMANCE COMPARISON:
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                                                                             │
+│  PATTERN 1 ($lookup):                                                       │
+│  ❌ MongoDB processes: 500,000 documents                                    │
+│  ❌ Cross-collection operations: 500,000 lookups (index probes on _id)      │
+│  ❌ Memory usage: Very high (full joined result set)                        │
+│  ❌ Network transfer: 500,000 enriched documents                            │
+│  ❌ Time complexity: O(n) lookups plus an O(n)-sized result,                │
+│     where n = source collection size                                        │
+│                                                                             │
+│  PATTERN 2 (Aggregation + Driver resolution):                               │
+│  ✅ MongoDB processes: 500,000 documents (grouping only, one pass)          │
+│  ✅ Cross-collection operations: 1 query fetching 5 documents               │
+│  ✅ Memory usage: Low at the driver (5 groups + 5 referenced docs)          │
+│  ✅ Network transfer: 5 groups + 5 referenced documents                     │
+│  ✅ Time complexity: O(n) for the grouping pass + O(k) for the lookup,      │
+│     where k = 5                                                             │
+│  (The ✅ figures assume the $group stage keeps summaries rather than        │
+│   $push-ing whole documents - see the CAUTION in the diagram above.)        │
+│                                                                             │
+│  PERFORMANCE GAIN: ~100x to 1000x improvement depending on:                 │
+│  • Ratio of source docs to referenced docs                                  │
+│  • Network latency                                                          │
+│  • Available memory                                                         │
+│  (See the back-of-envelope example at the end of this file.)                │
+│                                                                             │
+└─────────────────────────────────────────────────────────────────────────────┘
+
+KEY INSIGHTS:
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+1. WHEN TO USE PATTERN 2:
+   • Large source collection (Collection A)
+   • Small reference collection (Collection B)
+   • Low cardinality in the reference field (few unique references,
+     i.e. high fan-out from Collection B)
+   • Memory and network bandwidth constraints
+
+2. TRADE-OFFS:
+   • Pattern 1: Simple query, poor performance with large datasets
+   • Pattern 2: More complex code, excellent performance with large datasets
+
+3. IMPLEMENTATION CONSIDERATIONS:
+   • Pattern 2 requires application-level logic to merge results
+   • Pattern 1 is handled entirely by MongoDB
+   • Pattern 2 scales better with collection growth
+
+4. TYPICAL USE CASES FOR PATTERN 2:
+   • User documents referencing a small set of categories
+   • Order documents referencing products
+   • Log entries referencing configuration items
+   • Any scenario with high fan-out from small reference data
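+
+BACK-OF-ENVELOPE EXAMPLE:
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+An illustrative calculation behind the "~100x to 1000x" figure above
+(assumes ~1 KB per order document - adjust for your own schema):
+
+  Pattern 1 transfers the full joined result set:
+    500,000 docs × ~1 KB  ≈  500 MB to the driver
+
+  Pattern 2 transfers grouped summaries plus the referenced docs:
+    5 group documents + 5 referenced docs  ≈  a few KB,
+    provided the $group stage keeps counts/sums rather than
+    pushing whole order documents into each group
+
+The gap between ~500 MB and a few KB, together with avoiding 500,000
+per-document lookups, is where the order-of-magnitude gains come from.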
diff --git a/mongodb_patterns_code_examples.js b/mongodb_patterns_code_examples.js
new file mode 100644
index 0000000..861cee1
--- /dev/null
+++ b/mongodb_patterns_code_examples.js
@@ -0,0 +1,290 @@
+// MongoDB Lookup Patterns - Code Examples
+// =======================================
+//
+// NOTE: These examples use mongosh-style collection access (db.orders).
+// From the Node.js driver, obtain `db` from a connected MongoClient and
+// replace db.orders with db.collection("orders"), etc.
+
+// Sample Data Structure
+// Collection A: orders (500,000 documents)
+// Collection B: products (5 documents)
+
+/*
+Collection A (orders):
+{
+  _id: ObjectId("..."),
+  orderId: "ORD-001",
+  productId: ObjectId("prod1"),  // Reference to Collection B
+  quantity: 2,
+  orderDate: ISODate("2024-01-01"),
+  customerId: "CUST-123"
+}
+
+Collection B (products):
+{
+  _id: ObjectId("prod1"),
+  name: "Laptop",
+  price: 999.99,
+  category: "Electronics"
+}
+*/
+
+// ============================================================================
+// PATTERN 1: $lookup Aggregation (POOR PERFORMANCE)
+// ============================================================================
+
+console.log("PATTERN 1: Using $lookup aggregation");
+console.log("=====================================");
+
+const pattern1_lookup = () => {
+  const startTime = Date.now();
+
+  // This performs poorly with large source collections
+  const result = db.orders.aggregate([
+    {
+      $lookup: {
+        from: "products",
+        localField: "productId",
+        foreignField: "_id",
+        as: "productDetails"
+      }
+    },
+    {
+      $unwind: "$productDetails"  // Optional: flatten the single-element array
+    },
+    {
+      $project: {
+        orderId: 1,
+        quantity: 1,
+        orderDate: 1,
+        customerId: 1,
+        productName: "$productDetails.name",
+        productPrice: "$productDetails.price",
+        totalValue: { $multiply: ["$quantity", "$productDetails.price"] }
+      }
+    }
+  ]).toArray();
+
+  const endTime = Date.now();
+
+  console.log(`Pattern 1 Results: ${result.length} documents`);
+  console.log(`Execution time: ${endTime - startTime}ms`);
+  console.log("Performance Issues:");
+  console.log("- MongoDB processes ALL 500,000 orders");
+  console.log("- Performs 500,000 individual lookups against the products collection");
+  console.log("- High memory usage for the joined result set");
+  console.log("- Large network transfer (500,000 enriched documents)");
+
+  return result;
+};
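+
+// To check what Pattern 1 actually does on your data, mongosh can report
+// execution statistics for the pipeline. A minimal sketch (assumes the
+// orders/products collections above; not part of the benchmark itself):
+//
+//   db.orders.explain("executionStats").aggregate([
+//     {
+//       $lookup: {
+//         from: "products",
+//         localField: "productId",
+//         foreignField: "_id",
+//         as: "productDetails"
+//       }
+//     }
+//   ]);
+//
+// Inspect totalDocsExamined and executionTimeMillis in the output to see
+// the per-document join cost before committing to this pattern.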
+
+// ============================================================================
+// PATTERN 2: Aggregation + Driver-Side Resolution (GOOD PERFORMANCE)
+// ============================================================================
+
+console.log("\nPATTERN 2: Aggregation + Driver-side resolution");
+console.log("===============================================");
+
+const pattern2_optimized = async () => {
+  const startTime = Date.now();
+
+  // STEP 1: Aggregate orders by productId (group similar orders)
+  console.log("Step 1: Grouping orders by productId...");
+  const groupedOrders = await db.orders.aggregate([
+    {
+      $group: {
+        _id: "$productId",            // Group by product reference
+        orders: { $push: "$$ROOT" },  // Keep all order documents
+        totalQuantity: { $sum: "$quantity" },
+        orderCount: { $sum: 1 }
+      }
+    }
+  ]).toArray();
+  // ⚠️ Caution: $push: "$$ROOT" keeps every order inside its group. With
+  // ~100K orders per group this exceeds MongoDB's 16 MB document limit -
+  // in production, $project only the fields you need before grouping, or
+  // keep counts/sums and use the memory-efficient variant below.
+
+  console.log(`Grouped into ${groupedOrders.length} product groups`);
+
+  // STEP 2: Extract unique product IDs
+  const productIds = groupedOrders.map(group => group._id);
+  console.log(`Unique product IDs: ${productIds.length}`);
+
+  // STEP 3: Single lookup to the products collection (only 5 documents!)
+  console.log("Step 3: Fetching product details...");
+  const products = await db.products.find({
+    _id: { $in: productIds }
+  }).toArray();
+
+  // Create a map for quick product lookup
+  const productMap = {};
+  products.forEach(product => {
+    productMap[product._id.toString()] = product;
+  });
+
+  // STEP 4: Merge results in application code
+  console.log("Step 4: Merging results...");
+  const enrichedResults = [];
+
+  groupedOrders.forEach(group => {
+    const product = productMap[group._id.toString()];
+
+    // Process each order in the group
+    group.orders.forEach(order => {
+      enrichedResults.push({
+        orderId: order.orderId,
+        quantity: order.quantity,
+        orderDate: order.orderDate,
+        customerId: order.customerId,
+        productName: product.name,
+        productPrice: product.price,
+        totalValue: order.quantity * product.price
+      });
+    });
+  });
+
+  const endTime = Date.now();
+
+  console.log(`Pattern 2 Results: ${enrichedResults.length} documents`);
+  console.log(`Execution time: ${endTime - startTime}ms`);
+  console.log("Performance Benefits:");
+  console.log("- MongoDB groups 500,000 orders in a single pass");
+  console.log("- Only 1 lookup query to the products collection");
+  console.log("- Fetches only 5 product documents");
+  console.log("- Minimal cross-collection work and network overhead");
+
+  return enrichedResults;
+};
+
+// ============================================================================
+// ALTERNATIVE PATTERN 2 IMPLEMENTATION: More Memory Efficient
+// ============================================================================
+
+const pattern2_memory_efficient = async () => {
+  console.log("\nPATTERN 2 (Memory Efficient): Processing in batches");
+  console.log("===================================================");
+
+  const startTime = Date.now();
+
+  // STEP 1: Get distinct product IDs without loading all orders
+  const distinctProducts = await db.orders.distinct("productId");
+  console.log(`Found ${distinctProducts.length} distinct products`);
+
+  // STEP 2: Fetch all product details
+  const products = await db.products.find({
+    _id: { $in: distinctProducts }
+  }).toArray();
+
+  const productMap = {};
+  products.forEach(product => {
+    productMap[product._id.toString()] = product;
+  });
+
+  // STEP 3: Process orders in batches with a cursor
+  const batchSize = 10000;
+  let processedCount = 0;
+  // For demonstration this accumulates results in an array; to keep driver
+  // memory flat, process/emit each enriched doc here instead of retaining it.
+  const enrichedResults = [];
+
+  const cursor = db.orders.find({}).batchSize(batchSize);
+
+  while (await cursor.hasNext()) {
+    const order = await cursor.next();
+    const product = productMap[order.productId.toString()];
+
+    if (product) {
+      enrichedResults.push({
+        orderId: order.orderId,
+        quantity: order.quantity,
+        orderDate: order.orderDate,
+        customerId: order.customerId,
+        productName: product.name,
+        productPrice: product.price,
+        totalValue: order.quantity * product.price
+      });
+    }
+
+    processedCount++;
+    if (processedCount % 50000 === 0) {
+      console.log(`Processed ${processedCount} orders...`);
+    }
+  }
+
+  const endTime = Date.now();
+
+  console.log(`Pattern 2 (Efficient) Results: ${enrichedResults.length} documents`);
+  console.log(`Execution time: ${endTime - startTime}ms`);
+  console.log("Additional Benefits:");
+  console.log("- Avoids building oversized group documents on the server");
+  console.log("- Driver memory stays flat when results are streamed, not retained");
+  console.log("- Suitable for collections larger than available RAM");
+
+  return enrichedResults;
+};
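+
+// ----------------------------------------------------------------------------
+// VARIANT: $group THEN $lookup in one pipeline (illustrative sketch)
+// ----------------------------------------------------------------------------
+// Not one of the two patterns benchmarked above, but it applies the same
+// "group before joining" insight entirely server-side: because $group runs
+// first, the following $lookup executes once per group (5 times), not once
+// per order. Field names assume the same orders/products schema as above.
+
+const pattern2_server_side_variant = async () => {
+  return db.orders.aggregate([
+    {
+      $group: {
+        _id: "$productId",
+        totalQuantity: { $sum: "$quantity" },
+        orderCount: { $sum: 1 }
+      }
+    },
+    {
+      // Only 5 group documents reach this stage, so only 5 lookups happen
+      $lookup: {
+        from: "products",
+        localField: "_id",
+        foreignField: "_id",
+        as: "product"
+      }
+    },
+    { $unwind: "$product" }
+  ]).toArray();
+};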
+
+// ============================================================================
+// PERFORMANCE COMPARISON RUNNER
+// ============================================================================
+
+const runPerformanceComparison = async () => {
+  console.log("MongoDB Lookup Patterns Performance Comparison");
+  console.log("==============================================\n");
+
+  // Note: In a real scenario, uncomment these based on your data size
+
+  // For small datasets (< 10,000 docs), Pattern 1 might be acceptable
+  // const results1 = pattern1_lookup();
+
+  // For large datasets (> 100,000 docs), always use Pattern 2
+  const results2 = await pattern2_optimized();
+
+  // For very large datasets (> 1,000,000 docs), use the memory-efficient version
+  // const results3 = await pattern2_memory_efficient();
+
+  console.log("\n=== SUMMARY ===");
+  console.log("Pattern 1 ($lookup): Simple code, poor performance with large datasets");
+  console.log("Pattern 2 (Optimized): More code, excellent performance");
+  console.log("Pattern 2 (Memory Efficient): Most complex, handles any dataset size");
+};
+
+// ============================================================================
+// INDEX RECOMMENDATIONS
+// ============================================================================
+
+console.log("\nRECOMMENDED INDEXES:");
+console.log("===================");
+
+// Essential indexes for both patterns
+const createIndexes = () => {
+  // Index on the reference field in Collection A
+  db.orders.createIndex({ "productId": 1 });
+
+  // Compound index for common queries. Its productId prefix also serves
+  // plain productId lookups, so the single-field index above is optional.
+  db.orders.createIndex({ "productId": 1, "orderDate": -1 });
+
+  // No index is needed on products._id: MongoDB creates the _id index
+  // on every collection automatically.
+
+  console.log("Created essential indexes for optimal performance");
+};
+
+// ============================================================================
+// WHEN TO USE EACH PATTERN
+// ============================================================================
+
+console.log("\nDECISION MATRIX:");
+console.log("================");
+console.log("Use Pattern 1 ($lookup) when:");
+console.log("- Source collection < 10,000 documents");
+console.log("- Simple queries with minimal processing");
+console.log("- Development speed matters more than performance");
+console.log("- One-off analytical queries");
+
+console.log("\nUse Pattern 2 (Optimized) when:");
+console.log("- Source collection > 100,000 documents");
+console.log("- Reference collection is small (< 1,000 docs)");
+console.log("- High-frequency queries");
+console.log("- Performance is critical");
+console.log("- Memory usage needs to be controlled");
+
+// Export for use in applications
+module.exports = {
+  pattern1_lookup,
+  pattern2_optimized,
+  pattern2_memory_efficient,
+  runPerformanceComparison,
+  createIndexes
+};
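+
+// Example usage from Node.js (a sketch - assumes `db` has been wired up to a
+// connected MongoClient database handle and the shell-style `db.orders`
+// accessors above have been adapted to db.collection("orders")):
+//
+//   const {
+//     runPerformanceComparison,
+//     createIndexes
+//   } = require("./mongodb_patterns_code_examples");
+//
+//   createIndexes();
+//   runPerformanceComparison().catch(console.error);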