Elasticsearch Practical Guide | Search, Aggregations, Mappings & Node.js

Elasticsearch Practical Guide | Search, Aggregations, Mappings & Node.js

Key Takeaways

Elasticsearch enables sub-second full-text search across millions of documents. This guide covers index design, search queries, aggregations, autocomplete, and practical Node.js integration patterns.

What Elasticsearch Does

Elasticsearch is a distributed search and analytics engine built on Apache Lucene:

PostgreSQL query:  SELECT * FROM products WHERE name LIKE '%wireless headphones%'
  → Scans every row, case-sensitive, no ranking

Elasticsearch:    search "wireless headphones" in products
  → Inverted index lookup (no full table scan), relevance scoring, fuzzy matching,
    handles "headphone" ≈ "headphones", returns best matches first

Use cases:

  • Full-text product/content search
  • Log analytics (ELK Stack: Elasticsearch + Logstash + Kibana)
  • Autocomplete and suggestions
  • Faceted search (filter by category, price range, brand)
  • Real-time dashboards from event streams

Setup

# Docker — easiest for development
# Starts a detached container named "elasticsearch" and publishes port 9200.
# discovery.type=single-node runs it as a one-node cluster;
# xpack.security.enabled=false turns security off, so use this for local development only.
docker run -d \
  --name elasticsearch \
  -p 9200:9200 \
  -e "discovery.type=single-node" \
  -e "xpack.security.enabled=false" \
  elasticsearch:8.12.0

# Verify
# Plain HTTP works here because security was disabled above.
curl http://localhost:9200
# { "name": "...", "version": { "number": "8.12.0", ... } }
# Node.js client
# Installs the official client package imported by the TypeScript snippets.
npm install @elastic/elasticsearch
// lib/elasticsearch.ts
import { Client } from '@elastic/elasticsearch';

// Node URL comes from ELASTICSEARCH_URL; an unset or empty value falls back to
// the local development instance.
const nodeUrl = process.env.ELASTICSEARCH_URL || 'http://localhost:9200';

/** Shared Elasticsearch client used by the rest of the application. */
export const es = new Client({ node: nodeUrl });

Index and Mappings

Define the schema before indexing documents:

// Create the `products` index with an explicit mapping.
// `settings` and `mappings` are passed at the top level of the request: the
// 8.x client deprecates the `body` wrapper, and the other calls in this guide
// (`document`, `operations`, `query`) already use the top-level style.
await es.indices.create({
  index: 'products',
  settings: {
    number_of_shards: 1,
    // NOTE: a replica is never allocated on the same node as its primary, so on
    // a single-node development cluster this index reports "yellow" health.
    // Use 0 for local development if that matters.
    number_of_replicas: 1,
    analysis: {
      analyzer: {
        // Custom analyzer for product search: standard tokenization, then
        // lowercasing, accent folding and stop-word removal.
        product_search: {
          type: 'custom',
          tokenizer: 'standard',
          filter: ['lowercase', 'asciifolding', 'stop'],
        },
      },
    },
  },
  mappings: {
    properties: {
      name: {
        type: 'text',
        analyzer: 'product_search',
        fields: {
          keyword: { type: 'keyword' },       // Exact matching and sorting
          suggest: { type: 'completion' },    // Autocomplete
        },
      },
      description: {
        type: 'text',
        analyzer: 'product_search',
      },
      price: { type: 'float' },
      category: { type: 'keyword' },          // Exact match, aggregations
      brand: { type: 'keyword' },
      rating: { type: 'float' },
      inStock: { type: 'boolean' },
      tags: { type: 'keyword' },
      createdAt: { type: 'date' },
    },
  },
});

Field type guide:

  • text — analyzed, for full-text search (tokenized, lowercased)
  • keyword — not analyzed, for exact matching, sorting, aggregations
  • float, integer, long — numeric fields
  • date — date/datetime fields
  • boolean — true/false
  • completion — for autocomplete/suggestions

CRUD Operations

// Index (create) a single document.
// The document is built first so the request itself stays short.
const sampleProduct = {
  name: 'Sony WH-1000XM5 Wireless Headphones',
  description: 'Industry-leading noise canceling with outstanding call quality',
  price: 349.99,
  category: 'Electronics',
  brand: 'Sony',
  rating: 4.8,
  inStock: true,
  tags: ['wireless', 'noise-canceling', 'headphones'],
  createdAt: new Date().toISOString(),
};

await es.index({
  index: 'products',
  id: '1', // Optional — auto-generated if omitted
  document: sampleProduct,
});

// Bulk index — much faster for large imports.
// Each product contributes two entries: the action line, then the document.
const operations = products.flatMap(product => [
  { index: { _index: 'products', _id: product.id } },
  product,
]);

// The Bulk API rejects an empty request body, so skip the call when there is
// nothing to index.
if (operations.length > 0) {
  const bulkResponse = await es.bulk({ operations });

  // A bulk request resolves successfully even when individual documents fail;
  // failures are only reported through `errors` and the per-item results.
  if (bulkResponse.errors) {
    const failures = bulkResponse.items
      .map(item => item.index)
      .filter(result => result?.error);

    throw new Error(
      `Bulk indexing failed for ${failures.length} of ${bulkResponse.items.length} documents: ` +
        JSON.stringify(failures[0]?.error),
    );
  }
}

// All three calls below address the same document.
const productRef = { index: 'products', id: '1' };

// Get by ID
const doc = await es.get({ ...productRef });
console.log(doc._source);  // The document

// Update (partial) — only the listed fields are changed
const changes = { price: 299.99, inStock: false };
await es.update({ ...productRef, doc: changes });

// Delete
await es.delete({ ...productRef });

Search Queries

// NOTE: both examples below declare `results`; they are alternatives, so use
// one or rename the variable when pasting them into the same scope.

// Simple match query
// Full-text search on the analyzed `name` field of the `products` index.
const results = await es.search({
  index: 'products',
  query: {
    match: {
      name: 'wireless headphones',   // Tokenized: ["wireless", "headphones"]
    },
  },
});

// Multi-field search with boosting
// The same query string is matched against several fields; `^N` weights a
// field's contribution to the score.
const results = await es.search({
  index: 'products',
  query: {
    multi_match: {
      query: 'wireless headphones',
      fields: ['name^3', 'description^1', 'tags^2'],  // name is 3x more important
      type: 'best_fields',
      fuzziness: 'AUTO',   // Handle typos: "headphoens" → "headphones"
    },
  },
});

Bool Query — Combine Conditions

// Combines one scored full-text clause (`must`) with exact-value constraints
// (`filter`, `must_not`) and optional score boosts (`should`).
// `category` and `brand` are mapped as `keyword`, so their `term` values must
// match the indexed value exactly.
const results = await es.search({
  index: 'products',
  query: {
    bool: {
      must: [                          // All must match (AND)
        { match: { name: 'headphones' } },
      ],
      filter: [                        // Must match, no scoring impact
        { term: { inStock: true } },
        { term: { category: 'Electronics' } },
        {
          range: {
            price: { gte: 100, lte: 500 },
          },
        },
      ],
      should: [                        // Nice to have (boost score)
        { term: { brand: 'Sony' } },
        { range: { rating: { gte: 4.5 } } },
      ],
      must_not: [                      // Must NOT match
        { term: { brand: 'Unknown' } },
      ],
    },
  },
});

Pagination and Sorting

// Offset-based pagination with a multi-key sort.
// NOTE: `from + size` may not exceed `index.max_result_window` (10,000 by
// default); use `search_after` for deeper paging.
const results = await es.search({
  index: 'products',
  query: { match: { name: 'headphones' } },
  from: 0,           // Offset: (page - 1) * size
  size: 10,          // Page size
  sort: [
    { _score: 'desc' },         // Primary: relevance score
    { rating: 'desc' },         // Secondary: highest rated first
    { price: 'asc' },           // Tertiary: cheapest first
  ],
  _source: ['name', 'price', 'rating', 'category'],  // Only these fields
});

const hits = results.hits.hits;

// `hits.total` is typed as a plain number, a `{ value, relation }` object, or
// undefined (when total tracking is disabled), so normalize it before use
// instead of reading `.value` directly.
const totalHits = results.hits.total;
const total = typeof totalHits === 'number' ? totalHits : (totalHits?.value ?? 0);

Aggregations

Aggregations compute summaries and analytics:

// Runs four aggregations over in-stock products in a single request.
// The `bool.filter` restricts which documents are aggregated; `size: 0`
// suppresses the hit list so only the summaries come back.
const results = await es.search({
  index: 'products',
  query: {
    bool: {
      filter: [{ term: { inStock: true } }],
    },
  },
  size: 0,                 // Only return aggregations, no documents
  aggs: {
    // Category breakdown
    // One bucket per category (top 10), each with nested price/rating averages.
    by_category: {
      terms: { field: 'category', size: 10 },
      aggs: {
        avg_price: { avg: { field: 'price' } },
        avg_rating: { avg: { field: 'rating' } },
      },
    },

    // Price histogram
    price_ranges: {
      histogram: {
        field: 'price',
        interval: 100,     // Buckets: 0-100, 100-200, 200-300...
      },
    },

    // Statistics
    price_stats: {
      stats: { field: 'price' },  // min, max, avg, count, sum
    },

    // Top brands
    top_brands: {
      terms: { field: 'brand', size: 5 },
    },
  },
});

// Access results
// Results are keyed by the aggregation names chosen in `aggs` above.
const categories = results.aggregations.by_category.buckets;
// [{ key: 'Electronics', doc_count: 245, avg_price: { value: 187.5 } }, ...]

Autocomplete / Suggestions

// Using completion suggester (defined in mapping as 'completion' type)
// NOTE(review): a completion suggester matches the prefix against the start of
// the indexed input, so 'wirel' would presumably only match names that begin
// with "wirel…" — confirm against the data being indexed.
const suggestions = await es.search({
  index: 'products',
  suggest: {
    product_suggest: {
      prefix: 'wirel',            // What the user has typed so far
      completion: {
        field: 'name.suggest',    // The completion field
        size: 5,
        skip_duplicates: true,
        fuzzy: { fuzziness: 1 }, // Allow 1 character difference
      },
    },
  },
});

// The response is keyed by the suggester name used in the request; the first
// entry holds the options for the single prefix sent above.
const options = suggestions.suggest.product_suggest[0].options;
// [{ text: 'Wireless Headphones', _score: 1 }, ...]

Node.js Integration Pattern

// services/search.ts
import { es } from '../lib/elasticsearch';

/** Input accepted by `searchProducts`. Every field except `query` is optional. */
interface SearchParams {
  /** Free-text query; an empty string makes the search match all products. */
  query: string;
  /** Restrict results to this exact category value. */
  category?: string;
  /** Inclusive lower bound on price. */
  minPrice?: number;
  /** Inclusive upper bound on price. */
  maxPrice?: number;
  /** When set, only return products whose `inStock` flag equals this value. */
  inStock?: boolean;
  /** 1-based page number; `searchProducts` defaults it to 1. */
  page?: number;
  /** Results per page; `searchProducts` defaults it to 20. */
  pageSize?: number;
  /** Result ordering; `searchProducts` defaults it to 'relevance'. */
  sortBy?: 'relevance' | 'price_asc' | 'price_desc' | 'rating';
}

/** Shape of a document stored in the `products` index. */
interface ProductDocument {
  name: string;
  description: string;
  price: number;
  category: string;
  brand: string;
  rating: number;
  inStock: boolean;
  tags: string[];
  createdAt: string;
}

/** Shape of the aggregations requested by `searchProducts`. */
interface ProductSearchAggregations {
  by_category: { buckets: Array<{ key: string; doc_count: number }> };
  price_stats: {
    count: number;
    min: number | null;
    max: number | null;
    avg: number | null;
    sum: number;
  };
}

/** Filter clauses that `searchProducts` can add to the bool query. */
type ProductFilter =
  | { term: { category: string } }
  | { term: { inStock: boolean } }
  | { range: { price: { gte?: number; lte?: number } } };

/** Coerces a paging value to an integer >= 1, using `fallback` when it is missing or not finite. */
function toPositiveInt(value: number | undefined, fallback: number): number {
  if (value === undefined || !Number.isFinite(value)) return fallback;
  return Math.max(1, Math.floor(value));
}

/**
 * Runs a filtered, paginated product search against the `products` index and
 * returns the page of products together with category and price facets.
 *
 * @param params - Query text plus optional filters, paging and sort order.
 * @returns The matching products (each with its document `id`), the total hit
 *   count, the number of pages at the effective page size, and the facets.
 * @throws Whatever `es.search` rejects with; errors are not caught here.
 */
export async function searchProducts(params: SearchParams) {
  const { query, category, minPrice, maxPrice, inStock, sortBy = 'relevance' } = params;

  // Normalize paging input: a page below 1 would produce a negative `from`,
  // and a page size of 0 would make `totalPages` divide by zero.
  const page = toPositiveInt(params.page, 1);
  const pageSize = toPositiveInt(params.pageSize, 20);

  const sort = {
    relevance: [{ _score: 'desc' }],
    price_asc: [{ price: 'asc' }],
    price_desc: [{ price: 'desc' }],
    rating: [{ rating: 'desc' }],
  }[sortBy];

  const filters: ProductFilter[] = [];
  if (category) filters.push({ term: { category } });
  if (inStock !== undefined) filters.push({ term: { inStock } });
  if (minPrice !== undefined || maxPrice !== undefined) {
    const price: { gte?: number; lte?: number } = {};
    if (minPrice !== undefined) price.gte = minPrice;
    if (maxPrice !== undefined) price.lte = maxPrice;
    filters.push({ range: { price } });
  }

  const results = await es.search<ProductDocument, ProductSearchAggregations>({
    index: 'products',
    query: {
      bool: {
        // An empty query string means "browse": match everything and rely on
        // the filters alone.
        must: query
          ? [{
              multi_match: {
                query,
                fields: ['name^3', 'description^1', 'tags^2'],
                fuzziness: 'AUTO',
              },
            }]
          : [{ match_all: {} }],
        filter: filters,
      },
    },
    sort,
    from: (page - 1) * pageSize,
    size: pageSize,
    // Without this the reported total is capped at 10,000, which would
    // understate `total` and `totalPages` on large result sets.
    // NOTE: pages beyond `index.max_result_window` (10,000 hits by default)
    // still cannot be fetched with from/size.
    track_total_hits: true,
    aggs: {
      by_category: { terms: { field: 'category', size: 20 } },
      price_stats: { stats: { field: 'price' } },
    },
  });

  // `hits.total` may be a plain number, a `{ value, relation }` object, or absent.
  const rawTotal = results.hits.total;
  const total = typeof rawTotal === 'number' ? rawTotal : (rawTotal?.value ?? 0);

  return {
    products: results.hits.hits.map(hit => ({ id: hit._id, ...hit._source })),
    total,
    totalPages: Math.ceil(total / pageSize),
    facets: {
      categories: results.aggregations?.by_category.buckets ?? [],
      priceStats: results.aggregations?.price_stats,
    },
  };
}

Syncing with a Primary Database

// Keep Elasticsearch in sync when the primary DB changes
import { prisma } from '../lib/prisma';
import { es } from '../lib/elasticsearch';

/**
 * Loads a product (with its category and tags) from the primary database and
 * writes the search document for it into the `products` index, keyed by the
 * product's id. Does nothing when the product row cannot be found.
 */
async function indexProduct(productId: number) {
  const record = await prisma.product.findUnique({
    where: { id: productId },
    include: { category: true, tags: true },
  });

  // No row, nothing to index.
  if (!record) return;

  // Flatten the relational record into the shape stored in the index.
  const document = {
    name: record.name,
    description: record.description,
    price: record.price,
    category: record.category.name,
    brand: record.brand,
    rating: record.rating,
    inStock: record.stock > 0,
    tags: record.tags.map(tag => tag.name),
    createdAt: record.createdAt.toISOString(),
  };

  await es.index({ index: 'products', id: record.id.toString(), document });
}

/**
 * Removes a product's document from the `products` index.
 *
 * A 404 response is ignored so the call is idempotent: deleting a product that
 * was never indexed (or was already removed) must not fail the caller's
 * database workflow. Other errors still reject.
 *
 * @param productId - Primary-database id of the product; used as the document id.
 */
async function deleteProductIndex(productId: number) {
  await es.delete(
    { index: 'products', id: productId.toString() },
    { ignore: [404] },
  );
}

// Call after DB operations
// The database write is awaited first, then the search document is rebuilt
// from the stored row.
// NOTE(review): there is no error handling here, so a failure in indexProduct
// after a successful update leaves the index stale — consider a retry or queue.
await prisma.product.update({ where: { id }, data: updateData });
await indexProduct(id);   // Sync to Elasticsearch

Quick Reference

| Operation | Code |
| --- | --- |
| Create index | `es.indices.create({ index, body: { settings, mappings } })` |
| Index document | `es.index({ index, id, document })` |
| Bulk index | `es.bulk({ operations })` |
| Full-text search | `{ match: { field: 'query' } }` |
| Exact match | `{ term: { field: 'value' } }` |
| Range filter | `{ range: { price: { gte: 10, lte: 100 } } }` |
| Multiple conditions | `{ bool: { must, filter, should, must_not } }` |
| Aggregate | `{ aggs: { name: { terms: { field: 'category' } } } }` |

Related posts: