Lesson 15

Capstone & beyond

Integrate, document, and extend your engine.

4-5 hours advanced Module 5: Production & Operations

Course Navigation

Back to course
Progress 100%

🎉 Capstone Project: Build Your Production Database

Congratulations! You’ve learned all the fundamentals. Now it’s time to build a complete, production-ready distributed key-value database with advanced features and real-world deployment.

Estimated Time: 20-30 hours | Difficulty: Advanced | Prerequisites: All previous modules

Lesson 15.1: Advanced Features Implementation

Feature 1: Time-To-Live (TTL)

TTL allows keys to automatically expire after a duration, essential for caching and session management.

// TTLManager handles key expiration. It wraps a Store and remembers, per
// key, the instant after which that key is treated as expired.
type TTLManager struct {
    // store is the backing store; TTLManager's methods call Put, Get and
    // Delete on it.
    store       Store
    // expirations maps a key (as a string) to its expiry instant. Keys
    // written without a TTL have no entry here.
    expirations map[string]time.Time
    // mu guards expirations.
    mu          sync.RWMutex
    // ticker drives the periodic cleanup; it is created by Start.
    ticker      *time.Ticker
    // done stops the cleanup goroutine launched by Start when it becomes
    // readable (e.g. when closed).
    // NOTE(review): no code visible here initializes or closes done — confirm
    // a constructor/Stop method exists elsewhere.
    done        chan struct{}
}

// SetWithTTL stores value under key and records that the key expires ttl
// from now.
//
// The store write and the expiration update happen under the same write
// lock, so an expired entry left over from an earlier write of the same key
// cannot be acted on by a concurrent Get or cleanup pass in between and
// delete the value that was just written.
//
// If the store write fails, the error is returned unchanged and the recorded
// expiration (if any) is left untouched. A ttl <= 0 records an expiry that
// is already in the past, so the key is treated as expired on next access.
func (tm *TTLManager) SetWithTTL(key []byte, value []byte, ttl time.Duration) error {
    tm.mu.Lock()
    defer tm.mu.Unlock()

    if err := tm.store.Put(key, value); err != nil {
        return err
    }

    // Allow a TTLManager whose map was never allocated to be used directly;
    // writing to a nil map would panic.
    if tm.expirations == nil {
        tm.expirations = make(map[string]time.Time)
    }
    tm.expirations[string(key)] = time.Now().Add(ttl)

    return nil
}

// Get returns the value stored under key, or an error if the key's TTL has
// elapsed.
//
// Expiry is first checked under the read lock. If the key looks expired, the
// check is repeated under the write lock before anything is deleted, because
// the key may have been refreshed or already removed between the two lock
// acquisitions. Only an entry that is still expired is removed from the
// store and from the expiration table; in that case "key expired" is
// returned. In every other case the lookup is delegated to the store.
func (tm *TTLManager) Get(key []byte) ([]byte, error) {
    keyStr := string(key)

    tm.mu.RLock()
    expireTime, exists := tm.expirations[keyStr]
    tm.mu.RUnlock()

    if exists && time.Now().After(expireTime) {
        tm.mu.Lock()
        // Re-read under the write lock: the entry observed above may be stale.
        current, tracked := tm.expirations[keyStr]
        expired := tracked && time.Now().After(current)
        if expired {
            tm.store.Delete(key)
            delete(tm.expirations, keyStr)
        }
        tm.mu.Unlock()

        if expired {
            return nil, fmt.Errorf("key expired")
        }
    }

    return tm.store.Get(key)
}

// Start launches a background goroutine that calls cleanupExpired once per
// second until the done channel becomes readable (typically by being
// closed).
//
// The ticker is stopped when the goroutine exits so its resources are
// released. If done has not been allocated yet it is created here; otherwise
// the goroutine could never be signalled, because receiving from a nil
// channel blocks forever.
//
// Start is meant to be called once; each call starts an additional
// goroutine.
func (tm *TTLManager) Start() {
    if tm.done == nil {
        tm.done = make(chan struct{})
    }

    // Capture both in locals so the goroutine does not read struct fields
    // that a later call could overwrite.
    ticker := time.NewTicker(1 * time.Second)
    tm.ticker = ticker
    done := tm.done

    go func() {
        defer ticker.Stop()
        for {
            select {
            case <-ticker.C:
                tm.cleanupExpired()
            case <-done:
                return
            }
        }
    }()
}

// cleanupExpired sweeps the expiration table once and, for every key whose
// expiry instant lies before the current time, deletes the key from the
// store and drops its table entry. The write lock is held for the whole
// sweep.
func (tm *TTLManager) cleanupExpired() {
    tm.mu.Lock()
    defer tm.mu.Unlock()

    cutoff := time.Now()
    for k, deadline := range tm.expirations {
        if !deadline.Before(cutoff) {
            continue // not yet due
        }
        tm.store.Delete([]byte(k))
        delete(tm.expirations, k)
    }
}

Feature 2: Pub/Sub Messaging

Pub/Sub allows publishers to send messages to multiple subscribers, enabling real-time communication patterns.

// PubSubManager handles publish/subscribe: it keeps, per channel name, the
// list of subscribers that messages published to that channel are offered to.
type PubSubManager struct {
    // subscribers maps a channel name to the subscribers registered on it.
    subscribers map[string][]*Subscriber
    // mu guards subscribers.
    mu          sync.RWMutex
}

// Subscriber is one registration on a pub/sub channel, as returned by
// PubSubManager.Subscribe.
type Subscriber struct {
    // ID identifies the subscriber; Unsubscribe matches on it.
    ID      string
    // Channel is the name of the channel this subscriber is registered on.
    Channel string
    // Messages delivers published payloads. Subscribe creates it with a
    // buffer of 100; Publish skips the subscriber when the buffer is full,
    // and Unsubscribe closes it.
    Messages chan []byte
}

// Subscribe registers a new subscriber on channel and returns it.
//
// The subscriber gets a fresh ID from generateID and a Messages channel
// buffered to 100 entries. The subscriber table is allocated on first use,
// so a zero-value PubSubManager is usable without a constructor (writing to
// a nil map would otherwise panic).
func (ps *PubSubManager) Subscribe(channel string) *Subscriber {
    subscriber := &Subscriber{
        ID:       generateID(),
        Channel:  channel,
        Messages: make(chan []byte, 100),
    }

    ps.mu.Lock()
    defer ps.mu.Unlock()

    if ps.subscribers == nil {
        ps.subscribers = make(map[string][]*Subscriber)
    }
    ps.subscribers[channel] = append(ps.subscribers[channel], subscriber)

    return subscriber
}

// Publish offers message to every subscriber of channel and returns how many
// of them accepted it.
//
// Delivery is non-blocking: a subscriber whose Messages buffer is full is
// skipped and not counted. A channel with no subscribers yields 0.
//
// The read lock is held for the whole delivery loop. Because every send is
// non-blocking this is cheap, and it guarantees that the subscriber list is
// not modified and no subscriber channel is closed under the write lock
// while messages are being sent — sending on a closed channel would panic.
func (ps *PubSubManager) Publish(channel string, message []byte) int {
    ps.mu.RLock()
    defer ps.mu.RUnlock()

    count := 0
    // Ranging over a missing (nil) entry simply performs no iterations.
    for _, sub := range ps.subscribers[channel] {
        select {
        case sub.Messages <- message:
            count++
        default:
            // Subscriber queue full, skip
        }
    }

    return count
}

// Unsubscribe removes subscriber from its channel and closes its Messages
// channel so receivers ranging over it terminate.
//
// The subscriber is matched by ID within subscriber.Channel. Calling it with
// nil, with an unknown channel, or with a subscriber that is no longer
// registered is a no-op, so the Messages channel is closed at most once.
//
// Removal builds a new slice instead of shifting elements in place: any
// slice value previously read from the table keeps its original contents
// rather than being rewritten underneath its holder. When the last
// subscriber leaves, the channel entry is deleted so the table does not
// accumulate empty lists.
//
// The close happens under the write lock; code that sends on Messages must
// synchronize on the same lock to avoid sending on a closed channel.
func (ps *PubSubManager) Unsubscribe(subscriber *Subscriber) {
    if subscriber == nil {
        return
    }

    ps.mu.Lock()
    defer ps.mu.Unlock()

    subscribers := ps.subscribers[subscriber.Channel]
    for i, sub := range subscribers {
        if sub.ID != subscriber.ID {
            continue
        }

        if len(subscribers) == 1 {
            delete(ps.subscribers, subscriber.Channel)
        } else {
            remaining := make([]*Subscriber, 0, len(subscribers)-1)
            remaining = append(remaining, subscribers[:i]...)
            remaining = append(remaining, subscribers[i+1:]...)
            ps.subscribers[subscriber.Channel] = remaining
        }

        close(sub.Messages)
        return
    }
}

Feature 3: Lua Scripting

Lua scripting allows atomic operations on multiple keys, enabling complex business logic at the database level.

import "github.com/yuin/gopher-lua"

// LuaEngine executes Lua scripts against a Store through a single embedded
// Lua state.
type LuaEngine struct {
    // store is the key-value store that the Lua-visible functions operate on
    // (luaGet reads from it).
    store Store
    // vm is the Lua state created by NewLuaEngine; Execute runs scripts in it.
    // NOTE(review): one shared state is used for every Execute call —
    // confirm callers serialize access.
    vm    *lua.LState
}

// NewLuaEngine builds a LuaEngine bound to store: it allocates a fresh Lua
// state and exposes the database functions to it before returning.
func NewLuaEngine(store Store) *LuaEngine {
    le := &LuaEngine{
        store: store,
        vm:    lua.NewState(),
    }
    le.registerFunctions() // make get/set/del callable from scripts
    return le
}

// registerFunctions installs the engine's Go callbacks as Lua globals named
// "get", "set" and "del", in that order.
func (le *LuaEngine) registerFunctions() {
    bindings := []struct {
        name string
        fn   lua.LGFunction
    }{
        {"get", le.luaGet},
        {"set", le.luaSet},
        {"del", le.luaDel},
    }

    for _, b := range bindings {
        le.vm.SetGlobal(b.name, le.vm.NewFunction(b.fn))
    }
}

// luaGet backs the Lua function get(key). It reads the first argument as a
// string, looks it up in the store, and pushes exactly one result: the value
// as a Lua string, or nil when the store reports an error.
func (le *LuaEngine) luaGet(L *lua.LState) int {
    var result lua.LValue = lua.LNil

    if value, err := le.store.Get([]byte(L.ToString(1))); err == nil {
        result = lua.LString(value)
    }

    L.Push(result)
    return 1 // number of values returned to Lua
}

// Execute runs script in the engine's Lua state and returns the script's
// last return value (a lua.LValue), or lua.LNil when the script returns
// nothing. A load or runtime error is returned unchanged with a nil result.
//
// The stack height is recorded before the run and restored afterwards.
// DoString leaves the script's return values on the stack; without the
// restore they would pile up across calls, and a script that returns nothing
// would report whatever an earlier script left on top.
func (le *LuaEngine) Execute(script string) (interface{}, error) {
    base := le.vm.GetTop()
    defer le.vm.SetTop(base)

    if err := le.vm.DoString(script); err != nil {
        return nil, err
    }

    // No new stack entries means the script produced no return value.
    if le.vm.GetTop() == base {
        return lua.LNil, nil
    }

    return le.vm.Get(-1), nil
}

Feature 4: Geo-spatial Queries

Geo-spatial queries allow querying by geographic location, essential for location-based applications.

import "math"

// GeoPoint represents a latitude/longitude pair. Both fields are in degrees:
// GeoManager.Distance converts them to radians before use.
type GeoPoint struct {
    Latitude  float64
    Longitude float64
}

// GeoManager handles geographic data: it keeps an in-memory index of named
// locations for radius queries and mirrors each location into a Store.
type GeoManager struct {
    // store receives the JSON encoding of every location added.
    store Store
    // index maps a location key to its coordinates; NearbyLocations scans it.
    index map[string]*GeoPoint
    // mu guards index.
    mu    sync.RWMutex
}

// AddLocation stores point under key, both as JSON in the backing store and
// in the in-memory index used for radius queries.
//
// The point is encoded and persisted first and indexed only once the store
// write has succeeded, so a failure never leaves the index pointing at a
// location the store does not hold. Encoding errors are returned rather than
// ignored: coordinates that are NaN or infinite cannot be represented in
// JSON. The index is allocated on first use.
func (gm *GeoManager) AddLocation(key string, point GeoPoint) error {
    data, err := json.Marshal(point)
    if err != nil {
        return err
    }

    if err := gm.store.Put([]byte(key), data); err != nil {
        return err
    }

    gm.mu.Lock()
    defer gm.mu.Unlock()

    if gm.index == nil {
        gm.index = make(map[string]*GeoPoint)
    }
    // point is this call's own copy, so retaining its address is safe.
    gm.index[key] = &point

    return nil
}

// Distance returns the great-circle distance between point1 and point2 in
// kilometres, computed with the haversine formula on a sphere of radius
// 6371 km. Coordinates are given in degrees.
//
// The intermediate haversine term is clamped to [0, 1]. Mathematically it
// never leaves that range, but floating-point rounding can push it slightly
// past 1 for nearly antipodal points, which would make Sqrt(1-a) — and thus
// the result — NaN.
func (gm *GeoManager) Distance(point1, point2 GeoPoint) float64 {
    const earthRadiusKm = 6371

    radians := func(deg float64) float64 { return deg * math.Pi / 180 }

    lat1 := radians(point1.Latitude)
    lat2 := radians(point2.Latitude)
    sinHalfLat := math.Sin(radians(point2.Latitude-point1.Latitude) / 2)
    sinHalfLon := math.Sin(radians(point2.Longitude-point1.Longitude) / 2)

    a := sinHalfLat*sinHalfLat +
        math.Cos(lat1)*math.Cos(lat2)*sinHalfLon*sinHalfLon
    a = math.Min(1, math.Max(0, a))

    c := 2 * math.Atan2(math.Sqrt(a), math.Sqrt(1-a))

    return earthRadiusKm * c
}

// NearbyLocations returns the keys of all indexed locations whose distance
// from center is at most radiusKm. The index is scanned under the read lock;
// the result is nil when nothing matches, and its order follows map
// iteration and is therefore unspecified.
func (gm *GeoManager) NearbyLocations(center GeoPoint, radiusKm float64) []string {
    gm.mu.RLock()
    defer gm.mu.RUnlock()

    var matches []string

    for name, loc := range gm.index {
        within := gm.Distance(center, *loc) <= radiusKm
        if within {
            matches = append(matches, name)
        }
    }

    return matches
}

Lesson 15.2: Documentation

API Documentation

# KVDB API Reference

## Commands

### GET
Retrieve value for key
```
GET key
```
Response: bulk string or null

### SET
Store value for key
```
SET key value [EX seconds] [PX milliseconds]
```
Response: OK

### DEL
Delete key
```
DEL key [key ...]
```
Response: integer (number deleted)

### TTL
Get remaining TTL in seconds
```
TTL key
```
Response: integer (-2 if not exists, -1 if no expiry)

### EXPIRE
Set expiration in seconds
```
EXPIRE key seconds
```
Response: 1 if set, 0 if not exists

### PUBLISH
Publish message to channel
```
PUBLISH channel message
```
Response: integer (number of subscribers)

### SUBSCRIBE
Subscribe to channel
```
SUBSCRIBE channel [channel ...]
```
Response: array of messages

### EVAL
Execute Lua script
```
EVAL script numkeys [key ...] [arg ...]
```
Response: script result

Operational Runbook

# KVDB Operational Runbook

## Starting Server
```bash
./kvdb -config config.yaml
```

## Health Checks
- Liveness: GET /healthz
- Readiness: GET /readyz

## Backup Procedures
```bash
# Create backup
curl -X POST http://localhost:8080/backup

# List backups
curl http://localhost:8080/backups

# Restore backup
curl -X POST http://localhost:8080/restore/backup-name
```

## Scaling Cluster
```bash
# Add new node
kubectl scale deployment kvdb --replicas=5

# Remove node
kubectl scale deployment kvdb --replicas=3
```

## Troubleshooting
- High latency: Check p99 latency in metrics
- Memory growth: Check for memory leaks with pprof
- Replication lag: Monitor replication metrics

Lesson 15.3: Production Deployment

Pre-Deployment Checklist

✅ Pre-Deployment Checklist

  • □ All tests pass (>80% coverage)

  • □ Performance benchmarks meet targets

  • □ Security audit completed

  • □ Load testing completed

  • □ Backup/restore tested

  • □ Documentation complete

  • □ Monitoring configured

  • □ Health checks working

  • □ Rate limiting configured

  • □ TLS certificates ready

  • □ Database backups available

  • □ Rollback plan documented

Deployment Script

#!/bin/bash
# deploy.sh - Production deployment script
#
# Usage: ./deploy.sh <version> <environment>
#
# Builds the kvdb image for <version>, pushes it to the registry, points the
# kvdb Kubernetes deployment at it, waits for the rollout and then polls the
# readiness endpoint. Exits 0 once the endpoint answers successfully, 1 on
# bad usage or if it never does.
#
# NOTE(review): <environment> is required but only appears in the log line;
# the kubectl context/namespace is whatever is currently active — confirm
# whether it should select one.

# -e: stop on the first failing command; -u: treat unset variables as errors;
# pipefail: a pipeline fails if any stage fails.
set -euo pipefail

# Default to empty so the usage check below (not `set -u`) reports missing
# arguments.
VERSION="${1:-}"
ENVIRONMENT="${2:-}"

if [ -z "$VERSION" ] || [ -z "$ENVIRONMENT" ]; then
    echo "Usage: ./deploy.sh <version> <environment>" >&2
    exit 1
fi

LOCAL_IMAGE="kvdb:$VERSION"
REMOTE_IMAGE="registry.example.com/kvdb:$VERSION"

echo "Starting deployment of version $VERSION to $ENVIRONMENT"

# 1. Build Docker image
echo "Building Docker image..."
docker build -t "$LOCAL_IMAGE" .

# 2. Push to registry
echo "Pushing to registry..."
docker tag "$LOCAL_IMAGE" "$REMOTE_IMAGE"
docker push "$REMOTE_IMAGE"

# 3. Update Kubernetes deployment
echo "Updating Kubernetes deployment..."
kubectl set image deployment/kvdb "kvdb=$REMOTE_IMAGE"

# 4. Wait for rollout
echo "Waiting for rollout..."
kubectl rollout status deployment/kvdb --timeout=5m

# 5. Verify health
echo "Verifying health..."
for i in {1..10}; do
    # No -it: the script is not interactive and has no TTY to attach.
    # curl -f turns HTTP error statuses (e.g. 503 while not ready) into a
    # non-zero exit; without it any HTTP response would count as healthy.
    if kubectl exec deployment/kvdb -- curl -fsS http://localhost:8080/readyz; then
        echo "Health check passed"
        exit 0
    fi
    echo "Health check attempt $i/10 failed, retrying in 10s..."
    sleep 10
done

echo "Health check failed"
exit 1

Architecture Overview

KVDB - Distributed Key-Value Database

┌─────────────────────────────────────────┐
│         Client Application              │
└──────────────┬──────────────────────────┘
               │
┌──────────────▼──────────────────────────┐
│      Client Library (Pooling, Retry)    │
└──────────────┬──────────────────────────┘
               │
┌──────────────▼──────────────────────────┐
│    RESP Protocol (Redis Compatible)     │
└──────────────┬──────────────────────────┘
               │
┌──────────────▼──────────────────────────┐
│    TCP Server (10K+ connections)        │
└──────────────┬──────────────────────────┘
               │
┌──────────────▼──────────────────────────┐
│  Transaction Engine (MVCC + Conflicts)  │
└──────────────┬──────────────────────────┘
               │
┌──────────────▼──────────────────────────┐
│ Replication Layer (Failover, Consensus) │
└──────────────┬──────────────────────────┘
               │
┌──────────────▼──────────────────────────┐
│    Storage Engine (LSM Tree)            │
│  ├─ Memtables (Sorted)                  │
│  ├─ SSTables (Persisted)                │
│  ├─ Compaction (Leveled)                │
│  ├─ Bloom Filters                       │
│  └─ Block Cache                         │
└──────────────┬──────────────────────────┘
               │
┌──────────────▼──────────────────────────┐
│    WAL + Crash Recovery                 │
└──────────────┬──────────────────────────┘
               │
┌──────────────▼──────────────────────────┐
│   Sharded In-Memory Store               │
└─────────────────────────────────────────┘

Key Achievements

Performance

  • 1M+ ops/sec single-node

  • 10K+ ops/sec distributed

  • <100ms p99 latency

  • Sub-millisecond MVCC lookup

Reliability

  • Zero data loss (WAL + replication)

  • <500ms automatic failover

  • ACID transactions

  • Deadlock detection

Operability

  • Prometheus metrics

  • Structured logging

  • Health checks

  • Kubernetes ready

  • TLS encryption

Scalability

  • Multi-node clusters

  • Automatic replication

  • Leader-follower topology

  • Consistent hashing

Capstone Project Assignment

Project Requirements

Build a complete, production-ready key-value database with all the features we’ve learned:

Core Engine (Modules 1-3)

  • ✅ Sharded in-memory storage

  • ✅ LSM tree persistence

  • ✅ MVCC transactions

  • ✅ Crash recovery

Networking (Module 4)

  • ✅ TCP server (10K+ connections)

  • ✅ RESP protocol

  • ✅ Client library with pooling

Distribution (Module 5)

  • ✅ Leader-follower replication

  • ✅ Automatic failover

  • ✅ Consensus-based

Production (Modules 6-7)

  • ✅ Prometheus metrics

  • ✅ Structured logging

  • ✅ Docker & Kubernetes

  • ✅ TLS security

Advanced Features (Module 8)

Implement at least 2 of the following advanced features:

  • Time-To-Live (TTL): Automatic key expiration

  • Pub/Sub Messaging: Publish-subscribe pattern

  • Lua Scripting: Atomic multi-key operations

  • Geo-spatial Queries: Location-based queries

Deliverables

1. Source Code

  • Organized project structure

  • >80% test coverage

  • Clean code (gofmt compliant)

  • Comprehensive error handling

2. Documentation

  • API reference

  • Operational runbook

  • Architecture decision records

  • Performance tuning guide

3. Deployment

  • Working Docker image

  • Kubernetes manifests

  • Backup/restore procedures

  • Health checks

4. Testing

  • Unit tests (>80% coverage)

  • Integration tests

  • Load tests

  • Chaos engineering tests

Grading Rubric

Criteria Excellent Good Fair

Functionality All features working Most features work Basic features only

Performance 1M+ ops/sec 100K+ ops/sec 10K+ ops/sec

Reliability <1% error, zero data loss <5% error >10% error

Code Quality Clean, tested Tested Minimal tests

🎉 Course Complete!

Congratulations! You’ve successfully completed the comprehensive “Build a Key-Value Database in Go” course.

What You’ve Built

A production-ready, distributed key-value database in Go featuring:

  • Multi-node replication with automatic failover

  • ACID transactions with MVCC

  • LSM tree persistence with Bloom filters

  • Redis-compatible protocol

  • Kubernetes deployment ready

  • Comprehensive monitoring

  • Advanced features (TTL, Pub/Sub, Lua, Geo)

Course Statistics

15

Weeks of Material

4000+

Lines of Content

80-100

Hours of Learning

Next Steps

  1. Deploy to production - Use the Kubernetes manifests you’ve created

  2. Add more features - Implement additional advanced features

  3. Optimize performance - Profile and tune for specific workloads

  4. Contribute to open source - Consider releasing as open source

  5. Build on it - Use as foundation for new projects

Resources

🚀 Congratulations!

You’ve successfully built a complete, production-grade distributed database system.

This is a significant achievement. Well done!