Beyond the core concepts, building a push notification system that scales from thousands to millions of users requires careful architectural decisions. This chapter covers the critical technical components that separate a prototype from a production-ready system.

Scaling with message queues

Direct API calls to push providers work fine for low volumes, but they create problems at scale. Synchronous delivery blocks your application, retries become complex, and traffic spikes can overwhelm providers.

A message queue decouples notification generation from delivery, providing resilience and scalability. Your application publishes notification jobs to the queue, and workers process them asynchronously at a sustainable rate.

// Publisher: Add notification job to queue
/**
 * Publishes a notification job to the "notifications" queue.
 *
 * @param {string} userId - Recipient of the notification.
 * @param {object} notification - Notification to deliver. `sendAt` (a Date,
 *   epoch milliseconds, or a date string) schedules the job for later;
 *   `priority` defaults to "normal".
 * @returns {Promise<string>} The id of the queued job.
 * @throws {TypeError} If `notification.sendAt` is set but is not a valid date.
 */
async function queueNotification(userId, notification) {
  const now = new Date();

  // Normalize sendAt so the job always carries a real Date, whatever form
  // the caller used.
  const scheduledFor = notification.sendAt ? new Date(notification.sendAt) : now;
  if (Number.isNaN(scheduledFor.getTime())) {
    throw new TypeError(`Invalid notification.sendAt: ${notification.sendAt}`);
  }

  const job = {
    id: generateId(),
    userId,
    notification,
    attempts: 0,
    createdAt: now,
    scheduledFor,
  };

  await queue.send("notifications", job, {
    // A sendAt in the past means "send now"; never hand the queue a
    // negative delay.
    delay: Math.max(0, scheduledFor.getTime() - now.getTime()),
    priority: notification.priority || "normal",
  });

  return job.id;
}
 
// Worker: Process notifications from queue
/**
 * Delivers one queued notification job.
 *
 * The user's preferences are re-checked at processing time. Failures while
 * checking preferences or sending are re-queued with exponential backoff
 * (1s, 2s, 4s, ...) until MAX_RETRIES is reached, and the error is rethrown
 * either way.
 *
 * @param {object} job - Job produced by the publisher (`userId`,
 *   `notification`, `attempts`).
 * @returns {Promise<object>} The provider result, or
 *   `{ status: "skipped", reason: "preferences" }` when the user opted out.
 * @throws Rethrows the preference-check, provider, or delivery-recording error.
 */
async function processNotificationJob(job) {
  const { userId, notification } = job;
  // Treat a job without a counter as a first attempt instead of silently
  // disabling retries.
  const attempts = job.attempts ?? 0;

  let result;
  try {
    // Check if notification should still be sent
    const shouldSend = await shouldSendNotification(
      userId,
      notification.deviceId,
      notification.category,
      "push",
    );

    if (!shouldSend) {
      return { status: "skipped", reason: "preferences" };
    }

    // Send to appropriate provider
    result = await sendToPushProvider(notification);
  } catch (error) {
    // Implement exponential backoff for retries
    if (attempts < MAX_RETRIES) {
      const delay = 2 ** attempts * 1000;
      await queue.send(
        "notifications",
        { ...job, attempts: attempts + 1 },
        { delay },
      );
    }
    throw error;
  }

  // Recording happens outside the retry block on purpose: the push has
  // already gone out, so re-queuing here would deliver it a second time.
  await recordDelivery(userId, notification, result);

  return result;
}

Choose your queue technology based on your needs: Amazon SQS for simplicity and AWS integration, RabbitMQ for complex routing rules, or Kafka for high-throughput event streaming. Implement multiple priority queues so transactional notifications don’t wait behind marketing campaigns, and monitor queue depth as a key health metric; growing queues indicate either traffic spikes or processing slowdowns.

Rate limiting strategies

Push providers enforce rate limits to prevent abuse and ensure service quality. FCM allows 600,000 requests per minute per project, while APNs doesn't publish limits but will throttle aggressive senders. Exceeding these limits results in dropped notifications or temporary bans, so implementing proper rate limiting is crucial.

/**
 * Token-bucket rate limiter.
 *
 * The bucket starts full with `maxBurst` tokens and refills continuously at
 * `maxPerSecond` tokens per second, never exceeding `maxBurst`.
 */
class RateLimiter {
  /**
   * @param {number} maxPerSecond - Sustained refill rate in tokens per second.
   * @param {number} maxBurst - Bucket capacity; the largest request that can
   *   ever be granted.
   * @throws {RangeError} If either value is not a positive finite number.
   */
  constructor(maxPerSecond, maxBurst) {
    if (!(Number.isFinite(maxPerSecond) && maxPerSecond > 0)) {
      throw new RangeError("maxPerSecond must be a positive finite number");
    }
    if (!(Number.isFinite(maxBurst) && maxBurst > 0)) {
      throw new RangeError("maxBurst must be a positive finite number");
    }

    this.maxPerSecond = maxPerSecond;
    this.maxBurst = maxBurst;
    this.tokens = maxBurst;
    this.lastRefill = Date.now();
  }

  /**
   * Waits until `count` tokens are available, then consumes them.
   *
   * @param {number} [count=1] - Number of tokens to take.
   * @returns {Promise<boolean>} Resolves to `true` once the tokens are taken.
   * @throws {RangeError} If `count` exceeds `maxBurst`; the bucket can never
   *   hold that many tokens, so waiting would never finish.
   */
  async acquire(count = 1) {
    if (count > this.maxBurst) {
      throw new RangeError(
        `Cannot acquire ${count} tokens; bucket capacity is ${this.maxBurst}`,
      );
    }

    for (;;) {
      // Refill tokens based on time elapsed
      const now = Date.now();
      const elapsed = (now - this.lastRefill) / 1000;
      this.tokens = Math.min(
        this.maxBurst,
        this.tokens + elapsed * this.maxPerSecond,
      );
      this.lastRefill = now;

      if (this.tokens >= count) {
        this.tokens -= count;
        return true;
      }

      // Sleep for as long as the missing tokens take to refill, then
      // re-check: another caller may have taken tokens in the meantime.
      const waitTime = ((count - this.tokens) / this.maxPerSecond) * 1000;
      await sleep(waitTime);
    }
  }
}
 
// One limiter per push provider, keyed by provider name.
// Each row is [provider, tokens per second, burst capacity].
const rateLimiters = Object.fromEntries(
  [
    ["fcm", 10000, 1000], // 10k/sec, burst of 1000
    ["apns", 5000, 500], // Conservative for APNs
  ].map(([providerName, perSecond, burst]) => [
    providerName,
    new RateLimiter(perSecond, burst),
  ]),
);
 
/**
 * Sends notifications to one provider in batches, taking rate-limit tokens
 * for each batch before dispatching it.
 *
 * Batches run one after another; the notifications inside a batch are sent
 * concurrently. The first failed send rejects the call and stops the
 * remaining batches.
 *
 * @param {string} provider - Key into `rateLimiters` (e.g. "fcm", "apns").
 * @param {Array<object>} notifications - Notifications to send.
 * @param {number} [batchSize=100] - Maximum notifications per batch.
 * @returns {Promise<void>}
 * @throws {Error} If no rate limiter is configured for `provider`.
 * @throws {RangeError} If `batchSize` is not a positive integer.
 */
async function sendWithRateLimit(provider, notifications, batchSize = 100) {
  // Own-property check so names like "toString" are not mistaken for
  // providers, and a typo fails with a clear message.
  if (!Object.hasOwn(rateLimiters, provider)) {
    throw new Error(`Unknown push provider: ${provider}`);
  }
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError("batchSize must be a positive integer");
  }

  const limiter = rateLimiters[provider];
  // A batch larger than the bucket could never be granted, so cap the batch
  // at the limiter's burst capacity when it exposes one.
  const effectiveBatchSize = Math.min(batchSize, limiter.maxBurst ?? batchSize);
  const batches = chunk(notifications, effectiveBatchSize); // Send in batches

  for (const batch of batches) {
    await limiter.acquire(batch.length);
    await Promise.all(batch.map((n) => sendNotification(provider, n)));
  }
}

Implement rate limiting at multiple levels: per-provider, per-app (for multi-tenant systems), and per-user (to prevent notification bombing). Use token bucket algorithms for smooth traffic shaping rather than hard cutoffs. When limits are reached, queue notifications for later delivery rather than dropping them.

Security and token management

Push notification tokens are sensitive. They grant the ability to send notifications to specific devices. Treat them like authentication credentials: encrypt tokens at rest, use secure transport (HTTPS only), and implement access controls. Token rotation is particularly important as tokens can expire or be revoked without warning.

const CryptoJS = require("crypto-js");
 
// Secure token storage with encryption
/**
 * Stores device push tokens encrypted at rest in the "deviceTokens"
 * collection and periodically re-validates them with the push provider.
 */
class TokenManager {
  /**
   * @param {string} encryptionKey - Passphrase or key string passed to
   *   CryptoJS.AES.encrypt.
   */
  constructor(encryptionKey) {
    this.encryptionKey = encryptionKey; // Can be a passphrase or key string
  }

  /**
   * Encrypts and upserts the token for a user's device.
   *
   * @param {string} userId
   * @param {string} deviceId
   * @param {string} token - Raw provider token; only the encrypted form is stored.
   * @param {string} platform
   * @returns {Promise<void>}
   */
  async storeToken(userId, deviceId, token, platform) {
    const encrypted = CryptoJS.AES.encrypt(
      token,
      this.encryptionKey,
    ).toString();

    await db.collection("deviceTokens").updateOne(
      { userId, deviceId },
      {
        $set: {
          encryptedToken: encrypted,
          platform,
          lastValidated: new Date(),
          failureCount: 0,
        },
        // A freshly registered token replaces whatever was invalidated
        // before, so the device must become eligible for validation again.
        $unset: { invalid: "", invalidatedAt: "" },
      },
      { upsert: true },
    );
  }

  /**
   * Re-validates up to 1000 tokens that have not been checked in the last
   * seven days. Tokens the provider rejects are flagged `invalid`; tokens it
   * accepts get a fresh `lastValidated` so the next run moves on to other
   * tokens instead of re-checking the same ones.
   *
   * NOTE(review): the stored documents are passed to provider.validateTokens
   * as-is, i.e. with `encryptedToken` rather than the raw token — confirm the
   * provider wrapper decrypts them.
   *
   * @returns {Promise<void>}
   */
  async validateTokens() {
    const deviceTokens = db.collection("deviceTokens");
    const staleBefore = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000);

    const tokens = await deviceTokens
      .find({
        lastValidated: { $lt: staleBefore },
        // Already-invalidated tokens would only waste provider calls.
        invalid: { $ne: true },
      })
      .limit(1000)
      .toArray();

    for (const tokenBatch of chunk(tokens, 100)) {
      const results = await provider.validateTokens(tokenBatch);

      const invalidDeviceIds = results
        .filter((r) => !r.valid)
        .map((r) => r.deviceId);
      const validDeviceIds = results
        .filter((r) => r.valid)
        .map((r) => r.deviceId);

      if (invalidDeviceIds.length > 0) {
        await deviceTokens.updateMany(
          { deviceId: { $in: invalidDeviceIds } },
          { $set: { invalid: true, invalidatedAt: new Date() } },
        );
      }

      if (validDeviceIds.length > 0) {
        await deviceTokens.updateMany(
          { deviceId: { $in: validDeviceIds } },
          { $set: { lastValidated: new Date() } },
        );
      }
    }
  }
}

Invalidate tokens automatically when providers return specific error responses, and register a fresh token the next time the app launches. APNs returns HTTP 410 with the reason Unregistered for tokens that are no longer active, while FCM reports messaging/registration-token-not-registered. Track token failure rates per device, as multiple failures likely indicate the app was uninstalled. Clean up invalid tokens regularly to maintain database performance and accurate analytics.

Monitoring and analytics

You can't improve what you don't measure. A comprehensive monitoring strategy tracks both technical metrics (delivery rates, latencies) and business metrics (open rates, conversion). Build your analytics to answer critical questions like:

  • Are notifications being delivered?
  • Are users engaging with them?
  • Which notifications drive the most value?
/**
 * Records notification lifecycle events (sent, delivered, interacted) in the
 * "notificationEvents" collection and keeps hourly counters in Redis.
 */
class NotificationAnalytics {
  /**
   * Records that a notification was handed to a provider.
   *
   * @param {object} notification - Reads `id`, `userId`, `category`,
   *   `provider` and `metadata`.
   * @returns {Promise<void>}
   */
  async recordSend(notification) {
    const event = {
      notificationId: notification.id,
      userId: notification.userId,
      category: notification.category,
      provider: notification.provider,
      timestamp: new Date(),
      metadata: notification.metadata,
    };

    // Real-time metrics
    await redis.hincrby(
      `metrics:${getHour()}`,
      `sent:${notification.category}`,
      1,
    );

    // Detailed event log for analysis
    await db.collection("notificationEvents").insertOne({
      ...event,
      type: "sent",
    });
  }

  /**
   * Records a provider delivery confirmation.
   *
   * Delivery time is measured from the matching "sent" event in the event
   * log. When no such event exists, `deliveryTime` is stored as `null` and
   * nothing is pushed to the delivery-times list.
   *
   * @param {string} notificationId
   * @param {object} providerResponse - Reads `messageId`.
   * @returns {Promise<void>}
   */
  async recordDelivery(notificationId, providerResponse) {
    const events = db.collection("notificationEvents");

    // Only the id is passed in, so the send time has to come from the event
    // log written by recordSend.
    const sentEvent = await events.findOne({ notificationId, type: "sent" });
    const deliveryTime = sentEvent
      ? Date.now() - new Date(sentEvent.timestamp).getTime()
      : null;

    const deliveredEvent = {
      notificationId,
      type: "delivered",
      deliveryTime,
      providerMessageId: providerResponse.messageId,
      timestamp: new Date(),
    };
    // Carry the category over so per-category dashboards can count
    // deliveries alongside sends.
    if (sentEvent?.category !== undefined) {
      deliveredEvent.category = sentEvent.category;
    }

    await events.insertOne(deliveredEvent);

    // Track delivery rates
    await redis.hincrby(`metrics:${getHour()}`, "delivered", 1);
    if (deliveryTime !== null) {
      await redis.lpush(`metrics:delivery_times`, deliveryTime);
    }
  }

  /**
   * Records a user interaction with a delivered notification.
   *
   * @param {string} notificationId
   * @param {string} action - e.g. 'opened', 'dismissed', 'action_button_clicked'.
   * @returns {Promise<void>}
   */
  async recordInteraction(notificationId, action) {
    // Track open rates and user actions
    await db.collection("notificationEvents").insertOne({
      notificationId,
      type: "interacted",
      action, // 'opened', 'dismissed', 'action_button_clicked'
      timestamp: new Date(),
    });
  }
}
 
// Dashboard queries
/**
 * Aggregates "sent", "delivered" and "failed" event counts per category for
 * the given time window.
 *
 * @param {{ start: Date, end: Date }} timeRange - Inclusive window bounds.
 * @returns {Promise<Array<object>>} One document per category, each with a
 *   `metrics` array of `{ type, count }` entries.
 */
async function getDeliveryMetrics(timeRange) {
  const { start, end } = timeRange;

  // Stage 1: keep only delivery-related events inside the window.
  const selectEvents = {
    $match: {
      timestamp: { $gte: start, $lte: end },
      type: { $in: ["sent", "delivered", "failed"] },
    },
  };

  // Stage 2: count events for every (category, type) pair.
  const countByCategoryAndType = {
    $group: {
      _id: { category: "$category", type: "$type" },
      count: { $sum: 1 },
    },
  };

  // Stage 3: fold the per-type counts into one document per category.
  const collectPerCategory = {
    $group: {
      _id: "$_id.category",
      metrics: {
        $push: { type: "$_id.type", count: "$count" },
      },
    },
  };

  const events = db.collection("notificationEvents");
  const cursor = events.aggregate([
    selectEvents,
    countByCategoryAndType,
    collectPerCategory,
  ]);

  return cursor.toArray();
}

Also make sure to set up alerts for anomalies, such as sudden drops in delivery rates, spikes in token failures, or increasing queue depths. Use distributed tracing to track notifications through your entire system, from API call to delivery confirmation. Export metrics to time-series databases like Prometheus or CloudWatch for long-term trend analysis.

Performance optimization

At scale, small inefficiencies multiply. Batch operations wherever possible: group notifications to the same provider, bulk validate tokens, and aggregate analytics events.

Connection pooling is critical here. Reuse HTTP/2 connections to APNs and FCM rather than creating new ones for each request.

// Connection pooling for APNs
/**
 * Fixed-size pool of HTTP/2 sessions to APNs, handed out round-robin.
 * Sessions found closed or destroyed are replaced when their turn comes up.
 */
class APNSConnectionPool {
  /**
   * @param {{ cert: *, key: * }} config - TLS client certificate and key.
   * @param {number} [poolSize=10] - Number of sessions to open.
   * @throws {RangeError} If `poolSize` is not a positive integer.
   */
  constructor(config, poolSize = 10) {
    // An empty pool would make the round-robin index NaN.
    if (!Number.isInteger(poolSize) || poolSize < 1) {
      throw new RangeError("poolSize must be a positive integer");
    }

    this.config = config; // Kept so dead sessions can be re-created later
    this.connections = [];
    this.currentIndex = 0;

    for (let i = 0; i < poolSize; i++) {
      this.connections.push(this.createConnection(config));
    }
  }

  /**
   * Opens one HTTP/2 session to APNs.
   *
   * @param {{ cert: *, key: * }} config
   * @returns {object} The session returned by `http2.connect`.
   */
  createConnection(config) {
    const session = http2.connect("https://api.push.apple.com", {
      cert: config.cert,
      key: config.key,
      // NOTE(review): Node documents client stream limits under `settings`
      // and `peerMaxConcurrentStreams`; confirm this option has an effect.
      maxConcurrentStreams: 1000,
    });

    // Without a listener, a session-level 'error' event is thrown as an
    // uncaught exception. In-flight requests still fail through their own
    // stream errors; the session is replaced on its next turn.
    session.on("error", (error) => {
      console.error("APNs connection error:", error);
    });

    return session;
  }

  /**
   * Returns the next session in round-robin order, replacing it first if it
   * is closed or destroyed.
   *
   * @returns {object} An HTTP/2 session.
   */
  getConnection() {
    // Round-robin connection selection
    const index = this.currentIndex;
    this.currentIndex = (index + 1) % this.connections.length;

    let connection = this.connections[index];
    if (connection.closed || connection.destroyed) {
      connection = this.createConnection(this.config);
      this.connections[index] = connection;
    }
    return connection;
  }

  /**
   * Sends one notification over a pooled session.
   *
   * @param {{ deviceToken: string, payload: object }} notification
   * @returns {Promise<{ success: boolean }>} Resolves on HTTP 200.
   * @throws {Error} On a stream error, or `APNS error: <status>` for any
   *   other status; that error carries `status` and, when the response body
   *   has one, `reason`.
   */
  async send(notification) {
    const connection = this.getConnection();

    return new Promise((resolve, reject) => {
      const stream = connection.request({
        ":method": "POST",
        ":path": `/3/device/${notification.deviceToken}`,
        "content-type": "application/json",
      });

      let status;
      let body = "";

      stream.setEncoding("utf8");
      // A failed stream must reject; otherwise the promise never settles.
      stream.on("error", reject);
      stream.on("response", (headers) => {
        status = headers[":status"];
      });
      stream.on("data", (chunk) => {
        body += chunk;
      });
      // Settle only after the body is read so the failure reason is known.
      stream.on("end", () => {
        if (status === 200) {
          resolve({ success: true });
          return;
        }

        const error = new Error(`APNS error: ${status}`);
        error.status = status;
        try {
          error.reason = JSON.parse(body).reason;
        } catch {
          // Empty or non-JSON body: the status code alone describes the failure.
        }
        reject(error);
      });

      stream.write(JSON.stringify(notification.payload));
      stream.end();
    });
  }
}

Cache aggressively but carefully: user preferences (invalidate on change), device tokens (update on failure), and template compilations (versioned for updates). Use read replicas for analytics queries to avoid impacting production performance.

The key to scaling push notifications is building systems that gracefully handle failure, respect platform limits, and provide visibility into operations. With proper architecture, you can deliver billions of notifications reliably while maintaining sub-second latencies.