Background Jobs: Processing Tasks Asynchronously
Background jobs are tasks moved out of the main application request cycle to run asynchronously in a separate process, enabling your application to respond quickly while heavy or slow work happens in the background.
What You’ll Learn
By the end of this tutorial, you’ll understand how background job processing works, the differences between job queue systems (Bull, Sidekiq, RQ), and how to implement retries, prioritization, and failure handling.
Why Background Jobs Matter
A web request that sends 100 emails, generates a PDF, or processes a video keeps the user waiting. Background jobs free the request cycle. Doda Browser uses background jobs to scan uploaded files for malware signatures — the upload returns instantly, and the scan completes moments later.
Synchronous vs Async Processing
flowchart LR
subgraph "Synchronous (Blocking)"
U[User Request] --> P[Process]
P -- "Wait 10s" --> R[Response]
end
subgraph "Async (Background Job)"
U2[User Request] --> Q[Job Queue]
Q --> R2["Response (Instant)"]
Q --> W[Worker]
W --> W2[Background Processing]
end
style Q fill:#f90,color:#fff
Python: RQ (Redis Queue)
RQ is a simple Python library for background jobs using Redis.
# worker.py
# Simple RQ worker setup
import redis
# NOTE(review): `Connection` is no longer imported here. Nothing in this
# snippet used it, and newer RQ releases appear to have dropped it (confirm
# against the RQ changelog); pass `connection=` explicitly instead.
from rq import Queue, Worker

# Connect to Redis
redis_conn = redis.Redis(host='localhost', port=6379, db=0)
queue = Queue(connection=redis_conn)

# Start worker: rq worker

# jobs.py
# Background job definitions for RQ
import time
from rq import get_current_job
def send_welcome_email(user_email, username):
    """Send a welcome email (runs in background).

    Simulates three one-second processing steps. When executed by an RQ
    worker, the percentage completed is stored in ``job.meta['progress']``
    after every step.

    Args:
        user_email: Address the email is sent to.
        username: Name used in the completion log line.

    Returns:
        A dict with the recipient address and the status ``"sent"``.
    """
    # get_current_job() returns None when this function is called directly
    # instead of by a worker, so progress tracking has to be optional.
    job = get_current_job()
    job_id = job.id if job is not None else "local"
    print(f"[Job {job_id}] Sending welcome email to {user_email}...")

    total_steps = 3
    for step in range(1, total_steps + 1):
        time.sleep(1)  # Simulate processing
        progress = int(step / total_steps * 100)
        if job is not None:
            job.meta['progress'] = progress
            job.save_meta()
        print(f"Progress: {progress}%")

    print(f"Welcome email sent to {username}")
    return {"email": user_email, "status": "sent"}
def process_image(image_path, quality=80, delay_range=(2, 5)):
    """Generate compressed thumbnail (runs in background).

    The image work is simulated by sleeping for a random duration.

    Args:
        image_path: Path of the source image.
        quality: Compression quality; only logged by this simulation.
        delay_range: ``(min, max)`` bounds in seconds for the simulated
            processing time. Defaults to the original 2-5 second range.

    Returns:
        A dict with the original path, the thumbnail path, the simulated
        processing time (rounded to 2 decimals) and a random size in KB.
    """
    import os
    import random

    print(f"Processing image: {image_path} (quality={quality})")

    # Simulate image processing
    processing_time = random.uniform(*delay_range)
    time.sleep(processing_time)

    # Insert "_thumb" before the real extension only. A plain
    # str.replace('.jpg', ...) would also rewrite ".jpg" inside directory
    # names and would leave non-.jpg files pointing at the original path.
    stem, extension = os.path.splitext(image_path)
    result = {
        "original": image_path,
        "thumbnail": f"{stem}_thumb{extension}",
        "processing_time": round(processing_time, 2),
        "size_kb": random.randint(50, 500),
    }
    print(f"Image processed: {result}")
    return result

# enqueue.py
# Enqueue background jobs
from redis import Redis
from rq import Queue
from jobs import send_welcome_email, process_image
redis_conn = Redis(host='localhost', port=6379, db=0)
queue = Queue(connection=redis_conn)
# Enqueue jobs
# Welcome email for Alice: positional arguments travel in `args`.
welcome_args = ('alice@example.com', 'Alice')
email_job = queue.enqueue(
    send_welcome_email,
    args=welcome_args,
    description='Send welcome email to Alice',
)
print(f"Email job enqueued: {email_job.id}")

# Thumbnail generation: keyword arguments travel in `kwargs`, and the job is
# capped at 60 seconds via `job_timeout`.
thumbnail_args = ('/uploads/photo.jpg',)
thumbnail_kwargs = {'quality': 70}
image_job = queue.enqueue(
    process_image,
    args=thumbnail_args,
    kwargs=thumbnail_kwargs,
    description='Generate thumbnail',
    job_timeout=60,
)
print(f"Image job enqueued: {image_job.id}")
# Schedule a job for later
from datetime import datetime, timedelta
scheduled_job = queue.enqueue_at(
datetime.now() + timedelta(hours=2),
send_welcome_email,
args=('bob@example.com', 'Bob')
)
print(f"Scheduled job: {scheduled_job.id} at {scheduled_job.enqueued_at}")Expected output:
Email job enqueued: abc123
Image job enqueued: def456
Scheduled job: ghi789 at 2026-06-20 12:00:00

Node.js: Bull Queue
// bull_queue.js
// Background job processing with Bull (Redis-backed)
const Queue = require('bull');
// Create a queue
// Redis connection used by the queue.
const redisOptions = { host: 'localhost', port: 6379 };

// Defaults applied to every job added to the queue: three attempts with
// exponential backoff (2000 ms base delay), and bounded retention of
// completed/failed jobs.
const jobDefaults = {
  attempts: 3,
  backoff: { type: 'exponential', delay: 2000 },
  removeOnComplete: 100,
  removeOnFail: 50,
};

const emailQueue = new Queue('email', {
  redis: redisOptions,
  defaultJobOptions: jobDefaults,
});
// Define the processing function
emailQueue.process(async (job) => {
  // Small helper: resolve after `ms` milliseconds.
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const { to, subject, body } = job.data;
  console.log(`[Job ${job.id}] Sending email to ${to}: ${subject}`);

  // Report progress
  await job.progress(25);

  // Simulate SMTP call
  await pause(1000);
  await job.progress(75);

  await pause(500);
  await job.progress(100);

  console.log(`Email sent to ${to}`);
  const outcome = { sent: true, to, timestamp: Date.now() };
  return outcome;
});
// Error handling
emailQueue.on('failed', (job, err) => {
  console.error(`[Job ${job.id}] Failed: ${err.message}`);

  // Only announce a retry while attempts remain.
  const { attemptsMade, opts } = job;
  if (!(attemptsMade < opts.attempts)) {
    return;
  }
  console.log(`Retry ${attemptsMade + 1}/${opts.attempts}`);
});

// Log the serialized return value of every finished job.
emailQueue.on('completed', (job, result) => {
  const serialized = JSON.stringify(result);
  console.log(`[Job ${job.id}] Completed: ${serialized}`);
});
// Add jobs
async function addJobs() {
await emailQueue.add({
to: 'alice@example.com',
subject: 'Welcome!',
body: 'Thanks for signing up'
});
await emailQueue.add({
to: 'bob@example.com',
subject: 'Your report is ready',
body: 'Download your report here'
}, {
priority: 10, // Higher priority = processed first
delay: 3600000, // 1 hour delay
});
console.log('Jobs added to queue');
}
addJobs();Expected output:
[Job 1] Sending email to alice@example.com: Welcome!
Email sent to alice@example.com
[Job 1] Completed: {"sent":true,"to":"alice@example.com","timestamp":1723456789012}

Ruby: Sidekiq
# sidekiq_worker.rb
# Background job processing with Sidekiq
require 'sidekiq'
class WelcomeEmailWorker
  include Sidekiq::Worker

  sidekiq_options retry: 3, queue: 'email', backtrace: true

  # Simulates sending a welcome email and records completion in Redis under
  # "email_progress:<jid>". Any StandardError is logged and re-raised.
  #
  # user_id    - identifier of the user, used in the log line
  # user_email - address the email is sent to
  def perform(user_id, user_email)
    logger.info("Sending welcome email to #{user_email} (user ##{user_id})")

    # Simulate sending
    sleep(2)

    # Track progress via Redis
    Sidekiq.redis { |redis| redis.set("email_progress:#{jid}", 100) }

    logger.info("Welcome email sent to #{user_email}")
  rescue StandardError => error
    logger.error("Failed to send email: #{error.message}")
    raise # Sidekiq will retry based on retry options
  end
end
# Enqueue from anywhere
WelcomeEmailWorker.perform_async(42, 'alice@example.com')
# Schedule for later
WelcomeEmailWorker.perform_in(3600, 43, 'bob@example.com')

Job Prioritization
# prioritization.py
# RQ job prioritization using multiple queues
from redis import Redis
from rq import Queue
redis_conn = Redis(host='localhost', port=6379, db=0)
# Create queues with different priority levels
# All four queues share the same connection and a 300-second default timeout.
# The queue name alone does not set a priority; presumably the worker is
# started with the queues listed highest-priority first (e.g.
# `rq worker critical high default low`) — confirm against the deployment.
critical_queue, high_queue, default_queue, low_queue = (
    Queue(level, connection=redis_conn, default_timeout=300)
    for level in ('critical', 'high', 'default', 'low')
)
def process_payment(order_id, amount):
    """Process a payment — critical priority.

    Args:
        order_id: Identifier of the order being paid.
        amount: Payment amount; accepted but not used by this simulation.

    Returns:
        A dict with the order identifier and the status ``"paid"``.
    """
    print(f"[CRITICAL] Processing payment for order {order_id}")
    return {"order": order_id, "status": "paid"}
def send_newsletter(campaign_id):
    """Send a marketing newsletter (low-priority job).

    Args:
        campaign_id: Identifier of the campaign to send.

    Returns:
        A dict with the campaign identifier and ``sent`` set to True.
    """
    outcome = dict(campaign=campaign_id, sent=True)
    print(f"[LOW] Sending newsletter campaign {campaign_id}")
    return outcome
def generate_report(report_id):
    """Generate a daily report (default-priority job).

    Args:
        report_id: Identifier of the report to generate.

    Returns:
        A dict with the report identifier and the status ``"generated"``.
    """
    summary = dict(report=report_id, status="generated")
    print(f"[DEFAULT] Generating report {report_id}")
    return summary
# Enqueue with priority levels
critical_queue.enqueue(process_payment, args=('ORD-123', 99.99))
low_queue.enqueue(send_newsletter, args=('CAMPAIGN-SPRING',))
default_queue.enqueue(generate_report, args=('DAILY-SALES',))
print("Jobs enqueued with priority levels")

Error Handling and Retries
# retry_handling.py
# Robust job error handling with retries
from redis import Redis
from rq import Queue, Retry
import random
redis_conn = Redis(host='localhost', port=6379, db=0)
queue = Queue(connection=redis_conn)
def process_order(order_id, payment_method):
    """Process an order with retry on transient failures.

    Args:
        order_id: Identifier of the order.
        payment_method: Payment method name; a falsy value is treated as
            invalid input.

    Returns:
        ``{"order_id", "status": "completed"}`` on success, or
        ``{"order_id", "status": "failed", "error"}`` for invalid input.

    Raises:
        ConnectionError: On a simulated gateway timeout; re-raised so the
            job fails and the queue's retry policy can apply.
    """
    try:
        # Validate first: invalid input is non-retriable, so it must be
        # rejected before the flaky gateway call can raise a retriable
        # error and trigger a pointless retry.
        if not payment_method:
            raise ValueError("Invalid payment method")

        # Simulate flaky payment gateway
        if random.random() < 0.2:  # 20% failure
            raise ConnectionError("Payment gateway timeout")

        print(f"Order {order_id} processed via {payment_method}")
        return {"order_id": order_id, "status": "completed"}
    except ConnectionError as e:
        print(f"Transient error: {e}. Will retry.")
        raise  # RQ catches and retries
    except ValueError as e:
        # Non-retriable — log and don't retry
        print(f"Fatal error: {e}. No retry.")
        return {"order_id": order_id, "status": "failed", "error": str(e)}
# Enqueue with retry configuration
def report_order_failure(job, connection, exc_type, exc_value, exc_traceback):
    """Failure callback: report whether the job will be retried.

    RQ invokes failure callbacks with the job, the Redis connection and the
    exception triple. The callback must be a module-level function that the
    worker can import; a lambda cannot be resolved on the worker side.

    NOTE(review): whether RQ runs this on every failed attempt or only on the
    last one may depend on the RQ version, so the message is chosen from
    ``job.retries_left`` rather than assumed — confirm against the installed
    release.
    """
    if job.retries_left:
        print(f"Job {job.id} failed: {exc_value}. Retries left: {job.retries_left}")
    else:
        print(f"All retries exhausted for {job.id}")


# NOTE(review): retry intervals need a scheduler on the worker side
# (e.g. `rq worker --with-scheduler`) — confirm for the deployed setup.
queue.enqueue(
    process_order,
    args=('ORD-456', 'credit_card'),
    retry=Retry(max=3, interval=[10, 30, 60]),  # Retry at 10s, 30s, 60s
    on_failure=report_order_failure,
)
# Failed job inspection
from rq import get_current_job
def inspect_failed_jobs():
"""Check failed jobs in the queue."""
registry = queue.failed_job_registry
failed_jobs = registry.get_job_ids()
print(f"Failed jobs: {len(failed_jobs)}")
for job_id in failed_jobs[:5]: # Show last 5
job = queue.fetch_job(job_id)
if job:
print(f" {job.id}: {job.failed().exc_string[:100]}...")Common Errors
1. Not Setting Job Timeouts
A job that hangs due to a network issue blocks a worker slot indefinitely. Always set job_timeout / timeout to match your expected maximum execution time.
2. Retrying Non-Retriable Errors
Retrying a job with invalid input data (e.g., malformed JSON, missing fields) wastes resources. Classify exceptions and only retry transient failures (network errors, timeouts, rate limits).
3. Running Too Few Workers
If the queue grows but all workers are busy, new jobs wait. Monitor queue depth and scale workers horizontally. Use autoscaling in production.
4. Sharing a Single Redis Instance for Everything
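Using the same Redis for queues, caching, and sessions causes resource contention: a memory spike or eviction policy tuned for the cache can stall or drop queued jobs. Prefer a separate Redis instance for queues. Separate logical databases (e.g., db=0 for cache, db=1 for queues) only isolate the keyspace; they still share the same memory and CPU.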
5. Not Monitoring Queue Depth
Without monitoring, queues grow silently until they consume all Redis memory. Set up alerts on queue length (e.g., alert if queue > 1000 for > 5 minutes).
6. Ignoring Job Idempotency
If a job runs twice (due to retry), it should produce the same result. Sending “welcome email” twice is harmless if the provider deduplicates, but charging a credit card twice is catastrophic.
Practice Questions
1. What is the difference between a background job and a cron job?
Background jobs are tasks triggered by application events (user signs up, file uploaded). Cron jobs are scheduled tasks that run at specific times (daily report at 2 AM). Background jobs use a queue; cron jobs use a time-based scheduler.
2. How does job prioritization work?
Multiple queues with different priority levels. Workers pick from higher-priority queues first. Some systems (Bull) support priority within a single queue, but multi-queue is simpler and more reliable.
3. What happens when a job fails repeatedly?
After exhausting retries, the job moves to a failed/dead queue. It stays there for inspection. You can retry failed jobs manually, discard them, or send an alert to the engineering team.
4. How do you track the progress of a running background job?
Job systems like RQ and Bull let you update progress via job.meta['progress'] (RQ) or job.progress(n) (Bull). The frontend polls a status endpoint to display progress.
5. Challenge: Build an order processing pipeline with background jobs that: (1) validates inventory (2) charges payment (3) sends confirmation (4) updates shipping. Use separate queues for payment (high priority) and confirmation (low priority). Add retry with exponential backoff for payment failures.
Implement with three queues (critical=payment, default=inventory/shipping, low=confirmation). Chain jobs so the next job enqueues only if the previous succeeded. Use exponential backoff: 10s, 30s, 90s, 270s for payment retries.
Mini Project: Job Dashboard Monitor
# job_dashboard.py
# Real-time background job monitoring dashboard
from redis import Redis
from rq import Queue, Worker
from datetime import datetime
import time
redis_conn = Redis(host='localhost', port=6379, db=0)
def monitor_jobs():
"""Print a live dashboard of queue and worker status."""
queues = [
Queue('critical', connection=redis_conn),
Queue('high', connection=redis_conn),
Queue('default', connection=redis_conn),
Queue('low', connection=redis_conn),
]
workers = Worker.all(connection=redis_conn)
try:
while True:
print("\033[2J\033[H") # Clear screen
print(f"═══ Job Dashboard — {datetime.now().strftime('%H:%M:%S')} ═══")
print()
# Queue status
print("Queues:")
print(f"{'Name':<12} {'Jobs':<8} {'Failed':<8} {'Scheduled':<10}")
print("-" * 40)
for q in queues:
print(f"{q.name:<12} {q.count:<8} "
f"{q.failed_job_registry.count:<8} "
f"{q.scheduled_job_registry.count:<10}")
print()
print(f"Active Workers: {len(workers)}")
for w in workers:
current_job = w.get_current_job()
if current_job:
job_desc = current_job.description[:40]
else:
job_desc = "Idle"
print(f" {w.name:<20} {job_desc}")
time.sleep(5)
except KeyboardInterrupt:
print("\nMonitoring stopped.")
if __name__ == '__main__':
monitor_jobs()Expected output:
═══ Job Dashboard — 10:00:00 ═══
Queues:
Name Jobs Failed Scheduled
critical 0 0 0
high 3 1 0
default 12 2 5
low 45 0 0
Active Workers: 4
hostname-1 Processing: Send welcome email
hostname-2 Processing: Generate thumbnail
hostname-3 Idle
hostname-4 Idle

FAQ
Related Concepts
What’s Next
You now understand background jobs! Next, learn about Celery for a production-grade Python task queue, then explore cron job patterns for scheduled task execution.
- Practice daily — Convert a slow endpoint in your app to use a background job
- Build a project — Build an order processing pipeline with three priority levels and retry logic
- Explore related topics — Check out job deduplication, job batching, and dead-letter queues
Built by the developers of Doda Browser, DodaZIP, and Durga Antivirus Pro. Updated 2026-06-20.
Built by the developers of DodaTech
Doda Browser, DodaZIP & Durga Antivirus Pro