MongoDB is a popular NoSQL database that stores data in flexible, JSON-like documents. It's designed for scalability and developer agility.
Key Features of MongoDB
Document-Oriented: Stores data in BSON (Binary JSON) format
Schema-less: Documents in a collection can have different fields
Scalable: Horizontal scaling with sharding
High Performance: Indexing and rich queries
Aggregation: Powerful data processing pipeline
MongoDB vs SQL Terminology
SQL | MongoDB
Database | Database
Table | Collection
Row | Document
Column | Field
Primary Key | _id (default)
2. Setting Up MongoDB with Python
To work with MongoDB in Python, we use PyMongo (the pymongo package), the official MongoDB driver for synchronous Python applications.
Installation
# Install pymongo using pip
pip install pymongo
# Optional extras for DNS SRV (mongodb+srv://) URIs, OCSP and TLS support.
# GridFS and the bson package already ship with the base install.
# The quotes stop shells such as zsh from treating the brackets as a glob.
pip install "pymongo[srv,ocsp,tls]"
Connecting to MongoDB
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure
# Basic connection (default: localhost:27017).
# MongoClient connects lazily: constructing it does not contact the server,
# which is why the explicit check below is needed.
client = MongoClient('mongodb://localhost:27017/')
# Connection with authentication
# client = MongoClient('mongodb://username:password@localhost:27017/')
# Connect to a specific database
db = client['mydatabase']
# Test the connection
try:
    # The ping command is cheap and does not require auth. It replaces the
    # legacy 'ismaster' command, which is deprecated since MongoDB 5.0.
    client.admin.command('ping')
except ConnectionFailure as e:
    print(f"MongoDB connection failed: {e}")
else:
    print("MongoDB connection successful!")
# Call client.close() when the application shuts down. The client is left
# open here because `db` is reused by the snippets that follow.
Security Note: Never hardcode credentials in your code. Use environment variables or a configuration file to store sensitive information.
3. CRUD Operations in MongoDB
MongoDB provides intuitive methods for Create, Read, Update, and Delete (CRUD) operations.
Create
Read
Update
Delete
Create Operations
Insert documents into a collection.
from datetime import datetime, timezone

# Get a handle to the collection. MongoDB creates the collection lazily,
# on the first insert, not when the handle is obtained.
collection = db['users']

# Insert a single document.
# Timestamps are timezone-aware UTC: datetime.utcnow() is deprecated since
# Python 3.12 and returns a naive value. PyMongo stores both as UTC.
user1 = {
    "username": "johndoe",
    "email": "john@example.com",
    "age": 30,
    "interests": ["programming", "reading", "hiking"],
    "created_at": datetime.now(timezone.utc),
}
# insert_one adds the generated "_id" to user1 and reports it on the result
result = collection.insert_one(user1)
print(f"Inserted document with id: {result.inserted_id}")

# Insert multiple documents in one round trip.
# Documents in the same collection may carry different fields
# (only user3 has "is_active").
user2 = {
    "username": "janedoe",
    "email": "jane@example.com",
    "age": 28,
    "interests": ["photography", "travel"],
    "created_at": datetime.now(timezone.utc),
}
user3 = {
    "username": "bobsmith",
    "email": "bob@example.com",
    "age": 35,
    "interests": ["cooking", "gaming"],
    "is_active": True,
    "created_at": datetime.now(timezone.utc),
}
result = collection.insert_many([user2, user3])
print(f"Inserted {len(result.inserted_ids)} documents")
Read Operations
Query documents from a collection.
# Find a single document (find_one returns None when nothing matches)
user = collection.find_one({"username": "johndoe"})
print("Single user:", user)

# Find all documents
print("\nAll users:")
for user in collection.find():
    print(user)

# Find with query operators ($gt only matches documents that have an age)
print("\nUsers older than 30:")
for user in collection.find({"age": {"$gt": 30}}):
    print(user["username"], user["age"])

# Projection (select specific fields)
print("\nUsernames and emails:")
for user in collection.find(
    {},
    {"username": 1, "email": 1, "_id": 0},  # 1 for include, 0 for exclude
):
    print(user)

# Sorting and limiting
print("\nYoungest 2 users:")
# Restrict the query to documents that have an "age" field. Documents without
# it (such as the user upserted in the update example) sort first in ascending
# order and would make user["age"] raise KeyError.
for user in collection.find({"age": {"$exists": True}}).sort("age", 1).limit(2):
    print(user["username"], user["age"])
Update Operations
Modify existing documents.
from datetime import datetime, timezone

from bson.objectid import ObjectId

# Update a single document.
# Timestamps are timezone-aware UTC: datetime.utcnow() is deprecated since
# Python 3.12. PyMongo stores aware datetimes as UTC.
result = collection.update_one(
    {"username": "johndoe"},
    {"$set": {"age": 31, "last_updated": datetime.now(timezone.utc)}},
)
print(f"Modified {result.modified_count} document")

# Update multiple documents: every document that has an "age" field
result = collection.update_many(
    {"age": {"$exists": True}},
    {"$inc": {"age": 1}},  # Increment age by 1
)
print(f"Updated {result.modified_count} documents")

# Upsert (update if exists, insert if not)
now = datetime.now(timezone.utc)
result = collection.update_one(
    {"username": "alice"},
    {
        # Applied only when the upsert inserts a new document
        "$setOnInsert": {
            "username": "alice",
            "email": "alice@example.com",
            "created_at": now,
        },
        # Applied on both insert and update
        "$set": {
            "last_login": now,
        },
    },
    upsert=True,
)
# upserted_id is None when an existing document was updated instead
if result.upserted_id:
    print(f"Inserted new user with id: {result.upserted_id}")
Delete Operations
Remove documents from a collection.
# Remove at most one document matching the filter
bobsmith_filter = {"username": "bobsmith"}
result = collection.delete_one(bobsmith_filter)
print(f"Deleted {result.deleted_count} document")

# Remove every document whose age is below 30
under_thirty_filter = {"age": {"$lt": 30}}
result = collection.delete_many(under_thirty_filter)
print(f"Deleted {result.deleted_count} documents")

# An empty filter matches everything, so this would empty the collection
# (be careful!)
# result = collection.delete_many({})
# print(f"Deleted {result.deleted_count} documents")

# Dropping removes the collection itself together with its indexes
# collection.drop()
Warning: Be extremely careful with delete operations, especially delete_many({}) and drop() as they can result in data loss.
4. MongoDB Transactions
MongoDB supports multi-document transactions, allowing you to execute multiple operations atomically.
from pymongo import MongoClient
from pymongo.errors import ConnectionFailure, OperationFailure
# Connect to the replica set named "rs0"
client = MongoClient('mongodb://localhost:27017/?replicaSet=rs0')

# Handles for the database and the two collections used by the example
db = client['bank']
accounts = db['accounts']
transactions = db['transactions']

# Reset the sample data: clear existing accounts, then insert two fresh ones
accounts.delete_many({})
sample_accounts = [
    {"account_id": "A1", "balance": 1000},
    {"account_id": "A2", "balance": 500},
]
accounts.insert_many(sample_accounts)
# Transfer money between accounts
def transfer_funds(from_account, to_account, amount):
    """Atomically move ``amount`` from one account to another.

    The withdrawal, the deposit and the audit record are written inside one
    multi-document transaction on the module-level ``client``, using the
    module-level ``accounts`` and ``transactions`` collections, so either
    all three writes are applied or none are.

    Args:
        from_account: ``account_id`` of the account to debit.
        to_account: ``account_id`` of the account to credit.
        amount: Amount to transfer; must be greater than zero.

    Returns:
        True if the transaction committed. False if the amount is not
        positive, the source account is missing or underfunded, the target
        account is missing, or an error occurred (the error is printed,
        not raised).
    """
    # Local import so the function does not depend on imports made elsewhere.
    from datetime import datetime, timezone

    # A zero amount is a no-op and a negative one would reverse the transfer.
    if amount <= 0:
        print("Transfer amount must be positive")
        return False

    with client.start_session() as session:
        try:
            # The context manager commits on normal exit and aborts if the
            # body raises, so abort is never attempted after a failed commit.
            with session.start_transaction():
                # Withdraw only if the source exists and has enough funds
                withdrawal = accounts.update_one(
                    {"account_id": from_account, "balance": {"$gte": amount}},
                    {"$inc": {"balance": -amount}},
                    session=session,
                )
                # matched_count says whether any document satisfied the filter
                if withdrawal.matched_count == 0:
                    print("Insufficient funds or account not found")
                    session.abort_transaction()
                    return False

                # Deposit to target account
                deposit = accounts.update_one(
                    {"account_id": to_account},
                    {"$inc": {"balance": amount}},
                    session=session,
                )
                if deposit.matched_count == 0:
                    print("Target account not found")
                    # Roll back the withdrawal made above
                    session.abort_transaction()
                    return False

                # Record the transaction (timezone-aware UTC timestamp)
                transactions.insert_one(
                    {
                        "from": from_account,
                        "to": to_account,
                        "amount": amount,
                        "timestamp": datetime.now(timezone.utc),
                    },
                    session=session,
                )
        except Exception as e:
            # Reaching here means the transaction did not commit cleanly
            print(f"Transaction failed: {e}")
            return False
        else:
            print("Transaction completed successfully")
            return True
# Exercise the transfer and show the balances on either side of it
def _print_balances():
    """Print one line per account with its id and current balance."""
    for account in accounts.find():
        print(f"Account {account['account_id']}: ${account['balance']}")


print("Balances before transfer:")
_print_balances()
# Move $200 from A1 to A2
transfer_funds("A1", "A2", 200)
print("\nBalances after transfer:")
_print_balances()
Important Notes on Transactions:
Transactions require a replica set or sharded cluster
Operations within a transaction must be on the same session
Transactions have a maximum lifetime (transactionLifetimeLimitSeconds, 60 seconds by default); the server aborts transactions that run longer
Not all operations are allowed in transactions (e.g., creating collections or indexes before MongoDB 4.4)
Always handle errors and explicitly commit or abort transactions
5. Error Handling and Best Practices
Common MongoDB Exceptions
from pymongo import MongoClient
from pymongo.errors import (
ConnectionFailure,
OperationFailure,
DuplicateKeyError,
BulkWriteError
)
# MongoClient connects lazily, so constructing it does not contact the server.
# Creating it before the try block guarantees that `client` exists when the
# finally clause runs, with no need to inspect locals().
client = MongoClient('mongodb://localhost:27017/')
try:
    # Force a round trip to check that the server is available. 'ping'
    # replaces the legacy 'ismaster' command, deprecated since MongoDB 5.0.
    client.admin.command('ping')
    db = client['mydb']
    collection = db['users']
    # Create a unique index
    collection.create_index([("email", 1)], unique=True)
    # Try to insert a duplicate document
    collection.insert_one({"email": "test@example.com"})
    collection.insert_one({"email": "test@example.com"})  # This will raise DuplicateKeyError
except ConnectionFailure as e:
    print(f"Could not connect to MongoDB: {e}")
except DuplicateKeyError as e:
    # Must come before OperationFailure, which is one of its base classes
    print(f"Duplicate key error: {e.details}")
except OperationFailure as e:
    print(f"Operation failed: {e.details}")
except Exception as e:
    # Last-resort handler for this top-level example
    print(f"An unexpected error occurred: {e}")
finally:
    # Always close the connection
    client.close()
Best Practices
1. Connection Handling
Use connection pooling (enabled by default in PyMongo)
Reuse the client instance across your application
Close connections when your application shuts down
2. Performance
Create appropriate indexes for your queries
Use projection to retrieve only necessary fields
Limit the number of documents returned with limit()
Use batch_size() for large result sets
3. Security
Enable authentication and authorization
Use TLS/SSL for network encryption
Validate and sanitize all inputs
Follow the principle of least privilege
4. Schema Design
Design your schema based on your application's query patterns
Consider embedding documents for one-to-few relationships
Use references for many-to-many relationships or large hierarchies