from django.db import connection
from blog.models import Post
def get_top_authors(limit=10):
"""Get authors with most published posts using raw SQL."""
query = """
SELECT
u.id,
u.username,
COUNT(p.id) as post_count
FROM auth_user u
INNER JOIN blog_post p ON u.id = p.author_id
WHERE p.status = %s
GROUP BY u.id, u.username
ORDER BY post_count DESC
LIMIT %s
"""
with connection.cursor() as cursor:
cursor.execute(query, ['published', limit])
columns = [col[0] for col in cursor.description]
return [dict(zip(columns, row)) for row in cursor.fetchall()]
def get_monthly_stats():
"""Use raw() to get Post objects with aggregations."""
query = """
SELECT
id,
title,
author_id,
DATE_TRUNC('month', published_at) as month
FROM blog_post
WHERE status = %s
ORDER BY published_at DESC
"""
return Post.objects.raw(query, ['published'])
For queries too complex for the ORM, I use raw SQL. The raw() method returns model instances. I use cursor.execute() for non-model queries. I always use parameterized queries to prevent SQL injection—never string interpolation. For reporting, raw SQL is often clearer than complex ORM chains. I document why raw SQL is needed so future developers understand. For database-specific features like window functions, raw SQL is sometimes the only option. I test raw queries against multiple database backends if the app supports them.