#1 Data Analytics Program in India
₹2,499 ₹1,499 · Enroll Now
5 min read

Creating Summary Reports

Learn to create complete data summaries

Creating Summary Reports

Why Summary Reports?

After EDA, you need to share findings. A good report includes:

  • Data overview
  • Key statistics
  • Important patterns
  • Data quality issues

Basic Data Overview

code.py
import pandas as pd

# Sample dataset: five people, with one missing Name and one missing Age so
# that the reporting helpers have gaps to detect.
df = pd.DataFrame(dict(
    Name=['John', 'Sarah', 'Mike', 'Emma', None],
    Age=[25, 30, None, 35, 28],
    City=['NYC', 'LA', 'NYC', 'Chicago', 'LA'],
    Salary=[50000, 60000, 55000, 70000, 45000],
))

def data_overview(df):
    """Print a structural overview of a DataFrame to standard output.

    The output is a 40-character banner, the row and column counts, the
    list of column names, and the dtype of each column.

    Args:
        df: The pandas DataFrame to describe.
    """
    rule = "=" * 40
    column_names = list(df.columns)

    print(rule, "DATA OVERVIEW", rule, sep="\n")
    print(f"Rows: {len(df)}")
    print(f"Columns: {len(column_names)}")
    print(f"\nColumn names: {column_names}")
    print("\nData types:")
    print(df.dtypes)

# Print the overview for the sample DataFrame.
data_overview(df)

Missing Data Report

code.py
def missing_report(df):
    """Print a per-column table of missing values to standard output.

    The table lists, for every column that has at least one missing value,
    the count of missing entries and that count as a percentage of the
    number of rows. A final line gives the total across all columns.

    Args:
        df: The pandas DataFrame to inspect.
    """
    rule = "=" * 40
    print(rule, "MISSING DATA", rule, sep="\n")

    na_counts = df.isna().sum()
    summary = na_counts.to_frame(name='Missing')
    summary['Percent'] = na_counts.div(len(df)).mul(100)

    # Only columns that actually have gaps are shown.
    has_gaps = summary['Missing'] > 0
    print(summary.loc[has_gaps])
    print(f"\nTotal missing values: {na_counts.sum()}")

# Print the missing-value table for the sample DataFrame.
missing_report(df)

Numeric Summary

code.py
def numeric_summary(df):
    """Print mean, median, min and max for every numeric column.

    Mean and median are shown with two decimals; min and max are printed
    as returned by pandas. Output goes to standard output.

    Args:
        df: The pandas DataFrame to summarize.
    """
    rule = "=" * 40
    print(rule, "NUMERIC COLUMNS", rule, sep="\n")

    for name in df.select_dtypes(include=['number']).columns:
        values = df[name]
        lines = [
            f"\n{name}:",
            f"  Mean: {values.mean():.2f}",
            f"  Median: {values.median():.2f}",
            f"  Min: {values.min()}",
            f"  Max: {values.max()}",
        ]
        print("\n".join(lines))

# Print the numeric summary for the sample DataFrame.
numeric_summary(df)

Categorical Summary

code.py
def categorical_summary(df):
    """Print a summary of every object-dtype column to standard output.

    For each column this prints the number of distinct non-missing values,
    the most common value, and the counts of the five most frequent values.
    A column with no non-missing values is reported with placeholders
    instead of raising.

    Args:
        df: The pandas DataFrame to summarize.
    """
    print("=" * 40)
    print("CATEGORICAL COLUMNS")
    print("=" * 40)
    cat_cols = df.select_dtypes(include=['object']).columns

    for col in cat_cols:
        print(f"\n{col}:")
        print(f"  Unique values: {df[col].nunique()}")

        # mode() returns an empty Series when every value is missing, so
        # indexing its first element unconditionally would raise.
        modes = df[col].mode()
        most_common = modes.iloc[0] if not modes.empty else "N/A"
        print(f"  Most common: {most_common}")

        print("  Value counts:")
        counts = df[col].value_counts().head(5)
        if counts.empty:
            print("    (no non-missing values)")
        else:
            print(counts.to_string())

# Print the categorical summary for the sample DataFrame.
categorical_summary(df)

Complete EDA Report Function

code.py
def full_eda_report(df):
    """Print a five-part exploratory data analysis report to standard output.

    Sections, in order: data shape, missing values per column, a
    ``describe()`` summary, value counts for object-dtype columns, and the
    correlation matrix of the numeric columns. Each section prints an
    explicit placeholder line when it has nothing to show, so the report
    also runs on frames with no rows, no columns, or no numeric data.

    Args:
        df: The pandas DataFrame to report on.
    """
    rule = "=" * 50
    total_rows = len(df)
    numeric_df = df.select_dtypes(include=['number'])
    cat_cols = df.select_dtypes(include=['object']).columns

    print("\n" + rule)
    print("EXPLORATORY DATA ANALYSIS REPORT")
    print(rule)

    # 1. Overview
    print("\n1. DATA SHAPE")
    print(f"   Rows: {total_rows}")
    print(f"   Columns: {len(df.columns)}")

    # 2. Missing data
    print("\n2. MISSING DATA")
    missing = df.isna().sum()
    if missing.sum() > 0:
        # A positive count implies at least one row, so the division is safe.
        for col, count in missing[missing > 0].items():
            pct = (count / total_rows) * 100
            print(f"   {col}: {count} ({pct:.1f}%)")
    else:
        print("   No missing values!")

    # 3. Numeric columns
    print("\n3. NUMERIC SUMMARY")
    if numeric_df.shape[1] == 0:
        # Without numeric columns describe() would either summarize the
        # non-numeric ones under this heading or raise on a column-less frame.
        print("   No numeric columns.")
    else:
        print(df.describe().round(2))

    # 4. Categorical columns
    print("\n4. CATEGORICAL SUMMARY")
    if len(cat_cols) == 0:
        print("   No categorical columns.")
    for col in cat_cols:
        print(f"\n   {col}:")
        print(df[col].value_counts().to_string())

    # 5. Correlations
    print("\n5. CORRELATIONS")
    if numeric_df.shape[1] > 1:
        print(numeric_df.corr().round(2))
    else:
        print("   Need at least two numeric columns.")

    print("\n" + rule)
    print("END OF REPORT")
    print(rule)

# Run the full report on the sample DataFrame; the output goes to stdout.
full_eda_report(df)

Save Report to File

code.py
import sys

# Save print output to file.
# UTF-8 is set explicitly so non-ASCII values in the data cannot fail on a
# platform whose default encoding is narrower.
with open('eda_report.txt', 'w', encoding='utf-8') as f:
    # Redirect print to file
    old_stdout = sys.stdout
    sys.stdout = f
    try:
        full_eda_report(df)
    finally:
        # Always restore stdout: if the report raised, later prints would
        # otherwise be sent to the file that is already closed.
        sys.stdout = old_stdout

print("Report saved to eda_report.txt")

Key Points

  • Start with data overview (shape, types)
  • Report missing values
  • Summarize numeric columns (mean, median)
  • Summarize categorical columns (counts)
  • Note any unusual findings
  • Save report for sharing

EDA Checklist

  1. Data size and shape
  2. Data types
  3. Missing values
  4. Duplicates
  5. Numeric statistics
  6. Categorical distributions
  7. Correlations
  8. Outliers
  9. Key findings

What's Next?

Congratulations! You've completed EDA. Next, learn Data Visualization to show your findings in charts.