Camera Calibration and 3D Reconstruction

Last updated on Sep 11, 2025
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib_inline
import scipy.optimize

matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
#plt.style.use(r'C:\LibsAndApps\Python config files\proplot_style.mplstyle')

def R_axis_angle(n: np.ndarray, theta: float) -> np.ndarray:
    # function returning 3x3 rotation matrix given an axis and angle of rotation
    return np.eye(3) * np.cos(theta) + \
        (1 - np.cos(theta)) * np.outer(n, n) + \
        np.sin(theta) * np.array([[0, -n[2], n[1]], [n[2], 0, -n[0]], [-n[1], n[0], 0]])

def affine_matrix(R: np.ndarray, T: np.ndarray) -> np.ndarray:
    # function returning 4x4 affine matrix given a rotation matrix and a translation vector
    return np.hstack((np.vstack((R, np.array([0, 0, 0]))),
        np.vstack((T.reshape(3, 1), np.array([1])))))

# set camera intrinsic parameters - same in both cameras
k_u = 15  # pixel size, horizontal
k_v = 13  # pixel size, vertical
f = 1  # focal length
u_0 = 1  # optical center, horizontal
v_0 = 0.5  # optical center, vertical

# set camera extrinsic parameters
R_axis_1 = np.array([0.707, 0, 0.707])  # axis of rotation
R_axis_2 = np.array([0.9, 0.2, 0.8])
R_1 = R_axis_angle(R_axis_1 / np.linalg.norm(R_axis_1), np.pi / 2)  # rotation matrix, 3D
R_2 = R_axis_angle(R_axis_2 / np.linalg.norm(R_axis_2), np.pi / 3)
T_1 = np.array([0, 1, 1])  # translation vector, 3D
T_2 = np.array([-1, 0, 1])

P_r_1 = affine_matrix(R_1, T_1)  # rotation matrix: world to camera
P_r_2 = affine_matrix(R_2, T_2)
P_p = np.array([[f, 0, 0, 0], [0, f, 0, 0], [0, 0, 1, 0]])  # projection matrix: camera to image
P_c = np.array([[k_u, 0, u_0], [0, k_v, v_0], [0, 0, 1]])  # camera matrix: image to pixel
P_pr_1 = P_c @ P_p @ P_r_1  # projection matrix: world to pixel
P_pr_2 = P_c @ P_p @ P_r_2

print(P_pr_1)
print(P_pr_2)

# generate some random 3D points
N_pts = 30
pts_world_true_3d = np.random.rand(3, N_pts)
pts_world_true_4d = np.vstack((pts_world_true_3d, np.ones(N_pts)))  # homogeneous coordinates

# find true camera coords
pts_cam_1_true_3d = P_pr_1 @ pts_world_true_4d
pts_cam_2_true_3d = P_pr_2 @ pts_world_true_4d
pts_cam_1_true_2d = pts_cam_1_true_3d[:2, :] / pts_cam_1_true_3d[2, :]
pts_cam_2_true_2d = pts_cam_2_true_3d[:2, :] / pts_cam_2_true_3d[2, :]

# add some noise to the true points to simulate measured points
noise = 0.02
pts_cam_1_2d = pts_cam_1_true_2d + np.random.randn(2, N_pts) * noise
pts_cam_2_2d = pts_cam_2_true_2d + np.random.randn(2, N_pts) * noise

print(f'World points: {pts_world_true_3d[:, :5]}')
print(f'Camera 1 true points: {pts_cam_1_true_2d[:, :5]}')
print(f'Camera 2 true points: {pts_cam_2_true_2d[:, :5]}')
print(f'Camera 1 measured points: {pts_cam_1_2d[:, :5]}')
print(f'Camera 2 measured points: {pts_cam_2_2d[:, :5]}')

# use RANSAC and least squares to calibrate each camera

def calibrate_camera(pts_cam_2d, pts_world_true_4d, max_iter=1000, abs_tol=1e-2):

    # 1. RANSAC (random sample consensus) to estimate the projection matrix
    inlier_count_max = 0
    best_P_est = None

    for r_i in range(max_iter):
        selected_indices = np.random.choice(N_pts, 6, replace=False)
        A = np.zeros((12, 12))
        for i, j in zip(selected_indices, range(6)):
            A[2 * j, :] = np.hstack((pts_world_true_4d[:, i], np.zeros(4), -1 * pts_cam_2d[0, i] * pts_world_true_4d[:, i]))  # equation for u
            A[2 * j + 1, :] = np.hstack((np.zeros(4), pts_world_true_4d[:, i], -1 * pts_cam_2d[1, i] * pts_world_true_4d[:, i]))  # equation for v

        # orthogonal least squares solution to A @ p = 0 (right eigenvector corresponding to eigenvalue closest to 0)
        _, _, Vh = np.linalg.svd(A)
        P_est = Vh[-1, :].reshape(3, 4)  # estimated projection matrix
        P_est /= P_est[-1, -1]  # normalise

        # find estimated 3D points
        A_full = np.zeros((2 * N_pts, 12))
        for i in range(N_pts):
            A_full[2 * i, :] = np.hstack((pts_world_true_4d[:, i], np.zeros(4), -1 * pts_cam_2d[0, i] * pts_world_true_4d[:, i]))
            A_full[2 * i + 1, :] = np.hstack((np.zeros(4), pts_world_true_4d[:, i], -1 * pts_cam_2d[1, i] * pts_world_true_4d[:, i]))

        Ap_est = A_full @ P_est.flatten()
        # count inliers, should be very close to zero
        inliers = np.sum(np.abs(Ap_est) <= abs_tol)
        if inliers > inlier_count_max:
            inlier_count_max = inliers
            best_P_est = P_est
            print(f'Iteration {r_i}: found {inliers} inliers')

    print(f'RANSAC finished: found {inlier_count_max} out of {2 * N_pts} inliers with P = \n{best_P_est}')

    # 2. Minimise reprojection errors (nonlinear least squares: Levenberg-Marquardt algorithm)
    # starting from best_P_est as initial guess
    proj_2d = lambda P, X: (w := (P.reshape(3, 4) @ X))[:2] / w[2]
    P_opt = scipy.optimize.least_squares(
        lambda P: np.sum((pts_cam_2d - proj_2d(P, pts_world_true_4d))**2, axis=0), best_P_est.flatten())
    P_best = P_opt.x.reshape(3, 4) / P_opt.x[-1]
    print(f'Optimal solution: \n {P_best}, cost: {P_opt.cost}')

    # 3. Decomposition into intrinsic and extrinsic parameters from optimal estimate of P
    K_est, R_est = scipy.linalg.rq(P_best[:, :3])  # transposed QR decomposition
    T_est = np.linalg.inv(K_est) @ P_best[:, 3]
    K_est = np.hstack((K_est / K_est[-1, -1], np.zeros((3, 1))))
    print(f'Estimated parameters:\n K: \n{np.around(K_est, 5)}, \nR: \n{np.around(R_est, 5)}, \nT: {np.around(T_est, 5)}')

    return K_est, R_est, T_est


def triangulate(K, R, T, pts_cam_2d, X_c: np.array = None, invert_in_camera: bool = True):
    if X_c is None:
        N_pts = pts_cam_2d.shape[1]
        pts_cam_X_c = (-1 if invert_in_camera else 1) * np.linalg.inv(K[:, :3]) @ np.vstack((pts_cam_2d, np.ones(N_pts)))
    else:
        N_pts = X_c.shape[1]
        pts_cam_X_c = (-1 if invert_in_camera else 1) * X_c
    pts_cam_X = np.linalg.inv(R) @ (pts_cam_X_c - np.tile(T.reshape(3, 1), (1, N_pts)))
    return pts_cam_X

# Calibration: compute intrinsic and extrinsic camera parameters for each camera
K_1_est, R_1_est, T_1_est = calibrate_camera(pts_cam_1_2d, pts_world_true_4d)
K_2_est, R_2_est, T_2_est = calibrate_camera(pts_cam_2_2d, pts_world_true_4d)

# Triangulation: invert the projection to calculate 3D world points, projected to the image plane
pts_cam_1_X = triangulate(K_1_est, R_1_est, T_1_est, pts_cam_1_2d)
pts_cam_2_X = triangulate(K_2_est, R_2_est, T_2_est, pts_cam_2_2d)

# Epipolar constraint - can use the essential matrix since the cameras are calibrated
E = R_2_est @ np.array([[0, -T_2_est[2], T_2_est[1]], [T_2_est[2], 0, -T_2_est[0]], [-T_2_est[1], T_2_est[0], 0]]) @ R_1_est

### plots

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(pts_world_true_3d[0, :], pts_world_true_3d[1, :], pts_world_true_3d[2, :], label='True world points')
ax.scatter(pts_cam_1_X[0, :], pts_cam_1_X[1, :], pts_cam_1_X[2, :], label='Estimated, cam 1')
ax.scatter(pts_cam_2_X[0, :], pts_cam_2_X[1, :], pts_cam_2_X[2, :], label='Estimated, cam 2')

# draw imaging plane of camera 1
u1 = np.linspace(min(pts_cam_1_2d[0, :]), max(pts_cam_1_2d[0, :]), 10)
v1 = np.linspace(min(pts_cam_1_2d[1, :]), max(pts_cam_1_2d[1, :]), 10)
U1, V1 = np.meshgrid(u1, v1)
X1 = np.vstack((U1.flatten(), V1.flatten(), np.ones(100)))
X1_w = triangulate(K_1_est, R_1_est, T_1_est, None, X_c = np.linalg.inv(K_1_est[:, :3]) @ X1, invert_in_camera=True)
ax.plot_surface(X1_w[0, :].reshape(10, 10), X1_w[1, :].reshape(10, 10), X1_w[2, :].reshape(10, 10),
                alpha=0.3, label='Camera 1 imaging plane')

# draw imaging plane of camera 2
u2 = np.linspace(min(pts_cam_2_2d[0, :]), max(pts_cam_2_2d[0, :]), 10)
v2 = np.linspace(min(pts_cam_2_2d[1, :]), max(pts_cam_2_2d[1, :]), 10)
U2, V2 = np.meshgrid(u2, v2)
X2 = np.vstack((U2.flatten(), V2.flatten(), np.ones(100)))
X2_w = triangulate(K_2_est, R_2_est, T_2_est, None, X_c = np.linalg.inv(K_2_est[:, :3]) @ X2, invert_in_camera=True)
ax.plot_surface(X2_w[0, :].reshape(10, 10), X2_w[1, :].reshape(10, 10), X2_w[2, :].reshape(10, 10),
                alpha=0.3, label='Camera 2 imaging plane')

# draw optical centres
X_c = np.array([0, 0, 0])  # origin of camera-centred coordinates
X1_w = triangulate(K_1_est, R_1_est, T_1_est, None, X_c=X_c.reshape(3, 1))
ax.scatter(X1_w[0], X1_w[1], X1_w[2], c='r', label='Optical center 1')
X2_w = triangulate(K_2_est, R_2_est, T_2_est, None, X_c=X_c.reshape(3, 1))
ax.scatter(X2_w[0], X2_w[1], X2_w[2], c='b', label='Optical center 2')

# draw lines between the true and estimated points
for i in range(N_pts):
    ax.plot([pts_world_true_3d[0, i], pts_cam_1_X[0, i]],
            [pts_world_true_3d[1, i], pts_cam_1_X[1, i]],
            [pts_world_true_3d[2, i], pts_cam_1_X[2, i]], 'k-',
            alpha=0.3, label=('Rays through projections' if i == 0 else None))

    ax.plot([pts_world_true_3d[0, i], pts_cam_2_X[0, i]],
            [pts_world_true_3d[1, i], pts_cam_2_X[1, i]],
            [pts_world_true_3d[2, i], pts_cam_2_X[2, i]], 'k-',
            alpha=0.3, label=('Rays through projections' if i == 0 else None))

# draw lines between estimated and optical center
for i in range(N_pts):
    ax.plot([pts_cam_1_X[0, i], X1_w[0][0]],
            [pts_cam_1_X[1, i], X1_w[1][0]],
            [pts_cam_1_X[2, i], X1_w[2][0]], 'k--',
            alpha=0.3, label=('Rays to centre' if i == 0 else None))
    
    ax.plot([pts_cam_2_X[0, i], X2_w[0][0]],
            [pts_cam_2_X[1, i], X2_w[1][0]],
            [pts_cam_2_X[2, i], X2_w[2][0]], 'k--',
            alpha=0.3, label=('Rays to centre' if i == 0 else None))

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.legend(loc='upper left')
plt.show()
Computer Vision
Camera Calibration and 3D Reconstruction

Lorcan Nicholls

Graduate Engineer

Related