Camera Calibration and 3D Reconstruction

alt text

Code on GitHub gists here:

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import matplotlib_inline
import scipy.optimize

matplotlib_inline.backend_inline.set_matplotlib_formats('svg')
#plt.style.use(r'C:\LibsAndApps\Python config files\proplot_style.mplstyle')

def R_axis_angle(n: np.ndarray, theta: float) -> np.ndarray:
    # function returning 3x3 rotation matrix given an axis and angle of rotation
    return np.eye(3) * np.cos(theta) + \
        (1 - np.cos(theta)) * np.outer(n, n) + \
        np.sin(theta) * np.array([[0, -n[2], n[1]], [n[2], 0, -n[0]], [-n[1], n[0], 0]])

def affine_matrix(R: np.ndarray, T: np.ndarray) -> np.ndarray:
    # function returning 4x4 affine matrix given a rotation matrix and a translation vector
    return np.hstack((np.vstack((R, np.array([0, 0, 0]))),
        np.vstack((T.reshape(3, 1), np.array([1])))))

# set camera intrinsic parameters - same in both cameras
k_u = 15  # pixel size, horizontal
k_v = 13  # pixel size, vertical
f = 1  # focal length
u_0 = 1  # optical center, horizontal
v_0 = 0.5  # optical center, vertical

# set camera extrinsic parameters
R_axis_1 = np.array([0.707, 0, 0.707])  # axis of rotation
R_axis_2 = np.array([0.9, 0.2, 0.8])
R_1 = R_axis_angle(R_axis_1 / np.linalg.norm(R_axis_1), np.pi / 2)  # rotation matrix, 3D
R_2 = R_axis_angle(R_axis_2 / np.linalg.norm(R_axis_2), np.pi / 3)
T_1 = np.array([0, 1, 1])  # translation vector, 3D
T_2 = np.array([-1, 0, 1])

P_r_1 = affine_matrix(R_1, T_1)  # rotation matrix: world to camera
P_r_2 = affine_matrix(R_2, T_2)
P_p = np.array([[f, 0, 0, 0], [0, f, 0, 0], [0, 0, 1, 0]])  # projection matrix: camera to image
P_c = np.array([[k_u, 0, u_0], [0, k_v, v_0], [0, 0, 1]])  # camera matrix: image to pixel
P_pr_1 = P_c @ P_p @ P_r_1  # projection matrix: world to pixel
P_pr_2 = P_c @ P_p @ P_r_2

print(P_pr_1)
print(P_pr_2)

# generate some random 3D points
N_pts = 30
pts_world_true_3d = np.random.rand(3, N_pts)
pts_world_true_4d = np.vstack((pts_world_true_3d, np.ones(N_pts)))  # homogeneous coordinates

# find true camera coords
pts_cam_1_true_3d = P_pr_1 @ pts_world_true_4d
pts_cam_2_true_3d = P_pr_2 @ pts_world_true_4d
pts_cam_1_true_2d = pts_cam_1_true_3d[:2, :] / pts_cam_1_true_3d[2, :]
pts_cam_2_true_2d = pts_cam_2_true_3d[:2, :] / pts_cam_2_true_3d[2, :]

# add some noise to the true points to simulate measured points
noise = 0.02
pts_cam_1_2d = pts_cam_1_true_2d + np.random.randn(2, N_pts) * noise
pts_cam_2_2d = pts_cam_2_true_2d + np.random.randn(2, N_pts) * noise

print(f'World points: {pts_world_true_3d[:, :5]}')
print(f'Camera 1 true points: {pts_cam_1_true_2d[:, :5]}')
print(f'Camera 2 true points: {pts_cam_2_true_2d[:, :5]}')
print(f'Camera 1 measured points: {pts_cam_1_2d[:, :5]}')
print(f'Camera 2 measured points: {pts_cam_2_2d[:, :5]}')

# use RANSAC and least squares to calibrate each camera

def calibrate_camera(pts_cam_2d, pts_world_true_4d, max_iter=1000, abs_tol=1e-2):

    # 1. RANSAC (random sample consensus) to estimate the projection matrix
    inlier_count_max = 0
    best_P_est = None

    for r_i in range(max_iter):
        selected_indices = np.random.choice(N_pts, 6, replace=False)
        A = np.zeros((12, 12))
        for i, j in zip(selected_indices, range(6)):
            A[2 * j, :] = np.hstack((pts_world_true_4d[:, i], np.zeros(4), -1 * pts_cam_2d[0, i] * pts_world_true_4d[:, i]))  # equation for u
            A[2 * j + 1, :] = np.hstack((np.zeros(4), pts_world_true_4d[:, i], -1 * pts_cam_2d[1, i] * pts_world_true_4d[:, i]))  # equation for v

        # orthogonal least squares solution to A @ p = 0 (right eigenvector corresponding to eigenvalue closest to 0)
        _, _, Vh = np.linalg.svd(A)
        P_est = Vh[-1, :].reshape(3, 4)  # estimated projection matrix
        P_est /= P_est[-1, -1]  # normalise

        # find estimated 3D points
        A_full = np.zeros((2 * N_pts, 12))
        for i in range(N_pts):
            A_full[2 * i, :] = np.hstack((pts_world_true_4d[:, i], np.zeros(4), -1 * pts_cam_2d[0, i] * pts_world_true_4d[:, i]))
            A_full[2 * i + 1, :] = np.hstack((np.zeros(4), pts_world_true_4d[:, i], -1 * pts_cam_2d[1, i] * pts_world_true_4d[:, i]))

        Ap_est = A_full @ P_est.flatten()
        # count inliers, should be very close to zero
        inliers = np.sum(np.abs(Ap_est) <= abs_tol)
        if inliers > inlier_count_max:
            inlier_count_max = inliers
            best_P_est = P_est
            print(f'Iteration {r_i}: found {inliers} inliers')

    print(f'RANSAC finished: found {inlier_count_max} out of {2 * N_pts} inliers with P = \n{best_P_est}')

    # 2. Minimise reprojection errors (nonlinear least squares: Levenberg-Marquardt algorithm)
    # starting from best_P_est as initial guess
    proj_2d = lambda P, X: (w := (P.reshape(3, 4) @ X))[:2] / w[2]
    P_opt = scipy.optimize.least_squares(
        lambda P: np.sum((pts_cam_2d - proj_2d(P, pts_world_true_4d))**2, axis=0), best_P_est.flatten())
    P_best = P_opt.x.reshape(3, 4) / P_opt.x[-1]
    print(f'Optimal solution: \n {P_best}, cost: {P_opt.cost}')

    # 3. Decomposition into intrinsic and extrinsic parameters from optimal estimate of P
    K_est, R_est = scipy.linalg.rq(P_best[:, :3])  # transposed QR decomposition
    T_est = np.linalg.inv(K_est) @ P_best[:, 3]
    K_est = np.hstack((K_est / K_est[-1, -1], np.zeros((3, 1))))
    print(f'Estimated parameters:\n K: \n{np.around(K_est, 5)}, \nR: \n{np.around(R_est, 5)}, \nT: {np.around(T_est, 5)}')

    return K_est, R_est, T_est


def triangulate(K, R, T, pts_cam_2d, X_c: np.array = None, invert_in_camera: bool = True):
    if X_c is None:
        N_pts = pts_cam_2d.shape[1]
        pts_cam_X_c = (-1 if invert_in_camera else 1) * np.linalg.inv(K[:, :3]) @ np.vstack((pts_cam_2d, np.ones(N_pts)))
    else:
        N_pts = X_c.shape[1]
        pts_cam_X_c = (-1 if invert_in_camera else 1) * X_c
    pts_cam_X = np.linalg.inv(R) @ (pts_cam_X_c - np.tile(T.reshape(3, 1), (1, N_pts)))
    return pts_cam_X

# Calibration: compute intrinsic and extrinsic camera parameters for each camera
K_1_est, R_1_est, T_1_est = calibrate_camera(pts_cam_1_2d, pts_world_true_4d)
K_2_est, R_2_est, T_2_est = calibrate_camera(pts_cam_2_2d, pts_world_true_4d)

# Triangulation: invert the projection to calculate 3D world points, projected to the image plane
pts_cam_1_X = triangulate(K_1_est, R_1_est, T_1_est, pts_cam_1_2d)
pts_cam_2_X = triangulate(K_2_est, R_2_est, T_2_est, pts_cam_2_2d)

# Epipolar constraint - can use the essential matrix since the cameras are calibrated
E = R_2_est @ np.array([[0, -T_2_est[2], T_2_est[1]], [T_2_est[2], 0, -T_2_est[0]], [-T_2_est[1], T_2_est[0], 0]]) @ R_1_est

### plots

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(pts_world_true_3d[0, :], pts_world_true_3d[1, :], pts_world_true_3d[2, :], label='True world points')
ax.scatter(pts_cam_1_X[0, :], pts_cam_1_X[1, :], pts_cam_1_X[2, :], label='Estimated, cam 1')
ax.scatter(pts_cam_2_X[0, :], pts_cam_2_X[1, :], pts_cam_2_X[2, :], label='Estimated, cam 2')

# draw imaging plane of camera 1
u1 = np.linspace(min(pts_cam_1_2d[0, :]), max(pts_cam_1_2d[0, :]), 10)
v1 = np.linspace(min(pts_cam_1_2d[1, :]), max(pts_cam_1_2d[1, :]), 10)
U1, V1 = np.meshgrid(u1, v1)
X1 = np.vstack((U1.flatten(), V1.flatten(), np.ones(100)))
X1_w = triangulate(K_1_est, R_1_est, T_1_est, None, X_c = np.linalg.inv(K_1_est[:, :3]) @ X1, invert_in_camera=True)
ax.plot_surface(X1_w[0, :].reshape(10, 10), X1_w[1, :].reshape(10, 10), X1_w[2, :].reshape(10, 10),
                alpha=0.3, label='Camera 1 imaging plane')

# draw imaging plane of camera 2
u2 = np.linspace(min(pts_cam_2_2d[0, :]), max(pts_cam_2_2d[0, :]), 10)
v2 = np.linspace(min(pts_cam_2_2d[1, :]), max(pts_cam_2_2d[1, :]), 10)
U2, V2 = np.meshgrid(u2, v2)
X2 = np.vstack((U2.flatten(), V2.flatten(), np.ones(100)))
X2_w = triangulate(K_2_est, R_2_est, T_2_est, None, X_c = np.linalg.inv(K_2_est[:, :3]) @ X2, invert_in_camera=True)
ax.plot_surface(X2_w[0, :].reshape(10, 10), X2_w[1, :].reshape(10, 10), X2_w[2, :].reshape(10, 10),
                alpha=0.3, label='Camera 2 imaging plane')

# draw optical centres
X_c = np.array([0, 0, 0])  # origin of camera-centred coordinates
X1_w = triangulate(K_1_est, R_1_est, T_1_est, None, X_c=X_c.reshape(3, 1))
ax.scatter(X1_w[0], X1_w[1], X1_w[2], c='r', label='Optical center 1')
X2_w = triangulate(K_2_est, R_2_est, T_2_est, None, X_c=X_c.reshape(3, 1))
ax.scatter(X2_w[0], X2_w[1], X2_w[2], c='b', label='Optical center 2')

# draw lines between the true and estimated points
for i in range(N_pts):
    ax.plot([pts_world_true_3d[0, i], pts_cam_1_X[0, i]],
            [pts_world_true_3d[1, i], pts_cam_1_X[1, i]],
            [pts_world_true_3d[2, i], pts_cam_1_X[2, i]], 'k-',
            alpha=0.3, label=('Rays through projections' if i == 0 else None))

    ax.plot([pts_world_true_3d[0, i], pts_cam_2_X[0, i]],
            [pts_world_true_3d[1, i], pts_cam_2_X[1, i]],
            [pts_world_true_3d[2, i], pts_cam_2_X[2, i]], 'k-',
            alpha=0.3, label=('Rays through projections' if i == 0 else None))

# draw lines between estimated and optical center
for i in range(N_pts):
    ax.plot([pts_cam_1_X[0, i], X1_w[0][0]],
            [pts_cam_1_X[1, i], X1_w[1][0]],
            [pts_cam_1_X[2, i], X1_w[2][0]], 'k--',
            alpha=0.3, label=('Rays to centre' if i == 0 else None))
    
    ax.plot([pts_cam_2_X[0, i], X2_w[0][0]],
            [pts_cam_2_X[1, i], X2_w[1][0]],
            [pts_cam_2_X[2, i], X2_w[2][0]], 'k--',
            alpha=0.3, label=('Rays to centre' if i == 0 else None))

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.legend(loc='upper left')
plt.show()
Lorcan Nicholls
Lorcan Nicholls
Graduate Engineer

An graduate engineer from the University of Cambridge. Interested in interdisciplinary engineering and science, sustainable energy and automation.

comments powered by Disqus
Next
Previous

Related