Interview Questions

1) Implement Linear Regression using Python and NumPy.


import numpy as np

# Linear regression function
def linear_regression(X, y):
    # Add a bias column with 1s to the X matrix
    X = np.c_[np.ones(X.shape[0]), X]
    # Solve the Normal Equation theta = (X^T X)^(-1) X^T y;
    # np.linalg.solve is more stable than forming the inverse explicitly
    theta = np.linalg.solve(X.T.dot(X), X.T.dot(y))
    return theta

# Example data
X = np.array([[1], [2], [3], [4]])  # Features
y = np.array([5, 7, 9, 11])  # Target

# Train the model
theta = linear_regression(X, y)
print(f"Optimal weights: {theta}")

Explanation: This code solves for the optimal weights in closed form with the Normal Equation, theta = (X^T X)^(-1) X^T y, so no iterative optimization is needed. It assumes X^T X is invertible, i.e., no perfectly collinear features.
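
As a quick sanity check, the learned weights can reproduce the targets; the bias column has to be added the same way as during training. On this toy data, y = 3 + 2x exactly, so theta should come out close to [3, 2]:

X_b = np.c_[np.ones(X.shape[0]), X]  # same bias column as in linear_regression
print(X_b.dot(theta))  # expected: approximately [5. 7. 9. 11.]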

2) Implement Logistic Regression for binary classification using Python and NumPy.


import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def logistic_regression(X, y, learning_rate=0.1, epochs=1000):
    m, n = X.shape
    X = np.c_[np.ones(m), X]  # Add bias column
    theta = np.zeros(n + 1)  # Initialize weights
    
    for _ in range(epochs):
        prediction = sigmoid(np.dot(X, theta))
        error = prediction - y
        gradient = np.dot(X.T, error) / m
        theta -= learning_rate * gradient  # Update weights
    
    return theta

# Example data
X = np.array([[1], [2], [3], [4]])
y = np.array([0, 0, 1, 1])  # Labels for binary classification

# Train the model
theta = logistic_regression(X, y)
print(f"Optimal weights: {theta}")

Explanation: This implementation trains logistic regression with batch gradient descent. Predictions are the sigmoid of a linear combination of the inputs, and X.T.dot(error) / m is exactly the gradient of the log-loss, which keeps the update rule simple.
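
To turn the learned weights into class labels, a small helper (sketched here, not part of the snippet above) can threshold the sigmoid output at 0.5:

def predict(X, theta, threshold=0.5):
    X = np.c_[np.ones(X.shape[0]), X]  # same bias column as in training
    return (sigmoid(np.dot(X, theta)) >= threshold).astype(int)

print(predict(X, theta))  # should recover [0 0 1 1] on this separable toy set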

3) Write a k-Nearest Neighbors algorithm from scratch using Python.


import numpy as np
from collections import Counter

def euclidean_distance(x1, x2):
    return np.sqrt(np.sum((x1 - x2) ** 2))

def knn(X_train, y_train, X_test, k=3):
    predictions = []
    for test_point in X_test:
        # Calculate the distance from the test point to all training points
        distances = [euclidean_distance(test_point, train_point) for train_point in X_train]
        # Get indices of the k nearest neighbors
        k_indices = np.argsort(distances)[:k]
        # Get the labels of the k nearest neighbors
        k_nearest_labels = [y_train[i] for i in k_indices]
        # Get the most common class label
        most_common = Counter(k_nearest_labels).most_common(1)
        predictions.append(most_common[0][0])
    return np.array(predictions)

# Example data
X_train = np.array([[1], [2], [3], [4]])
y_train = np.array([0, 0, 1, 1])
X_test = np.array([[1.5], [3.5]])

# Make predictions
predictions = knn(X_train, y_train, X_test, k=3)
print(f"Predictions: {predictions}")

Explanation: This implementation computes the Euclidean distance between the test point and all training points, then predicts the most frequent class label from the k nearest neighbors.
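
The per-point loop is easy to read but slow on large datasets. As an alternative sketch, all pairwise Euclidean distances can be computed in one broadcast step:

# All test-to-train distances at once: result has shape (n_test, n_train)
dists = np.linalg.norm(X_test[:, None, :] - X_train[None, :, :], axis=2)
print(dists)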

4) Implement a simple decision tree classifier from scratch using Python.


import numpy as np

def gini_impurity(y):
    m = len(y)
    return 1 - sum((np.sum(y == c) / m) ** 2 for c in np.unique(y))

def best_split(X, y):
    best_gini = float("inf")
    split = None  # renamed so it doesn't shadow the function name
    m, n = X.shape
    
    for feature_index in range(n):
        values = X[:, feature_index]
        possible_splits = np.unique(values)
        
        for split_value in possible_splits:
            left_mask = values <= split_value
            right_mask = values > split_value
            if not right_mask.any():  # skip splits that leave one side empty
                continue
            left_y, right_y = y[left_mask], y[right_mask]
            
            # Weighted Gini impurity of the two children
            gini = (len(left_y) * gini_impurity(left_y) + len(right_y) * gini_impurity(right_y)) / len(y)
            
            if gini < best_gini:
                best_gini = gini
                split = (feature_index, split_value)
    
    return split

def decision_tree(X, y, depth=0, max_depth=3):
    # Stop when the node is pure or the depth limit is reached;
    # the leaf stores the majority class (not just the first unique label)
    if len(np.unique(y)) == 1 or depth == max_depth:
        return int(np.bincount(y).argmax())
    
    split = best_split(X, y)
    if split is None:  # no valid split exists (e.g., all feature values identical)
        return int(np.bincount(y).argmax())
    
    feature_index, split_value = split
    left_mask = X[:, feature_index] <= split_value
    right_mask = X[:, feature_index] > split_value
    left_tree = decision_tree(X[left_mask], y[left_mask], depth + 1, max_depth)
    right_tree = decision_tree(X[right_mask], y[right_mask], depth + 1, max_depth)
    
    return (feature_index, split_value, left_tree, right_tree)

# Example data
X = np.array([[1], [2], [3], [4], [5]])
y = np.array([0, 0, 1, 1, 1])

# Train the decision tree
tree = decision_tree(X, y)
print(f"Trained tree: {tree}")

Explanation: This decision tree greedily picks the feature/threshold pair that minimizes the weighted Gini impurity of the two children and recurses up to max_depth. Leaves store the majority class, which supports basic binary classification.
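
For intuition, the Gini impurity of the toy labels can be checked by hand: with two 0s and three 1s, gini = 1 - (2/5)^2 - (3/5)^2 = 0.48.

print(gini_impurity(y))  # 0.48 for y = [0, 0, 1, 1, 1]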

5) Write a random forest classifier from scratch by combining multiple decision trees.


import numpy as np
from collections import Counter  # needed for the majority vote in predict_forest

# Reuses decision_tree (and its helpers) from question 4

def bootstrap_sample(X, y):
    n = X.shape[0]
    indices = np.random.choice(n, size=n, replace=True)
    return X[indices], y[indices]

def random_forest(X_train, y_train, n_trees=10, max_depth=3):
    trees = []
    
    for _ in range(n_trees):
        # Create bootstrap samples
        X_bootstrap, y_bootstrap = bootstrap_sample(X_train, y_train)
        # Train a decision tree
        tree = decision_tree(X_bootstrap, y_bootstrap, max_depth=max_depth)
        trees.append(tree)
    
    return trees

def predict_forest(trees, X_test):
    predictions = []
    for test_point in X_test:
        # Get predictions from each tree
        tree_preds = [predict_tree(tree, test_point) for tree in trees]
        # Majority voting for final prediction
        predictions.append(Counter(tree_preds).most_common(1)[0][0])
    return np.array(predictions)

def predict_tree(tree, x):
    if not isinstance(tree, tuple):  # leaf node (NumPy integer labels would fail an isinstance(..., int) check)
        return tree
    feature_index, split_value, left_tree, right_tree = tree
    if x[feature_index] <= split_value:
        return predict_tree(left_tree, x)
    else:
        return predict_tree(right_tree, x)

# Example data
X_train = np.array([[1], [2], [3], [4], [5]])
y_train = np.array([0, 0, 1, 1, 1])
X_test = np.array([[1.5], [3.5]])

# Train random forest and predict
forest = random_forest(X_train, y_train, n_trees=5, max_depth=2)
predictions = predict_forest(forest, X_test)
print(f"Predictions: {predictions}")

Explanation: The random forest builds multiple decision trees on bootstrap samples of the training data and combines their predictions via majority voting. A full random forest would also subsample features at each split; this sketch uses bagging alone.
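
To see the majority vote in action, the individual tree predictions for one test point can be inspected directly (illustrative only; the exact votes depend on the random bootstrap samples):

votes = [predict_tree(tree, X_test[0]) for tree in forest]
print(f"Tree votes for {X_test[0]}: {votes}")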

6) Implement K-Means Clustering Algorithm


import numpy as np

def kmeans(X, k, max_iters=100):
    # Randomly initialize centroids
    centroids = X[np.random.choice(X.shape[0], k, replace=False)]
    for _ in range(max_iters):
        # Assign points to the nearest centroid
        distances = np.array([[np.linalg.norm(x - centroid) for centroid in centroids] for x in X])
        labels = np.argmin(distances, axis=1)
        
        # Update centroids
        new_centroids = np.array([X[labels == i].mean(axis=0) for i in range(k)])
        
        # Check for convergence (allclose avoids brittle exact float comparison)
        if np.allclose(centroids, new_centroids):
            break
        centroids = new_centroids
        
    return centroids, labels

# Example data
X = np.array([[1, 2], [1, 3], [3, 4], [5, 6], [8, 9]])

# Run K-Means
centroids, labels = kmeans(X, k=2)
print(f"Centroids: {centroids}")
print(f"Labels: {labels}")

Explanation: This simple K-Means implementation seeds the centroids with k randomly chosen data points, then alternates between assigning each point to its nearest centroid and recomputing centroids as cluster means until the centroids stop moving. Because the initialization is random, different runs can produce different clusterings; production implementations also handle empty clusters and use smarter seeding such as k-means++.
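
A common diagnostic for a clustering run is the inertia, the sum of squared distances from each point to its assigned centroid; a quick sketch using the variables above:

inertia = sum(np.linalg.norm(x - centroids[label]) ** 2 for x, label in zip(X, labels))
print(f"Inertia: {inertia}")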

7) Write a Naive Bayes classifier for binary classification.


import numpy as np

def fit_naive_bayes(X, y):
    # Calculate prior probabilities
    priors = np.bincount(y) / len(y)
    
    # Per-class feature means and variances for the Gaussian likelihoods
    # (a small epsilon guards against zero-variance features, which this toy data has)
    means = np.array([X[y == c].mean(axis=0) for c in np.unique(y)])
    variances = np.array([X[y == c].var(axis=0) for c in np.unique(y)]) + 1e-9
    
    return priors, means, variances

def predict_naive_bayes(X, priors, means, variances):
    predictions = []
    for x in X:
        likelihoods = []
        for c, (prior, mean, variance) in enumerate(zip(priors, means, variances)):
            likelihood = np.prod(1 / np.sqrt(2 * np.pi * variance) * np.exp(-((x - mean) ** 2) / (2 * variance)))
            posterior = likelihood * prior
            likelihoods.append(posterior)
        predictions.append(np.argmax(likelihoods))
    return np.array(predictions)

# Example data
X = np.array([[1, 2], [1, 3], [3, 3], [5, 6]])
y = np.array([0, 0, 1, 1])

# Train Naive Bayes
priors, means, variances = fit_naive_bayes(X, y)
predictions = predict_naive_bayes(X, priors, means, variances)
print(f"Predictions: {predictions}")

Explanation: This Naive Bayes classifier assumes feature independence and calculates the likelihood of each feature for each class based on the Gaussian distribution.
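
In practice, multiplying many small likelihoods underflows quickly, so real implementations work in log space. A sketch of the per-class log-posterior for a single sample (log_posterior is a hypothetical helper using the same symbols as above):

def log_posterior(x, prior, mean, variance):
    # log P(c) + sum over features of the Gaussian log-density
    log_likelihood = np.sum(-0.5 * np.log(2 * np.pi * variance) - ((x - mean) ** 2) / (2 * variance))
    return np.log(prior) + log_likelihood

print(log_posterior(X[0], priors[0], means[0], variances[0]))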

8) Implement a simple neural network for binary classification using Python and NumPy.


import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    # Assumes x is already the sigmoid output: s'(z) = s(z) * (1 - s(z))
    return x * (1 - x)

def neural_network(X, y, epochs=1000, learning_rate=0.1):
    input_layer = X.shape[1]
    hidden_layer = 4  # Arbitrary hidden layer size
    output_layer = 1
    
    # Initialize weights with small, zero-centered random values
    # (np.random.rand gives only positive weights, which makes XOR harder to learn;
    # bias terms are omitted for simplicity)
    W1 = np.random.randn(input_layer, hidden_layer) * 0.5
    W2 = np.random.randn(hidden_layer, output_layer) * 0.5
    
    for epoch in range(epochs):
        # Forward pass
        hidden_input = np.dot(X, W1)
        hidden_output = sigmoid(hidden_input)
        output_input = np.dot(hidden_output, W2)
        output = sigmoid(output_input)
        
        # Backward pass
        error = y - output
        output_gradient = error * sigmoid_derivative(output)
        
        hidden_error = output_gradient.dot(W2.T)
        hidden_gradient = hidden_error * sigmoid_derivative(hidden_output)
        
        # Update weights
        W2 += hidden_output.T.dot(output_gradient) * learning_rate
        W1 += X.T.dot(hidden_gradient) * learning_rate
    
    return W1, W2

# Example data
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # Input data
y = np.array([[0], [1], [1], [0]])  # XOR problem (binary classification)

# Train the neural network
W1, W2 = neural_network(X, y)

Explanation: This code implements a one-hidden-layer neural network with sigmoid activations. The gradients are derived by hand (backpropagation) and the weights are updated with plain gradient descent; bias terms are omitted for brevity.
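
The trained weights can be checked with a forward pass over the XOR inputs. Because the network has no bias terms, it may or may not separate XOR cleanly on any given random initialization:

hidden = sigmoid(np.dot(X, W1))
output = sigmoid(np.dot(hidden, W2))
print(output.round(3))  # values near 0/1 indicate the XOR pattern was learned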

9) Write a simple CNN model for image classification.


import numpy as np

def relu(x):
    return np.maximum(0, x)

def conv2d(X, kernel):
    kernel_size = kernel.shape[0]
    output = np.zeros((X.shape[0] - kernel_size + 1, X.shape[1] - kernel_size + 1))
    for i in range(output.shape[0]):
        for j in range(output.shape[1]):
            output[i, j] = np.sum(X[i:i + kernel_size, j:j + kernel_size] * kernel)
    return output

def cnn_forward(X, kernel):
    conv_output = conv2d(X, kernel)
    relu_output = relu(conv_output)
    return relu_output

# Example input (5x5 image) and kernel (3x3)
X = np.array([[1, 1, 1, 0, 0],
              [1, 1, 1, 0, 0],
              [0, 0, 1, 1, 1],
              [0, 0, 1, 1, 1],
              [0, 0, 0, 0, 0]])

kernel = np.array([[1, 0, 1],
                   [0, 1, 0],
                   [1, 0, 1]])

# Apply CNN forward pass
output = cnn_forward(X, kernel)
print(f"CNN Output: \n{output}")

Explanation: The code implements a single convolutional layer followed by a ReLU activation. The conv2d function slides the kernel across the image and sums the elementwise products (strictly speaking cross-correlation, which is what most deep learning frameworks compute); a full CNN would stack such layers with pooling and a classifier head.
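
A real CNN follows convolutional layers with pooling to downsample the feature maps. A minimal max-pooling sketch in the same style (max_pool2d is a hypothetical helper, not part of the question's snippet):

def max_pool2d(X, size=2, stride=2):
    out_h = (X.shape[0] - size) // stride + 1
    out_w = (X.shape[1] - size) // stride + 1
    pooled = np.zeros((out_h, out_w))
    for i in range(out_h):
        for j in range(out_w):
            pooled[i, j] = np.max(X[i*stride:i*stride + size, j*stride:j*stride + size])
    return pooled

print(max_pool2d(output))  # the 3x3 feature map above pools down to 1x1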

10) Write a function to perform K-fold cross-validation for model evaluation.


import numpy as np
from sklearn.metrics import accuracy_score

def k_fold_cross_validation(X, y, model, k=5):
    fold_size = len(X) // k
    scores = []
    
    for i in range(k):
        start = i * fold_size
        end = (i + 1) * fold_size if i != k - 1 else len(X)
        
        X_train = np.concatenate([X[:start], X[end:]], axis=0)
        y_train = np.concatenate([y[:start], y[end:]], axis=0)
        X_test = X[start:end]
        y_test = y[start:end]
        
        # Train model and make predictions
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        
        score = accuracy_score(y_test, y_pred)
        scores.append(score)
    
    return np.mean(scores)

# Example usage (with a simple model)
from sklearn.linear_model import LogisticRegression
X = np.array([[1], [2], [3], [4], [5]])
y = np.array([0, 1, 1, 0, 1])

model = LogisticRegression()
mean_score = k_fold_cross_validation(X, y, model, k=3)
print(f"Mean Accuracy: {mean_score}")

Explanation: This code performs K-fold cross-validation: it splits the dataset into k contiguous folds, trains the model on k-1 folds, tests on the held-out fold, and reports the mean accuracy. Note that the folds are taken in order, so shuffling the data first is usually preferable, as sketched below.
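
A preliminary shuffle avoids order-dependent folds; in real projects, sklearn's KFold(shuffle=True) or StratifiedKFold handles this:

perm = np.random.permutation(len(X))
X_shuffled, y_shuffled = X[perm], y[perm]
mean_score = k_fold_cross_validation(X_shuffled, y_shuffled, model, k=3)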

11) Write a function to scale features using standardization.


import numpy as np

def standardize(X):
    mean = np.mean(X, axis=0)
    std_dev = np.std(X, axis=0)
    return (X - mean) / std_dev

# Example data
X = np.array([[1, 2], [2, 3], [3, 4], [4, 5]])

# Apply standardization
X_scaled = standardize(X)
print(f"Standardized Data: \n{X_scaled}")

Explanation: Standardization scales the features by subtracting the mean and dividing by the standard deviation for each feature. This is important for algorithms like logistic regression and k-NN.
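
One caveat: a constant feature has zero standard deviation, which makes the division blow up. A defensive variant (standardize_safe is a hypothetical helper) adds a small epsilon to the denominator:

def standardize_safe(X, eps=1e-8):
    return (X - np.mean(X, axis=0)) / (np.std(X, axis=0) + eps)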

12) Write a simple gradient descent function for minimizing a cost function.


import numpy as np

def gradient_descent(X, y, learning_rate=0.01, epochs=1000):
    m = len(y)
    theta = np.zeros(X.shape[1])  # Initialize weights
    
    for _ in range(epochs):
        prediction = X.dot(theta)
        error = prediction - y
        gradient = (2 / m) * X.T.dot(error)
        theta -= learning_rate * gradient  # Update weights
    
    return theta

# Example data for linear regression
X = np.array([[1, 1], [1, 2], [1, 3]])  # first column of ones is the bias term
y = np.array([1, 2, 3])

# Train using gradient descent
theta = gradient_descent(X, y)
print(f"Optimal weights: {theta}")

Explanation: This code implements gradient descent to minimize a simple mean squared error cost function for linear regression.
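
A quick way to confirm the descent worked is to evaluate the mean squared error at the returned weights; on this toy data (the targets equal the second feature exactly), it should be close to zero:

mse = np.mean((X.dot(theta) - y) ** 2)
print(f"Final MSE: {mse}")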

13) Write a PCA implementation for dimensionality reduction.


import numpy as np

def pca(X, n_components=1):
    # Center the data by subtracting the mean
    X_centered = X - np.mean(X, axis=0)
    
    # Calculate covariance matrix
    covariance_matrix = np.cov(X_centered.T)
    
    # Calculate eigenvalues and eigenvectors; eigh is preferred over eig for
    # symmetric matrices like covariance matrices (real outputs, better stability)
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)
    
    # Sort eigenvectors by eigenvalues in descending order
    sorted_indices = np.argsort(eigenvalues)[::-1]
    eigenvectors_sorted = eigenvectors[:, sorted_indices]
    
    # Select top n_components eigenvectors
    eigenvectors_selected = eigenvectors_sorted[:, :n_components]
    
    # Project data onto the selected eigenvectors
    X_pca = X_centered.dot(eigenvectors_selected)
    
    return X_pca

# Example data
X = np.array([[2, 3], [3, 4], [4, 5], [5, 6]])

# Apply PCA
X_reduced = pca(X, n_components=1)
print(f"Reduced data: \n{X_reduced}")

Explanation: PCA reduces the dimensionality of the dataset by projecting the data onto the top principal components (eigenvectors of the covariance matrix).
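
The eigenvalues also show how much variance each component captures. A short sketch of the explained-variance ratios (assuming the eigh-based variant above, whose eigenvalues come back in ascending order):

X_centered = X - np.mean(X, axis=0)
eigenvalues, _ = np.linalg.eigh(np.cov(X_centered.T))
explained = eigenvalues[::-1] / np.sum(eigenvalues)
print(f"Explained variance ratios: {explained}")  # ~[1, 0]: the data lies on a line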

14) Write a basic implementation of an RNN.


import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def rnn(X, hidden_size=3, epochs=100):
    input_size = X.shape[1]
    output_size = 1  # Binary output (0 or 1)
    
    # Initialize weights
    Wxh = np.random.randn(input_size, hidden_size)   # input -> hidden
    Whh = np.random.randn(hidden_size, hidden_size)  # hidden -> hidden (recurrence)
    Why = np.random.randn(hidden_size, output_size)  # hidden -> output
    bh = np.zeros((1, hidden_size))
    by = np.zeros((1, output_size))
    
    for epoch in range(epochs):
        # Reset the hidden state at the start of each pass over the sequence
        h = np.zeros((1, hidden_size))
        total_loss = 0.0
        
        for x in X:
            # Forward pass: the new hidden state depends on the input and the previous state
            h = sigmoid(np.dot(x, Wxh) + np.dot(h, Whh) + bh)
            y = sigmoid(np.dot(h, Why) + by)
            
            # Squared error against a dummy target of 1, for demonstration only;
            # no weights are updated (real training needs backpropagation through time)
            total_loss += np.square(y - 1).item()
        
        if epoch % 20 == 0:
            print(f"Epoch {epoch}, Loss: {total_loss:.4f}")
    
    return Wxh, Whh, Why, bh, by

# Example data
X = np.array([[0, 1], [1, 0], [1, 1], [0, 0]])

# Run the RNN forward passes and keep the weights for inspection
Wxh, Whh, Why, bh, by = rnn(X)

Explanation: This RNN sketch runs only the forward pass: each hidden state is computed from the current input and the previous hidden state with sigmoid activations, and a squared-error loss against a dummy target is reported. No gradients flow backward, so the weights never change; training a real RNN requires backpropagation through time (BPTT).
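
The property interviewers usually want highlighted is that the hidden state carries information across time steps. A short trace with the weights returned above (purely illustrative) makes that visible:

h = np.zeros((1, 3))  # matches the default hidden_size
for t, x in enumerate(X):
    h = sigmoid(np.dot(x, Wxh) + np.dot(h, Whh) + bh)
    print(f"Step {t}: hidden state {h.round(3)}")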

15) Write a function that computes the accuracy of a classifier.


import numpy as np

def accuracy(y_true, y_pred):
    correct = np.sum(y_true == y_pred)
    total = len(y_true)
    return correct / total

# Example data
y_true = np.array([0, 1, 1, 0])
y_pred = np.array([0, 1, 0, 0])

# Compute accuracy
acc = accuracy(y_true, y_pred)
print(f"Accuracy: {acc}")

Explanation: This function computes the accuracy of a classification model by comparing the predicted values with the true values.

16) Write the softmax function for multi-class classification.


import numpy as np

def softmax(x):
    exp_x = np.exp(x - np.max(x))
    return exp_x / np.sum(exp_x, axis=0)

# Example data (raw scores from a classifier)
x = np.array([1.0, 2.0, 3.0])

# Apply softmax
probabilities = softmax(x)
print(f"Softmax Output: {probabilities}")

Explanation: Softmax converts raw scores (logits) into probabilities that sum to 1 for multi-class classification. Subtracting the maximum logit before exponentiating does not change the result but prevents overflow for large inputs.
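
In a network, logits usually arrive as a batch with one row per sample, so a row-wise variant is handy (softmax_batch is a hypothetical extension of the function above):

def softmax_batch(X):
    exp_X = np.exp(X - np.max(X, axis=1, keepdims=True))
    return exp_X / np.sum(exp_X, axis=1, keepdims=True)

print(softmax_batch(np.array([[1.0, 2.0, 3.0], [1.0, 1.0, 1.0]])))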