Computer Vision Projects
Learn practical computer vision applications with Python, OpenCV, and TensorFlow: from image classification with CNNs to YOLO object detection and advanced deep learning techniques for image analysis.
Projects in This Guide
- • Image classification with CNNs
- • Object detection with YOLO
- • Face recognition system
- • Image enhancement with OpenCV
- • Style transfer with neural networks
- • Real-time video processing
Project 1: Image Classification with CNNs
Build a convolutional neural network that automatically classifies images into different categories. A perfect introduction to deep-learning-based image processing.
🎯 Project Goal
Train a CNN model that distinguishes images of dogs and cats with >90% accuracy.
Setup and Data Preprocessing
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np

# Check GPU support
print("GPU available:", tf.config.list_physical_devices('GPU'))

# Data augmentation for better generalization
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.2,
    validation_split=0.2
)

test_datagen = ImageDataGenerator(rescale=1./255)

# Load the data (assumes the dogs-vs-cats dataset)
train_generator = train_datagen.flow_from_directory(
    'data/train',
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    subset='training'
)

validation_generator = train_datagen.flow_from_directory(
    'data/train',
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    subset='validation'
)

print(f"Classes found: {train_generator.class_indices}")
print(f"Training images: {train_generator.samples}")
print(f"Validation images: {validation_generator.samples}")
Building the CNN Model
# Define the CNN architecture
model = models.Sequential([
    # First convolution block
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D(2, 2),

    # Second convolution block
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),

    # Third convolution block
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),

    # Fourth convolution block
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),

    # Flatten for the dense layers
    layers.Flatten(),

    # Dropout against overfitting
    layers.Dropout(0.5),

    # Dense layer
    layers.Dense(512, activation='relu'),

    # Output layer (binary classification)
    layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Show the model architecture
model.summary()
Training and Evaluation
# Callbacks for better training
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_accuracy',
        patience=5,
        restore_best_weights=True
    ),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=3,
        min_lr=0.0001
    )
]

# Train the model
history = model.fit(
    train_generator,
    epochs=30,
    validation_data=validation_generator,
    callbacks=callbacks,
    verbose=1
)

# Visualize the training history
def plot_training_history(history):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

    # Accuracy
    ax1.plot(history.history['accuracy'], label='Training Accuracy')
    ax1.plot(history.history['val_accuracy'], label='Validation Accuracy')
    ax1.set_title('Model Accuracy')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Accuracy')
    ax1.legend()

    # Loss
    ax2.plot(history.history['loss'], label='Training Loss')
    ax2.plot(history.history['val_loss'], label='Validation Loss')
    ax2.set_title('Model Loss')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Loss')
    ax2.legend()

    plt.tight_layout()
    plt.show()

plot_training_history(history)

# Save the model
model.save('dogs_vs_cats_model.h5')
print("Model saved!")
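Once training is done, the saved model can be reused for predictions on new images. The following is a minimal sketch of single-image inference; the file name test_image.jpg is a placeholder, and the class mapping assumes the alphabetical order (cats = 0, dogs = 1) reported by train_generator.class_indices above.

import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image

# Load the model saved above
model = load_model('dogs_vs_cats_model.h5')

def predict_image(img_path):
    # Preprocess exactly like the training data: 150x150 pixels, scaled to [0, 1]
    img = image.load_img(img_path, target_size=(150, 150))
    x = image.img_to_array(img) / 255.0
    x = np.expand_dims(x, axis=0)

    # Sigmoid output: values close to 1 indicate class 1, values close to 0 indicate class 0
    prob = float(model.predict(x)[0][0])
    label = 'dog' if prob > 0.5 else 'cat'  # assumes {'cats': 0, 'dogs': 1}
    return label, prob

# 'test_image.jpg' is a hypothetical example file
print(predict_image('test_image.jpg'))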
Project 2: Object Detection with YOLO
Implement a YOLO (You Only Look Once) system for real-time object detection in images and videos. Ideal for practical computer vision applications.
🚀 What You Will Learn
Understand the YOLO architecture, use pre-trained models, and build your own object detection systems.
Implementing YOLO with OpenCV
import cv2
import numpy as np

class YOLODetector:
    def __init__(self, weights_path, config_path, names_path):
        # Load the YOLO network
        self.net = cv2.dnn.readNet(weights_path, config_path)

        # Load the class names
        with open(names_path, 'r') as f:
            self.classes = [line.strip() for line in f.readlines()]

        # Determine the output layers (flatten() works across old and new OpenCV return formats)
        layer_names = self.net.getLayerNames()
        self.output_layers = [layer_names[i - 1] for i in self.net.getUnconnectedOutLayers().flatten()]

        # Colors for the bounding boxes
        self.colors = np.random.uniform(0, 255, size=(len(self.classes), 3))

    def detect_objects(self, image, confidence_threshold=0.5, nms_threshold=0.4):
        height, width, channels = image.shape

        # Prepare the image for YOLO
        blob = cv2.dnn.blobFromImage(
            image,
            scalefactor=1/255.0,
            size=(416, 416),
            mean=(0, 0, 0),
            swapRB=True,
            crop=False
        )

        # Forward pass
        self.net.setInput(blob)
        outputs = self.net.forward(self.output_layers)

        # Process the detections
        boxes, confidences, class_ids = [], [], []

        for output in outputs:
            for detection in output:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]

                if confidence > confidence_threshold:
                    # Bounding box center and size (relative coordinates scaled to pixels)
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)

                    # Top-left corner of the box
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Non-maximum suppression
        indices = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, nms_threshold)

        return boxes, confidences, class_ids, indices

    def draw_detections(self, image, boxes, confidences, class_ids, indices):
        if len(indices) > 0:
            for i in indices.flatten():
                x, y, w, h = boxes[i]

                # Class name and confidence
                label = f"{self.classes[class_ids[i]]}: {confidences[i]:.2f}"
                color = self.colors[class_ids[i]]

                # Draw the bounding box
                cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
                cv2.putText(
                    image, label, (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2
                )

        return image
Using YOLO on a Video Stream
# Initialize the YOLO detector
detector = YOLODetector(
    weights_path='yolov3.weights',
    config_path='yolov3.cfg',
    names_path='coco.names'
)

def process_video(source=0):  # 0 for the webcam, or a path to a video file
    cap = cv2.VideoCapture(source)

    # Video properties (webcams may report 0 FPS, so fall back to 30)
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Video writer for the output (optional)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter('output.avi', fourcc, fps, (width, height))

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Object detection
        boxes, confidences, class_ids, indices = detector.detect_objects(frame)

        # Draw the detections
        result_frame = detector.draw_detections(
            frame.copy(), boxes, confidences, class_ids, indices
        )

        # Show the FPS
        cv2.putText(
            result_frame, f'FPS: {fps}', (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2
        )

        # Show the frame
        cv2.imshow('YOLO Object Detection', result_frame)

        # Save the video (optional)
        out.write(result_frame)

        # Quit with 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Clean up
    cap.release()
    out.release()
    cv2.destroyAllWindows()

# Start the video processing
process_video(0)  # use the webcam
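The same detector also works on still images without the video loop. A short usage sketch, assuming a local example image street.jpg (a hypothetical file name):

# Detect objects in a single image (hypothetical file name)
image = cv2.imread('street.jpg')
boxes, confidences, class_ids, indices = detector.detect_objects(image)

# Draw and save the result
result = detector.draw_detections(image.copy(), boxes, confidences, class_ids, indices)
cv2.imwrite('street_detected.jpg', result)
print(f"Objects detected: {len(indices)}")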
Project 3: Face Recognition System
Build a complete face recognition system covering face detection, encoding, and verification.
🔒 Applications
Security systems, attendance tracking, personalized user experiences.
Face Recognition with face_recognition
import face_recognition
import cv2
import numpy as np
import os
import pickle
from datetime import datetime

class FaceRecognitionSystem:
    def __init__(self):
        self.known_face_encodings = []
        self.known_face_names = []
        self.face_locations = []
        self.face_encodings = []
        self.face_names = []

    def load_known_faces(self, faces_dir):
        """Load known faces from a directory"""
        print("Loading known faces...")

        for filename in os.listdir(faces_dir):
            if filename.endswith(('.jpg', '.jpeg', '.png')):
                # Load the image
                image_path = os.path.join(faces_dir, filename)
                image = face_recognition.load_image_file(image_path)

                # Extract the face encoding
                face_encodings = face_recognition.face_encodings(image)

                if face_encodings:
                    # Use the file name as the person's name
                    name = os.path.splitext(filename)[0]
                    self.known_face_encodings.append(face_encodings[0])
                    self.known_face_names.append(name)
                    print(f"Face loaded: {name}")
                else:
                    print(f"No face found in: {filename}")

    def save_encodings(self, filepath):
        """Save the encodings"""
        data = {
            'encodings': self.known_face_encodings,
            'names': self.known_face_names
        }
        with open(filepath, 'wb') as f:
            pickle.dump(data, f)
        print(f"Encodings saved to: {filepath}")

    def load_encodings(self, filepath):
        """Load the encodings"""
        try:
            with open(filepath, 'rb') as f:
                data = pickle.load(f)
            self.known_face_encodings = data['encodings']
            self.known_face_names = data['names']
            print(f"Encodings loaded from: {filepath}")
            return True
        except FileNotFoundError:
            print(f"Encodings file not found: {filepath}")
            return False

    def recognize_faces_in_image(self, image):
        """Recognize faces in a single image"""
        # Convert the image from BGR to RGB
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Find and encode faces
        face_locations = face_recognition.face_locations(rgb_image)
        face_encodings = face_recognition.face_encodings(rgb_image, face_locations)

        face_names = []
        for face_encoding in face_encodings:
            # Compare the face with the known faces
            matches = face_recognition.compare_faces(
                self.known_face_encodings, face_encoding, tolerance=0.6
            )
            name = "Unknown"

            # Find the best match
            face_distances = face_recognition.face_distance(
                self.known_face_encodings, face_encoding
            )

            if matches and len(face_distances) > 0:
                best_match_index = np.argmin(face_distances)
                if matches[best_match_index]:
                    name = self.known_face_names[best_match_index]

            face_names.append(name)

        return face_locations, face_names
Real-time Face Recognition
def run_face_recognition():
    # Initialize the system
    fr_system = FaceRecognitionSystem()

    # Load saved encodings, or build them from the known faces
    if not fr_system.load_encodings('face_encodings.pkl'):
        fr_system.load_known_faces('known_faces/')
        fr_system.save_encodings('face_encodings.pkl')

    # Initialize the webcam
    video_capture = cv2.VideoCapture(0)

    # Optimization: only process every other frame
    process_this_frame = True
    frame_count = 0

    # Attendance log
    attendance_log = set()

    while True:
        ret, frame = video_capture.read()
        if not ret:
            break

        # Shrink the frame for better performance
        small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

        # Only process every other frame
        if process_this_frame:
            # Run the face recognition
            face_locations, face_names = fr_system.recognize_faces_in_image(small_frame)

            # Scale the coordinates back up to the full frame size
            face_locations = [(top * 4, right * 4, bottom * 4, left * 4)
                              for (top, right, bottom, left) in face_locations]

        process_this_frame = not process_this_frame

        # Draw the results
        for (top, right, bottom, left), name in zip(face_locations, face_names):
            # Bounding box
            color = (0, 255, 0) if name != "Unknown" else (0, 0, 255)
            cv2.rectangle(frame, (left, top), (right, bottom), color, 2)

            # Label
            cv2.rectangle(frame, (left, bottom - 35), (right, bottom), color, cv2.FILLED)
            cv2.putText(
                frame, name, (left + 6, bottom - 6),
                cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 255, 255), 1
            )

            # Log attendance
            if name != "Unknown" and name not in attendance_log:
                attendance_log.add(name)
                timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                print(f"Present: {name} at {timestamp}")

        # Show frame info
        frame_count += 1
        cv2.putText(
            frame, f'Frame: {frame_count}', (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2
        )
        cv2.putText(
            frame, f'Present: {len(attendance_log)}', (10, 60),
            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2
        )

        # Show the frame
        cv2.imshow('Face Recognition', frame)

        # Quit with 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Clean up
    video_capture.release()
    cv2.destroyAllWindows()

    # Print the final attendance list
    print("\nFinal attendance list:")
    for person in attendance_log:
        print(f"- {person}")

# Start the system
if __name__ == "__main__":
    run_face_recognition()
Advanced Computer Vision Techniques
Style Transfer
Transfer the style of an artwork onto your own images using neural networks.
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt

# Load a pre-trained style transfer model
model = hub.load('https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2')

def load_and_preprocess_image(path, max_dim=512):
    image = tf.io.read_file(path)
    image = tf.image.decode_image(image, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)

    # Resize so the longest side equals max_dim
    shape = tf.cast(tf.shape(image)[:-1], tf.float32)
    long_dim = tf.reduce_max(shape)
    scale = max_dim / long_dim

    new_shape = tf.cast(shape * scale, tf.int32)
    image = tf.image.resize(image, new_shape)
    image = image[tf.newaxis, :]
    return image

def apply_style_transfer(content_path, style_path):
    # Load the images
    content_image = load_and_preprocess_image(content_path)
    style_image = load_and_preprocess_image(style_path)

    # Apply the style transfer
    stylized_image = model(tf.constant(content_image), tf.constant(style_image))[0]
    return stylized_image

# Example usage
content_path = 'content_image.jpg'
style_path = 'style_image.jpg'

stylized = apply_style_transfer(content_path, style_path)

# Show the result
plt.figure(figsize=(15, 5))

plt.subplot(1, 3, 1)
plt.imshow(load_and_preprocess_image(content_path)[0])
plt.title('Content')
plt.axis('off')

plt.subplot(1, 3, 2)
plt.imshow(load_and_preprocess_image(style_path)[0])
plt.title('Style')
plt.axis('off')

plt.subplot(1, 3, 3)
plt.imshow(stylized[0])
plt.title('Stylized')
plt.axis('off')

plt.show()
Image Segmentation
Segment images at the pixel level for precise object detection.
import cv2
import numpy as np

def semantic_segmentation_kmeans(image, k=3):
    """Simple segmentation with k-means clustering"""
    # Reshape the image into a 2D array of pixels
    data = image.reshape((-1, 3))
    data = np.float32(data)

    # K-means clustering
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 20, 1.0)
    _, labels, centers = cv2.kmeans(data, k, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

    # Convert the cluster centers to uint8
    centers = np.uint8(centers)

    # Build the segmented image
    segmented_data = centers[labels.flatten()]
    segmented_image = segmented_data.reshape(image.shape)

    return segmented_image, labels.reshape(image.shape[:2])

def watershed_segmentation(image):
    """Watershed algorithm for object segmentation"""
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Remove noise
    ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Morphological operations
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

    # Determine the sure background
    sure_bg = cv2.dilate(opening, kernel, iterations=3)

    # Determine the sure foreground
    dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
    ret, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(), 255, 0)

    # Unknown region
    sure_fg = np.uint8(sure_fg)
    unknown = cv2.subtract(sure_bg, sure_fg)

    # Create the markers
    ret, markers = cv2.connectedComponents(sure_fg)
    markers = markers + 1
    markers[unknown == 255] = 0

    # Apply watershed
    markers = cv2.watershed(image, markers)
    image[markers == -1] = [255, 0, 0]  # mark the boundaries (blue in BGR)

    return image, markers

# Example usage
image = cv2.imread('example.jpg')

# K-means segmentation
segmented_kmeans, labels = semantic_segmentation_kmeans(image, k=4)

# Watershed segmentation
segmented_watershed, markers = watershed_segmentation(image.copy())

# Show the results
cv2.imshow('Original', image)
cv2.imshow('K-Means Segmentation', segmented_kmeans)
cv2.imshow('Watershed Segmentation', segmented_watershed)
cv2.waitKey(0)
cv2.destroyAllWindows()
Performance Optimization
- ⚡ GPU acceleration: use CUDA and OpenCL for compute-intensive operations
- ⚡ Reduce image size: lower resolutions for real-time processing
- ⚡ Model optimization: TensorRT and ONNX for deployment optimization
- ⚡ Batch processing: process several images at once (see the sketch below)
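As an illustration of the batch processing tip, here is a minimal sketch that classifies several images in a single forward pass with the dogs-vs-cats model from Project 1. The file names passed to predict_batch are hypothetical placeholders.

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image

def predict_batch(model, image_paths, batch_size=32):
    # Load and preprocess all images the same way as in training (150x150, scaled to [0, 1])
    batch = np.stack([
        image.img_to_array(image.load_img(p, target_size=(150, 150))) / 255.0
        for p in image_paths
    ])
    # One forward pass over the whole batch instead of one call per image
    return model.predict(batch, batch_size=batch_size)

# Usage (hypothetical paths)
model = tf.keras.models.load_model('dogs_vs_cats_model.h5')
probs = predict_batch(model, ['img1.jpg', 'img2.jpg', 'img3.jpg'])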
Further Projects
Advanced Projects
- → 3D object detection and tracking
- → Augmented reality applications
- → Medical image analysis
- → Autonomous vehicle vision
Recommended Tools
- • OpenCV for classical CV
- • TensorFlow/PyTorch for deep learning
- • Detectron2 for object detection
- • MediaPipe for real-time CV