8 Python script examples that can be useful in digital forensics

Python is an excellent programming language for conducting cyber forensics investigations because it has built-in protections that maintain the integrity of digital evidence. In this article series we will review a few Python code examples that were created for digital forensics purposes.

  1. A script that hashes a given file and compares the result to a known hash value to verify the file’s integrity:




import hashlib

def hash_file(filename, algorithm='sha1', chunk_size=65536):
    """Return the hexadecimal digest of a file's contents.

    The file is read in fixed-size chunks so that large evidence files are
    never loaded into memory in one piece.

    Args:
        filename: Path of the file to hash.
        algorithm: Name of any algorithm accepted by ``hashlib.new``
            (for example ``'sha1'``, ``'sha256'``, ``'md5'``). Defaults to
            ``'sha1'``, which is what this function always used before the
            parameter existed.
        chunk_size: Number of bytes to read per iteration; must be positive.

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``algorithm`` is not
            known to ``hashlib``.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size <= 0:
        # read(0) would return b'' immediately and silently hash nothing.
        raise ValueError("chunk_size must be a positive integer")
    h = hashlib.new(algorithm)
    with open(filename, 'rb') as file:
        # iter() with a sentinel stops on the first empty read (end of file).
        for chunk in iter(lambda: file.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()

# Reference digest recorded earlier, and the file whose integrity is checked.
file_to_check = 'example.txt'
known_hash = 'd5f1d049dae5a92a5a9f5db0d1aab68c5b10fa8a'

# Any difference between the fresh digest and the reference means the
# content changed.
if hash_file(file_to_check) != known_hash:
    print("The file has been modified.")
else:
    print("The file has not been modified.")

2. A script that searches a hard drive or folder for specific file types and copies them to a new location:

import os
import shutil

def copy_files(src_dir, dest_dir, file_type):
    """Copy every file in ``src_dir`` whose name ends with ``file_type``.

    Only the top level of ``src_dir`` is examined; sub-directories are not
    searched. The suffix comparison is case-sensitive.

    Args:
        src_dir: Directory to look in.
        dest_dir: Directory to copy matching files into. It is created
            (including parents) when it does not exist yet.
        file_type: Suffix to match, e.g. ``'.doc'``. A tuple of suffixes is
            also accepted because it is passed straight to ``str.endswith``.

    Raises:
        OSError: If ``src_dir`` cannot be listed or a copy fails.
    """
    entries = os.listdir(src_dir)
    # Without an existing destination directory shutil.copy would write each
    # match to a single *file* named dest_dir, overwriting the previous one.
    os.makedirs(dest_dir, exist_ok=True)
    for entry in entries:
        src_path = os.path.join(src_dir, entry)
        # A directory called e.g. "old.doc" matches the suffix but cannot be
        # copied with shutil.copy, so only regular files are taken.
        if entry.endswith(file_type) and os.path.isfile(src_path):
            shutil.copy(src_path, dest_dir)

# Example run: collect the .doc files from the source folder.
copy_files(
    src_dir='C:\\Source\\Folder',
    dest_dir='C:\\Destination\\Folder',
    file_type='.doc',
)

3. A script that extracts metadata from a file:

import os

def extract_metadata(filename):
    """Return basic file-system metadata for ``filename``.

    Args:
        filename: Path of the file to inspect.

    Returns:
        A dict with the keys ``'size'`` (bytes), ``'last_modified'``,
        ``'last_accessed'`` and ``'created'``. The three time values are
        seconds since the epoch as reported by ``os.stat``.

        ``'created'`` is the real creation ("birth") time where
        ``os.stat`` exposes ``st_birthtime``. Otherwise it falls back to
        ``st_ctime``, which on Unix is the time of the last metadata change
        rather than the creation time.

    Raises:
        OSError: If the file cannot be stat'ed.
    """
    stat_info = os.stat(filename)
    try:
        created = stat_info.st_birthtime
    except AttributeError:
        # Platform does not report a birth time; keep the historical value.
        created = stat_info.st_ctime
    return {
        'size': stat_info.st_size,
        'last_modified': stat_info.st_mtime,
        'last_accessed': stat_info.st_atime,
        'created': created,
    }

# Example run: show the metadata dictionary for a sample file.
print(extract_metadata(filename='example.txt'))

4. A script that decrypts password-protected files using a word list:

import zipfile

def decrypt_zip(zip_file, wordlist, extract_dir=None):
    """Try every password in a word list against an open ZIP archive.

    Each candidate is tried by attempting a full extraction; the first
    candidate that extracts without raising is reported and returned.

    Args:
        zip_file: An open ``zipfile.ZipFile`` object.
        wordlist: Path of a file with one candidate password per line.
        extract_dir: Directory to extract into. ``None`` (the default)
            extracts into the current working directory, as before.

    Returns:
        The matching password as ``str``, or ``None`` when no candidate
        worked. Candidates are sent to the archive as the exact bytes found
        in the word list; bytes that are not valid UTF-8 appear as U+FFFD in
        the returned/printed text.

    Raises:
        OSError: If the word list cannot be opened.
    """
    # Binary mode: real-world word lists often contain non-UTF-8 bytes, which
    # would abort a text-mode read with UnicodeDecodeError mid-way.
    with open(wordlist, 'rb') as f:
        for line in f:
            candidate = line.strip()
            try:
                zip_file.extractall(path=extract_dir, pwd=candidate)
            except Exception:
                # Deliberately broad: a wrong password can surface as several
                # different errors (bad password, CRC failure, decoder
                # error), and all of them just mean "try the next one".
                continue
            password = candidate.decode('utf-8', errors='replace')
            print(f"Password found: {password}")
            return password
    return None

# Open the archive in a context manager so its file handle is closed once
# the word-list attack has finished (the bare ZipFile(...) was never closed).
with zipfile.ZipFile('encrypted.zip') as zip_file:
    decrypt_zip(zip_file, 'wordlist.txt')

5. A script that parses and analyzes log files from a computer’s operating system or applications:

import re

def analyze_logs(log_file):
    """Print every line of a log file that mentions an error or a warning.

    A line is reported when it contains ``error`` or ``warning`` in any
    letter case. Each match is printed once, without an extra blank line
    after it.

    Args:
        log_file: Path of the text log to scan.

    Raises:
        OSError: If the log file cannot be opened.
    """
    # errors='replace' keeps the scan going when the log contains bytes that
    # are not valid in the default text encoding.
    with open(log_file, 'r', errors='replace') as f:
        for line in f:
            lowered = line.lower()
            if 'error' in lowered or 'warning' in lowered:
                # The line still carries its own newline; strip it so print()
                # does not emit an empty line after every match.
                print(line.rstrip('\n'))

# Example run: scan the system log for error and warning lines.
analyze_logs(log_file='system.log')

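6. A script that copies the files found on a hard drive or other storage device to a recovery folder (note: it only reaches files the file system still lists; recovering truly deleted files requires a carving tool that reads the raw device):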

import os
import shutil

def recover_deleted_files(src_folder, dest_folder):
    """Copy every reachable file under ``src_folder`` into ``dest_folder``.

    The tree is walked with ``os.walk`` and each file whose inode number is
    non-zero is copied with ``shutil.copy2`` (contents plus timestamps).
    This only sees entries the file system still lists; it does not carve
    deleted data from unallocated space.

    All files land directly in ``dest_folder``, so files that share a name
    in different sub-directories overwrite one another.

    Args:
        src_folder: Directory tree to walk.
        dest_folder: Directory that receives the copies. It is created
            (including parents) when it does not exist yet.

    Raises:
        OSError: If the destination cannot be created or a copy fails.
    """
    # Without an existing directory shutil.copy2 would write every file to a
    # single path named dest_folder, each copy replacing the previous one.
    os.makedirs(dest_folder, exist_ok=True)
    dest_real = os.path.realpath(dest_folder)
    for root, dirs, files in os.walk(src_folder):
        # Prune the destination from the walk so freshly written copies are
        # not picked up again when it lives inside src_folder.
        dirs[:] = [
            name for name in dirs
            if os.path.realpath(os.path.join(root, name)) != dest_real
        ]
        for file in files:
            src_path = os.path.join(root, file)
            try:
                inode = os.stat(src_path).st_ino
            except OSError:
                # e.g. a dangling symlink or an entry that vanished mid-walk
                continue
            if inode != 0:
                shutil.copy2(src_path, dest_folder)

# Example run: copy whatever can be read from the source into ./recovered_files.
# NOTE(review): the function walks a directory tree; '/dev/sda1' looks like a
# device node rather than a mounted directory - confirm the intended path.
recover_deleted_files(
    src_folder='/dev/sda1',
    dest_folder='./recovered_files',
)

7. A script that creates a timeline of activity on a computer by analyzing various data sources:

import os
import time

def create_timeline(data_sources, root='/', wtmp_path='/var/log/wtmp',
                    max_login_records=2):
    """Build a chronologically sorted list of events from several sources.

    Args:
        data_sources: Iterable of source names. ``'file_system'`` adds one
            event per file below ``root``; ``'login_history'`` adds one event
            per user-login record read from ``wtmp_path``. Unknown names are
            ignored.
        root: Directory tree walked for ``'file_system'``.
        wtmp_path: Login database read for ``'login_history'``.
        max_login_records: How many records to read from the end of the wtmp
            file (the default of 2 matches the 768 bytes that were read
            originally). ``None`` reads the whole file.

    Returns:
        A list of dicts sorted by their ``'timestamp'`` key (seconds since
        the epoch). File events carry ``type``, ``name``, ``path``,
        ``modified``, ``accessed``, ``created`` and ``timestamp`` (equal to
        ``modified``); login events carry ``type``, ``user``, ``tty`` and
        ``timestamp``.

    Raises:
        OSError: If ``'login_history'`` is requested and ``wtmp_path`` cannot
            be read.
    """
    timeline = []
    for source in data_sources:
        if source == 'file_system':
            timeline.extend(_file_events(root))
        elif source == 'login_history':
            timeline.extend(_login_events(wtmp_path, max_login_records))
    # Every event type carries 'timestamp', so mixed sources sort together.
    timeline.sort(key=lambda event: event['timestamp'])
    return timeline


def _file_events(root):
    """Return one timeline event per file that can be stat'ed below ``root``."""
    events = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for name in filenames:
            file_path = os.path.join(dirpath, name)
            try:
                file_stats = os.stat(file_path)
            except OSError:
                # Dangling symlinks, permission problems and files that
                # vanish mid-walk must not abort the whole timeline.
                continue
            events.append({
                'type': 'file',
                'name': name,
                'path': file_path,
                'modified': file_stats.st_mtime,
                'accessed': file_stats.st_atime,
                'created': file_stats.st_ctime,
                'timestamp': file_stats.st_mtime,
            })
    return events


def _login_events(wtmp_path, max_records):
    """Return login events parsed from the last records of a wtmp file."""
    import struct

    # Layout of one glibc/Linux ``struct utmp`` record (384 bytes): type,
    # pid, line, id, user, host, exit status, session, tv_sec, tv_usec,
    # IPv6 address, padding. Other platforms may use a different layout.
    record = struct.Struct('=hxxi32s4s32s256shhiii4i20s')
    user_process = 7  # ut_type value of a normal user login

    with open(wtmp_path, 'rb') as f:
        f.seek(0, os.SEEK_END)
        complete = f.tell() // record.size
        wanted = complete if max_records is None else min(complete, max_records)
        # Seek from the start of the record grid instead of a fixed negative
        # offset, which fails on files shorter than that offset.
        f.seek((complete - wanted) * record.size)
        data = f.read(wanted * record.size)

    events = []
    for fields in record.iter_unpack(data):
        ut_type, line, user = fields[0], fields[2], fields[4]
        tv_sec, tv_usec = fields[9], fields[10]
        if ut_type != user_process:
            continue
        events.append({
            'type': 'login',
            # Fixed-width fields are NUL padded; keep the text before it.
            'user': user.split(b'\x00', 1)[0].decode('utf-8', errors='replace'),
            'tty': line.split(b'\x00', 1)[0].decode('utf-8', errors='replace'),
            'timestamp': tv_sec + tv_usec / 1_000_000,
        })
    return events

# Example run: combine file-system and login activity into one timeline.
print(create_timeline(data_sources=['file_system', 'login_history']))

8. A script that analyzes the contents of a disk image to extract files and other information:

import pytsk3

def analyze_disk_image(image_file):
    """List the partitions of a disk image and the root entries of each.

    For every volume-system entry a "Found partition" line is printed.
    Entries described as unallocated or free space are skipped; for the
    rest the file system is opened and every entry of its root directory is
    printed with its inode number.

    Args:
        image_file: Path of the raw disk image to open with pytsk3.

    Raises:
        IOError: If the image or its partition table cannot be opened.
    """
    image = pytsk3.Img_Info(image_file)
    partition_table = pytsk3.Volume_Info(image)
    # Use the sector size reported by the volume system rather than assuming
    # 512 bytes, so images with larger sectors get correct byte offsets.
    sector_size = partition_table.info.block_size
    for partition in partition_table:
        desc = partition.desc
        if isinstance(desc, bytes):
            # pytsk3 hands descriptions back as bytes; a str-in-bytes test
            # raises TypeError, so decode before comparing.
            desc = desc.decode('utf-8', errors='replace')
        print(f"Found partition: {partition.addr} {desc} {partition.start} {partition.len}")
        if 'Unallocated' in desc or 'Free Space' in desc:
            continue
        try:
            filesystem = pytsk3.FS_Info(image, offset=partition.start * sector_size)
        except IOError as exc:
            # Table/metadata entries and unsupported file systems cannot be
            # opened; report and move on instead of aborting the analysis.
            print(f"Skipping partition {partition.addr}: no readable file system ({exc})")
            continue
        root_dir = filesystem.open_dir(path="/")
        for file in root_dir:
            name = file.info.name.name
            if isinstance(name, bytes):
                name = name.decode('utf-8', errors='replace')
            # Some directory entries carry no metadata structure.
            meta = file.info.meta
            inode = meta.addr if meta is not None else None
            print(f"Found file: {name} (inode: {inode})")

# Example run: inspect a raw disk image.
analyze_disk_image(image_file='disk.dd')