File Handling in Python - Interview Questions and Answers

Python supports various file modes:

  • 'r' – Read (default; fails if the file does not exist)
  • 'w' – Write (creates the file, or truncates it if it already exists)
  • 'a' – Append (writes at the end of the file; creates it if missing)
  • 'x' – Create (fails if the file already exists)
  • 'b' – Binary mode (e.g., 'rb', 'wb')
  • 't' – Text mode (default, e.g., 'rt', 'wt')
  • '+' – Read and write (e.g., 'r+', 'w+', 'a+')
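
A short sketch combining some of these modes (the file names here are only illustrative):

# Append text, creating the file if it does not already exist
with open("notes.txt", "a") as f:
    f.write("new entry\n")

# Read raw bytes from a binary file
with open("image.png", "rb") as f:
    header = f.read(8)

# Write, then read the same file back ('w+' truncates or creates it)
with open("report.txt", "w+") as f:
    f.write("draft\n")
    f.seek(0)
    print(f.read())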

file = open("example.txt", "r")  # Open file in read mode
file.close()  # Close file

 

  • read(size): Reads up to size characters (or the whole file if size is omitted).
  • readline(): Reads a single line from the file.
  • readlines(): Reads all lines and returns a list.

Example:

with open("example.txt", "r") as file:
    print(file.read(5))      # Reads first 5 characters
    print(file.readline())   # Reads one line
    print(file.readlines())  # Reads all lines into a list

 

with open("example.txt", "w") as file:
    file.write("Hello, World!\n")

 

The file is overwritten, and previous content is lost.

Use 'a' mode to append without overwriting.

with open("example.txt", "a") as file:
    file.write("Appending new line.\n")

 

Use the os.path.exists() function.

import os

if os.path.exists("example.txt"):
    print("File exists")
else:
    print("File not found")

 

  • 'r+' – Read & write; does not create the file if it is missing.
  • 'w+' – Read & write; truncates the file, or creates it if missing.
  • 'a+' – Read & append; creates the file if missing.
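
A minimal sketch contrasting 'r+' and 'a+' (assuming example.txt already exists):

# 'r+': read the existing content, then overwrite from the start
with open("example.txt", "r+") as f:
    original = f.read()
    f.seek(0)
    f.write("updated")

# 'a+': writes always go to the end; seek(0) is needed before reading
with open("example.txt", "a+") as f:
    f.write("appended line\n")
    f.seek(0)
    print(f.read())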

Use 'rb' and 'wb' modes.

with open("image.jpg", "rb") as file:
    data = file.read()

with open("copy.jpg", "wb") as file:
    file.write(data)

 

Use with open(), which closes the file automatically when the block exits, even if an exception is raised.

with open("example.txt", "r") as file:
    data = file.read()
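
For comparison, a sketch of roughly what the with statement does behind the scenes:

file = open("example.txt", "r")
try:
    data = file.read()
finally:
    file.close()  # runs even if read() raises an exception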

 

  • seek(offset, whence): Moves file pointer to a position.
  • tell(): Returns the current position of the file pointer.

Example:

with open("example.txt", "r") as file:
    file.seek(10)  # Move the file position to offset 10
    print(file.tell())  # Prints current position
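
A sketch of the whence argument (0 = start, 1 = current position, 2 = end); non-zero offsets with whence 1 or 2 require binary mode, and the file is assumed to be at least a few bytes long:

with open("example.txt", "rb") as file:
    file.seek(5)        # 5 bytes from the start (whence defaults to 0)
    file.seek(3, 1)     # 3 bytes forward from the current position
    file.seek(-4, 2)    # 4 bytes before the end of the file
    print(file.tell())  # current position, counted from the start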

 

import os
os.remove("example.txt")

 

import os
os.rename("old.txt", "new.txt")

 

Use 'x' mode.

open("newfile.txt", "x").close()

 

  • Text mode ('t'): Reads/writes data as text (str).
  • Binary mode ('b'): Reads/writes data as bytes (bytes).
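
A small sketch showing the resulting types (assuming example.txt exists):

with open("example.txt", "rt") as f:
    print(type(f.read()))  # <class 'str'>

with open("example.txt", "rb") as f:
    print(type(f.read()))  # <class 'bytes'>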

with open("example.txt", "r") as file:
    for line in file:
        print(line.strip())

 

with open("source.txt", "r") as src, open("destination.txt", "w") as dest:
    dest.write(src.read())

 

with open("example.txt", "r") as file:
    print(len(file.readlines()))

 

with open("example.txt", "r") as file:
    words = file.read().split()
    print(len(words))

 

Use try-except:

try:
    with open("missing.txt", "r") as file:
        data = file.read()
except FileNotFoundError:
    print("File not found!")

 

with open("example.txt", "r") as file:
    for line in reversed(file.readlines()):
        print(line.strip())

 

import os
print(os.path.getsize("example.txt"))

 

import csv

with open("data.csv", "w", newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Name", "Age"])
    writer.writerow(["Alice", 30])

 

The json module.

import json
data = {"name": "Alice", "age": 25}
with open("data.json", "w") as file:
    json.dump(data, file)

 

import logging
logging.basicConfig(filename="app.log", level=logging.INFO)
logging.info("This is a log message")

 

Instead of reading the entire file into memory, read it line by line using a generator:

def read_large_file(file_path):
    with open(file_path, "r") as file:
        for line in file:
            yield line.strip()  # Yield one line at a time

for line in read_large_file("largefile.txt"):
    print(line)

This approach saves memory when working with large files.

Use the encoding parameter in open().

with open("example.txt", "r", encoding="utf-8") as file:
    data = file.read()

If you are unsure of the encoding, the third-party chardet package can detect it.

import chardet

with open("example.txt", "rb") as file:
    raw_data = file.read()
    encoding = chardet.detect(raw_data)["encoding"]

with open("example.txt", "r", encoding=encoding) as file:
    data = file.read()

 

import json

data = {"students": [{"name": "Rahul", "age": 25}, {"name": "Ankit", "age": 23}]}

with open("students.json", "w") as file:
    json.dump(data, file, indent=4)

The indent=4 makes the JSON human-readable.

import json

with open("students.json", "r") as file:
    data = json.load(file)

print(data)  # Dictionary output

 

Use the gzip module for compression.

import gzip

# Compress a file
with open("example.txt", "rb") as f_in, gzip.open("example.txt.gz", "wb") as f_out:
    f_out.writelines(f_in)

# Decompress a file
with gzip.open("example.txt.gz", "rb") as f_in, open("decompressed.txt", "wb") as f_out:
    f_out.writelines(f_in)

This reduces file size for storage or transfer.

Use thread locks to prevent race conditions.

import threading

lock = threading.Lock()

def write_to_file(data):
    with lock:
        with open("threadsafe.txt", "a") as file:
            file.write(data + "\n")

thread1 = threading.Thread(target=write_to_file, args=("Thread 1 Data",))
thread2 = threading.Thread(target=write_to_file, args=("Thread 2 Data",))

thread1.start()
thread2.start()
thread1.join()
thread2.join()

This ensures safe file writing in multi-threaded programs.

import pandas as pd

# Read CSV
df = pd.read_csv("data.csv")
print(df)

# Write CSV
df.to_csv("output.csv", index=False)

Pandas makes it easier to handle structured data.

import pandas as pd

# Read Excel
df = pd.read_excel("data.xlsx")

# Write Excel
df.to_excel("output.xlsx", index=False)

Requires an engine library such as openpyxl for .xlsx files (xlrd now only supports the legacy .xls format).
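
If needed, the engine can be selected explicitly (a sketch; assumes openpyxl is installed):

import pandas as pd

df = pd.read_excel("data.xlsx", engine="openpyxl")
df.to_excel("output.xlsx", index=False, engine="openpyxl")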

Use the watchdog module to detect file changes dynamically.

from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import time

class Watcher(FileSystemEventHandler):
    def on_modified(self, event):
        print(f"File {event.src_path} has been modified.")

observer = Observer()
observer.schedule(Watcher(), path=".", recursive=False)
observer.start()

try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    observer.stop()
observer.join()

This is useful for log monitoring, real-time data processing, etc.

Use mmap to map a file to memory, allowing efficient random access.

import mmap

with open("example.txt", "r+b") as file:
    mm = mmap.mmap(file.fileno(), 0)
    print(mm.readline())  # Read first line
    mm.seek(0)  # Move back to start
    mm.write(b"Hello")  # Modify content
    mm.close()

This can be faster than normal file I/O when you need random access into large files.
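
A sketch of random access with mmap: searching for a byte pattern without reading the whole file (the pattern here is just an example):

import mmap

with open("example.txt", "rb") as file:
    with mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ) as mm:
        index = mm.find(b"needle")   # byte offset of the first match, or -1
        if index != -1:
            mm.seek(index)
            print(mm.readline())     # rest of the line containing the match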

Use psutil to check whether another process currently has the file open.

import psutil

def is_file_locked(file_path):
    for proc in psutil.process_iter(["open_files"]):
        if proc.info["open_files"]:
            for file in proc.info["open_files"]:
                if file.path == file_path:
                    return True
    return False

print(is_file_locked("example.txt"))

This prevents conflicts when multiple processes are accessing a file.

Use try-except and validate file integrity using hashlib.

import hashlib

def get_file_hash(filename):
    hash_md5 = hashlib.md5()
    try:
        with open(filename, "rb") as file:
            for chunk in iter(lambda: file.read(4096), b""):
                hash_md5.update(chunk)
    except OSError:
        print("Could not read the file (possible corruption)!")
        return None
    return hash_md5.hexdigest()

print(get_file_hash("data.txt"))

If the hash doesn't match a known checksum, the file may be corrupted.
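
A sketch of the comparison, reusing get_file_hash from above (the expected value is just a placeholder):

expected = "5d41402abc4b2a76b9719d911017c592"  # known-good checksum (placeholder)
actual = get_file_hash("data.txt")

if actual != expected:
    print("Checksum mismatch: the file may be corrupted or tampered with.")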

Use the fileinput module to replace text in large files in place.

import fileinput

with fileinput.FileInput("largefile.txt", inplace=True, backup=".bak") as file:
    for line in file:
        print(line.replace("old_text", "new_text"), end="")

This modifies the file in-place without loading the whole file into memory.

Move the file to a temporary trash folder instead of deleting it permanently.

import shutil
import os

def safe_delete(file_path):
    trash_dir = "trash"
    os.makedirs(trash_dir, exist_ok=True)
    shutil.move(file_path, trash_dir)

safe_delete("important_file.txt")

This prevents accidental loss of critical files.

Use zipfile with a password.

import zipfile

with zipfile.ZipFile("secure.zip") as zf:
    zf.setpassword(b"mypassword")
    zf.extractall("output_dir")

This is useful for handling encrypted archives; note that the standard zipfile module can only decrypt the legacy ZipCrypto scheme, not AES-encrypted archives.

Seek to the end of the file, then poll in a loop for newly appended lines (similar to tail -f).

import time

def tail_file(file_path):
    with open(file_path, "r") as file:
        file.seek(0, 2)  # Move to the end of file
        while True:
            line = file.readline()
            if line:
                print(line, end="")
            else:
                time.sleep(1)

tail_file("app.log")

This is helpful for real-time log monitoring.

Use the cryptography package's Fernet recipe for symmetric encryption.

from cryptography.fernet import Fernet

key = Fernet.generate_key()  # store this key securely; it is required for decryption
cipher_suite = Fernet(key)

data = "Sensitive data"
encrypted_data = cipher_suite.encrypt(data.encode())

with open("secure_data.txt", "wb") as file:
    file.write(encrypted_data)

print("Data securely written to file.")

Always encrypt sensitive data before storing it.
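
To read the data back, a sketch that continues the snippet above and decrypts with the same cipher_suite:

with open("secure_data.txt", "rb") as file:
    encrypted = file.read()

plain_text = cipher_suite.decrypt(encrypted).decode()
print(plain_text)  # "Sensitive data"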

Use csv.reader or csv.DictReader; DictReader lets you access columns by name.

import csv

with open("data.csv", "r") as file:
    reader = csv.DictReader(file)
    for row in reader:
        print(row["Column1"], row["Column3"])  # Extract specific columns

This is efficient for working with large CSVs.

Use ThreadPoolExecutor for parallel file reading.

from concurrent.futures import ThreadPoolExecutor

def read_file(file_path):
    with open(file_path, "r") as file:
        return file.readlines()

files = ["file1.txt", "file2.txt", "file3.txt"]

with ThreadPoolExecutor() as executor:
    results = executor.map(read_file, files)

for content in results:
    print(content)

Because file reading is I/O-bound, the threads can overlap waiting on disk and speed up reading multiple large files.

Use hashlib to create a SHA-256 checksum.

import hashlib

def file_checksum(file_path):
    sha256 = hashlib.sha256()
    with open(file_path, "rb") as file:
        while chunk := file.read(4096):
            sha256.update(chunk)
    return sha256.hexdigest()

print(file_checksum("data.txt"))

This helps verify file integrity during transfers.

Problem:
You are building a log monitoring system that detects error messages in real time. Write a Python script that continuously monitors a log file (server.log) and prints an alert whenever the word "ERROR" appears.

Solution:

import time

def monitor_log(file_path):
    with open(file_path, "r") as file:
        file.seek(0, 2)  # Move to the end of the file
        while True:
            line = file.readline()
            if line:
                if "ERROR" in line:
                    print("ALERT: Error detected ->", line.strip())
            else:
                time.sleep(1)  # Avoid high CPU usage

monitor_log("server.log")

Use Case: Useful for real-time error detection in applications.

Problem:
Write a Python script to find and display the five largest files in a given directory, sorted by size.

Solution:

import os

def find_largest_files(directory):
    files = []
    for name in os.listdir(directory):
        path = os.path.join(directory, name)
        if os.path.isfile(path):
            files.append((name, os.path.getsize(path)))
    files.sort(key=lambda x: x[1], reverse=True)
    for name, size in files[:5]:
        print(f"{name}: {size} bytes")

find_largest_files("/path/to/directory")

Use Case: Useful for disk space analysis and cleanup.

Problem:
Write a script that encrypts a text file using AES encryption and later decrypts it.

Solution:

from cryptography.fernet import Fernet

# Generate a key (persist it securely; the same key is needed to decrypt)
key = Fernet.generate_key()
cipher = Fernet(key)

def encrypt_file(input_file, output_file):
    with open(input_file, "rb") as file:
        encrypted_data = cipher.encrypt(file.read())
    with open(output_file, "wb") as file:
        file.write(encrypted_data)

def decrypt_file(encrypted_file, output_file):
    with open(encrypted_file, "rb") as file:
        decrypted_data = cipher.decrypt(file.read())
    with open(output_file, "wb") as file:
        file.write(decrypted_data)

encrypt_file("data.txt", "data_encrypted.txt")
decrypt_file("data_encrypted.txt", "data_decrypted.txt")

Use Case: Protect sensitive user data before storing it.

Problem:
Extract only the "Name" and "Email" columns from a large CSV file and save them to a new file.

Solution:

import csv

def extract_columns(input_file, output_file, columns):
    with open(input_file, "r") as infile, open(output_file, "w", newline="") as outfile:
        reader = csv.DictReader(infile)
        writer = csv.DictWriter(outfile, fieldnames=columns)
        writer.writeheader()
        for row in reader:
            writer.writerow({col: row[col] for col in columns})

extract_columns("users.csv", "filtered_users.csv", ["Name", "Email"])

Use Case: Handling large datasets without memory overhead.

Problem:
You have multiple text files containing logs. Write a script to merge them into a single file efficiently.

Solution:

import glob

def merge_files(output_file, file_pattern):
    with open(output_file, "w") as outfile:
        for file_name in glob.glob(file_pattern):
            with open(file_name, "r") as infile:
                outfile.write(infile.read() + "\n")

merge_files("merged_logs.txt", "logs/*.txt")

Use Case: Useful for data aggregation and report generation.

Problem:
Find how many times a specific word appears in a large text file without loading the whole file into memory.

Solution:

def count_word_in_file(file_path, word):
    count = 0
    with open(file_path, "r") as file:
        for line in file:
            count += line.lower().count(word.lower())
    return count

print(count_word_in_file("large_text.txt", "error"))

Use Case: Efficient log analysis.

Problem:
Instead of permanently deleting a file, move it to a trash directory.

Solution:

import os
import shutil

def safe_delete(file_path, trash_folder="trash"):
    os.makedirs(trash_folder, exist_ok=True)
    shutil.move(file_path, trash_folder)

safe_delete("important_file.txt")

Use Case: Prevents accidental data loss.

Problem:
List the contents of a ZIP file and read a specific file inside it.

Solution:

import zipfile

def read_file_from_zip(zip_file, target_file):
    with zipfile.ZipFile(zip_file, "r") as z:
        with z.open(target_file) as f:
            print(f.read().decode())

read_file_from_zip("archive.zip", "document.txt")

Use Case: Read compressed archives without extraction.

Problem:
Write a script to read multiple large files in parallel using multithreading.

Solution:

from concurrent.futures import ThreadPoolExecutor

def read_file(file_path):
    with open(file_path, "r") as file:
        return file.readlines()

files = ["file1.txt", "file2.txt", "file3.txt"]

with ThreadPoolExecutor() as executor:
    results = executor.map(read_file, files)

for content in results:
    print(content)

Use Case: Increases performance when processing large logs.

Problem:
Verify the integrity of a file using SHA-256 checksum.

Solution:

import hashlib

def compute_checksum(file_path):
    sha256 = hashlib.sha256()
    with open(file_path, "rb") as file:
        while chunk := file.read(4096):
            sha256.update(chunk)
    return sha256.hexdigest()

print(compute_checksum("data.txt"))

Use Case: Ensures file integrity after download.
