File Handling in Python - Interview Questions and Answers
Python supports various file modes:
- 'r' – Read (default mode)
- 'w' – Write (overwrites if the file exists)
- 'a' – Append (adds data to the end of the file)
- 'x' – Create (fails if the file exists)
- 'b' – Binary mode (e.g., 'rb', 'wb')
- 't' – Text mode (default, e.g., 'rt', 'wt')
- '+' – Read and write (e.g., 'r+', 'w+', 'a+')
file = open("example.txt", "r") # Open file in read mode
file.close() # Close file
- read(size): Reads size characters (or the whole file if size is not specified).
- readline(): Reads a single line from the file.
- readlines(): Reads all lines and returns them as a list.
Example:
with open("example.txt", "r") as file:
print(file.read(5)) # Reads first 5 characters
print(file.readline()) # Reads one line
print(file.readlines()) # Reads all lines into a list
with open("example.txt", "w") as file:
file.write("Hello, World!\n")
The file is overwritten, and any previous content is lost. Use 'a' mode to append without overwriting.
with open("example.txt", "a") as file:
file.write("Appending new line.\n")
To check whether a file exists, use the os.path.exists() function.
import os

if os.path.exists("example.txt"):
    print("File exists")
else:
    print("File not found")
- 'r+' – Read and write; does not create the file if it is missing.
- 'w+' – Read and write; overwrites an existing file.
- 'a+' – Read and append; creates the file if it is missing.
Use the 'rb' and 'wb' modes.
with open("image.jpg", "rb") as file:
data = file.read()
with open("copy.jpg", "wb") as file:
file.write(data)
Use with open(), as it automatically closes the file when the block exits, even if an exception occurs.
with open("example.txt", "r") as file:
data = file.read()
- seek(offset, whence): Moves the file pointer to a given position.
- tell(): Returns the current position of the file pointer.
Example:
with open("example.txt", "r") as file:
file.seek(10) # Move to 10th byte
print(file.tell()) # Prints current position
import os
os.remove("example.txt")
import os
os.rename("old.txt", "new.txt")
Use 'x' mode.
open("newfile.txt", "x").close()
- Text mode ('t'): Reads/writes data as text (str).
- Binary mode ('b'): Reads/writes data as bytes (bytes).
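A minimal sketch contrasting the two modes, assuming example.txt exists:
with open("example.txt", "rt") as file:
    print(type(file.read()))  # <class 'str'>

with open("example.txt", "rb") as file:
    print(type(file.read()))  # <class 'bytes'>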
with open("example.txt", "r") as file:
for line in file:
print(line.strip())
with open("source.txt", "r") as src, open("destination.txt", "w") as dest:
dest.write(src.read())
with open("example.txt", "r") as file:
print(len(file.readlines()))
with open("example.txt", "r") as file:
words = file.read().split()
print(len(words))
Use a try-except block:
try:
    with open("missing.txt", "r") as file:
        data = file.read()
except FileNotFoundError:
    print("File not found!")
with open("example.txt", "r") as file:
for line in reversed(file.readlines()):
print(line.strip())
import os
print(os.path.getsize("example.txt"))
import csv

with open("data.csv", "w", newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Name", "Age"])
    writer.writerow(["Alice", 30])
Use the json module.
import json

data = {"name": "Alice", "age": 25}
with open("data.json", "w") as file:
    json.dump(data, file)
import logging
logging.basicConfig(filename="app.log", level=logging.INFO)
logging.info("This is a log message")
Instead of reading the entire file into memory, read it line by line using a generator:
def read_large_file(file_path):
    with open(file_path, "r") as file:
        for line in file:
            yield line.strip()  # Yield one line at a time

for line in read_large_file("largefile.txt"):
    print(line)
This approach saves memory when working with large files.
Use the encoding parameter in open().
with open("example.txt", "r", encoding="utf-8") as file:
data = file.read()
If unsure about the encoding, use the third-party chardet package to detect it.
import chardet

with open("example.txt", "rb") as file:
    raw_data = file.read()

encoding = chardet.detect(raw_data)["encoding"]
with open("example.txt", "r", encoding=encoding) as file:
    data = file.read()
import json

data = {"students": [{"name": "Rahul", "age": 25}, {"name": "Ankit", "age": 23}]}
with open("students.json", "w") as file:
    json.dump(data, file, indent=4)
The indent=4 argument makes the JSON human-readable.
import json

with open("students.json", "r") as file:
    data = json.load(file)

print(data)  # Dictionary output
Use the gzip module for compression.
import gzip

# Compress a file
with open("example.txt", "rb") as f_in, gzip.open("example.txt.gz", "wb") as f_out:
    f_out.writelines(f_in)

# Decompress a file
with gzip.open("example.txt.gz", "rb") as f_in, open("decompressed.txt", "wb") as f_out:
    f_out.writelines(f_in)
This reduces file size for storage or transfer.
Use thread locks to prevent race conditions.
import threading

lock = threading.Lock()

def write_to_file(data):
    with lock:
        with open("threadsafe.txt", "a") as file:
            file.write(data + "\n")

thread1 = threading.Thread(target=write_to_file, args=("Thread 1 Data",))
thread2 = threading.Thread(target=write_to_file, args=("Thread 2 Data",))
thread1.start()
thread2.start()
thread1.join()
thread2.join()
This ensures safe file writing in multi-threaded programs.
import pandas as pd
# Read CSV
df = pd.read_csv("data.csv")
print(df)
# Write CSV
df.to_csv("output.csv", index=False)
Pandas makes it easier to handle structured data.
import pandas as pd
# Read Excel
df = pd.read_excel("data.xlsx")
# Write Excel
df.to_excel("output.xlsx", index=False)
Requires the openpyxl or xlrd library (openpyxl handles .xlsx files; modern xlrd only reads legacy .xls files).
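If openpyxl is installed, the engine and sheet can be selected explicitly; a hedged sketch ("Sheet1" is a hypothetical sheet name):
import pandas as pd

# Read one specific sheet, pinning the engine
df = pd.read_excel("data.xlsx", sheet_name="Sheet1", engine="openpyxl")
print(df.head())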
Use the third-party watchdog module to detect file changes dynamically.
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import time

class Watcher(FileSystemEventHandler):
    def on_modified(self, event):
        print(f"File {event.src_path} has been modified.")

observer = Observer()
observer.schedule(Watcher(), path=".", recursive=False)
observer.start()
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    observer.stop()
observer.join()
This is useful for log monitoring, real-time data processing, etc.
Use mmap to map a file into memory, allowing efficient random access.
import mmap

with open("example.txt", "r+b") as file:
    mm = mmap.mmap(file.fileno(), 0)
    print(mm.readline())  # Read the first line
    mm.seek(0)            # Move back to the start
    mm.write(b"Hello")    # Overwrite the first five bytes
    mm.close()
This can be faster than normal file I/O for random access in large files.
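The mapped bytes can also be searched directly, which illustrates the random access; a small sketch (the pattern b"Hello" is arbitrary):
import mmap

with open("example.txt", "rb") as file:
    mm = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)
    print(mm.find(b"Hello"))  # Byte offset of the first match, or -1
    mm.close()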
Use the third-party psutil module to check whether a file is open in another process.
import psutil

def is_file_locked(file_path):
    for proc in psutil.process_iter(["open_files"]):
        if proc.info["open_files"]:
            for file in proc.info["open_files"]:
                if file.path == file_path:
                    return True
    return False

print(is_file_locked("example.txt"))
This prevents conflicts when multiple processes are accessing a file.
Use try-except and validate file integrity using hashlib.
import hashlib

def get_file_hash(filename):
    hash_md5 = hashlib.md5()
    try:
        with open(filename, "rb") as file:
            for chunk in iter(lambda: file.read(4096), b""):
                hash_md5.update(chunk)
    except IOError:
        print("File could not be read!")
        return None
    return hash_md5.hexdigest()

print(get_file_hash("data.txt"))
If the hash doesn't match a known checksum, the file may be corrupted.
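A minimal sketch of that comparison (EXPECTED_MD5 is a hypothetical known-good value):
EXPECTED_MD5 = "9e107d9d372bb6826bd81d3542a419d6"  # Hypothetical known-good checksum

if get_file_hash("data.txt") != EXPECTED_MD5:
    print("Checksum mismatch: the file may be corrupted.")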
Use the fileinput module to stream and replace text in large files.
import fileinput

with fileinput.FileInput("largefile.txt", inplace=True, backup=".bak") as file:
    for line in file:
        print(line.replace("old_text", "new_text"), end="")
This modifies the file in-place without loading the whole file into memory.
Move the file to a temporary trash folder instead of deleting it permanently.
import shutil
import os

def safe_delete(file_path):
    trash_dir = "trash"
    os.makedirs(trash_dir, exist_ok=True)
    shutil.move(file_path, trash_dir)

safe_delete("important_file.txt")
This prevents accidental loss of critical files.
Use the zipfile module with a password. Note that zipfile can only decrypt legacy ZipCrypto archives; it cannot create password-protected ZIPs.
import zipfile

with zipfile.ZipFile("secure.zip") as zf:
    zf.setpassword(b"mypassword")
    zf.extractall("output_dir")
This is useful for handling encrypted archives.
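Alternatively, the password can be supplied per call; a small sketch:
import zipfile

with zipfile.ZipFile("secure.zip") as zf:
    zf.extractall("output_dir", pwd=b"mypassword")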
Use an infinite loop to monitor log updates.
import time

def tail_file(file_path):
    with open(file_path, "r") as file:
        file.seek(0, 2)  # Move to the end of the file
        while True:
            line = file.readline()
            if line:
                print(line, end="")
            else:
                time.sleep(1)

tail_file("app.log")
This is helpful for real-time log monitoring.
Use encryption, e.g., with the third-party cryptography package.
from cryptography.fernet import Fernet

key = Fernet.generate_key()
cipher_suite = Fernet(key)

data = "Sensitive data"
encrypted_data = cipher_suite.encrypt(data.encode())

with open("secure_data.txt", "wb") as file:
    file.write(encrypted_data)

print("Data securely written to file.")
Always encrypt sensitive data before storing it.
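To read the data back, decrypt it with the same cipher_suite; a minimal sketch continuing the example above:
with open("secure_data.txt", "rb") as file:
    encrypted_data = file.read()

print(cipher_suite.decrypt(encrypted_data).decode())  # "Sensitive data"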
Use csv.reader or csv.DictReader.
import csv

with open("data.csv", "r") as file:
    reader = csv.DictReader(file)
    for row in reader:
        print(row["Column1"], row["Column3"])  # Extract specific columns
This is efficient for working with large CSVs.
Use ThreadPoolExecutor for parallel file reading.
from concurrent.futures import ThreadPoolExecutor

def read_file(file_path):
    with open(file_path, "r") as file:
        return file.readlines()

files = ["file1.txt", "file2.txt", "file3.txt"]
with ThreadPoolExecutor() as executor:
    results = executor.map(read_file, files)
    for content in results:
        print(content)
This speeds up file reading for multiple large files.
Use hashlib to create a SHA-256 checksum.
import hashlib

def file_checksum(file_path):
    sha256 = hashlib.sha256()
    with open(file_path, "rb") as file:
        while chunk := file.read(4096):
            sha256.update(chunk)
    return sha256.hexdigest()

print(file_checksum("data.txt"))
This helps verify file integrity during transfers.
Problem:
You are building a log monitoring system that detects error messages in real time. Write a Python script that continuously monitors a log file (server.log) and prints an alert whenever the word "ERROR" appears.
Solution:
import time

def monitor_log(file_path):
    with open(file_path, "r") as file:
        file.seek(0, 2)  # Move to the end of the file
        while True:
            line = file.readline()
            if line:
                if "ERROR" in line:
                    print("ALERT: Error detected ->", line.strip())
            else:
                time.sleep(1)  # Avoid high CPU usage

monitor_log("server.log")
Use Case: Useful for real-time error detection in applications.
Problem:
Write a Python script to find and display the five largest files in a given directory, sorted by size.
Solution:
import os

def find_largest_files(directory):
    files = [(f, os.path.getsize(os.path.join(directory, f)))
             for f in os.listdir(directory)
             if os.path.isfile(os.path.join(directory, f))]
    files.sort(key=lambda x: x[1], reverse=True)
    for file, size in files[:5]:
        print(f"{file}: {size} bytes")

find_largest_files("/path/to/directory")
Use Case: Useful for disk space analysis and cleanup.
Problem:
Write a script that encrypts a text file using AES encryption and later decrypts it.
Solution:
from cryptography.fernet import Fernet

# Generate a key; it must be kept to decrypt the file later
key = Fernet.generate_key()
cipher = Fernet(key)

def encrypt_file(input_file, output_file):
    with open(input_file, "rb") as file:
        encrypted_data = cipher.encrypt(file.read())
    with open(output_file, "wb") as file:
        file.write(encrypted_data)

def decrypt_file(encrypted_file, output_file):
    with open(encrypted_file, "rb") as file:
        decrypted_data = cipher.decrypt(file.read())
    with open(output_file, "wb") as file:
        file.write(decrypted_data)

encrypt_file("data.txt", "data_encrypted.txt")
decrypt_file("data_encrypted.txt", "data_decrypted.txt")
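The key must be persisted to decrypt in a later run; a minimal sketch (secret.key is a hypothetical filename):
# Save the key once (keep this file out of version control)
with open("secret.key", "wb") as key_file:
    key_file.write(key)

# Load it in a later run
from cryptography.fernet import Fernet
with open("secret.key", "rb") as key_file:
    cipher = Fernet(key_file.read())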
Use Case: Protect sensitive user data before storing it.
Problem:
Extract only the "Name" and "Email" columns from a large CSV file and save them to a new file.
Solution:
import csv

def extract_columns(input_file, output_file, columns):
    with open(input_file, "r") as infile, open(output_file, "w", newline="") as outfile:
        reader = csv.DictReader(infile)
        writer = csv.DictWriter(outfile, fieldnames=columns)
        writer.writeheader()
        for row in reader:
            writer.writerow({col: row[col] for col in columns})

extract_columns("users.csv", "filtered_users.csv", ["Name", "Email"])
Use Case: Handling large datasets without memory overhead.
Problem:
You have multiple text files containing logs. Write a script to merge them into a single file efficiently.
Solution:
import glob

def merge_files(output_file, file_pattern):
    with open(output_file, "w") as outfile:
        for file_name in glob.glob(file_pattern):
            with open(file_name, "r") as infile:
                outfile.write(infile.read() + "\n")

merge_files("merged_logs.txt", "logs/*.txt")
Use Case: Useful for data aggregation and report generation.
Problem:
Find how many times a specific word appears in a large text file without loading the whole file into memory.
Solution:
def count_word_in_file(file_path, word):
    count = 0
    with open(file_path, "r") as file:
        for line in file:
            count += line.lower().count(word.lower())
    return count

print(count_word_in_file("large_text.txt", "error"))
Use Case: Efficient log analysis.
Problem:
Instead of permanently deleting a file, move it to a trash directory.
Solution:
import os
import shutil

def safe_delete(file_path, trash_folder="trash"):
    os.makedirs(trash_folder, exist_ok=True)
    shutil.move(file_path, trash_folder)

safe_delete("important_file.txt")
Use Case: Prevents accidental data loss.
Problem:
List the contents of a ZIP file and read a specific file inside it.
Solution:
import zipfile

def read_file_from_zip(zip_file, target_file):
    with zipfile.ZipFile(zip_file, "r") as z:
        with z.open(target_file) as f:
            print(f.read().decode())

read_file_from_zip("archive.zip", "document.txt")
Use Case: Read compressed archives without extraction.
Problem:
Write a script to read multiple large files in parallel using multithreading.
Solution:
from concurrent.futures import ThreadPoolExecutor

def read_file(file_path):
    with open(file_path, "r") as file:
        return file.readlines()

files = ["file1.txt", "file2.txt", "file3.txt"]
with ThreadPoolExecutor() as executor:
    results = executor.map(read_file, files)
    for content in results:
        print(content)
Use Case: Increases performance when processing large logs.
Problem:
Verify the integrity of a file using SHA-256 checksum.
Solution:
import hashlib

def compute_checksum(file_path):
    sha256 = hashlib.sha256()
    with open(file_path, "rb") as file:
        while chunk := file.read(4096):
            sha256.update(chunk)
    return sha256.hexdigest()

print(compute_checksum("data.txt"))
Use Case: Ensures file integrity after download.