# Source code for dexray_insight.Utils.file_utils

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# #!/usr/bin/env python3
# # -*- coding: utf-8 -*-
#
# # Copyright (C) {{ year }} Dexray Insight Contributors
# #
# # This file is part of Dexray Insight - Android APK Security Analysis Tool
# #
# # Licensed under the Apache License, Version 2.0 (the "License");
# # you may not use this file except in compliance with the License.
# # You may obtain a copy of the License at
# #
# #     http://www.apache.org/licenses/LICENSE-2.0
# #
# # Unless required by applicable law or agreed to in writing, software
# # distributed under the License is distributed on an "AS IS" BASIS,
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# # See the License for the specific language governing permissions and
# # limitations under the License.

"""File utilities for APK analysis and processing.

This module provides utility functions for file operations, path manipulation,
hashing, and JSON serialization used throughout the Dexray Insight framework.
"""

# #!/usr/bin/env python3
# # -*- coding: utf-8 -*-
#
# # Copyright (C) {{ year }} Dexray Insight Contributors
# #
# # This file is part of Dexray Insight - Android APK Security Analysis Tool
# #
# # Licensed under the Apache License, Version 2.0 (the "License");
# # you may not use this file except in compliance with the License.
# # You may obtain a copy of the License at
# #
# #     http://www.apache.org/licenses/LICENSE-2.0
# #
# # Unless required by applicable law or agreed to in writing, software
# # distributed under the License is distributed on an "AS IS" BASIS,
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# # See the License for the specific language governing permissions and
# # limitations under the License.

import hashlib
import json
import os
import platform
import shutil
import sys
import zipfile
from datetime import datetime
from pathlib import Path


def backup_and_replace_with_template(original_file_path: str, template_cs_file: str) -> tuple[str, str]:
    """Save a ``.bak`` copy of a file, then overwrite the file with a template.

    The template is resolved against the current working directory. The
    backup is written next to the original, with ``.bak`` appended to the
    original's full file name (``app.csproj`` becomes ``app.csproj.bak``).

    Args:
        original_file_path: Path to the file that is backed up and replaced.
        template_cs_file: Template file name, looked up in the current
            working directory.

    Returns:
        tuple: ``(backup_path, new_file_path)`` as strings. The second item
        is the original path, which now holds the template's contents.

    Raises:
        FileNotFoundError: If the original file or the template is missing.
    """
    target = Path(original_file_path)
    template = Path.cwd() / template_cs_file

    # Both inputs must exist before anything is copied; the original is
    # checked first, so its error wins when both are missing.
    if not target.exists():
        raise FileNotFoundError(f"Original file not found: {target}")
    if not template.exists():
        raise FileNotFoundError(f"Template file not found: {template}")

    # Step 1: preserve the current contents alongside the original.
    backup = target.with_name(f"{target.name}.bak")
    shutil.copy2(target, backup)

    # Step 2: overwrite the original in place with the template.
    shutil.copy2(template, target)

    return str(backup), str(target)
def get_parent_directory(path: str) -> str:
    """Return the parent directory of ``path`` as an absolute string.

    The path is resolved first (made absolute, with ``..`` components and
    symlinks collapsed), and the parent of the resolved path is returned.

    Example:
        Input:  "/project/targetapk_2025-03-08_20-28-38_asam_results/targetapk_unzipped"
        Output: "/project/targetapk_2025-03-08_20-28-38_asam_results"
    """
    resolved = Path(path).resolve()
    return str(resolved.parent)
def is_macos() -> bool:
    """Tell whether the interpreter is running on macOS.

    Returns:
        bool: True when ``platform.system()`` reports ``"Darwin"``.
    """
    system_name = platform.system()
    return system_name == "Darwin"
def create_new_directory(dir_name: str) -> str:
    """Create a fresh analysis directory and return its absolute path.

    Intermediate directories are created as needed. An existing path of any
    kind (file or directory) is treated as an error rather than reused.

    Args:
        dir_name: Directory path to create.

    Returns:
        str: Absolute path of the newly created directory.

    Raises:
        FileExistsError: If ``dir_name`` already exists.
    """
    already_present = os.path.exists(dir_name)
    if already_present:
        raise FileExistsError(f"Directory already exists: {dir_name}")

    os.makedirs(dir_name)
    created_path = os.path.abspath(dir_name)
    return created_path
def unzip_apk_with_skip(app_name: str, apk_path: str) -> tuple[str, list[str]]:
    """Extract an APK entry by entry, skipping entries that fail to extract.

    Extraction is best-effort: any error raised while extracting a single
    entry is recorded and the loop moves on to the next entry.

    Args:
        app_name: Destination directory (created if it does not exist).
        apk_path: Path to the APK (ZIP) archive.

    Returns:
        tuple: ``(destination_path, skipped_files)``. CRC failures are listed
        by entry name only; any other per-entry failure is listed as
        ``"<entry name> (<error text>)"``.

    Raises:
        ValueError: If the archive cannot be opened as a ZIP file.
        RuntimeError: For any other failure outside the per-entry loop.
    """
    target_dir = os.path.abspath(app_name)
    os.makedirs(target_dir, exist_ok=True)
    skipped = []

    # zipfile.BadCRCError is not guaranteed to exist; look it up once so the
    # isinstance check below is simply skipped when it is absent.
    crc_error_type = getattr(zipfile, "BadCRCError", None)

    try:
        with zipfile.ZipFile(apk_path, "r") as archive:
            for member in archive.infolist():
                try:
                    archive.extract(member, target_dir)
                except Exception as exc:
                    detail = str(exc)
                    # A CRC failure is recognised by message text or, where
                    # available, by the dedicated exception type.
                    crc_failure = "Bad CRC-32" in detail or (
                        crc_error_type is not None and isinstance(exc, crc_error_type)
                    )
                    if crc_failure:
                        skipped.append(member.filename)
                    else:
                        skipped.append(f"{member.filename} ({detail})")
        return target_dir, skipped
    except zipfile.BadZipFile as exc:
        raise ValueError("Invalid APK structure (not a valid ZIP file)") from exc
    except Exception as exc:
        raise RuntimeError(f"Fatal unzip error: {str(exc)}") from exc
def unzip_apk(app_name: str, apk_path: str) -> str:
    """Unzip an APK file into a folder named after the app.

    Args:
        app_name (str): Name (or path) of the destination folder; created if
            it does not exist.
        apk_path (str): Path to the source APK file.

    Returns:
        str: Absolute path to the directory holding the unzipped contents.

    Raises:
        FileNotFoundError: If the APK file doesn't exist. Nothing is created
            on disk in this case.
        ValueError: If the APK is not a valid ZIP archive.
        RuntimeError: If extraction fails for any other reason.
    """
    # Validate the input before touching the filesystem, so a missing APK
    # does not leave an empty destination directory behind.
    if not os.path.isfile(apk_path):
        raise FileNotFoundError(f"APK file not found: {apk_path}")

    dest_dir = os.path.abspath(app_name)
    os.makedirs(dest_dir, exist_ok=True)

    try:
        print(f"TRying to unzip: {apk_path} --to--> {app_name}")
        with zipfile.ZipFile(apk_path, "r") as zip_ref:
            zip_ref.extractall(dest_dir)
        print(f"Unzipped APK to: {dest_dir}")
        return dest_dir
    except zipfile.BadZipFile as e:
        # The exception text often reveals the real structural problem.
        print(f"ZIP Error Details: {e}")
        raise ValueError(f"Invalid APK structure: {e}") from e
    except Exception as e:
        # Chain explicitly so the root cause is kept as __cause__.
        raise RuntimeError(f"Failed to unzip APK: {e}") from e
def split_path_file_extension(file_path):
    """Break a file path into directory, base name and extension.

    Args:
        file_path (str): The file path to split.

    Returns:
        tuple: ``(directory, name, extension)`` where ``name`` has no
        extension and ``extension`` has no leading dot. ``directory`` is
        ``"."`` when the path has no directory component.
    """
    # An empty directory component means "current directory".
    directory = os.path.dirname(file_path) or "."
    stem, dotted_extension = os.path.splitext(os.path.basename(file_path))
    return directory, stem, dotted_extension.lstrip(".")
def calculate_file_hash(file_path, hash_func):
    """Hash a file's contents and return the hexadecimal digest.

    The file is read in binary mode in 4096-byte chunks, so it is never
    loaded into memory in one piece.

    Args:
        file_path: Path of the file to hash.
        hash_func: Zero-argument callable returning a hash object with
            ``update`` and ``hexdigest`` methods (e.g. ``hashlib.sha256``).

    Returns:
        str: Hexadecimal digest of the file contents.
    """
    digest = hash_func()
    with open(file_path, "rb") as stream:
        # read() returns b"" at end of file, which ends the loop.
        while block := stream.read(4096):
            digest.update(block)
    return digest.hexdigest()
def calculate_md5_file_hash(filename):
    """Return the hexadecimal MD5 digest of the file at ``filename``."""
    algorithm = hashlib.md5
    return calculate_file_hash(filename, algorithm)
def calculate_sha1_file_hash(filename):
    """Return the hexadecimal SHA-1 digest of the file at ``filename``."""
    algorithm = hashlib.sha1
    return calculate_file_hash(filename, algorithm)
def calculate_sha256_file_hash(filename):
    """Return the hexadecimal SHA-256 digest of the file at ``filename``."""
    algorithm = hashlib.sha256
    return calculate_file_hash(filename, algorithm)
def calculate_sha512_file_hash(filename):
    """Return the hexadecimal SHA-512 digest of the file at ``filename``."""
    algorithm = hashlib.sha512
    return calculate_file_hash(filename, algorithm)
# Custom encoder to handle non-serializable objects like datetime
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that also serializes datetimes, enums and dataclasses."""

    def default(self, obj):
        """Convert an object the stock encoder cannot handle.

        Checks are applied in this order; the first match wins:

        1. ``datetime`` -> ISO 8601 string.
        2. Enum-like objects (a ``value`` attribute and a class exposing
           ``__members__``) -> their ``value``.
        3. Objects with a callable ``to_dict`` -> the result of ``to_dict()``.
        4. Objects with ``__dataclass_fields__`` -> ``dataclasses.asdict``.

        Anything else is passed to the base class, which raises ``TypeError``.
        """
        if isinstance(obj, datetime):
            return obj.isoformat()

        # Enums are detected by shape, not by isinstance.
        enum_like = hasattr(obj, "value") and hasattr(obj.__class__, "__members__")
        if enum_like:
            return obj.value

        # A hand-written to_dict() takes precedence over generic dataclass
        # conversion, so classes can control their own JSON shape.
        converter = getattr(obj, "to_dict", None)
        if callable(converter):
            return converter()

        if hasattr(obj, "__dataclass_fields__"):
            import dataclasses

            return dataclasses.asdict(obj)

        return super().default(obj)
def dump_json(filename, data):
    """Write ``data`` to ``filename`` as JSON indented by four spaces.

    Values the standard encoder cannot serialize are handled by
    ``CustomJSONEncoder``. An existing file is overwritten.

    Args:
        filename: Path of the output file.
        data: JSON-serializable object (typically a dict).
    """
    with open(filename, "w") as out_handle:
        json.dump(data, out_handle, indent=4, cls=CustomJSONEncoder)