Pages

marți, 25 martie 2025

News : Google Apps Script - find duplicate files in google drive.

For today, a simple GAScript source code to add into spreadsheet the duplicate files from Google drive.
This Google Apps Script finds duplicate files in Google Drive by comparing file sizes and optionally file types. It then displays the results in a spreadsheet with detailed information about each duplicate file. The script collects information about all files in Drive. Files are grouped by their size and optionally file type. Any group with more than one file is considered a set of duplicates These duplicate sets are displayed in the spreadsheet
Functions
  • checkDuplicatesInDrive(): Main function that searches your entire Google Drive for duplicates
  • checkDuplicatesInFolder(): Alternative function that searches a specific folder and its subfolders
  • findDuplicateFiles(): Core function that identifies duplicate files based on size and type
  • addDuplicatesToSheet(): Adds the found duplicates to a spreadsheet
I used artificial inteligence and this help me much ...
/**
 * Main function to check files in the root folder and add duplicates to the active spreadsheet
 */
function checkDuplicatesInDrive() {
  var sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
  
  // Clear the sheet and set up headers
  sheet.clear();
  sheet.appendRow(["Group", "File Name", "Size", "Type", "File Path", "Date Created", "Last Updated", "URL"]);
  sheet.getRange(1, 1, 1, 8).setFontWeight("bold").setBackground("#f3f3f3");
  
  // Find all duplicates
  const duplicates = findDuplicateFiles(true);
  
  // Check if any duplicates were found
  if (Object.keys(duplicates).length === 0) {
    sheet.appendRow(["No duplicate files found"]);
    sheet.autoResizeColumns(1, 8);
    return;
  }
  
  // Add duplicates to the sheet
  addDuplicatesToSheet(duplicates, sheet);
  
  // Auto-resize columns
  sheet.autoResizeColumns(1, 8);
}

/**
 * Adds duplicate files to the specified sheet
 */
function addDuplicatesToSheet(duplicates, sheet) {
  let groupNumber = 1;
  let rowIndex = 2;
  let totalDuplicateFiles = 0;
  
  for (const key in duplicates) {
    const files = duplicates[key];
    totalDuplicateFiles += files.length;
    
    files.forEach((file, index) => {
      // Get file path
      const filePath = getFilePath(file.id);
      
      sheet.appendRow([
        groupNumber,
        file.name,
        formatFileSize(file.size),
        file.mimeType,
        filePath,
        file.dateCreated.toLocaleString(),
        file.lastUpdated.toLocaleString(),
        file.url
      ]);
      
      // Add hyperlink to the file URL
      sheet.getRange(rowIndex, 8).setFormula(`=HYPERLINK("${file.url}","Open File")`);
      
      rowIndex++;
    });
    
    groupNumber++;
  }
  
  // Add summary at the bottom - only if we have duplicates
  if (totalDuplicateFiles > 0) {
    sheet.appendRow(["SUMMARY"]);
    sheet.appendRow([`Found ${totalDuplicateFiles} duplicate files in ${groupNumber - 1} groups.`]);
  }
  
  return totalDuplicateFiles;
}

/**
 * Gets the file path for a given file ID
 */
function getFilePath(fileId) {
  try {
    const file = DriveApp.getFileById(fileId);
    const parents = file.getParents();
    
    if (parents.hasNext()) {
      const parent = parents.next();
      return getFolderPath(parent) + "/" + file.getName();
    } else {
      return "/" + file.getName();
    }
  } catch (e) {
    return "Path not available";
  }
}

/**
 * Gets the folder path for a given folder
 */
function getFolderPath(folder) {
  try {
    const parents = folder.getParents();
    
    if (!parents.hasNext()) {
      return "/" + folder.getName();
    }
    
    const parent = parents.next();
    return getFolderPath(parent) + "/" + folder.getName();
  } catch (e) {
    return "/Unknown";
  }
}

/**
 * Finds duplicate files in Google Drive based on file size and optionally file type.
 * @param {boolean} considerFileType Whether to consider file type when finding duplicates (default: true)
 * @param {string} folderId Optional folder ID to search in. If not provided, searches in the entire Drive.
 * @return {Object} An object containing groups of duplicate files
 */
function findDuplicateFiles(considerFileType = true, folderId = null) {
  // Create a map to store files by their size (and optionally type)
  const fileMap = {};
  
  // Get files to check
  let files;
  if (folderId) {
    const folder = DriveApp.getFolderById(folderId);
    files = folder.getFiles();
  } else {
    files = DriveApp.getFiles();
  }
  
  // Process each file
  while (files.hasNext()) {
    const file = files.next();
    // Skip Google Docs, Sheets, etc. as they don't have a fixed size
    if (file.getSize() === 0) continue;
    
    const fileSize = file.getSize();
    const mimeType = file.getMimeType();
    
    // Create a key based on file size and optionally type
    let key = fileSize.toString();
    if (considerFileType) {
      key += '_' + mimeType;
    }
    
    // Add file to the map
    if (!fileMap[key]) {
      fileMap[key] = [];
    }
    
    fileMap[key].push({
      id: file.getId(),
      name: file.getName(),
      size: fileSize,
      mimeType: mimeType,
      url: file.getUrl(),
      dateCreated: file.getDateCreated(),
      lastUpdated: file.getLastUpdated()
    });
  }
  
  // Filter out unique files (groups with only one file)
  const duplicates = {};
  for (const key in fileMap) {
    if (fileMap[key].length > 1) {
      duplicates[key] = fileMap[key];
    }
  }
  
  return duplicates;
}

/**
 * Alternative function to check files in a specific folder and its subfolders
 */
function checkDuplicatesInFolder() {
  var sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
  
  // Clear the sheet and set up headers
  sheet.clear();
  sheet.appendRow(["Group", "File Name", "Size", "Type", "File Path", "Date Created", "Last Updated", "URL"]);
  sheet.getRange(1, 1, 1, 8).setFontWeight("bold").setBackground("#f3f3f3");
  
  // Collect all files from the folder and subfolders
  var fileMap = {};
  var rootFolder = DriveApp.getRootFolder(); // Change this to your specific folder if needed
  collectFilesFromFolder(rootFolder, fileMap);
  
  // Filter out unique files
  const duplicates = {};
  for (const key in fileMap) {
    if (fileMap[key].length > 1) {
      duplicates[key] = fileMap[key];
    }
  }
  
  // Check if any duplicates were found
  if (Object.keys(duplicates).length === 0) {
    sheet.appendRow(["No duplicate files found"]);
    sheet.autoResizeColumns(1, 8);
    return;
  }
  
  // Add duplicates to the sheet
  addDuplicatesToSheet(duplicates, sheet);
  
  // Auto-resize columns
  sheet.autoResizeColumns(1, 8);
}

/**
 * Recursively collects files from a folder and its subfolders
 */
function collectFilesFromFolder(folder, fileMap, considerFileType = true) {
  // Process files in this folder
  var files = folder.getFiles();
  while (files.hasNext()) {
    const file = files.next();
    // Skip Google Docs, Sheets, etc. as they don't have a fixed size
    if (file.getSize() === 0) continue;
    
    const fileSize = file.getSize();
    const mimeType = file.getMimeType();
    
    // Create a key based on file size and optionally type
    let key = fileSize.toString();
    if (considerFileType) {
      key += '_' + mimeType;
    }
    
    // Add file to the map
    if (!fileMap[key]) {
      fileMap[key] = [];
    }
    
    fileMap[key].push({
      id: file.getId(),
      name: file.getName(),
      size: fileSize,
      mimeType: mimeType,
      url: file.getUrl(),
      dateCreated: file.getDateCreated(),
      lastUpdated: file.getLastUpdated()
    });
  }
  
  // Process subfolders
  var subfolders = folder.getFolders();
  while (subfolders.hasNext()) {
    var subfolder = subfolders.next();
    collectFilesFromFolder(subfolder, fileMap, considerFileType);
  }
}

/**
 * Helper function to format file size in a human-readable format
 */
function formatFileSize(bytes) {
  if (bytes < 1024) return bytes + " bytes";
  else if (bytes < 1048576) return (bytes / 1024).toFixed(2) + " KB";
  else if (bytes < 1073741824) return (bytes / 1048576).toFixed(2) + " MB";
  else return (bytes / 1073741824).toFixed(2) + " GB";
}

/**
 * Creates a new Google Spreadsheet with the duplicate files report
 */
function createDuplicateFilesSpreadsheet() {
  const duplicates = findDuplicateFiles(true);
  
  // Create a new spreadsheet
  const ss = SpreadsheetApp.create("Duplicate Files Report - " + new Date().toLocaleString());
  const sheet = ss.getActiveSheet();
  
  // Set up headers
  sheet.appendRow(["Group", "File Name", "Size", "Type", "File Path", "Date Created", "Last Updated", "URL"]);
  
  // Format header row
  sheet.getRange(1, 1, 1, 8).setFontWeight("bold").setBackground("#f3f3f3");
  
  // Check if any duplicates were found
  if (Object.keys(duplicates).length === 0) {
    sheet.appendRow(["No duplicate files found"]);
    sheet.autoResizeColumns(1, 8);
    return ss.getUrl();
  }
  
  // Add duplicates to the sheet
  addDuplicatesToSheet(duplicates, sheet);
  
  // Auto-resize columns
  sheet.autoResizeColumns(1, 8);
  
  Logger.log(`Spreadsheet created: ${ss.getUrl()}`);
  return ss.getUrl();
}
See the result into the spreadsheet: