mirror of
https://github.com/immich-app/immich.git
synced 2025-11-09 01:22:42 +09:00
feat(cli): use a queue for duplicate and upload (#10750)
* feat(cli): use a queue for duplicate and upload

  Using a queue to process the files makes the file duplicate detection and asset upload more stable and tolerant of network errors. If an error occurs, the whole command will not stop; the task will be retried (3 times) before the error is logged and processing moves on to the next step. The new queue abstraction uses [fastq](https://www.npmjs.com/package/fastq) internally.

* chore(cli): queue.push returns a promise which resolves with the task

* test(cli): add spec for uploadFiles and checkForDuplicates
This commit is contained in:
@@ -16,6 +16,7 @@ import { chunk } from 'lodash-es';
|
||||
import { Stats, createReadStream } from 'node:fs';
|
||||
import { stat, unlink } from 'node:fs/promises';
|
||||
import path, { basename } from 'node:path';
|
||||
import { Queue } from 'src/queue';
|
||||
import { BaseOptions, authenticate, crawl, sha1 } from 'src/utils';
|
||||
|
||||
/**
 * Plural suffix for a counted noun in log messages.
 *
 * @param count - Number of items being reported.
 * @returns An empty string when `count` is exactly 1, otherwise `'s'`.
 */
const s = (count: number) => {
  if (count !== 1) {
    return 's';
  }
  return '';
};
|
||||
@@ -83,7 +84,7 @@ const scan = async (pathsToCrawl: string[], options: UploadOptionsDto) => {
|
||||
return files;
|
||||
};
|
||||
|
||||
const checkForDuplicates = async (files: string[], { concurrency, skipHash }: UploadOptionsDto) => {
|
||||
export const checkForDuplicates = async (files: string[], { concurrency, skipHash }: UploadOptionsDto) => {
|
||||
if (skipHash) {
|
||||
console.log('Skipping hash check, assuming all files are new');
|
||||
return { newFiles: files, duplicates: [] };
|
||||
@@ -99,32 +100,50 @@ const checkForDuplicates = async (files: string[], { concurrency, skipHash }: Up
|
||||
const newFiles: string[] = [];
|
||||
const duplicates: Asset[] = [];
|
||||
|
||||
try {
|
||||
// TODO refactor into a queue
|
||||
for (const items of chunk(files, concurrency)) {
|
||||
const dto = await Promise.all(items.map(async (filepath) => ({ id: filepath, checksum: await sha1(filepath) })));
|
||||
const { results } = await checkBulkUpload({ assetBulkUploadCheckDto: { assets: dto } });
|
||||
|
||||
for (const { id: filepath, assetId, action } of results as AssetBulkUploadCheckResults) {
|
||||
const queue = new Queue<string[], AssetBulkUploadCheckResults>(
|
||||
async (filepaths: string[]) => {
|
||||
const dto = await Promise.all(
|
||||
filepaths.map(async (filepath) => ({ id: filepath, checksum: await sha1(filepath) })),
|
||||
);
|
||||
const response = await checkBulkUpload({ assetBulkUploadCheckDto: { assets: dto } });
|
||||
const results = response.results as AssetBulkUploadCheckResults;
|
||||
for (const { id: filepath, assetId, action } of results) {
|
||||
if (action === Action.Accept) {
|
||||
newFiles.push(filepath);
|
||||
} else {
|
||||
// rejects are always duplicates
|
||||
duplicates.push({ id: assetId as string, filepath });
|
||||
}
|
||||
progressBar.increment();
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
progressBar.stop();
|
||||
progressBar.increment(filepaths.length);
|
||||
return results;
|
||||
},
|
||||
{ concurrency, retry: 3 },
|
||||
);
|
||||
|
||||
for (const items of chunk(files, concurrency)) {
|
||||
await queue.push(items);
|
||||
}
|
||||
|
||||
await queue.drained();
|
||||
|
||||
progressBar.stop();
|
||||
|
||||
console.log(`Found ${newFiles.length} new files and ${duplicates.length} duplicate${s(duplicates.length)}`);
|
||||
|
||||
// Report failures
|
||||
const failedTasks = queue.tasks.filter((task) => task.status === 'failed');
|
||||
if (failedTasks.length > 0) {
|
||||
console.log(`Failed to verify ${failedTasks.length} file${s(failedTasks.length)}:`);
|
||||
for (const task of failedTasks) {
|
||||
console.log(`- ${task.data} - ${task.error}`);
|
||||
}
|
||||
}
|
||||
|
||||
return { newFiles, duplicates };
|
||||
};
|
||||
|
||||
const uploadFiles = async (files: string[], { dryRun, concurrency }: UploadOptionsDto): Promise<Asset[]> => {
|
||||
export const uploadFiles = async (files: string[], { dryRun, concurrency }: UploadOptionsDto): Promise<Asset[]> => {
|
||||
if (files.length === 0) {
|
||||
console.log('All assets were already uploaded, nothing to do.');
|
||||
return [];
|
||||
@@ -158,37 +177,52 @@ const uploadFiles = async (files: string[], { dryRun, concurrency }: UploadOptio
|
||||
|
||||
const newAssets: Asset[] = [];
|
||||
|
||||
try {
|
||||
for (const items of chunk(files, concurrency)) {
|
||||
await Promise.all(
|
||||
items.map(async (filepath) => {
|
||||
const stats = statsMap.get(filepath) as Stats;
|
||||
const response = await uploadFile(filepath, stats);
|
||||
const queue = new Queue<string, AssetMediaResponseDto>(
|
||||
async (filepath: string) => {
|
||||
const stats = statsMap.get(filepath);
|
||||
if (!stats) {
|
||||
throw new Error(`Stats not found for ${filepath}`);
|
||||
}
|
||||
|
||||
newAssets.push({ id: response.id, filepath });
|
||||
const response = await uploadFile(filepath, stats);
|
||||
newAssets.push({ id: response.id, filepath });
|
||||
if (response.status === AssetMediaStatus.Duplicate) {
|
||||
duplicateCount++;
|
||||
duplicateSize += stats.size ?? 0;
|
||||
} else {
|
||||
successCount++;
|
||||
successSize += stats.size ?? 0;
|
||||
}
|
||||
|
||||
if (response.status === AssetMediaStatus.Duplicate) {
|
||||
duplicateCount++;
|
||||
duplicateSize += stats.size ?? 0;
|
||||
} else {
|
||||
successCount++;
|
||||
successSize += stats.size ?? 0;
|
||||
}
|
||||
uploadProgress.update(successSize, { value_formatted: byteSize(successSize + duplicateSize) });
|
||||
|
||||
uploadProgress.update(successSize, { value_formatted: byteSize(successSize + duplicateSize) });
|
||||
return response;
|
||||
},
|
||||
{ concurrency, retry: 3 },
|
||||
);
|
||||
|
||||
return response;
|
||||
}),
|
||||
);
|
||||
}
|
||||
} finally {
|
||||
uploadProgress.stop();
|
||||
for (const filepath of files) {
|
||||
await queue.push(filepath);
|
||||
}
|
||||
|
||||
await queue.drained();
|
||||
|
||||
uploadProgress.stop();
|
||||
|
||||
console.log(`Successfully uploaded ${successCount} new asset${s(successCount)} (${byteSize(successSize)})`);
|
||||
if (duplicateCount > 0) {
|
||||
console.log(`Skipped ${duplicateCount} duplicate asset${s(duplicateCount)} (${byteSize(duplicateSize)})`);
|
||||
}
|
||||
|
||||
// Report failures
|
||||
const failedTasks = queue.tasks.filter((task) => task.status === 'failed');
|
||||
if (failedTasks.length > 0) {
|
||||
console.log(`Failed to upload ${failedTasks.length} asset${s(failedTasks.length)}:`);
|
||||
for (const task of failedTasks) {
|
||||
console.log(`- ${task.data} - ${task.error}`);
|
||||
}
|
||||
}
|
||||
|
||||
return newAssets;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user