From 1d9e1e966ac6aa3024b96a16e7b72b5b25215d93 Mon Sep 17 00:00:00 2001 From: mirkobrombin Date: Mon, 8 Jul 2024 08:15:57 +0200 Subject: [PATCH] feat: Allow directory copying during a dedup process --- cmd/cp.go | 15 ++++++++++---- cmd/dedup.go | 4 +++- pkg/processor/dedup.go | 45 +++++++++++++++++++++++++++++++----------- 3 files changed, 48 insertions(+), 16 deletions(-) diff --git a/cmd/cp.go b/cmd/cp.go index 0ada74f..77911ab 100644 --- a/cmd/cp.go +++ b/cmd/cp.go @@ -13,13 +13,14 @@ import ( func NewCpCommand() *cobra.Command { cmd := &cobra.Command{ Use: "cp [source] [dest] [storage]", - Short: "Copy a file and deduplicate it in storage", + Short: "Copy a file or directory and deduplicate it in storage", Args: cobra.ExactArgs(3), Run: cpCommand, } cmd.Flags().BoolP("with-metadata", "m", false, "Include file metadata in hash calculation") cmd.Flags().BoolP("verbose", "v", false, "Verbose output") + cmd.Flags().BoolP("append", "a", false, "Append directory contents to destination (same as dedup -d)") return cmd } @@ -28,6 +29,7 @@ func cpCommand(cmd *cobra.Command, args []string) { source, dest, storagePath := args[0], args[1], args[2] withMetadata, _ := cmd.Flags().GetBool("with-metadata") verbose, _ := cmd.Flags().GetBool("verbose") + appendFlag, _ := cmd.Flags().GetBool("append") // Create storage storageOpts := storage.StorageOptions{ @@ -42,12 +44,17 @@ func cpCommand(cmd *cobra.Command, args []string) { // Create hash generator h := hash.NewSHA256Generator() - // Create processor - processor := processor.NewCpProcessor(source, dest, s, h) + // Create processor based on the append flag + var proc processor.Processor + if appendFlag { + proc = processor.NewDedupProcessor(source, dest, s, h, 10) + } else { + proc = processor.NewCpProcessor(source, dest, s, h) + } // Run the processor log.Printf("Copying %s to %s..", source, dest) - d := dabadee.NewDaBaDee(processor, verbose) + d := dabadee.NewDaBaDee(proc, verbose) if err := d.Run(); err != nil { log.Fatalf("Error during copy and link: %v", err) } diff --git a/cmd/dedup.go b/cmd/dedup.go index 7d02d49..4e6c6a6 100644 --- a/cmd/dedup.go +++ b/cmd/dedup.go @@ -24,6 +24,7 @@ func NewDedupCommand() *cobra.Command { cmd.Flags().BoolP("with-metadata", "m", false, "Include file metadata in hash calculation") cmd.Flags().BoolP("verbose", "v", false, "Verbose output") cmd.Flags().String("manifest-output", "", "Output manifest file to the given path") + cmd.Flags().String("dest", "", "Destination directory for copying deduplicated files") return cmd } @@ -33,6 +34,7 @@ func dedupCommand(cmd *cobra.Command, args []string) { withMetadata, _ := cmd.Flags().GetBool("with-metadata") verbose, _ := cmd.Flags().GetBool("verbose") outputManifest, _ := cmd.Flags().GetString("manifest-output") + destDir, _ := cmd.Flags().GetString("dest") workers, err := strconv.Atoi(workersStr) if err != nil { log.Fatalf("Invalid number of workers: %v", err) @@ -52,7 +54,7 @@ func dedupCommand(cmd *cobra.Command, args []string) { h := hash.NewSHA256Generator() // Create processor - processor := processor.NewDedupProcessor(source, s, h, workers) + processor := processor.NewDedupProcessor(source, destDir, s, h, workers) // Run the processor log.Printf("Deduplicating %s..", source) diff --git a/pkg/processor/dedup.go b/pkg/processor/dedup.go index 84d3476..f47b16e 100644 --- a/pkg/processor/dedup.go +++ b/pkg/processor/dedup.go @@ -23,6 +23,9 @@ type DedupProcessor struct { // Source is the path of the directory to deduplicate Source string + // DestDir is the path of the directory to copy deduplicated files to + DestDir string + // Storage is the storage interface to use Storage *storage.Storage @@ -40,9 +43,10 @@ type DedupProcessor struct { } // NewDedupProcessor creates a new DedupProcessor -func NewDedupProcessor(source string, storage *storage.Storage, hashGen hash.Generator, workers int) *DedupProcessor { +func NewDedupProcessor(source, destDir string, storage *storage.Storage, hashGen hash.Generator, workers int) *DedupProcessor { return &DedupProcessor{ Source: source, + DestDir: destDir, Storage: storage, HashGen: hashGen, Workers: workers, @@ -50,9 +54,9 @@ func NewDedupProcessor(source string, storage *storage.Storage, hashGen hash.Gen } } -// startProcessing marks the given hash as processing and returns a channel to +// dedupStartProcessing marks the given hash as processing and returns a channel to // wait on if the hash is already being processed -func startProcessing(hash string) (alreadyProcessing bool, waitChan chan struct{}) { +func dedupStartProcessing(hash string) (alreadyProcessing bool, waitChan chan struct{}) { globalLock.Lock() defer globalLock.Unlock() @@ -72,9 +76,9 @@ func startProcessing(hash string) (alreadyProcessing bool, waitChan chan struct{ return false, nil } -// finishProcessing marks the given hash as no longer processing and closes the +// dedupFinishProcessing marks the given hash as no longer processing and closes the // channel to signal that the processing has finished -func finishProcessing(hash string) { +func dedupFinishProcessing(hash string) { globalLock.Lock() defer globalLock.Unlock() @@ -142,7 +146,7 @@ func (p *DedupProcessor) processFile(path string) (err error) { } // Check if the file is already being processed - alreadyProcessing, waitChan := startProcessing(finalHash) + alreadyProcessing, waitChan := dedupStartProcessing(finalHash) if alreadyProcessing { <-waitChan // Wait for the processing to finish } @@ -151,7 +155,7 @@ func (p *DedupProcessor) processFile(path string) (err error) { dedupPath := filepath.Join(p.Storage.Opts.Root, finalHash) exists, err := p.Storage.FileExists(dedupPath) if err != nil { - finishProcessing(finalHash) + dedupFinishProcessing(finalHash) return fmt.Errorf("checking file existence in storage: %w", err) } @@ -159,14 +163,14 @@ func (p *DedupProcessor) processFile(path string) (err error) { // If the file does not exist in storage, move it there err = p.Storage.MoveFileToStorage(path, finalHash) if err != nil { - finishProcessing(finalHash) + dedupFinishProcessing(finalHash) return fmt.Errorf("moving file to storage: %w", err) } } else { // If the file already exists in storage, remove the source file err = os.Remove(path) if err != nil { - finishProcessing(finalHash) + dedupFinishProcessing(finalHash) return fmt.Errorf("removing source file: %w", err) } } @@ -176,14 +180,33 @@ func (p *DedupProcessor) processFile(path string) (err error) { p.FileMap[path] = finalHash p.mapMutex.Unlock() + // Create a link at the original location if _, err := os.Lstat(path); os.IsNotExist(err) { err = os.Link(dedupPath, path) if err != nil { - finishProcessing(finalHash) + dedupFinishProcessing(finalHash) return fmt.Errorf("creating link to deduplicated file: %w", err) } } - finishProcessing(finalHash) + // Create a link at the destination if DestDir is set + if p.DestDir != "" { + relativePath, err := filepath.Rel(p.Source, path) + if err != nil { + dedupFinishProcessing(finalHash) + return fmt.Errorf("getting relative path: %w", err) + } + + destPath := filepath.Join(p.DestDir, relativePath) + if _, err := os.Lstat(destPath); os.IsNotExist(err) { + err = os.Link(dedupPath, destPath) + if err != nil { + dedupFinishProcessing(finalHash) + return fmt.Errorf("creating link to deduplicated file in destination: %w", err) + } + } + } + + dedupFinishProcessing(finalHash) return nil }