Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package fdupes for openSUSE:Factory checked in at 2022-03-17 17:01:14 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/fdupes (Old) and /work/SRC/openSUSE:Factory/.fdupes.new.25692 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "fdupes" Thu Mar 17 17:01:14 2022 rev:31 rq:961812 version:2.1.2 Changes: -------- --- /work/SRC/openSUSE:Factory/fdupes/fdupes.changes 2020-08-23 09:18:35.826599271 +0200 +++ /work/SRC/openSUSE:Factory/.fdupes.new.25692/fdupes.changes 2022-03-17 17:01:21.905679238 +0100 @@ -1,0 +2,21 @@ +Tue Mar 15 07:41:35 UTC 2022 - Stephan Kulow <[email protected]> + +- A more correct approach to creating symlinks (old bug actually): + Do not link the files as given by fdupes, but turn them into + relative links (it works by chance if given a buildroot, but + fails if running on a subdirectory) +- Support multiple directories given (as glob to the macro) + +------------------------------------------------------------------- +Mon Mar 14 13:44:54 UTC 2022 - Stephan Kulow <[email protected]> + +- Handle symlinks (-s argument) correctly + +------------------------------------------------------------------- +Sat Mar 12 08:17:37 UTC 2022 - Stephan Kulow <[email protected]> + +- Simplify macros.fdupes with a call to a C++ program that does + the same within a fraction of a second what the shell loop did + in many seconds (bsc#1195709) + +------------------------------------------------------------------- New: ---- fdupes_wrapper.cpp ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ fdupes.spec ++++++ --- /var/tmp/diff_new_pack.K5gcgh/_old 2022-03-17 17:01:22.377679598 +0100 +++ /var/tmp/diff_new_pack.K5gcgh/_new 2022-03-17 17:01:22.377679598 +0100 @@ -1,7 +1,7 @@ # # spec file for package fdupes # -# Copyright (c) 2016 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2022 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -12,9 +12,10 @@ # license that conforms to the Open Source Definition (Version 1.9) # published by the Open Source Initiative. -# Please submit bugfixes or comments via http://bugs.opensuse.org/ +# Please submit bugfixes or comments via https://bugs.opensuse.org/ # + %{?!_rpmmacrodir:%define _rpmmacrodir /usr/lib/rpm/macros.d} Name: fdupes @@ -23,9 +24,11 @@ Summary: Tool to identify or delete duplicate files License: MIT Group: Productivity/Archiving/Compression -Url: https://github.com/adrianlopezroche/fdupes +URL: https://github.com/adrianlopezroche/fdupes Source0: https://github.com/adrianlopezroche/fdupes/releases/download/v%{version}/fdupes-%{version}.tar.gz Source1: macros.fdupes +Source2: fdupes_wrapper.cpp +BuildRequires: gcc-c++ %description FDUPES is a program for identifying or deleting duplicate files @@ -37,10 +40,12 @@ %build %configure --without-ncurses %make_build +g++ $RPM_OPT_FLAGS %{S:2} -o fdupes_wrapper %install %make_install install -D -m644 %{SOURCE1} %{buildroot}%{_rpmmacrodir}/macros.%{name} +install -D -m755 fdupes_wrapper %{buildroot}/usr/lib/rpm/fdupes_wrapper %check ./%{name} testdir @@ -53,5 +58,6 @@ %{_bindir}/%{name} %{_mandir}/man1/%{name}.1* %{_rpmmacrodir}/macros.%{name} +/usr/lib/rpm/fdupes_wrapper %changelog ++++++ fdupes_wrapper.cpp ++++++ /* * A little helper to wrap around fdupes and create hard/soft links of the * dups found. Used in openSUSE rpm. * * Copyright 2022 Jiri Slaby <[email protected]> * 2022 Stephan Kulow <[email protected]> * * SPDX-License-Identifier: MIT */ #include <algorithm> #include <array> #include <iostream> #include <list> #include <map> #include <string> #include <sys/param.h> #include <sys/stat.h> #include <tuple> #include <unistd.h> #include <utility> #include <vector> #include <sstream> using namespace std; typedef std::map<ino_t, std::vector<std::string>> dups_map; typedef std::pair<ino_t, size_t> nlink_pair; vector<string> split_paths(const string& path) { string token; vector<string> paths; stringstream ss(path); while (getline(ss, token, '/')) { if (token == "..") { paths.pop_back(); } else if (token != "." || ss.eof()) { paths.push_back(token); } } return paths; } string merge_paths(vector<string> paths) { string path; for (const auto& s : paths) { if (s.empty()) continue; if (!path.empty()) path += "/"; path += s; } return path; } string relative(const string& p1, const string& p2) { vector<string> paths1 = split_paths(p1); paths1.pop_back(); vector<string> paths2 = split_paths(p2); vector<string> paths; vector<string>::const_iterator it1 = paths1.begin(); vector<string>::const_iterator it2 = paths2.begin(); // first remove the common parts while (it1 != paths1.end() && *it1 == *it2) { it1++; it2++; } for (; it1 != paths1.end(); ++it1) { paths.push_back(".."); } for (; it2 != paths2.end(); ++it2) { paths.push_back(*it2); } return merge_paths(paths); } bool cmp_nlink(const nlink_pair& a, const nlink_pair& b) { return a.second > b.second; } void sort_by_count(const dups_map& in, std::vector<ino_t>& out) { out.clear(); std::list<nlink_pair> nlinks; for (auto it = in.cbegin(); it != in.cend(); ++it) { nlinks.push_back(std::make_pair(it->first, it->second.size())); } nlinks.sort(cmp_nlink); for (auto it = nlinks.cbegin(); it != nlinks.cend(); ++it) { out.push_back(it->first); } } void link_file(const std::string& file, const std::string& target, bool symlink) { std::cout << "Linking " << file << " -> " << target << std::endl; if (unlink(file.c_str())) { std::cerr << "Removing '" << file << "' failed." << std::endl; exit(1); } int ret; if (symlink) { ret = ::symlink(target.c_str(), file.c_str()); } else { ret = link(target.c_str(), file.c_str()); } if (ret) { std::cerr << "Linking '" << file << "' failed." << std::endl; exit(1); } } std::string target_for_link(string target, const std::string &file, bool symlink) { if (!symlink) // hardlinks don't care return target; return relative(file, target); } void handle_dups(const dups_map& dups, const std::string& buildroot, bool symlink) { // all are hardlinks to the same data if (dups.size() < 2) return; std::vector<ino_t> sorted; sort_by_count(dups, sorted); auto inodes = sorted.begin(); std::string target = dups.at(*inodes).front(); for (++inodes; inodes != sorted.end(); ++inodes) { const std::vector<std::string> files = dups.at(*inodes); for (auto it = files.begin(); it != files.end(); ++it) { link_file(*it, target_for_link(target, *it, symlink), symlink); } } } int main(int argc, char** argv) { bool symlink = false; std::vector<std::string> roots; std::string buildroot; while (1) { int result = getopt(argc, argv, "sb:"); if (result == -1) break; /* end of list */ switch (result) { case 's': symlink = true; break; default: /* unknown */ break; } } while (optind < argc) { std::string root = argv[optind++]; if (root.front() != '/') { char buffer[PATH_MAX]; root = std::string(getcwd(buffer, PATH_MAX)) + '/' + root; } roots.push_back(root); } if (roots.empty()) { std::cerr << "Missing directory argument."; } /* fdupes options used: -q: hide progress indicator -p: don't consider files with different owner/group or permission bits as duplicates -n: exclude zero-length files from consideration -o name: output order of duplicates -r: follow subdirectories -H: also report hard links as duplicates */ std::string command = "fdupes -q -p -r -n -o name"; if (!symlink) { /* if we create symlinks, avoid looking at hard links being duplicated. This way fdupes is faster and won't break them up anyway */ command += " -H"; } for (auto it = roots.begin(); it != roots.end(); ++it) { command += " '" + *it + "'"; } FILE* pipe = popen(command.c_str(), "r"); if (!pipe) { throw std::runtime_error("popen() failed!"); } std::array<char, MAXPATHLEN> buffer; dups_map dups; while (fgets(buffer.data(), buffer.size(), pipe) != nullptr) { std::string line = buffer.data(); if (line.length() < 2) { handle_dups(dups, buildroot, symlink); dups.clear(); continue; } if (line.back() != '\n') { std::cerr << "Too long lines? '" << line << "'" << std::endl; return 1; } line.pop_back(); struct stat sb; if (stat(line.c_str(), &sb)) { std::cerr << "Stat on '" << buffer.data() << "' failed" << std::endl; return 1; } dups[sb.st_ino].push_back(line); } pclose(pipe); return 0; } ++++++ macros.fdupes ++++++ --- /var/tmp/diff_new_pack.K5gcgh/_old 2022-03-17 17:01:22.437679645 +0100 +++ /var/tmp/diff_new_pack.K5gcgh/_new 2022-03-17 17:01:22.441679648 +0100 @@ -1,22 +1,2 @@ -%fdupes(s) \ - _target=""; \ - _symlinks=0; \ - %{-s:_symlinks=1;} \ - fdupes -q -p -n -H -o name -r %1 | \ - while read _file; do \ - if test -z "$_target" ; then \ - _target="$_file"; \ - else \ - if test -z "$_file" ; then \ - _target=""; \ - continue ; \ - fi ; \ - if test "$_symlinks" = 1; then \ - ln -sf "${_target#%{buildroot}}" "$_file"; \ - else \ - ln -f "$_target" "$_file"; \ - fi ;\ - fi ; \ - done \ -%{nil} +%fdupes /usr/lib/rpm/fdupes_wrapper
