fgerlits commented on a change in pull request #1283:
URL: https://github.com/apache/nifi-minifi-cpp/pull/1283#discussion_r839533598



##########
File path: PROCESSORS.md
##########
@@ -1025,6 +1026,46 @@ In the list below, the names of required properties 
appear in bold. Any other pr
 |success|All files are routed to success|
 
 
+## ListFile
+
+### Description
+
+Retrieves a listing of files from the local filesystem. For each file that is 
listed, creates a FlowFile that represents the file so that it can be fetched 
in conjunction with FetchFile.
+### Properties
+
+In the list below, the names of required properties appear in bold. Any other 
properties (not in bold) are considered optional. The table also indicates any 
default values, and whether a property supports the NiFi Expression Language.
+
+| Name | Default Value | Allowable Values | Description |
+| - | - | - | - |
+|**Input Directory**|||The input directory from which to pull files|
+|**Recurse Subdirectories**|true||Indicates whether to list files from 
subdirectories of the directory|
+|File Filter|||Only files whose names match the given regular expression will 
be picked up|
+|Path Filter|||When Recurse Subdirectories is true, then only subdirectories 
whose path matches the given regular expression will be scanned|
+|**Minimum File Age**|0 sec||The minimum age that a file must be in order to 
be pulled; any file younger than this amount of time (according to last 
modification date) will be ignored|
+|Maximum File Age|||The maximum age that a file must be in order to be pulled; 
any file older than this amount of time (according to last modification date) 
will be ignored|
+|**Minimum File Size**|0 B||The minimum size that a file must be in order to 
be pulled|
+|Maximum File Size|||The maximum size that a file can be in order to be pulled|
+|**Ignore Hidden Files**|true||Indicates whether or not hidden files should be 
ignored|
+### Relationships
+
+| Name | Description |
+| - | - |
+|success|All FlowFiles that are received are routed to success|
+
+### Output Attributes
+
+| Attribute                  | Relationship | Description                      
                                  |
+|----------------------------|--------------|--------------------------------------------------------------------|
+| _filename_                 | success      | The name of the file that was 
read from filesystem.                |
+| _path_                     | success      | The path is set to the relative 
path of the file's directory on filesystem compared to the Input Directory 
property. For example, if Input Directory is set to /tmp, then files picked up 
from /tmp will have the path attribute set to "/". If the Recurse 
Subdirectories property is set to true and a file is picked up from 
/tmp/abc/1/2/3, then the path attribute will be set to "abc/1/2/3/". |

Review comment:
       From the code below, it looks like the relative path of the current 
directory is ".", not "/".

##########
File path: libminifi/include/utils/file/FileUtils.h
##########
@@ -577,6 +616,146 @@ inline std::string get_file_content(const std::string 
&file_name) {
 bool contains(const std::filesystem::path& file_path, std::string_view 
text_to_search);
 
 
+inline std::optional<std::string> get_file_owner(const std::string& file_path) 
{
+#ifndef WIN32
+  struct stat info;
+  if (stat(file_path.c_str(), &info) != 0) {
+    return std::nullopt;
+  }
+
+  struct passwd pw;
+  pw.pw_name = 0;
+  struct passwd *result = nullptr;
+  char localbuf[1024] = {};
+  if (getpwuid_r(info.st_uid, &pw, localbuf, sizeof(localbuf), &result) != 0 
|| pw.pw_name == 0) {
+    return std::nullopt;
+  }
+
+  return std::string(pw.pw_name);
+#else
+  DWORD return_code = 0;
+  PSID sid_owner = NULL;
+  BOOL bool_return = TRUE;
+  LPTSTR account_name = NULL;
+  LPTSTR domain_name = NULL;
+  DWORD account_name_dword = 1;
+  DWORD domain_name_dword = 1;
+  SID_NAME_USE sid_type = SidTypeUnknown;
+  HANDLE file_handle;
+  PSECURITY_DESCRIPTOR sec_descriptor = NULL;
+
+  // Get the handle of the file object.
+  file_handle = CreateFile(
+    TEXT(file_path.c_str()),
+    GENERIC_READ,
+    FILE_SHARE_READ,
+    NULL,
+    OPEN_EXISTING,
+    FILE_ATTRIBUTE_NORMAL,
+    NULL);
+
+  // Check GetLastError for CreateFile error code.
+  if (file_handle == INVALID_HANDLE_VALUE) {
+    return std::nullopt;
+  }
+
+  // Get the owner SID of the file.
+  return_code = GetSecurityInfo(
+    file_handle,
+    SE_FILE_OBJECT,
+    OWNER_SECURITY_INFORMATION,
+    &sid_owner,
+    NULL,
+    NULL,
+    NULL,
+    &sec_descriptor);
+
+  // Check GetLastError for GetSecurityInfo error condition.
+  if (return_code != ERROR_SUCCESS) {
+    return std::nullopt;
+  }
+
+  // First call to LookupAccountSid to get the buffer sizes.
+  bool_return = LookupAccountSid(
+    NULL,
+    sid_owner,
+    account_name,
+    (LPDWORD)&account_name_dword,
+    domain_name,
+    (LPDWORD)&domain_name_dword,
+    &sid_type);
+
+  // Reallocate memory for the buffers.
+  account_name = (LPTSTR)GlobalAlloc(
+    GMEM_FIXED,
+    account_name_dword);
+
+  // Check GetLastError for GlobalAlloc error condition.
+  if (account_name == NULL) {
+    return std::nullopt;
+  }
+
+  domain_name = (LPTSTR)GlobalAlloc(
+    GMEM_FIXED,
+    domain_name_dword);
+
+  // Check GetLastError for GlobalAlloc error condition.
+  if (domain_name == NULL) {
+    GlobalFree(account_name);
+    return std::nullopt;
+  }
+
+  // Second call to LookupAccountSid to get the account name.
+  bool_return = LookupAccountSid(
+    NULL,                   // name of local or remote computer
+    sid_owner,              // security identifier
+    account_name,               // account name buffer
+    (LPDWORD)&account_name_dword,   // size of account name buffer
+    domain_name,             // domain name
+    (LPDWORD)&domain_name_dword,  // size of domain name buffer
+    &sid_type);                 // SID type
+
+  // Check GetLastError for LookupAccountSid error condition.
+  if (bool_return == FALSE) {
+    GlobalFree(account_name);
+    GlobalFree(domain_name);
+    return std::nullopt;
+  }
+
+  auto result = std::string(account_name);
+  GlobalFree(account_name);
+  GlobalFree(domain_name);
+  return result;
+#endif
+}
+
+#ifndef WIN32
+inline std::optional<std::string> get_file_group(const std::string& file_path) 
{
+  struct stat info;
+  if (stat(file_path.c_str(), &info) != 0) {
+    return std::nullopt;
+  }
+
+  struct group gr;
+  gr.gr_name = 0;
+  struct group *result = nullptr;
+  char localbuf[1024] = {};
+  if ((getgrgid_r(info.st_uid, &gr, localbuf, sizeof(localbuf), &result) != 0) 
|| gr.gr_name == 0) {
+    return std::nullopt;
+  }
+
+  return std::string(gr.gr_name);
+}
+#endif
+
+inline std::optional<std::string> get_relative_path(const std::string& path, 
const std::string& base_path) {
+  if (!utils::StringUtils::startsWith(path, base_path)) {
+    return std::nullopt;
+  }
+
+  return std::filesystem::relative(path, base_path);

Review comment:
       I don't think this will compile on Windows, as `filesystem::path` 
doesn't convert implicitly to `string` on Windows; you'll need to add 
`.string()`.

##########
File path: libminifi/include/utils/file/FileUtils.h
##########
@@ -577,6 +614,147 @@ inline std::string get_file_content(const std::string 
&file_name) {
 bool contains(const std::filesystem::path& file_path, std::string_view 
text_to_search);
 
 
+inline std::optional<std::string> get_file_owner(const std::string& file_path) 
{
+#ifndef WIN32
+  struct stat info;
+  if (stat(file_path.c_str(), &info) != 0) {
+    return std::nullopt;
+  }
+
+  struct passwd pw;
+  pw.pw_name = 0;
+  struct passwd *result = nullptr;
+  char localbuf[1024];
+  std::fill(localbuf, localbuf + sizeof(localbuf), 0);
+  if (getpwuid_r(info.st_uid, &pw, localbuf, sizeof(localbuf), &result) != 0 
|| pw.pw_name == 0) {
+    return std::nullopt;
+  }
+
+  return std::string(pw.pw_name);
+#else
+  DWORD return_code = 0;
+  PSID sid_owner = NULL;
+  BOOL bool_return = TRUE;
+  LPTSTR account_name = NULL;
+  LPTSTR domain_name = NULL;
+  DWORD account_name_dword = 1;
+  DWORD domain_name_dword = 1;
+  SID_NAME_USE sid_type = SidTypeUnknown;
+  HANDLE file_handle;
+  PSECURITY_DESCRIPTOR sec_descriptor = NULL;
+
+  // Get the handle of the file object.
+  file_handle = CreateFile(
+    TEXT(file_path.c_str()),
+    GENERIC_READ,
+    FILE_SHARE_READ,
+    NULL,
+    OPEN_EXISTING,
+    FILE_ATTRIBUTE_NORMAL,
+    NULL);
+
+  // Check GetLastError for CreateFile error code.
+  if (file_handle == INVALID_HANDLE_VALUE) {
+    return std::nullopt;
+  }
+
+  // Get the owner SID of the file.
+  return_code = GetSecurityInfo(
+    file_handle,
+    SE_FILE_OBJECT,
+    OWNER_SECURITY_INFORMATION,
+    &sid_owner,
+    NULL,
+    NULL,
+    NULL,
+    &sec_descriptor);
+
+  // Check GetLastError for GetSecurityInfo error condition.
+  if (return_code != ERROR_SUCCESS) {
+    return std::nullopt;
+  }
+
+  // First call to LookupAccountSid to get the buffer sizes.
+  bool_return = LookupAccountSid(
+    NULL,
+    sid_owner,
+    account_name,
+    (LPDWORD)&account_name_dword,
+    domain_name,
+    (LPDWORD)&domain_name_dword,
+    &sid_type);
+
+  // Reallocate memory for the buffers.
+  account_name = (LPTSTR)GlobalAlloc(
+    GMEM_FIXED,
+    account_name_dword);
+
+  // Check GetLastError for GlobalAlloc error condition.
+  if (account_name == NULL) {
+    return std::nullopt;
+  }
+
+  domain_name = (LPTSTR)GlobalAlloc(
+    GMEM_FIXED,
+    domain_name_dword);

Review comment:
       I think a pair of `gsl::finally()` guards would handle freeing these buffers in a better way




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to