nastra commented on code in PR #3:
URL: https://github.com/apache/iceberg-go/pull/3#discussion_r1325910141


##########
io/io.go:
##########
@@ -0,0 +1,327 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package io
+
+import (
+       "context"
+       "errors"
+       "fmt"
+       "io"
+       "io/fs"
+       "net/http"
+       "net/url"
+       "os"
+       "strings"
+
+       "github.com/aws/aws-sdk-go-v2/aws"
+       awshttp "github.com/aws/aws-sdk-go-v2/aws/transport/http"
+       "github.com/aws/aws-sdk-go-v2/config"
+       "github.com/aws/aws-sdk-go-v2/credentials"
+       "github.com/aws/aws-sdk-go-v2/service/s3"
+       "github.com/wolfeidau/s3iofs"
+)
+
+const (
+       S3RegionName      = "s3.region-name"
+       S3SessionToken    = "s3.session-token"
+       S3SecretAccessKey = "s3.secret-access-key"
+       S3AccessKeyID     = "s3.access-key-id"
+       S3EndpointURL     = "s3.region"
+       S3ProxyURI        = "s3.proxy-uri"
+)
+
+// IO is an interface to a hierarchical file system.
+//
+// The IO interface is the minimum implementation required for a file
+// system to utilize an iceberg table. A file system may implement
+// additional interfaces, such as ReadFileIO, to provide additional or
+// optimized functionality.
+type IO interface {
+       // Open opens the named file.
+       //
+       // When Open returns an error, it should be of type *PathError
+       // with the Op field set to "open", the Path field set to name,
+       // and the Err field describing the problem.
+       //
+       // Open should reject attempts to open names that do not satisfy
+       // fs.ValidPath(name), returning a *PathError with Err set to
+       // ErrInvalid or ErrNotExist.
+       Open(name string) (File, error)
+
+       // Remove removes the named file or (empty) directory.
+       //
+       // If there is an error, it will be of type *PathError.
+       Remove(name string) error
+}
+
+// ReadFileIO is the interface implemented by a file system that
+// provides an optimized implementation of ReadFile.
+type ReadFileIO interface {
+       IO
+
+       // ReadFile reads the named file and returns its contents.
+       // A successful call returns a nil error, not io.EOF.
+       // (Because ReadFile reads the whole file, the expected EOF
+       // from the final Read is not treated as an error to be reported.)
+       //
+       // The caller is permitted to modify the returned byte slice.
+       // This method should return a copy of the underlying data.
+       ReadFile(name string) ([]byte, error)
+}
+
+// A File provides access to a single file. The File interface is the
+// minimum implementation required for Iceberg to interact with a file.
+// Directory files should also implement ReadDirFile.
+type File interface {
+       fs.File
+       io.ReadSeekCloser
+       io.ReaderAt
+}
+
+// A ReadDirFile is a directory file whose entries can be read with the
+// ReadDir method. Every directory file should implement this interface.
+// (It is permissible for any file to implement this interface, but
+// if so ReadDir should return an error for non-directories.)
+type ReadDirFile interface {
+       File
+
+       // ReadDir reads the contents of the directory and returns a slice
+       // of up to n DirEntry values in directory order. Subsequent calls
+       // on the same file will yield further DirEntry values.
+       //
+       // If n > 0, ReadDir returns at most n DirEntry structures. In this
+       // case, if ReadDir returns an empty slice, it will return a non-nil
+       // error explaining why.
+       //
+       // At the end of a directory, the error is io.EOF. (ReadDir must return
+       // io.EOF itself, not an error wrapping io.EOF.)
+       //
+       // If n <= 0, ReadDir returns all the DirEntry values from the directory
+       // in a single slice. In this case, if ReadDir succeeds (reads all the way
+       // to the end of the directory), it returns the slice and a nil error.
+       // If it encounters an error before the end of the directory, ReadDir
+       // returns the DirEntry list read until that point and a non-nil error.
+       ReadDir(n int) ([]fs.DirEntry, error)
+}
+
+// FS wraps an io/fs.FS as an IO interface.
+func FS(fsys fs.FS) IO {
+       if _, ok := fsys.(fs.ReadFileFS); ok {
+               return readFileFS{ioFS{fsys, nil}}
+       }
+       return ioFS{fsys, nil}
+}
+
+// FSPreProcName wraps an io/fs.FS like FS, only if fn is non-nil then
+// it is called to preprocess any filenames before they are passed to
+// the underlying fsys.
+func FSPreProcName(fsys fs.FS, fn func(string) string) IO {
+       if _, ok := fsys.(fs.ReadFileFS); ok {
+               return readFileFS{ioFS{fsys, fn}}
+       }
+       return ioFS{fsys, fn}
+}
+
+type readFileFS struct {
+       ioFS
+}
+
+func (r readFileFS) ReadFile(name string) ([]byte, error) {
+       if r.preProcessName != nil {
+               name = r.preProcessName(name)
+       }
+
+       rfs, ok := r.fsys.(fs.ReadFileFS)
+       if !ok {
+               return nil, errMissingReadFile
+       }
+       return rfs.ReadFile(name)
+}
+
+type ioFS struct {
+       fsys fs.FS
+
+       preProcessName func(string) string
+}
+
+func (f ioFS) Open(name string) (File, error) {
+       if f.preProcessName != nil {
+               name = f.preProcessName(name)
+       }
+
+       if name == "/" {
+               name = "."
+       } else {
+               name = strings.TrimPrefix(name, "/")
+       }
+       file, err := f.fsys.Open(name)
+       if err != nil {
+               return nil, err
+       }
+
+       return ioFile{file}, nil
+}
+
+func (f ioFS) Remove(name string) error {
+       r, ok := f.fsys.(interface{ Remove(name string) error })
+       if !ok {
+               return errMissingRemove
+       }
+       return r.Remove(name)
+}
+
+var (
+       errMissingReadDir  = errors.New("fs.File directory missing ReadDir method")
+       errMissingSeek     = errors.New("fs.File missing Seek method")
+       errMissingReadAt   = errors.New("fs.File missing ReadAt")
+       errMissingRemove   = errors.New("fs.FS missing Remove method")
+       errMissingReadFile = errors.New("fs.FS missing ReadFile method")
+)
+
+type ioFile struct {
+       file fs.File
+}
+
+func (f ioFile) Close() error               { return f.file.Close() }
+func (f ioFile) Read(b []byte) (int, error) { return f.file.Read(b) }
+func (f ioFile) Stat() (fs.FileInfo, error) { return f.file.Stat() }
+func (f ioFile) Seek(offset int64, whence int) (int64, error) {
+       s, ok := f.file.(io.Seeker)
+       if !ok {
+               return 0, errMissingSeek
+       }
+       return s.Seek(offset, whence)
+}
+
+func (f ioFile) ReadAt(p []byte, off int64) (n int, err error) {
+       r, ok := f.file.(io.ReaderAt)
+       if !ok {
+               return 0, errMissingReadAt
+       }
+       return r.ReadAt(p, off)
+}
+
+func (f ioFile) ReadDir(count int) ([]fs.DirEntry, error) {
+       d, ok := f.file.(fs.ReadDirFile)
+       if !ok {
+               return nil, errMissingReadDir
+       }
+
+       return d.ReadDir(count)
+}
+
+func inferFileIOFromSchema(path string, props map[string]string) (IO, error) {
+       parsed, err := url.Parse(path)
+       if err != nil {
+               return nil, err
+       }
+
+       switch parsed.Scheme {
+       case "s3", "s3a", "s3n":
+               opts := []func(*config.LoadOptions) error{}
+               endpoint, ok := props[S3EndpointURL]
+               if !ok {
+                       endpoint = os.Getenv("AWS_S3_ENDPOINT")
+               }
+
+               if endpoint != "" {

Review Comment:
   I would rather move all of these S3-related things to s3.go or something.
Otherwise this code will get quite difficult to read later when, e.g., GCS
or Azure support is added.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to