Re: [HACKERS] pg_basebackup stream xlog to tar

2016-10-25 Thread Michael Paquier
On Wed, Oct 26, 2016 at 2:00 AM, Magnus Hagander  wrote:
> Thanks, applied and pushed.

Thanks.
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-10-25 Thread Magnus Hagander
On Tue, Oct 25, 2016 at 2:52 PM, Michael Paquier 
wrote:

> On Tue, Oct 25, 2016 at 7:12 PM, Magnus Hagander 
> wrote:
> > On Mon, Oct 24, 2016 at 7:46 AM, Michael Paquier <
> michael.paqu...@gmail.com>
> > wrote:
> >>
> >> On Sun, Oct 23, 2016 at 10:28 PM, Magnus Hagander 
> >> wrote:
> >> +   if (format == 'p')
> >> +   stream.walmethod = CreateWalDirectoryMethod(param->xlog,
> do_sync);
> >> +   else
> >> +   stream.walmethod = CreateWalTarMethod(param->xlog,
> >> compresslevel, do_sync);
> >> LogStreamerMain() exits immediately once it is done, but I think that
> >> we had better be tidy here and clean up the WAL methods that have been
> >> allocated. I am thinking here about a potentially retry method on
> >> failure, though the best shot in this area would be with
> >> ReceiveXlogStream().
> >
> > Wouldn't the same be needed in pg_receivexlog.c in that case?
>
> Oops, missed that. Thanks for the extra checks. Attached is an updated
> patch.
>

Thanks, applied and pushed.

-- 
 Magnus Hagander
 Me: http://www.hagander.net/
 Work: http://www.redpill-linpro.com/


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-10-25 Thread Michael Paquier
On Tue, Oct 25, 2016 at 7:12 PM, Magnus Hagander  wrote:
> On Mon, Oct 24, 2016 at 7:46 AM, Michael Paquier 
> wrote:
>>
>> On Sun, Oct 23, 2016 at 10:28 PM, Magnus Hagander 
>> wrote:
>> +   if (format == 'p')
>> +   stream.walmethod = CreateWalDirectoryMethod(param->xlog, do_sync);
>> +   else
>> +   stream.walmethod = CreateWalTarMethod(param->xlog,
>> compresslevel, do_sync);
>> LogStreamerMain() exits immediately once it is done, but I think that
>> we had better be tidy here and clean up the WAL methods that have been
>> allocated. I am thinking here about a potentially retry method on
>> failure, though the best shot in this area would be with
>> ReceiveXlogStream().
>
> Wouldn't the same be needed in pg_receivexlog.c in that case?

Oops, missed that. Thanks for the extra checks. Attached is an updated patch.
-- 
Michael
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 16cab97..e2875df 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -495,6 +495,13 @@ LogStreamerMain(logstreamer_param *param)
 	}
 
 	PQfinish(param->bgconn);
+
+	if (format == 'p')
+		FreeWalDirectoryMethod();
+	else
+		FreeWalTarMethod();
+	pg_free(stream.walmethod);
+
 	return 0;
 }
 
diff --git a/src/bin/pg_basebackup/pg_receivexlog.c b/src/bin/pg_basebackup/pg_receivexlog.c
index bbdf96e..99445e6 100644
--- a/src/bin/pg_basebackup/pg_receivexlog.c
+++ b/src/bin/pg_basebackup/pg_receivexlog.c
@@ -352,6 +352,10 @@ StreamLog(void)
 	}
 
 	PQfinish(conn);
+
+	FreeWalDirectoryMethod();
+	pg_free(stream.walmethod);
+
 	conn = NULL;
 }
 
diff --git a/src/bin/pg_basebackup/walmethods.c b/src/bin/pg_basebackup/walmethods.c
index d1dc046..656622f 100644
--- a/src/bin/pg_basebackup/walmethods.c
+++ b/src/bin/pg_basebackup/walmethods.c
@@ -299,6 +299,13 @@ CreateWalDirectoryMethod(const char *basedir, bool sync)
 	return method;
 }
 
+void
+FreeWalDirectoryMethod(void)
+{
+	pg_free(dir_data->basedir);
+	pg_free(dir_data);
+}
+
 
 /*-
  * WalTarMethod - write wal to a tar file containing pg_xlog contents
@@ -611,6 +618,9 @@ tar_sync(Walfile f)
 	Assert(f != NULL);
 	tar_clear_error();
 
+	if (!tar_data->sync)
+		return 0;
+
 	/*
 	 * Always sync the whole tarfile, because that's all we can do. This makes
 	 * no sense on compressed files, so just ignore those.
@@ -842,7 +852,8 @@ tar_finish(void)
 #endif
 
 	/* sync the empty blocks as well, since they're after the last file */
-	fsync(tar_data->fd);
+	if (tar_data->sync)
+		fsync(tar_data->fd);
 
 	if (close(tar_data->fd) != 0)
 		return false;
@@ -890,3 +901,14 @@ CreateWalTarMethod(const char *tarbase, int compression, bool sync)
 
 	return method;
 }
+
+void
+FreeWalTarMethod(void)
+{
+	pg_free(tar_data->tarfilename);
+#ifdef HAVE_LIBZ
+	if (tar_data->compression)
+		 pg_free(tar_data->zlibOut);
+#endif
+	pg_free(tar_data);
+}
diff --git a/src/bin/pg_basebackup/walmethods.h b/src/bin/pg_basebackup/walmethods.h
index 0c8eac7..8cea8ff 100644
--- a/src/bin/pg_basebackup/walmethods.h
+++ b/src/bin/pg_basebackup/walmethods.h
@@ -43,3 +43,7 @@ struct WalWriteMethod
  */
 WalWriteMethod *CreateWalDirectoryMethod(const char *basedir, bool sync);
 WalWriteMethod *CreateWalTarMethod(const char *tarbase, int compression, bool sync);
+
+/* Cleanup routines for previously-created methods */
+void FreeWalDirectoryMethod(void);
+void FreeWalTarMethod(void);

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-10-25 Thread Magnus Hagander
On Mon, Oct 24, 2016 at 7:46 AM, Michael Paquier 
wrote:

> On Sun, Oct 23, 2016 at 10:28 PM, Magnus Hagander 
> wrote:
> > It also broke the tests and invalidated some documentation. But it was
> easy
> > enough to fix.
> >
> > I've now applied this, so next time you get to do the merging :P Joking
> > aside, please review and let me know if you can spot something I messed
> up
> > in the final merge.
>
> Just had another look at it..
> +static int
> +tar_fsync(Walfile f)
> +{
> +   Assert(f != NULL);
> +   tar_clear_error();
> +
> +   /*
> +* Always sync the whole tarfile, because that's all we can do. This
> makes
> +* no sense on compressed files, so just ignore those.
> +*/
> +   if (tar_data->compression)
> +   return 0;
> +
> +   return fsync(tar_data->fd);
> +}
> fsync() should not be called here if --no-sync is used.
>
> +   /* sync the empty blocks as well, since they're after the last file */
> +   fsync(tar_data->fd);
> Similarly, the fsync call of tar_finish() should not happen when
> --no-sync is used.
>

Yeah, agreed.



> +   if (format == 'p')
> +   stream.walmethod = CreateWalDirectoryMethod(param->xlog, do_sync);
> +   else
> +   stream.walmethod = CreateWalTarMethod(param->xlog,
> compresslevel, do_sync);
> LogStreamerMain() exits immediately once it is done, but I think that
> we had better be tidy here and clean up the WAL methods that have been
> allocated. I am thinking here about a potentially retry method on
> failure, though the best shot in this area would be with
> ReceiveXlogStream().
>


Wouldn't the same be needed in pg_receivexlog.c in that case?

-- 
 Magnus Hagander
 Me: http://www.hagander.net/
 Work: http://www.redpill-linpro.com/


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-10-23 Thread Michael Paquier
On Sun, Oct 23, 2016 at 10:28 PM, Magnus Hagander  wrote:
> It also broke the tests and invalidated some documentation. But it was easy
> enough to fix.
>
> I've now applied this, so next time you get to do the merging :P Joking
> aside, please review and let me know if you can spot something I messed up
> in the final merge.

Just had another look at it..
+static int
+tar_fsync(Walfile f)
+{
+   Assert(f != NULL);
+   tar_clear_error();
+
+   /*
+* Always sync the whole tarfile, because that's all we can do. This makes
+* no sense on compressed files, so just ignore those.
+*/
+   if (tar_data->compression)
+   return 0;
+
+   return fsync(tar_data->fd);
+}
fsync() should not be called here if --no-sync is used.

+   /* sync the empty blocks as well, since they're after the last file */
+   fsync(tar_data->fd);
Similarly, the fsync call of tar_finish() should not happen when
--no-sync is used.

+   if (format == 'p')
+   stream.walmethod = CreateWalDirectoryMethod(param->xlog, do_sync);
+   else
+   stream.walmethod = CreateWalTarMethod(param->xlog,
compresslevel, do_sync);
LogStreamerMain() exits immediately once it is done, but I think that
we had better be tidy here and clean up the WAL methods that have been
allocated. I am thinking here about a potentially retry method on
failure, though the best shot in this area would be with
ReceiveXlogStream().

Attached is a patch addressing those points.
-- 
Michael


pg_basebackup-tar-fixes.patch
Description: invalid/octet-stream

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-10-23 Thread Michael Paquier
On Mon, Oct 24, 2016 at 1:38 PM, Andres Freund  wrote:
> On 2016-10-17 14:37:05 +0900, Michael Paquier wrote:
>> 2) Add an option to pg_xlogdump to be able to output its output to a
>> file. That would be awkward to rely on grabbing the output data from a
>> pipe... On Windows particularly. Thinking about it, would that
>> actually be useful to others? That's not a complicated patch.
>
> Hm? Just redirecting output seems less complicated? And afaik works on
> windows as well?

In the TAP suite STDOUT is already redirect to the log files. Perhaps
we could just do a SELECT FILE; to redirect the output of pg_xlogdump
temporarily into a custom location just for the sake of a test like
that.
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-10-23 Thread Andres Freund
On 2016-10-17 14:37:05 +0900, Michael Paquier wrote:
> 2) Add an option to pg_xlogdump to be able to output its output to a
> file. That would be awkward to rely on grabbing the output data from a
> pipe... On Windows particularly. Thinking about it, would that
> actually be useful to others? That's not a complicated patch.

Hm? Just redirecting output seems less complicated? And afaik works on
windows as well?

Greetings,

Andres Freund


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-10-23 Thread Michael Paquier
On Sun, Oct 23, 2016 at 10:52 PM, Michael Paquier
 wrote:
> On Sun, Oct 23, 2016 at 10:30 PM, Magnus Hagander  wrote:
>> I think both of those would be worthwhile. Just for the testability in
>> itself, but such a flag to pg_xlogdump would probably be useful in other
>> cases as well, beyond just the testing.
>
> Looking quickly at the code, it does not seem that complicated... I
> may just send patches tomorrow for all those things and be done with
> it, all that on its new dedicated thread.

Actually not that much after noticing that pg_xlogdump emulates some
of the backend's StringInfo routines and calls at the end vprintf() to
output everything to stdout, which is ugly. The cleanest solution here
would be to make StringInfo a bit more portable and allow them for
frontends, somehting that may be useful for any utility playing with
rm_desc. A less cleaner solution would be to somewhat store a fd
pointing to a file (or stdout) into compat.c and output to it. I'd
slightly prefer the first solution, but that does not seem worth the
effort just for pg_xlogdump and one test.
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-10-23 Thread Michael Paquier
On Sun, Oct 23, 2016 at 10:30 PM, Magnus Hagander  wrote:
> On Mon, Oct 17, 2016 at 7:37 AM, Michael Paquier 
> wrote:
>> > But independent of this patch, actually putting that test in for non-tar
>> > mode would probably not be a bad idea -- if that breaks, it's likely
>> > both
>> > break, after all.
>>
>> Agreed (you were able to break only tar upthread with your patch). One
>> way to do that elegantly would be to:
>> 1) extend slurp_dir to return only files that have a matching pattern.
>> That's not difficult to do:
>> --- a/src/test/perl/TestLib.pm
>> +++ b/src/test/perl/TestLib.pm
>> @@ -184,10 +184,14 @@ sub generate_ascii_string
>>
>>  sub slurp_dir
>>  {
>> -   my ($dir) = @_;
>> +   my ($dir, $match_pattern) = @_;
>> opendir(my $dh, $dir)
>>   or die "could not opendir \"$dir\": $!";
>> my @direntries = readdir $dh;
>> +   if (defined($match_pattern))
>> +   {
>> +   @direntries = grep($match_pattern, @direntries);
>> +   }
>> closedir $dh;
>> return @direntries;
>>  }
>> Sorting them at the same time may be a good idea..
>> 2) Add an option to pg_xlogdump to be able to output its output to a
>> file. That would be awkward to rely on grabbing the output data from a
>> pipe... On Windows particularly. Thinking about it, would that
>> actually be useful to others? That's not a complicated patch.
>
> I think both of those would be worthwhile. Just for the testability in
> itself, but such a flag to pg_xlogdump would probably be useful in other
> cases as well, beyond just the testing.

Looking quickly at the code, it does not seem that complicated... I
may just send patches tomorrow for all those things and be done with
it, all that on its new dedicated thread.
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-10-23 Thread Magnus Hagander
On Mon, Oct 17, 2016 at 7:37 AM, Michael Paquier 
wrote:

> On Sat, Oct 15, 2016 at 8:51 AM, Magnus Hagander 
> wrote:
> > Fixed.
>
> Ok, I had a extra look on the patch:
> +   The transactionn log files are written to a separate file
> +called pg_xlog.tar.
> +   
> s/transactionn/transaction/, and the  markup should be on its own
> line.
>
> +   if (dir_data->sync)
> +   {
> +   if (fsync_fname(tmppath, false, progname) != 0)
> +   {
> +   close(fd);
> +   return NULL;
> +   }
> +   if (fsync_parent_path(tmppath, progname) != 0)
> +   {
> +   close(fd);
> +   return NULL;
> +   }
> +   }
> Nit: squashing both things together would simplify the code.
>
> +   else if (method == CLOSE_UNLINK
> +   )
> Your finger slipped here.
>
> Except that it looks in pretty good to me, so I am switching that as
> ready for committer.
>

I incorporated those changes before pushing.



> > But independent of this patch, actually putting that test in for non-tar
> > mode would probably not be a bad idea -- if that breaks, it's likely both
> > break, after all.
>
> Agreed (you were able to break only tar upthread with your patch). One
> way to do that elegantly would be to:
> 1) extend slurp_dir to return only files that have a matching pattern.
> That's not difficult to do:
> --- a/src/test/perl/TestLib.pm
> +++ b/src/test/perl/TestLib.pm
> @@ -184,10 +184,14 @@ sub generate_ascii_string
>
>  sub slurp_dir
>  {
> -   my ($dir) = @_;
> +   my ($dir, $match_pattern) = @_;
> opendir(my $dh, $dir)
>   or die "could not opendir \"$dir\": $!";
> my @direntries = readdir $dh;
> +   if (defined($match_pattern))
> +   {
> +   @direntries = grep($match_pattern, @direntries);
> +   }
> closedir $dh;
> return @direntries;
>  }
> Sorting them at the same time may be a good idea..
> 2) Add an option to pg_xlogdump to be able to output its output to a
> file. That would be awkward to rely on grabbing the output data from a
> pipe... On Windows particularly. Thinking about it, would that
> actually be useful to others? That's not a complicated patch.
>

I think both of those would be worthwhile. Just for the testability in
itself, but such a flag to pg_xlogdump would probably be useful in other
cases as well, beyond just the testing.

-- 
 Magnus Hagander
 Me: http://www.hagander.net/
 Work: http://www.redpill-linpro.com/


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-10-23 Thread Magnus Hagander
On Fri, Oct 21, 2016 at 2:02 PM, Michael Paquier 
wrote:

> On Mon, Oct 17, 2016 at 2:37 PM, Michael Paquier
>  wrote:
> > Except that it looks in pretty good to me, so I am switching that as
> > ready for committer.
>
> +   /*
> +* Create pg_xlog/archive_status (and thus pg_xlog) so we can
> write to
> +* basedir/pg_xlog as the directory entry in the tar file may
> arrive
> +* later.
> +*/
> +   snprintf(statusdir, sizeof(statusdir), "%s/pg_xlog/archive_status",
> +basedir);
>
> This part conflicts with f82ec32, where you need make pg_basebackup
> aware of the backend version.. I promise that's the last conflict, at
> least I don't have more patches planned in the area.
>

It also broke the tests and invalidated some documentation. But it was easy
enough to fix.

I've now applied this, so next time you get to do the merging :P Joking
aside, please review and let me know if you can spot something I messed up
in the final merge.

Thanks for your repeated reviews!

-- 
 Magnus Hagander
 Me: http://www.hagander.net/
 Work: http://www.redpill-linpro.com/


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-10-21 Thread Michael Paquier
On Mon, Oct 17, 2016 at 2:37 PM, Michael Paquier
 wrote:
> Except that it looks in pretty good to me, so I am switching that as
> ready for committer.

+   /*
+* Create pg_xlog/archive_status (and thus pg_xlog) so we can write to
+* basedir/pg_xlog as the directory entry in the tar file may arrive
+* later.
+*/
+   snprintf(statusdir, sizeof(statusdir), "%s/pg_xlog/archive_status",
+basedir);

This part conflicts with f82ec32, where you need make pg_basebackup
aware of the backend version.. I promise that's the last conflict, at
least I don't have more patches planned in the area.
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-10-16 Thread Michael Paquier
On Sat, Oct 15, 2016 at 8:51 AM, Magnus Hagander  wrote:
> Fixed.

Ok, I had a extra look on the patch:
+   The transactionn log files are written to a separate file
+called pg_xlog.tar.
+   
s/transactionn/transaction/, and the  markup should be on its own line.

+   if (dir_data->sync)
+   {
+   if (fsync_fname(tmppath, false, progname) != 0)
+   {
+   close(fd);
+   return NULL;
+   }
+   if (fsync_parent_path(tmppath, progname) != 0)
+   {
+   close(fd);
+   return NULL;
+   }
+   }
Nit: squashing both things together would simplify the code.

+   else if (method == CLOSE_UNLINK
+   )
Your finger slipped here.

Except that it looks in pretty good to me, so I am switching that as
ready for committer.

> But independent of this patch, actually putting that test in for non-tar
> mode would probably not be a bad idea -- if that breaks, it's likely both
> break, after all.

Agreed (you were able to break only tar upthread with your patch). One
way to do that elegantly would be to:
1) extend slurp_dir to return only files that have a matching pattern.
That's not difficult to do:
--- a/src/test/perl/TestLib.pm
+++ b/src/test/perl/TestLib.pm
@@ -184,10 +184,14 @@ sub generate_ascii_string

 sub slurp_dir
 {
-   my ($dir) = @_;
+   my ($dir, $match_pattern) = @_;
opendir(my $dh, $dir)
  or die "could not opendir \"$dir\": $!";
my @direntries = readdir $dh;
+   if (defined($match_pattern))
+   {
+   @direntries = grep($match_pattern, @direntries);
+   }
closedir $dh;
return @direntries;
 }
Sorting them at the same time may be a good idea..
2) Add an option to pg_xlogdump to be able to output its output to a
file. That would be awkward to rely on grabbing the output data from a
pipe... On Windows particularly. Thinking about it, would that
actually be useful to others? That's not a complicated patch.
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-10-14 Thread Magnus Hagander
On Tue, Oct 4, 2016 at 12:05 AM, Michael Paquier 
wrote:

> (Squashing two emails into one)
>
> On Fri, Sep 30, 2016 at 11:16 PM, Magnus Hagander 
> wrote:
> > And here's yet another version, now rebased on top of the fsync and
> nosync
> > changes that got applied.
>
> My fault :p
>

Yes, definitely :P



> > In particular, this conflicted with pretty much every single change from
> the
> > fsync patch, so I'm definitely looking for another round of review before
> > this can be committed.
>
> Could you rebase once again? This is conflicting with the recent
> changes in open_walfile() and close_walfile() of 728ceba.
>

Done.



> > I ended up moving much of the fsync stuff into walmethods.c, since they
> were
> > dependent on if we used tar or not (obviously only the parts about the
> wal,
> > not the basebackup). So there's a significant risk that I missed
> something
> > there.
>
> +   /* If we have a temp prefix, normal is we rename the file */
> +   snprintf(tmppath, sizeof(tmppath), "%s/%s%s",
> +dir_data->basedir, df->pathname, df->temp_suffix);
> This comment could be improved, like "normal operation is to rename
> the file" for example.
>

Agreed and fixed.



> +   if (lseek(fd, 0, SEEK_SET) != 0)
> +   return NULL;
> [...]
> +   if (fsync_fname(f->fullpath, false, progname) != 0)
> +   return NULL;
> +   if (fsync_parent_path(f->fullpath, progname) != 0)
> +   return NULL;
> fd leaks for those three code paths. And when one of those fsync calls
> fail the previously pg_malloc'd f leaks as well. It may be a good idea
> to have a single routine doing all the pg_free work for
> DirectoryMethodFile. You'll need it as well in dir_close(). Or even
> better: do the fsync calls before allocating f. For pg_basebackup it
> does not matter much, but it does for pg_receivexlog that has a retry
> logic.
>

Agreed, moving the fsyncs is definitely the best there.



> +   if (deflateInit2(tar_data->zp, tar_data->compression,
> Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY) != Z_OK)
> +   {
> +   tar_set_error("deflateInit2 failed");
> +   return NULL;
> +   }
> tar_data->zp leaks here.. Perhaps that does not matter as tar mode is
> just used by pg_basebackup now but if we introduce some retry logic
> I'd prefer avoiding any problems in the future.
>

Agreed, leaks are bad even if they are not a direct problem right now.
Fixed.




> On Thu, Sep 29, 2016 at 7:44 PM, Magnus Hagander 
> wrote:
> >>
> >>  static bool
> >>  existsTimeLineHistoryFile(StreamCtl *stream)
> >>  {
> >> [...]
> >> +   return stream->walmethod->existsfile(histfname);
> >>  }
> >> existsfile returns always false for the tar method. This does not
> >> matter much because pg_basebackup exists immediately in case of a
> >> failure, but I think that this deserves a comment in ReceiveXlogStream
> >> where existsTimeLineHistoryFile is called.
> >
> > OK, added. As you say, the behaviour is expected, but it makes sense to
> > mention it clealry there.
>
> Thanks.
> +* false, as we are never streaming into an existing file and
> therefor
> s/therefor/therefore.
>

Fixed.



> > So what's our basic rule for these perl tests - are we allowed to use
> > pg_xlogdump from within a pg_basebackup test? If so that could actually
> be a
> > useful test - do the backup, extract the xlog and verify that it contains
> > the SWITCH record.
>
> pg_xlogdump is part of the default temporary installation, so using it
> is fine. The issue though is how do we untar pg_xlog.tar without a
> native perl call? That's not present down to 5.8.8.. The test you are
> proposing in 010_pg_basebackup.pl is the best we can do for now.
>
>
My initial thought was actually adding that check to non-tar format.

But I agree, to test the tar format things specifically we *somehow* need
to be able to untar. We either need to rely on a system tar (which will
likely break badly on Windows) or we need to rely on a perl tar module.

But independent of this patch, actually putting that test in for non-tar
mode would probably not be a bad idea -- if that breaks, it's likely both
break, after all.

Thanks!

//Magnus
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 55e913f..e024531 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -180,7 +180,8 @@ PostgreSQL documentation
 target directory, the tar contents will be written to
 standard output, suitable for piping to for example
 gzip. This is only possible if
-the cluster has no additional tablespaces.
+the cluster has no additional tablespaces and transaction
+log streaming is not used.


  
@@ -323,6 +324,10 @@ PostgreSQL documentation
  If the log 

Re: [HACKERS] pg_basebackup stream xlog to tar

2016-10-04 Thread Michael Paquier
(Squashing two emails into one)

On Fri, Sep 30, 2016 at 11:16 PM, Magnus Hagander  wrote:
> And here's yet another version, now rebased on top of the fsync and nosync
> changes that got applied.

My fault :p

> In particular, this conflicted with pretty much every single change from the
> fsync patch, so I'm definitely looking for another round of review before
> this can be committed.

Could you rebase once again? This is conflicting with the recent
changes in open_walfile() and close_walfile() of 728ceba.

> I ended up moving much of the fsync stuff into walmethods.c, since they were
> dependent on if we used tar or not (obviously only the parts about the wal,
> not the basebackup). So there's a significant risk that I missed something
> there.

+   /* If we have a temp prefix, normal is we rename the file */
+   snprintf(tmppath, sizeof(tmppath), "%s/%s%s",
+dir_data->basedir, df->pathname, df->temp_suffix);
This comment could be improved, like "normal operation is to rename
the file" for example.

+   if (lseek(fd, 0, SEEK_SET) != 0)
+   return NULL;
[...]
+   if (fsync_fname(f->fullpath, false, progname) != 0)
+   return NULL;
+   if (fsync_parent_path(f->fullpath, progname) != 0)
+   return NULL;
fd leaks for those three code paths. And when one of those fsync calls
fail the previously pg_malloc'd f leaks as well. It may be a good idea
to have a single routine doing all the pg_free work for
DirectoryMethodFile. You'll need it as well in dir_close(). Or even
better: do the fsync calls before allocating f. For pg_basebackup it
does not matter much, but it does for pg_receivexlog that has a retry
logic.

+   if (deflateInit2(tar_data->zp, tar_data->compression,
Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY) != Z_OK)
+   {
+   tar_set_error("deflateInit2 failed");
+   return NULL;
+   }
tar_data->zp leaks here.. Perhaps that does not matter as tar mode is
just used by pg_basebackup now but if we introduce some retry logic
I'd prefer avoiding any problems in the future.

On Thu, Sep 29, 2016 at 7:44 PM, Magnus Hagander  wrote:
>>
>>  static bool
>>  existsTimeLineHistoryFile(StreamCtl *stream)
>>  {
>> [...]
>> +   return stream->walmethod->existsfile(histfname);
>>  }
>> existsfile returns always false for the tar method. This does not
>> matter much because pg_basebackup exists immediately in case of a
>> failure, but I think that this deserves a comment in ReceiveXlogStream
>> where existsTimeLineHistoryFile is called.
>
> OK, added. As you say, the behaviour is expected, but it makes sense to
> mention it clealry there.

Thanks.
+* false, as we are never streaming into an existing file and therefor
s/therefor/therefore.

> So what's our basic rule for these perl tests - are we allowed to use
> pg_xlogdump from within a pg_basebackup test? If so that could actually be a
> useful test - do the backup, extract the xlog and verify that it contains
> the SWITCH record.

pg_xlogdump is part of the default temporary installation, so using it
is fine. The issue though is how do we untar pg_xlog.tar without a
native perl call? That's not present down to 5.8.8.. The test you are
proposing in 010_pg_basebackup.pl is the best we can do for now.
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-09-30 Thread Magnus Hagander
On Thu, Sep 29, 2016 at 12:44 PM, Magnus Hagander 
wrote:

> On Mon, Sep 5, 2016 at 10:01 AM, Michael Paquier <
> michael.paqu...@gmail.com> wrote:
>
>> On Sat, Sep 3, 2016 at 10:35 PM, Magnus Hagander 
>> wrote:
>> > Ugh. That would be nice to have, but I think that's outside the scope of
>> > this patch.
>>
>> A test for this patch that could have value would be to use
>> pg_basebackup -X stream -Ft, then untar pg_xlog.tar and look at the
>> size of the segments. If you have an idea to untar something without
>> the in-core perl support because we need to have the TAP stuff able to
>> run on at least 5.8.8, I'd welcome an idea. Honestly I still have
>> none, and that's why the recovery tests do not use tarballs in their
>> tests when using pg_basebackup. In short let's not add something more
>> for this patch.
>>
>> > PFA is an updated version of this patch that:
>> > * documents a magic value passed to zlib (which is in their
>> documentation as
>> > being a magic value, but has no define)
>> > * fixes the padding of tar files
>> > * adds a most basic test that the -X stream -Ft does produce a tarfile
>>
>> So I had a more serious look at this patch, and it basically makes
>> more generic the operations done for the plain mode by adding a set of
>> routines that can be used by both tar and plain mode to work on the
>> WAL files streamed. Elegant.
>>
>> +   
>> +The transaction log files will be written to
>> + the base.tar file.
>> +   
>> Nit: number of spaces here.
>>
>
> Fixed.
>
>
>
>> -mark_file_as_archived(const char *basedir, const char *fname)
>> +mark_file_as_archived(StreamCtl *stream, const char *fname)
>> Just passing WalMethod as argument would be enough, but... My patch
>> adding the fsync calls to pg_basebackup could just make use of
>> StreamCtl, so let's keep it as you suggest.
>>
>
> Yeah, I think it's cleaner to pass the whole structure around really. If
> not now, we'd need it eventually. That makes all callers more consistent.
>
>
>
>>  static bool
>>  existsTimeLineHistoryFile(StreamCtl *stream)
>>  {
>> [...]
>> +   return stream->walmethod->existsfile(histfname);
>>  }
>> existsfile returns always false for the tar method. This does not
>> matter much because pg_basebackup exists immediately in case of a
>> failure, but I think that this deserves a comment in ReceiveXlogStream
>> where existsTimeLineHistoryFile is called.
>>
>
> OK, added. As you say, the behaviour is expected, but it makes sense to
> mention it clealry there.
>
>
>
>> I find the use of existsfile() in open_walfile() rather confusing
>> because this relies on the fact  that existsfile() returns always
>> false for the tar mode. We could add an additional field in WalMethod
>> to store the method type and use that more, but that may make the code
>> more confusing than what you propose. What do you think?
>>
>
> Yeah, I'm not sure that helps. The point is that the abstraction is
> supposed to take care of that. But if it's confusing, then clearly a
> comment is warranted there, so I've added that. Do you think that makes it
> clear enough?
>
>
>
>>
>> +   int (*unlink) (const char *pathname);
>> The unlink method is used nowhere. This could just be removed.
>>
>
> That's clearly a missed cleanup. Removed, thanks.
>
>
>
>> -static void
>> +void
>>  print_tar_number(char *s, int len, uint64 val)
>> This could be an independent patch. Or not.
>>
>
> Could be, but we don't really have any other uses for it.
>
>
>
>> I think that I found another bug regarding the contents of the
>> segments. I did pg_basebackup -F t -X stream, then untared pg_xlog.tar
>> which contained segment 1/0/2, then:
>> $ pg_xlogdump 00010002
>> pg_xlogdump: FATAL:  could not find a valid record after 0/200
>> I'd expect this segment to have records, up to a XLOG_SWITCH record.
>>
>
> Ugh. That's definitely broken yes. It seeked back and overwrote the tar
> header with the data, instead of starting where the file part was supposed
> to be. It worked fine on compressed files, and it's when implementing that
> that it broke.
>
> So what's our basic rule for these perl tests - are we allowed to use
> pg_xlogdump from within a pg_basebackup test? If so that could actually be
> a useful test - do the backup, extract the xlog and verify that it contains
> the SWITCH record.
>
>
> I also noticed that using -Z5 created a .tar.gz and -z created a .tar
> (which was compressed).  Because compresslevel is set to -1 with -z,
> meaning default.
>
>
> Again, apologies for getting late back into the game here.
>
>
And here's yet another version, now rebased on top of the fsync and nosync
changes that got applied.

In particular, this conflicted with pretty much every single change from
the fsync patch, so I'm definitely looking for another round of review
before this can be committed.

I ended up moving much of the fsync stuff into walmethods.c, since 

Re: [HACKERS] pg_basebackup stream xlog to tar

2016-09-29 Thread Magnus Hagander
On Mon, Sep 5, 2016 at 10:01 AM, Michael Paquier 
wrote:

> On Sat, Sep 3, 2016 at 10:35 PM, Magnus Hagander 
> wrote:
> > Ugh. That would be nice to have, but I think that's outside the scope of
> > this patch.
>
> A test for this patch that could have value would be to use
> pg_basebackup -X stream -Ft, then untar pg_xlog.tar and look at the
> size of the segments. If you have an idea to untar something without
> the in-core perl support because we need to have the TAP stuff able to
> run on at least 5.8.8, I'd welcome an idea. Honestly I still have
> none, and that's why the recovery tests do not use tarballs in their
> tests when using pg_basebackup. In short let's not add something more
> for this patch.
>
> > PFA is an updated version of this patch that:
> > * documents a magic value passed to zlib (which is in their
> documentation as
> > being a magic value, but has no define)
> > * fixes the padding of tar files
> > * adds a most basic test that the -X stream -Ft does produce a tarfile
>
> So I had a more serious look at this patch, and it basically makes
> more generic the operations done for the plain mode by adding a set of
> routines that can be used by both tar and plain mode to work on the
> WAL files streamed. Elegant.
>
> +   
> +The transaction log files will be written to
> + the base.tar file.
> +   
> Nit: number of spaces here.
>

Fixed.



> -mark_file_as_archived(const char *basedir, const char *fname)
> +mark_file_as_archived(StreamCtl *stream, const char *fname)
> Just passing WalMethod as argument would be enough, but... My patch
> adding the fsync calls to pg_basebackup could just make use of
> StreamCtl, so let's keep it as you suggest.
>

Yeah, I think it's cleaner to pass the whole structure around really. If
not now, we'd need it eventually. That makes all callers more consistent.



>  static bool
>  existsTimeLineHistoryFile(StreamCtl *stream)
>  {
> [...]
> +   return stream->walmethod->existsfile(histfname);
>  }
> existsfile returns always false for the tar method. This does not
> matter much because pg_basebackup exists immediately in case of a
> failure, but I think that this deserves a comment in ReceiveXlogStream
> where existsTimeLineHistoryFile is called.
>

OK, added. As you say, the behaviour is expected, but it makes sense to
mention it clealry there.



> I find the use of existsfile() in open_walfile() rather confusing
> because this relies on the fact  that existsfile() returns always
> false for the tar mode. We could add an additional field in WalMethod
> to store the method type and use that more, but that may make the code
> more confusing than what you propose. What do you think?
>

Yeah, I'm not sure that helps. The point is that the abstraction is
supposed to take care of that. But if it's confusing, then clearly a
comment is warranted there, so I've added that. Do you think that makes it
clear enough?



>
> +   int (*unlink) (const char *pathname);
> The unlink method is used nowhere. This could just be removed.
>

That's clearly a missed cleanup. Removed, thanks.



> -static void
> +void
>  print_tar_number(char *s, int len, uint64 val)
> This could be an independent patch. Or not.
>

Could be, but we don't really have any other uses for it.



> I think that I found another bug regarding the contents of the
> segments. I did pg_basebackup -F t -X stream, then untared pg_xlog.tar
> which contained segment 1/0/2, then:
> $ pg_xlogdump 00010002
> pg_xlogdump: FATAL:  could not find a valid record after 0/200
> I'd expect this segment to have records, up to a XLOG_SWITCH record.
>

Ugh. That's definitely broken yes. It seeked back and overwrote the tar
header with the data, instead of starting where the file part was supposed
to be. It worked fine on compressed files, and it's when implementing that
that it broke.

So what's our basic rule for these perl tests - are we allowed to use
pg_xlogdump from within a pg_basebackup test? If so that could actually be
a useful test - do the backup, extract the xlog and verify that it contains
the SWITCH record.


I also noticed that using -Z5 created a .tar.gz and -z created a .tar
(which was compressed).  Because compresslevel is set to -1 with -z,
meaning default.


Again, apologies for getting late back into the game here.

//Magnus
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 9f1eae1..a4236a5 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -180,7 +180,8 @@ PostgreSQL documentation
 target directory, the tar contents will be written to
 standard output, suitable for piping to for example
 gzip. This is only possible if
-the cluster has no additional tablespaces.
+the cluster has no additional tablespaces and transaction
+log 

Re: [HACKERS] pg_basebackup stream xlog to tar

2016-09-28 Thread Magnus Hagander
On Sep 28, 2016 19:11, "Robert Haas"  wrote:
>
> On Mon, Sep 5, 2016 at 4:01 AM, Michael Paquier
>  wrote:
> > [ review comments ]
>
> This thread has been sitting idle for more than 3 weeks, so I'm
> marking it "Returned with Feedback" in the CommitFest application.
> Magnus, Michael's latest round of comments seem pretty trivial, so
> perhaps you want to just fix whichever of them seem to you to have
> merit and commit without waiting for the next CommitFest.  Or, you can
> resubmit for the next CommitFest if you think it needs more review.
> But the CommitFest is just about over so it's time to clean out old
> entries, one way or the other.

Yeah, understood. I was planning to get back to it this week, but failed to
find the time. I'll still have some hope about later this week, but most
likely not until the next.

/Magnus


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-09-28 Thread Robert Haas
On Mon, Sep 5, 2016 at 4:01 AM, Michael Paquier
 wrote:
> [ review comments ]

This thread has been sitting idle for more than 3 weeks, so I'm
marking it "Returned with Feedback" in the CommitFest application.
Magnus, Michael's latest round of comments seem pretty trivial, so
perhaps you want to just fix whichever of them seem to you to have
merit and commit without waiting for the next CommitFest.  Or, you can
resubmit for the next CommitFest if you think it needs more review.
But the CommitFest is just about over so it's time to clean out old
entries, one way or the other.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-09-05 Thread Michael Paquier
On Sat, Sep 3, 2016 at 10:35 PM, Magnus Hagander  wrote:
> Ugh. That would be nice to have, but I think that's outside the scope of
> this patch.

A test for this patch that could have value would be to use
pg_basebackup -X stream -Ft, then untar pg_xlog.tar and look at the
size of the segments. If you have an idea to untar something without
the in-core perl support because we need to have the TAP stuff able to
run on at least 5.8.8, I'd welcome an idea. Honestly I still have
none, and that's why the recovery tests do not use tarballs in their
tests when using pg_basebackup. In short let's not add something more
for this patch.

> PFA is an updated version of this patch that:
> * documents a magic value passed to zlib (which is in their documentation as
> being a magic value, but has no define)
> * fixes the padding of tar files
> * adds a most basic test that the -X stream -Ft does produce a tarfile

So I had a more serious look at this patch, and it basically makes
more generic the operations done for the plain mode by adding a set of
routines that can be used by both tar and plain mode to work on the
WAL files streamed. Elegant.

+   
+The transaction log files will be written to
+ the base.tar file.
+   
Nit: number of spaces here.

-mark_file_as_archived(const char *basedir, const char *fname)
+mark_file_as_archived(StreamCtl *stream, const char *fname)
Just passing WalMethod as argument would be enough, but... My patch
adding the fsync calls to pg_basebackup could just make use of
StreamCtl, so let's keep it as you suggest.

 static bool
 existsTimeLineHistoryFile(StreamCtl *stream)
 {
[...]
+   return stream->walmethod->existsfile(histfname);
 }
existsfile returns always false for the tar method. This does not
matter much because pg_basebackup exists immediately in case of a
failure, but I think that this deserves a comment in ReceiveXlogStream
where existsTimeLineHistoryFile is called.

I find the use of existsfile() in open_walfile() rather confusing
because this relies on the fact  that existsfile() returns always
false for the tar mode. We could add an additional field in WalMethod
to store the method type and use that more, but that may make the code
more confusing than what you propose. What do you think?

+   int (*unlink) (const char *pathname);
The unlink method is used nowhere. This could just be removed.

-static void
+void
 print_tar_number(char *s, int len, uint64 val)
This could be an independent patch. Or not.

I think that I found another bug regarding the contents of the
segments. I did pg_basebackup -F t -X stream, then untared pg_xlog.tar
which contained segment 1/0/2, then:
$ pg_xlogdump 00010002
pg_xlogdump: FATAL:  could not find a valid record after 0/200
I'd expect this segment to have records, up to a XLOG_SWITCH record.

> As for using XLOGDIR to drive the name of the tarfile. pg_basebackup is
> already hardcoded to use pg_xlog. And so are the tests. We probably want to
> fix that, but that's a separate step and this patch will be easier to review
> and test if we keep it out for now.

Yes. pg_basebackup is not the only thing here missing the point here,
here is most of the list:
$ git grep "pg_xlog\"" -- *.c *.h
src/backend/access/transam/xlog.c:static const char *xlogSourceNames[]
= {"any", "archive", "pg_xlog", "stream"};
src/backend/replication/basebackup.c:   dir = AllocateDir("pg_xlog");
src/backend/replication/basebackup.c:(errmsg("could
not open directory \"%s\": %m", "pg_xlog")));
src/backend/replication/basebackup.c:   while ((de = ReadDir(dir,
"pg_xlog")) != NULL)
src/backend/replication/basebackup.c:   if (strcmp(pathbuf,
"./pg_xlog") == 0)
src/backend/storage/file/fd.c:  if (lstat("pg_xlog", ) < 0)
src/backend/storage/file/fd.c:  "pg_xlog")));
src/backend/storage/file/fd.c:  if (pgwin32_is_junction("pg_xlog"))
src/backend/storage/file/fd.c:  walkdir("pg_xlog", pre_sync_fname,
false, DEBUG1);
src/backend/storage/file/fd.c:  walkdir("pg_xlog",
datadir_fsync_fname, false, LOG);
src/bin/initdb/initdb.c:snprintf(pg_xlog, MAXPGPATH, "%s/pg_xlog", pg_data);
src/bin/initdb/initdb.c:subdirloc = psprintf("%s/pg_xlog", pg_data);
src/bin/pg_basebackup/pg_basebackup.c:  snprintf(param->xlogdir,
sizeof(param->xlogdir), "%s/pg_xlog", basedir);
src/bin/pg_basebackup/pg_basebackup.c:  if
(!((pg_str_endswith(filename, "/pg_xlog") ||
src/bin/pg_basebackup/pg_basebackup.c:  linkloc =
psprintf("%s/pg_xlog", basedir);
src/bin/pg_rewind/copy_fetch.c: strcmp(path, "pg_xlog") == 0)
src/bin/pg_rewind/filemap.c:if (strcmp(path, "pg_xlog") == 0 &&
type == FILE_TYPE_SYMLINK)
src/bin/pg_rewind/filemap.c:if (exists &&
!S_ISDIR(statbuf.st_mode) && strcmp(path, "pg_xlog") != 0)
src/bin/pg_rewind/filemap.c:if (strcmp(path, "pg_xlog") == 0 &&
type == FILE_TYPE_SYMLINK)

Re: [HACKERS] pg_basebackup stream xlog to tar

2016-09-03 Thread Magnus Hagander
On Thu, Sep 1, 2016 at 2:39 PM, Michael Paquier 
wrote:

> On Thu, Sep 1, 2016 at 5:13 PM, Magnus Hagander 
> wrote:
> > We don't seem to check for similar issues as the one just found in the
> > existing tests though, do we? As in, we don't actually verify that the
> xlog
> > files being streamed are 16Mb? (Or for that matter that the tarfile
> emitted
> > by -Ft is actually a tarfile?) Or am I missing some magic somewhere? :)
>
> No. There is no checks on the WAL file size (you should use the output
> of pg_controldata to see how large the segments should be). For the
> tar file, the complication is in its untar... Perl provides some ways
> to untar things, though the oldest version that we support in the TAP
> tests does not offer that :(
>

Ugh. That would be nice to have, but I think that's outside the scope of
this patch.

PFA is an updated version of this patch that:
* documents a magic value passed to zlib (which is in their documentation
as being a magic value, but has no define)
* fixes the padding of tar files
* adds a most basic test that the -X stream -Ft does produce a tarfile

As for using XLOGDIR to drive the name of the tarfile. pg_basebackup is
already hardcoded to use pg_xlog. And so are the tests. We probably want to
fix that, but that's a separate step and this patch will be easier to
review and test if we keep it out for now.

-- 
 Magnus Hagander
 Me: http://www.hagander.net/
 Work: http://www.redpill-linpro.com/
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 03615da..981d201 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -180,7 +180,8 @@ PostgreSQL documentation
 target directory, the tar contents will be written to
 standard output, suitable for piping to for example
 gzip. This is only possible if
-the cluster has no additional tablespaces.
+the cluster has no additional tablespaces and transaction
+log streaming is not used.


  
@@ -323,6 +324,10 @@ PostgreSQL documentation
  If the log has been rotated when it's time to transfer it, the
  backup will fail and be unusable.

+   
+The transaction log files will be written to
+ the base.tar file.
+   
   
  
 
@@ -339,6 +344,9 @@ PostgreSQL documentation
  client can keep up with transaction log received, using this mode
  requires no extra transaction logs to be saved on the master.

+   The transactionn log files are written to a separate file
+called pg_xlog.tar.
+   
   
  
 
diff --git a/src/bin/pg_basebackup/Makefile b/src/bin/pg_basebackup/Makefile
index fa1ce8b..52ac9e9 100644
--- a/src/bin/pg_basebackup/Makefile
+++ b/src/bin/pg_basebackup/Makefile
@@ -19,7 +19,7 @@ include $(top_builddir)/src/Makefile.global
 override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS)
 LDFLAGS += -L$(top_builddir)/src/fe_utils -lpgfeutils -lpq
 
-OBJS=receivelog.o streamutil.o $(WIN32RES)
+OBJS=receivelog.o streamutil.o walmethods.o $(WIN32RES)
 
 all: pg_basebackup pg_receivexlog pg_recvlogical
 
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 351a420..58c0821 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -365,7 +365,7 @@ typedef struct
 {
 	PGconn	   *bgconn;
 	XLogRecPtr	startptr;
-	char		xlogdir[MAXPGPATH];
+	char		xlog[MAXPGPATH];	/* directory or tarfile depending on mode */
 	char	   *sysidentifier;
 	int			timeline;
 } logstreamer_param;
@@ -383,9 +383,13 @@ LogStreamerMain(logstreamer_param *param)
 	stream.standby_message_timeout = standby_message_timeout;
 	stream.synchronous = false;
 	stream.mark_done = true;
-	stream.basedir = param->xlogdir;
 	stream.partial_suffix = NULL;
 
+	if (format == 'p')
+		stream.walmethod = CreateWalDirectoryMethod(param->xlog);
+	else
+		stream.walmethod = CreateWalTarMethod(param->xlog, compresslevel);
+
 	if (!ReceiveXlogStream(param->bgconn, ))
 
 		/*
@@ -395,6 +399,14 @@ LogStreamerMain(logstreamer_param *param)
 		 */
 		return 1;
 
+	if (!stream.walmethod->finish())
+	{
+		fprintf(stderr,
+_("%s: could not finish writing WAL files: %s\n"),
+progname, strerror(errno));
+		return 1;
+	}
+
 	PQfinish(param->bgconn);
 	return 0;
 }
@@ -445,22 +457,25 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier)
 		/* Error message already written in GetConnection() */
 		exit(1);
 
-	snprintf(param->xlogdir, sizeof(param->xlogdir), "%s/pg_xlog", basedir);
-
-	/*
-	 * Create pg_xlog/archive_status (and thus pg_xlog) so we can write to
-	 * basedir/pg_xlog as the directory entry in the tar file may arrive
-	 * later.
-	 */
-	snprintf(statusdir, sizeof(statusdir), 

Re: [HACKERS] pg_basebackup stream xlog to tar

2016-09-01 Thread Michael Paquier
On Thu, Sep 1, 2016 at 5:13 PM, Magnus Hagander  wrote:
> We don't seem to check for similar issues as the one just found in the
> existing tests though, do we? As in, we don't actually verify that the xlog
> files being streamed are 16Mb? (Or for that matter that the tarfile emitted
> by -Ft is actually a tarfile?) Or am I missing some magic somewhere? :)

No. There is no checks on the WAL file size (you should use the output
of pg_controldata to see how large the segments should be). For the
tar file, the complication is in its untar... Perl provides some ways
to untar things, though the oldest version that we support in the TAP
tests does not offer that :(
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-09-01 Thread Magnus Hagander
On Thu, Sep 1, 2016 at 9:43 AM, Michael Paquier 
wrote:

> On Thu, Sep 1, 2016 at 4:41 PM, Magnus Hagander 
> wrote:
> > That's definitely not intended - it's supposed to be 16Mb. And it
> actually
> > writes 16Mb to the tarfile, it's the extraction that doesn't see them.
> That
> > also means that if you get more than one member of the tarfile at this
> > point, it will be corrupt. (It's not corrupt in the .tar.gz case,
> clearly my
> > testing of the very last iteration of the patch forgot to doublecheck
> this
> > with both).
> >
> > Oops. Will fix.
>
> If at the same time you could add some tests in pg_basebackup/t, that
> would be great :)
>

That's definitely on my slightly-longer-term plan. But I've successfully
managed to avoid perl long enough now that looking at the code in those
tests is mighty confusing :) So I need a bunch of readup before I can
figure that out. (yes, that means I've managed to avoid our own discussions
about that style tests on this list quite successfully too :P)

We don't seem to check for similar issues as the one just found in the
existing tests though, do we? As in, we don't actually verify that the xlog
files being streamed are 16Mb? (Or for that matter that the tarfile emitted
by -Ft is actually a tarfile?) Or am I missing some magic somewhere? :)

-- 
 Magnus Hagander
 Me: http://www.hagander.net/
 Work: http://www.redpill-linpro.com/


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-09-01 Thread Michael Paquier
On Thu, Sep 1, 2016 at 4:41 PM, Magnus Hagander  wrote:
> That's definitely not intended - it's supposed to be 16Mb. And it actually
> writes 16Mb to the tarfile, it's the extraction that doesn't see them. That
> also means that if you get more than one member of the tarfile at this
> point, it will be corrupt. (It's not corrupt in the .tar.gz case, clearly my
> testing of the very last iteration of the patch forgot to doublecheck this
> with both).
>
> Oops. Will fix.

If at the same time you could add some tests in pg_basebackup/t, that
would be great :)
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-09-01 Thread Magnus Hagander
On Thu, Sep 1, 2016 at 9:19 AM, Michael Paquier 
wrote:

> On Thu, Sep 1, 2016 at 6:58 AM, Magnus Hagander 
> wrote:
> > Attached patch adds support for -X stream to work with .tar and .tar.gz
> file
> > formats.
>
> Nice.
>
> > If tar mode is specified, a separate pg_xlog.tar (or .tar.gz) file is
> > created and the data is streamed into it. Regular mode is (should not)
> see
> > any changes in how it works.
>
> Could you use XLOGDIR from xlog_internal.h at least?
>

Yes, that makes sense.


> > The implementation creates a "walmethod" for directory and one for tar,
> > which is basically a set of function pointers that we pass around as
> part of
> > the StreamCtl structure. All calls to modify the files are sent through
> the
> > current method, using the normal open/read/write calls as it is now for
> > directories, and the more complicated method for tar and targz.
>
> That looks pretty cool by looking at the code.
>
> > The tar method doesn't support all things that are required for
> > pg_receivexlog, but I don't think it makes any real sense to have support
> > for pg_receivexlog in tar mode. But it does support all the things that
> > pg_basebackup needs.
>
> Agreed. Your patch is enough complicated.
>
> > Some smaller pieces of functionality like unlinking files on failure and
> > padding files have been moved into the walmethod because they have to be
> > differently implemented (we cannot pre-pad a compressed file -- the size
> > will depend on the compression ration anyway -- for example).
> >
> > AFAICT we never actually documented that -X stream doesn't work with tar
> in
> > the manpage of current versions. Only in the error message. We might
> want to
> > fix that in backbranches.
>
> +1 for documenting that in back-branches.
>
> > In passing this also fixes an XXX comment about not re-lseeking on the
> WAL
> > file all the time -- the walmethod now tracks the current position in the
> > file in a variable.
> >
> > Finally, to make this work, the pring_tar_number() function is now
> exported
> > from port/tar.c along with the other ones already exported from there.
>
> walmethods.c:387:9: warning: comparison of unsigned expression < 0 is
> always false [-Wtautological-compare]
> if (r < 0)
> This patch is generating a warning for me with clang.
>
> I have just tested this feature:
> $ pg_basebackup -X stream -D data -F t
> Which generates that:
> $ ls data
> base.tar pg_xlog.tar
> However after decompressing pg_xlog.tar the segments don't have a correct
> size:
> $ ls -l 0*
> -rw---  1 mpaquier  _guest   3.9M Sep  1 16:12 00010011
>
> Even if that's filling them with zeros during pg_basebackup when a
> segment is done, those should be 16MB to allow users to reuse them
> directly.
>

Huh, that's annoying. I must've broken that when I fixed padding for
compressed files. It forgets the padding when it updates the size of the
tarfile (works fine for compressed files because padding is done at the
end).

That's definitely not intended - it's supposed to be 16Mb. And it actually
writes 16Mb to the tarfile, it's the extraction that doesn't see them. That
also means that if you get more than one member of the tarfile at this
point, it will be corrupt. (It's not corrupt in the .tar.gz case, clearly
my testing of the very last iteration of the patch forgot to doublecheck
this with both).

Oops. Will fix.

-- 
 Magnus Hagander
 Me: http://www.hagander.net/
 Work: http://www.redpill-linpro.com/


Re: [HACKERS] pg_basebackup stream xlog to tar

2016-09-01 Thread Michael Paquier
On Thu, Sep 1, 2016 at 6:58 AM, Magnus Hagander  wrote:
> Attached patch adds support for -X stream to work with .tar and .tar.gz file
> formats.

Nice.

> If tar mode is specified, a separate pg_xlog.tar (or .tar.gz) file is
> created and the data is streamed into it. Regular mode is (should not) see
> any changes in how it works.

Could you use XLOGDIR from xlog_internal.h at least?

> The implementation creates a "walmethod" for directory and one for tar,
> which is basically a set of function pointers that we pass around as part of
> the StreamCtl structure. All calls to modify the files are sent through the
> current method, using the normal open/read/write calls as it is now for
> directories, and the more complicated method for tar and targz.

That looks pretty cool by looking at the code.

> The tar method doesn't support all things that are required for
> pg_receivexlog, but I don't think it makes any real sense to have support
> for pg_receivexlog in tar mode. But it does support all the things that
> pg_basebackup needs.

Agreed. Your patch is enough complicated.

> Some smaller pieces of functionality like unlinking files on failure and
> padding files have been moved into the walmethod because they have to be
> differently implemented (we cannot pre-pad a compressed file -- the size
> will depend on the compression ration anyway -- for example).
>
> AFAICT we never actually documented that -X stream doesn't work with tar in
> the manpage of current versions. Only in the error message. We might want to
> fix that in backbranches.

+1 for documenting that in back-branches.

> In passing this also fixes an XXX comment about not re-lseeking on the WAL
> file all the time -- the walmethod now tracks the current position in the
> file in a variable.
>
> Finally, to make this work, the pring_tar_number() function is now exported
> from port/tar.c along with the other ones already exported from there.

walmethods.c:387:9: warning: comparison of unsigned expression < 0 is
always false [-Wtautological-compare]
if (r < 0)
This patch is generating a warning for me with clang.

I have just tested this feature:
$ pg_basebackup -X stream -D data -F t
Which generates that:
$ ls data
base.tar pg_xlog.tar
However after decompressing pg_xlog.tar the segments don't have a correct size:
$ ls -l 0*
-rw---  1 mpaquier  _guest   3.9M Sep  1 16:12 00010011

Even if that's filling them with zeros during pg_basebackup when a
segment is done, those should be 16MB to allow users to reuse them
directly.
-- 
Michael


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


[HACKERS] pg_basebackup stream xlog to tar

2016-08-31 Thread Magnus Hagander
Attached patch adds support for -X stream to work with .tar and .tar.gz
file formats.

If tar mode is specified, a separate pg_xlog.tar (or .tar.gz) file is
created and the data is streamed into it. Regular mode is (should not) see
any changes in how it works.

The implementation creates a "walmethod" for directory and one for tar,
which is basically a set of function pointers that we pass around as part
of the StreamCtl structure. All calls to modify the files are sent through
the current method, using the normal open/read/write calls as it is now for
directories, and the more complicated method for tar and targz.

The tar method doesn't support all things that are required for
pg_receivexlog, but I don't think it makes any real sense to have support
for pg_receivexlog in tar mode. But it does support all the things that
pg_basebackup needs.

Some smaller pieces of functionality like unlinking files on failure and
padding files have been moved into the walmethod because they have to be
differently implemented (we cannot pre-pad a compressed file -- the size
will depend on the compression ration anyway -- for example).

AFAICT we never actually documented that -X stream doesn't work with tar in
the manpage of current versions. Only in the error message. We might want
to fix that in backbranches.

In passing this also fixes an XXX comment about not re-lseeking on the WAL
file all the time -- the walmethod now tracks the current position in the
file in a variable.

Finally, to make this work, the pring_tar_number() function is now exported
from port/tar.c along with the other ones already exported from there.

-- 
 Magnus Hagander
 Me: http://www.hagander.net/
 Work: http://www.redpill-linpro.com/
diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index 03615da..981d201 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -180,7 +180,8 @@ PostgreSQL documentation
 target directory, the tar contents will be written to
 standard output, suitable for piping to for example
 gzip. This is only possible if
-the cluster has no additional tablespaces.
+the cluster has no additional tablespaces and transaction
+log streaming is not used.


  
@@ -323,6 +324,10 @@ PostgreSQL documentation
  If the log has been rotated when it's time to transfer it, the
  backup will fail and be unusable.

+   
+The transaction log files will be written to
+ the base.tar file.
+   
   
  
 
@@ -339,6 +344,9 @@ PostgreSQL documentation
  client can keep up with transaction log received, using this mode
  requires no extra transaction logs to be saved on the master.

+   The transactionn log files are written to a separate file
+called pg_xlog.tar.
+   
   
  
 
diff --git a/src/bin/pg_basebackup/Makefile b/src/bin/pg_basebackup/Makefile
index fa1ce8b..52ac9e9 100644
--- a/src/bin/pg_basebackup/Makefile
+++ b/src/bin/pg_basebackup/Makefile
@@ -19,7 +19,7 @@ include $(top_builddir)/src/Makefile.global
 override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS)
 LDFLAGS += -L$(top_builddir)/src/fe_utils -lpgfeutils -lpq
 
-OBJS=receivelog.o streamutil.o $(WIN32RES)
+OBJS=receivelog.o streamutil.o walmethods.o $(WIN32RES)
 
 all: pg_basebackup pg_receivexlog pg_recvlogical
 
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 351a420..58c0821 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -365,7 +365,7 @@ typedef struct
 {
 	PGconn	   *bgconn;
 	XLogRecPtr	startptr;
-	char		xlogdir[MAXPGPATH];
+	char		xlog[MAXPGPATH];	/* directory or tarfile depending on mode */
 	char	   *sysidentifier;
 	int			timeline;
 } logstreamer_param;
@@ -383,9 +383,13 @@ LogStreamerMain(logstreamer_param *param)
 	stream.standby_message_timeout = standby_message_timeout;
 	stream.synchronous = false;
 	stream.mark_done = true;
-	stream.basedir = param->xlogdir;
 	stream.partial_suffix = NULL;
 
+	if (format == 'p')
+		stream.walmethod = CreateWalDirectoryMethod(param->xlog);
+	else
+		stream.walmethod = CreateWalTarMethod(param->xlog, compresslevel);
+
 	if (!ReceiveXlogStream(param->bgconn, ))
 
 		/*
@@ -395,6 +399,14 @@ LogStreamerMain(logstreamer_param *param)
 		 */
 		return 1;
 
+	if (!stream.walmethod->finish())
+	{
+		fprintf(stderr,
+_("%s: could not finish writing WAL files: %s\n"),
+progname, strerror(errno));
+		return 1;
+	}
+
 	PQfinish(param->bgconn);
 	return 0;
 }
@@ -445,22 +457,25 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier)
 		/* Error message already written in GetConnection() */
 		exit(1);
 
-	snprintf(param->xlogdir, sizeof(param->xlogdir), "%s/pg_xlog",