Toby Johnson wrote: > > Gotcha. I'll close ticket 26 and open another to add an encoding switch > to ssphys. >
I have written a patch for <http://www.pumacode.org/projects/vss2svn/ticket/44>. Since I have technical problems at work running vss2svn from the source, I cannot test it with my VSS repository. I know this isn't a healthy way to submit patches, but I think it will at least help speed the ticket along, and since I made changes to ssphys it can basically be tested with a regular latin-codepage VSS repository. There is a chance it may not run at all, though :) I've added an encoding flag to the xml formatter of ssphys with a default value of windows-1252, and a matching flag for vss2svn.pl (also with a default). Regarding DoSsCmd() in vss2svn.pl, the character range removed from the output is correct for *most* windows codepages, since all of them re-define just the lower part of windows-1255. Arabic, for example, has rarely used characters in 0x8D and 0x90 <http://en.wikipedia.org/wiki/Windows-1256>. -Ori
Index: script/vss2svn.pl =================================================================== --- script/vss2svn.pl (revision 272) +++ script/vss2svn.pl (working copy) @@ -106,7 +106,7 @@ # LoadVssNames ############################################################################### sub LoadVssNames { - &DoSsCmd("info \"$gCfg{vssdatadir}/names.dat\""); + &DoSsCmd("info -e$gCfg{encoding} \"$gCfg{vssdatadir}/names.dat\""); my $xs = XML::Simple->new(KeyAttr => [], ForceArray => [qw(NameCacheEntry Entry)],); @@ -246,7 +246,7 @@ return; } - &DoSsCmd("info \"$filesegment[0]/$filesegment[1]/$filesegment[2]\""); + &DoSsCmd("info -e$gCfg{encoding} \"$filesegment[0]/$filesegment[1]/$filesegment[2]\""); my $xml = $xs->XMLin($gSysOut); my $parentphys; @@ -920,7 +920,7 @@ } if (! -e "$exportdir/$physname.$version" ) { - &DoSsCmd("get -b -v$version --force-overwrite \"$physpath\" $exportdir/$physname"); + &DoSsCmd("get -b -v$version --force-overwrite -e$gCfg{encoding} \"$physpath\" $exportdir/$physname"); } return $exportdir; @@ -944,6 +944,7 @@ VSS Dir : $gCfg{vssdir} Temp Dir : $gCfg{tempdir} Dumpfile : $gCfg{dumpfile} +VSS Encoding : $gCfg{encoding} SSPHYS exe : $gCfg{ssphys} SSPHYS ver : $ssversion @@ -1551,7 +1552,7 @@ ############################################################################### sub Initialize { GetOptions(\%gCfg,'vssdir=s','tempdir=s','dumpfile=s','resume','verbose', - 'debug','timing+','task=s','revtimerange=i', 'ssphys=s'); + 'debug','timing+','task=s','revtimerange=i','ssphys=s','encoding=s'); &GiveHelp("Must specify --vssdir") if !defined($gCfg{vssdir}); $gCfg{tempdir} = './_vss2svn' if !defined($gCfg{tempdir}); @@ -1561,6 +1562,7 @@ # XML output from ssphysout placed here. $gCfg{ssphysout} = "$gCfg{tempdir}/ssphysout"; + $gCfg{encoding} = 'windows-1252' if !defined($gCfg{encoding}); # Commit messages for SVN placed here. 
$gCfg{svncomment} = "$gCfg{tempdir}/svncomment.tmp.txt"; @@ -1690,6 +1692,8 @@ --verbose : Print more info about the items being processed --debug : Print lots of debugging info. --timing : Show timing information during various steps + --encoding : Specify the encoding used in VSS; + Default is windows-1252 EOTXT exit(1); Index: ssphys/SSPhys/Formatter.cpp =================================================================== --- ssphys/SSPhys/Formatter.cpp (revision 272) +++ ssphys/SSPhys/Formatter.cpp (working copy) @@ -50,11 +50,11 @@ class CXMLFormatter : public CFormatter { public: - CXMLFormatter () + CXMLFormatter (std::string encoding) : m_pCurrentFileNode (NULL) { // patch this line to match your VSS DB's locale - TiXmlDeclaration decl ("1.0", "windows-1252", ""); + TiXmlDeclaration decl ("1.0", encoding, ""); m_Document.InsertEndChild (decl); } ~CXMLFormatter () @@ -301,12 +301,12 @@ } ////////////////////////////////////////////////////////////////////// -std::auto_ptr<CFormatter> CFormatterFactory::MakeFormatter (eStyle style, po::variables_map const& vm) +std::auto_ptr<CFormatter> CFormatterFactory::MakeFormatter (eStyle style, std::string encoding, po::variables_map const& vm) { if (style == eBinary) return std::auto_ptr<CFormatter> (new CBinaryFormatter (/*value*/)); if (style == eXML) - return std::auto_ptr<CFormatter> (new CXMLFormatter ()); + return std::auto_ptr<CFormatter> (new CXMLFormatter (encoding)); if (style == eVSS) return std::auto_ptr<CFormatter> (new CVssFormatter ()); if (style == eDump) @@ -324,7 +324,7 @@ if (style == "binary") return std::auto_ptr<CFormatter> (new CBinaryFormatter (/*value*/)); if (style == "xml") - return std::auto_ptr<CFormatter> (new CXMLFormatter ()); + return std::auto_ptr<CFormatter> (new CXMLFormatter (options["encoding"].as<std::string>())); if (style == "vss") return std::auto_ptr<CFormatter> (new CVssFormatter ()); if (style == "dump") @@ -339,6 +339,8 @@ po::options_description descr ("Formatter options"); 
descr.add_options () ("style,s", po::value<std::string>()->default_value("XML"), "output style {XML|binary|vss|dump}"); + descr.add_options () + ("encoding,e", po::value<std::string>()->default_value("windows-1252"), "VSS text encoding for the XML output style"); return descr; } Index: ssphys/SSPhys/Formatter.h =================================================================== --- ssphys/SSPhys/Formatter.h (revision 272) +++ ssphys/SSPhys/Formatter.h (working copy) @@ -59,7 +59,7 @@ class CFormatterFactory { public: - static std::auto_ptr<CFormatter> MakeFormatter (eStyle style, po::variables_map const& vm); + static std::auto_ptr<CFormatter> MakeFormatter (eStyle style, std::string encoding, po::variables_map const& vm); static std::auto_ptr<CFormatter> MakeFormatter (po::variables_map const& vm); static po::options_description GetOptionsDescription (); };
_______________________________________________ vss2svn-users mailing list Project homepage: http://www.pumacode.org/projects/vss2svn/ Subscribe/Unsubscribe/Admin: http://lists.pumacode.org/mailman/listinfo/vss2svn-users-lists.pumacode.org Mailing list web interface (with searchable archives): http://dir.gmane.org/gmane.comp.version-control.subversion.vss2svn.user