Hello,
We are loading ORCID 2016 in a V7 instance (Version 07.20.3217-pthreads
for Linux as of Feb 10 2017), we DO NOT want to use the bulk loader,
instead we are providing SPARQL inserts of RDF/XML files via JDBC
connector from Oracle.
Virtuoso is hosted on an 8-core, 32 GB platform.
We successfully inserted 75,633,079 triples before virtuoso.log started
reporting performance problems with "disk write throughput". Is there
anything else we can optimize in virtuoso.ini while we are in this "loading"
phase (there are no SPARQL "read" queries from clients at the moment)?
We've already done :
- full-text indexing has been delayed ( DB.DBA.VT_BATCH_UPDATE (
'DB.DBA.RDF_OBJ', 'ON', 8640 ); )
- MaxCheckpointRemap = 505856 ( it's larger than 25% of total pages)
- UnremapQuota = 0
- DefaultIsolation = 2
- O_DIRECT = 1 (we are on XFS filesystem)
- ColumnStore = 1 (we started from a new, fresh .db,
deleted all previous existing .db, .trx)
Can we do something at the transaction level? We keep each JDBC transaction
as short as possible (1 insert -> 1 commit); the query is:
"'sparql *DEFINE sql:log-enable 2* INSERT DATA INTO GRAPH '||graphe ||'
{ '|| var_clob_line|| ' }'"
I can see that free memory slowly decreases, and finally the server hangs.
Thanks for your help ! (Attached is virtuoso.ini)
Thomas
;
; virtuoso.ini
;
; Configuration file for the OpenLink Virtuoso VDBMS Server
;
; To learn more about this product, or any other product in our
; portfolio, please check out our web site at:
;
; http://virtuoso.openlinksw.com/
;
; or contact us at:
;
; general.informat...@openlinksw.com
;
; If you have any technical questions, please contact our support
; staff at:
;
; technical.supp...@openlinksw.com
;
;
; Database setup
;
[Database]
; Main database, log, lock and transaction files. Each key and its path
; must be on a single line.
DatabaseFile = /usr/local/virtuoso-opensource/var/lib/virtuoso/db/virtuoso.db
ErrorLogFile = /usr/local/virtuoso-opensource/var/lib/virtuoso/db/virtuoso.log
LockFile = /usr/local/virtuoso-opensource/var/lib/virtuoso/db/virtuoso.lck
TransactionFile = /usr/local/virtuoso-opensource/var/lib/virtuoso/db/virtuoso20170309162914.trx
; Previous transaction file location, kept for reference.
;TransactionFile = /LN_Hupe/virtuoso20151207171442.trx
xa_persistent_file = /usr/local/virtuoso-opensource/var/lib/virtuoso/db/virtuoso.pxa
ErrorLogLevel = 7
FileExtend = 200
; Checkpoint tuning chosen for the loading phase.
MaxCheckpointRemap = 505856
UnremapQuota = 0
DefaultIsolation = 2
; Striping = 0: the single DatabaseFile above is used and the [Striping]
; segments are ignored.
Striping = 0
; Name of the section that holds the temporary database settings.
TempStorage = TempDatabase
; Temporary database; this section is referenced by TempStorage in [Database].
; Each key and its path must be on a single line.
[TempDatabase]
DatabaseFile = /usr/local/virtuoso-opensource/var/lib/virtuoso/db/virtuoso-temp.db
TransactionFile = /usr/local/virtuoso-opensource/var/lib/virtuoso/db/virtuoso-temp.trx
MaxCheckpointRemap = 2000
Striping = 0
;
; Server parameters
;
[Parameters]
ServerPort = 1111
LiteMode = 0
DisableUnixSocket = 1
DisableTcpSocket = 0
; SSL listener and client-certificate checks are not configured.
;SSLServerPort = 2111
;SSLCertificate = cert.pem
;SSLPrivateKey = pk.pem
;X509ClientVerify = 0
;X509ClientVerifyDepth = 0
;X509ClientVerifyCAFile = ca.pem
MaxClientConnections = 10
CheckpointInterval = 20
O_DIRECT = 1
CaseMode = 2
MaxStaticCursorRows = 5000
CheckpointAuditTrail = 1
AllowOSCalls = 0
SchedulerInterval = 10
; Earlier, wider directory list (disabled).
;DirsAllowed = ., /usr/local/virtuoso-opensource/share/virtuoso/vad, /home/devel, /LN_Hupe, /LN_Hupe/dumpviaf
;production
DirsAllowed = ., /usr/local/virtuoso-opensource/share/virtuoso/vad, /home/devel/logs
ThreadCleanupInterval = 1
ThreadThreshold = 10
ResourcesCleanupInterval = 1
FreeTextBatchSize = 100000
SingleCPU = 0
VADInstallDir = /usr/local/virtuoso-opensource/share/virtuoso/vad/
PrefixResultNames = 0
RdfFreeTextRulesSize = 100
IndexTreeMaps = 256
MaxMemPoolSize = 200000000
MacSpotlight = 0
; NOTE(review): IndexTreeMaps is set twice in this section (256 above, 64
; here). Which value takes effect depends on how the parser treats duplicate
; keys; confirm the intended value and delete the other line.
IndexTreeMaps = 64
MaxQueryMem = 3G ; memory allocated to query processor
VectorSize = 1000 ; initial parallel query vector (array of query operations) size
MaxVectorSize = 1000000 ; query vector size threshold.
AdjustVectorSize = 0
ThreadsPerQuery = 8
AsyncQueueMaxThreads = 10
ColumnStore = 1
;server side query logging
;At run time, this may be enabled or disabled with prof_enable (), overriding the specification of the ini file
;QueryLog = virtuoso.qrl
;;
;; When running with large data sets, one should configure the Virtuoso
;; process to use between 2/3 to 3/5 of free system memory and to stripe
;; storage on all available disks.
;;
;; Uncomment next two lines if there is 2 GB system memory free
;NumberOfBuffers = 170000
;MaxDirtyBuffers = 130000
;; Uncomment next two lines if there is 4 GB system memory free
;NumberOfBuffers = 340000
;MaxDirtyBuffers = 250000
;; Uncomment next two lines if there is 8 GB system memory free
;NumberOfBuffers = 680000
;MaxDirtyBuffers = 500000
;; Uncomment next two lines if there is 16 GB system memory free
;NumberOfBuffers = 1360000
;MaxDirtyBuffers = 1000000
;; Uncomment next two lines if there is 32 GB system memory free
;; NOTE(review): this is the active preset; confirm the host really has this
;; much memory free for Virtuoso, per the 2/3 to 3/5 guidance above.
NumberOfBuffers = 2720000
MaxDirtyBuffers = 2000000
;; Uncomment next two lines if there is 48 GB system memory free
;NumberOfBuffers = 4000000
;MaxDirtyBuffers = 3000000
;; Uncomment next two lines if there is 64 GB system memory free
;NumberOfBuffers = 5450000
;MaxDirtyBuffers = 4000000
;;
;; Note the default settings will take very little memory
;; but will not result in very good performance
;;
;NumberOfBuffers = 10000
;MaxDirtyBuffers = 6000
;
; HTTP server setup
;
[HTTPServer]
ServerPort = 8890
; Key and path must be on a single line.
ServerRoot = /usr/local/virtuoso-opensource/var/lib/virtuoso/vsp
MaxClientConnections = 10
DavRoot = DAV
EnabledDavVSP = 0
HTTPProxyEnabled = 0
TempASPXDir = 0
DefaultMailServer = localhost:25
ServerThreads = 10
MaxKeepAlives = 10
KeepAliveTimeout = 10
MaxCachedProxyConnections = 10
ProxyConnectionCacheTimeout = 15
HTTPThreadSize = 280000
HttpPrintWarningsInOutput = 0
Charset = UTF-8
; This directory is also listed in DirsAllowed under [Parameters].
HTTPLogFile = /home/devel/logs/http09032017.log
MaintenancePage = atomic.html
EnabledGzipContent = 1
;
; Automatic repair settings
; NOTE(review): BadParentLinks = 0 presumably leaves this repair off; confirm
; against the Virtuoso documentation.
;
[AutoRepair]
BadParentLinks = 0
;
; Client settings: prefetch sizes and query/transaction timeouts are set;
; the remaining SQL_* keys are commented out and therefore not set here.
;
[Client]
SQL_PREFETCH_ROWS = 100
SQL_PREFETCH_BYTES = 16000
SQL_QUERY_TIMEOUT = 0
SQL_TXN_TIMEOUT = 0
;SQL_NO_CHAR_C_ESCAPE = 1
;SQL_UTF8_EXECS = 0
;SQL_NO_SYSTEM_TABLES = 0
;SQL_BINARY_TIMESTAMP = 1
;SQL_ENCRYPTION_ON_PASSWORD = -1
;
; VDB settings
; NOTE(review): presumably these apply to connections to remote (virtual
; database) data sources; confirm against the Virtuoso documentation.
;
[VDB]
ArrayOptimization = 0
NumArrayParameters = 10
VDBDisconnectTimeout = 1000
KeepConnectionOnFixedThread = 0
;
; Replication setup
; NOTE(review): ServerEnable = 1 presumably turns replication on; confirm it
; is needed while the instance is only being loaded.
;
[Replication]
ServerName = db-TULIPEDEV
ServerEnable = 1
QueueMax = 50000
;
; Striping setup
;
; These parameters have only effect when Striping is set to 1 in the
; [Database] section, in which case the DatabaseFile parameter is ignored.
;
; With striping, the database is spawned across multiple segments
; where each segment can have multiple stripes.
;
; Format of the lines below:
; Segment<number> = <size>, <stripe file name> [, <stripe file name> .. ]
;
; <number> must be ordered from 1 up.
;
; The <size> is the total size of the segment which is equally divided
; across all stripes forming the segment. Its specification can be in
; gigabytes (g), megabytes (m), kilobytes (k) or in database blocks
; (b, the default)
;
; Note that the segment size must be a multiple of the database page size
; which is currently 8k. Also, the segment size must be divisible by the
; number of stripe files forming the segment.
;
; The example below creates a 200 meg database striped on two segments
; with two stripes of 50 meg and one of 100 meg.
;
; You can always add more segments to the configuration, but once
; added, do not change the setup.
;
; Not in effect in this file: [Database] sets Striping = 0, so the example
; segments below are ignored.
[Striping]
Segment1 = 100M, db-seg1-1.db, db-seg1-2.db
Segment2 = 100M, db-seg2-1.db
;...
;[TempStriping]
;Segment1 = 100M, db-seg1-1.db, db-seg1-2.db
;Segment2 = 100M, db-seg2-1.db
;...
;[Ucms]
;UcmPath = <path>
;Ucm1 = <file>
;Ucm2 = <file>
;...
;
; Zero Config setup: only the server name is set; the DSN and SSL entries
; are commented out.
;
[Zero Config]
ServerName = virtuoso (TULIPEDEV)
;ServerDSN = ZDSN
;SSLServerName =
;SSLServerDSN =
;
; Mono setup: no setting is active; every key below is commented out.
;
[Mono]
;MONO_TRACE = Off
;MONO_PATH = <path_here>
;MONO_ROOT = <path_here>
;MONO_CFG_DIR = <path_here>
;virtclr.dll =
;
; URIQA setup
; DefaultHost uses port 8890, the same port as ServerPort in [HTTPServer].
;
[URIQA]
DynamicLocal = 0
DefaultHost = localhost:8890
;
; SPARQL endpoint setup
;
[SPARQL]
ExternalQuerySource = 1
ExternalXsltSource = 1
;DefaultGraph = http://localhost:8890/dataspace
;ImmutableGraphs = http://localhost:8890/dataspace
ResultSetMaxRows = 100000
MaxQueryCostEstimationTime = 400 ; in seconds
MaxQueryExecutionTime = 40 ; in seconds
; The whole query must stay on one line.
DefaultQuery = select ?p,?o from <http://hub.abes.fr/springer/ebooksLN2011/SPR_EBOOK_ALL_25DEC> where {<http://hub.abes.fr/springerB/ebook/0-387-97089-4/w> ?p ?o} limit 50
DeferInferenceRulesInit = 1 ; controls inference rules loading
;PingService = http://rpc.pingthesemanticweb.com/
;
; Plugin setup: no plugin is loaded; every LoadN line below is commented out.
;
[Plugins]
;Load4 = plain, im
;Load5 = plain, wbxml2
;Load6 = plain, hslookup
;Load7 = attach, libphp5.so
;Load8 = Hosting, hosting_php.so
;Load9 = Hosting,hosting_perl.so
;Load10 = Hosting,hosting_python.so
;Load11 = Hosting,hosting_ruby.so
;Load12 = msdtc,msdtc_sample
------------------------------------------------------------------------------
Announcing the Oxford Dictionaries API! The API offers world-renowned
dictionary content that is easy and intuitive to access. Sign up for an
account today to start using our lexical data to power your apps and
projects. Get started today and enter our developer competition.
http://sdm.link/oxford
_______________________________________________
Virtuoso-users mailing list
Virtuoso-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/virtuoso-users