This is an automated email from the git hooks/post-receive script. ucko pushed a commit to branch master in repository ncbi-entrez-direct.
commit 5d2c52c50c69fa0ea9b318fb3619ac9fe8b00d73 Author: Aaron M. Ucko <[email protected]> Date: Thu Jan 5 22:12:51 2017 -0500 New upstream version 6.00.20170105+ds --- edirect.pl | 27 +++++- nquire | 2 + xtract.go | 304 ++++++++++++++++++++++++++++++++++++------------------------- 3 files changed, 206 insertions(+), 127 deletions(-) diff --git a/edirect.pl b/edirect.pl index 712fb5e..9bf7d77 100755 --- a/edirect.pl +++ b/edirect.pl @@ -87,7 +87,7 @@ use constant true => 1; # EDirect version number -$version = "5.90"; +$version = "6.00"; # URL address components @@ -127,6 +127,7 @@ sub clearflags { $emaddr = ""; $email = ""; $err = ""; + $extend = -1; $extrafeat = -1; $field = ""; $fields = false; @@ -1748,6 +1749,11 @@ sub esmry { $data = fix_bad_encoding($dbase, $data); + # remove eSummaryResult wrapper + $data =~ s/<!DOCTYPE eSummaryResult PUBLIC/<!DOCTYPE DocumentSummarySet PUBLIC/g; + $data =~ s/<eSummaryResult>//g; + $data =~ s/<\/eSummaryResult>//g; + print "$data"; } @@ -1862,6 +1868,11 @@ sub esmry { $data = fix_bad_encoding($dbase, $data); + # remove eSummaryResult wrapper + $data =~ s/<!DOCTYPE eSummaryResult PUBLIC/<!DOCTYPE DocumentSummarySet PUBLIC/g; + $data =~ s/<eSummaryResult>//g; + $data =~ s/<\/eSummaryResult>//g; + print "$data"; } @@ -1887,13 +1898,18 @@ Sequence Range -seq_start First sequence position to retrieve -seq_stop Last sequence position to retrieve -strand Strand of DNA to retrieve - -complexity 0 = default, 1 = bioseq, 3 = nuc-prot set Gene Range -chr_start Sequence range from 0-based coordinates -chr_stop in gene docsum GenomicInfoType object +Miscellaneous + + -complexity 0 = default, 1 = bioseq, 3 = nuc-prot set + -extend Extend sequence retrieval in both directions + -extrafeat Bit flag specifying extra features + Format Examples -db -format -mode Report Type @@ -2035,6 +2051,7 @@ sub eftch { "complexity=i" => \$complexity, "chr_start=i" => \$chr_start, "chr_stop=i" => \$chr_stop, + "extend=i" => \$extend, "extrafeat=i" => \$extrafeat, "start=i" => \$min, "stop=i" => \$max, @@ -2322,6 +2339,12 @@ sub eftch { } } + # optionally extend retrieved sequence range in both directions + if ( $extend > 0 ) { + $seq_start -= $extend; + $seq_stop += $extend; + } + if ( $strand ne "" ) { $arg .= "&strand=$strand"; } diff --git a/nquire b/nquire index 6cab866..68777b0 100755 --- a/nquire +++ b/nquire @@ -240,6 +240,8 @@ Examples nquire -eutils efetch.fcgi -db pubmed -id 2539356 -rettype medline -retmode text + nquire -eutils esummary.fcgi -db pubmed -id 2539356 -version 2.0 + nquire -url "https://eutils.ncbi.nlm.nih.gov/entrez/eutils" elink.fcgi \\ -dbfrom protein -db protein -cmd neighbor -linkname protein_protein -id NP_476532.1 diff --git a/xtract.go b/xtract.go index ee09786..c585133 100644 --- a/xtract.go +++ b/xtract.go @@ -63,7 +63,7 @@ import ( // VERSION AND HELP MESSAGE TEXT -const xtractVersion = "5.90" +const xtractVersion = "6.00" const xtractHelp = ` Overview @@ -141,9 +141,6 @@ Element Selection -element Print all items that match tag name -first Only print value of first item -last Only print value of last item - -encode URL-encode <, >, &, ", and ' characters - -upper Convert text to upper-case - -lower Convert text to lower-case -NAME Record value in named variable -element Constructs @@ -156,9 +153,14 @@ Element Selection Object Count "#Author" Item Length "%Title" Element Depth "^PMID" + Variable "&NAME" + +Special -element Operations + Parent Index "+" XML Subtree "*" - Variable "&NAME" + Children "$" + Attributes "@" Numeric Processing @@ -173,6 +175,13 @@ Numeric Processing -avg Average -dev Deviation +String Processing + + -encode URL-encode <, >, &, ", and ' characters + -upper Convert text to upper-case + -lower Convert text to lower-case + -title Capitalize initial letters of words + Phrase Processing -terms Partition phrase at spaces @@ -193,7 +202,7 @@ Command Generator -insd Argument Order Descriptors INSDSeq_sequence INSDSeq_definition INSDSeq_division - Flags complete or partial [optional] + Flags [complete|partial] Feature(s) CDS,mRNA Qualifiers INSDFeature_key "#INSDInterval" gene product @@ -212,7 +221,7 @@ Modification -filter Object [retain|remove|encode|decode|shrink] - [content|cdata|comment|object|attributes] + [content|cdata|comment|object|attributes|container] Validation @@ -423,34 +432,34 @@ Peptide Sequences xtract -insd complete mat_peptide "%peptide" product peptide | grep -i conotoxin | sort -t $'\t' -u -k 2,2n | head -n 8 - ADB43131.1 15 conotoxin Cal 1b LCCKRHHGCHPCGRT - AIC77099.1 16 conotoxin Im1.2 GCCSHPACNVNNPHIC - AIC77105.1 17 conotoxin Lt1.4 GCCSHPACDVNNPDICG - AIC77103.1 18 conotoxin Lt1.2 PRCCSNPACNANHAEICG - AIC77083.1 20 conotoxin Bt14.6 KDCTYCMHSSCSMMYEKCRP - AIC77085.1 21 conotoxin Bt14.8 NECDNCMRSFCSMIYEKCRLK - AIC77093.1 22 conotoxin Bt14.16 GDCKPCMHPDCRFNPGRCRPRE - AIC77154.1 23 conotoxin Bt14.19 VREKDCPPHPVPGMHKCVCLKTC + ADB43131.1 15 conotoxin Cal 1b LCCKRHHGCHPCGRT + ADB43128.1 16 conotoxin Cal 5.1 DPAPCCQHPIETCCRR + AIC77105.1 17 conotoxin Lt1.4 GCCSHPACDVNNPDICG + ADB43129.1 18 conotoxin Cal 5.2 MIQRSQCCAVKKNCCHVG + ADD97803.1 20 conotoxin Cal 1.2 AGCCPTIMYKTGACRTNRCR + AIC77085.1 21 conotoxin Bt14.8 NECDNCMRSFCSMIYEKCRLK + ADB43125.1 22 conotoxin Cal 14.2 GCPADCPNTCDSSNKCSPGFPG + AIC77154.1 23 conotoxin Bt14.19 VREKDCPPHPVPGMHKCVCLKTC Chromosome Locations esearch -db gene -query "calmodulin [PFN] AND mammalia [ORGN]" | efetch -format docsum | - xtract -pattern DocumentSummary -MAP "(-)" -MAP MapLocation \ - -element Id Name "&MAP" ScientificName - - 801 CALM1 14q32.11 Homo sapiens - 808 CALM3 19q13.2-q13.3 Homo sapiens - 805 CALM2 2p21 Homo sapiens - 24242 Calm1 6q31-q32 Rattus norvegicus - 12313 Calm1 12 E Mus musculus - 326597 CALM - Bos taurus - 50663 Calm2 6q11-q12 Rattus norvegicus - 24244 Calm3 1q22 Rattus norvegicus - 12315 Calm3 7 9.15 cM Mus musculus - 12314 Calm2 17 E4 Mus musculus - 617095 CALM1 - Bos taurus - 396838 CALM3 6 Sus scrofa + xtract -pattern DocumentSummary \ + -def "-" -element Id Name MapLocation ScientificName + + 801 CALM1 14q32.11 Homo sapiens + 808 CALM3 19q13.32 Homo sapiens + 805 CALM2 2p21 Homo sapiens + 24242 Calm1 6q32 Rattus norvegicus + 12313 Calm1 12 E Mus musculus + 326597 CALM - Bos taurus + 50663 Calm2 6q12 Rattus norvegicus + 24244 Calm3 1q21 Rattus norvegicus + 12315 Calm3 7 9.15 cM Mus musculus + 12314 Calm2 17 E4 Mus musculus + 617095 CALM1 - Bos taurus + 396838 CALM3 6 Sus scrofa ... Gene Regions @@ -671,20 +680,17 @@ Genome Range xtract -pattern DocumentSummary -NAME Name -DESC Description \ -block GenomicInfoType -if ChrLoc -equals Y \ -min ChrStart,ChrStop -element "&NAME" "&DESC" | - sort -k 1,1n | cut -f 2- | + sort -k 1,1n | cut -f 2- | grep -v uncharacterized | between-two-genes ASMT IL3RA - IL3RA interleukin 3 receptor subunit alpha - LOC101928032 uncharacterized LOC101928032 - LOC101928055 uncharacterized LOC101928055 - SLC25A6 solute carrier family 25 member 6 - LOC105373102 uncharacterized LOC105373102 - LINC00106 long intergenic non-protein coding RNA 106 - ASMTL-AS1 ASMTL antisense RNA 1 - ASMTL acetylserotonin O-methyltransferase-like - P2RY8 purinergic receptor P2Y8 - AKAP17A A-kinase anchoring protein 17A - ASMT acetylserotonin O-methyltransferase + IL3RA interleukin 3 receptor subunit alpha + SLC25A6 solute carrier family 25 member 6 + LINC00106 long intergenic non-protein coding RNA 106 + ASMTL-AS1 ASMTL antisense RNA 1 + ASMTL acetylserotonin O-methyltransferase-like + P2RY8 purinergic receptor P2Y8 + AKAP17A A-kinase anchoring protein 17A + ASMT acetylserotonin O-methyltransferase Amino Acid Substitutions @@ -1481,6 +1487,7 @@ const ( CDATATAG COMMENTTAG OBJECTTAG + CONTAINERTAG ISCLOSED ) @@ -1494,6 +1501,7 @@ const ( ENCODE UPPER LOWER + TITLE TERMS WORDS PAIRS @@ -1543,6 +1551,8 @@ const ( VARIABLE VALUE STAR + DOLLAR + ATSIGN COUNT LENGTH DEPTH @@ -1632,6 +1642,7 @@ var argTypeIs = map[string]ArgumentType{ "-encode": EXTRACTION, "-upper": EXTRACTION, "-lower": EXTRACTION, + "-title": EXTRACTION, "-terms": EXTRACTION, "-words": EXTRACTION, "-pairs": EXTRACTION, @@ -1671,6 +1682,7 @@ var opTypeIs = map[string]OpType{ "-encode": ENCODE, "-upper": UPPER, "-lower": LOWER, + "-title": TITLE, "-terms": TERMS, "-words": WORDS, "-pairs": PAIRS, @@ -2637,7 +2649,7 @@ func ParseArguments(args []string, pttrn string) *Block { op := &Operation{Type: status, Value: ""} comm = append(comm, op) status = UNSET - case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TERMS, WORDS, PAIRS, PHRASE: + case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE: case NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC: case TAB, RET, PFX, SFX, SEP, LBL, PFC, DEF: case UNSET: @@ -2699,10 +2711,18 @@ func ParseArguments(args []string, pttrn string) *Block { status = STAR default: } - } else if item == "*" { - status = STAR - } else if item == "+" { - status = INDEX + } else { + switch item { + case "*": + status = STAR + case "+": + status = INDEX + case "$": + status = DOLLAR + case "@": + status = ATSIGN + default: + } } // parse parent/element@attribute construct @@ -2780,7 +2800,7 @@ func ParseArguments(args []string, pttrn string) *Block { switch status { case UNSET: status = nextStatus(str) - case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC: + case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC: for !strings.HasPrefix(str, "-") { // create one operation per argument, even if under a single -element statement op := &Operation{Type: status, Value: str} @@ -3000,7 +3020,7 @@ func (rdr *XMLReader) NextBlock() string { rdr.Remainder = "" if m > 16384 { // previous remainder is larger than reserved section, write and signal need to continue reading - return string(rdr.Buffer[:]), true, false + return string(rdr.Buffer[:m]), true, false } // read next block, append behind copied remainder from previous read @@ -4123,6 +4143,8 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special case "object": // object normally retained which = OBJECTTAG + case "container": + which = CONTAINERTAG default: fmt.Fprintf(os.Stderr, "\nERROR: Unrecognized target '%s' supplied to xtract -filter\n", trget) os.Exit(1) @@ -4138,6 +4160,9 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special case STARTTAG: if name == pttrn { inPattern = true + if which == CONTAINERTAG && what == DOREMOVE { + continue + } } if inPattern && which == OBJECTTAG && what == DOREMOVE { continue @@ -4174,6 +4199,9 @@ func ProcessXMLStream(in *XMLReader, tbls *Tables, args []string, action Special if which == OBJECTTAG && what == DOREMOVE { continue } + if which == CONTAINERTAG && what == DOREMOVE { + continue + } } if inPattern && which == OBJECTTAG && what == DOREMOVE { continue @@ -5274,6 +5302,70 @@ func ProcessHydra(isPipe bool) []string { // COLLECT AND FORMAT REQUESTED XML VALUES +// ParseAttributes is only run if attribute values are requested in element statements +func ParseAttributes(attrb string) []string { + + if attrb == "" { + return nil + } + + attlen := len(attrb) + + // count equal signs + num := 0 + for i := 0; i < attlen; i++ { + if attrb[i] == '=' { + num += 2 + } + } + if num < 1 { + return nil + } + + // allocate array of proper size + arry := make([]string, num) + if arry == nil { + return nil + } + + start := 0 + idx := 0 + itm := 0 + + // place tag and value in successive array slots + for idx < attlen && itm < num { + ch := attrb[idx] + if ch == '=' { + // skip past possible leading blanks + for start < attlen { + ch = attrb[start] + if ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r' || ch == '\f' { + start++ + } else { + break + } + } + // = + arry[itm] = attrb[start:idx] + itm++ + // skip past equal sign and leading double quote + idx += 2 + start = idx + } else if ch == '"' { + // " + arry[itm] = attrb[start:idx] + itm++ + // skip past trailing double quote and (possible) space + idx += 2 + start = idx + } else { + idx++ + } + } + + return arry +} + // ExploreElements returns matching element values to callback func ExploreElements(curr *Node, mask, prnt, match, attrib string, wildcard bool, level int, proc func(string, int)) { @@ -5298,70 +5390,6 @@ func ExploreElements(curr *Node, mask, prnt, match, attrib string, wildcard bool return } - // parseAttributes is only run if attribute values are requested in element statements - parseAttributes := func(attrb string) []string { - - if attrb == "" { - return nil - } - - attlen := len(attrb) - - // count equal signs - num := 0 - for i := 0; i < attlen; i++ { - if attrb[i] == '=' { - num += 2 - } - } - if num < 1 { - return nil - } - - // allocate array of proper size - arry := make([]string, num) - if arry == nil { - return nil - } - - start := 0 - idx := 0 - itm := 0 - - // place tag and value in successive array slots - for idx < attlen && itm < num { - ch := attrb[idx] - if ch == '=' { - // skip past possible leading blanks - for start < attlen { - ch = attrb[start] - if ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r' || ch == '\f' { - start++ - } else { - break - } - } - // = - arry[itm] = attrb[start:idx] - itm++ - // skip past equal sign and leading double quote - idx += 2 - start = idx - } else if ch == '"' { - // " - arry[itm] = attrb[start:idx] - itm++ - // skip past trailing double quote and (possible) space - idx += 2 - start = idx - } else { - idx++ - } - } - - return arry - } - // wildcard matches any namespace prefix if curr.Name == match || (wildcard && strings.HasPrefix(match, ":") && strings.HasSuffix(curr.Name, match)) || @@ -5374,7 +5402,7 @@ func ExploreElements(curr *Node, mask, prnt, match, attrib string, wildcard bool if attrib != "" { if curr.Attributes != "" && curr.Attribs == nil { // parse attributes on-the-fly if queried - curr.Attribs = parseAttributes(curr.Attributes) + curr.Attribs = ParseAttributes(curr.Attributes) } for i := 0; i < len(curr.Attribs)-1; i += 2 { // attributes now parsed into array as [ tag, value, tag, value, tag, value, ... ] @@ -5402,6 +5430,12 @@ func ExploreElements(curr *Node, mask, prnt, match, attrib string, wildcard bool // for XML container object, send empty string to callback to increment count proc("", level) // and continue exploring + + } else if curr.Attributes != "" { + + // for self-closing object, indicate presence by sending empty string to callback + proc("", level) + return } } } @@ -5656,6 +5690,14 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, sep, def st acc(str) } }) + case TITLE: + exploreElements(func(str string, lvl int) { + if str != "" { + str = strings.ToLower(str) + str = strings.Title(str) + acc(str) + } + }) case VARIABLE: // use value of stored variable val, ok := variables[match] @@ -5750,6 +5792,17 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, sep, def st if txt != "" { acc(txt) } + case DOLLAR: + for chld := curr.Children; chld != nil; chld = chld.Next { + acc(chld.Name) + } + case ATSIGN: + if curr.Attributes != "" && curr.Attribs == nil { + curr.Attribs = ParseAttributes(curr.Attributes) + } + for i := 0; i < len(curr.Attribs)-1; i += 2 { + acc(curr.Attribs[i]) + } default: } } @@ -5765,7 +5818,7 @@ func ProcessClause(curr *Node, stages []*Step, mask, prev, pfx, sfx, sep, def st between := "" switch status { - case ELEMENT, ENCODE, UPPER, LOWER, VALUE, NUM, INC, DEC, ZEROBASED, ONEBASED, UCSC: + case ELEMENT, ENCODE, UPPER, LOWER, TITLE, VALUE, NUM, INC, DEC, ZEROBASED, ONEBASED, UCSC: processElement(func(str string) { if str != "" { ok = true @@ -6099,7 +6152,7 @@ func ProcessInstructions(commands []*Operation, curr *Node, mask, tab, ret strin str := op.Value switch op.Type { - case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC: + case ELEMENT, FIRST, LAST, ENCODE, UPPER, LOWER, TITLE, TERMS, WORDS, PAIRS, PHRASE, NUM, LEN, SUM, MIN, MAX, INC, DEC, SUB, AVG, DEV, ZEROBASED, ONEBASED, UCSC: txt, ok := ProcessClause(curr, op.Stages, mask, tab, pfx, sfx, sep, def, op.Type, index, level, variables) if ok { tab = col @@ -7461,15 +7514,6 @@ func main() { fmt.Fprintf(os.Stderr, "\nERROR: Input data from both stdin and file '%s', mode is '%s'\n", fileName, mode) os.Exit(1) } - - } else if runtime.GOOS != "windows" { - - fromStdin := bool((fi.Mode() & os.ModeCharDevice) == 0) - if !isPipe || !fromStdin { - mode := fi.Mode().String() - fmt.Fprintf(os.Stderr, "\nERROR: No data supplied to xtract from stdin or file, mode is '%s'\n", mode) - os.Exit(1) - } } // check for -input command after extraction arguments @@ -7665,6 +7709,16 @@ func main() { // CONFIRM INPUT DATA AVAILABILITY AFTER RUNNING COMMAND GENERATORS + if fileName == "" && runtime.GOOS != "windows" { + + fromStdin := bool((fi.Mode() & os.ModeCharDevice) == 0) + if !isPipe || !fromStdin { + mode := fi.Mode().String() + fmt.Fprintf(os.Stderr, "\nERROR: No data supplied to xtract from stdin or file, mode is '%s'\n", mode) + os.Exit(1) + } + } + if testCount < 1 && !usingFile && !isPipe { fmt.Fprintf(os.Stderr, "\nERROR: No XML input data supplied to xtract\n") -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/ncbi-entrez-direct.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
