chrismattmann closed pull request #153: This is the code for breaking dratstat
URL: https://github.com/apache/drat/pull/153
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:


diff --git a/distribution/src/main/resources/bin/dratstats.py b/distribution/src/main/resources/bin/dratstats.py
index cae5c897..43ff1c86 100644
--- a/distribution/src/main/resources/bin/dratstats.py
+++ b/distribution/src/main/resources/bin/dratstats.py
@@ -268,7 +268,7 @@ def run(repos_list, output_dir):
                        print("\nOODT Started: OK\n")
 
                        print('Adding repository: '+str(rep)+' to Solr')
-                       index_solr(json.dumps([rep]))
+                       # index_solr(json.dumps([rep]))
 
 
                        print("\nRunning DRAT on " + rep["repo"] + " ...\n")
@@ -295,178 +295,9 @@ def run(repos_list, output_dir):
                                        wait_for_job("urn:drat:MimePartitioner")
                                        wait_for_job("urn:drat:RatCodeAudit")
                                        stats['map_end'] = current_datetime()
-
-                                       if retval:
-                                               time.sleep(5)
-                                               stats['reduce_start'] = current_datetime()
-
-                                               # Extract data from RatAggregate File
-                                               totalNotes = 0
-                                               totalBinaries = 0
-                                               totalArchives = 0
-                                               totalStandards = 0
-                                               totalApache = 0
-                                               totalGenerated = 0
-                                               totalUnknown = 0
-
-                                               rat_dir = os.getenv("DRAT_HOME") + "/data/archive/rat"
-
-                                               # Iterate over all RAT log files
-                                               for root, dirs, files in os.walk(rat_dir):
-                                                       for filename in files:
-                                                               if filename.endswith(".log"):
-                                                                       (notes, binaries, archives,standards,apachelicensed,generated,unknown) = parseFile(os.path.join(root, filename))
-                                                                       totalNotes = totalNotes + notes
-                                                                       totalBinaries = totalBinaries + binaries
-                                                                       totalArchives = totalArchives + archives
-                                                                       totalStandards = totalStandards + standards
-                                                                       totalApache = totalApache + apachelicensed
-                                                                       totalGenerated = totalGenerated + generated
-                                                                       totalUnknown = totalUnknown + unknown
-
-                                               stats["license_Notes"] = totalNotes
-                                               stats["license_Binaries"] = totalBinaries
-                                               stats["license_Archives"] = totalArchives
-                                               stats["license_Standards"] = totalStandards
-                                               stats["license_Apache"] = totalApache
-                                               stats["license_Generated"] = totalGenerated
-                                               stats["license_Unknown"] = totalUnknown
-
-                                               stats['reduce_end'] = current_datetime()
-                                               print "\nDRAT Scan Completed: OK\n"
-
-                       time.sleep(5)
-
-                       if retval:
-                               # Copy Data with datetime variables above, extract output from RatAggregate file, extract data from Solr Core
-                               printnow ("\nCopying data to Solr and Output Directory...\n")
-
-                               # Extract data from Solr
-                               neg_mimetype = ["image", "application", "text", "video", "audio", "message", "multipart"]
-                               connection = urllib2.urlopen(os.getenv("SOLR_URL") + "/drat/select?q=*%3A*&rows=0&facet=true&facet.field=mimetype&wt=python&indent=true")
-
-                               response = eval(connection.read())
-                               mime_count = response["facet_counts"]["facet_fields"]["mimetype"]
-
-                               for i in range(0, len(mime_count), 2):
-                                       if mime_count[i].split("/")[0] not in neg_mimetype:
-                                               stats["mime_" + mime_count[i]] = mime_count[i + 1]
-
-
-                               # Count the number of files
-                               stats["files"] = count_num_files(rep["repo"], ".git")
-
-                               # Write data into Solr
-                               stats["type"] = 'software'
-                               stats_data = []
-                               stats_data.append(stats)
-                               json_data = json.dumps(stats_data)
-                               index_solr(json_data)
-
-                               # Parse RAT logs
-                               rat_logs_dir = os.getenv("DRAT_HOME") + "/data/archive/rat/*/*.log"
-                               rat_license = {}
-                               rat_header = {}
-                               for filename in glob.glob(rat_logs_dir):
-                                       #print('=' * 20)
-                                       l = 0
-                                       h = 0
-                                       cur_file = ''
-                                       cur_header = ''
-                                       cur_section = ''
-                                       parsedHeaders = False
-                                       parsedLicenses = False
-
-                                       with open(filename, 'rb') as f:
-                                               printnow('Parsing rat log: ['+filename+']')
-                                               for line in f:
-                                                       if '*****************************************************' in line:
-                                                               l = 0
-                                                               h = 0
-                                                               if cur_section == 'licenses':
-                                                                       parsedLicenses = True
-                                                               if cur_section == 'headers':
-                                                                       parsedHeaders = True
-
-                                                               cur_file = ''
-                                                               cur_header = ''
-                                                               cur_section = ''
-                                                       if line.startswith('  Files with Apache') and not parsedLicenses:
-                                                               cur_section = 'licenses'
-                                                       if line.startswith(' Printing headers for ') and not parsedHeaders:
-                                                               cur_section = 'headers'
-                                                       if cur_section == 'licenses':
-                                                               l += 1
-                                                               if l > 4:
-                                                                       line = line.strip()
-                                                                       if line:
-                                                                               print("File: %s with License Line: %s" % (filename, line))
-                                                                               li = parse_license(line)
-                                                                               rat_license[li[0]] = li[1]
-                                                                               print(li)
-                                                       if cur_section == 'headers':
-                                                               if '=====================================================' in line or '== File:' in line:
-                                                                       h += 1
-                                                               if h == 2:
-                                                                       cur_file = line.split("/")[-1].strip()
-                                                               if h == 3:
-                                                                       cur_header += line
-                                                               if h == 4:
-                                                                       rat_header[cur_file] = cur_header.split("\n", 1)[1]
-                                                                       cur_file = ''
-                                                                       cur_header = ''
-                                                                       h = 1
-                                       if h == 3:
-                                               rat_header[cur_file] = cur_header.split("\n", 1)[1]
-                                       parsedHeaders = True
-                                       parsedLicenses = True
-
-                               # Index RAT logs into Solr
-                               connection = urllib2.urlopen(os.getenv("SOLR_URL") +
-                                                                                        "/drat/select?q=*%3A*&fl=filename%2Cfilelocation%2Cmimetype&wt=python&rows="
-                                                                                        + str(stats["files"]) +"&indent=true")
-                               response = eval(connection.read())
-                               docs = response['response']['docs']
-                               file_data = []
-                               batch = 100
-                               dc = 0
-
-                               for doc in docs:
-                                       fdata = {}
-                                       fdata['id'] = os.path.join(doc['filelocation'][0], doc['filename'][0])
-                                       m = md5.new()
-                                       m.update(fdata['id'])
-                                       hashId = m.hexdigest()
-                                       fileId = hashId+"-"+doc['filename'][0]
-
-                                       if fileId not in rat_license:
-                                               print "File: "+str(fdata['id'])+": ID: ["+fileId+"] not present in parsed licenses => Likely file copying issue. Skipping."
-                                               continue #handle issue with DRAT #93
-
-                                       fdata["type"] = 'file'
-                                       fdata['parent'] = rep["repo"]
-                                       fdata['mimetype'] = doc['mimetype'][0]
-                                       fdata['license'] = rat_license[fileId]
-                                       if fileId in rat_header:
-                                               fdata['header'] = rat_header[fileId]
-                                       file_data.append(fdata)
-                                       dc += 1
-                                       if dc % batch == 0:
-                                               json_data = json.dumps(file_data)
-                                               index_solr(json_data)
-                                               file_data = []
-                               if dc % batch != 0:
-                                       json_data = json.dumps(file_data)
-                                       index_solr(json_data)
-
-                               # Copying data to Output Directory
-                               repos_out = output_dir + "/" + normalize_path(rep["repo"])
-                               shutil.copytree(os.getenv("DRAT_HOME") + "/data", repos_out)
-                               print("\nData copied to Solr and Output Directory: OK\n")
-
-                       else:
-                               print ("\nDRAT Scan Completed: Resulted in Error\n")
-
+                                       print ("\nwaiting for Rat Aggregator...\n")
+                                       wait_for_job("urn:drat:RatAggregator")
+
 
                        time.sleep(5)
                        print ("\nStopping OODT...\n")
diff --git a/nohup.out b/nohup.out
new file mode 100644
index 00000000..bb32b5eb
--- /dev/null
+++ b/nohup.out
@@ -0,0 +1,3 @@
+Started dynamic workflow with id '6453cca6-9f30-11e8-b99d-f5018c8e9233'
+
+Navigate to http://localhost:8080/opsui/ to view the OODT browser and http://localhost:8080/solr to view the Solr catalog.
diff --git a/webapps/proteus-new/src/main/webapp/resources/src/components/statisticscomp.vue b/webapps/proteus-new/src/main/webapp/resources/src/components/statisticscomp.vue
index 1280e9ce..adebf8b2 100644
--- a/webapps/proteus-new/src/main/webapp/resources/src/components/statisticscomp.vue
+++ b/webapps/proteus-new/src/main/webapp/resources/src/components/statisticscomp.vue
@@ -156,7 +156,7 @@ the License.
         return this.stat.crawledfiles/this.stat.numOfFiles *100;
       },
       indexingprogress(){
-        return this.stat.indexedfiles/this.stat.numberOfFiles * 100;
+        return this.stat.indexedfiles/this.stat.numOfFiles * 100;
       }
     }
 }
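
Note on the dratstats.py change above: the inline "reduce" step (walking $DRAT_HOME/data/archive/rat, summing the license counts, and pushing the results into Solr) is removed, and the script instead simply blocks on the urn:drat:RatAggregator workflow job, the same way it already blocks on MimePartitioner and RatCodeAudit. The actual wait_for_job helper is not part of this diff; the snippet below is only a minimal sketch of the polling pattern such a helper typically follows, with a hypothetical is_running callback standing in for however the OODT workflow manager is queried.

# Illustrative sketch only -- not the wait_for_job defined in dratstats.py.
# is_running is a hypothetical callable that reports whether the named
# OODT workflow task (e.g. "urn:drat:RatAggregator") is still executing.
import time

def wait_for_job(job_name, is_running, poll_seconds=2):
    """Poll until the named workflow job is no longer running."""
    while is_running(job_name):
        time.sleep(poll_seconds)

# With a helper like this, the patched run() only needs
#   wait_for_job("urn:drat:RatAggregator", is_running)
# instead of re-implementing the aggregation logic in Python.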


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services
