core.git: bin/crashreportScraper.py

2024-03-15 Thread Xisco Fauli (via logerrit)
 bin/crashreportScraper.py |   16 +++-
 1 file changed, 7 insertions(+), 9 deletions(-)

New commits:
commit 8ca321f9e52a43acf36a8f8184f81240bd946653
Author: Xisco Fauli 
AuthorDate: Thu Mar 14 22:49:19 2024 +0100
Commit: Xisco Fauli 
CommitDate: Fri Mar 15 12:58:57 2024 +0100

crashreportScraper: fix version comparison

And remove version column, it's not very relevant anyway

Change-Id: I9101d5f63aec237cdcbfc6eb3759714cca7c5328
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/164849
Tested-by: Jenkins
Reviewed-by: Xisco Fauli 

diff --git a/bin/crashreportScraper.py b/bin/crashreportScraper.py
index 78b2f6cb5eb2..6ce91bcba189 100755
--- a/bin/crashreportScraper.py
+++ b/bin/crashreportScraper.py
@@ -18,6 +18,7 @@ import os
 import math
 from datetime import datetime
 import urllib.parse
+import re
 
 def convert_str_to_date(value):
 value = value.replace('.', '')
@@ -73,7 +74,7 @@ def 
parse_reports_and_get_most_recent_report_from_last_page(url):
 
 reports = soup.find("div", {"id": "reports"}).tbody
 ID, currentID = "", ""
-version, currentVersion = "", ""
+version, currentVersion = 0, 0
 OS, currentOS = "", ""
 
 tr_list = reports.find_all("tr")
@@ -81,7 +82,7 @@ def 
parse_reports_and_get_most_recent_report_from_last_page(url):
 td_list = tr.find_all("td")
 
 currentID = td_list[0].a.text.strip()
-currentVersion = td_list[2].text.strip().split(': ')[1]
+currentVersion = int(''.join(re.findall("\d+", td_list[2].text)))
 currentOS = td_list[3].text.strip()
 
 # get most recent version
@@ -91,16 +92,13 @@ def 
parse_reports_and_get_most_recent_report_from_last_page(url):
 ID = currentID
 OS = currentOS
 
-if not version:
-version = currentVersion
-
 if not ID:
 ID = currentID
 
 if not OS:
 OS = currentOS
 
-return count, ID, version, OS
+return count, ID, OS
 
 def parse_details_and_get_info(url, gitRepo):
 try:
@@ -187,7 +185,7 @@ if __name__ == '__main__':
 with open(fileName, "a") as f:
 if bInsertHeader:
 line = '\t'.join(["Name", "Ratio", "Count", "First report", "Last Report",
-"ID", "Version", "Reason", "OS", "Stack", "Code Lines", "Last 4 UNO Commands", '\n'])
+"ID", "Reason", "OS", "Stack", "Code Lines", "Last 4 UNO Commands", '\n'])
 f.write(line)
 f.flush()
 
@@ -195,13 +193,13 @@ if __name__ == '__main__':
 if k not in crashesInFile:
 print("Parsing " + k)
 try:
-crashCount, crashID, crashVersion, crashOS = parse_reports_and_get_most_recent_report_from_last_page(
+crashCount, crashID, crashOS = parse_reports_and_get_most_recent_report_from_last_page(
 "https://crashreport.libreoffice.org/stats/signature/" + urllib.parse.quote(k))
 crashReason, crashStack, codeLine, unoCommands = parse_details_and_get_info(
 "https://crashreport.libreoffice.org/stats/crash_details/" + crashID, args.repository)
 ratio = round(crashCount / ((lDate[2] - lDate[1]).days + 1), 2)
 line = '\t'.join([k, str(ratio), str(crashCount) , lDate[1].strftime('%y/%m/%d'), lDate[2].strftime('%y/%m/%d'),
-crashID, crashVersion, crashReason, crashOS, crashStack, codeLine, unoCommands, '\n'])
+crashID, crashReason, crashOS, crashStack, codeLine, unoCommands, '\n'])
 f.write(line)
 f.flush()
 except (requests.exceptions.Timeout, AttributeError):


core.git: bin/crashreportScraper.py

2024-02-13 Thread Xisco Fauli (via logerrit)
 bin/crashreportScraper.py |   13 -
 1 file changed, 13 deletions(-)

New commits:
commit 40bab7e27f13becb45055c9cfcd52aebf6128536
Author: Xisco Fauli 
AuthorDate: Mon Feb 12 11:11:17 2024 +0100
Commit: Xisco Fauli 
CommitDate: Tue Feb 13 10:33:42 2024 +0100

crashreportScraper: no need to go to the last page anymore

JJ changed the way the reports are sorted to show
the most recent ones on page 1

Change-Id: I59d566ff45fd8a75263b70a1e436e4263422e93b
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/163239
Tested-by: Jenkins
Reviewed-by: Xisco Fauli 

diff --git a/bin/crashreportScraper.py b/bin/crashreportScraper.py
index 876570d3a028..78b2f6cb5eb2 100755
--- a/bin/crashreportScraper.py
+++ b/bin/crashreportScraper.py
@@ -71,19 +71,6 @@ def 
parse_reports_and_get_most_recent_report_from_last_page(url):
 td_list = tr.find_all("td")
 count += int(td_list[1].text.strip())
 
-# There are 50 reports on each page.
-# Go to the last page based on the total count to get a recent report
-last_page = math.ceil( count / 50 )
-
-if last_page > 1:
-url = url + "?page=" + str(last_page)
-try:
-html_text = requests.get(url, timeout=200).text
-soup = BeautifulSoup(html_text, 'html.parser')
-except requests.exceptions.Timeout:
-print("Timeout")
-raise
-
 reports = soup.find("div", {"id": "reports"}).tbody
 ID, currentID = "", ""
 version, currentVersion = "", ""


[Libreoffice-commits] core.git: bin/crashreportScraper.py

2023-09-05 Thread Xisco Fauli (via logerrit)
 bin/crashreportScraper.py |9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

New commits:
commit 55eef24a55afb3708f2e02abf65b6934bed7f3de
Author: Xisco Fauli 
AuthorDate: Tue Sep 5 12:23:20 2023 +0200
Commit: Xisco Fauli 
CommitDate: Tue Sep 5 16:53:44 2023 +0200

crashreportScraper: Add ratio column

it makes sense to sort the sheet by this column

Change-Id: I05603dac80289605c18e86fbf27c3d899f9862c2
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/156562
Tested-by: Xisco Fauli 
Reviewed-by: Xisco Fauli 

diff --git a/bin/crashreportScraper.py b/bin/crashreportScraper.py
index 54477f6c4c28..876570d3a028 100755
--- a/bin/crashreportScraper.py
+++ b/bin/crashreportScraper.py
@@ -28,9 +28,7 @@ def convert_str_to_date(value):
 value = value.replace('Sept', 'Sep')
 # reset the time leaving the date
 value = ", ".join(value.split(", ")[:-1])
-dtDate = datetime.strptime(value, '%b %d, %Y')
-
-return dtDate.strftime('%y/%m/%d')
+return datetime.strptime(value, '%b %d, %Y')
 
 def parse_version_url(url):
 crashReports = {}
@@ -201,7 +199,7 @@ if __name__ == '__main__':
 
 with open(fileName, "a") as f:
 if bInsertHeader:
-line = '\t'.join(["Name", "Count", "First report", "Last Report",
+line = '\t'.join(["Name", "Ratio", "Count", "First report", "Last 
Report",
 "ID", "Version", "Reason", "OS", "Stack", "Code Lines", "Last 
4 UNO Commands", '\n'])
 f.write(line)
 f.flush()
@@ -214,7 +212,8 @@ if __name__ == '__main__':
 
"https://crashreport.libreoffice.org/stats/signature/" + urllib.parse.quote(k))
 crashReason, crashStack, codeLine, unoCommands = parse_details_and_get_info(
 "https://crashreport.libreoffice.org/stats/crash_details/" + crashID, args.repository)
-line = '\t'.join([k, str(crashCount), lDate[1], lDate[2],
+ratio = round(crashCount / ((lDate[2] - lDate[1]).days + 
1), 2)
+line = '\t'.join([k, str(ratio), str(crashCount) , 
lDate[1].strftime('%y/%m/%d'), lDate[2].strftime('%y/%m/%d'),
 crashID, crashVersion, crashReason, crashOS, 
crashStack, codeLine, unoCommands, '\n'])
 f.write(line)
 f.flush()


[Libreoffice-commits] core.git: bin/crashreportScraper.py

2022-10-21 Thread Xisco Fauli (via logerrit)
 bin/crashreportScraper.py |   15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

New commits:
commit da39ae2470edac28a65c3a01ddb49a810bec
Author: Xisco Fauli 
AuthorDate: Thu Oct 20 13:18:52 2022 +0200
Commit: Xisco Fauli 
CommitDate: Fri Oct 21 08:52:26 2022 +0200

crashreportScraper: Also get info about the last 4 uno commands

it can be useful

Change-Id: I8e709775814922c2623350de1de2fe647d7deadd
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141556
Tested-by: Jenkins
Reviewed-by: Xisco Fauli 

diff --git a/bin/crashreportScraper.py b/bin/crashreportScraper.py
index 7d57ab1f747b..54477f6c4c28 100755
--- a/bin/crashreportScraper.py
+++ b/bin/crashreportScraper.py
@@ -162,7 +162,14 @@ def parse_details_and_get_info(url, gitRepo):
 #multiline
 codeLine = "\"" + codeLine + "\""
 
-return reason, stack, codeLine
+metadata = soup.find("div", {"id": "metadata"}).tbody
+tr_list = metadata.find_all("tr")
+unoCommands = ""
+for tr in tr_list:
+if tr.th.text.strip() == "Last-4-Uno-Commands":
+unoCommands = tr.td.text.strip()
+
+return reason, stack, codeLine, unoCommands
 
 
 if __name__ == '__main__':
@@ -195,7 +202,7 @@ if __name__ == '__main__':
 with open(fileName, "a") as f:
 if bInsertHeader:
 line = '\t'.join(["Name", "Count", "First report", "Last Report",
-"ID", "Version", "Reason", "OS", "Stack", "Code Lines" '\n'])
+"ID", "Version", "Reason", "OS", "Stack", "Code Lines", "Last 
4 UNO Commands", '\n'])
 f.write(line)
 f.flush()
 
@@ -205,10 +212,10 @@ if __name__ == '__main__':
 try:
crashCount, crashID, crashVersion, crashOS = parse_reports_and_get_most_recent_report_from_last_page(
 "https://crashreport.libreoffice.org/stats/signature/" + urllib.parse.quote(k))
-crashReason, crashStack, codeLine = parse_details_and_get_info(
+crashReason, crashStack, codeLine, unoCommands = parse_details_and_get_info(
 "https://crashreport.libreoffice.org/stats/crash_details/" + crashID, args.repository)
 line = '\t'.join([k, str(crashCount), lDate[1], lDate[2],
-crashID, crashVersion, crashReason, crashOS, 
crashStack, codeLine, '\n'])
+crashID, crashVersion, crashReason, crashOS, 
crashStack, codeLine, unoCommands, '\n'])
 f.write(line)
 f.flush()
 except (requests.exceptions.Timeout, AttributeError):


[Libreoffice-commits] core.git: bin/crashreportScraper.py

2022-08-09 Thread Xisco Fauli (via logerrit)
 bin/crashreportScraper.py |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

New commits:
commit a9a6ec313be4787f99cece793f069f61a8ee73b2
Author: Xisco Fauli 
AuthorDate: Tue Aug 9 11:50:24 2022 +0200
Commit: Xisco Fauli 
CommitDate: Tue Aug 9 14:05:40 2022 +0200

crashreportScraper: replace quotation marks from code

otherwise, the csv is imported incorrectly

Change-Id: I5451516b2fdc80a96a4fde83a2c72d701bfd995a
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/138009
Tested-by: Jenkins
Reviewed-by: Xisco Fauli 

diff --git a/bin/crashreportScraper.py b/bin/crashreportScraper.py
index b0ab5f5bd5f9..7d57ab1f747b 100755
--- a/bin/crashreportScraper.py
+++ b/bin/crashreportScraper.py
@@ -149,7 +149,7 @@ def parse_details_and_get_info(url, gitRepo):
 lines = f.readlines()
 for index, line in enumerate(lines):
 if index + 1 == int(codeNumber):
-codeLine += line.strip() + "\n"
+codeLine += line.strip().replace("\"", "'") + "\n"
 except FileNotFoundError:
 codeLine += "\n"
 continue


[Libreoffice-commits] core.git: bin/crashreportScraper.py

2022-08-09 Thread Xisco Fauli (via logerrit)
 bin/crashreportScraper.py |9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

New commits:
commit 452311610d38d7e147b2ec2345b76c1b29646159
Author: Xisco Fauli 
AuthorDate: Mon Aug 8 19:04:04 2022 +0200
Commit: Xisco Fauli 
CommitDate: Tue Aug 9 09:51:33 2022 +0200

crashreportScraper: continue when os_tab is not found

Change-Id: I293ad70ad2776bfa6ea3e075ba69428963301433
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/137994
Tested-by: Jenkins
Reviewed-by: Xisco Fauli 

diff --git a/bin/crashreportScraper.py b/bin/crashreportScraper.py
index cad7feead645..b0ab5f5bd5f9 100755
--- a/bin/crashreportScraper.py
+++ b/bin/crashreportScraper.py
@@ -62,7 +62,12 @@ def 
parse_reports_and_get_most_recent_report_from_last_page(url):
 raise
 
 count = 0
-os_tab = soup.find("table", {"id": "os_tab"}).tbody
+try:
+os_tab = soup.find("table", {"id": "os_tab"}).tbody
+except AttributeError:
+print("os_tab not found")
+raise
+
 tr_list = os_tab.find_all("tr")
 for tr in tr_list:
 td_list = tr.find_all("td")
@@ -206,5 +211,5 @@ if __name__ == '__main__':
 crashID, crashVersion, crashReason, crashOS, 
crashStack, codeLine, '\n'])
 f.write(line)
 f.flush()
-except requests.exceptions.Timeout:
+except (requests.exceptions.Timeout, AttributeError):
 continue


[Libreoffice-commits] core.git: bin/crashreportScraper.py

2022-06-21 Thread Xisco Fauli (via logerrit)
 bin/crashreportScraper.py |5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

New commits:
commit 6b15decd0acc2c7c0622baba65b9d4c019a4183a
Author: Xisco Fauli 
AuthorDate: Tue Jun 21 15:40:33 2022 +0200
Commit: Xisco Fauli 
CommitDate: Tue Jun 21 23:51:50 2022 +0200

crashreportScraper: encode the url

Change-Id: I1f738f017966a6fe48dd9e2cf36dbdf5f50c0cef
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/136229
Tested-by: Xisco Fauli 
Reviewed-by: Xisco Fauli 

diff --git a/bin/crashreportScraper.py b/bin/crashreportScraper.py
index 513f5ec7b75d..cad7feead645 100755
--- a/bin/crashreportScraper.py
+++ b/bin/crashreportScraper.py
@@ -17,6 +17,7 @@ import sys
 import os
 import math
 from datetime import datetime
+import urllib.parse
 
 def convert_str_to_date(value):
 value = value.replace('.', '')
@@ -194,11 +195,11 @@ if __name__ == '__main__':
 f.flush()
 
 for k, lDate in crashes.items():
-if len(k) < 254 and k not in crashesInFile and '`' not in k:
+if k not in crashesInFile:
 print("Parsing " + k)
 try:
 crashCount, crashID, crashVersion, crashOS = 
parse_reports_and_get_most_recent_report_from_last_page(
-"https://crashreport.libreoffice.org/stats/signature/" + k)
+"https://crashreport.libreoffice.org/stats/signature/" + urllib.parse.quote(k))
 crashReason, crashStack, codeLine = parse_details_and_get_info(
 "https://crashreport.libreoffice.org/stats/crash_details/" + crashID, args.repository)
 line = '\t'.join([k, str(crashCount), lDate[1], lDate[2],


[Libreoffice-commits] core.git: bin/crashreportScraper.py

2022-06-16 Thread Xisco Fauli (via logerrit)
 bin/crashreportScraper.py |   20 
 1 file changed, 12 insertions(+), 8 deletions(-)

New commits:
commit 970f03cb9ed68d249fe04cff7d4aa15b0f2c6c35
Author: Xisco Fauli 
AuthorDate: Thu Jun 16 13:16:13 2022 +0200
Commit: Xisco Fauli 
CommitDate: Thu Jun 16 13:26:36 2022 +0200

crashreportScraper: use argparse to parse the arguments

Change-Id: Idc1d32683c5113042fe4e7ec97357b6d76c5217e
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/135973
Tested-by: Xisco Fauli 
Reviewed-by: Xisco Fauli 

diff --git a/bin/crashreportScraper.py b/bin/crashreportScraper.py
index aec3e5e9cffb..513f5ec7b75d 100755
--- a/bin/crashreportScraper.py
+++ b/bin/crashreportScraper.py
@@ -8,8 +8,9 @@
 
 # Use this script to retrieve information from 
https://crashreport.libreoffice.org
 # about a specific version of LibreOffice
-# Usage sample: ./crashreportScraper.py 7.2.0.4
+# Usage sample: ./crashreportScraper.py --version 7.2.0.4 --repository 
/path/to/libreoffice/repository/
 
+import argparse
 import requests
 from bs4 import BeautifulSoup
 import sys
@@ -160,17 +161,20 @@ def parse_details_and_get_info(url, gitRepo):
 
 if __name__ == '__main__':
 
-version = sys.argv[1]
+parser = argparse.ArgumentParser()
 
-crashes = parse_version_url(
-"https://crashreport.libreoffice.org/stats/version/" + version + "?limit=1000&days=30")
+parser.add_argument('--version', action='store', dest="version", 
required=True)
+parser.add_argument('--repository', action="store", dest="repository", 
required=True)
+
+args = parser.parse_args()
 
-gitRepo = os.path.dirname(os.path.realpath(__file__)) + "/../"
+crashes = parse_version_url(
+"https://crashreport.libreoffice.org/stats/version/" + args.version + "?limit=1000&days=30")
 
-print(str(len(crashes)) + " crash reports in version " + version)
+print(str(len(crashes)) + " crash reports in version " + args.version)
 
 crashesInFile = []
-fileName = "crashes_" + version.replace(".", "_") + ".csv"
+fileName = "crashes_" + args.version.replace(".", "_") + ".csv"
 print("Using " + fileName)
 
 bInsertHeader = False
@@ -196,7 +200,7 @@ if __name__ == '__main__':
 crashCount, crashID, crashVersion, crashOS = 
parse_reports_and_get_most_recent_report_from_last_page(
 
"https://crashreport.libreoffice.org/stats/signature/" + k)
 crashReason, crashStack, codeLine = parse_details_and_get_info(
-"https://crashreport.libreoffice.org/stats/crash_details/" + crashID, gitRepo)
+"https://crashreport.libreoffice.org/stats/crash_details/" + crashID, args.repository)
 line = '\t'.join([k, str(crashCount), lDate[1], lDate[2],
 crashID, crashVersion, crashReason, crashOS, 
crashStack, codeLine, '\n'])
 f.write(line)


[Libreoffice-commits] core.git: bin/crashreportScraper.py

2022-06-02 Thread tagezi (via logerrit)
 bin/crashreportScraper.py |   17 ++---
 1 file changed, 6 insertions(+), 11 deletions(-)

New commits:
commit 3cb921dece44e3e289fc73a64399c1a6c618259c
Author: tagezi 
AuthorDate: Fri May 27 21:25:13 2022 +0300
Commit: Xisco Fauli 
CommitDate: Thu Jun 2 15:52:20 2022 +0200

crashreportScraper: Removed time parsing, now it just resets.

Change-Id: I39465cdbc14e28556760a0c1feab22d8998e4d16
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/135050
Tested-by: Jenkins
Reviewed-by: Xisco Fauli 

diff --git a/bin/crashreportScraper.py b/bin/crashreportScraper.py
index 1735aa3052b5..aec3e5e9cffb 100755
--- a/bin/crashreportScraper.py
+++ b/bin/crashreportScraper.py
@@ -24,15 +24,11 @@ def convert_str_to_date(value):
 value = value.replace('June', 'Jun')
 value = value.replace('July', 'Jul')
 value = value.replace('Sept', 'Sep')
-value = value.replace('noon', '12:00 pm')
+# reset the time leaving the date
+value = ", ".join(value.split(", ")[:-1])
+dtDate = datetime.strptime(value, '%b %d, %Y')
 
-if ':' not in value:
-if 'am' in value:
-value = value.replace(' am', ':00 am')
-elif 'pm' in value:
-value = value.replace(' pm', ':00 pm')
-
-return datetime.strptime(value, '%b %d, %Y, %H:%M %p')
+return dtDate.strftime('%y/%m/%d')
 
 def parse_version_url(url):
 crashReports = {}
@@ -193,7 +189,7 @@ if __name__ == '__main__':
 f.write(line)
 f.flush()
 
-for k, v in crashes.items():
+for k, lDate in crashes.items():
 if len(k) < 254 and k not in crashesInFile and '`' not in k:
 print("Parsing " + k)
 try:
@@ -201,10 +197,9 @@ if __name__ == '__main__':
 
"https://crashreport.libreoffice.org/stats/signature/" + k)
 crashReason, crashStack, codeLine = parse_details_and_get_info(
 "https://crashreport.libreoffice.org/stats/crash_details/" + crashID, gitRepo)
-line = '\t'.join([k, str(crashCount), 
v[1].strftime('%y/%m/%d'), v[2].strftime('%y/%m/%d'),
+line = '\t'.join([k, str(crashCount), lDate[1], lDate[2],
 crashID, crashVersion, crashReason, crashOS, 
crashStack, codeLine, '\n'])
 f.write(line)
 f.flush()
 except requests.exceptions.Timeout:
 continue
-


[Libreoffice-commits] core.git: bin/crashreportScraper.py

2022-05-31 Thread Xisco Fauli (via logerrit)
 bin/crashreportScraper.py |   56 ++
 1 file changed, 37 insertions(+), 19 deletions(-)

New commits:
commit c5ca8b5bffc3b08f74817a3dee2c314b7ced
Author: Xisco Fauli 
AuthorDate: Tue May 31 11:08:21 2022 +0200
Commit: Xisco Fauli 
CommitDate: Tue May 31 15:43:36 2022 +0200

crashreportScraper: use timeout in requests

Change-Id: I03f8740fc124c11d250368034bf6e14239df5abe
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/135180
Tested-by: Xisco Fauli 
Reviewed-by: Xisco Fauli 

diff --git a/bin/crashreportScraper.py b/bin/crashreportScraper.py
index aedb7a666c06..1735aa3052b5 100755
--- a/bin/crashreportScraper.py
+++ b/bin/crashreportScraper.py
@@ -36,8 +36,13 @@ def convert_str_to_date(value):
 
 def parse_version_url(url):
 crashReports = {}
-html_text = requests.get(url).text
-soup = BeautifulSoup(html_text, 'html.parser')
+
+try:
+html_text = requests.get(url, timeout=200).text
+soup = BeautifulSoup(html_text, 'html.parser')
+except requests.exceptions.Timeout:
+print("Timeout requesting " + url)
+sys.exit(1)
 
 table = soup.find("table", {"id": "data-table"}).tbody
 for tr in table.find_all("tr"):
@@ -51,8 +56,12 @@ def parse_version_url(url):
 return crashReports
 
 def parse_reports_and_get_most_recent_report_from_last_page(url):
-html_text = requests.get(url).text
-soup = BeautifulSoup(html_text, 'html.parser')
+try:
+html_text = requests.get(url, timeout=200).text
+soup = BeautifulSoup(html_text, 'html.parser')
+except requests.exceptions.Timeout:
+print("Timeout")
+raise
 
 count = 0
 os_tab = soup.find("table", {"id": "os_tab"}).tbody
@@ -67,8 +76,12 @@ def 
parse_reports_and_get_most_recent_report_from_last_page(url):
 
 if last_page > 1:
 url = url + "?page=" + str(last_page)
-html_text = requests.get(url).text
-soup = BeautifulSoup(html_text, 'html.parser')
+try:
+html_text = requests.get(url, timeout=200).text
+soup = BeautifulSoup(html_text, 'html.parser')
+except requests.exceptions.Timeout:
+print("Timeout")
+raise
 
 reports = soup.find("div", {"id": "reports"}).tbody
 ID, currentID = "", ""
@@ -102,8 +115,12 @@ def 
parse_reports_and_get_most_recent_report_from_last_page(url):
 return count, ID, version, OS
 
 def parse_details_and_get_info(url, gitRepo):
-html_text = requests.get(url).text
-soup = BeautifulSoup(html_text, 'html.parser')
+try:
+html_text = requests.get(url, timeout=200).text
+soup = BeautifulSoup(html_text, 'html.parser')
+except requests.exceptions.Timeout:
+print("Timeout")
+raise
 
 details = soup.find("div", {"id": "details"}).tbody
 tr_list = details.find_all("tr")
@@ -177,16 +194,17 @@ if __name__ == '__main__':
 f.flush()
 
 for k, v in crashes.items():
-# ignore unresolved crash signatures
-if len(k) < 254 and k not in crashesInFile and '`' not in k and 
not k.lower().endswith('.dll') and \
-not k.lower().endswith('.so') and ".so." not in k.lower():
+if len(k) < 254 and k not in crashesInFile and '`' not in k:
 print("Parsing " + k)
-crashCount, crashID, crashVersion, crashOS = parse_reports_and_get_most_recent_report_from_last_page(
-"https://crashreport.libreoffice.org/stats/signature/" + k)
-crashReason, crashStack, codeLine = parse_details_and_get_info(
-"https://crashreport.libreoffice.org/stats/crash_details/" + crashID, gitRepo)
-line = '\t'.join([k, str(crashCount), v[1].strftime('%y/%m/%d'), v[2].strftime('%y/%m/%d'),
-crashID, crashVersion, crashReason, crashOS, crashStack, codeLine, '\n'])
-f.write(line)
-f.flush()
+try:
+crashCount, crashID, crashVersion, crashOS = parse_reports_and_get_most_recent_report_from_last_page(
+"https://crashreport.libreoffice.org/stats/signature/" + k)
+crashReason, crashStack, codeLine = parse_details_and_get_info(
+"https://crashreport.libreoffice.org/stats/crash_details/" + crashID, gitRepo)
+line = '\t'.join([k, str(crashCount), 
v[1].strftime('%y/%m/%d'), v[2].strftime('%y/%m/%d'),
+crashID, crashVersion, crashReason, crashOS, 
crashStack, codeLine, '\n'])
+f.write(line)
+f.flush()
+except requests.exceptions.Timeout:
+continue
 


[Libreoffice-commits] core.git: bin/crashreportScraper.py

2022-05-31 Thread Xisco Fauli (via logerrit)
 bin/crashreportScraper.py |   33 +++--
 1 file changed, 27 insertions(+), 6 deletions(-)

New commits:
commit bf6c74f1bb4ba67c16d442a9d8847118891ec89e
Author: Xisco Fauli 
AuthorDate: Mon May 30 20:56:41 2022 +0200
Commit: Xisco Fauli 
CommitDate: Tue May 31 11:10:00 2022 +0200

crashreportScraper: Add new column to show the stack code

Change-Id: Id6f9ed8540a8615a80de9cc561579ce069992e85
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/135142
Tested-by: Jenkins
Reviewed-by: Xisco Fauli 

diff --git a/bin/crashreportScraper.py b/bin/crashreportScraper.py
index 780db5a9dc2c..aedb7a666c06 100755
--- a/bin/crashreportScraper.py
+++ b/bin/crashreportScraper.py
@@ -101,7 +101,7 @@ def 
parse_reports_and_get_most_recent_report_from_last_page(url):
 
 return count, ID, version, OS
 
-def parse_details_and_get_info(url):
+def parse_details_and_get_info(url, gitRepo):
 html_text = requests.get(url).text
 soup = BeautifulSoup(html_text, 'html.parser')
 
@@ -110,6 +110,8 @@ def parse_details_and_get_info(url):
 reason = tr_list[8].td.text.strip()
 
 stack = ""
+codeLine = ""
+
 count = 0
 frames = soup.find("div", {"id": "frames"}).tbody
 for tr in frames.find_all("tr"):
@@ -120,10 +122,27 @@ def parse_details_and_get_info(url):
 stack += source + "\n"
 count += 1
 
+codeFile = source.split(":")[0]
+codeNumber = source.split(":")[1]
+try:
+with open(os.path.join(gitRepo, codeFile)) as f:
+lines = f.readlines()
+for index, line in enumerate(lines):
+if index + 1 == int(codeNumber):
+codeLine += line.strip() + "\n"
+except FileNotFoundError:
+codeLine += "\n"
+continue
+
 if stack:
 #multiline
 stack = "\"" + stack + "\""
-return reason, stack
+
+if codeLine:
+#multiline
+codeLine = "\"" + codeLine + "\""
+
+return reason, stack, codeLine
 
 
 if __name__ == '__main__':
@@ -133,6 +152,8 @@ if __name__ == '__main__':
 crashes = parse_version_url(
"https://crashreport.libreoffice.org/stats/version/" + version + "?limit=1000&days=30")
 
+gitRepo = os.path.dirname(os.path.realpath(__file__)) + "/../"
+
 print(str(len(crashes)) + " crash reports in version " + version)
 
 crashesInFile = []
@@ -151,7 +172,7 @@ if __name__ == '__main__':
 with open(fileName, "a") as f:
 if bInsertHeader:
 line = '\t'.join(["Name", "Count", "First report", "Last Report",
-"ID", "Version", "Reason", "OS", "Stack", '\n'])
+"ID", "Version", "Reason", "OS", "Stack", "Code Lines" '\n'])
 f.write(line)
 f.flush()
 
@@ -162,10 +183,10 @@ if __name__ == '__main__':
 print("Parsing " + k)
 crashCount, crashID, crashVersion, crashOS = 
parse_reports_and_get_most_recent_report_from_last_page(
"https://crashreport.libreoffice.org/stats/signature/" + k)
-crashReason, crashStack = parse_details_and_get_info(
-"https://crashreport.libreoffice.org/stats/crash_details/" + crashID)
+crashReason, crashStack, codeLine = parse_details_and_get_info(
+"https://crashreport.libreoffice.org/stats/crash_details/" + crashID, gitRepo)
 line = '\t'.join([k, str(crashCount), 
v[1].strftime('%y/%m/%d'), v[2].strftime('%y/%m/%d'),
-crashID, crashVersion, crashReason, crashOS, 
crashStack, '\n'])
+crashID, crashVersion, crashReason, crashOS, 
crashStack, codeLine, '\n'])
 f.write(line)
 f.flush()
 


[Libreoffice-commits] core.git: bin/crashreportScraper.py

2022-05-26 Thread Xisco Fauli (via logerrit)
 bin/crashreportScraper.py |  171 ++
 1 file changed, 171 insertions(+)

New commits:
commit e09f49f944fa1d4163bfd52fd824f4216f93558f
Author: Xisco Fauli 
AuthorDate: Fri May 13 15:30:23 2022 +0200
Commit: Xisco Fauli 
CommitDate: Thu May 26 13:07:10 2022 +0200

bin: add script to retrieve info about crashreports...

... from https://crashreport.libreoffice.org
The script saves the data into a .csv file

Change-Id: I771d144402a3039851c99b025c52bd6e799f71ec
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/134283
Tested-by: Jenkins
Reviewed-by: Xisco Fauli 

diff --git a/bin/crashreportScraper.py b/bin/crashreportScraper.py
new file mode 100755
index ..780db5a9dc2c
--- /dev/null
+++ b/bin/crashreportScraper.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Use this script to retrieve information from 
https://crashreport.libreoffice.org
+# about a specific version of LibreOffice
+# Usage sample: ./crashreportScraper.py 7.2.0.4
+
+import requests
+from bs4 import BeautifulSoup
+import sys
+import os
+import math
+from datetime import datetime
+
+def convert_str_to_date(value):
+value = value.replace('.', '')
+value = value.replace('March', 'Mar')
+value = value.replace('April', 'Apr')
+value = value.replace('June', 'Jun')
+value = value.replace('July', 'Jul')
+value = value.replace('Sept', 'Sep')
+value = value.replace('noon', '12:00 pm')
+
+if ':' not in value:
+if 'am' in value:
+value = value.replace(' am', ':00 am')
+elif 'pm' in value:
+value = value.replace(' pm', ':00 pm')
+
+return datetime.strptime(value, '%b %d, %Y, %H:%M %p')
+
+def parse_version_url(url):
+crashReports = {}
+html_text = requests.get(url).text
+soup = BeautifulSoup(html_text, 'html.parser')
+
+table = soup.find("table", {"id": "data-table"}).tbody
+for tr in table.find_all("tr"):
+td_list = tr.find_all("td")
+crashName = td_list[0].a.text.strip()
+crashNumber = int(td_list[1].text.strip())
+firstCrashDate = convert_str_to_date(td_list[5].text.strip())
+lastCrashDate = convert_str_to_date(td_list[6].text.strip())
+crashReports[crashName] = [crashNumber, firstCrashDate, lastCrashDate]
+
+return crashReports
+
+def parse_reports_and_get_most_recent_report_from_last_page(url):
+html_text = requests.get(url).text
+soup = BeautifulSoup(html_text, 'html.parser')
+
+count = 0
+os_tab = soup.find("table", {"id": "os_tab"}).tbody
+tr_list = os_tab.find_all("tr")
+for tr in tr_list:
+td_list = tr.find_all("td")
+count += int(td_list[1].text.strip())
+
+# There are 50 reports on each page.
+# Go to the last page based on the total count to get a recent report
+last_page = math.ceil( count / 50 )
+
+if last_page > 1:
+url = url + "?page=" + str(last_page)
+html_text = requests.get(url).text
+soup = BeautifulSoup(html_text, 'html.parser')
+
+reports = soup.find("div", {"id": "reports"}).tbody
+ID, currentID = "", ""
+version, currentVersion = "", ""
+OS, currentOS = "", ""
+
+tr_list = reports.find_all("tr")
+for tr in tr_list:
+td_list = tr.find_all("td")
+
+currentID = td_list[0].a.text.strip()
+currentVersion = td_list[2].text.strip().split(': ')[1]
+currentOS = td_list[3].text.strip()
+
+# get most recent version
+# symbols on linux are not very informative generally
+if currentOS == "windows" and currentVersion > version:
+version = currentVersion
+ID = currentID
+OS = currentOS
+
+if not version:
+version = currentVersion
+
+if not ID:
+ID = currentID
+
+if not OS:
+OS = currentOS
+
+return count, ID, version, OS
+
+def parse_details_and_get_info(url):
+html_text = requests.get(url).text
+soup = BeautifulSoup(html_text, 'html.parser')
+
+details = soup.find("div", {"id": "details"}).tbody
+tr_list = details.find_all("tr")
+reason = tr_list[8].td.text.strip()
+
+stack = ""
+count = 0
+frames = soup.find("div", {"id": "frames"}).tbody
+for tr in frames.find_all("tr"):
+td_list = tr.find_all("td")
+source = td_list[3].text.strip()
+if source and count <= 10:
+source = source.replace("\\", 
"/").replace("C:/cygwin64/home/buildslave/source/libo-core/", "")
+stack += source + "\n"
+count += 1
+
+if stack:
+#multiline
+stack = "\"" + stack + "\""
+return reason, stack
+
+
+if __name__ ==