commit: c70874428ee02acb4201b0ddd17178e497bb04a8 Author: Alessandro Barbieri <lssndrbarbieri <AT> gmail <DOT> com> AuthorDate: Thu Apr 9 02:23:53 2020 +0000 Commit: Andrew Ammerlaan <andrewammerlaan <AT> riseup <DOT> net> CommitDate: Thu Apr 9 02:24:41 2020 +0000 URL: https://gitweb.gentoo.org/repo/proj/guru.git/commit/?id=c7087442
www-apps/twint: new package Signed-off-by: Alessandro Barbieri <lssndrbarbieri <AT> gmail.com> www-apps/twint/Manifest | 1 + www-apps/twint/metadata.xml | 34 +++++++++++++++++++++++++++ www-apps/twint/twint-2.1.18.ebuild | 47 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) diff --git a/www-apps/twint/Manifest b/www-apps/twint/Manifest new file mode 100644 index 0000000..67f852a --- /dev/null +++ b/www-apps/twint/Manifest @@ -0,0 +1 @@ +DIST twint-2.1.18.tar.gz 33213 BLAKE2B 74d8634ad178b40af726347ee144766bc5ae2480842ad617739a12ed469bf7692247bdc4dc5d979a081b88a223ce2649924cf89a6c0ff77acd10142f2290865c SHA512 0060d5f496bd19e74e72243ce83202204eacd28b57db7440ae83d112c96ea962512adf12e590e0843cf5231b1922cfcc943725aaeb2ab31127522be26e7fc9a7 diff --git a/www-apps/twint/metadata.xml b/www-apps/twint/metadata.xml new file mode 100644 index 0000000..c62db26 --- /dev/null +++ b/www-apps/twint/metadata.xml @@ -0,0 +1,34 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE pkgmetadata SYSTEM "http://www.gentoo.org/dtd/metadata.dtd"> + +<pkgmetadata> + <maintainer type="person"> + <email>lssndrbarbieri@gmail.com</email> + <name>Alessandro Barbieri</name> + </maintainer> + <longdescription> + No authentication. No API. No limits. + +Twint is an advanced Twitter scraping tool written in Python that allows for scraping Tweets from Twitter profiles without using Twitter's API. + +Twint utilizes Twitter's search operators to let you scrape Tweets from specific users, scrape Tweets relating to certain topics, hashtags &amp; trends, or sort out sensitive information from Tweets like e-mail and phone numbers. I find this very useful, and you can get really creative with it too. + +Twint also makes special queries to Twitter allowing you to also scrape a Twitter user's followers, Tweets a user has liked, and who they follow without any authentication, API, Selenium, or browser emulation. 
+tl;dr Benefits + +Some of the benefits of using Twint vs Twitter API: + + Can fetch almost all Tweets (Twitter API limits to last 3200 Tweets only); + Fast initial setup; + Can be used anonymously and without Twitter sign up; + No rate limitations. + +Limits imposed by Twitter + +Twitter limits scrolls while browsing the user timeline. This means that with .Profile or with .Favorites you will be able to get ~3200 tweets. + </longdescription> + <upstream> + <remote-id type="github">twintproject/twint</remote-id> + <remote-id type="pypi">twint</remote-id> + </upstream> +</pkgmetadata> diff --git a/www-apps/twint/twint-2.1.18.ebuild b/www-apps/twint/twint-2.1.18.ebuild new file mode 100644 index 0000000..47aa50d --- /dev/null +++ b/www-apps/twint/twint-2.1.18.ebuild @@ -0,0 +1,47 @@ +# Copyright 1999-2020 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI="7" + +PYTHON_COMPAT=( python3_6 ) +DISTUTILS_USE_SETUPTOOLS=rdepend + +inherit distutils-r1 + +DESCRIPTION="Advanced Twitter scraping & OSINT tool written in Python that doesn't use Twitter's API" +HOMEPAGE=" + https://github.com/twintproject/twint + https://pypi.org/project/twint +" +SRC_URI="https://github.com/twintproject/${PN}/archive/v${PV}.tar.gz -> ${P}.tar.gz" + +LICENSE="MIT" +SLOT="0" +KEYWORDS="~amd64" +IUSE="test" +#tests require network +RESTRICT="test" + +RDEPEND=" + dev-python/aiohttp[${PYTHON_USEDEP}] + dev-python/aiohttp-socks[${PYTHON_USEDEP}] + dev-python/aiodns[${PYTHON_USEDEP}] + dev-python/beautifulsoup[${PYTHON_USEDEP}] + dev-python/cchardet[${PYTHON_USEDEP}] + dev-python/elasticsearch-py[${PYTHON_USEDEP}] + dev-python/fake-useragent[${PYTHON_USEDEP}] + dev-python/geopy[${PYTHON_USEDEP}] + dev-python/googletransx[${PYTHON_USEDEP}] + >=dev-python/pandas-0.23.0[${PYTHON_USEDEP}] + dev-python/PySocks[${PYTHON_USEDEP}] + dev-python/schedule[${PYTHON_USEDEP}] +" +DEPEND="${RDEPEND}" + +python_test() { + "${EPYTHON}" test.py -v || die +} + +src_test() { + 
python_foreach_impl python_test +}
