Skip to content

Google domain changes published but not merged to master #37

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -2211,6 +2211,13 @@ print(serps)
</tbody>
</table>

To search a specific Google domain in a specific host language, pass the `domain` and `host_language` parameters to `get_serps()`. The example below searches for "bmw" on the German Google domain (`google.de`) and returns the results in German.

```python
df = seo.get_serps("bmw", pages=1, domain="google.de", host_language="de")
```



#### Create an ABCD classification of Google Search Console data
The `classify_pages()` function returns an ABCD classification of Google Search Console data. It calculates the cumulative sum of clicks and then categorises pages using the ABC algorithm: pages in the first 80% of cumulative clicks are classed A, the next 10% are classed B, and the final 10% are classed C, while zero-click pages are classed D.
45 changes: 35 additions & 10 deletions ecommercetools/seo/google_search.py
Original file line number Diff line number Diff line change
@@ -36,19 +36,21 @@ def _get_source(url: str):
print(e)


def _get_site_results(url: str):
def _get_site_results(url: str, domain="google.co.uk", hl="en"):
"""Return the source of a site:url search.
Args:
url: URL of page to append to site: query
domain: Google domain to use (default is google.co.uk)
hl: Language to use (default is English)
Returns:
response (str): HTML of page.
"""

try:
query = urllib.parse.quote_plus(url)
response = _get_source("https://www.google.co.uk/search?q=site%3A" + query + "&num=100")
response = _get_source("https://www."+ domain + "/search?q=site%3A" + query + "&num=100" + "&hl=" + hl)

return response
except requests.exceptions.RequestException as e:
@@ -116,28 +118,41 @@ def get_indexed_pages(urls: list):
return df


def _get_results(query: str):
def _get_results(query: str, domain="google.co.uk", hl="en"):
"""Return the source of a search.
Args:
query: Search query term.
domain: Google domain to use (default is google.co.uk)
hl: Language to use (default is English)
Returns:
response (str): HTML of page.
"""

query = urllib.parse.quote_plus(query)
response = _get_source("https://www.google.co.uk/search?q=" + query + "&num=100")
url = "https://www." + domain + "/search?q=" + query + "&num=100" + "&hl=" + hl
response = _get_source(url)

return response


def _get_next_page(response, domain="google.co.uk"):
"""Get the URL for the next page of results."""
def _get_next_page(response, domain="google.co.uk", hl="en"):
"""Get the URL for the next page of results.
Args:
response: HTML of page.
domain: Google domain to use (default is google.co.uk)
hl: Language to use (default is English)
Returns:
url (str): URL of next page of results.
"""

css_identifier_next = "#pnnext"
next_page_url = response.html.find(css_identifier_next, first=True).attrs['href']
next_page = "https://www." + domain + next_page_url
next_page = "https://www." + domain + next_page_url + "&hl=" + hl

return next_page

@@ -209,22 +224,32 @@ def _parse_search_results(response):
def get_serps(query: str,
output="dataframe",
pages=1,
domain="google.co.uk"):
domain="google.co.uk",
host_language="en"):
"""Return the Google search results for a given query.
Args:
query (string): Query term to search Google for.
output (string, optional): Optional output format (dataframe or dictionary).
pages (int, optional): Optional number of pages to return.
domain (string, optional): Optional Google domain (default is google.co.uk).
host_language (string, optional): Optional host_language (default is en).
Returns:
results (dict): Results of query.
"""

response = _get_results(query)
if domain not in ["google.co.uk", "google.com", "google.co.in", "google.com.au", "google.com.br", "google.ca",
"google.com.mx", "google.co.nz", "google.com.ph", "google.pl", "google.com.sa", "google.com.sg",
"google.co.za", "google.es", "google.fr", "google.de", "google.it", "google.co.jp", "google.com.tw",
"google.com.tr", "google.com.vn"]:
raise ValueError("Domain must be a valid Google domain in the format of google.co.uk or google.com")
else:
pass

response = _get_results(query, domain=domain, hl=host_language)
results = _parse_search_results(response)
next_page = _get_next_page(response)
next_page = _get_next_page(response, domain=domain, hl=host_language)

page = 1
while page <= pages:
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -9,7 +9,7 @@
setup(
name='ecommercetools',
packages=find_namespace_packages(include=['ecommercetools.*']),
version='0.42.8',
version='0.42.9',
license='MIT',
description='EcommerceTools is a data science toolkit for ecommerce, marketing science, and Python SEO.',
long_description=long_description,