# Takes a list of horse names and searches Bing for a link to each horse's sportinglife.com profile page.
#
# Writes the resulting links to a text file, one per line.
from py_bing_search import PyBingWebSearch
def search_horse(search, h):
    """Search Bing for ``search`` and write the top hit's URL to ``h``.

    Exactly one line is written per call: the URL of the first search
    result, or the literal ``null`` when the search returns nothing.

    Args:
        search: Query string passed to Bing.
        h: Writable text file object that receives the output line.
    """
    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from the environment or a config file instead.
    bing_web = PyBingWebSearch('WeS54BO7F9y4OMgqFLIjKCeeONFCsMvK98AjQKtYLOE', search)
    results = bing_web.search(limit=10, format='json')
    # A single result is still a usable result; the previous "> 1" check
    # threw it away and wrote 'null' instead.
    if results:
        h.write(results[0].url)
    else:
        h.write('null')
    h.write('\n')
def main():
    """Look up a profile link for every horse named in ``horsename.txt``.

    Reads one horse name per line from ``..\\Text Files\\horsename.txt`` and
    calls ``search_horse`` for each, which writes one line per horse to
    ``horse-links.txt`` in the current directory.
    """
    # Raw string: the Windows path contains backslashes that are not valid
    # escape sequences. The input is opened first so that a missing names
    # file does not truncate an existing output file.
    with open(r"..\Text Files\horsename.txt", "r") as f, \
            open("horse-links.txt", "w") as h:
        for line in f:
            # Drop the trailing newline so it does not end up in the query,
            # and skip blank lines, which would search for the site alone.
            horse = line.strip()
            if not horse:
                continue
            search = 'www.sportinglife.com/racing/profiles/ {0}'.format(horse)
            search_horse(search, h)
# Run the link-collection step only when executed as a script, not on import.
if __name__ == "__main__":
    main()
# Takes the list of links to horse profile pages on sportinglife.com.
#
# Scrapes each horse's winning-run information and writes it to a text file.
import requests
from bs4 import BeautifulSoup
def horseScrape(soup, f):
    """Write the winning runs found on one horse profile page to ``f``.

    Nothing is written unless the page markup contains the
    ``fltr-item fltr-win`` marker. Otherwise a ``-Horse-`` separator line is
    written, followed by four lines for every ``tr`` of class ``fltr-win``:
    the horse name, the text of the row's 4th and 5th cells, and ``HCAP`` or
    ``NON-HCAP`` depending on whether the next link after the row mentions
    "Handicap".

    Args:
        soup: Parsed profile page (BeautifulSoup document).
        f: Writable text file object receiving the output.
    """
    # Pages without a winning-row marker contribute nothing to the output.
    if "fltr-item fltr-win" not in str(soup):
        return

    header = soup.find('div', {'class': "content-header"})
    horse_name = header.find("h2").get_text().strip()
    f.write("-Horse-\n")

    for win_row in soup.find_all("tr", "fltr-win"):
        f.write(horse_name + "\n")
        cells = win_row.find_all("td")
        # NOTE(review): columns 3 and 4 are presumably course and distance;
        # confirm against the page layout.
        for column in (3, 4):
            f.write(cells[column].get_text().strip() + "\n")
        is_handicap = "Handicap" in str(win_row.find_next('a'))
        f.write("HCAP\n" if is_handicap else 'NON-HCAP\n')
def main():
    """Scrape every profile link in ``horse-links.txt`` into ``data.txt``.

    Reads one URL per line from ``..\\Text Files\\horse-links.txt``, fetches
    each page with a browser-like User-Agent, parses it and hands it to
    ``horseScrape``, which writes any winning runs to ``data.txt``.
    """
    # Raw string: the Windows path contains backslashes that are not valid
    # escape sequences.
    with open(r'..\Text Files\horse-links.txt', 'r') as h, \
            open('data.txt', 'w') as f:
        for line in h:
            add = line.strip()
            # The link-collection step writes the literal 'null' when a
            # search finds nothing; neither that nor a blank line is a
            # fetchable URL, so skip them instead of crashing the whole run.
            if not add or add == 'null':
                continue
            url = requests.get(add, headers={'User-Agent': 'Mozilla/5.0'})
            soup = BeautifulSoup(url.content, "html.parser")
            horseScrape(soup, f)
# Run the profile-scraping step only when executed as a script, not on import.
if __name__ == "__main__":
    main()
# Scrapes information for today's winners.
import requests
from bs4 import BeautifulSoup
def main(date="03 Jul 2016", first_race_id=653063, race_count=7):
    """Append one CSV line per race to ``info.txt`` for a card of races.

    Fetches ``race_count`` Racing Post result pages with consecutive
    ``race_id`` values starting at ``first_race_id`` and, for each page,
    appends ``name,rating,course,class,HCAP`` (or ``NON-HCAP``) to
    ``info.txt``.

    Args:
        date: Meeting date as it appears in the page heading; used to trim
            the heading down to the course name.
        first_race_id: ``race_id`` of the first race on the card.
        race_count: Number of races on the card.
    """
    # Local import keeps this block self-contained; io.open takes an explicit
    # encoding, so text stays as str and is encoded once at the file boundary
    # (str(bytes) would otherwise write "b'...'" on Python 3).
    import io

    url_template = "http://www.racingpost.com/horses/result_home.sd?race_id=%s"
    heading_suffix = " Result,%s" % date

    with io.open("info.txt", "a", encoding="utf-8") as f:
        for race_id in range(first_race_id, first_race_id + race_count):
            r = requests.get(url_template % race_id,
                             headers={'User-Agent': 'Mozilla/5.0'})
            soup = BeautifulSoup(r.content, "html.parser")

            heading = (soup.find('div', {'class': "leftColBig"})
                       .find_next("h1").get_text(",").strip())
            # Remove the suffix as a whole string. str.strip(chars) treats
            # its argument as a set of characters and would also eat letters
            # belonging to the course name (e.g. "Newcastle" -> "Newca").
            if heading.endswith(heading_suffix):
                heading = heading[:-len(heading_suffix)]
            course = heading.strip()

            race_header = soup.find('h3', {'class': "clearfix"})
            race = race_header.get_text()
            rclass = race_header.find_next("li").get_text(",").strip()
            name = soup.find('td', {'class': "nowrap"}).find_next("b").get_text()
            rating = soup.find('td', {'class': "lightGray"}).get_text()

            race_type = 'HCAP' if "Handicap" in race else 'NON-HCAP'
            f.write(",".join([name, rating, course, rclass, race_type]) + "\n")
# Run the results-scraping step only when executed as a script, not on import.
if __name__ == "__main__":
    main()