#!/usr/bin/python2
# -*- coding: utf-8 -*-
"""Scrape gold price data from Yahoo Finance.

(I can’t find the links to get CSV data directly from Yahoo and
thought it’d be fun.)

    $ ./yahoogold.py
    "Mar 01, 2021","1,732.80","1,757.40","1,718.30","1,720.70","1,720.70","214,677"
    "Feb 28, 2021",-,-,-,-,-,-
    "Feb 26, 2021","1,768.10","1,768.20","1,718.80","1,728.10","1,728.10","1,049"
    "Feb 25, 2021","1,801.50","1,802.00","1,766.70","1,774.40","1,774.40","1,049"
    ...
    "Oct 29, 2020","1,862.80","1,886.60","1,862.80","1,877.40","1,877.40",725
    *Close price adjusted for splits. **Adjusted close price adjusted for both dividends and splits.

To the extent possible under law, Kragen Javier Sitaker has waived all
copyright and related or neighboring rights to this script,
yahoogold.py.  This work is published from Argentina.

"""
import collections
import contextlib
import csv
import errno
import os
import pickle
import sys
import time
import urllib

import pyquery


default_url = 'https://finance.yahoo.com/quote/GC%3DF/history?p=GC%3DF'
# os.path.expanduser() yields the same directory as $HOME when that
# variable is set, but does not raise KeyError at import time when it
# is missing (as os.environ['HOME'] would).
default_filename = os.path.join(os.path.expanduser('~'), '.cached.gc')
day = 24*60*60  # seconds

# A downloaded page: the raw response body and the time.time() value
# recorded once the download finished.
doc = collections.namedtuple('doc', ('contents', 'timestamp'))


def fetch(url=default_url):
    """Download `url` and return it as a `doc` stamped with the current time.

    Raises IOError if the server answers with an HTTP error status
    (400 or above).  Python 2's urllib.urlopen hands back the error
    page instead of raising, and without this check run() would cache
    that page for a whole day.
    """
    # The object returned by Python 2's urllib.urlopen is not a context
    # manager, so closing() is what guarantees the connection is
    # released even if read() fails.
    with contextlib.closing(urllib.urlopen(url)) as response:
        status = response.getcode()  # None for non-HTTP URLs
        if status is not None and status >= 400:
            raise IOError('HTTP status %s fetching %s' % (status, url))
        contents = response.read()
    return doc(contents=contents, timestamp=time.time())


def store(data, filename=default_filename):
    """Pickle `data` into `filename`, replacing any previous contents.

    The pickle is written to `filename + '.tmp'`, flushed and fsynced,
    and only then renamed over `filename`, so a failed or interrupted
    write leaves the previous cache file untouched.
    """
    tmpname = filename + '.tmp'
    with open(tmpname, 'wb') as fo:
        pickle.dump(data, fo)
        fo.flush()
        os.fsync(fo.fileno())
    os.rename(tmpname, filename)


def load(filename=default_filename):
    """Return the object pickled in `filename`, or None if no such file exists.

    Any IOError other than ENOENT is re-raised.
    """
    # NOTE: unpickling executes whatever the file describes, so
    # `filename` must be a trusted file, such as one written by store().
    try:
        with open(filename, 'rb') as fo:
            return pickle.load(fo)
    except IOError as e:
        if e.errno == errno.ENOENT:
            return None
        raise


def scrape(data):
    """Yield one list of cell texts per `tr.BdT` row found in `data.contents`.

    Each list holds the whitespace-stripped text of every `td` in that
    row, in document order.
    """
    pq = pyquery.PyQuery
    for row in pq(data.contents)('tr.BdT'):
        yield [pq(td).text().strip() for td in pq(row)('td')]


def run():
    """Return an iterator over the scraped rows, using the on-disk cache.

    The page is downloaded and re-cached only when there is no cached
    copy or the cached copy is more than one day old.
    """
    data = load()
    if data is None or time.time() - data.timestamp > 1*day:
        data = fetch()
        store(data)
    return scrape(data)


def main():
    """Write every scraped row to standard output as CSV."""
    w = csv.writer(sys.stdout)
    for row in run():
        w.writerow(row)


if __name__ == '__main__':
    main()