class OptionDataWebGleaner(object):
    """Interactively scrape, clean, save and plot one Yahoo Finance option chain.

    Constructing an instance prompts on stdin for a ticker, a maturity date
    and an option type.  The remaining methods fetch the matching option
    table with Selenium + BeautifulSoup and post-process it with pandas.

    Attributes set by ``__init__``:
        stock: ticker symbol entered by the user.
        maturity_date: ``datetime.date`` of the requested expiry.
        today: ``datetime.date`` on which the instance was created.
        d: expiry timestamp string used in the ``date=`` URL parameter.
        option_type: ``'calls'`` or ``'puts'``; used as the CSS class of the
            table to scrape.
    """

    # Scraped text columns that carry '%' / ',' decorations.
    _TEXT_NUMERIC_COLUMNS = ('Last Price', 'Open Interest', 'Strike',
                             'Volume', 'Implied Volatility')
    # Every column that must end up as float.
    _NUMERIC_COLUMNS = _TEXT_NUMERIC_COLUMNS + ('stock_price',)

    def __init__(self):
        """Prompt for ticker, maturity date and option type.

        Raises:
            TypeError: the ticker is not in ``Yahoo_ticker_List.csv``.
            ValueError: the date is not in mm/dd/yyyy format, or the option
                type is neither ``call`` nor ``put``.
            KeyError: the maturity date is not one of the supported expiries.
        """
        # NOTE(review): the column is addressed by the header 'AUB.AX', which
        # looks like the first ticker of a header-less list; if so, that
        # ticker itself can never match -- confirm against the CSV.
        ticker = pd.read_csv('Yahoo_ticker_List.csv')['AUB.AX'].values
        stock = self._prompt(
            'Please give the ticker of your selected option?\n').strip()
        # Only reject unknown tickers; a valid ticker must not raise.
        if stock not in ticker:
            raise TypeError('Your option is not available here.')
        self.stock = stock

        date_norm = self._prompt(
            'Please give your maturity date in the format of mm/dd/yyyy\n')
        self.maturity_date = datetime.strptime(
            date_norm.strip(), '%m/%d/%Y').date()
        self.today = date.today()

        # Supported expiries mapped to the timestamp strings used in the URL.
        dates = ['1481846400', '1484870400', '1487289600']
        maturity_dates = [date(2016, 12, 16), date(2017, 1, 20),
                          date(2017, 2, 17)]
        date_dict = dict(zip(maturity_dates, dates))
        # Fail here with the explanatory message instead of printing it
        # unconditionally and failing later on a missing ``self.d``.
        if self.maturity_date not in date_dict:
            raise KeyError('Your maturuity date is not available')
        self.d = date_dict[self.maturity_date]

        option = self._prompt(
            'Please give the type of your option, either call or put\n')
        option = option.strip().lower()
        if option not in ('call', 'put'):
            raise ValueError('Option type must be either call or put.')
        self.option_type = option + 's'

    @staticmethod
    def _prompt(message):
        """Read one line from stdin on either Python 2 or Python 3."""
        try:
            reader = raw_input  # Python 2
        except NameError:
            reader = input  # Python 3
        return reader(message)

    @staticmethod
    def _strip_symbols(value):
        """Return ``str(value)`` with every '%' and ',' removed."""
        return str(value).replace('%', '').replace(',', '')

    @staticmethod
    def _columns_from_cells(titles, cells):
        """Regroup a row-major flat list of cell texts into named columns.

        Args:
            titles: header texts, one per column.
            cells: body cell texts in row-major order.

        Returns:
            dict mapping each title to the list of that column's cell texts.
            A trailing incomplete row is ignored.

        Raises:
            ValueError: ``titles`` is empty.
        """
        n_cols = len(titles)
        if n_cols == 0:
            raise ValueError('The option table has no header cells.')
        n_rows = len(cells) // n_cols
        stop = n_rows * n_cols
        columns = {}
        for offset, title in enumerate(titles):
            # Every n_cols-th cell starting at ``offset`` belongs to this column.
            columns[title] = list(cells[offset:stop:n_cols])
        return columns

    def crawl_data(self, chromedriver="/Users/Miya/Downloads/chromedriver.exe"):
        """Download the option table for the configured ticker and expiry.

        Args:
            chromedriver: path to the chromedriver executable.

        Returns:
            pandas.DataFrame with one row per table row: the scraped columns
            (as text) plus ``maturity_date``, ``date`` and ``stock_price``.

        Raises:
            ValueError: the page has no table for the requested option type,
                no header cells, or no stock price element.
        """
        url = ('http://finance.yahoo.com/quote/' + self.stock +
               '/options?date=' + self.d)

        os.environ["webdriver.chrome.driver"] = chromedriver
        driver = webdriver.Chrome(chromedriver)
        try:
            # The page has to be loaded before page_source is meaningful.
            driver.get(url)
            html_source = driver.page_source
        finally:
            # Always release the browser process, even if loading failed.
            driver.quit()

        soup = BeautifulSoup(html_source, 'html.parser')
        table = soup.find('table', self.option_type)
        if table is None:
            raise ValueError(
                'No ' + self.option_type + ' table found at ' + url)

        price_texts = [span.text for span in soup.find_all('span', 'Fz(36px)')]
        if not price_texts:
            raise ValueError('No stock price found at ' + url)
        # Use the first match so the column length never depends on how many
        # spans happen to carry that style class.
        stock_price = float(price_texts[0].replace(',', ''))

        titles = [cell.text for cell in table.find_all('th')]
        cells = [cell.text for cell in table.find_all('td')]
        columns = self._columns_from_cells(titles, cells)
        n_rows = len(cells) // len(titles)

        columns['maturity_date'] = [self.maturity_date] * n_rows
        columns['date'] = [self.today] * n_rows
        columns['stock_price'] = [stock_price] * n_rows
        return pd.DataFrame(columns)

    def clean_data(self, dataframe=None):
        """Convert the numeric columns of an option table to float.

        Args:
            dataframe: raw table as returned by ``crawl_data``.  When omitted
                the table is crawled first.  The input is not modified.

        Returns:
            A copy with '%' and ',' stripped from the numeric text columns,
            those columns converted to float, and every row dropped in which
            one of them could not be parsed.  All other columns are kept.
        """
        if dataframe is None:
            dataframe = self.crawl_data()
        # Work on a copy so the date columns needed for plotting survive.
        cleaned = dataframe.copy()

        print('Remove unexpected symbols...')
        for column in self._TEXT_NUMERIC_COLUMNS:
            cleaned[column] = [self._strip_symbols(value)
                               for value in cleaned[column]]

        print('Change the data type...')
        for column in self._NUMERIC_COLUMNS:
            # errors='coerce' turns unparsable cells into NaN so the next
            # step can drop them instead of the conversion raising.
            numbers = pd.to_numeric(list(cleaned[column]), errors='coerce')
            cleaned[column] = np.asarray(numbers, dtype=float)

        print("Remove missing values...")
        return cleaned.dropna(subset=list(self._NUMERIC_COLUMNS))

    def save_file(self):
        """Ask whether to save the cleaned table and write it to CSV if so.

        Returns:
            The CSV file name (``<stock><d>.csv``) when the file was written,
            otherwise ``None``.
        """
        answer = self._prompt(
            "Do you want to save the file into csv? Type Y for yes, N or no\n ")
        if answer.strip().upper() != 'Y':
            return None
        csv_name = self.stock + self.d + '.csv'
        self.clean_data().to_csv(csv_name)
        print("File Saved!")
        return csv_name

    def viz(self):
        """Plot implied volatility against strike and time to maturity.

        Returns:
            The matplotlib figure holding the 3D surface.  The figure is not
            shown here; the caller decides whether to show or save it.
        """
        dataframe = self.clean_data().sort_values(by='Strike')

        # Float division: on Python 2 ``days / 365`` would truncate to 0.
        time_to_maturity = [
            (maturity - observed).days / 365.0
            for maturity, observed in zip(dataframe['maturity_date'],
                                          dataframe['date'])
        ]
        strike_price = dataframe['Strike'].values
        implied_vol = dataframe['Implied Volatility'].values

        strike_grid, maturity_grid = np.meshgrid(strike_price,
                                                 time_to_maturity)
        # plot_surface needs Z with the same 2D shape as the grids; each grid
        # row repeats the per-strike volatilities.
        vol_grid = np.tile(implied_vol, (len(time_to_maturity), 1))

        fig = plot.figure(figsize=(10, 5))
        # Create the 3D axes through the figure so they are attached to it.
        # Older matplotlib needs mpl_toolkits.mplot3d imported for '3d' to be
        # registered, which the Axes3D name used here previously implies.
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(strike_grid, maturity_grid, vol_grid,
                        rstride=2, cstride=2, cmap=cm.coolwarm,
                        linewidth=0.5, antialiased=False)
        ax.set_xlabel('Strike Price')
        ax.set_ylabel('time to maturity')
        ax.set_zlabel('implied volatility%')
        return fig

    def summary(self):
        """Print and return descriptive statistics of the cleaned table."""
        stats = self.clean_data().describe()
        print(stats)
        return stats
這裏需要堆棧跟蹤。請包括它。 –
你是用 `import datetime` 還是 `from datetime import datetime`? –
另外,請參閱如何創建最小、完整且可驗證的示例 [mcve]。 –