Airbnb Pricing Optimization
Airbnb claims to be part of the "sharing economy" and disrupting the hotel industry. However, data shows that the majority of Airbnb listings in most cities are entire homes, many of which are rented all year round - disrupting housing and communities.
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
# Apply the dark base style first, then layer the seaborn look on top of it.
plt.style.use('dark_background')
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8*" and
# later releases dropped the old names, so pick whichever one is installed.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
One year of Airbnb property listings for San Francisco
# Snapshot dates of the San Francisco listings files, newest first.
snapshot_dates = (
    '2020-06-08', '2020-05-06', '2020-04-07', '2020-03-13', '2020-02-12',
    '2020-01-04', '2020-01-02', '2019-12-04', '2019-11-01', '2019-10-14',
    '2019-09-12', '2019-08-06', '2019-07-08',
)
# Every snapshot lives under the same path; only the date segment changes.
url_template = 'http://data.insideairbnb.com/united-states/ca/san-francisco/{}/data/listings.csv'
urls = [url_template.format(snapshot) for snapshot in snapshot_dates]
# Download every snapshot and stack them into one frame with a fresh 0..n-1 index.
dfs = pd.concat([pd.read_csv(x) for x in urls], ignore_index=True)
# availability_365 expressed as a fraction of a year
dfs['availability'] = dfs['availability_365'] / 365
# dfs['date'] = pd.to_datetime(dfs['last_scraped'])
# dfs['dayofweek'] = dfs['date'].dt.dayofweek
# dfs['quarter'] = dfs['date'].dt.quarter
# dfs['month'] = dfs['date'].dt.month
# dfs['year'] = dfs['date'].dt.year
# dfs['dayofyear'] = dfs['date'].dt.dayofyear
# dfs['dayofmonth'] = dfs['date'].dt.day
# dfs['weekofyear'] = dfs['date'].dt.weekofyear
# time_columns = ['dayofweek','quarter','month','year','dayofyear','dayofmonth','weekofyear']
# clean price: the money columns arrive as text such as "$1,250.00".
# The character class is passed with regex=True explicitly because a bare '$'
# is a regex end-of-string anchor and the default of `regex` differs between
# pandas versions; this strips both symbols in a single pass.
for x in ['price', 'weekly_price', 'monthly_price', 'security_deposit', 'cleaning_fee']:
    dfs[x] = dfs[x].str.replace(r'[$,]', '', regex=True).astype(float).fillna(0)
dfs[:1]
Latest Month
# Keep only the rows whose scrape date matches the newest snapshot.
latest_snapshot = '2020-06-08'
df = dfs.loc[dfs['last_scraped'] == latest_snapshot].copy()
# Remember the full column list before narrowing the frame down.
all_columns = list(df.columns)
selected_columns = [
    'id', 'last_scraped', 'listing_url', 'host_id', 'property_type', 'zipcode',
    'accommodates', 'bathrooms', 'bedrooms', 'beds',
    'price', 'weekly_price', 'monthly_price', 'security_deposit', 'cleaning_fee',
    'number_of_reviews', 'review_scores_rating', 'cancellation_policy',
    'neighbourhood', 'availability_365', 'latitude', 'longitude',
]
# Restrict the working frame to the columns used by the rest of the analysis.
df = df[selected_columns]
df.head(1)
# Two side-by-side maps of the listings: coloured by property type (left)
# and by neighbourhood (right).
fig, ax = plt.subplots(1, 2, figsize=(25, 10))
for axis, hue_column in zip(ax.ravel(), ('property_type', 'neighbourhood')):
    # Longitude goes on x and latitude on y so the plot is oriented like a
    # map (north up, east right); the original had the two axes swapped.
    sns.scatterplot(
        data=df,
        x='longitude',
        y='latitude',
        hue=hue_column,
        palette=sns.color_palette("Paired", df[hue_column].nunique()),
        ax=axis,
    )
    # Anchor the (long) legend at the upper-right corner of the axes.
    axis.legend(bbox_to_anchor=(1.0, 1.0))
plt.tight_layout()
Diversity and Dominance
def shannon(series):
    """
    Shannon diversity index of a vector of category counts.

    Computes |sum(p_i * ln(p_i))| where p_i = n_i / N, skipping empty
    categories (their contribution is defined as zero).

    series: pd.Series or any array-like of counts, one entry per category
    returns: float, 0.0 when there are no observations at all
    """
    counts = np.asarray(series, dtype=float)
    # nansum so that missing entries are ignored, like pd.Series.sum does.
    total = np.nansum(counts)
    if not total > 0:
        return 0.0
    # Drop zero (and missing) categories: ln(0) is undefined.
    shares = counts[counts > 0] / total
    return float(np.abs((shares * np.log(shares)).sum()))
def simpson(series):
    """
    Simpson dominance index of a vector of category counts.

    Computes sum(n_i * (n_i - 1)) / (N * (N - 1)) with N = sum(n_i).

    series: pd.Series or any array-like of counts, one entry per category
    returns: float; 0.0 when there are no observations, NaN when there is
             exactly one observation (the index is 0/0 in that case)
    """
    counts = np.asarray(series, dtype=float)
    total = np.nansum(counts)
    pair_count = total * (total - 1)
    if pair_count == 0:
        # Make the degenerate cases explicit instead of dividing by zero.
        return 0.0 if total == 0 else float('nan')
    counts = counts[counts > 0]
    return float((counts * (counts - 1)).sum() / pair_count)
Diversity and Dominance of property types by neighbourhood
plt.figure(figsize=(25, 10))
# Listing counts: one row per property type, one column per neighbourhood.
ngh = pd.crosstab(df.property_type, df.neighbourhood)
# DataFrame.apply works column-wise, i.e. one index value per neighbourhood.
ngh_diversity = ngh.apply(shannon)
ngh_dominance = ngh.apply(simpson)
ngh_df = pd.DataFrame({
    'diversity': ngh_diversity,
    'dominance': ngh_dominance,
    'total_number_of_properties': ngh.sum(),
    'unique_property_types': df.groupby('neighbourhood').property_type.nunique()
}).reset_index()
# Marker size is the number of listings in the neighbourhood. No `cmap` is
# passed: without a `c` argument Matplotlib ignores it and only warns.
plt.scatter(ngh_diversity, ngh_dominance, s=ngh.sum(), label='Property Type')
# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.regplot(x=ngh_diversity, y=ngh_dominance, scatter=False)
# Look the values up by neighbourhood label rather than by integer position,
# which is deprecated for label-indexed Series.
for name in ngh.columns:
    plt.annotate(name, (ngh_diversity[name], ngh_dominance[name]), ha='center', va='center')
plt.xlabel('Diversity')
plt.ylabel('Dominance');
ngh_df[ngh_df.neighbourhood.isin(['Downtown', 'Mission District', 'Mission Bay'])]
Diversity and dominance of property types for each neighbourhood
- Downtown - the most diverse, balanced property distribution in terms of price
- Mission District - quite diverse, large number of properties
- Mission Bay - low number of properties, and focused on few types
Property Distributions
fig, ax = plt.subplots(1, 2, figsize=(35, 10))
left_ax, right_ax = ax.ravel()

# Left panel: listing counts with property types as rows, each row scaled
# so that it sums to one.
listing_counts = df.pivot_table(
    index='neighbourhood',
    columns='property_type',
    values='id',
    aggfunc='count',
    fill_value=0,
).T
property_distribution = listing_counts.div(listing_counts.sum(axis=1), axis=0)
sns.heatmap(property_distribution, ax=left_ax, cmap='Oranges')

# Right panel: mean price per neighbourhood/property type, each neighbourhood
# row scaled to sum to one, then transposed for plotting.
mean_prices = df.pivot_table(
    index='neighbourhood',
    columns='property_type',
    values='price',
    aggfunc='mean',
    fill_value=0,
)
property_price = mean_prices.div(mean_prices.sum(axis=1), axis=0)
sns.heatmap(property_price.T, ax=right_ax, cmap='Oranges')

left_ax.set_xlabel('')
left_ax.set_title('Property Distribution by neighbourhood')
right_ax.set_title('Price Distribution by property_type and neighbourhood')
plt.tight_layout()
Outliers:
- Bayview:
- Castle
- Hut
- Fisherman's Wharf:
- Boat
- Nob Hill:
- In-law
- Outer Sunset
- Camper
Average Yearly Availability by Property Type
# Mean availability_365 per property type, expressed as a fraction of a year.
availability_by_type = df.groupby('property_type')['availability_365'].mean().div(365)
availability_by_type.plot.bar(rot=30, figsize=(30, 7))
Average Yearly Availability by Neighbourhood
# Mean availability_365 per neighbourhood, expressed as a fraction of a year.
availability_by_neighbourhood = df.groupby('neighbourhood')['availability_365'].mean().div(365)
availability_by_neighbourhood.plot.bar(rot=45, figsize=(30, 7))
Number of hosts by range of properties
# Number of distinct listings owned by each host.
hosts = (
    df.groupby('host_id')['id']
    .nunique()
    .rename('number_of_properties')
    .reset_index()
)
# Bin edges for pd.cut (right-inclusive intervals). The last edge is open
# ended so hosts with more than 230 listings land in the "200+" group
# instead of falling outside every bin and becoming NaN.
bins = [0, 1, 3, 5, 10, 20, 30, 50, 100, 150, 200, np.inf]
# One label per interval: "1", "1-3", "3-5", ..., "150-200", "200+".
f_bins = (
    [str(bins[1])]
    + [f'{low}-{high}' for low, high in zip(bins[1:-2], bins[2:-1])]
    + [f'{bins[-2]}+']
)
hosts['group'] = pd.cut(hosts.number_of_properties, bins=bins, labels=f_bins)
fig, ax = plt.subplots(1, 2, figsize=(20, 6))
# value_counts orders groups by size; the three largest go on the left panel
# and the remaining ones on the right so the small bars stay readable.
group_sizes = hosts['group'].value_counts()
group_sizes.iloc[:3].plot.bar(rot=0, ax=ax.ravel()[0])
group_sizes.iloc[3:].plot.bar(rot=0, ax=ax.ravel()[1])
ax.ravel()[0].set_title('Number of owners by properties cut groups')
plt.tight_layout()
Using the mean values filtered by basic property specs
# Basic specs that define a "comparable" property.
filtering_columns = ['neighbourhood', 'property_type', 'bathrooms', 'bedrooms', 'beds']
# Mean nightly price for every combination of those specs.
prices_by_spec = df.groupby(filtering_columns)['price']
mean_prices_group = prices_by_spec.mean().reset_index()
Now let's pick a random property and see the differences
# Draw one random listing and compare its price with the mean of its spec group.
smpl = df.sample(1).copy()
sample_specs = smpl[filtering_columns + ['price']]
group_mean = smpl[filtering_columns].merge(mean_prices_group, how='left', on=filtering_columns)
print('Sample specs')
print(sample_specs.T)
print('\nMean Price by same property specs')
print(group_mean.T)
XGBoost
# Model features; the target is `price`.
x_cols = [
    'property_type', 'accommodates', 'bathrooms', 'bedrooms',
    'beds', 'weekly_price', 'monthly_price', 'security_deposit',
    'cleaning_fee', 'number_of_reviews', 'review_scores_rating',
    'cancellation_policy', 'neighbourhood', 'availability_365',
]
# Work on a copy so the main frame stays untouched.
xdf = df[x_cols + ['price']].copy()
# Boats without a neighbourhood get a placeholder of their own; every other
# missing value is replaced by 0.
boat_without_neighbourhood = xdf['neighbourhood'].isnull() & (xdf['property_type'] == 'Boat')
xdf.loc[boat_without_neighbourhood, 'neighbourhood'] = 'Here is a boat'
xdf = xdf.fillna(0)
# Label encoding: each category gets an integer code in order of first
# appearance; the mapping is kept for use at prediction time.
label_encoding = {}
to_label = ['property_type', 'cancellation_policy', 'neighbourhood']
for column in to_label:
    categories = xdf[column].unique()
    label_encoding[column] = {category: code for code, category in enumerate(categories)}
    xdf[column] = xdf[column].map(label_encoding[column].get)
xdf.head(3)
import xgboost as xgb
# Fit a gamma-objective gradient-boosted regressor on the encoded features.
xgr = xgb.XGBRegressor(objective='reg:gamma')
xgr.fit(xdf[x_cols], xdf['price'])
xgb.plot_importance(xgr)
# Compare the listed and the predicted price for one random listing.
smpl = xdf.sample(1).copy()
predicted_price = xgr.predict(smpl[x_cols])[0]
print('Sample specs')
print(smpl[x_cols + ['price']].T)
print(f'\nXGBoost Gamma Regressor Prediced Price {predicted_price}')
def predict_price(df, xgr):
    """
    Predict prices for raw (un-encoded) listings with a fitted model.

    The input is preprocessed the same way as the training frame: boats
    without a neighbourhood get the 'Here is a boat' placeholder, remaining
    missing values become 0, and the categorical columns in `to_label` are
    mapped through the module-level `label_encoding`. Skipping these steps
    would feed the model values encoded differently from what it was fitted on.

    df: pd.DataFrame holding the model's feature columns (not modified)
    xgr: fitted estimator exposing `predict`
    returns: whatever `xgr.predict` returns for the encoded frame
    """
    c_df = df.copy()
    if {'neighbourhood', 'property_type'}.issubset(c_df.columns):
        boat_without_neighbourhood = c_df['neighbourhood'].isnull() & (c_df['property_type'] == 'Boat')
        c_df.loc[boat_without_neighbourhood, 'neighbourhood'] = 'Here is a boat'
    c_df = c_df.fillna(0)
    for j in to_label:
        # Categories that were not present at fit time have no code and map
        # to a missing value.
        c_df[j] = c_df[j].map(label_encoding[j].get)
    return xgr.predict(c_df)
Estimate number of nights per year for each listing
Source: tule2236/Airbnb-Dynamic-Pricing-Optimization
As found in the Overview of the Airbnb Community in San Francisco published by Airbnb, the average length of stay per guest is 4.2 nights, so we assumed an average length of stay of 4.2 nights per booking for every listing. Since we were not able to find a clear figure for the share of guests who leave a review after a booking, we assumed a review rate of 0.5, which is used as a constant throughout the estimation. To prevent artificially high results, we also assumed that the occupancy rate cannot exceed 0.95, meaning that even the busiest listings go unrented for a few nights each month. With these assumptions and constants, we estimate the occupancy rate as shown below:
def estimate_nights_per_year(review_per_month, yearly_availability,
                             av_nights=4.2, review_rate=0.5, max_occupancy_rate=0.95):
    """
    Estimate the number of booked nights per year for a listing.

    bookings per month = reviews per month / review rate
    occupancy          = min(bookings per month * nights per booking / 30, cap)
    nights per year    = occupancy * yearly availability

    Works on scalars as well as on NumPy arrays / pandas Series.

    review_per_month: reviews received per month
    yearly_availability: number of days per year the listing is available
    av_nights: assumed average length of a booking, in nights
    review_rate: assumed share of bookings that result in a review
    max_occupancy_rate: upper bound applied to the estimated occupancy
    returns: estimated nights per year, same shape as the inputs
    """
    bookings_per_month = review_per_month / review_rate
    # np.minimum instead of the builtin min so array inputs are capped element-wise.
    est_occupancy = np.minimum((bookings_per_month * av_nights) / 30, max_occupancy_rate)
    return est_occupancy * yearly_availability
# estimate_nights_per_year expects a *monthly* review rate. The lifetime
# `number_of_reviews` used previously pushes almost every reviewed listing
# to the occupancy cap, so use the `reviews_per_month` column of the raw
# listings instead. df keeps the row index of dfs, so the rows can be looked
# up by label. Listings without a monthly rate count as zero reviews per month.
monthly_reviews = dfs.loc[df.index, 'reviews_per_month'].fillna(0)
df['estimated_nights_per_year'] = [
    estimate_nights_per_year(reviews, availability)
    for reviews, availability in zip(monthly_reviews, df['availability_365'])
]
Average estimated nights per year by neighbourhood and property type
plt.figure(figsize=(40, 15))
# Mean estimated nights per year for each neighbourhood / property type pair;
# combinations without listings are shown as 0.
nights_by_area_and_type = df.pivot_table(
    values='estimated_nights_per_year',
    index='neighbourhood',
    columns='property_type',
    aggfunc='mean',
    fill_value=0,
)
sns.heatmap(nights_by_area_and_type)
Since we don't have property prices, we assume that each property pays for itself after 20 years of activity.
# Specs that define a "property" for the purposes of the simulation.
main_columns = ['neighbourhood', 'property_type', 'accommodates', 'bathrooms', 'bedrooms', 'beds']
copy_df = df.copy()
# estimate number of nights per year
# The estimator expects a monthly review rate, so read `reviews_per_month`
# from the raw listings (copy_df keeps the row index of dfs) rather than the
# lifetime `number_of_reviews`, which saturates the occupancy cap.
monthly_reviews = dfs.loc[copy_df.index, 'reviews_per_month'].fillna(0)
copy_df['estimated_nights_per_year'] = [
    estimate_nights_per_year(reviews, availability)
    for reviews, availability in zip(monthly_reviews, copy_df['availability_365'])
]
# get optimized prices
copy_df['optimized_price'] = predict_price(copy_df[x_cols], xgr)
# group by main columns; dropna removes groups with a missing statistic
# (e.g. the std of a single-listing group)
mdf = copy_df.groupby(main_columns).agg({
    'availability_365': 'mean',
    'price': ['mean', 'std'],
    'number_of_reviews': 'mean',
    'estimated_nights_per_year': 'mean',
    'optimized_price': ['mean', 'std']
}).reset_index().dropna()
# join multi-indexes together: ('price', 'mean') -> 'price_mean', ('beds', '') -> 'beds'
mdf.columns = ['_'.join(x) if x[1] != '' else x[0] for x in mdf.columns]
# calculate return per year
mdf['estimated_return_per_year'] = mdf['price_mean'] * mdf['estimated_nights_per_year_mean']
# calculate optimized return per year
mdf['estimated_optimized_return_per_year'] = mdf['optimized_price_mean'] * mdf['estimated_nights_per_year_mean']
# generate property price: 20 years of fully booked nights at the mean price
mdf['estimated_property_price'] = mdf['price_mean'] * (365 * 20)
# format property price (in millions)
mdf['estimated_property_price_M'] = (mdf['estimated_property_price'] / 1e6).map(lambda x: f'$ {x:.4f}')
mdf[:3]
Portfolios
# Monte Carlo simulation: build `pmax` random portfolios, each one bought
# with the same starting budget.
investment = 1e7
mc_portfolios = list()
pmax = 100
for p in range(pmax):
    print(f'{p}/{pmax}', end='\r')
    current_money = investment
    picked_properties = list()
    # candidates we can still afford and have not bought yet
    local_df = mdf[mdf.estimated_property_price < current_money]
    # while we have money, pick up random properties for the portfolio
    while len(local_df) > 0:
        # pick up a random affordable property
        ch = local_df.sample(1)
        # add property index to current portfolio list of properties
        picked_properties.append(ch.index[0])
        # pay the property price
        current_money -= ch.estimated_property_price.iloc[0]
        # Remove the purchased property and everything that is no longer
        # affordable. Dropping the purchase matters: if only already-bought
        # properties stayed affordable, re-sampling them would never end.
        local_df = local_df.drop(index=ch.index)
        local_df = local_df[local_df.estimated_property_price < current_money]
    # .loc keeps the rows in purchase order, so mdf_id lines up with its row
    tmp_portfolio = mdf.loc[picked_properties].copy()
    tmp_portfolio['mdf_id'] = picked_properties
    tmp_portfolio['p'] = p
    mc_portfolios.append(tmp_portfolio)
mc_portfolios = pd.concat(mc_portfolios).reset_index(drop=True)
mc_portfolios[:2]
# Totals per simulated portfolio.
portfolio_results = mc_portfolios.groupby('p').agg({
    'estimated_return_per_year': 'sum',
    'estimated_property_price': 'sum',
    'estimated_optimized_return_per_year': 'sum',
    'mdf_id': 'count',
}).reset_index()
# Human-readable copies of the money columns, in millions.
for money_column in ('estimated_property_price',
                     'estimated_return_per_year',
                     'estimated_optimized_return_per_year'):
    portfolio_results[f'{money_column}_M'] = (portfolio_results[money_column] / 1e6).map('$ {:.4f}'.format)
invested = portfolio_results['estimated_property_price']
yearly_return = portfolio_results['estimated_return_per_year']
yearly_return_optimized = portfolio_results['estimated_optimized_return_per_year']
# Years needed for the yearly return to add up to the amount invested.
portfolio_results['time_to_return'] = invested / yearly_return
portfolio_results['time_to_return_optimized'] = invested / yearly_return_optimized
# Extra money earned with optimized prices over the original payback period.
payback_years = portfolio_results['time_to_return']
portfolio_results['profit'] = (payback_years * yearly_return_optimized) - (payback_years * yearly_return)
portfolio_results['profit_of_investment'] = portfolio_results['profit'] / invested
portfolio_results['profit_M'] = (portfolio_results['profit'] / 1e6).map('$ {:.4f}'.format)
Portfolios with minimal return time
# Three portfolios with the shortest optimized payback time.
portfolio_results.sort_values('time_to_return_optimized').head(3)
Time is money
Profit vs Time to return
# Regression of profit share against payback time, one point per portfolio.
# x and y are passed by keyword: seaborn >= 0.12 rejects positional x/y.
sns.regplot(data=portfolio_results, x='time_to_return', y='profit_of_investment')
# Label every point with its portfolio number.
for row in portfolio_results.itertuples():
    plt.annotate(row.p, (row.time_to_return, row.profit_of_investment), ha='center', va='center')
plt.xlabel('Time to return')
plt.ylabel('Profit from investment %')
plt.tight_layout()
Portfolios by time and profit
# Total investment against payback time: original prices (left) and
# optimized prices (right).
fig, ax = plt.subplots(1, 2, figsize=(30, 10))
panels = (
    (ax[0], 'time_to_return', 'Original Prices'),
    (ax[1], 'time_to_return_optimized', 'Optimized Prices'),
)
for axis, x_column, title in panels:
    # x and y are passed by keyword: seaborn >= 0.12 rejects positional x/y.
    sns.regplot(data=portfolio_results, x=x_column, y='estimated_property_price', ax=axis)
    # Label every point with its portfolio number.
    for row in portfolio_results.itertuples():
        axis.annotate(row.p, (getattr(row, x_column), row.estimated_property_price), ha='center', va='center')
    axis.set_xlabel('Time to return')
    axis.set_ylabel('Total Investment')
    axis.set_title(title)
plt.tight_layout()
Portfolios by yearly return and total investment
# Total investment against yearly return: original prices (left) and
# optimized prices (right).
fig, ax = plt.subplots(1, 2, figsize=(30, 10))
panels = (
    (ax[0], 'estimated_return_per_year', 'Original Prices'),
    (ax[1], 'estimated_optimized_return_per_year', 'Optimized Prices'),
)
for axis, x_column, title in panels:
    # x and y are passed by keyword: seaborn >= 0.12 rejects positional x/y.
    sns.regplot(data=portfolio_results, x=x_column, y='estimated_property_price', ax=axis)
    # Label every point with its portfolio number.
    for row in portfolio_results.itertuples():
        axis.annotate(row.p, (getattr(row, x_column), row.estimated_property_price), ha='center', va='center')
    axis.set_xlabel('Return per Year')
    axis.set_ylabel('Total Investment')
    axis.set_title(title)
plt.tight_layout()
# Collapse all snapshots into one row per listing id: descriptive columns
# take their first value, numeric columns are averaged.
keep_first = ('host_id', 'neighbourhood', 'property_type', 'cancellation_policy')
aggregated_columns = [
    'host_id', 'neighbourhood', 'property_type',
    'accommodates', 'bathrooms', 'bedrooms', 'beds',
    'price', 'number_of_reviews', 'availability_365', 'review_scores_rating',
    'monthly_price', 'security_deposit', 'cleaning_fee',
    'cancellation_policy', 'weekly_price',
]
aggregations = {
    column: ('first' if column in keep_first else 'mean')
    for column in aggregated_columns
}
grp = dfs.groupby('id').agg(aggregations).reset_index()
grp.head(3)
location group
# Specs that define a "property" for the purposes of the simulation.
main_columns = ['neighbourhood', 'property_type', 'accommodates', 'bathrooms', 'bedrooms', 'beds']
copy_df = grp.copy()
# The estimator expects a monthly review rate, so use each listing's mean
# `reviews_per_month` over all snapshots instead of the lifetime
# `number_of_reviews`, which saturates the occupancy cap.
mean_monthly_reviews = dfs.groupby('id')['reviews_per_month'].mean()
monthly_reviews = copy_df['id'].map(mean_monthly_reviews).fillna(0)
copy_df['estimated_nights_per_year'] = [
    estimate_nights_per_year(reviews, availability)
    for reviews, availability in zip(monthly_reviews, copy_df['availability_365'])
]
copy_df['optimized_price'] = predict_price(copy_df[x_cols], xgr)
# dropna removes groups with a missing statistic (e.g. the std of a
# single-listing group), so every remaining group has a usable mean and std.
mdf = copy_df.groupby(main_columns).agg({
    'availability_365': 'mean',
    'price': ['mean', 'std'],
    'number_of_reviews': 'mean',
    'estimated_nights_per_year': ['mean', 'std'],
    'optimized_price': ['mean', 'std']
}).reset_index().dropna()
# join multi-indexes together: ('price', 'mean') -> 'price_mean', ('beds', '') -> 'beds'
mdf.columns = ['_'.join(x) if x[1] != '' else x[0] for x in mdf.columns]
returns = list()
optimized_returns = list()
for x in mdf.itertuples():
    # generate random number of nights using the mean and std of estimated nights per year
    random_nights = np.abs(np.random.normal(loc=x.estimated_nights_per_year_mean, scale=x.estimated_nights_per_year_std))
    night_count = int(random_nights)
    # generate random prices with the size of the random nights
    random_prices = np.random.normal(loc=x.price_mean, scale=x.price_std, size=night_count)
    # add the yearly return to our list
    returns.append(random_prices.sum())
    # for the same number of random nights, calculate the optimized yearly return
    random_optim_prices = np.random.normal(loc=x.optimized_price_mean, scale=x.optimized_price_std, size=night_count)
    optimized_returns.append(random_optim_prices.sum())
mdf['estimated_return_per_year'] = returns
mdf['estimated_optimized_return_per_year'] = optimized_returns
# property price: 20 years of fully booked nights at the mean price
mdf['estimated_property_price'] = mdf['price_mean'] * (365 * 20)
mdf['estimated_property_price_M'] = (mdf['estimated_property_price'] / 1e6).map(lambda x: f'$ {x:.4f}')
mdf[:3]
# Monte Carlo simulation: build `pmax` random portfolios, each one bought
# with the same starting budget.
investment = 1e7
mc_portfolios = list()
pmax = 100
for p in range(pmax):
    print(f'{p}/{pmax}', end='\r')
    current_money = investment
    picked_properties = list()
    # candidates we can still afford and have not bought yet
    local_df = mdf[mdf.estimated_property_price < current_money]
    # while we have money, pick up random properties for the portfolio
    while len(local_df) > 0:
        # pick up a random affordable property
        ch = local_df.sample(1)
        # add property index to current portfolio list of properties
        picked_properties.append(ch.index[0])
        # pay the property price
        current_money -= ch.estimated_property_price.iloc[0]
        # Remove the purchased property and everything that is no longer
        # affordable. Dropping the purchase matters: if only already-bought
        # properties stayed affordable, re-sampling them would never end.
        local_df = local_df.drop(index=ch.index)
        local_df = local_df[local_df.estimated_property_price < current_money]
    # .loc keeps the rows in purchase order, so mdf_id lines up with its row
    tmp_portfolio = mdf.loc[picked_properties].copy()
    tmp_portfolio['mdf_id'] = picked_properties
    tmp_portfolio['p'] = p
    mc_portfolios.append(tmp_portfolio)
mc_portfolios = pd.concat(mc_portfolios).reset_index(drop=True)
mc_portfolios[:2]
# Totals per simulated portfolio.
portfolio_results = mc_portfolios.groupby('p').agg({
    'estimated_return_per_year': 'sum',
    'estimated_property_price': 'sum',
    'estimated_optimized_return_per_year': 'sum',
    'mdf_id': 'count',
}).reset_index()
# Human-readable copies of the money columns, in millions.
for money_column in ('estimated_property_price',
                     'estimated_return_per_year',
                     'estimated_optimized_return_per_year'):
    portfolio_results[f'{money_column}_M'] = (portfolio_results[money_column] / 1e6).map('$ {:.4f}'.format)
invested = portfolio_results['estimated_property_price']
yearly_return = portfolio_results['estimated_return_per_year']
yearly_return_optimized = portfolio_results['estimated_optimized_return_per_year']
# Years needed for the yearly return to add up to the amount invested.
portfolio_results['time_to_return'] = invested / yearly_return
portfolio_results['time_to_return_optimized'] = invested / yearly_return_optimized
# Extra money earned with optimized prices over the original payback period.
payback_years = portfolio_results['time_to_return']
portfolio_results['profit'] = (payback_years * yearly_return_optimized) - (payback_years * yearly_return)
portfolio_results['profit_of_investment'] = portfolio_results['profit'] / invested
portfolio_results['profit_M'] = (portfolio_results['profit'] / 1e6).map('$ {:.4f}'.format)
Portfolios with minimal return time
# Three portfolios with the shortest optimized payback time.
portfolio_results.sort_values('time_to_return_optimized').head(3)
# Regression of profit share against payback time, one point per portfolio.
# x and y are passed by keyword: seaborn >= 0.12 rejects positional x/y.
sns.regplot(data=portfolio_results, x='time_to_return', y='profit_of_investment')
# Label every point with its portfolio number.
for row in portfolio_results.itertuples():
    plt.annotate(row.p, (row.time_to_return, row.profit_of_investment), ha='center', va='center')
plt.xlabel('Time to return')
plt.ylabel('Profit from investment %')
plt.tight_layout()
Portfolios by time to return and total investment
# Total investment against payback time: original prices (left) and
# optimized prices (right).
fig, ax = plt.subplots(1, 2, figsize=(30, 10))
panels = (
    (ax[0], 'time_to_return', 'Original Prices'),
    (ax[1], 'time_to_return_optimized', 'Optimized Prices'),
)
for axis, x_column, title in panels:
    # x and y are passed by keyword: seaborn >= 0.12 rejects positional x/y.
    sns.regplot(data=portfolio_results, x=x_column, y='estimated_property_price', ax=axis)
    # Label every point with its portfolio number.
    for row in portfolio_results.itertuples():
        axis.annotate(row.p, (getattr(row, x_column), row.estimated_property_price), ha='center', va='center')
    axis.set_xlabel('Time to return')
    axis.set_ylabel('Total Investment')
    axis.set_title(title)
plt.tight_layout()
Huge return time
Most probably because some properties are available for only a few days per year
# Three portfolios with the longest optimized payback time.
portfolio_results.sort_values('time_to_return_optimized', ascending=False).head(3)
Portfolios by yearly return and total investment
# Total investment against yearly return: original prices (left) and
# optimized prices (right).
fig, ax = plt.subplots(1, 2, figsize=(30, 10))
panels = (
    (ax[0], 'estimated_return_per_year', 'Original Prices'),
    (ax[1], 'estimated_optimized_return_per_year', 'Optimized Prices'),
)
for axis, x_column, title in panels:
    # x and y are passed by keyword: seaborn >= 0.12 rejects positional x/y.
    sns.regplot(data=portfolio_results, x=x_column, y='estimated_property_price', ax=axis)
    # Label every point with its portfolio number.
    for row in portfolio_results.itertuples():
        axis.annotate(row.p, (getattr(row, x_column), row.estimated_property_price), ha='center', va='center')
    axis.set_xlabel('Return per Year')
    axis.set_ylabel('Total Investment')
    axis.set_title(title)
plt.tight_layout()
Next Iterations:
- Find External Data for properties prices
- Create a class for simulation
- maybe more models
- Find a way to measure the predicted prices, if possible
- Maybe try binomial distribution to estimate number of nights per listing
- Fill the missing gaps:
- more features for pricing model
- more ways to estimate the number of nights per year
- add risk rates