import numpy as np
import pandas as pd
import dask.dataframe as dd
import re,string
#import stats
import os.path
pd.options.display.float_format = '{:.3f}'.format
pd.set_option('display.max_colwidth', -1)
#from SCEconomy import *
import seaborn as sns
sns.set_style('whitegrid')
pd.set_option('display.width', 1000)
import swifter
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import matplotlib as mpl
mpl.rc("savefig",dpi=200)
from matplotlib import pyplot as plt
# plt.rcParams["figure.figsize"] = (10.,10.)
import statsmodels.api as sm
from IPython.display import HTML
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
df_baseline=[]
df_target=[]
def gini(series, weights):
    """Weighted Gini coefficient of `series`.

    Sorts values ascending and accumulates the weighted area under the
    Lorenz steps, then returns (fair_area - area) / fair_area.  Same
    trapezoid accumulation as the original element-by-element loop, but
    vectorized with a cumulative sum instead of per-label pandas lookups.

    Parameters
    ----------
    series : pd.Series of values (e.g. income or wealth).
    weights : pd.Series of sample weights, index-aligned with `series`.

    Returns
    -------
    float in [0, 1], or None (with a warning printed) on a size mismatch,
    matching the original error behavior.
    """
    if series.size != weights.size:
        print("Weights are not the same size as the series")
        return
    order = series.sort_values().index
    v = series.loc[order].to_numpy(dtype=float)
    w = weights.loc[order].to_numpy(dtype=float)
    height = np.cumsum(w * v)                # running weighted total
    area = np.sum(w * (height - v / 2.0))    # area under the Lorenz steps
    fair_area = height[-1] * w.sum() / 2.0   # area under the equality line
    return (fair_area - area) / fair_area
def gini_alt(series):
    """Unweighted Gini via the mean-absolute-difference definition.

    G = 0.5 * (sum_{i,j} |x_i - x_j| / N^2) / mean(x), exactly as the
    original, but the O(N^2) Python double loop is replaced by the
    O(N log N) sorted-index identity
        sum_{i,j} |x_i - x_j| = 2 * sum_i (2i - N - 1) * x_(i),
    with x_(i) the ascending order statistics (i = 1..N).
    """
    x = np.sort(series.to_numpy(dtype=float))
    n = len(x)
    ranks = np.arange(1, n + 1)
    csum = 2.0 * np.sum((2 * ranks - n - 1) * x)
    return 0.5 * (csum / n**2) / series.mean()
def Lorenz(series, quantiles):
    """Lorenz-curve ordinates.

    For each q in `quantiles`, returns the share of the series total held
    by observations strictly below the q-th quantile value.
    """
    total = series.sum()
    cutoffs = (series.quantile(q) for q in quantiles)
    shares = [series[series < cut].sum() / total for cut in cutoffs]
    return np.array(shares)
def uploadData(case, w, rs, rc, p, delta):
    """Read one model run's simulated panel from directory `case` and build
    the derived income / valuation / ratio columns.

    Parameters
    ----------
    case : str    directory holding the run's csv output files
    w    : float  wage
    rs   : float  S-sector (sweat) interest rate
    rc   : float  C-sector interest rate
    p    : float  S-good price
    delta: float  capital depreciation rate

    Returns a DataFrame with one row per agent at period T; `_`-suffixed
    columns hold the period T-1 values.
    """
    T = 100 - 1  # last simulated period (panel files hold 100 columns)

    def col(fname, t):
        # One time-slice t of a whitespace-delimited panel file.
        return pd.read_csv(case + '/' + fname, delim_whitespace=True,
                           header=None, usecols=[t])

    def whole(fname):
        # A whole (cross-section or mapping) file.
        return pd.read_csv(case + '/' + fname, delim_whitespace=True,
                           header=None)

    data = pd.DataFrame()
    is_iz = whole('is_to_iz.csv')
    zgrid = whole('zgrid.csv')
    is_to_ieps = whole('is_to_ieps.csv')
    epsgrid = whole('epsgrid.csv')

    data['s_ind_'] = pd.read_csv(case + '/data_i_s.csv', delim_whitespace=True,
                                 header=None).iloc[:, T - 1]
    data['s_ind'] = col('data_i_s.csv', T)

    # Map each agent's state index into its (z, eps) grid values, for T and T-1.
    iz_map = is_iz.astype('int').values.flatten()
    ieps_map = is_to_ieps.astype('int').values.flatten()
    zvals = zgrid.values.flatten()
    epsvals = epsgrid.values.flatten()
    s_now = data.s_ind.astype('int').values.flatten()
    s_prev = data.s_ind_.astype('int').values.flatten()
    data['eps'] = epsvals[ieps_map[s_now]]
    data['eps_'] = epsvals[ieps_map[s_prev]]
    data['iz'] = iz_map[s_now]
    data['iz_'] = iz_map[s_prev]
    data['ieps'] = ieps_map[s_now]
    data['ieps_'] = ieps_map[s_prev]
    data['z'] = zvals[iz_map[s_now]]
    data['z_'] = zvals[iz_map[s_prev]]

    data['ys'] = col('data_ys.csv', T)
    data['ys_'] = col('data_ys.csv', T - 1)
    data['ks'] = col('data_ks.csv', T)
    data['ns'] = col('data_ns.csv', T)
    data['x'] = col('data_x.csv', T)
    data['n'] = col('data_n.csv', T)
    data['ks_'] = col('data_ks.csv', T - 1)
    data['ns_'] = col('data_ns.csv', T - 1)
    data['x_'] = col('data_x.csv', T - 1)
    data['n_'] = col('data_n.csv', T - 1)
    data['flagC'] = col('data_is_c.csv', T)
    data['flagC_'] = col('data_is_c.csv', T - 1)
    data['flagO'] = col('data_is_o.csv', T)
    data['kappa'] = col('data_kap.csv', T)
    data['hkap'] = col('data_hkap.csv', T)
    data['hy'] = col('data_hy.csv', T)
    data['s_age'] = whole('s_age.csv')
    data['sind_age'] = whole('sind_age.csv')
    data['ind_age'] = whole('ind_age.csv')
    data['c_age'] = whole('c_age.csv')
    data['div_'] = col('data_div_sweat.csv', T - 1)
    data['div'] = col('data_div_sweat.csv', T)
    data['val_sdf_'] = col('data_val_sweat_dyna_sdf.csv', T - 1)
    data['val_mf_'] = col('data_val_sweat_dyna_fix.csv', T - 1)
    data['val_sdf'] = col('data_val_sweat_dyna_sdf.csv', T)
    data['val_mf'] = col('data_val_sweat_dyna_fix.csv', T)
    data['a'] = col('data_a.csv', T)

    data['flagC'] = data['flagC'].astype('bool')
    data['flagC_'] = data['flagC_'].astype('bool')
    data['flagO'] = data['flagO'].astype('bool')

    # Business expenses, incomes and valuation-based ratios.
    data['expenses'] = (rs + delta) * data['ks'] + w * data['ns'] + data['x']
    data['expenses_'] = (rs + delta) * data['ks_'] + w * data['ns_'] + data['x_']
    data['wns'] = data['ns'] * w
    data['pys'] = p * data['ys']
    data['bus_inc'] = p * data['ys'] - data['expenses']
    data['bus_inc_'] = p * data['ys_'] - data['expenses_']
    data['lab_inc'] = data.eps * w * data.n
    data['lab_inc_'] = data.eps_ * w * data.n_
    data['n_e'] = data['hkap'] + data['hy']
    data['ne'] = data['n_e']
    data['kappa_adj'] = (data.kappa ** 0.15) * data.z
    data['ret_sdf'] = (data['val_sdf'] + data['div_']) / data['val_sdf_'] + 0.02
    data['ret_mf'] = (data['val_mf'] + data['div_']) / data['val_mf_'] + 0.02
    data['div_yld_sdf'] = (data['div']) / data['val_sdf']
    data['div_yld_mf'] = (data['div']) / data['val_mf']
    data['profit_rate'] = 100 * data['bus_inc'] / data['ks']
    data['measured_markups'] = 100 * (data['bus_inc']) / (data['x'] + (rs + delta) * data['ks'] + w * data['ns'])
    data['measured_markups_alt'] = 100 * (data['bus_inc'] + data['x']) / ((rs + delta) * data['ks'] + w * data['ns'])
    data['x_pys'] = data['x'] / (p * data['ys'])
    data['cogs'] = ((rs + delta) * data['ks'] + w * data['ns'] + data['x'])
    data['wealth_sdf'] = data['a'] + data['val_sdf']
    data['wealth_mf'] = data['a'] + data['val_mf']
    data['ztilde'] = np.log(data['z'])
    data['zkapphi'] = data['z'] * data['kappa'] ** (0.15)
    data['ztilde_meas'] = np.log(data['ys'] / (data['ks'] ** (0.333) * data['ns'] ** (0.666)))

    # Income: C-sector workers (flagC) earn labor income, business owners earn
    # business income.  np.where replaces the original chained
    # data['inc'][mask] = ... writes, which raise SettingWithCopyWarning and
    # are silently dropped under pandas copy-on-write.
    data['inc'] = np.where(data['flagC'], data['lab_inc'], data['bus_inc'])
    data['tot_inc'] = data['inc'] + rc * data['a']

    if os.path.exists(case + '/data_R.csv'):
        data['R'] = col('data_R.csv', T)
        data['intan_int'] = data['R'] / (data['R'] + data['ks'])
        # Owners' wealth additionally includes the sweat-capital value R.
        data['wealth_R'] = data['a'] + np.where(data['flagC'], 0.0, data['R'])
        data['wealth_income_ratio'] = data['wealth_R'] / data['tot_inc']
    data['wgt'] = 1.0
    return data
def get_wealth_income_stats(case, w, rs, rc, p, delta):
    """Light-weight loader: only the columns needed for wealth/income stats.

    Bug fix: the original read from the undefined globals `path_baseline[n]`
    and `T` (NameError when called standalone) and used a stray dask read; it
    now reads from the `case` argument with a local `T`, consistent with
    uploadData.
    """
    T = 100 - 1  # last simulated period

    def col(fname, t):
        # One time-slice t of a whitespace-delimited panel file.
        return pd.read_csv(case + '/' + fname, delim_whitespace=True,
                           header=None, usecols=[t])

    data = col('data_R.csv', T)
    data.columns = ['R']
    data['flagC'] = col('data_is_c.csv', T).values
    data['flagC'] = data['flagC'].astype('bool')
    data['ys'] = col('data_ys.csv', T)
    data['ks'] = col('data_ks.csv', T)
    data['ns'] = col('data_ns.csv', T)
    data['x'] = col('data_x.csv', T)
    data['sind_age'] = pd.read_csv(case + '/sind_age.csv',
                                   delim_whitespace=True, header=None)
    data['expenses'] = (rs + delta) * data['ks'] + w * data['ns'] + data['x']
    data['wns'] = data['ns'] * w
    data['pys'] = p * data['ys']
    data['bus_inc'] = p * data['ys'] - data['expenses']
    data['wgt'] = 1.0
    return data
def get_prices(path_baseline, momfilename):
    """Scan a moments text file and pull out prices and aggregates.

    Each quantity sits on the line whose label appears at the expected
    character position; the value follows the first '=' on that line.
    Returns (w, p, rc, rs, delta, GDP) where GDP = gross value added minus
    sweat investment.
    """
    with open(path_baseline + momfilename, 'rt') as fh:
        for line in fh:
            rhs = line[line.find("=") + 1:]  # text after the first '='
            if line.find("w") == 0:
                w = float(rhs)
            if line.find("S-good price") == 0:
                p = float(rhs)
            if line.find("rc") == 0:
                rc = float(rhs)
            if line.find("rs") == 0:
                rs = float(rhs)
            if line.find("GDP(") == 2:
                GVA = float(rhs)
            if line.find("investment,") == 2:
                x = float(rhs)
            if line.find("delk ") == 0:
                delta = float(rhs)
    return w, p, rc, rs, delta, GVA - x
def decomposition(var_bin, bins, varlist):
    """Bin the module-level `data` frame on `var_bin` and print per-bin means
    and counts of the columns in `varlist`.

    Side effect: adds a 'bin_<var_bin>' column to `data`.
    Returns (mean_baseline, count_baseline).
    """
    print('')
    print("SORTED BY " + var_bin)
    print("BINS = " + str([round(b, 2) for b in bins]))
    bin_col = 'bin_' + var_bin
    data[bin_col] = pd.cut(data[var_bin], bins, labels=False)
    grouped = data.groupby(bin_col)
    mean_baseline = grouped.mean()[varlist]
    count_baseline = grouped.count()[varlist]
    print("-------------- baseline means ----------")
    print(mean_baseline)
    print("-------------- baseline counts ----------")
    print(count_baseline)
    print('')
    return mean_baseline, count_baseline
def format_table(data, total):
    """Pretty-print helper: prepend the bin-interval labels (derived from the
    module-level `df_baseline`, `var_bin` and `bins`) as a first column and
    append a 'total' row.

    Bug fix: DataFrame.append was removed in pandas 2.0; the total row is
    now appended with pd.concat, preserving the old ignore_index behavior.
    """
    results = pd.DataFrame(data)
    # Bin labels, sorted; relies on module-level df_baseline / var_bin / bins.
    results['bins/total'] = (sorted(pd.cut(df_baseline[var_bin], bins).unique()))
    cols = results.columns.tolist()
    results = results[cols[-1:] + cols[:-1]]  # move the label column first
    total_row = total.to_frame().T if isinstance(total, pd.Series) else pd.DataFrame(total)
    results = pd.concat([results, total_row], ignore_index=True)
    results.iloc[-1, 0] = 'total'
    return results
def print_results(var_bin, bins, varlist, df_baseline=None, df_target=None):
    """Bin baseline and target frames on `var_bin` and decompose the change
    in each `varlist` column into intensive (per-bin mean) and extensive
    (per-bin count) margins with a midpoint (Bennet) decomposition.

    Bug fix: the original defaults `df_baseline=df_baseline` bound the EMPTY
    placeholder lists at definition time, so reassigning the module-level
    frames later never reached the defaults.  None sentinels now resolve the
    current module-level frames at call time.

    Side effect: adds a 'bin_<var_bin>' column to both frames.
    Returns (total, sum_baseline, intensive_alt, extensive_alt).
    """
    if df_baseline is None:
        df_baseline = globals().get('df_baseline')
    if df_target is None:
        df_target = globals().get('df_target')
    print('')
    print("SORTED BY " + var_bin)
    print("BINS = " + str([round(b, 2) for b in bins]))
    bin_col = 'bin_' + var_bin
    df_baseline[bin_col] = pd.cut(df_baseline[var_bin], bins, labels=False)
    df_target[bin_col] = pd.cut(df_target[var_bin], bins, labels=False)
    g_base = df_baseline.groupby(bin_col)
    g_targ = df_target.groupby(bin_col)
    mean_baseline = g_base.mean()[varlist]
    mean_target = g_targ.mean()[varlist]
    sum_baseline = g_base.sum()[varlist]
    count_baseline = g_base.count()[varlist]
    count_target = g_targ.count()[varlist]
    # Midpoint decomposition of the per-bin change in totals.
    intensive = (mean_target - mean_baseline) * (count_target + count_baseline) / 2
    extensive = (count_target - count_baseline) * (mean_target + mean_baseline) / 2
    total = intensive + extensive
    intensive = 100. * (intensive / (total.sum()))
    extensive = 100. * (extensive / (total.sum()))
    # Total change as a percent of the baseline total, used to rescale the
    # percentage contributions into percentage-point changes.
    total_per_change = (100 * total.sum() / sum_baseline.sum()).T
    intensive_alt = intensive * total_per_change * (1.0 / 100)
    extensive_alt = extensive * total_per_change * (1.0 / 100)
    return total, sum_baseline, intensive_alt, extensive_alt
print('')
def __extract_num__(item_name, text):
    """Return the first number on the first line of `text` that starts with
    `item_name`; NaN if no such line exists or no number follows the label."""
    import re
    num_re = re.compile(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?')
    for row in text.splitlines():
        if not row.startswith(item_name):
            continue
        hit = num_re.search(row[len(item_name):])
        return float(hit.group(0)) if hit is not None else np.nan
    return np.nan
def get_data_textbuffer(item_name, outputs):
    """Array of __extract_num__ results, one per text buffer in `outputs`."""
    return np.array([__extract_num__(item_name, text) for text in outputs])
def get_duration(sentence):
    """Parse a free-text agreement description into a duration in days.

    Lowercases, strips hyphens and spells out one..ten as digits, then for
    every unit word (week/month/year/day) inspects the 3 characters before
    it for a number; the longest duration found wins.  Returns 0 when
    nothing parses.
    """
    word_digits = {'one': '1', 'two': '2', 'three': '3', 'four': '4',
                   'five': '5', 'six': '6', 'seven': '7', 'eight': '8',
                   'nine': '9', 'ten': '10'}
    text = sentence.lower().replace('-', '')
    for word, digit in word_digits.items():
        text = text.replace(word, digit)
    unit_days = {'week': 7, 'month': 30, 'year': 365, 'day': 1}
    best = 0
    for hit in re.finditer('week|month|year|day', text):
        start, end = hit.start(), hit.end()
        window = text[max(0, start - 3):end]      # number expected just before the unit
        digits = re.findall(r'\d+', window)
        if digits:
            best = max(best, unit_days[text[start:end]] * int(digits[0]))
    return best
def get_distance(sentence):
    """Parse a free-text description into a distance in miles.

    Scans for unit markers ('mil', 'kilo', 'km'); the 4 characters before a
    marker are searched for a number, kilometers are converted to miles, and
    the largest distance found is returned (0 when nothing parses).
    """
    to_miles = {'mil': 1.0, 'kilo': 0.621371, 'km': 0.621371}
    text = sentence.lower()
    best = 0
    for hit in re.finditer('mil|kilo|km', text):
        start, end = hit.start(), hit.end()
        window = text[max(0, start - 4):end]      # number expected just before the unit
        digits = re.findall(r'\d+', window)
        if digits:
            best = max(best, to_miles[text[start:end]] * int(digits[0]))
    return best
We use all firms whose reported legal form of organization is C Corporation, S Corporation, Sole Proprietorship, Partnership, or LLC, and that have a valid intangible intensity, that is, for which the ratio "TotalIntangiblesPPA/TotalAssetsPPA" is finite. To compute sales weights we use the variable "NetSales" normalized by "GDPDeflator" so that all sales are in 2007 dollars.

## Read data from Pratt's Stats and the GDP deflator
# ---- Load Pratt's Stats transactions and the GDP deflator; build the
# ---- intensity/leverage columns and parse the free-text deal descriptions.
data = pd.read_csv('prattsdatabase.csv', encoding='latin-1')
# Aggregation specs: intensity stats plus the relevant sales-weight sum.
f = {'IntanglibleIntensityPPA': ['count', 'mean', 'median', 'std'], 'wgt_sales': ['sum']}
f_pass = {'IntanglibleIntensityPPA': ['count', 'mean', 'median', 'std'], 'wgt_sales_passthru': ['sum']}
f2 = {'RelGoodwillIntensityPPA': ['count', 'mean', 'median', 'std']}
GDPDeflator = pd.read_csv('GDPDEFAnnual.csv', encoding='latin-1')
GDPDeflator = GDPDeflator[:-1]  # drop the trailing row
GDPDeflator.year = GDPDeflator.year.astype(int)
# Deflate net sales to 2007 dollars, expressed in millions.
data['year'] = data['SaleDate'].str[-4:]
data['year'] = data['year'].astype(int)
data['pricedef'] = data['year'].map(GDPDeflator.set_index('year')['GDPDEF2007'])
data['RealNetSales'] = data.NetSales / data.pricedef
data['RealNetSales'] = data['RealNetSales'] / 1000000
data['IntanglibleIntensityPPA'] = data.TotalIntangiblesPPA / data.TotalAssetsPPA
# .loc instead of the original chained data['NonCompete'][...] = ... writes,
# which raise SettingWithCopyWarning and are dropped under copy-on-write.
data.loc[data['NonCompete'] == 'Yes', 'NonCompete'] = 1.0
data.loc[data['NonCompete'] == 'No', 'NonCompete'] = 0.0
data['NetIncomeOverSalePrice'] = (data.EarningsBeforeTaxes / data.MVICPrice) * 100
data['IdentifiableIntanglibleIntensityPPA'] = 100 * data.TotalIdentifiableIntangiblesPPA / data.TotalAssetsPPA
data['TotalIntangiblesFAPPARatio'] = 100 * data['TotalIntangiblesPPA'] / (data['TotalIntangiblesPPA'] + data['FixedAssetsPPA'])
data['GoodwillIntensityPPA'] = 100 * data.GoodwillPPA / data.TotalAssetsPPA
# Re-scale intangible intensity to percent (overwrites the ratio set above).
data['IntanglibleIntensityPPA'] = 100 * data.TotalIntangiblesPPA / data.TotalAssetsPPA
data['logIntant'] = np.log(data.TotalIntangiblesPPA)
data['TargetAge2'] = data['TargetAge'] ** 2
data['IntanglibleSales'] = 100 * data.TotalIntangiblesPPA / data.NetSales
data['RelGoodwillIntensityPPA'] = 100 * (data['GoodwillIntensityPPA'] / data['IntanglibleIntensityPPA'])
data['FixedIntensityPPA'] = 100 * data.FixedAssetsPPA / data.TotalAssetsPPA
data['FixedAssetsBM'] = 100 * data.FixedAssets / data.FixedAssetsPPA
data['LeveragePPA'] = data.InterestBearingLiabilitiesPPA / data.TotalAssetsPPA
data['Leverage'] = data.LongTermLiabilities / data.TotalAssets
data.loc[np.isinf(data['Leverage']), 'Leverage'] = np.nan  # zero-asset targets
# Parse the free-text agreement descriptions (swifter parallelizes apply).
data['EmploymentAgreementDescription'] = data['EmploymentAgreementDescription'].astype(str)
data["EmploymentAgreementLength"] = data.EmploymentAgreementDescription.swifter.apply(get_duration)
data['NonCompeteDescription'] = data['NonCompeteDescription'].astype(str)
data["NonCompeteDistance"] = data.NonCompeteDescription.swifter.apply(get_distance)
data['EmploymentAgreementAlt'] = (data["EmploymentAgreementLength"] > 0)
# Two-digit NAICS sector labels for binning TargetNAICS1.  The 'NA*' entries
# pad gaps in the code list so labels line up with the pd.cut bin edges below.
naics_labels=['Agriculture, Forestry, Fishing and Hunting',\
'Mining, Quarrying, and Oil and Gas Extraction',\
'Utilities',\
'Construction',\
'NA1',\
'Manufacturing',\
'NA2',\
'Wholesale Trade',\
'Retail Trade',\
'NA3',\
'Transportation and Warehousing',\
'NA4',\
'Information',\
'Finance and Insurance',\
'Real Estate and Rental and Leasing',\
'Professional, Scientific, and Technical Services',\
'Management of Companies and Enterprises',\
'Administrative and Support and Waste Management and Remediation Services',\
'Educational Services',\
'Health Care and Social Assistance',\
'Arts, Entertainment, and Recreation',\
'Accommodation and Food Service',\
'Other Services','Public administration']
# Left bin edges (2-digit NAICS codes) for pd.cut; one more edge than labels.
naics_code=[11,21,22,23,24,31,35,42,44,46,48,50,51,52,53,54,55,56,61,62,71,72,81,83,92]
# Number of sector bins actually used below.
n=24
# Coarser alternative grouping, kept for reference:
#naics_labels=['Ag. Mining and Utilities',\
# 'Consts and Mfg',\
# 'Trade and warehousing',\
# 'Information and FIRE',\
# 'RE',\
# 'Professional services',\
# 'Nonprof services']
#naics_code=[11,23,42,51,53,54,57,92]
# ---- Consolidate raw legal-form codes into TargetStructureAlt ----
# Categories: C Corporation, S Corporation, Sole Proprietorship, Partnership,
# LLC, Others.
data['TargetStructureAlt']=data['TargetStructure'].values
# All partnership variants -> 'Partnership'.
sel=data['TargetStructure'].isin(['Partnership', 'Limited Partnership', 'LLP'])
data.loc[sel,'TargetStructureAlt']='Partnership'
# Foreign / nonprofit / miscellaneous forms -> 'Others'.
sel=data['TargetStructure'].isin(['Consolidated', 'UK Corporation', 'PC','Non-Profit','Ltd','Limited Corporation'])
data.loc[sel,'TargetStructureAlt']='Others'
# Normalize the lowercase spelling variant.
sel=data['TargetStructure'].isin(['C corporation'])
data.loc[sel,'TargetStructureAlt']='C Corporation'
# Bin TargetNAICS1 into 2-digit NAICS sectors.
# NOTE(review): dividing by 10**10 appears to keep the leading two digits of
# a longer NAICS code -- confirm the width of TargetNAICS1 in the raw file.
data['naics_label']=pd.cut(data['TargetNAICS1']/10**10,naics_code[:n+1],labels=naics_labels[:n],include_lowest=True,right=False)
data['naics_code_cat']=pd.cut(data['TargetNAICS1']/10**10,naics_code[:n+1],include_lowest=True,right=False)
data['naics_2d']=pd.cut(data['TargetNAICS1']/10**10,naics_code[:n+1],labels=naics_code[:n],include_lowest=True,right=False)
#data['naics_label']=data['naics_label'].astype(str)
#merge_naics_list=['Mining, Quarrying, and Oil and Gas Extraction','Agriculture, Forestry, Fishing and Hunting','Utilities','Wholesale Trade','Educational Services','Arts, Entertainment, and Recreation','nan']
#for naics in merge_naics_list:
# sel=data['naics_label']==naics
# data.loc[sel,'naics_label']='others'
#data.loc[sel,'naics_label'].replace(naics,'others')
# data.loc[sel,'naics_code_cat'].replace(naics,'others')
# data.loc[sel,'naics_2d']=99
# Analysis sample: finite intangible intensity and one of the five legal forms.
selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )
sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
selection=selection&sel
# True/False flags for pass-through businesses, excluding LLCs ...
selPassThru=data['TargetStructureAlt'].isin(['S Corporation', 'Sole Proprietorship', 'Partnership'])
data['TargetStructurePassThru']=data['TargetStructure'].values
data.loc[selPassThru,'TargetStructurePassThru']=True
data.loc[~selPassThru,'TargetStructurePassThru']=False
# ... and including LLCs.
selPassThru=data['TargetStructureAlt'].isin(['S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
data['TargetStructurePassThruLLC']=data['TargetStructure'].values
data.loc[selPassThru,'TargetStructurePassThruLLC']=True
data.loc[~selPassThru,'TargetStructurePassThruLLC']=False
# 'all_bus' label spanning every kept legal form (a STRING column, not boolean).
data['TargetStructureBus']=data['TargetStructure'].values
data.loc[sel,'TargetStructureBus']='all_bus'
# Sales weights: each deal's real sales as a percent of sample totals.
data['wgt_sales']=data['RealNetSales']/data[selection]['RealNetSales'].sum()*100
selPassThru=data['TargetStructureAlt'].isin(['S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
selPassThru=selection&selPassThru
data['wgt_sales_passthru']=data['RealNetSales']/data[selPassThru]['RealNetSales'].sum()*100
# ---- Table: intangible intensity by ownership aggregate ----
# Each temp* pulls one groupby row (the True / 'all_bus' group) positionally
# and reshapes it into a one-row table.
# NOTE(review): iloc picks depend on pandas' sorted group order -- verify.
temp1=data[selection][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[selection].TargetStructurePassThru).agg(f)
temp1=temp1.iloc[1,:].values
temp1=pd.DataFrame(temp1,index=['count','mean','median','std','sales wgt']).T
temp1.index=['Passthru, excluding LLCs']
temp2=data[selection][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[selection].TargetStructurePassThruLLC).agg(f)
temp2=temp2.iloc[1,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru, including LLCs']
temp3=data[selection][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[selection].TargetStructureBus).agg(f)
temp3=temp3.iloc[0,:].values
temp3=pd.DataFrame(temp3,index=['count','mean','median','std','sales wgt']).T
temp3.index=['All businesses']
temp=pd.concat([temp1,temp2,temp3])
temp['count']=temp['count'].astype(int)
display(temp)
# ---- Table: intensity by legal form, with a summed Total row ----
temp=data[selection][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[selection].TargetStructureAlt).agg(f)
temp=temp.iloc[:,:].values
temp=pd.DataFrame(temp,columns=['count','mean','median','std','sales wgt'])
# NOTE(review): hard-coded labels assume this exact alphabetical group order.
temp.index=['C corporations','LLCs','Partnerships','S corporation','Sole Proprietors']
tempsum=temp.sum()
tempsum=tempsum.values
tempsum=pd.DataFrame(tempsum,index=['count','mean','median','std','sales wgt']).T
tempsum.index=['Total']
temp=pd.concat([temp,tempsum])
temp['count']=temp['count'].astype(int)
# Display order: Sole Prop, Partnerships, S corp, LLCs, C corp, Total.
display(temp.iloc[[4,2,3,1,0,5],:])
# ---- Pass-through (including LLCs) intensity by NAICS sector ----
selPassThru=data['TargetStructureAlt'].isin(['S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
selPassThru=selection&selPassThru
temp=data[selPassThru][['IntanglibleIntensityPPA','wgt_sales_passthru']].groupby(data[selPassThru].naics_label).agg(f_pass)
indexlist=temp.index.to_list()
temp=temp.iloc[:,:].values
temp=pd.DataFrame(temp,columns=['count','mean','median','std','sales wgt'])
temp.index=indexlist
temp['count']=temp['count'].astype(int)
# Append a summed Total row.
tempsum=temp.sum()
tempsum=tempsum.values
tempsum=pd.DataFrame(tempsum,index=['count','mean','median','std','sales wgt']).T
tempsum.index=['Total']
temp=pd.concat([temp,tempsum])
temp['count']=temp['count'].astype(int)
# Drop sectors with no observations.
display(temp[~np.isnan(temp['mean'])] )
# ---- Same sector table restricted to S corporations only ----
grp=['S Corporation']
selPassThruWoLLC=data['TargetStructureAlt'].isin(grp)
selPassThruWoLLC=selection&selPassThruWoLLC
# Sales weights within the restricted group.
data['wgt_sales_passthruWoLLC']=data['RealNetSales']/data[selPassThruWoLLC]['RealNetSales'].sum()*100
f_passWoLLC = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales_passthruWoLLC':['sum']}
selPassThruWoLLC=data['TargetStructureAlt'].isin(grp)
selPassThruWoLLC=selection&selPassThruWoLLC
temp=data[selPassThruWoLLC][['IntanglibleIntensityPPA','wgt_sales_passthruWoLLC']].groupby(data[selPassThruWoLLC].naics_label).agg(f_passWoLLC)
indexlist=temp.index.to_list()
temp=temp.iloc[:,:].values
temp=pd.DataFrame(temp,columns=['count','mean','median','std','sales wgt'])
temp.index=indexlist
temp['count']=temp['count'].astype(int)
tempsum=temp.sum()
tempsum=tempsum.values
tempsum=pd.DataFrame(tempsum,index=['count','mean','median','std','sales wgt']).T
tempsum.index=['Total']
temp=pd.concat([temp,tempsum])
temp['count']=temp['count'].astype(int)
display(temp[~np.isnan(temp['mean'])] )
Below we show summary statistics for the different types of intangibles. For each type we compute its share of total intangibles for observations with positive values.
Other Intangibles PPA: Any other acquired intangible asset that is not listed in the preceding fields.
Goodwill PPA: Represents the excess of the aggregate purchase price over the fair value of the net assets of the acquired business.
# ---- Decompose total intangibles into component types ----
# For each component, compute its share of total intangibles (percent) among
# observations where the component is positive, then tabulate as above.
varlist=['CustomerRelationshipsListsPPA','BacklogPPA','DevelopedExistingTechnologyPPA','InProcessRandDPPA','TradeNamesTradeMarksPPA','NonCompeteAgreementsPPA','OtherIntangiblesPPA','GoodwillPPA']
for var in varlist:
    print(var)
    # This component's share of total intangibles, in percent.
    data[var+'Intensity']=data[var]/data['TotalIntangiblesPPA']*100
    var=var+'Intensity'
    # NOTE(review): this rebinds the module-level `selection` and `f`.
    selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )
    sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
    selection=selection&sel
    # Keep only observations with a strictly positive component value.
    sel2=selection&(data[selection][var]>0)
    f = {var:['count','mean','median','std'],'wgt_sales':['sum']}
    temp1=data[sel2][[var,'wgt_sales']].groupby(data[sel2].TargetStructurePassThru).agg(f)
    temp1=temp1.iloc[1,:].values
    temp1=pd.DataFrame(temp1,index=['count','mean','median','std','sales wgt']).T
    temp1.index=['Passthru, excluding LLCs']
    temp2=data[sel2][[var,'wgt_sales']].groupby(data[sel2].TargetStructurePassThruLLC).agg(f)
    temp2=temp2.iloc[1,:].values
    temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
    temp2.index=['Passthru, including LLCs']
    temp3=data[sel2][[var,'wgt_sales']].groupby(data[sel2].TargetStructureBus).agg(f)
    temp3=temp3.iloc[0,:].values
    temp3=pd.DataFrame(temp3,index=['count','mean','median','std','sales wgt']).T
    temp3.index=['All businesses']
    temp=pd.concat([temp1,temp2,temp3])
    temp['count']=temp['count'].astype(int)
    display(temp)
    # Per-legal-form table with a Total row.
    temp=data[sel2][[var,'wgt_sales']].groupby(data[sel2].TargetStructureAlt).agg(f)
    to_list=temp.index.to_list()
    temp=temp.iloc[:,:].values
    temp=pd.DataFrame(temp,columns=['count','mean','median','std','sales wgt'])
    temp.index=to_list
    tempsum=temp.sum()
    tempsum=tempsum.values
    tempsum=pd.DataFrame(tempsum,index=['count','mean','median','std','sales wgt']).T
    tempsum.index=['Total']
    temp=pd.concat([temp,tempsum])
    temp['count']=temp['count'].astype(int)
    # Summed mean/median/std are meaningless; blank them in the Total row.
    temp.iloc[-1,1]='--'
    temp.iloc[-1,2]='--'
    temp.iloc[-1,3]='--'
    display(temp)
A noncompete is an agreement with the selling party not to compete with the purchaser, usually for a certain period of time and usually in a specified geographic area. Pratt's Stats reports whether there was a noncompete agreement and, if so, in some cases a description of it, such as its value or length.
Below we show summary statistics on intangible intensity for businesses, broken down by whether they have a noncompete agreement.
# ---- Noncompete agreements: recode and tabulate (pass-throughs excl. LLCs) ----
# Restore the headline agg specs (the component loop above rebound `f`).
f = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales':['sum']}
f_pass = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales_passthru':['sum']}
# NonCompete was recoded Yes/No -> 1.0/0.0 earlier; make it a real boolean.
# NOTE(review): astype(bool) maps NaN to True -- confirm missing flags are rare.
data['NonCompete']=data['NonCompete'].replace(0,False)
data['NonCompete']=data['NonCompete'].replace(1,True)
data['NonCompete']=data['NonCompete'].astype(bool)
# Categorize deals by what is known about the noncompete's terms.
data['NonCompeteAlt']=np.NaN
sel=(data['NonCompete']==True)&(data.NonCompeteLength>0)
data.loc[sel,'NonCompeteAlt']='NonCompete with strictly positive duration'
sel=(data['NonCompete']==True)&(data.NonCompeteLength==0)
data.loc[sel,'NonCompeteAlt']='NonCompete with duration = 0'
sel=(data['NonCompete']==True)& np.isnan(data.NonCompeteLength)
data.loc[sel,'NonCompeteAlt']='NonCompete with no description of terms'
sel=(data['NonCompete']==False)
data.loc[sel,'NonCompeteAlt']='No Agreement'
# Convert length to days.
# NOTE(review): rerunning this cell multiplies by 30 again -- confirm unit.
data['NonCompeteLength']=data['NonCompeteLength']*30
selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )
sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
# Pass-throughs excluding LLCs (TargetStructurePassThru is a True/False column).
sel2=selection&sel&data.TargetStructurePassThru
temp=data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)
# BUG FIX: rows are now selected by group label.  The original's positional
# iloc picks assumed the order (0 No Agreement, 1 strictly positive,
# 2 duration = 0, 3 no description), but pandas sorts groups alphabetically
# (0 No Agreement, 1 duration = 0, 2 no description, 3 strictly positive),
# so three of the four rows were mislabeled.
temp1=pd.DataFrame(temp.loc['NonCompete with no description of terms'].values,index=['count','mean','median','std','sales wgt']).T
temp1.index=['Passthru excluding LLCs with NonCompete but no description of terms']
temp2=pd.DataFrame(temp.loc['NonCompete with duration = 0'].values,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru excluding LLCs with NonCompete agreement and duration = 0']
temp3=pd.DataFrame(temp.loc['NonCompete with strictly positive duration'].values,index=['count','mean','median','std','sales wgt']).T
temp3.index=['Passthru excluding LLCs with NonCompete agreement with strictly positive duration']
temp4=pd.DataFrame(temp.loc['No Agreement'].values,index=['count','mean','median','std','sales wgt']).T
temp4.index=['Passthru excluding LLCs without NonCompete agreements']
temp=pd.concat([temp4,temp2,temp1,temp3])
temp['count']=temp['count'].astype(int)
display(temp)
# Same table for pass-throughs including LLCs.
sel2=selection&sel&data.TargetStructurePassThruLLC
temp=data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)
# BUG FIX: select rows by category name — groupby sorts the NonCompeteAlt
# categories alphabetically, so the old .iloc positions 1-3 were attached to
# the wrong labels.
temp1=pd.DataFrame(temp.loc['NonCompete with no description of terms',:].values,index=['count','mean','median','std','sales wgt']).T
temp1.index=['Passthru including LLCs with NonCompete but no description of terms']
temp2=pd.DataFrame(temp.loc['NonCompete with duration = 0',:].values,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru including LLCs with NonCompete agreement and duration = 0']
temp3=pd.DataFrame(temp.loc['NonCompete with strictly positive duration',:].values,index=['count','mean','median','std','sales wgt']).T
temp3.index=['Passthru including LLCs with NonCompete agreement with strictly positive duration']
temp4=pd.DataFrame(temp.loc['No Agreement',:].values,index=['count','mean','median','std','sales wgt']).T
temp4.index=['Passthru including LLCs without NonCompete agreements']
temp=pd.concat([temp4,temp2,temp1,temp3])
temp['count']=temp['count'].astype(int)
display(temp)
# Same table for all businesses.
sel2=selection&sel&data.TargetStructureBus
temp=data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)
# BUG FIX: select rows by category name — groupby sorts the NonCompeteAlt
# categories alphabetically, so the old .iloc positions 1-3 were attached to
# the wrong labels.
temp1=pd.DataFrame(temp.loc['NonCompete with no description of terms',:].values,index=['count','mean','median','std','sales wgt']).T
temp1.index=['All businesses with NonCompete but no description of terms']
temp2=pd.DataFrame(temp.loc['NonCompete with duration = 0',:].values,index=['count','mean','median','std','sales wgt']).T
temp2.index=['All businesses with NonCompete agreement and duration = 0']
temp3=pd.DataFrame(temp.loc['NonCompete with strictly positive duration',:].values,index=['count','mean','median','std','sales wgt']).T
temp3.index=['All businesses with NonCompete agreement with strictly positive duration']
temp4=pd.DataFrame(temp.loc['No Agreement',:].values,index=['count','mean','median','std','sales wgt']).T
temp4.index=['All businesses without NonCompete agreements']
temp=pd.concat([temp4,temp2,temp1,temp3])
temp['count']=temp['count'].astype(int)
display(temp)
# Full two-way breakdown of intangible intensity: legal form x NonCompete category.
selection = data.IntanglibleIntensityPPA.notnull() & ~np.isinf(data.IntanglibleIntensityPPA)
legal_forms = ['C Corporation', 'S Corporation', 'Sole Proprietorship', 'Partnership', 'LLC']
sel = data['TargetStructureAlt'].isin(legal_forms)
sel2 = selection & sel
subset = data[sel2]
temp = subset[['IntanglibleIntensityPPA', 'wgt_sales']].groupby([subset.TargetStructureAlt, subset.NonCompeteAlt]).agg(f)
display(temp)
Below we describe statistics for the length of the NonCompete in days
# Summary statistics for NonCompete duration (in days; converted from months
# earlier via *30), restricted to deals with a strictly positive duration.
f = {'NonCompeteLength':['count','mean','median','std'],'wgt_sales':['sum']}
f_pass = {'NonCompeteLength':['count','mean','median','std'],'wgt_sales_passthru':['sum']}
selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) ) & (data.NonCompeteAlt=='NonCompete with strictly positive duration')
sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
# Pass-throughs excluding LLCs. Only one NonCompeteAlt category survives the
# selection above, so the groupby has a single row and iloc[0] is unambiguous.
sel2=selection&sel&data.TargetStructurePassThru
temp=data[sel2][['NonCompeteLength','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)
temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru excluding LLCs with NonCompete']
temp=pd.concat([temp2])
temp['count']=temp['count'].astype(int)
display(temp)
# Pass-throughs including LLCs.
sel2=selection&sel&data.TargetStructurePassThruLLC
temp=data[sel2][['NonCompeteLength','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)
temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru including LLCs with NonCompete']
temp=pd.concat([temp2])
temp['count']=temp['count'].astype(int)
display(temp)
# All businesses.
sel2=selection&sel&data.TargetStructureBus
temp=data[sel2][['NonCompeteLength','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)
temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['All business including LLCs with NonCompete']
temp=pd.concat([temp2])
temp['count']=temp['count'].astype(int)
display(temp)
# Breakdown by legal form.
sel2=selection&sel
temp=data[sel2][['NonCompeteLength','wgt_sales']].groupby([data[sel2].TargetStructureAlt] ).agg(f)
display(temp)
Below we describe statistics for the radius of the NonCompete in miles. There are several observations where the radius is described as a list of counties/states. We drop those when we construct the stats below.
# Summary statistics for the NonCompete radius in miles. Observations whose
# radius is a list of counties/states have no numeric distance and are excluded
# by the >0 filter.
f = {'NonCompeteDistance':['count','mean','median','std'],'wgt_sales':['sum']}
f_pass = {'NonCompeteDistance':['count','mean','median','std'],'wgt_sales_passthru':['sum']}
selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )&(data["NonCompeteDistance"]>0)& (data.NonCompeteAlt=='NonCompete with strictly positive duration')
sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
# Pass-throughs excluding LLCs. (The >0 filter here is redundant — it is
# already part of `selection` above.) Only one NonCompeteAlt category survives
# the selection, so iloc[0] is unambiguous.
sel2=selection&sel&data.TargetStructurePassThru&(data["NonCompeteDistance"]>0)
temp=data[sel2][['NonCompeteDistance','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)
temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru excluding LLCs with NonCompete']
temp=pd.concat([temp2])
temp['count']=temp['count'].astype(int)
display(temp)
# Pass-throughs including LLCs.
sel2=selection&sel&data.TargetStructurePassThruLLC
temp=data[sel2][['NonCompeteDistance','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)
temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru including LLCs with NonCompete']
temp=pd.concat([temp2])
temp['count']=temp['count'].astype(int)
display(temp)
# All businesses.
sel2=selection&sel&data.TargetStructureBus
temp=data[sel2][['NonCompeteDistance','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)
temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['All business with NonCompete']
temp=pd.concat([temp2])
temp['count']=temp['count'].astype(int)
display(temp)
# Breakdown by legal form.
sel2=selection&sel
temp=data[sel2][['NonCompeteDistance','wgt_sales']].groupby([data[sel2].TargetStructureAlt] ).agg(f)
display(temp)
An agreement between the buyer and seller for the seller's personal services to be provided to the buyer, either as an employee or consultant, after the sale of the business.
Below we show summary statistics for intangible intensity for businesses, broken down by whether they have an owner consulting agreement.
# Intangible intensity broken down by consulting/employment agreement status.
f = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales':['sum']}
f_pass = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales_passthru':['sum']}
# NOTE(review): unlike NonCompete above, this flag is recoded to the STRINGS
# 'False'/'True', not booleans — presumably to match the values of
# EmploymentAgreementAlt (built elsewhere in this file); confirm.
data['EmploymentAgreement']=data['EmploymentAgreement'].replace(0,'False')
data['EmploymentAgreement']=data['EmploymentAgreement'].replace(1,'True')
selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )
sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
# Pass-throughs excluding LLCs. NOTE(review): iloc[0]/iloc[1] assume the two
# EmploymentAgreementAlt groups sort with 'without' first — verify against
# where that column is constructed.
sel2=selection&sel&data.TargetStructurePassThru
temp=data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[sel2].EmploymentAgreementAlt).agg(f)
temp1=[]
temp1=temp.iloc[0,:].values
temp1=pd.DataFrame(temp1,index=['count','mean','median','std','sales wgt']).T
temp1.index=['Passthru excluding LLCs without consulting agreements']
temp2=[]
temp2=temp.iloc[1,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru excluding LLCs with consulting agreements']
temp=pd.concat([temp1,temp2])
temp['count']=temp['count'].astype(int)
display(temp)
# Same consulting-agreement breakdown for pass-throughs including LLCs.
sel2 = selection & sel & data.TargetStructurePassThruLLC
subset = data[sel2]
temp = subset[['IntanglibleIntensityPPA', 'wgt_sales']].groupby(subset.EmploymentAgreementAlt).agg(f)
labels = [
    'Passthru including LLCs without consulting agreements',
    'Passthru including LLCs with consulting agreements',
]
rows = []
for pos, label in enumerate(labels):
    row = pd.DataFrame(temp.iloc[pos, :].values,
                       index=['count', 'mean', 'median', 'std', 'sales wgt']).T
    row.index = [label]
    rows.append(row)
temp = pd.concat(rows)
temp['count'] = temp['count'].astype(int)
display(temp)
# Same consulting-agreement breakdown for all businesses.
sel2=selection&sel&data.TargetStructureBus
temp=data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[sel2].EmploymentAgreementAlt).agg(f)
temp1=[]
temp1=temp.iloc[0,:].values
temp1=pd.DataFrame(temp1,index=['count','mean','median','std','sales wgt']).T
temp1.index=['All business without consulting agreements']
temp2=[]
temp2=temp.iloc[1,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['All business including LLCs with consulting agreements']
temp=pd.concat([temp1,temp2])
temp['count']=temp['count'].astype(int)
display(temp)
# Two-way breakdown: legal form x consulting agreement status.
sel2=selection&sel
temp=data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby([data[sel2].TargetStructureAlt,data[sel2].EmploymentAgreementAlt] ).agg(f)
display(temp)
Below we describe statistics for the length of the consulting agreement measured in days. This is extracted from the descriptive notes
# Summary statistics for the consulting agreement length in days (extracted
# from the descriptive notes).
f = {'EmploymentAgreementLength':['count','mean','median','std'],'wgt_sales':['sum']}
f_pass = {'EmploymentAgreementLength':['count','mean','median','std'],'wgt_sales_passthru':['sum']}
# NOTE(review): this ANDs the mask with EmploymentAgreementAlt itself, which
# only behaves as a filter if that column is boolean-like; verify against
# where EmploymentAgreementAlt is built.
selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )&data.EmploymentAgreementAlt
sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
# Pass-throughs excluding LLCs.
sel2=selection&sel&data.TargetStructurePassThru
temp=data[sel2][['EmploymentAgreementLength','wgt_sales']].groupby(data[sel2].EmploymentAgreementAlt).agg(f)
temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru excluding LLCs with consulting agreements']
temp=pd.concat([temp2])
temp['count']=temp['count'].astype(int)
display(temp)
# Pass-throughs including LLCs.
sel2=selection&sel&data.TargetStructurePassThruLLC
temp=data[sel2][['EmploymentAgreementLength','wgt_sales']].groupby(data[sel2].EmploymentAgreementAlt).agg(f)
temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru including LLCs with consulting agreements']
temp=pd.concat([temp2])
temp['count']=temp['count'].astype(int)
display(temp)
# All businesses.
sel2=selection&sel&data.TargetStructureBus
temp=data[sel2][['EmploymentAgreementLength','wgt_sales']].groupby(data[sel2].EmploymentAgreementAlt).agg(f)
temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['All business with consulting agreements']
temp=pd.concat([temp2])
temp['count']=temp['count'].astype(int)
display(temp)
# Breakdown by legal form.
sel2=selection&sel
temp=data[sel2][['EmploymentAgreementLength','wgt_sales']].groupby([data[sel2].TargetStructureAlt] ).agg(f)
display(temp)
# Intangible intensity by quintile of total PPA assets.
f = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales':['sum']}
f_pass = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales_passthru':['sum']}
selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )
sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
# Pass-throughs including LLCs, weighted by pass-through sales weights.
# BUG FIX: include_lowest=True — pd.cut bins are open on the left, so without
# it the observation(s) exactly at the 0% quantile (the sample minimum) fell
# outside the first bin and were silently dropped from the table.
sel2=selection&sel&data.TargetStructurePassThruLLC
quintile_edges=[data[sel2]['TotalAssetsPPA'].quantile(x) for x in [0.0,0.2,0.4,0.6,0.8,1.0]]
data[sel2][['IntanglibleIntensityPPA','wgt_sales_passthru']].groupby(pd.cut(data[sel2]['TotalAssetsPPA'],quintile_edges,include_lowest=True)).agg(f_pass)
# Same table over all five legal forms, weighted by overall sales weights.
# (The selection masks are identical to the ones above, so they are not recomputed.)
sel2=selection&sel
quintile_edges=[data[sel2]['TotalAssetsPPA'].quantile(x) for x in [0.0,0.2,0.4,0.6,0.8,1.0]]
data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby(pd.cut(data[sel2]['TotalAssetsPPA'],quintile_edges,include_lowest=True)).agg(f)
The sample includes all pass-thru firms in Pratts that have a valid entry for "TargetAge" and a positive value for "TotalIntangiblesPPA". The regression specification is
$$ \log(\text{TotalIntangiblesPPA}) = \text{constant} + \beta_{1} \text{TargetAge} + \beta_{2} \text{TargetAge}^2 + \text{sector fixed effects} + \text{year fixed effects} $$
dummies1 = pd.get_dummies(data['naics_code_cat']).rename(columns=lambda x: 'nacis_code_cat' + str(x))
# Year fixed-effect dummies (sector dummies `dummies1` are built just above;
# the 'nacis' spelling in their column names is a typo but harmless — the
# names are only used inside this regression).
dummies2 = pd.get_dummies(data['year']).rename(columns=lambda x: 'year' + str(x))
# Regressors: age, age squared, sector dummies, year dummies.
X = pd.concat([data[['TargetAge','TargetAge2']],dummies1,dummies2],axis=1)
# Log intangibles; non-positive values produce NaN/-inf here but are removed
# by `selection` below before the regression is fit.
Y=np.log(data.TotalIntangiblesPPA)
X = sm.add_constant(X)
# Sample: valid age, strictly positive intangibles, and pass-throughs only
# (everything except C corporations).
selection=(pd.notnull(data.TargetAge)) &(pd.notnull(data.TotalIntangiblesPPA) ) & (data.TotalIntangiblesPPA>0)& ~(data.TargetStructureAlt=='C Corporation')
model11 = sm.OLS(Y[selection], X[selection]).fit()
model11.summary()
We divide the PPA allocated to Intangible assets by Net Income in the previous year and then group by legal form and report the median.
# Median ratio of PPA intangibles to prior-year net income, by legal form
# (pass-throughs only).
pass_thru_forms = ['S Corporation', 'Sole Proprietorship', 'Partnership', 'LLC']
selPassThru = data['TargetStructureAlt'].isin(pass_thru_forms)
data['IntangibleToNetIncome'] = data['TotalIntangiblesPPA'] / data['NetIncome']
subset = data[selPassThru]
temp = subset[['IntangibleToNetIncome']].groupby(subset.TargetStructureAlt).median()
display(temp)
We show below that most of the transactions in "real estate" sector in Pratts have little or no long-term liabilities
# Real-estate pass-throughs (excluding LLCs): show that long-term liabilities
# are negligible for most transactions in this sector.
selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )
sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
sel=selection&sel&data.TargetStructurePassThru &(data.naics_label=='Real Estate and Rental and Leasing')
# Totals by legal form and year (displayed via the notebook's interactivity setting).
data[sel][['TotalAssetsPPA','TotalLiabilitiesPPA','DebtAssumed']].groupby([data.TargetStructureAlt,data.year]).sum()
pd.set_option('display.max_rows', None)
# Liability and assumed-debt ratios relative to total PPA assets
# (note: these columns are added to the WHOLE dataset, not just `sel`).
data['TotalLiabilityIntensityPPA']=data['TotalLiabilitiesPPA']/data['TotalAssetsPPA']
data['DebtAssumedIntensity']=data['DebtAssumed']/data['TotalAssetsPPA']
data[sel][['TotalLiabilityIntensityPPA','DebtAssumedIntensity']].describe()
#data[sel][['TotalLiabilityIntensityPPA','IntanglibleIntensityPPA','DebtAssumed','TotalAssetsPPA','TotalLiabilitiesPPA','TargetBusinessDescription','TargetStructureAlt']]
# Transportation & Warehousing pass-throughs (incl. LLCs): compare intangible
# intensity of "route" businesses against the rest of the sector.
selection = data.IntanglibleIntensityPPA.notnull() & ~np.isinf(data.IntanglibleIntensityPPA)
legal_forms = ['C Corporation', 'S Corporation', 'Sole Proprietorship', 'Partnership', 'LLC']
sel = data['TargetStructureAlt'].isin(legal_forms)
sel = selection & sel & data.TargetStructurePassThruLLC & (data.naics_label == 'Transportation and Warehousing')
# All purchase-price-allocation columns, kept for reference.
columnsPPA = [s for s in data.columns if "PPA" in s]
pd.set_option('display.max_columns', None)
# Flag deals whose business description mentions a route.
data['IsRoute'] = False
sel2 = data['TargetBusinessDescription'].str.contains("route", na=False, case=False)
data.loc[sel2, 'IsRoute'] = True
data[sel][['IntanglibleIntensityPPA']].groupby(data[sel].IsRoute).describe()
# Classify the free-text reason for sale from the deal notes into a small set
# of categories. Rules are applied in order, so a LATER match overrides an
# earlier one (e.g. a note matching both "retir" and "health" ends up 'health').
data['Notes']=data['Notes'].str.lower()
data['ReasonForSale']='Not Provided'
# Any note that mentions a reason at all starts as 'Others'; the specific
# rules below refine it.
sel2=data['Notes'].str.contains("reason ", na=False, case=False)
data.loc[sel2,'ReasonForSale']='Others'
selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )
sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
sel=selection&sel
# Retirement.
sel2=data['Notes'].str.contains("retir", na=False, case=False)
data.loc[sel2,'ReasonForSale']='retirement'
# Relocation (several phrasings).
sel2=(data['Notes'].str.contains("relocat", na=False, case=False)|data['Notes'].str.contains("moving", na=False, case=False))
data.loc[sel2,'ReasonForSale']='relocation'
sel2=data['Notes'].str.contains("out-of -state", na=False, case=False)
data.loc[sel2,'ReasonForSale']='relocation'
sel2=data['Notes'].str.contains("out of state", na=False, case=False)
data.loc[sel2,'ReasonForSale']='relocation'
# Health (includes death of the owner, below).
sel2=data['Notes'].str.contains("health", na=False, case=False)| data['Notes'].str.contains("illness", na=False, case=False)
data.loc[sel2,'ReasonForSale']='health'
#sel2=data['Notes'].str.contains("burnout", na=False, case=False)|data['Notes'].str.contains("bur nout", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='burnout'
# Other interests / opportunities (several phrasings).
sel2=(data['Notes'].str.contains("interest", na=False, case=False))&data['Notes'].str.contains("other", na=False, case=False)
data.loc[sel2,'ReasonForSale']='other interests/opportunities'
sel2=(data['Notes'].str.contains("opportunit", na=False, case=False) )&data['Notes'].str.contains("other", na=False, case=False)
data.loc[sel2,'ReasonForSale']='other interests/opportunities'
sel2=(data['Notes'].str.contains("opportunit", na=False, case=False) )&data['Notes'].str.contains("new", na=False, case=False)
data.loc[sel2,'ReasonForSale']='other interests/opportunities'
#sel2=data['Notes'].str.contains("divorce", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='divorce'
#sel2=data['Notes'].str.contains("family", na=False, case=False)& data['Notes'].str.contains("reason", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='family'
#sel2=data['Notes'].str.contains("dissolution", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='partnership dissolution'
#sel2=data['Notes'].str.contains("partnership breakup", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='partnership dissolution'
# Death of the owner is grouped under 'health'.
sel2=data['Notes'].str.contains("death", na=False, case=False)| data['Notes'].str.contains("passed away", na=False, case=False)
data.loc[sel2,'ReasonForSale']='health'
sel2=data['Notes'].str.contains("died", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
data.loc[sel2,'ReasonForSale']='health'
sel2=data['Notes'].str.contains("deceased", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
data.loc[sel2,'ReasonForSale']='health'
sel2=data['Notes'].str.contains("career", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
data.loc[sel2,'ReasonForSale']='other interests/opportunities'
sel2=data['Notes'].str.contains("a change", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
data.loc[sel2,'ReasonForSale']='other interests/opportunities'
#sel2=data['Notes'].str.contains("school", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='going back to school'
#sel2=data['Notes'].str.contains("downsizing", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='downsizing'
sel2=data['Notes'].str.contains("other business investments", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
data.loc[sel2,'ReasonForSale']='other interests/opportunities'
sel2=data['Notes'].str.contains("new", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
data.loc[sel2,'ReasonForSale']='other interests/opportunities'
sel2=data['Notes'].str.contains("ill", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
data.loc[sel2,'ReasonForSale']='health'
#sel2=data['Notes'].str.contains("personal time", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='more personal time'
# Intangible intensity and total sales weight by reason-for-sale category.
data[sel][['IntanglibleIntensityPPA']].groupby([data[sel].ReasonForSale]).describe()
data[sel][['wgt_sales']].groupby([data[sel].ReasonForSale]).sum()
# Inspect the residual "Others" reason-for-sale bucket: summary stats plus the
# raw notes, to see what the classification rules above missed.
sel1 = data.IntanglibleIntensityPPA.notnull() & ~np.isinf(data.IntanglibleIntensityPPA)
sel3 = data['TargetStructureAlt'].isin(['C Corporation', 'S Corporation', 'Sole Proprietorship', 'Partnership', 'LLC'])
sel2 = data['ReasonForSale'] == 'Others'
combined = sel1 & sel2 & sel3
data[combined]['IntanglibleIntensityPPA'].describe()
data[combined]['Notes']