Data Appendix: Pratts

This file generates the tables from Pratts data that are reported in data appendix to "Sweat Equity in U.S. Private Business"

Import libraries

In [1]:
import numpy as np
import pandas as pd
import dask.dataframe as dd
import re,string
#import stats
import os.path
pd.options.display.float_format = '{:.3f}'.format
pd.set_option('display.max_colwidth', -1)
#from SCEconomy import *
import seaborn as sns
sns.set_style('whitegrid')
pd.set_option('display.width', 1000)
import swifter
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline
import matplotlib as mpl
mpl.rc("savefig",dpi=200)
from matplotlib import pyplot as plt
# plt.rcParams["figure.figsize"] = (10.,10.)
import statsmodels.api as sm
C:\Users\admin\Anaconda3\lib\site-packages\ipykernel_launcher.py:8: FutureWarning: Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width.
  
In [2]:
from IPython.display import HTML
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[2]:
In [3]:
df_baseline=[]
df_target=[]
def gini(series, weights):
    if series.size!=weights.size:
        print("Weights are not the same size as the series")
        return
    sorted_series = series.sort_values()
    height, area = 0, 0
    for i in range(series.size):
        value = series[sorted_series.index[i]]
        wgt = weights[sorted_series.index[i]]
        height += wgt*value
        area += wgt*(height - value / 2)
    fair_area = height * weights.sum() / 2.
    return (fair_area - area) / fair_area


def gini_alt(series):
    csum= 0.0
    N=len(series)
    for i in range(N):
        for j in range(N):
            value_i = series.iloc[i]
            value_j = series.iloc[j]
            csum+=np.abs(value_i-value_j)
        
    gini_val=0.5*(csum/(N)**2)/series.mean()
    
    return gini_val


def Lorenz(series,quantiles):
    N=len(quantiles)
    Lz=np.zeros(N)
    for n in range(N):
        selection = series<series.quantile(quantiles[n])
        Lz[n]=series[selection].sum()/series.sum()
               
        
    return  Lz


def uploadData(case,w,rs,rc,p,delta):
    data=pd.DataFrame()
    is_iz=pd.read_csv(case+'/is_to_iz.csv',delim_whitespace=True,header=None)
    zgrid=pd.read_csv(case+'/zgrid.csv',delim_whitespace=True,header=None)
    is_to_ieps=pd.read_csv(case+'/is_to_ieps.csv', delim_whitespace=True,header=None)
    epsgrid=pd.read_csv(case+'/epsgrid.csv', delim_whitespace=True,header=None)
    T=100-1
    
    data['s_ind_']=pd.read_csv(case+'/data_i_s.csv', delim_whitespace=True,header=None).iloc[:,T-1]
    data['s_ind']=pd.read_csv(case+'/data_i_s.csv', delim_whitespace=True,header=None, usecols=[T])
    
    
    data['eps']=epsgrid.values.flatten()[is_to_ieps.astype('int').values.flatten()[data.s_ind.astype('int').values.flatten()]]   
    data['eps_']=epsgrid.values.flatten()[is_to_ieps.astype('int').values.flatten()[data.s_ind_.astype('int').values.flatten()]]   
    data['iz']=is_iz.astype('int').values.flatten()[data.s_ind.astype('int').values.flatten()]
    data['iz_']=is_iz.astype('int').values.flatten()[data.s_ind_.astype('int').values.flatten()]
    data['ieps']=is_to_ieps.astype('int').values.flatten()[data.s_ind.astype('int').values.flatten()]  
    data['ieps_']=is_to_ieps.astype('int').values.flatten()[data.s_ind_.astype('int').values.flatten()]
    data['z']=zgrid.values.flatten()[is_iz.astype('int').values.flatten()[data.s_ind.astype('int').values.flatten()]]    
    data['z_']=zgrid.values.flatten()[is_iz.astype('int').values.flatten()[data.s_ind_.astype('int').values.flatten()]]    
    data['ys']=pd.read_csv(case+'/data_ys.csv', delim_whitespace=True,header=None, usecols=[T])
    data['ys_']=pd.read_csv(case+'/data_ys.csv', delim_whitespace=True,header=None, usecols=[T-1])
    data['ks']=pd.read_csv(case+'/data_ks.csv', delim_whitespace=True,header=None, usecols=[T])
    data['ns']=pd.read_csv(case+'/data_ns.csv', delim_whitespace=True,header=None, usecols=[T])
    data['x']=pd.read_csv(case+'/data_x.csv',  delim_whitespace=True,header=None, usecols=[T])
    data['n']=pd.read_csv(case+'/data_n.csv', delim_whitespace=True,header=None, usecols=[T])
    data['ks_']=pd.read_csv(case+'/data_ks.csv', delim_whitespace=True,header=None, usecols=[T-1])
    data['ns_']=pd.read_csv(case+'/data_ns.csv', delim_whitespace=True,header=None, usecols=[T-1])
    data['x_']=pd.read_csv(case+'/data_x.csv',  delim_whitespace=True,header=None, usecols=[T-1])
    data['n_']=pd.read_csv(case+'/data_n.csv', delim_whitespace=True,header=None, usecols=[T-1])
    data['flagC']=pd.read_csv(case+'/data_is_c.csv', delim_whitespace=True,header=None, usecols=[T])
    data['flagC_']=pd.read_csv(case+'/data_is_c.csv', delim_whitespace=True,header=None, usecols=[T-1])
    data['flagO']=pd.read_csv(case+'/data_is_o.csv',delim_whitespace=True,header=None, usecols=[T])
    data['kappa']=pd.read_csv(case+'/data_kap.csv', delim_whitespace=True,header=None, usecols=[T])
    data['hkap']=pd.read_csv(case+'/data_hkap.csv',  delim_whitespace=True,header=None, usecols=[T])
    data['hy']=pd.read_csv(case+'/data_hy.csv',  delim_whitespace=True,header=None, usecols=[T])
    data['s_age']=pd.read_csv(case+'/s_age.csv',  delim_whitespace=True,header=None)
    data['sind_age']=pd.read_csv(case+'/sind_age.csv',  delim_whitespace=True,header=None)
    data['ind_age']=pd.read_csv(case+'/ind_age.csv',  delim_whitespace=True,header=None)
    data['c_age']=pd.read_csv(case+'/c_age.csv',  delim_whitespace=True,header=None)
    data['div_']=pd.read_csv(case+'/data_div_sweat.csv',  delim_whitespace=True,header=None, usecols=[T-1])
    data['div']= pd.read_csv(case+'/data_div_sweat.csv',  delim_whitespace=True,header=None, usecols=[T])
    data['val_sdf_']=pd.read_csv(case+'/data_val_sweat_dyna_sdf.csv',  delim_whitespace=True,header=None, usecols=[T-1])
    data['val_mf_']=pd.read_csv(case+'/data_val_sweat_dyna_fix.csv',  delim_whitespace=True,header=None, usecols=[T-1])

    data['val_sdf']=pd.read_csv(case+'/data_val_sweat_dyna_sdf.csv',  delim_whitespace=True,header=None, usecols=[T])
    data['val_mf']=pd.read_csv(case+'/data_val_sweat_dyna_fix.csv',  delim_whitespace=True,header=None, usecols=[T])
    #data['val_sdf_post_tax']=pd.read_csv(case+'/data_val_sweat_dyna_sdf_quasi_taxed.csv',  delim_whitespace=True,header=None, usecols=[T])
    #data['val_mf_post_tax']=pd.read_csv(case+'/data_val_sweat_dyna_fix_quasi_taxed.csv',  delim_whitespace=True,header=None, usecols=[T])
    data['a']= pd.read_csv(case+'/data_a.csv',  delim_whitespace=True,header=None, usecols=[T])  
    data['flagC']=data['flagC'].astype('bool')
    data['flagC_']=data['flagC_'].astype('bool')
    data['flagO']=data['flagO'].astype('bool')
    data['expenses']=(rs+delta)*data['ks']+w*data['ns']+data['x']
    data['expenses_']=(rs+delta)*data['ks_']+w*data['ns_']+data['x_']
    data['wns']=data['ns']*w
    data['pys']=p*data['ys']

    data['bus_inc']=p*data['ys']-data['expenses']
    data['bus_inc_']=p*data['ys_']-data['expenses_']
    data['lab_inc']=data.eps*w*data.n
    data['lab_inc_']=data.eps_*w*data.n_
    data['n_e']=data['hkap']+data['hy']
    data['ne']=data['n_e']
    data['kappa_adj']=(data.kappa**0.15)*data.z
    data['ret_sdf']=(data['val_sdf']+data['div_'])/data['val_sdf_']+0.02
    data['ret_mf']=(data['val_mf']+data['div_'])/data['val_mf_']+0.02
    data['div_yld_sdf']=(data['div'])/data['val_sdf']
    data['div_yld_mf']=(data['div'])/data['val_mf']
    #data['intan_int_sdf']=data['val_sdf'] /(data['val_sdf']+data['ks'])
    #data['intan_int_mf']=data['val_mf'] /(data['val_mf']+data['ks'])
    data['profit_rate']=100*data['bus_inc']/data['ks']
    data['measured_markups']=100*(data['bus_inc'])/(data['x']+(rs+delta)*data['ks']+w*data['ns'])
    data['measured_markups_alt']=100*(data['bus_inc']+data['x'])/((rs+delta)*data['ks']+w*data['ns'])
    data['x_pys']=data['x']/(p*data['ys'])
    data['cogs']=((rs+delta)*data['ks']+w*data['ns']+data['x'])    
    data['wealth_sdf']=data['a']+data['val_sdf']   
    data['wealth_mf']=data['a']+data['val_mf']   
    data['ztilde'] = np.log(data['z'])
    data['zkapphi']=data['z']*data['kappa']**(0.15)
    data['ztilde_meas'] = np.log(data['ys']/(data['ks']**(0.333)*data['ns']**(0.666)))
    data['inc']=0.0
    data['inc'][data['flagC']==True]=data[data['flagC']==True]['lab_inc']
    data['inc'][data['flagC']==False]=data[data['flagC']==False]['bus_inc']
    data['tot_inc']=data['inc']+rc*data['a'] 
    if os.path.exists(case+'/data_R.csv'):
        data['R']= pd.read_csv(case+'/data_R.csv',  delim_whitespace=True,header=None, usecols=[T])
        data['intan_int']=data['R']/(data['R']+data['ks'])
        data['wealth_R']=0.0
        data['wealth_R'][data['flagC']==True]=data[data['flagC']==True]['a']   
        data['wealth_R'][data['flagC']==False]=data[data['flagC']==False]['a']+data[data['flagC']==False]['R']   
        data['wealth_income_ratio']=data['wealth_R']/data['tot_inc']
  
  

    data['wgt']=1.0
    return data


def get_wealth_income_stats(case,w,rs,rc,p,delta):
    data=pd.read_csv(path_baseline[n]+'/data_R.csv',  delim_whitespace=True,header=None, usecols=[T])
    data.columns=['R']
    data['flagC']=dd.read_csv(path_baseline[n]+'/data_is_c.csv', delim_whitespace=True,header=None, usecols=[T]).compute().values
    data['flagC']=data['flagC'].astype('bool')
    data['ys']=pd.read_csv(case+'/data_ys.csv', delim_whitespace=True,header=None, usecols=[T])
    data['ks']=pd.read_csv(case+'/data_ks.csv', delim_whitespace=True,header=None, usecols=[T])
    data['ns']=pd.read_csv(case+'/data_ns.csv', delim_whitespace=True,header=None, usecols=[T])
    data['x']=pd.read_csv(case+'/data_x.csv',  delim_whitespace=True,header=None, usecols=[T])
    data['sind_age']=pd.read_csv(case+'/sind_age.csv',  delim_whitespace=True,header=None)
    data['expenses']=(rs+delta)*data['ks']+w*data['ns']+data['x']
    data['wns']=data['ns']*w
    data['pys']=p*data['ys']
    data['bus_inc']=p*data['ys']-data['expenses']
    
  

    data['wgt']=1.0
    return data


def get_prices(path_baseline,momfilename):
    with open (path_baseline+ momfilename, 'rt') as myfile:  # Open file lorem.txt for reading text
        for myline in myfile:                 # For each line, read it to a string 
          #print("gdp line")
          #print(myline.find("GDP("))
          if myline.find("w")==0:
            #print(myline)
            w=float(myline[ myline.find("=")+1:])
          if myline.find("S-good price")==0:
            #print(myline)
            p=float(myline[ myline.find("=")+1:])
          if myline.find("rc")==0:
            #print(myline)
            rc=float(myline[ myline.find("=")+1:])
          if myline.find("rs")==0:
            #print(myline)
            rs=float(myline[ myline.find("=")+1:])
          if myline.find("GDP(")==2:
            #print(myline)
            GVA=float(myline[ myline.find("=")+1:])
          if myline.find("investment,")==2:
            #print(myline)
            x=float(myline[ myline.find("=")+1:])
            #print("GDP = "+ str(GVA-x))
          if myline.find("delk ")==0:
            #print(myline)
            delta=float(myline[ myline.find("=")+1:])
            #print("delta= "+ str(delta))
            
    return w,p,rc,rs,delta,GVA-x



def decomposition (var_bin,bins,varlist):
    print('')
    print("SORTED BY " + var_bin)
    print("BINS = " + str([round(b,2) for b in bins ]) )
    

    data['bin_' + var_bin] = pd.cut(data[var_bin], bins, labels = False)     
    mean_baseline=data.groupby('bin_' + var_bin).mean()[varlist]
    count_baseline=data.groupby('bin_' + var_bin).count()[varlist]
    
    
    print("-------------- baseline means ----------")
    print(mean_baseline)
    print("-------------- baseline counts ----------")
    print(count_baseline)
    print('')
    return mean_baseline,count_baseline

def format_table(data,total):
    results=[]
    results=pd.DataFrame(data) 
    results['bins/total']=(sorted(pd.cut(df_baseline[var_bin], bins).unique()) )
    cols = results.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    results=results[cols]  #    OR    df = df.ix[:, cols]
    results=results.append(total,ignore_index=True)
    results.iloc[-1,0]='total'
    
    return results


def print_results (var_bin,bins,varlist,df_baseline=df_baseline,df_target=df_target):
    print('')
    print("SORTED BY " + var_bin)
    print("BINS = " + str([round(b,2) for b in bins ]) )
    

    df_baseline['bin_' + var_bin] = pd.cut(df_baseline[var_bin], bins, labels = False) 
    df_target['bin_' + var_bin] = pd.cut(df_target[var_bin], bins, labels = False) 
    
    mean_baseline=df_baseline.groupby('bin_' + var_bin).mean()[varlist]
    mean_target=df_target.groupby('bin_' + var_bin).mean()[varlist]
    sum_baseline=df_baseline.groupby('bin_' + var_bin).sum()[varlist]
    sum_target=df_target.groupby('bin_' + var_bin).sum()[varlist]
    count_baseline=df_baseline.groupby('bin_' + var_bin).count()[varlist]
    count_target=df_target.groupby('bin_' + var_bin).count()[varlist]
    
    intensive=(mean_target-mean_baseline)*(count_target+count_baseline)/2
    extensive=(count_target-count_baseline)*(mean_target+mean_baseline)/2
    total=intensive+extensive
    intensive=100.*(intensive/(total.sum()))
    extensive=100.*(extensive/(total.sum()))
    total_per_change=(100*total.sum()/sum_baseline.sum()).T
    intensive_alt=intensive*total_per_change*(1.0/100)
    extensive_alt=extensive*total_per_change*(1.0/100)
   
    return total,sum_baseline,intensive_alt,extensive_alt

   

    print('')
    
def __extract_num__(item_name, text):
    
    import re
    for ln in text.splitlines():
        if ln.startswith(item_name):

            match = re.search(r'[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?', ln[len(item_name):])

            if match is not None:
                return float(match.group(0))
            else:
                return np.nan
                    
                
    return np.nan

def get_data_textbuffer(item_name, outputs):
    
    return np.array([__extract_num__(item_name, text) for text in outputs])
    

def get_duration(sentence):
    sentence=sentence.lower()
    sentence=sentence.replace('-', '')
    sentence=sentence.replace("one", "1")
    sentence=sentence.replace("two", "2")
    sentence=sentence.replace("three", "3")
    sentence=sentence.replace("four", "4")
    sentence=sentence.replace("five", "5")
    sentence=sentence.replace("six", "6")
    sentence=sentence.replace("seven", "7")
    sentence=sentence.replace("eight", "8")
    sentence=sentence.replace("nine", "9")
    sentence=sentence.replace("ten", "10")

    pattern = 'week|month|year|day'
    convert_to_days={'week':7,'month':30,'year':365,'day':1}
    num_days=0
    for match in re.finditer(pattern, sentence):
        s = match.start()
        e = match.end()
        numstr=sentence[np.maximum(0,s-3) :e]
        if len(re.findall(r'\d+', numstr ))>0:
            days=convert_to_days[sentence[s:e]]*int(re.findall(r'\d+', numstr )[0])
            num_days=np.maximum(num_days,days)
    
    return num_days

def get_distance(sentence):
    sentence=sentence.lower()
    pattern = 'mil|kilo|km'
    convert_to_miles={'mil':1.0,'kilo':0.621371,'km':0.621371}
    num_miles=0
    for match in re.finditer(pattern, sentence):
        s = match.start()
        e = match.end()
        numstr=sentence[np.maximum(0,s-4) :e]
        if len(re.findall(r'\d+', numstr ))>0:
            miles=convert_to_miles[sentence[s:e]]*int(re.findall(r'\d+', numstr )[0])
            num_miles=np.maximum(num_miles,miles)
    
    return num_miles



   

We use all firms who report legal form of organization to be in C Corporation',S Corporation, Sole Proprietorship, Partnership,LLC and have a valid IntanglibleIntensity, that is, the ratio "TotalIntangiblesPPA/TotalAssetsPPA" is finite. To compute sales weight we use the variable "NetSales" normalized by "GDPDeflator" so that all sales are in 2007 dollars.## Read data from PrattStats,GDP deflator

In [4]:
data=pd.read_csv('prattsdatabase.csv',encoding='latin-1')    
f = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales':['sum']}
f_pass = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales_passthru':['sum']}


f2 = {'RelGoodwillIntensityPPA':['count','mean','median','std']}
In [5]:
GDPDeflator=pd.read_csv('GDPDEFAnnual.csv',encoding='latin-1')  
GDPDeflator=GDPDeflator[:-1]
GDPDeflator.year=GDPDeflator.year.astype(int)
data['year'] = data['SaleDate'].str[-4:]
data['year']=data['year'].astype(int)
data['pricedef'] = data['year'].map(GDPDeflator.set_index('year')['GDPDEF2007'])
data['RealNetSales']=data.NetSales/data.pricedef
data['RealNetSales']=data['RealNetSales']/1000000
In [6]:
data['IntanglibleIntensityPPA']=data.TotalIntangiblesPPA/data.TotalAssetsPPA
data['NonCompete'][data['NonCompete']=='Yes']=1.0
data['NonCompete'][data['NonCompete']=='No']=0.0
data['NetIncomeOverSalePrice']=(data.EarningsBeforeTaxes/data.MVICPrice)*100
data['IdentifiableIntanglibleIntensityPPA']=100*data.TotalIdentifiableIntangiblesPPA/data.TotalAssetsPPA
data['TotalIntangiblesFAPPARatio']=100*data['TotalIntangiblesPPA']/(data['TotalIntangiblesPPA']+data['FixedAssetsPPA'])
data['GoodwillIntensityPPA']=100*data.GoodwillPPA/data.TotalAssetsPPA
data['IntanglibleIntensityPPA']=100*data.TotalIntangiblesPPA/data.TotalAssetsPPA
data['logIntant']=np.log(data.TotalIntangiblesPPA)
data['TargetAge2']=data['TargetAge']**2
data['IntanglibleSales']=100*data.TotalIntangiblesPPA/data.NetSales

data['RelGoodwillIntensityPPA']=100*(data['GoodwillIntensityPPA']/data['IntanglibleIntensityPPA'])
data['FixedIntensityPPA']=100*data.FixedAssetsPPA/data.TotalAssetsPPA
data['FixedAssetsBM']=100*data.FixedAssets/data.FixedAssetsPPA

data['LeveragePPA']=data.InterestBearingLiabilitiesPPA/data.TotalAssetsPPA
data['Leverage']=data.LongTermLiabilities/data.TotalAssets
data['Leverage'][np.isinf(data['Leverage'])]=np.nan

data['EmploymentAgreementDescription']=data['EmploymentAgreementDescription'].astype(str)
data["EmploymentAgreementLength"] = data.EmploymentAgreementDescription.swifter.apply(get_duration)
data['NonCompeteDescription']=data['NonCompeteDescription'].astype(str)
data["NonCompeteDistance"] = data.NonCompeteDescription.swifter.apply(get_distance)
data['EmploymentAgreementAlt']=(data["EmploymentAgreementLength"]>0)


In [7]:
naics_labels=['Agriculture, Forestry, Fishing and Hunting',\
        'Mining, Quarrying, and Oil and Gas Extraction',\
        'Utilities',\
        'Construction',\
        'NA1',\
        'Manufacturing',\
        'NA2',\
        'Wholesale Trade',\
        'Retail Trade',\
        'NA3',\
        'Transportation and Warehousing',\
        'NA4',\
        'Information',\
        'Finance and Insurance',\
        'Real Estate and Rental and Leasing',\
        'Professional, Scientific, and Technical Services',\
        'Management of Companies and Enterprises',\
        'Administrative and Support and Waste Management and Remediation Services',\
        'Educational Services',\
        'Health Care and Social Assistance',\
        'Arts, Entertainment, and Recreation',\
        'Accommodation and Food Service',\
        'Other Services','Public administration']


naics_code=[11,21,22,23,24,31,35,42,44,46,48,50,51,52,53,54,55,56,61,62,71,72,81,83,92]
n=24


#naics_labels=['Ag. Mining and Utilities',\
#        'Consts and Mfg',\
#        'Trade and warehousing',\
#        'Information and FIRE',\
#        'RE',\
#        'Professional services',\
#        'Nonprof services']

#naics_code=[11,23,42,51,53,54,57,92]

data['TargetStructureAlt']=data['TargetStructure'].values
sel=data['TargetStructure'].isin(['Partnership', 'Limited Partnership', 'LLP'])
data.loc[sel,'TargetStructureAlt']='Partnership'
sel=data['TargetStructure'].isin(['Consolidated', 'UK Corporation', 'PC','Non-Profit','Ltd','Limited Corporation'])
data.loc[sel,'TargetStructureAlt']='Others'
sel=data['TargetStructure'].isin(['C corporation'])
data.loc[sel,'TargetStructureAlt']='C Corporation'
data['naics_label']=pd.cut(data['TargetNAICS1']/10**10,naics_code[:n+1],labels=naics_labels[:n],include_lowest=True,right=False)
data['naics_code_cat']=pd.cut(data['TargetNAICS1']/10**10,naics_code[:n+1],include_lowest=True,right=False)
data['naics_2d']=pd.cut(data['TargetNAICS1']/10**10,naics_code[:n+1],labels=naics_code[:n],include_lowest=True,right=False)
In [8]:
#data['naics_label']=data['naics_label'].astype(str)
#merge_naics_list=['Mining, Quarrying, and Oil and Gas Extraction','Agriculture, Forestry, Fishing and Hunting','Utilities','Wholesale Trade','Educational Services','Arts, Entertainment, and Recreation','nan']
#for naics in merge_naics_list:
#    sel=data['naics_label']==naics
#    data.loc[sel,'naics_label']='others'
    #data.loc[sel,'naics_label'].replace(naics,'others')
  #  data.loc[sel,'naics_code_cat'].replace(naics,'others')
  #  data.loc[sel,'naics_2d']=99
    
    
In [9]:
selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )
sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
selection=selection&sel
selPassThru=data['TargetStructureAlt'].isin(['S Corporation', 'Sole Proprietorship', 'Partnership'])
data['TargetStructurePassThru']=data['TargetStructure'].values
data.loc[selPassThru,'TargetStructurePassThru']=True
data.loc[~selPassThru,'TargetStructurePassThru']=False
selPassThru=data['TargetStructureAlt'].isin(['S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
data['TargetStructurePassThruLLC']=data['TargetStructure'].values
data.loc[selPassThru,'TargetStructurePassThruLLC']=True
data.loc[~selPassThru,'TargetStructurePassThruLLC']=False
data['TargetStructureBus']=data['TargetStructure'].values
data.loc[sel,'TargetStructureBus']='all_bus'
In [10]:
data['wgt_sales']=data['RealNetSales']/data[selection]['RealNetSales'].sum()*100
selPassThru=data['TargetStructureAlt'].isin(['S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
selPassThru=selection&selPassThru
data['wgt_sales_passthru']=data['RealNetSales']/data[selPassThru]['RealNetSales'].sum()*100

Aggregates

In [11]:
temp1=data[selection][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[selection].TargetStructurePassThru).agg(f)
temp1=temp1.iloc[1,:].values
temp1=pd.DataFrame(temp1,index=['count','mean','median','std','sales wgt']).T
temp1.index=['Passthru, excluding LLCs']

temp2=data[selection][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[selection].TargetStructurePassThruLLC).agg(f)
temp2=temp2.iloc[1,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru, including LLCs']


temp3=data[selection][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[selection].TargetStructureBus).agg(f)
temp3=temp3.iloc[0,:].values
temp3=pd.DataFrame(temp3,index=['count','mean','median','std','sales wgt']).T
temp3.index=['All businesses']

temp=pd.concat([temp1,temp2,temp3])
temp['count']=temp['count'].astype(int)

display(temp)
count mean median std sales wgt
Passthru, excluding LLCs 6858 58.010 64.000 31.779 8.919
Passthru, including LLCs 8863 57.498 63.222 32.145 24.963
All businesses 10854 57.582 63.208 32.130 100.000
In [12]:
temp=data[selection][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[selection].TargetStructureAlt).agg(f)
temp=temp.iloc[:,:].values
temp=pd.DataFrame(temp,columns=['count','mean','median','std','sales wgt'])
temp.index=['C corporations','LLCs','Partnerships','S corporation','Sole Proprietors']
tempsum=temp.sum()
tempsum=tempsum.values
tempsum=pd.DataFrame(tempsum,index=['count','mean','median','std','sales wgt']).T
tempsum.index=['Total']
temp=pd.concat([temp,tempsum]) 
temp['count']=temp['count'].astype(int)

display(temp.iloc[[4,2,3,1,0,5],:])
count mean median std sales wgt
Sole Proprietors 1140 57.497 63.795 30.654 0.225
Partnerships 197 56.575 66.667 32.218 1.331
S corporation 5521 58.168 63.889 31.995 7.364
LLCs 2005 55.745 61.081 33.312 16.044
C corporations 1991 57.959 63.092 32.069 75.037
Total 10854 285.943 318.524 160.247 100.000

Pass-throughs by industry (NAICs)

In [13]:
selPassThru=data['TargetStructureAlt'].isin(['S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
selPassThru=selection&selPassThru
temp=data[selPassThru][['IntanglibleIntensityPPA','wgt_sales_passthru']].groupby(data[selPassThru].naics_label).agg(f_pass)
indexlist=temp.index.to_list()
temp=temp.iloc[:,:].values
temp=pd.DataFrame(temp,columns=['count','mean','median','std','sales wgt'])
temp.index=indexlist
temp['count']=temp['count'].astype(int)
tempsum=temp.sum()
tempsum=tempsum.values
tempsum=pd.DataFrame(tempsum,index=['count','mean','median','std','sales wgt']).T
tempsum.index=['Total']
temp=pd.concat([temp,tempsum]) 
temp['count']=temp['count'].astype(int)
display(temp[~np.isnan(temp['mean'])] )
count mean median std sales wgt
Agriculture, Forestry, Fishing and Hunting 16 30.428 35.230 24.947 0.729
Mining, Quarrying, and Oil and Gas Extraction 16 43.164 41.806 29.033 6.512
Utilities 10 60.773 66.558 33.485 0.102
Construction 383 69.284 74.312 37.939 5.689
Manufacturing 797 54.474 59.025 29.303 40.262
Wholesale Trade 17 54.821 52.147 28.897 0.014
Retail Trade 1569 54.177 58.842 30.001 6.642
Transportation and Warehousing 327 66.232 73.867 25.400 4.980
Information 189 80.830 89.474 22.532 10.309
Finance and Insurance 155 85.525 95.726 25.207 2.842
Real Estate and Rental and Leasing 268 76.292 92.660 31.027 1.755
Professional, Scientific, and Technical Services 462 81.299 89.530 21.027 4.875
Management of Companies and Enterprises 12 56.271 64.005 33.390 5.252
Administrative and Support and Waste Management and Remediation Services 1030 73.228 79.833 24.739 2.796
Educational Services 115 59.075 72.000 34.288 0.518
Health Care and Social Assistance 450 62.816 68.939 29.359 2.774
Arts, Entertainment, and Recreation 168 45.763 44.456 30.867 0.890
Accommodation and Food Service 1689 41.837 41.875 30.182 1.803
Other Services 1187 45.465 45.455 31.326 1.257
Total 8860 1141.756 1245.740 552.951 99.999
In [14]:
grp=['S Corporation']
selPassThruWoLLC=data['TargetStructureAlt'].isin(grp)
selPassThruWoLLC=selection&selPassThruWoLLC
data['wgt_sales_passthruWoLLC']=data['RealNetSales']/data[selPassThruWoLLC]['RealNetSales'].sum()*100

f_passWoLLC = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales_passthruWoLLC':['sum']}
selPassThruWoLLC=data['TargetStructureAlt'].isin(grp)
selPassThruWoLLC=selection&selPassThruWoLLC
temp=data[selPassThruWoLLC][['IntanglibleIntensityPPA','wgt_sales_passthruWoLLC']].groupby(data[selPassThruWoLLC].naics_label).agg(f_passWoLLC)
indexlist=temp.index.to_list()
temp=temp.iloc[:,:].values
temp=pd.DataFrame(temp,columns=['count','mean','median','std','sales wgt'])
temp.index=indexlist
temp['count']=temp['count'].astype(int)
tempsum=temp.sum()
tempsum=tempsum.values
tempsum=pd.DataFrame(tempsum,index=['count','mean','median','std','sales wgt']).T
tempsum.index=['Total']
temp=pd.concat([temp,tempsum]) 
temp['count']=temp['count'].astype(int)
display(temp[~np.isnan(temp['mean'])] )
count mean median std sales wgt
Agriculture, Forestry, Fishing and Hunting 9 22.844 18.158 22.066 0.215
Mining, Quarrying, and Oil and Gas Extraction 5 42.877 36.647 26.079 0.569
Utilities 8 61.637 72.500 36.961 0.033
Construction 289 70.146 74.082 40.699 10.792
Manufacturing 549 53.558 57.500 28.513 22.676
Wholesale Trade 12 54.414 48.634 27.764 0.027
Retail Trade 1029 55.331 60.000 29.402 19.210
Transportation and Warehousing 250 67.516 75.192 24.959 9.914
Information 103 79.599 86.000 23.202 4.032
Finance and Insurance 116 87.896 95.789 22.025 5.799
Real Estate and Rental and Leasing 140 73.434 90.339 31.590 0.730
Professional, Scientific, and Technical Services 294 80.884 88.420 20.397 6.544
Management of Companies and Enterprises 5 40.983 60.152 31.355 6.884
Administrative and Support and Waste Management and Remediation Services 655 73.633 81.750 24.589 4.043
Educational Services 73 59.588 72.000 31.452 0.502
Health Care and Social Assistance 273 61.698 66.601 30.903 1.747
Arts, Entertainment, and Recreation 94 41.685 43.530 31.974 0.467
Accommodation and Food Service 931 41.767 42.360 29.971 3.625
Other Services 685 45.434 45.946 30.783 2.189
Total 5520 1114.921 1215.600 544.684 99.999

Breakdown of Intangible Assets

Below we show summary statistics for different types of intangibles. For each type we compute the share in total intangibles for observations with positive values.

  • Customer Relationships / Lists PPA :The value attributed to any customer relationships or customer list acquired as part of the acquisition.
  • Backlog PPA :Any acquired purchase orders or booked sales on orders that have not been fully completed.
  • Developed / Existing Technology PPA :Any acquired developed/completed technology, core technology, and/or acquired or purchased technology. Technology that is in the process of being developed is included in In-Process R&D.
  • In-Process R&D PPA : Intangible assets acquired relating to any uncompleted/in process research and development.
  • Trade Names / Trademarks PPA : The value of acquired trademarks/service marks to identify and/or differentiate goods and services or business trade names.
  • Noncompete Agreements PPA The value placed on an agreement with the selling party not to compete with the purchaser, usually for a certain period of time and usually in a specified geographic area.
  • Other Intangibles PPA : Any other intangible asset acquired that is not listed in the preceding fields.

  • Goodwill PPA Represents the excess of the aggregate purchase price over the fair value of net assets of the acquired business.

In [15]:
varlist=['CustomerRelationshipsListsPPA','BacklogPPA','DevelopedExistingTechnologyPPA','InProcessRandDPPA','TradeNamesTradeMarksPPA','NonCompeteAgreementsPPA','OtherIntangiblesPPA','GoodwillPPA']
for var in varlist:
    print(var)
    data[var+'Intensity']=data[var]/data['TotalIntangiblesPPA']*100
    var=var+'Intensity'
    selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )
    sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
    selection=selection&sel
    sel2=selection&(data[selection][var]>0)

    f = {var:['count','mean','median','std'],'wgt_sales':['sum']}


    temp1=data[sel2][[var,'wgt_sales']].groupby(data[sel2].TargetStructurePassThru).agg(f)
    temp1=temp1.iloc[1,:].values
    temp1=pd.DataFrame(temp1,index=['count','mean','median','std','sales wgt']).T
    temp1.index=['Passthru, excluding LLCs']

    temp2=data[sel2][[var,'wgt_sales']].groupby(data[sel2].TargetStructurePassThruLLC).agg(f)
    temp2=temp2.iloc[1,:].values
    temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
    temp2.index=['Passthru, including LLCs']


    temp3=data[sel2][[var,'wgt_sales']].groupby(data[sel2].TargetStructureBus).agg(f)
    temp3=temp3.iloc[0,:].values
    temp3=pd.DataFrame(temp3,index=['count','mean','median','std','sales wgt']).T
    temp3.index=['All businesses']

    temp=pd.concat([temp1,temp2,temp3])
    temp['count']=temp['count'].astype(int)
    

    display(temp)
    
    temp=data[sel2][[var,'wgt_sales']].groupby(data[sel2].TargetStructureAlt).agg(f)
    to_list=temp.index.to_list()
    temp=temp.iloc[:,:].values
    temp=pd.DataFrame(temp,columns=['count','mean','median','std','sales wgt'])
    temp.index=to_list
    tempsum=temp.sum()
    tempsum=tempsum.values
    tempsum=pd.DataFrame(tempsum,index=['count','mean','median','std','sales wgt']).T
    tempsum.index=['Total']
    temp=pd.concat([temp,tempsum]) 
    temp['count']=temp['count'].astype(int)
    temp.iloc[-1,1]='--'
    temp.iloc[-1,2]='--'
    temp.iloc[-1,3]='--'
    
    display(temp)
CustomerRelationshipsListsPPA
count mean median std sales wgt
Passthru, excluding LLCs 160 32.016 27.809 25.020 2.364
Passthru, including LLCs 281 33.477 28.188 27.429 9.451
All businesses 590 29.013 23.994 23.407 34.191
count mean median std sales wgt
C Corporation 309 24.954 21.175 18.142 24.739
LLC 121 35.408 28.462 30.322 7.087
Partnership 10 34.822 33.197 27.453 0.138
S Corporation 145 32.363 28.188 25.111 2.224
Sole Proprietorship 5 16.335 22.388 12.497 0.002
Total 590 -- -- -- 34.191
BacklogPPA
count mean median std sales wgt
Passthru, excluding LLCs 16 6.193 1.736 15.716 0.850
Passthru, including LLCs 31 6.874 2.201 15.174 2.675
All businesses 71 4.790 2.201 10.398 8.068
count mean median std sales wgt
C Corporation 40 3.175 2.081 3.289 5.392
LLC 15 7.601 3.147 15.087 1.826
Partnership 1 0.318 0.318 NaN 0.019
S Corporation 15 6.585 1.755 16.187 0.830
Total 71 -- -- -- 8.068
DevelopedExistingTechnologyPPA
count mean median std sales wgt
Passthru, excluding LLCs 76 18.715 11.024 22.864 0.875
Passthru, including LLCs 127 19.063 11.662 22.628 5.852
All businesses 380 22.854 13.790 24.224 18.212
count mean median std sales wgt
C Corporation 253 24.757 15.361 24.812 12.360
LLC 51 19.582 12.121 22.487 4.977
S Corporation 73 19.390 12.315 23.081 0.874
Sole Proprietorship 3 2.280 1.238 1.806 0.001
Total 380 -- -- -- 18.212
InProcessRandDPPA
count mean median std sales wgt
Passthru, excluding LLCs 8 22.980 5.663 31.654 0.119
Passthru, including LLCs 12 22.512 7.027 29.685 2.907
All businesses 90 28.711 14.236 31.409 6.051
count mean median std sales wgt
C Corporation 78 29.665 17.162 31.741 3.144
LLC 4 21.575 9.880 29.856 2.788
S Corporation 8 22.980 5.663 31.654 0.119
Total 90 -- -- -- 6.051
TradeNamesTradeMarksPPA
count mean median std sales wgt
Passthru, excluding LLCs 178 19.549 13.826 19.533 2.390
Passthru, including LLCs 276 19.319 11.663 22.684 9.149
All businesses 523 16.210 8.696 20.386 36.141
count mean median std sales wgt
C Corporation 247 12.735 5.961 16.842 26.992
LLC 98 18.901 8.584 27.610 6.759
Partnership 7 10.030 2.971 16.263 0.135
S Corporation 155 18.994 13.889 18.881 2.250
Sole Proprietorship 16 29.087 25.622 24.367 0.005
Total 523 -- -- -- 36.141
NonCompeteAgreementsPPA
count mean median std sales wgt
Passthru, excluding LLCs 1972 23.261 13.333 25.179 2.296
Passthru, including LLCs 2364 22.438 12.195 25.011 3.885
All businesses 2831 23.080 12.922 25.963 7.317
count mean median std sales wgt
C Corporation 467 26.326 16.667 30.150 3.433
LLC 392 18.300 7.692 23.746 1.589
Partnership 70 25.503 18.296 23.164 0.135
S Corporation 1330 21.785 10.914 25.562 2.038
Sole Proprietorship 572 26.418 19.164 24.230 0.123
Total 2831 -- -- -- 7.317
OtherIntangiblesPPA
count mean median std sales wgt
Passthru, excluding LLCs 304 42.118 29.851 36.053 1.312
Passthru, including LLCs 461 43.290 31.034 36.555 5.303
All businesses 681 38.359 24.242 35.955 23.591
count mean median std sales wgt
C Corporation 220 28.027 13.177 32.385 18.288
LLC 157 45.559 32.735 37.521 3.991
Partnership 11 29.586 17.290 32.440 0.029
S Corporation 260 42.441 29.851 36.585 1.276
Sole Proprietorship 33 43.748 43.333 32.984 0.007
Total 681 -- -- -- 23.591
GoodwillPPA
count mean median std sales wgt
Passthru, excluding LLCs 3732 85.069 97.917 22.292 6.593
Passthru, including LLCs 5063 85.702 98.974 22.352 19.146
All businesses 6370 82.674 96.970 24.189 87.521
count mean median std sales wgt
C Corporation 1307 70.948 74.017 27.275 68.376
LLC 1331 87.476 100.000 22.432 12.553
Partnership 106 74.516 81.873 24.984 0.402
S Corporation 2894 86.709 99.502 21.917 6.034
Sole Proprietorship 732 80.112 89.342 22.231 0.156
Total 6370 -- -- -- 87.521

Business Noncompete

Agreement with the selling party not to compete with the purchaser, usually for a certain period of time and usually in a specified geographic area. Pratts provides information on whether there was a a Non compete or not, and if yes, in some cases its description such as value/length of time.

Intangible Intensity

Below we show summary statistics for intangible intensity for businesses broken down by whether they have a business noncompete

In [16]:
f = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales':['sum']}
f_pass = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales_passthru':['sum']}
data['NonCompete']=data['NonCompete'].replace(0,False)
data['NonCompete']=data['NonCompete'].replace(1,True)
data['NonCompete']=data['NonCompete'].astype(bool)
data['NonCompeteAlt']=np.NaN
sel=(data['NonCompete']==True)&(data.NonCompeteLength>0)
data.loc[sel,'NonCompeteAlt']='NonCompete  with strictly positive duration'
sel=(data['NonCompete']==True)&(data.NonCompeteLength==0) 
data.loc[sel,'NonCompeteAlt']='NonCompete with   duration  = 0'
sel=(data['NonCompete']==True)& np.isnan(data.NonCompeteLength) 
data.loc[sel,'NonCompeteAlt']='NonCompete with  no description of terms'
sel=(data['NonCompete']==False)
data.loc[sel,'NonCompeteAlt']='No Agreement'

data['NonCompeteLength']=data['NonCompeteLength']*30


#(data.NonCompeteLength>0)

selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )

sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])

sel2=selection&sel&data.TargetStructurePassThru

temp=data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)
#print(temp)
temp1=[]
temp1=temp.iloc[3,:].values
temp1=pd.DataFrame(temp1,index=['count','mean','median','std','sales wgt']).T
temp1.index=['Passthru excluding LLCs with NoNonCompete but no description of terms']

temp2=[]
temp2=temp.iloc[2,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru excluding LLCs with NonCompete agreement and  duration  = 0']


temp3=[]
temp3=temp.iloc[1,:].values
temp3=pd.DataFrame(temp3,index=['count','mean','median','std','sales wgt']).T
temp3.index=['Passthru excluding LLCs with NonCompete agreement with strictly positive duration']


temp4=[]
temp4=temp.iloc[0,:].values
temp4=pd.DataFrame(temp4,index=['count','mean','median','std','sales wgt']).T
temp4.index=['Passthru excluding LLCs without NonCompete agreements']


temp=pd.concat([temp4,temp2,temp1,temp3])
temp['count']=temp['count'].astype(int)
    
display(temp)


sel2=selection&sel&data.TargetStructurePassThruLLC
temp=data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)   
temp1=[]
temp1=temp.iloc[3,:].values
temp1=pd.DataFrame(temp1,index=['count','mean','median','std','sales wgt']).T
temp1.index=['Passthru including LLCs with NonCompete but no description of terms']

temp2=[]
temp2=temp.iloc[2,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru including LLCs with NonCompete agreement and  duration  = 0']


temp3=[]
temp3=temp.iloc[1,:].values
temp3=pd.DataFrame(temp3,index=['count','mean','median','std','sales wgt']).T
temp3.index=['Passthru including LLCs with NonCompete agreement with strictly positive duration']


temp4=[]
temp4=temp.iloc[0,:].values
temp4=pd.DataFrame(temp4,index=['count','mean','median','std','sales wgt']).T
temp4.index=['Passthru including LLCs without NonCompete agreements']


temp=pd.concat([temp4,temp2,temp1,temp3])
temp['count']=temp['count'].astype(int)
    
display(temp)

sel2=selection&sel&data.TargetStructureBus
temp=data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)   
temp1=[]
temp1=temp.iloc[3,:].values
temp1=pd.DataFrame(temp1,index=['count','mean','median','std','sales wgt']).T
temp1.index=['All businesses with NonCompete but no description of terms']

temp2=[]
temp2=temp.iloc[2,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['All businesses with NonCompete agreement and  duration  = 0']


temp3=[]
temp3=temp.iloc[1,:].values
temp3=pd.DataFrame(temp3,index=['count','mean','median','std','sales wgt']).T
temp3.index=['All businesses with NonCompete agreement with strictly positive duration']


temp4=[]
temp4=temp.iloc[0,:].values
temp4=pd.DataFrame(temp4,index=['count','mean','median','std','sales wgt']).T
temp4.index=['All businesses without NonCompete agreements']


temp=pd.concat([temp4,temp2,temp1,temp3])
temp['count']=temp['count'].astype(int)
    
display(temp)

selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )

sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])

sel2=selection&sel

temp=data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby([data[sel2].TargetStructureAlt,data[sel2].NonCompeteAlt] ).agg(f)

display(temp)
    
count mean median std sales wgt
Passthru excluding LLCs without NonCompete agreements 581 57.011 62.155 31.418 4.867
Passthru excluding LLCs with NonCompete agreement and duration = 0 25 56.228 61.714 29.831 0.011
Passthru excluding LLCs with NoNonCompete but no description of terms 314 62.484 67.792 27.275 1.182
Passthru excluding LLCs with NonCompete agreement with strictly positive duration 5938 57.879 63.819 32.031 2.860
count mean median std sales wgt
Passthru including LLCs without NonCompete agreements 834 57.362 62.358 31.631 18.724
Passthru including LLCs with NonCompete agreement and duration = 0 35 55.380 60.000 32.765 0.028
Passthru including LLCs with NonCompete but no description of terms 379 62.014 67.423 27.989 1.411
Passthru including LLCs with NonCompete agreement with strictly positive duration 7615 57.298 63.077 32.379 4.801
count mean median std sales wgt
All businesses without NonCompete agreements 1578 59.893 66.703 31.795 89.710
All businesses with NonCompete agreement and duration = 0 44 55.645 60.857 31.707 0.034
All businesses with NonCompete but no description of terms 502 62.131 68.289 28.223 3.423
All businesses with NonCompete agreement with strictly positive duration 8730 56.913 62.494 32.366 6.833
IntanglibleIntensityPPA wgt_sales
count mean median std sum
TargetStructureAlt NonCompeteAlt
C Corporation No Agreement 744 62.729 70.728 31.760 70.986
NonCompete with strictly positive duration 1115 54.287 57.500 32.170 2.032
NonCompete with duration = 0 9 56.676 69.050 28.976 0.005
NonCompete with no description of terms 123 62.494 70.690 29.047 2.013
LLC No Agreement 253 58.168 64.355 32.162 13.857
NonCompete with strictly positive duration 1677 55.239 60.506 33.512 1.941
NonCompete with duration = 0 10 53.260 54.444 40.933 0.017
NonCompete with no description of terms 65 59.741 65.400 31.344 0.229
Partnership No Agreement 28 58.694 70.360 29.711 1.171
NonCompete with strictly positive duration 150 54.291 64.576 32.985 0.131
NonCompete with no description of terms 19 71.479 77.143 26.218 0.028
S Corporation No Agreement 456 58.266 62.159 30.824 3.675
NonCompete with strictly positive duration 4817 58.038 64.000 32.262 2.537
NonCompete with duration = 0 23 55.918 61.714 30.158 0.010
NonCompete with no description of terms 225 60.982 65.672 28.619 1.142
Sole Proprietorship No Agreement 97 50.626 60.000 34.103 0.020
NonCompete with strictly positive duration 971 57.647 63.055 30.702 0.192
NonCompete with duration = 0 2 59.794 59.794 36.349 0.000
NonCompete with no description of terms 70 64.870 67.736 22.382 0.012

Terms of Contract: Length of contract

Below we describe statistics for the length of the NonCompete in days

In [17]:
f = {'NonCompeteLength':['count','mean','median','std'],'wgt_sales':['sum']}
f_pass = {'NonCompeteLength':['count','mean','median','std'],'wgt_sales_passthru':['sum']}

selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) ) & (data.NonCompeteAlt=='NonCompete  with strictly positive duration')

sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])

sel2=selection&sel&data.TargetStructurePassThru
temp=data[sel2][['NonCompeteLength','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)   


temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru excluding LLCs with NonCompete']

temp=pd.concat([temp2])

temp['count']=temp['count'].astype(int)
        
display(temp)


sel2=selection&sel&data.TargetStructurePassThruLLC
temp=data[sel2][['NonCompeteLength','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)   


temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru including LLCs with NonCompete']

temp=pd.concat([temp2])

temp['count']=temp['count'].astype(int)
        
display(temp)

sel2=selection&sel&data.TargetStructureBus
temp=data[sel2][['NonCompeteLength','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)   


temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['All business  including LLCs with NonCompete']

temp=pd.concat([temp2])

temp['count']=temp['count'].astype(int)
        
display(temp)


sel2=selection&sel

temp=data[sel2][['NonCompeteLength','wgt_sales']].groupby([data[sel2].TargetStructureAlt] ).agg(f)

display(temp)
    
count mean median std sales wgt
Passthru excluding LLCs with NonCompete 5938 1612.998 1800.000 1251.640 2.860
count mean median std sales wgt
Passthru including LLCs with NonCompete 7615 1589.622 1800.000 1282.746 4.801
count mean median std sales wgt
All business including LLCs with NonCompete 8730 1595.502 1800.000 1272.922 6.833
NonCompeteLength wgt_sales
count mean median std sum
TargetStructureAlt
C Corporation 1115 1635.659 1800.000 1203.468 2.032
LLC 1677 1506.852 1080.000 1384.518 1.941
Partnership 150 1725.200 1800.000 1595.284 0.131
S Corporation 4817 1601.204 1800.000 1292.476 2.537
Sole Proprietorship 971 1654.171 1800.000 948.370 0.192

Terms of Contract: radius

Below we describe statistics for the radius of the NonCompete in miles. There are several observations where the radius is described as a list of counties/states. We drop those when we construct the stats below.

In [18]:
f = {'NonCompeteDistance':['count','mean','median','std'],'wgt_sales':['sum']}
f_pass = {'NonCompeteDistance':['count','mean','median','std'],'wgt_sales_passthru':['sum']}

selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )&(data["NonCompeteDistance"]>0)& (data.NonCompeteAlt=='NonCompete  with strictly positive duration')
sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])

sel2=selection&sel&data.TargetStructurePassThru&(data["NonCompeteDistance"]>0)
temp=data[sel2][['NonCompeteDistance','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)   



temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru excluding LLCs with NonCompete']

temp=pd.concat([temp2])
temp['count']=temp['count'].astype(int)
    
display(temp)


sel2=selection&sel&data.TargetStructurePassThruLLC
temp=data[sel2][['NonCompeteDistance','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)   


temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru including LLCs with NonCompete']

temp=pd.concat([temp2])
temp['count']=temp['count'].astype(int)
    
display(temp)

sel2=selection&sel&data.TargetStructureBus
temp=data[sel2][['NonCompeteDistance','wgt_sales']].groupby(data[sel2].NonCompeteAlt).agg(f)   


temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['All business  with NonCompete']

temp=pd.concat([temp2])
temp['count']=temp['count'].astype(int)
    
display(temp)

sel2=selection&sel

temp=data[sel2][['NonCompeteDistance','wgt_sales']].groupby([data[sel2].TargetStructureAlt] ).agg(f)

display(temp)
    
count mean median std sales wgt
Passthru excluding LLCs with NonCompete 4889 39.913 20.000 64.218 1.808
count mean median std sales wgt
Passthru including LLCs with NonCompete 6326 39.264 20.000 65.693 2.200
count mean median std sales wgt
All business with NonCompete 7163 39.397 20.000 66.331 2.538
NonCompeteDistance wgt_sales
count mean median std sum
TargetStructureAlt
C Corporation 837 40.404 20.000 70.999 0.338
LLC 1437 37.057 20.000 70.458 0.392
Partnership 119 28.504 20.000 43.866 0.029
S Corporation 3999 40.746 20.000 65.260 1.626
Sole Proprietorship 771 37.350 20.000 61.168 0.154

Owner Consulting Agreements

An agreement between the buyer and seller for the seller's personal services to be provided to the buyer either as an employee or consultant after the sale of the business.

Intangible Intensity

Below we show summary statistics for intangible intensity for businesses broken down by whether they have a owner consulting agreement

In [19]:
f = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales':['sum']}
f_pass = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales_passthru':['sum']}
data['EmploymentAgreement']=data['EmploymentAgreement'].replace(0,'False')
data['EmploymentAgreement']=data['EmploymentAgreement'].replace(1,'True')

selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )

sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])

sel2=selection&sel&data.TargetStructurePassThru
temp=data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[sel2].EmploymentAgreementAlt).agg(f)   


temp1=[]
temp1=temp.iloc[0,:].values
temp1=pd.DataFrame(temp1,index=['count','mean','median','std','sales wgt']).T
temp1.index=['Passthru excluding LLCs without consulting agreements']


temp2=[]
temp2=temp.iloc[1,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru excluding LLCs with consulting agreements']

temp=pd.concat([temp1,temp2])
temp['count']=temp['count'].astype(int)
    
display(temp)


sel2=selection&sel&data.TargetStructurePassThruLLC
temp=data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[sel2].EmploymentAgreementAlt).agg(f)   


temp1=[]
temp1=temp.iloc[0,:].values
temp1=pd.DataFrame(temp1,index=['count','mean','median','std','sales wgt']).T
temp1.index=['Passthru including LLCs without consulting agreements']


temp2=[]
temp2=temp.iloc[1,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru including LLCs with consulting agreements']

temp=pd.concat([temp1,temp2])
temp['count']=temp['count'].astype(int)


display(temp)


sel2=selection&sel&data.TargetStructureBus
temp=data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby(data[sel2].EmploymentAgreementAlt).agg(f)   

temp1=[]
temp1=temp.iloc[0,:].values
temp1=pd.DataFrame(temp1,index=['count','mean','median','std','sales wgt']).T
temp1.index=['All business without consulting agreements']


temp2=[]
temp2=temp.iloc[1,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['All business  including LLCs with consulting agreements']

temp=pd.concat([temp1,temp2])
temp['count']=temp['count'].astype(int)
    
display(temp)

sel2=selection&sel

temp=data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby([data[sel2].TargetStructureAlt,data[sel2].EmploymentAgreementAlt] ).agg(f)

display(temp)
    
count mean median std sales wgt
Passthru excluding LLCs without consulting agreements 3126 58.205 64.234 30.250 7.038
Passthru excluding LLCs with consulting agreements 3732 57.848 63.636 33.010 1.882
count mean median std sales wgt
Passthru including LLCs without consulting agreements 3837 58.396 64.721 30.487 22.677
Passthru including LLCs with consulting agreements 5026 56.812 62.408 33.342 2.287
count mean median std sales wgt
All business without consulting agreements 5127 58.719 64.912 30.774 97.188
All business including LLCs with consulting agreements 5727 56.565 61.845 33.266 2.812
IntanglibleIntensityPPA wgt_sales
count mean median std sum
TargetStructureAlt EmploymentAgreementAlt
C Corporation False 1290 59.681 65.881 31.606 74.511
True 701 54.791 58.182 32.689 0.526
LLC False 711 59.235 65.844 31.517 15.639
True 1294 53.827 58.503 34.119 0.405
Partnership False 114 61.712 71.034 29.565 1.296
True 83 49.518 50.480 34.489 0.034
S Corporation False 2392 58.004 63.597 30.455 5.607
True 3129 58.293 64.000 33.127 1.757
Sole Proprietorship False 620 58.336 64.777 29.577 0.134
True 520 56.496 62.500 31.890 0.091

Terms of Contracts: Length

Below we describe statistics for the length of the consulting agreement measured in days. This is extracted from the descriptive notes

In [20]:
f = {'EmploymentAgreementLength':['count','mean','median','std'],'wgt_sales':['sum']}
f_pass = {'EmploymentAgreementLength':['count','mean','median','std'],'wgt_sales_passthru':['sum']}

selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )&data.EmploymentAgreementAlt

sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])

sel2=selection&sel&data.TargetStructurePassThru
temp=data[sel2][['EmploymentAgreementLength','wgt_sales']].groupby(data[sel2].EmploymentAgreementAlt).agg(f)   


temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru excluding LLCs with consulting agreements']

temp=pd.concat([temp2])
temp['count']=temp['count'].astype(int)

display(temp)


sel2=selection&sel&data.TargetStructurePassThruLLC
temp=data[sel2][['EmploymentAgreementLength','wgt_sales']].groupby(data[sel2].EmploymentAgreementAlt).agg(f)   



temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['Passthru including LLCs with consulting agreements']

temp=pd.concat([temp2])
    
temp['count']=temp['count'].astype(int)
    
display(temp)

sel2=selection&sel&data.TargetStructureBus
temp=data[sel2][['EmploymentAgreementLength','wgt_sales']].groupby(data[sel2].EmploymentAgreementAlt).agg(f)   


temp2=[]
temp2=temp.iloc[0,:].values
temp2=pd.DataFrame(temp2,index=['count','mean','median','std','sales wgt']).T
temp2.index=['All business  with consulting agreements']

temp=pd.concat([temp2])
temp['count']=temp['count'].astype(int)
    
display(temp)

sel2=selection&sel

temp=data[sel2][['EmploymentAgreementLength','wgt_sales']].groupby([data[sel2].TargetStructureAlt] ).agg(f)

display(temp)
    
count mean median std sales wgt
Passthru excluding LLCs with consulting agreements 3732 93.993 60.000 232.581 1.882
count mean median std sales wgt
Passthru including LLCs with consulting agreements 5026 87.369 60.000 207.979 2.287
count mean median std sales wgt
All business with consulting agreements 5727 93.285 60.000 227.561 2.812
EmploymentAgreementLength wgt_sales
count mean median std sum
TargetStructureAlt
C Corporation 701 135.703 60 333.210 0.526
LLC 1294 68.267 60 107.333 0.405
Partnership 83 113.843 60 290.563 0.034
S Corporation 3129 98.168 60 243.678 1.757
Sole Proprietorship 520 65.698 30 129.188 0.091

Decomposition of Intangible Intensity by total assets

For all passthrus including LLCS

In [21]:
f = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales':['sum']}
f_pass = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales_passthru':['sum']}

selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )

sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
sel2=selection&sel&data.TargetStructurePassThruLLC

data[sel2][['IntanglibleIntensityPPA','wgt_sales_passthru']].groupby(pd.cut( data[sel2]['TotalAssetsPPA'] , [ data[sel2]['TotalAssetsPPA'].quantile(x) for x in [0.0,0.2,0.4,0.6,0.8,1.0]] )).agg(f_pass)
Out[21]:
IntanglibleIntensityPPA wgt_sales_passthru
count mean median std sum
TotalAssetsPPA
(1000.0, 77500.0] 1774 47.112 46.656 34.829 0.692
(77500.0, 145000.0] 1812 54.332 57.922 32.138 1.163
(145000.0, 256130.6] 1731 58.415 63.171 33.119 1.769
(256130.6, 575000.0] 1773 62.905 69.195 28.300 3.021
(575000.0, 21824700000.0] 1772 64.803 72.088 28.710 93.355

For all businesses

In [22]:
f = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales':['sum']}
f_pass = {'IntanglibleIntensityPPA':['count','mean','median','std'],'wgt_sales_passthru':['sum']}

selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )

sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
sel2=selection&sel

data[sel2][['IntanglibleIntensityPPA','wgt_sales']].groupby(pd.cut( data[sel2]['TotalAssetsPPA'] , [ data[sel2]['TotalAssetsPPA'].quantile(x) for x in [0.0,0.2,0.4,0.6,0.8,1.0]] )).agg(f)
Out[22]:
IntanglibleIntensityPPA wgt_sales
count mean median std sum
TotalAssetsPPA
(1000.0, 85000.0] 2275 47.609 47.783 34.772 0.229
(85000.0, 160000.0] 2158 54.923 59.015 31.922 0.383
(160000.0, 300000.0] 2092 59.877 64.680 32.081 0.601
(300000.0, 850000.0] 2173 61.836 68.876 29.059 1.164
(850000.0, 153000000000.0] 2155 64.239 70.877 29.550 97.623

Regression of $\log V_{\kappa}$ on $age,age^2$

The sample includes all pass-thru firms in Pratts who have a valid entry for "TargetAge" and a positive value for "TotalIntangiblesPPA". The regression specification is

$$ \log(\text{TotalIntangiblesPPA}) = \text{constant} + \beta_{1} \text{TagetAge} + \beta_{2} \text{TagetAge}^2 + \text{sector fixed effects}+ \text{year fixed effects} $$
In [23]:
dummies1 = pd.get_dummies(data['naics_code_cat']).rename(columns=lambda x: 'nacis_code_cat' + str(x))
dummies2 = pd.get_dummies(data['year']).rename(columns=lambda x: 'year' + str(x))
X = pd.concat([data[['TargetAge','TargetAge2']],dummies1,dummies2],axis=1)
Y=np.log(data.TotalIntangiblesPPA)
X = sm.add_constant(X)
selection=(pd.notnull(data.TargetAge)) &(pd.notnull(data.TotalIntangiblesPPA) ) & (data.TotalIntangiblesPPA>0)& ~(data.TargetStructureAlt=='C Corporation')
model11 = sm.OLS(Y[selection], X[selection]).fit()
model11.summary()
Out[23]:
OLS Regression Results
Dep. Variable: TotalIntangiblesPPA R-squared: 0.236
Model: OLS Adj. R-squared: 0.232
Method: Least Squares F-statistic: 58.75
Date: Wed, 19 Aug 2020 Prob (F-statistic): 0.00
Time: 13:35:33 Log-Likelihood: -16865.
No. Observations: 8607 AIC: 3.382e+04
Df Residuals: 8561 BIC: 3.415e+04
Df Model: 45
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 11.2272 0.960 11.693 0.000 9.345 13.109
TargetAge 0.0169 0.002 8.518 0.000 0.013 0.021
TargetAge2 -4.637e-05 1.42e-05 -3.270 0.001 -7.42e-05 -1.86e-05
nacis_code_cat[11, 21) 0.2525 1.099 0.230 0.818 -1.902 2.407
nacis_code_cat[21, 22) 4.4811 1.087 4.123 0.000 2.351 6.612
nacis_code_cat[22, 23) -0.2470 1.191 -0.207 0.836 -2.583 2.089
nacis_code_cat[23, 24) 0.2224 1.001 0.222 0.824 -1.740 2.185
nacis_code_cat[24, 31) 2.674e-14 2.24e-14 1.192 0.233 -1.72e-14 7.07e-14
nacis_code_cat[31, 35) 0.6615 0.999 0.662 0.508 -1.298 2.620
nacis_code_cat[35, 42) -2.324e-15 9.11e-15 -0.255 0.799 -2.02e-14 1.55e-14
nacis_code_cat[42, 44) -0.9709 1.087 -0.893 0.372 -3.101 1.160
nacis_code_cat[44, 46) -0.7390 0.998 -0.740 0.459 -2.696 1.218
nacis_code_cat[46, 48) -3.898e-15 2.2e-14 -0.177 0.859 -4.7e-14 3.92e-14
nacis_code_cat[48, 50) -0.3017 1.002 -0.301 0.763 -2.265 1.662
nacis_code_cat[50, 51) -1.141e-15 2.37e-15 -0.482 0.630 -5.78e-15 3.5e-15
nacis_code_cat[51, 52) 2.3662 1.005 2.355 0.019 0.397 4.336
nacis_code_cat[52, 53) 0.7913 1.007 0.786 0.432 -1.182 2.765
nacis_code_cat[53, 54) -0.1870 1.003 -0.187 0.852 -2.152 1.778
nacis_code_cat[54, 55) 0.8193 1.001 0.819 0.413 -1.142 2.781
nacis_code_cat[55, 56) 6.4695 1.137 5.690 0.000 4.241 8.698
nacis_code_cat[56, 61) -0.5017 0.999 -0.502 0.615 -2.460 1.456
nacis_code_cat[61, 62) -0.9277 1.011 -0.918 0.359 -2.910 1.054
nacis_code_cat[62, 71) -0.1758 1.001 -0.176 0.861 -2.138 1.787
nacis_code_cat[71, 72) -0.8380 1.007 -0.833 0.405 -2.811 1.135
nacis_code_cat[72, 81) -1.2648 0.998 -1.267 0.205 -3.221 0.692
nacis_code_cat[81, 83) -1.3000 0.999 -1.302 0.193 -3.258 0.657
nacis_code_cat[83, 92) 2.13e-15 3.81e-15 0.560 0.576 -5.33e-15 9.59e-15
year1990 -3.83e-15 5.19e-15 -0.737 0.461 -1.4e-14 6.35e-15
year1991 1.658e-15 1.74e-15 0.952 0.341 -1.76e-15 5.07e-15
year1992 3.969e-16 6.46e-15 0.061 0.951 -1.23e-14 1.31e-14
year1993 -1.2132 1.173 -1.035 0.301 -3.512 1.085
year1994 -0.2682 0.630 -0.426 0.670 -1.503 0.966
year1995 -0.1314 0.466 -0.282 0.778 -1.044 0.781
year1996 0.0906 0.333 0.272 0.786 -0.562 0.744
year1997 0.1838 0.261 0.705 0.481 -0.327 0.695
year1998 0.4123 0.184 2.237 0.025 0.051 0.774
year1999 0.2481 0.169 1.466 0.143 -0.084 0.580
year2000 0.6193 0.172 3.609 0.000 0.283 0.956
year2001 0.0279 0.165 0.169 0.866 -0.295 0.351
year2002 0.3575 0.148 2.420 0.016 0.068 0.647
year2003 0.2876 0.149 1.924 0.054 -0.005 0.581
year2004 0.3198 0.136 2.360 0.018 0.054 0.585
year2005 0.8324 0.131 6.355 0.000 0.576 1.089
year2006 0.7937 0.120 6.637 0.000 0.559 1.028
year2007 0.9219 0.112 8.217 0.000 0.702 1.142
year2008 0.8160 0.106 7.723 0.000 0.609 1.023
year2009 0.5219 0.102 5.126 0.000 0.322 0.721
year2010 0.5082 0.098 5.175 0.000 0.316 0.701
year2011 0.6108 0.098 6.210 0.000 0.418 0.804
year2012 0.8147 0.097 8.383 0.000 0.624 1.005
year2013 0.8809 0.096 9.203 0.000 0.693 1.069
year2014 1.0353 0.093 11.126 0.000 0.853 1.218
year2015 1.0461 0.092 11.350 0.000 0.865 1.227
year2016 0.8004 0.092 8.740 0.000 0.621 0.980
year2017 0.7109 0.118 6.034 0.000 0.480 0.942
Omnibus: 842.453 Durbin-Watson: 1.647
Prob(Omnibus): 0.000 Jarque-Bera (JB): 4493.515
Skew: 0.321 Prob(JB): 0.00
Kurtosis: 6.481 Cond. No. 1.03e+16


Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 3.1e-22. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

Intangible valuations multiple of Net Income

We divide the PPA allocated to Intangible assets by Net Income in the previous year and then group by legal form and report the median.

In [24]:
selPassThru=data['TargetStructureAlt'].isin(['S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
data['IntangibleToNetIncome']=data['TotalIntangiblesPPA']/data['NetIncome']
temp=data[selPassThru][['IntangibleToNetIncome']].groupby(data[selPassThru].TargetStructureAlt).median()
display(temp)
IntangibleToNetIncome
TargetStructureAlt
LLC 1.268
Partnership 1.522
S Corporation 1.596
Sole Proprietorship 1.228

More details for Real Estate

We show below that most of the transactions in "real estate" sector in Pratts have little or no long-term liabilities

In [25]:
selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )
sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
sel=selection&sel&data.TargetStructurePassThru &(data.naics_label=='Real Estate and Rental and Leasing')
data[sel][['TotalAssetsPPA','TotalLiabilitiesPPA','DebtAssumed']].groupby([data.TargetStructureAlt,data.year]).sum()
pd.set_option('display.max_rows', None)
data['TotalLiabilityIntensityPPA']=data['TotalLiabilitiesPPA']/data['TotalAssetsPPA']
data['DebtAssumedIntensity']=data['DebtAssumed']/data['TotalAssetsPPA']
data[sel][['TotalLiabilityIntensityPPA','DebtAssumedIntensity']].describe()
#data[sel][['TotalLiabilityIntensityPPA','IntanglibleIntensityPPA','DebtAssumed','TotalAssetsPPA','TotalLiabilitiesPPA','TargetBusinessDescription','TargetStructureAlt']]
Out[25]:
TotalAssetsPPA TotalLiabilitiesPPA DebtAssumed
TargetStructureAlt year
Partnership 1999 150000.000 0.000 0.000
2009 55000.000 0.000 0.000
2010 275000.000 0.000 0.000
2013 1047400.000 0.000 0.000
S Corporation 1996 325000.000 0.000 0.000
1998 109400.000 0.000 0.000
1999 75000.000 0.000 0.000
2001 1330000.000 0.000 0.000
2003 750000.000 0.000 0.000
2004 299000.000 0.000 0.000
2005 125000.000 0.000 0.000
2006 18108580.000 1817000.000 0.000
2007 32440000.000 6710000.000 3426000.000
2008 3325000.000 0.000 0.000
2009 2317000.000 0.000 0.000
2010 3263000.000 0.000 0.000
2011 4579287.000 0.000 0.000
2012 4735000.000 0.000 0.000
2013 11658749.000 0.000 0.000
2014 9452500.000 0.000 0.000
2015 2693511.000 0.000 0.000
2016 7507992.000 0.000 0.000
2017 3381000.000 0.000 0.000
Sole Proprietorship 1997 365000.000 0.000 0.000
2000 240500.000 0.000 0.000
2001 360000.000 0.000 0.000
2007 550000.000 0.000 0.000
2008 200000.000 0.000 0.000
2009 105000.000 0.000 0.000
2010 100000.000 0.000 0.000
2011 300000.000 0.000 0.000
2012 90000.000 0.000 0.000
2013 145000.000 0.000 0.000
2014 1750000.000 0.000 0.000
2015 250000.000 0.000 0.000
2016 445000.000 0.000 0.000
Out[25]:
TotalLiabilityIntensityPPA DebtAssumedIntensity
count 94.000 148.000
mean 0.004 0.001
std 0.026 0.009
min 0.000 0.000
25% 0.000 0.000
50% 0.000 0.000
75% 0.000 0.000
max 0.226 0.115

More Details on Transport and Warehousing

In [26]:
selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )
sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
sel=selection&sel&data.TargetStructurePassThruLLC &(data.naics_label=='Transportation and Warehousing')
columnsPPA = [s for s in data.columns if "PPA" in s]
pd.set_option('display.max_columns', None)
#data[sel][columnsPPA].groupby([data.TargetStructureAlt]).describe().T
In [27]:
data['IsRoute']=False
sel2=data['TargetBusinessDescription'].str.contains("route", na=False, case=False)
data.loc[sel2,'IsRoute']=True
data[sel][['IntanglibleIntensityPPA']].groupby(data[sel].IsRoute).describe()
#data['IsLimo']=False
#sel2=data['TargetBusinessDescription'].str.contains("limo", na=False, case=False)
#data.loc[sel2,'IsLimo']=True
#data[sel][['IntanglibleIntensityPPA']].groupby(data[sel].IsLimo).describe()
Out[27]:
IntanglibleIntensityPPA
count mean std min 25% 50% 75% max
IsRoute
False 233.000 60.241 26.351 0.000 42.857 63.706 80.935 100.000
True 94.000 81.083 14.717 19.231 76.261 82.500 90.567 100.000

Retirement Analysis

In [28]:
data['Notes']=data['Notes'].str.lower()
data['ReasonForSale']='Not Provided'

sel2=data['Notes'].str.contains("reason ", na=False, case=False)
data.loc[sel2,'ReasonForSale']='Others'

selection=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )
sel=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
sel=selection&sel

sel2=data['Notes'].str.contains("retir", na=False, case=False)
data.loc[sel2,'ReasonForSale']='retirement'

sel2=(data['Notes'].str.contains("relocat", na=False, case=False)|data['Notes'].str.contains("moving", na=False, case=False))
data.loc[sel2,'ReasonForSale']='relocation'

sel2=data['Notes'].str.contains("out-of -state", na=False, case=False)
data.loc[sel2,'ReasonForSale']='relocation'

sel2=data['Notes'].str.contains("out of state", na=False, case=False)
data.loc[sel2,'ReasonForSale']='relocation'



sel2=data['Notes'].str.contains("health", na=False, case=False)| data['Notes'].str.contains("illness", na=False, case=False)
data.loc[sel2,'ReasonForSale']='health'

#sel2=data['Notes'].str.contains("burnout", na=False, case=False)|data['Notes'].str.contains("bur nout", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='burnout'

sel2=(data['Notes'].str.contains("interest", na=False, case=False))&data['Notes'].str.contains("other", na=False, case=False)
data.loc[sel2,'ReasonForSale']='other interests/opportunities'


sel2=(data['Notes'].str.contains("opportunit", na=False, case=False) )&data['Notes'].str.contains("other", na=False, case=False)
data.loc[sel2,'ReasonForSale']='other interests/opportunities'


sel2=(data['Notes'].str.contains("opportunit", na=False, case=False) )&data['Notes'].str.contains("new", na=False, case=False)
data.loc[sel2,'ReasonForSale']='other interests/opportunities'


#sel2=data['Notes'].str.contains("divorce", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='divorce'
#sel2=data['Notes'].str.contains("family", na=False, case=False)& data['Notes'].str.contains("reason", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='family'


#sel2=data['Notes'].str.contains("dissolution", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='partnership dissolution'


#sel2=data['Notes'].str.contains("partnership breakup", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='partnership dissolution'


sel2=data['Notes'].str.contains("death", na=False, case=False)| data['Notes'].str.contains("passed away", na=False, case=False)
data.loc[sel2,'ReasonForSale']='health'


sel2=data['Notes'].str.contains("died", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
data.loc[sel2,'ReasonForSale']='health'

sel2=data['Notes'].str.contains("deceased", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
data.loc[sel2,'ReasonForSale']='health'


sel2=data['Notes'].str.contains("career", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
data.loc[sel2,'ReasonForSale']='other interests/opportunities'


sel2=data['Notes'].str.contains("a change", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
data.loc[sel2,'ReasonForSale']='other interests/opportunities'


#sel2=data['Notes'].str.contains("school", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='going back to school'



#sel2=data['Notes'].str.contains("downsizing", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='downsizing'


sel2=data['Notes'].str.contains("other business investments", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
data.loc[sel2,'ReasonForSale']='other interests/opportunities'

sel2=data['Notes'].str.contains("new", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
data.loc[sel2,'ReasonForSale']='other interests/opportunities'


sel2=data['Notes'].str.contains("ill", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
data.loc[sel2,'ReasonForSale']='health'

#sel2=data['Notes'].str.contains("personal time", na=False, case=False) & data['Notes'].str.contains("reason for", na=False, case=False)
#data.loc[sel2,'ReasonForSale']='more personal time'



data[sel][['IntanglibleIntensityPPA']].groupby([data[sel].ReasonForSale]).describe()

data[sel][['wgt_sales']].groupby([data[sel].ReasonForSale]).sum()
Out[28]:
IntanglibleIntensityPPA
count mean std min 25% 50% 75% max
ReasonForSale
Not Provided 9831.000 57.332 32.287 -51.829 32.835 62.963 84.671 631.818
Others 20.000 52.076 27.240 5.682 31.667 57.157 79.895 86.792
health 238.000 62.107 31.552 0.000 39.602 71.202 88.056 101.651
other interests/opportunities 465.000 60.809 31.504 -51.386 38.000 67.781 86.304 259.291
relocation 146.000 63.268 26.133 4.286 42.527 67.296 85.631 100.000
retirement 154.000 52.155 28.791 0.000 28.242 55.412 73.187 100.000
Out[28]:
wgt_sales
ReasonForSale
Not Provided 53.787
Others 0.005
health 13.051
other interests/opportunities 29.199
relocation 0.117
retirement 3.841

Other Category

In [29]:
#sel1
sel1=(data.IntanglibleIntensityPPA.notnull())&~(np.isinf(data.IntanglibleIntensityPPA) )
sel3=data['TargetStructureAlt'].isin(['C Corporation','S Corporation', 'Sole Proprietorship', 'Partnership','LLC'])
sel2=data['ReasonForSale']=='Others'
data[sel1&sel2&sel3]['IntanglibleIntensityPPA'].describe()
Out[29]:
count   20.000
mean    52.076
std     27.240
min     5.682 
25%     31.667
50%     57.157
75%     79.895
max     86.792
Name: IntanglibleIntensityPPA, dtype: float64
In [30]:
data[sel1&sel2&sel3]['Notes']
Out[30]:
2153     net income includes other income of 1399.  the reason for selling was burnout.                                                                                     
6360     ebt includes other income of 4739.  the reason for selling was to go into prototype work.                                                                          
12969    reason for selling was divorce                                                                                                                                     
13011    reason for sale: leaving business.                                                                                                                                 
13032    ebt: includes other income of 31.  reason for sale: concentrate on second location.                                                                                
13064    reason for sale: partnership dissolution.  payroll averages over 15/hr.  for the 96 hrs/wk. they are open.  by lowering cost seller feels business could do better.
13200    reason for sale: one partner has moved out of area. bldg. 1800 ft.2.                                                                                               
13211    reason for sale: tired of pizza business.                                                                                                                          
13268    net income includes rental income of 9756 a vendor fee of 9 and a tax penalty of 3869.  the reason for selling was to start a family.                              
13665    reason for sale was to concentrate on other business.                                                                                                              
13778    ebt includes other income of 30841.  the reason for selling was a non-pharmacist son inherited the business.                                                       
14202    ebt includes vendor fee income of 1064.  the reason for sale was burnout.                                                                                          
16862    the reason for selling was a partnership dissolution.                                                                                                              
17086    reason for sale: concentrate on developing retail outlets.                                                                                                         
17756    the reason for sale was divorce.                                                                                                                                   
17770    reason for sale: spend more time with family.                                                                                                                      
21971    the reason for selling was consolidation of businesses.                                                                                                            
23121    the reason for selling was divorce.                                                                                                                                
23122    the reason for selling was divorce.                                                                                                                                
24765    reason for sale was a conflict of interest.                                                                                                                        
Name: Notes, dtype: object
In [ ]: