import pandas as pd
import numpy as np

CompustatData = pd.read_csv('./data/compustat_data.csv')
CompustatData=CompustatData[['pi','txt','txpd','gvkey','fyear','naics','fic']]
CompustatData=CompustatData.dropna()
CompustatData.columns
years=[2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016]

CompustatData=CompustatData[(CompustatData.fyear>1995) & (CompustatData.fyear<2019)]
CompustatData.fyear=CompustatData.fyear.astype(int)
CompustatData = CompustatData.drop_duplicates(subset = ['gvkey', 'fyear']) #this is not ok.
#CompustatData = CompustatData.sort_values(['gvkey', 'fyear'])
#CompustatData = CompustatData.set_index(['gvkey', 'fyear'], verify_integrity = True, drop = False)
CompustatData = CompustatData[CompustatData['fic'] == 'USA']
CompustatData=CompustatData[CompustatData['fyear'].isin( years)]

naics_labels=['Agriculture, Forestry, Fishing and Hunting',\
        'Mining, Quarrying, and Oil and Gas Extraction',\
        'Utilities',\
        'Construction',\
        'NA1',\
        'Manufacturing',\
        'NA2',\
        'Wholesale Trade',\
        'Retail Trade',\
        'NA3',\
        'Transportation and Warehousing',\
        'NA4',\
        'Information',\
        'Finance and Insurance',\
        'Real Estate and Rental and Leasing',\
        'Professional, Scientific, and Technical Services',\
        'Management of Companies and Enterprises',\
        'Administrative and Support and Waste Management and Remediation Services',\
        'Educational Services',\
        'Health Care and Social Assistance',\
        'Arts, Entertainment, and Recreation',\
        'Accommodation and Food Service',\
        'Other Services','Public administration']


naics_code=[11,21,22,23,24,31,35,42,44,46,48,50,51,52,53,54,55,56,61,62,71,72,81,83,92]
n=24


#naics_labels=['Ag. Mining and Utilities',\
#        'Consts and Mfg',\
#        'Trade and warehousing',\
#        'Information and FIRE',\
#        'RE',\
#        'Professional services',\
#        'Nonprof services']

#naics_code=[11,23,42,51,53,54,57,92]

CompustatData['naics_label']=pd.cut(CompustatData['naics']/10**4,naics_code[:n+1],labels=naics_labels[:n],include_lowest=True,right=False)
CompustatData['naics_code_cat']=pd.cut(CompustatData['naics']/10**4,naics_code[:n+1],include_lowest=True,right=False)
CompustatData['naics_2d']=pd.cut(CompustatData['naics']/10**4,naics_code[:n+1],labels=naics_code[:n],include_lowest=True,right=False)

temp=pd.pivot_table(CompustatData, values=['txt','pi'], columns=['fyear'], aggfunc=np.mean).T
temp2=pd.DataFrame((temp.txt/temp.pi).values)
temp2=temp2.T
temp2.columns=years

temp2.index=['all']

naics_year_count = pd.pivot_table(CompustatData, values='gvkey', index=['naics_label'], columns=['fyear'], aggfunc=len)
naics_year_pi = pd.pivot_table(CompustatData, values='pi', index=['naics_label'], columns=['fyear'], aggfunc=np.sum)
naics_year_txt =pd.pivot_table(CompustatData, values='txt', index=['naics_label'], columns=['fyear'], aggfunc=np.sum)
naics_year_txpd =pd.pivot_table(CompustatData, values='txpd', index=['naics_label'], columns=['fyear'], aggfunc=np.sum)

naics_year_etr_txt=naics_year_txt/naics_year_pi

naics_year_etr_txpd=naics_year_txpd/naics_year_pi

naics_year_etr_txt=pd.concat([temp2,naics_year_etr_txt])

temp=pd.pivot_table(CompustatData, values=['txpd','pi'], columns=['fyear'], aggfunc=np.mean).T
temp2=pd.DataFrame((temp.txpd/temp.pi).values)
temp2=temp2.T
temp2.columns=years

temp2.index=['all']
naics_year_etr_txpd = pd.concat([temp2,naics_year_etr_txpd])

naics_year_count

naics_year_etr_txt.T.describe().T

naics_year_etr_txt

naics_year_etr_txpd

naics_year_etr_txpd.T.describe().T

fyear	2000	2001	2002	2003	2004	2005	2006	2007	2008	2009	2010	2011	2012	2013	2014	2015	2016
naics_label
Agriculture, Forestry, Fishing and Hunting	22	24	22	21	19	18	18	16	18	20	19	19	18	15	13	12	10
Mining, Quarrying, and Oil and Gas Extraction	189	181	168	171	178	199	200	205	202	210	210	208	224	220	192	170	152
Utilities	97	95	95	94	91	89	87	82	77	81	77	75	73	72	71	74	69
Construction	92	85	76	68	69	72	69	73	66	60	62	64	63	69	71	66	64
Manufacturing	2301	2173	2090	2030	1977	1921	1871	1845	1796	1740	1663	1607	1611	1636	1554	1468	1398
Wholesale Trade	226	212	200	196	177	164	159	155	150	151	140	145	151	147	140	131	127
Retail Trade	316	298	290	273	257	240	231	219	207	201	204	198	196	193	179	167	164
Transportation and Warehousing	144	141	138	146	143	142	133	113	117	117	113	107	104	109	104	100	92
Information	803	749	681	636	626	617	565	524	508	503	496	497	519	530	514	498	466
Finance and Insurance	530	500	479	499	1180	1169	1110	1043	1002	998	997	980	1007	995	977	973	942
Real Estate and Rental and Leasing	250	234	238	229	221	219	221	224	229	226	226	235	247	250	261	258	255
Professional, Scientific, and Technical Services	321	288	269	252	244	248	233	216	203	201	181	169	169	156	148	139	128
Administrative and Support and Waste Management and Remediation Services	178	165	159	145	136	131	129	116	112	102	97	92	94	98	99	94	91
Educational Services	26	28	25	22	21	23	25	24	27	25	25	23	22	26	25	23	20
Health Care and Social Assistance	141	124	119	118	116	116	111	107	110	111	104	91	85	85	82	74	73
Arts, Entertainment, and Recreation	58	57	49	45	42	36	36	32	30	34	30	29	34	36	36	31	28
Accommodation and Food Service	167	154	143	135	117	104	97	89	87	88	83	86	90	91	90	88	82
Other Services	33	33	29	24	21	20	18	16	15	15	16	15	14	11	9	8	8

	count	mean	std	min	25%	50%	75%	max
all	17.0	0.375320	0.186630	0.224527	0.285605	0.306803	0.356972	0.982909
Agriculture, Forestry, Fishing and Hunting	17.0	0.296715	0.336471	-0.134196	0.116764	0.172373	0.431282	1.188372
Mining, Quarrying, and Oil and Gas Extraction	17.0	0.040013	0.997118	-3.677641	0.286168	0.348338	0.378294	0.531369
Utilities	17.0	0.338970	0.033770	0.292616	0.323095	0.334929	0.351420	0.426789
Construction	17.0	0.214386	0.234696	-0.385968	0.121346	0.336150	0.377168	0.386120
Manufacturing	17.0	0.363597	0.171029	0.238535	0.278886	0.324511	0.369308	0.993169
Wholesale Trade	17.0	0.372585	0.110719	0.279930	0.307139	0.335052	0.381492	0.717321
Retail Trade	17.0	0.379651	0.033825	0.347304	0.361636	0.369621	0.384771	0.468024
Transportation and Warehousing	17.0	-0.069250	1.666960	-6.357222	0.275288	0.310544	0.391183	0.905198
Information	17.0	0.215194	0.347916	-0.865063	0.232769	0.269857	0.323069	0.594826
Finance and Insurance	17.0	0.209909	0.195698	-0.484793	0.212482	0.282506	0.290987	0.326720
Real Estate and Rental and Leasing	17.0	0.109903	0.475109	-1.595688	0.118296	0.158121	0.205678	0.687303
Professional, Scientific, and Technical Services	17.0	0.818259	1.570493	0.173492	0.325647	0.384270	0.561525	6.864591
Administrative and Support and Waste Management and Remediation Services	17.0	0.629038	0.854882	0.275707	0.367179	0.386086	0.402849	3.909591
Educational Services	17.0	0.801245	1.339290	0.296771	0.369175	0.395351	0.487859	5.858571
Health Care and Social Assistance	17.0	0.406493	0.116027	0.195375	0.358391	0.374321	0.443231	0.753115
Arts, Entertainment, and Recreation	17.0	0.428171	0.486389	-0.754793	0.365019	0.446556	0.482949	1.482036
Accommodation and Food Service	17.0	0.374575	0.307947	0.164461	0.266385	0.324722	0.369466	1.536308
Other Services	17.0	0.411105	0.373706	-0.298428	0.371707	0.419024	0.447216	1.534937

	2000	2001	2002	2003	2004	2005	2006	2007	2008	2009	2010	2011	2012	2013	2014	2015	2016
all	0.400392	0.659538	0.484114	0.325736	0.324376	0.334442	0.306803	0.356972	0.982909	0.294388	0.285605	0.279759	0.267726	0.224527	0.291869	0.290361	0.270921
Agriculture, Forestry, Fishing and Hunting	0.729187	0.718040	0.431282	0.182650	0.123614	0.124937	0.172373	0.304782	-0.134196	0.044519	0.116764	-0.014455	0.609598	0.112320	0.137729	0.196633	1.188372
Mining, Quarrying, and Oil and Gas Extraction	0.368225	0.400728	0.531369	0.357355	0.348338	0.309974	0.339525	0.355874	-0.695381	0.435142	0.286168	0.303840	0.490975	0.378294	-3.677641	0.108007	0.039423
Utilities	0.426789	0.392311	0.368217	0.351420	0.334929	0.352287	0.343267	0.341324	0.330200	0.305996	0.306924	0.336205	0.292616	0.333866	0.298705	0.324340	0.323095
Construction	0.385995	0.386120	0.377168	0.370536	0.377177	0.376231	0.378672	0.137416	-0.194842	0.094909	0.121346	0.336150	-0.385968	-0.076554	0.284535	0.347501	0.328161
Manufacturing	0.384997	0.993169	0.445508	0.369308	0.325353	0.341955	0.325047	0.321584	0.409487	0.324511	0.276664	0.298395	0.310834	0.278886	0.276299	0.260617	0.238535
Wholesale Trade	0.468147	0.425787	0.523107	0.352967	0.335052	0.350178	0.335698	0.331372	0.717321	0.282492	0.381492	0.325077	0.307113	0.286508	0.307139	0.324566	0.279930
Retail Trade	0.442070	0.468024	0.393502	0.384771	0.364752	0.347509	0.363486	0.369621	0.419556	0.370109	0.350849	0.381175	0.361636	0.361962	0.358063	0.369676	0.347304
Transportation and Warehousing	0.391183	0.905198	-6.357222	0.514807	0.730389	-0.926905	0.153354	0.310544	0.842466	0.308469	0.318533	0.316206	0.295388	0.275288	0.286148	0.127510	0.331389
Information	0.594826	-0.388159	-0.865063	0.269857	0.385726	0.323069	0.322788	0.436528	0.590404	0.255839	0.226885	0.247373	0.199558	0.235868	0.292637	0.297391	0.232769
Finance and Insurance	0.326720	0.322312	0.312322	0.287580	0.293717	0.282506	0.282059	0.262402	0.070754	-0.484793	0.290987	0.212482	0.206073	0.052547	0.276229	0.285970	0.288593
Real Estate and Rental and Leasing	0.205678	0.134690	0.140178	0.158121	0.176059	0.215706	0.175220	0.118296	-1.595688	0.670971	0.687303	0.215826	0.187419	0.130016	0.111968	0.054370	0.082226
Professional, Scientific, and Technical Services	0.632256	0.173492	6.864591	0.561525	0.287496	0.514899	0.384270	0.400166	0.681114	0.977133	0.223820	0.549014	0.328705	0.325647	0.336724	0.322275	0.347282
Administrative and Support and Waste Management and Remediation Services	0.648397	0.734525	0.652172	0.402849	0.373495	0.336440	0.386086	0.391923	3.909591	0.376147	0.371774	0.367179	0.387991	0.400844	0.346570	0.331957	0.275707
Educational Services	0.507136	0.443538	0.487859	0.393671	0.364068	0.372916	0.377350	0.369175	0.364447	0.395351	0.403077	0.418861	5.858571	0.296771	0.591455	1.649519	0.327406
Health Care and Social Assistance	0.753115	0.535125	0.374321	0.431911	0.481539	0.379450	0.455178	0.318455	0.339837	0.350524	0.195375	0.365705	0.358391	0.365933	0.372929	0.389366	0.443231
Arts, Entertainment, and Recreation	0.488465	0.441539	0.451074	0.461999	0.446556	0.446495	0.482949	0.555592	1.266158	-0.754793	0.365019	0.383527	0.339343	-0.276265	1.482036	0.237618	0.461588
Accommodation and Food Service	0.375427	0.369466	0.349126	0.324722	0.335510	0.309016	0.266385	0.339390	1.536308	0.407081	0.172661	0.164461	0.288260	0.226094	0.297368	0.219167	0.387328
Other Services	-0.298428	-0.188604	1.534937	0.499855	0.265042	0.351297	0.376412	0.373385	0.447216	0.476555	0.421987	0.371707	0.437833	0.399861	0.664504	0.419024	0.436194

	2000	2001	2002	2003	2004	2005	2006	2007	2008	2009	2010	2011	2012	2013	2014	2015	2016
all	0.295230	0.592458	0.349780	0.223008	0.238338	0.295419	0.281040	0.347111	1.288864	0.296396	0.229813	0.226243	0.246870	0.223132	0.250258	0.297018	0.229505
Agriculture, Forestry, Fishing and Hunting	0.667969	-1.157210	0.584653	0.095646	0.061954	0.127506	0.232163	0.514657	0.417637	0.052409	0.130856	0.189703	0.336209	0.082664	0.117335	0.199549	0.460251
Mining, Quarrying, and Oil and Gas Extraction	0.244907	0.270114	0.246094	0.202513	0.182664	0.212547	0.243966	0.297870	-2.846381	0.377749	0.261626	0.272471	0.419892	0.543016	-7.199914	-0.038396	-0.041029
Utilities	0.467512	0.210507	0.343033	0.264526	0.172228	0.344575	0.270905	0.271851	0.148907	-0.010511	0.056183	-0.027509	-0.035335	0.043521	0.104665	-0.018769	-0.041994
Construction	0.332301	0.330196	0.332682	0.324676	0.294916	0.322569	0.563227	-0.197833	-0.005430	0.288085	-2.145696	0.302222	0.313394	0.207961	0.297741	0.299821	0.262527
Manufacturing	0.295964	1.072972	0.349847	0.284888	0.250909	0.300854	0.312876	0.322223	0.403191	0.302118	0.268004	0.259077	0.273834	0.273541	0.275625	0.277241	0.258798
Wholesale Trade	0.392762	0.423969	0.407093	0.149221	0.305633	0.283587	0.291933	0.241799	0.737238	0.247785	0.298214	0.254881	0.265077	0.292975	0.297357	0.324194	0.224577
Retail Trade	0.383350	0.369749	0.320497	0.282921	0.334571	0.353519	0.377648	0.354053	0.490868	0.344020	0.334842	0.312515	0.334971	0.385382	0.367298	0.363054	0.312874
Transportation and Warehousing	0.188483	1.462720	-2.077742	0.162727	0.340122	-0.611962	0.126564	0.234086	0.573021	0.152760	0.167173	0.102386	0.226459	0.231350	0.204474	0.172589	0.137312
Information	0.345772	-0.327964	-0.552033	0.126802	0.298419	0.291104	0.263438	0.338920	0.747170	0.245293	0.209857	0.171596	0.189965	0.161815	0.225755	0.229405	0.226194
Finance and Insurance	0.230291	0.201737	0.234832	0.202702	0.229846	0.257132	0.238194	0.459849	-0.213888	-2.152599	0.177984	0.273241	0.176789	0.157346	0.210553	0.258329	0.205529
Real Estate and Rental and Leasing	0.098826	0.043085	0.007703	0.018951	0.045896	0.120693	0.110043	0.103282	-2.839872	0.484206	0.765826	0.146590	0.170912	0.137811	0.107008	0.086351	0.050503
Professional, Scientific, and Technical Services	0.398748	-0.159982	2.560366	0.479809	0.321349	0.410193	0.370057	0.282396	1.068728	1.385183	0.189443	0.516240	0.294503	0.290145	0.380701	0.342558	0.371190
Administrative and Support and Waste Management and Remediation Services	0.598543	0.655756	0.414378	0.199553	0.258329	0.278998	0.316392	0.308306	4.898612	0.420224	0.308397	0.252414	0.361404	0.359063	0.386551	0.288098	0.304300
Educational Services	0.570759	0.385000	0.329530	0.320336	0.306498	0.330646	0.381841	0.335208	0.357579	0.366334	0.432863	0.396577	7.400604	0.245662	0.561883	3.847620	0.405357
Health Care and Social Assistance	0.577174	0.257823	0.231396	0.325922	0.286583	0.236876	0.550711	0.278687	0.324502	0.351317	0.321340	0.238478	0.293085	0.344190	0.309225	0.352746	0.383887
Arts, Entertainment, and Recreation	0.484350	0.389292	0.318436	0.371016	0.299990	0.463711	0.445420	0.551258	1.140924	-1.465003	0.356821	0.233150	0.452118	-0.745033	1.795971	0.165193	0.212908
Accommodation and Food Service	0.327883	0.315645	0.236611	0.251479	0.274215	0.301563	0.309004	0.353913	1.825653	0.328612	0.181050	0.182424	0.307890	0.357230	0.315248	0.216323	0.342729
Other Services	-0.742499	-0.361106	1.685388	0.345985	0.139773	0.241337	0.279701	0.214650	0.319713	0.254474	0.208673	0.188306	0.285421	0.214994	0.254748	0.315175	0.320385

	count	mean	std	min	25%	50%	75%	max
all	17.0	0.347675	0.258135	0.223008	0.229813	0.281040	0.297018	1.288864
Agriculture, Forestry, Fishing and Hunting	17.0	0.183174	0.396888	-1.157210	0.095646	0.189703	0.417637	0.667969
Mining, Quarrying, and Oil and Gas Extraction	17.0	-0.373546	1.916858	-7.199914	0.182664	0.244907	0.272471	0.543016
Utilities	17.0	0.150841	0.158474	-0.041994	-0.010511	0.148907	0.270905	0.467512
Construction	17.0	0.124903	0.606476	-2.145696	0.262527	0.299821	0.324676	0.563227
Manufacturing	17.0	0.340115	0.192593	0.250909	0.273541	0.284888	0.312876	1.072972
Wholesale Trade	17.0	0.319900	0.127154	0.149221	0.254881	0.292975	0.324194	0.737238
Retail Trade	17.0	0.354243	0.045026	0.282921	0.334571	0.353519	0.369749	0.490868
Transportation and Warehousing	17.0	0.105442	0.683554	-2.077742	0.137312	0.172589	0.231350	1.462720
Information	17.0	0.187736	0.275656	-0.552033	0.171596	0.226194	0.291104	0.747170
Finance and Insurance	17.0	0.067522	0.586019	-2.152599	0.177984	0.210553	0.238194	0.459849
Real Estate and Rental and Leasing	17.0	-0.020129	0.750898	-2.839872	0.045896	0.103282	0.137811	0.765826
Professional, Scientific, and Technical Services	17.0	0.558919	0.616550	-0.159982	0.294503	0.371190	0.479809	2.560366
Administrative and Support and Waste Management and Remediation Services	17.0	0.624077	1.107781	0.199553	0.288098	0.316392	0.414378	4.898612
Educational Services	17.0	0.998488	1.852671	0.245662	0.330646	0.381841	0.432863	7.400604
Health Care and Social Assistance	17.0	0.333173	0.097509	0.231396	0.278687	0.321340	0.351317	0.577174
Arts, Entertainment, and Recreation	17.0	0.321795	0.678203	-1.465003	0.233150	0.371016	0.463711	1.795971
Accommodation and Food Service	17.0	0.378087	0.377177	0.181050	0.251479	0.309004	0.328612	1.825653
Other Services	17.0	0.245007	0.465302	-0.742499	0.208673	0.254474	0.315175	1.685388