jeudi 30 mai 2019

Working on a Financial scraper for Income Statements

I have a couple of functions first function gets me a df of the S&P 500 list. I then have a second function that will try to scrape the data from Yahoo Finance. Then write it to a MySQL table. What happens is it breaks on a Timer that doesn't have any Income Statement data.

a lot of web searches to try and skip a ticker if there isn't any data.

'''python def SP500_Tickers(): df = pd.read_html('http://en.wikipedia.org/wiki/List_of_S%26P_500_companies', header=0)[0] ticker = (df.Symbol) # print(ticker) return ticker def Income_Statement(): ticker = SP500_Tickers() mydb = mysql.connector.connect( host="localhost", user="invest", passwd="invest101", database="investing" )

mycursor = mydb.cursor()
today = date.today()
try:
    for tik in ticker:
        inc_state = pd.read_html('https://finance.yahoo.com/quote/' + tik + '/financials?p' + tik)

        if len(inc_state) < 1:
            pass
        else:
            # Dataframes Column 1
            IS_Date_C1 = inc_state[0][1][0]
            TotRevenue_C1 = inc_state[0][1][1]
            CostRevenue_C1 = inc_state[0][1][2]
            GrossProfit_C1 = inc_state[0][1][3]
            OE_RND_C1 = inc_state[0][1][5]
            OE_SellingAdmin_C1 = inc_state[0][1][6]
            OE_NonRecuring_C1 = inc_state[0][1][7]
            OE_Others_C1 = inc_state[0][1][8]
            OE_Total_C1 = inc_state[0][1][9]
            OperIncomeLoss_C1 = inc_state[0][1][10]
            TotOtherIncExp_C1 = inc_state[0][1][12]
            EarningBeforIntrestTax_C1 = inc_state[0][1][13]
            IntrestExpense_C1 = inc_state[0][1][14]
            IncomeB4Tax_C1 = inc_state[0][1][15]
            IncomeTaxExpense_C1 = inc_state[0][1][16]
            MinorityIntrest_C1 = inc_state[0][1][17]
            NetIncomeOps_C1 = inc_state[0][1][18]
            DiscontinuedOperations_C1 = inc_state[0][1][20]
            ExtraordinaryItems_C1 = inc_state[0][1][21]
            EffectAccountChange_C1 = inc_state[0][1][22]
            OtherItems_C1 = inc_state[0][1][23]
            NetIncome_C1 = inc_state[0][1][25]
            PreferredStock_C1 = inc_state[0][1][26]
            NetIncomeShares_C1 = inc_state[0][1][27]

            # Dataframes Column 2
            IS_Date_C2 = inc_state[0][2][0]
            TotRevenue_C2 = inc_state[0][2][1]
            CostRevenue_C2 = inc_state[0][2][2]
            GrossProfit_C2 = inc_state[0][2][3]
            OE_RND_C2 = inc_state[0][2][5]
            OE_SellingAdmin_C2 = inc_state[0][2][6]
            OE_NonRecuring_C2 = inc_state[0][2][7]
            OE_Others_C2 = inc_state[0][2][8]
            OE_Total_C2 = inc_state[0][2][9]
            OperIncomeLoss_C2 = inc_state[0][2][10]
            TotOtherIncExp_C2 = inc_state[0][2][12]
            EarningBeforIntrestTax_C2 = inc_state[0][2][13]
            IntrestExpense_C2 = inc_state[0][2][14]
            IncomeB4Tax_C2 = inc_state[0][2][15]
            IncomeTaxExpense_C2 = inc_state[0][2][16]
            MinorityIntrest_C2 = inc_state[0][2][17]
            NetIncomeOps_C2 = inc_state[0][2][18]
            DiscontinuedOperations_C2 = inc_state[0][2][20]
            ExtraordinaryItems_C2 = inc_state[0][2][21]
            EffectAccountChange_C2 = inc_state[0][2][22]
            OtherItems_C2 = inc_state[0][2][23]
            NetIncome_C2 = inc_state[0][2][25]
            PreferredStock_C2 = inc_state[0][2][26]
            NetIncomeShares_C2 = inc_state[0][2][27]

            # Dataframes Column 3
            IS_Date_C3 = inc_state[0][3][0]
            TotRevenue_C3 = inc_state[0][3][1]
            CostRevenue_C3 = inc_state[0][3][2]
            GrossProfit_C3 = inc_state[0][3][3]
            OE_RND_C3 = inc_state[0][3][5]
            OE_SellingAdmin_C3 = inc_state[0][3][6]
            OE_NonRecuring_C3 = inc_state[0][3][7]
            OE_Others_C3 = inc_state[0][3][8]
            OE_Total_C3 = inc_state[0][3][9]
            OperIncomeLoss_C3 = inc_state[0][3][10]
            TotOtherIncExp_C3 = inc_state[0][3][12]
            EarningBeforIntrestTax_C3 = inc_state[0][3][13]
            IntrestExpense_C3 = inc_state[0][3][14]
            IncomeB4Tax_C3 = inc_state[0][3][15]
            IncomeTaxExpense_C3 = inc_state[0][3][16]
            MinorityIntrest_C3 = inc_state[0][3][17]
            NetIncomeOps_C3 = inc_state[0][3][18]
            DiscontinuedOperations_C3 = inc_state[0][3][20]
            ExtraordinaryItems_C3 = inc_state[0][3][21]
            EffectAccountChange_C3 = inc_state[0][3][22]
            OtherItems_C3 = inc_state[0][3][23]
            NetIncome_C3 = inc_state[0][3][25]
            PreferredStock_C3 = inc_state[0][3][26]
            NetIncomeShares_C3 = inc_state[0][3][27]

            # Dataframes Column 4
            IS_Date_C4 = inc_state[0][4][0]
            TotRevenue_C4 = inc_state[0][4][1]
            CostRevenue_C4 = inc_state[0][4][2]
            GrossProfit_C4 = inc_state[0][4][3]
            OE_RND_C4 = inc_state[0][4][5]
            OE_SellingAdmin_C4 = inc_state[0][4][6]
            OE_NonRecuring_C4 = inc_state[0][4][7]
            OE_Others_C4 = inc_state[0][4][8]
            OE_Total_C4 = inc_state[0][4][9]
            OperIncomeLoss_C4 = inc_state[0][4][10]
            TotOtherIncExp_C4 = inc_state[0][4][12]
            EarningBeforIntrestTax_C4 = inc_state[0][4][13]
            IntrestExpense_C4 = inc_state[0][4][14]
            IncomeB4Tax_C4 = inc_state[0][4][15]
            IncomeTaxExpense_C4 = inc_state[0][4][16]
            MinorityIntrest_C4 = inc_state[0][4][17]
            NetIncomeOps_C4 = inc_state[0][4][18]
            DiscontinuedOperations_C4 = inc_state[0][4][20]
            ExtraordinaryItems_C4 = inc_state[0][4][21]
            EffectAccountChange_C4 = inc_state[0][4][22]
            OtherItems_C4 = inc_state[0][4][23]
            NetIncome_C4 = inc_state[0][4][25]
            PreferredStock_C4 = inc_state[0][4][26]
            NetIncomeShares_C4 = inc_state[0][4][27]

            print(tik)
            print(len(inc_state))
            print('Colunm 1')
            print(IS_Date_C1, TotRevenue_C1, CostRevenue_C1, GrossProfit_C1, OE_RND_C1, OE_SellingAdmin_C1, OE_NonRecuring_C1, OE_Others_C1, OE_Total_C1,
                  OperIncomeLoss_C1, TotOtherIncExp_C1, EarningBeforIntrestTax_C1, IntrestExpense_C1, IncomeB4Tax_C1, IncomeTaxExpense_C1,
                  MinorityIntrest_C1, NetIncomeOps_C1, DiscontinuedOperations_C1, ExtraordinaryItems_C1, EffectAccountChange_C1, OtherItems_C1,
                  NetIncome_C1, PreferredStock_C1, NetIncomeShares_C1)

            # print('Colunm 2')
            #
            # print(IS_Date_C2, TotRevenue_C2, CostRevenue_C2, GrossProfit_C2, OE_RND_C2, OE_SellingAdmin_C2, OE_NonRecuring_C2,
            #       OE_Others_C2, OE_Total_C2,
            #       OperIncomeLoss_C2, TotOtherIncExp_C2, EarningBeforIntrestTax_C2, IntrestExpense_C2, IncomeB4Tax_C2,
            #       IncomeTaxExpense_C2,
            #       MinorityIntrest_C2, NetIncomeOps_C2, DiscontinuedOperations_C2, ExtraordinaryItems_C2, EffectAccountChange_C2,
            #       OtherItems_C2,
            #       NetIncome_C2, PreferredStock_C2, NetIncomeShares_C2)

            # print('Colunm 3')
            #
            # print(IS_Date_C3, TotRevenue_C3, CostRevenue_C3, GrossProfit_C3, OE_RND_C3, OE_SellingAdmin_C3, OE_NonRecuring_C3,
            #       OE_Others_C3, OE_Total_C3,
            #       OperIncomeLoss_C3, TotOtherIncExp_C3, EarningBeforIntrestTax_C3, IntrestExpense_C3, IncomeB4Tax_C3,
            #       IncomeTaxExpense_C3,
            #       MinorityIntrest_C3, NetIncomeOps_C3, DiscontinuedOperations_C3, ExtraordinaryItems_C3, EffectAccountChange_C3,
            #       OtherItems_C3,
            #       NetIncome_C3, PreferredStock_C3, NetIncomeShares_C3)
            # print('Colunm 4')
            # print(IS_Date_C4, TotRevenue_C4, CostRevenue_C4, GrossProfit_C4, OE_RND_C4, OE_SellingAdmin_C4, OE_NonRecuring_C4,
            #       OE_Others_C4, OE_Total_C4,
            #       OperIncomeLoss_C4, TotOtherIncExp_C4, EarningBeforIntrestTax_C4, IntrestExpense_C4, IncomeB4Tax_C4,
            #       IncomeTaxExpense_C4,
            #       MinorityIntrest_C4, NetIncomeOps_C4, DiscontinuedOperations_C4, ExtraordinaryItems_C4, EffectAccountChange_C4,
            #       OtherItems_C4,
            #       NetIncome_C4, PreferredStock_C4, NetIncomeShares_C4)


            # mycursor.execute(
            #     "INSERT INTO IncomeStatement (Ticker, IS_Date, TotRevenue, CostRevenue, GrossProfit, OE_RND, OE_SellingAdmin, OE_NonRecuring, OE_Others, OE_Total, OperIncomeLoss, TotOtherIncExp, EarningBeforIntrestTax, IntrestExpense, IncomeB4Tax, IncomeTaxExpense, MinorityIntrest, NetIncomeOps, DiscontinuedOperations, ExtraordinaryItems, EffectAccountChange, OtherItems, NetIncome, PreferredStock, NetIncomeShares, Today ) VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')" % (
            #     tik, IS_Date_C1, TotRevenue_C1, CostRevenue_C1, GrossProfit_C1, OE_RND_C1, OE_SellingAdmin_C1, OE_NonRecuring_C1, OE_Others_C1, OE_Total_C1, OperIncomeLoss_C1, TotOtherIncExp_C1, EarningBeforIntrestTax_C1, IntrestExpense_C1, IncomeB4Tax_C1, IncomeTaxExpense_C1, MinorityIntrest_C1, NetIncomeOps_C1, DiscontinuedOperations_C1, ExtraordinaryItems_C1, EffectAccountChange_C1, OtherItems_C1, NetIncome_C1, PreferredStock_C1, NetIncomeShares_C1, today))
            # mycursor.execute(
            #     "INSERT INTO IncomeStatement (Ticker, IS_Date, TotRevenue, CostRevenue, GrossProfit, OE_RND, OE_SellingAdmin, OE_NonRecuring, OE_Others, OE_Total, OperIncomeLoss, TotOtherIncExp, EarningBeforIntrestTax, IntrestExpense, IncomeB4Tax, IncomeTaxExpense, MinorityIntrest, NetIncomeOps, DiscontinuedOperations, ExtraordinaryItems, EffectAccountChange, OtherItems, NetIncome, PreferredStock, NetIncomeShares, Today ) VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')" % (
            #         tik, IS_Date_C2, TotRevenue_C2, CostRevenue_C2, GrossProfit_C2, OE_RND_C2, OE_SellingAdmin_C2,
            #         OE_NonRecuring_C2, OE_Others_C2, OE_Total_C2, OperIncomeLoss_C2, TotOtherIncExp_C2,
            #         EarningBeforIntrestTax_C2, IntrestExpense_C2, IncomeB4Tax_C2, IncomeTaxExpense_C2, MinorityIntrest_C2,
            #         NetIncomeOps_C2, DiscontinuedOperations_C2, ExtraordinaryItems_C2, EffectAccountChange_C2, OtherItems_C2,
            #         NetIncome_C2, PreferredStock_C2, NetIncomeShares_C2, today))
            # mycursor.execute(
            #     "INSERT INTO IncomeStatement (Ticker, IS_Date, TotRevenue, CostRevenue, GrossProfit, OE_RND, OE_SellingAdmin, OE_NonRecuring, OE_Others, OE_Total, OperIncomeLoss, TotOtherIncExp, EarningBeforIntrestTax, IntrestExpense, IncomeB4Tax, IncomeTaxExpense, MinorityIntrest, NetIncomeOps, DiscontinuedOperations, ExtraordinaryItems, EffectAccountChange, OtherItems, NetIncome, PreferredStock, NetIncomeShares, Today ) VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')" % (
            #         tik, IS_Date_C3, TotRevenue_C3, CostRevenue_C3, GrossProfit_C3, OE_RND_C3, OE_SellingAdmin_C3,
            #         OE_NonRecuring_C3, OE_Others_C3, OE_Total_C3, OperIncomeLoss_C3, TotOtherIncExp_C3,
            #         EarningBeforIntrestTax_C3, IntrestExpense_C3, IncomeB4Tax_C3, IncomeTaxExpense_C3, MinorityIntrest_C3,
            #         NetIncomeOps_C3, DiscontinuedOperations_C3, ExtraordinaryItems_C3, EffectAccountChange_C3, OtherItems_C3,
            #         NetIncome_C3, PreferredStock_C3, NetIncomeShares_C3, today))
            # mycursor.execute(
            #     "INSERT INTO IncomeStatement (Ticker, IS_Date, TotRevenue, CostRevenue, GrossProfit, OE_RND, OE_SellingAdmin, OE_NonRecuring, OE_Others, OE_Total, OperIncomeLoss, TotOtherIncExp, EarningBeforIntrestTax, IntrestExpense, IncomeB4Tax, IncomeTaxExpense, MinorityIntrest, NetIncomeOps, DiscontinuedOperations, ExtraordinaryItems, EffectAccountChange, OtherItems, NetIncome, PreferredStock, NetIncomeShares, Today ) VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')" % (
            #         tik, IS_Date_C4, TotRevenue_C4, CostRevenue_C4, GrossProfit_C4, OE_RND_C4, OE_SellingAdmin_C4,
            #         OE_NonRecuring_C4, OE_Others_C4, OE_Total_C4, OperIncomeLoss_C4, TotOtherIncExp_C4,
            #         EarningBeforIntrestTax_C4, IntrestExpense_C4, IncomeB4Tax_C4, IncomeTaxExpense_C4, MinorityIntrest_C4,
            #         NetIncomeOps_C4, DiscontinuedOperations_C4, ExtraordinaryItems_C4, EffectAccountChange_C4, OtherItems_C4,
            #         NetIncome_C4, PreferredStock_C4, NetIncomeShares_C4, today))
            #
            # mydb.commit()
except KeyboardInterrupt:
    pass

Income_Statement()

'''

I am trying to get the if statement to by pass the bad ticker and go on its merry way. Sorry code is ugly I am noob.

Aucun commentaire:

Enregistrer un commentaire