mardi 3 septembre 2019

Why isnt this if statement working in python

I have a program which adds array data to a database. I want to check and see if a record already exists. If the record already exists, then I don't want to add it to the array ( and eventually the database) . I created an if else statement to only add the data to the array if it doesn't already exist in the database. However, when I check to see if the duplicate data is added in the database, I see the duplicate entries added:

test=# select count(*) from cert;
count 
-------
100
(1 row)

test=# select count(*) from cert;
count 
-------
200
(1 row)

test=# 

the second hundred added are all duplicate entries

Here is my Python code where I am attempting to filter these duplicates out:

import json
import json, urllib, requests
import psycopg2
from psycopg2.extras import execute_values

def cleanse(inputData):
    out = inputData #data cleanse function
    out = out.replace("[", "")
    out = out.replace("]", "")
    out = out.replace('"', '')
    out = out.replace("", "")
    out = out.replace(" ", "")
    out = out.replace(" ", "")
    out = ''.join(out.split())
    return out

#connect to the databse
try:
    conn = psycopg2.connect("dbname='test' user='postgres' host='localhost' password='test_password' ")
except:
    print("Database Connection Failure")

cur = conn.cursor()

#access the API
url = "https://api.certspotter.com/v1/issuances?domain=google.com&include_subdomains=true&expand=dns_names&expand=issuer&expand=cert"
response = urllib.request.urlopen(url)
fulljson = json.loads(response.read())
length = len(fulljson)

issuerSubJson = json.dumps(fulljson[1]["issuer"]) 
issuerSubJson2 = json.loads(issuerSubJson)

#create 2d array
certgrab_array = [[0 for x in range(11)] for y in range(length)]

#traverse the 2d array and add values from json elements
for i in range(0, length):
    row_count = 0
    idcheck = cleanse(json.dumps(fulljson[i]["id"]))
    cur.execute("SELECT id from cert where id = %s;", [idcheck])
    print(idcheck)
    row_count = cur.rowcount
    if row_count > 0:
        exists = True

    else:
        exists = False
    print(exists)
    if not exists:

        column = 0
        #create a sub json variable for the "issuer" sub attributes
        issuerSubJson = json.dumps(fulljson[i]["issuer"]) 
        issuerSubJson2 = json.loads(issuerSubJson)

        id1 = cleanse(json.dumps(fulljson[i]["id"]))
        certgrab_array[i][column] = id1

        tbs_sha256 = cleanse(json.dumps(fulljson[i]["tbs_sha256"]))
        certgrab_array[i][1] = tbs_sha256

        dns = cleanse(json.dumps(fulljson[i]["dns_names"]))

        certgrab_array[i][2] = dns

        pubkey_sha256 = cleanse(json.dumps(fulljson[i]["pubkey_sha256"]))
        certgrab_array[i][3] = pubkey_sha256

        j = 4
        name = cleanse(json.dumps(issuerSubJson2["name"]))
        certgrab_array[i][j] = name
        j = j + 1
        issuerSha256 = cleanse(json.dumps(issuerSubJson2["pubkey_sha256"]))
        certgrab_array[i][j] = issuerSha256

        not_before = cleanse(json.dumps(fulljson[i]["not_before"]))
        certgrab_array[i][6] = not_before

        not_after = cleanse(json.dumps(fulljson[i]["not_after"]))
        certgrab_array[i][7] = not_after

        p = 8
      #Create a sub json variable for the "cert" sub attributes
        certSubJson = json.dumps(fulljson[i]["cert"])
        certSubJson2 = json.loads(certSubJson)

        certType = cleanse(json.dumps(certSubJson2["type"]))
        certgrab_array[i][p] = certType
        p = p + 1

        sha256 = cleanse(json.dumps(certSubJson2["sha256"]))
        certgrab_array[i][p] = sha256
        p = p + 1
        data = cleanse(json.dumps(certSubJson2["data"]))
        certgrab_array[i][p] = data
    else:
        continue                                                                       
insert_query = "INSERT INTO cert(id, tbs_sha256, dns_names, pubkey_sha256, issuer_name, issuer_pubkey_sha256, not_before, not_after, cert_type, cert_hash, cert_data) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"

cur.executemany(insert_query, certgrab_array)
conn.commit()
cur.close()
conn.close()

The exists and idcheck variables get their intended values:

418675152
True
526898343
True
526994364
True
528715730
True
528738608
True
529648183
True

Aucun commentaire:

Enregistrer un commentaire