Skip to content

Commit

Permalink
adding redaction of cause number
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolassaw committed Sep 13, 2024
1 parent 91f21ae commit 9c08b42
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions src/cleaner/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
import json, argparse, os, datetime as dt, xxhash
from azure.cosmos import CosmosClient, exceptions
from dotenv import load_dotenv

class Cleaner:

def __init__(self, county):
self.county = county.lower()

def redact_cause_number(self, input_dict: dict, out_file: dict) -> dict:
    """Hash the cause number and record the digest on the output record.

    Args:
        input_dict: Parsed case data; its ``"code"`` entry is the cause number.
        out_file: Cleaned-case record being assembled. It is modified in place.

    Returns:
        The same ``out_file`` object, now carrying the hex digest under
        ``"cause_number_redacted"``.

    Raises:
        KeyError: If ``input_dict`` has no ``"code"`` entry.
    """
    # The value is stringified first so non-string codes hash consistently.
    raw_cause_number = str(input_dict["code"])
    # NOTE(review): xxh64 is a fast, non-cryptographic hash. Confirm it is an
    # acceptable redaction for cause numbers, which may be guessable.
    out_file["cause_number_redacted"] = xxhash.xxh64(raw_cause_number).hexdigest()
    return out_file

def clean(self):

case_json_folder_path = os.path.join(
Expand All @@ -26,7 +31,7 @@ def clean(self):
list_case_json_files = os.listdir(case_json_folder_path)
for case_json in list_case_json_files:
print(case_json)
# List of motions identified as evidenciary
# List of motions identified as evidentiary. TODO: These should be moved to a separate JSON in resources
good_motions = [
"Motion To Suppress",
"Motion to Reduce Bond",
Expand Down Expand Up @@ -61,7 +66,6 @@ def clean(self):
charge_name_to_umich = charge_name_to_umich_dict
# Cleaned Case Primary format
out_file = {}
out_file["case_number"] = input_dict["code"] #Note: This may be closed to personally identifying information of the defendant.
out_file["attorney_type"] = input_dict["party information"]["appointed or retained"]
#Adding the county and hash values into the final version.
out_file["county"] = input_dict["county"]
Expand Down Expand Up @@ -112,11 +116,12 @@ def contains_good_motion(motion, event):
def_atty_hash = xxhash.xxh64(str(def_atty_unique_str)).hexdigest()
out_file["defense attorney"] = def_atty_hash

out_file = self.redact_cause_number(input_dict, out_file)

# Original Format
out_filepath = os.path.join(
os.path.dirname(__file__), "..", "..", "data", self.county, "case_json_cleaned",case_json
)

with open(out_filepath, "w") as f:
json.dump(out_file, f)

0 comments on commit 9c08b42

Please sign in to comment.