Question: OK, let's keep this one simple: I need the JOB column to print data. The job data is on lines that start with JC or AC.

OK, let's keep this one simple: I need the "JOB" column to print data. The job data is on lines that start with "JC" or "AC".
import re
import pandas as pd
import subprocess
import sys
# Make sure the optional third-party packages are importable, installing
# them with the running interpreter's pip when they are missing.

# Check if tabulate is installed (pandas' DataFrame.to_markdown relies on it)
try:
    import tabulate
except ImportError:
    # Install tabulate if not found, then retry the import
    subprocess.check_call([sys.executable, "-m", "pip", "install", "tabulate"])
    import tabulate

# Check if openpyxl is installed (used as the Excel writer engine)
try:
    import openpyxl
except ImportError:
    # Install openpyxl if not found, then retry the import
    subprocess.check_call([sys.executable, "-m", "pip", "install", "openpyxl"])
    import openpyxl
# Define the file paths (using forward slashes)
file_path = 'C:/Users/collbb/Downloads/test1.txt'
output_file_path = 'C:/Users/collbb/Downloads/output_test.xlsx'

# Regular expressions for extracting data (compiled once, reused for every line)
ip_regex = re.compile(r"IP=([\d.]+)")
path_regex = re.compile(r"CD\s+/FTP/PUBLIC/(\S+)")
# NOTE(review): the middle group is three digits ("adjusted date format" in
# the original script) -- confirm against the real log before changing it.
date_regex = re.compile(r"(\d{2}/\d{3}/\d{4})")
# Job names start with "JC" or "AC". The word boundary keeps the pattern from
# firing in the middle of an unrelated token.
job_regex = re.compile(r"\b((?:JC|AC)\S+)")

# Column order of the resulting table; also used so that an empty result
# still produces a sheet with headers.
COLUMNS = ["IP", "CD /FTP/PUBLIC/", "Date", "FTP_Action", "Status", "Job"]


def _find_job(lines, start, window=4):
    """Return the first JC/AC job token found in lines[start:start + window].

    Each candidate line is stripped before matching. Returns None when no
    line in the window contains a job token.
    """
    for candidate in lines[start:start + window]:
        job_match = job_regex.search(candidate.strip())
        if job_match:
            return job_match.group(1)
    return None


def parse_ftp_records(lines):
    """Build one record per "// EXEC FTPBATCH" line found in *lines*.

    For every such line the IP is read from the line itself; the path and
    date are taken from the following lines up to (not including) the next
    line that starts with "*****" -- when several lines match, the last match
    wins. The job name is the first JC/AC token within the next 4 lines.

    Returns a list of dicts keyed by the names in COLUMNS. Fields that could
    not be extracted are None.
    """
    processed_data = []
    total = len(lines)

    for i, raw_line in enumerate(lines):
        line = raw_line.strip()
        if not line.startswith("// EXEC FTPBATCH"):
            continue

        ip_match = ip_regex.search(line)
        ip = ip_match.group(1) if ip_match else None
        if ip is None:
            # The original guarded this with an unreachable except-clause, so
            # the warning was never shown; test the match result directly.
            print(f"Warning: Could not extract IP on line {i+1}")

        ftp_action = "Upload"  # Assuming FTPBATCH implies upload action

        # Look ahead for the path and date until the record separator
        path = date = None
        for j in range(i + 1, total):
            if lines[j].startswith("*****"):
                break
            path_match = path_regex.search(lines[j])
            if path_match:
                path = path_match.group(1)
            date_match = date_regex.search(lines[j])
            if date_match:
                date = date_match.group(1)

        processed_data.append({
            "IP": ip,
            "CD /FTP/PUBLIC/": path,
            "Date": date,
            "FTP_Action": ftp_action,
            "Status": "Success",
            "Job": _find_job(lines, i + 1),
        })

    return processed_data


def main():
    """Read the log file, print a preview table and write the Excel sheet."""
    # Read the data from the file and split it into lines
    with open(file_path, "r") as file:
        lines = file.read().splitlines()

    processed_data = parse_ftp_records(lines)

    # Create a pandas DataFrame from the processed data
    df = pd.DataFrame(processed_data, columns=COLUMNS)

    # Print the first 5 rows of the DataFrame
    print(df.head().to_markdown(index=False, numalign="left", stralign="left"))

    # Create an Excel spreadsheet and save the DataFrame
    with pd.ExcelWriter(output_file_path, engine="openpyxl") as writer:
        df.to_excel(writer, sheet_name="FTP Data", index=False)


if __name__ == "__main__":
    main()
Ok let's make this one simple need the

Step by Step Solution

There are 3 Steps involved in it

1 Expert Approved Answer
Step: 1 Unlock blur-text-image
Question Has Been Solved by an Expert!

Get step-by-step solutions from verified subject matter experts

Step: 2 Unlock
Step: 3 Unlock

Students Have Also Explored These Related Programming Questions!