CSV

Pure BASH to remove spaces line by line

Blog tags: 

Here is a neat little script I wrote to remove the spaces that follow commas in a CSV file, repeatedly and line by line, using only pure Bash.

#!/bin/bash
# Strip the spaces that follow commas in a CSV file, one line at a time,
# using only Bash built-ins. The cleaned lines are written to stdout.
#
# Usage: script [INPUT_CSV]    (INPUT_CSV defaults to "test.csv")
INPUT_CSV="${1:-test.csv}"

if [[ ! -r "${INPUT_CSV}" ]]; then
        echo "cannot read input file: ${INPUT_CSV}" >&2
        exit 1
fi

# Load the file into an array, one element per line (-t drops the newline).
# No IFS change is needed: readarray splits on newlines only.
readarray -t arry < "${INPUT_CSV}"

for i in "${arry[@]}"
do
        # One global substitution turns ",  x" into ", x", so repeat until
        # no comma-space pair is left on the line.
        while [[ "${i}" == *", "* ]]; do
                i=${i//, /,}
        done
        # Quote the expansion so the remaining whitespace and any glob
        # characters in the data are printed verbatim.
        printf '%s\n' "${i}"
done

Simple Python Script to Retrieve CSV Reports of Bugs on GNOME's Bugzilla while parsing values

Blog tags: 

For what should have been a trivial script, I had to add proxy functionality to grab all of the files from GNOME's Bugzilla instance; if only they had an interface for researchers. You may need to source your own list of proxies.

import csv
import fnmatch
import os
import random
import string
import subprocess
import sys
from random import randint
from time import sleep
from urllib.parse import quote

# Accumulates the [basename, directory] pairs appended by parse_git_dir_files.
ARRAY_OF_FILES = []

# "host:port" proxies; one is picked at random for every download attempt.
# A few entries appear twice, which makes them proportionally more likely
# to be chosen.
LISTOFPROXIES = [
    "107.182.17.9:8089",
    "199.200.120.36:7808",
    "61.157.126.37:18000",
    "61.163.165.250:9999",
    "124.9.193.85:21320",
    "211.144.81.68:18000",
    "186.3.60.214:80",
    "109.197.55.7:3128",
    "216.246.109.93:7808",
    "67.81.129.246:21320",
    "86.107.110.73:8089",
    "148.163.92.84:8089",
    "199.200.120.36:8089",
    "199.200.120.37:8089",
    "60.234.51.18:8118",
    "202.29.235.130:3129",
    "137.135.166.225:8128",
    "211.144.81.69:18000",
    "148.163.92.84:8089",
    "66.186.2.163:443",
    "162.209.127.86:3128",
    "222.85.1.123:8118",
    "221.228.248.248:8085",
    "216.246.109.93:7808",
    "223.64.56.33:8123",
    "54.228.234.9:443",
    "184.170.253.22:30264",
    "164.109.49.91:80",
    "67.81.129.246:21320",
    "65.220.79.222:80",
    "54.152.174.124:8080",
    "148.163.92.84:80",
    "200.103.97.218:80",
    "199.233.244.70:3128",
    "64.251.15.49:80",
    "86.107.110.73:7808",
    "107.182.17.9:7808",
    "204.12.211.218:8118",
    "109.236.81.101:35478",
]

def parse_bug_file(filename, output_path="zilladir/buglist.csv"):
    """Append the bugs listed in one downloaded Bugzilla CSV to the bug list.

    Every data row of ``filename`` becomes one line of the form
    ``<filename>,1,<value>`` in ``output_path``, where ``<value>`` is the
    row's sixth column with ``---`` replaced by ``OPEN``.
    NOTE(review): the sixth column is presumably the bug's status or
    resolution field -- confirm against the CSV column order Bugzilla returns.

    Args:
        filename: Path of the CSV report to read. Its first row is treated
            as the header and is never written out.
        output_path: File that the bug lines are appended to.

    Returns:
        The number of bug lines appended; 0 when the report is empty or
        holds nothing but the header row.
    """
    print(filename)
    if os.path.getsize(filename) == 0:
        print("no bugs detected - file is empty")
        return 0

    print("Opening bugzilla file: ", filename)
    with open(filename, newline="") as f:
        f_csv = csv.reader(f)
        headers = next(f_csv, None)
        # Rows too short to hold the sixth column (e.g. blank lines) are
        # skipped instead of raising IndexError.
        bugs = [
            [filename, 1, row[5].replace("---", "OPEN")]
            for row in f_csv
            if len(row) > 5
        ]

    if not bugs:
        print(headers)
        print("no bugs detected - only headers")
        return 0

    # Open the output only once there is something to write, so no handle
    # is left dangling on the early-return paths above.
    with open(output_path, "a", newline="") as o:
        csv.writer(o, lineterminator="\n").writerows(bugs)
    print("wrote changes to " + output_path)
    return len(bugs)

def parse_git_dir_files(directory, pattern):
    """Record every file below ``directory`` whose name matches ``pattern``.

    The tree is traversed with os.walk. For each matching file a
    ``[basename, containing_directory]`` pair is appended to the module-level
    ARRAY_OF_FILES, with backslashes in the directory path converted to
    forward slashes.
    """
    for current_dir, _subdirs, names in os.walk(directory):
        slashed_dir = current_dir.replace('\\', '/')
        ARRAY_OF_FILES.extend(
            [name, slashed_dir] for name in fnmatch.filter(names, pattern)
        )
               

# Collect the C source and header files found below a directory
def retrieve_files_in_git_dir(path):
    """Record every ``*.c`` file and then every ``*.h`` file below ``path``."""
    for pattern in ('*.c', '*.h'):
        parse_git_dir_files(path, pattern)

def proxy_http_getter(filename, max_attempts=10):
    """Download the Bugzilla CSV bug list that mentions ``filename``.

    The query is sent with ``curl`` through a proxy picked at random from
    LISTOFPROXIES, and the response body is stored in
    ``zilladir/<filename>.csv``. An empty response file is treated as a
    failed attempt and retried through another randomly chosen proxy.

    Args:
        filename: Name to search for; also used to name the output file.
        max_attempts: Upper bound on download attempts, so a list of dead
            proxies cannot make the call loop forever.

    Returns:
        True once a non-empty response has been saved, False if every
        attempt produced an empty file.
    """
    url = (
        "https://bugzilla.gnome.org/buglist.cgi?bug_status=__all__"
        "&content=" + quote(filename, safe="")
        + "&no_redirect=1&product=gtk%2B&query_format=specific"
        "&ctype=csv&human=1"
    )
    out_path = "zilladir/" + filename + ".csv"

    for _ in range(max_attempts):
        # An argument list (no shell) keeps '&' in the URL and any unusual
        # characters in the filename from being interpreted as shell syntax.
        cmd = ["curl", "-s", "-L", "-x", random.choice(LISTOFPROXIES), url]
        print(" ".join(cmd))
        # Opening the file here means it exists even when curl fails, so the
        # size check below never hits a missing file.
        with open(out_path, "wb") as out:
            subprocess.run(cmd, stdout=out, check=False)
        if os.stat(out_path).st_size > 0:
            return True
        print("error")
    return False
   
def bugzilla_ops(source_dir):
    """Download and parse the Bugzilla report for each file under ``source_dir``.

    The files are collected by retrieve_files_in_git_dir into ARRAY_OF_FILES.
    For each entry the report is fetched with proxy_http_getter and then
    handed to parse_bug_file, with a random pause of 3 to 17 seconds after
    each file.

    Args:
        source_dir: Root directory of the source tree to scan.
    """
    # Downloads and the combined bug list live here; create it on first run
    # and keep whatever earlier runs already stored.
    os.makedirs("zilladir", exist_ok=True)
    retrieve_files_in_git_dir(source_dir)
    for file_element in ARRAY_OF_FILES:
        basename = file_element[0]
        proxy_http_getter(basename)
        parse_bug_file("zilladir/" + basename + ".csv")
        # Presumably spaces out the requests so the server is not hammered.
        sleep(randint(3, 17))

if __name__ == "__main__":
    # Start the crawl only when run as a script, not when imported.
    bugzilla_ops("gtk+")

Subscribe to RSS - CSV