Pure BASH to remove spaces line by line

Submitted by admin on Mon, 12/04/2017 - 09:58

Blog tags:

Here is a neat little script I wrote to strip the spaces that follow commas in a CSV file, line by line, using only pure Bash:

#!/bin/bash
#
# Strip the spaces that directly follow a comma in a CSV file, line by line,
# using only Bash built-ins. The cleaned lines are written to stdout.
#
# Usage: this_script [CSV_FILE]
#   CSV_FILE  file to clean; defaults to test.csv in the current directory.

INPUT_CSV="${1:-test.csv}"

if [[ ! -r "${INPUT_CSV}" ]]; then
    printf 'cannot read input file: %s\n' "${INPUT_CSV}" >&2
    exit 1
fi

# -t drops the trailing newline of every line as it is stored in the array.
readarray -t lines < "${INPUT_CSV}"

for line in "${lines[@]}"; do
    # One pass of ${line//, /,} turns a comma followed by two spaces into a
    # comma followed by one space, so repeat until no comma-space pair is left.
    while [[ "${line}" == *", "* ]]; do
        line=${line//, /,}
    done

    # Quoted printf keeps the line intact: no word splitting, no glob
    # expansion, and no risk of a leading "-n"/"-e" being taken as an option.
    printf '%s\n' "${line}"
done

Simple Python Script to Retrieve CSV Reports of Bugs on GNOME's Bugzilla while parsing values

Submitted by admin on Mon, 03/09/2015 - 13:11

Blog tags:

What should have been a trivial script grew a little: I had to add proxy functionality in order to grab all of the files from GNOME's Bugzilla instance - if only they had an interface for researchers. You may need to source your own list of proxies...

import csv
import fnmatch
import os
import random
import string
import subprocess
import sys
from random import randint
from time import sleep
# Module-level accumulator; the directory walk appends [basename, directory]
# pairs to it.
ARRAY_OF_FILES = []

# Proxy endpoints in "host:port" form; one is picked at random per download.
# A few entries are listed twice, which makes them more likely to be picked.
LISTOFPROXIES = [
    "107.182.17.9:8089",
    "199.200.120.36:7808",
    "61.157.126.37:18000",
    "61.163.165.250:9999",
    "124.9.193.85:21320",
    "211.144.81.68:18000",
    "186.3.60.214:80",
    "109.197.55.7:3128",
    "216.246.109.93:7808",
    "67.81.129.246:21320",
    "86.107.110.73:8089",
    "148.163.92.84:8089",
    "199.200.120.36:8089",
    "199.200.120.37:8089",
    "60.234.51.18:8118",
    "202.29.235.130:3129",
    "137.135.166.225:8128",
    "211.144.81.69:18000",
    "148.163.92.84:8089",
    "66.186.2.163:443",
    "162.209.127.86:3128",
    "222.85.1.123:8118",
    "221.228.248.248:8085",
    "216.246.109.93:7808",
    "223.64.56.33:8123",
    "54.228.234.9:443",
    "184.170.253.22:30264",
    "164.109.49.91:80",
    "67.81.129.246:21320",
    "65.220.79.222:80",
    "54.152.174.124:8080",
    "148.163.92.84:80",
    "200.103.97.218:80",
    "199.233.244.70:3128",
    "64.251.15.49:80",
    "86.107.110.73:7808",
    "107.182.17.9:7808",
    "204.12.211.218:8118",
    "109.236.81.101:35478",
]
def parse_bug_file(filename):
    """Append the bugs listed in one downloaded CSV report to the summary file.

    Each data row of *filename* becomes one ``filename,1,value`` line in
    ``zilladir/buglist.csv``, where ``value`` is the row's sixth column
    (index 5) with the placeholder ``---`` rewritten to ``OPEN``.

    Args:
        filename: Path of the CSV report to read. The first row is treated
            as the header row and is never written out.

    Returns:
        The number of rows appended to ``zilladir/buglist.csv``; 0 when the
        report is empty or contains nothing but the header row.

    Raises:
        OSError: If *filename* cannot be inspected or opened, or the summary
            file cannot be written.
    """
    print(filename)

    # Check for an empty download before touching the summary file, so that
    # nothing is opened (or left open) when there is nothing to record.
    if os.path.getsize(filename) == 0:
        print("no bugs detected - file is empty")
        return 0

    print("Opening bugzilla file: ", filename)

    # newline="" lets the csv module handle line endings inside quoted fields.
    with open(filename, newline="") as f:
        f_csv = csv.reader(f)
        headers = next(f_csv, None)
        # Rows too short to have a sixth column are skipped instead of
        # aborting the whole report with an IndexError.
        bugs = [
            [filename, 1, row[5].replace("---", "OPEN")]
            for row in f_csv
            if len(row) > 5
        ]

    if not bugs:
        print(headers)
        print("no bugs detected - only headers")
        return 0

    os.makedirs("zilladir", exist_ok=True)
    with open("zilladir/buglist.csv", "a", newline="") as o:
        # The writer quotes fields that contain commas; "\n" keeps the
        # one-record-per-line layout of the summary file.
        csv.writer(o, lineterminator="\n").writerows(bugs)
    print("wrote changes to zilladir/buglist.csv")

    return len(bugs)
# Walk a directory tree and record every file whose name matches a pattern

def parse_git_dir_files(directory, pattern):
    """Record the files under *directory* whose basename matches *pattern*.

    For each match a ``[basename, containing_directory]`` pair is appended to
    the module-level ``ARRAY_OF_FILES`` list. Nothing is returned.

    Args:
        directory: Root of the tree to walk with ``os.walk``.
        pattern: ``fnmatch``-style glob applied to each basename.
    """
    for root, _subdirs, names in os.walk(directory):
        # Backslashes in the directory part are turned into forward slashes.
        posix_root = root.replace('\\', '/')
        ARRAY_OF_FILES.extend(
            [name, posix_root]
            for name in names
            if fnmatch.fnmatch(name, pattern)
        )

# Collect the *.c and *.h files found below a directory

def retrieve_files_in_git_dir(path):
    """Run parse_git_dir_files over *path* for ``*.c`` and then ``*.h``."""
    for pattern in ('*.c', '*.h'):
        parse_git_dir_files(path, pattern)

def proxy_http_getter(filename):
    """Download the Bugzilla CSV bug list for *filename* through a proxy.

    Runs ``curl`` through a proxy chosen at random from ``LISTOFPROXIES`` and
    stores the response in ``zilladir/<filename>.csv``. While the stored file
    is empty the download is repeated with a freshly chosen proxy.

    Args:
        filename: Name substituted into the ``content=`` query parameter and
            used to build the output file name.

    Raises:
        OSError: If ``curl`` cannot be started or the output file cannot be
            written.
    """
    url = (
        "https://bugzilla.gnome.org/buglist.cgi?bug_status=__all__"
        "&content=FILENAME&no_redirect=1&product=gtk+"
        "&query_format=specific&ctype=csv&human=1"
    ).replace("FILENAME", filename)
    target = "zilladir/" + filename + ".csv"

    os.makedirs("zilladir", exist_ok=True)

    while True:
        proxy = random.choice(LISTOFPROXIES)
        # An argument list (no shell) means characters in the file name can
        # never be interpreted as shell syntax.
        cmd = ["curl", "-s", "-L", "-x", proxy, url]
        print(" ".join(cmd) + " > " + target)

        # Opening with "wb" truncates any earlier attempt before curl writes.
        with open(target, "wb") as out:
            subprocess.call(cmd, stdout=out)

        if os.stat(target).st_size != 0:
            return

        # Nothing was stored: report it and retry with another proxy.
        print("error")

def bugzilla_ops(source_dir):
    """Fetch and parse a Bugzilla report for each file found in *source_dir*.

    The files are first gathered into ``ARRAY_OF_FILES`` by
    ``retrieve_files_in_git_dir``. Each one is then downloaded with
    ``proxy_http_getter`` and its ``zilladir/<name>.csv`` report is handed to
    ``parse_bug_file``, with a random pause of 3 to 17 seconds after every
    file.

    Args:
        source_dir: Directory whose files are looked up.
    """
    # NOTE: the zilladir output directory is not reset here; the rm/mkdir
    # calls that used to do so are disabled.
    retrieve_files_in_git_dir(source_dir)

    for entry in ARRAY_OF_FILES:
        source_name = entry[0]
        proxy_http_getter(source_name)

        report_path = "zilladir/" + source_name + ".csv"
        parse_bug_file(report_path)

        sleep(randint(3, 17))
# Start the run with "gtk+" as the source directory. This is a top-level
# statement, so it executes on import as well as when the file is run directly.
bugzilla_ops("gtk+")

Pacific Simplicity

CSV

Pure BASH to remove spaces line by line

Blog tags:

Simple Python Script to Retrieve CSV Reports of Bugs on GNOME's Bugzilla while parsing values

Blog tags:

Tweets

Blog tags:

Blog tags:

Search form

Tweets