import pandas as pd
import numpy as np!head pvcs.csvNAME,STATUS,VOLUME,CAPACITY,ACCESSMODES,AGE
claim-redacted-205,Bound,pvc-5643aadf-ddb3-11e6-98ef-42010af000c3,10Gi,RWO,21d
claim-redacted-1088,Bound,pvc-bc68d752-ecba-11e6-98ef-42010af000c3,10Gi,RWO,2d
claim-redacted-389,Bound,pvc-5d67d43e-ddcb-11e6-98ef-42010af000c3,10Gi,RWO,21d
claim-redacted-119,Bound,pvc-73c5d644-ddb1-11e6-98ef-42010af000c3,10Gi,RWO,21d
claim-redacted-64,Bound,pvc-3c2345e4-dd9f-11e6-98ef-42010af000c3,10Gi,RWO,21d
claim-redacted-192,Bound,pvc-51b672a8-ddb3-11e6-98ef-42010af000c3,10Gi,RWO,21d
claim-redacted-814,Bound,pvc-2b83ae02-dee0-11e6-98ef-42010af000c3,10Gi,RWO,20d
claim-redacted-775,Bound,pvc-ec1d47be-deb0-11e6-98ef-42010af000c3,10Gi,RWO,20d
claim-redacted-954,Bound,pvc-5a0acdc5-e3ee-11e6-98ef-42010af000c3,10Gi,RWO,13d
df = pd.DataFrame.from_csv('pvcs.csv')
df.head()| STATUS | VOLUME | CAPACITY | ACCESSMODES | AGE | |
|---|---|---|---|---|---|
| NAME | |||||
| claim-redacted-205 | Bound | pvc-5643aadf-ddb3-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d |
| claim-redacted-1088 | Bound | pvc-bc68d752-ecba-11e6-98ef-42010af000c3 | 10Gi | RWO | 2d |
| claim-redacted-389 | Bound | pvc-5d67d43e-ddcb-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d |
| claim-redacted-119 | Bound | pvc-73c5d644-ddb1-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d |
| claim-redacted-64 | Bound | pvc-3c2345e4-dd9f-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d |
df['username'] = np.array(df.index.str.extract('\w+-([\w-]+)-\d+$'))
df['id'] = np.array(df.index.str.extract('\w+-\w+-(\d+)$'))
df.head()/Users/redacted/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning: currently extract(expand=None) means expand=False (return Index/Series/DataFrame) but in a future version of pandas this will be changed to expand=True (return DataFrame)
if __name__ == '__main__':
/Users/redacted/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:2: FutureWarning: currently extract(expand=None) means expand=False (return Index/Series/DataFrame) but in a future version of pandas this will be changed to expand=True (return DataFrame)
from ipykernel import kernelapp as app
| STATUS | VOLUME | CAPACITY | ACCESSMODES | AGE | username | id | ||
|---|---|---|---|---|---|---|---|---|
| NAME | ||||||||
| claim-redacted-205 | Bound | pvc-5643aadf-ddb3-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 205 | |
| claim-redacted-1088 | Bound | pvc-bc68d752-ecba-11e6-98ef-42010af000c3 | 10Gi | RWO | 2d | redacted | 1088 | |
| claim-redacted-389 | Bound | pvc-5d67d43e-ddcb-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 389 | |
| claim-redacted-119 | Bound | pvc-73c5d644-ddb1-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 119 | |
| claim-redacted-64 | Bound | pvc-3c2345e4-dd9f-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 64 |
df.tail()| STATUS | VOLUME | CAPACITY | ACCESSMODES | AGE | username | id | |
|---|---|---|---|---|---|---|---|
| NAME | |||||||
| claim-redacted-562 | Bound | pvc-4418d1d6-de0a-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 562 |
| claim-redacted-400 | Bound | pvc-2399fe54-ddcf-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 400 |
| claim-redacted-946 | Bound | pvc-294cb2c7-e35e-11e6-98ef-42010af000c3 | 10Gi | RWO | 14d | redacted | 946 |
| claim-redacted-900 | Bound | pvc-f6859607-e1c0-11e6-98ef-42010af000c3 | 10Gi | RWO | 16d | redacted | 900 |
| hub-db-dir | Bound | pvc-ea8a0bef-dd44-11e6-98ef-42010af000c3 | 10Gi | RWO | 22d | NaN | NaN |
valids = df[~df['AGE'].str.contains('m')].dropna()
valids = valids[valids['id'] != '14']
valids.head()| STATUS | VOLUME | CAPACITY | ACCESSMODES | AGE | username | id | |
|---|---|---|---|---|---|---|---|
| NAME | |||||||
| claim-redacted-205 | Bound | pvc-5643aadf-ddb3-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 205 |
| claim-redacted-1088 | Bound | pvc-bc68d752-ecba-11e6-98ef-42010af000c3 | 10Gi | RWO | 2d | redacted | 1088 |
| claim-redacted-389 | Bound | pvc-5d67d43e-ddcb-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 389 |
| claim-redacted-119 | Bound | pvc-73c5d644-ddb1-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 119 |
| claim-redacted-64 | Bound | pvc-3c2345e4-dd9f-11e6-98ef-42010af000c3 | 10Gi | RWO | 21d | redacted | 64 |
valids['username'].head().value_counts()redacted 1
redacted 1
redacted 1
redacted 1
redacted 1
Name: username, dtype: int64
valids['id'].value_counts().head()400 1
90 1
756 1
488 1
1029 1
Name: id, dtype: int64
len(valids['id'])1105
import sqlite3
conn = sqlite3.connect('jupyterhub.sqlite')
c = conn.cursor()c.execute('PRAGMA TABLE_INFO({})'.format('users'))
c.fetchall()[(0, 'id', 'INTEGER', 1, None, 1),
(1, 'name', 'VARCHAR(1023)', 0, None, 0),
(2, '_server_id', 'INTEGER', 0, None, 0),
(3, 'admin', 'BOOLEAN', 0, None, 0),
(4, 'last_activity', 'DATETIME', 0, None, 0),
(5, 'cookie_id', 'VARCHAR(1023)', 0, None, 0),
(6, 'state', 'TEXT', 0, None, 0),
(7, 'auth_state', 'TEXT', 0, None, 0)]
c.execute('SELECT * FROM users LIMIT 20')
current_users = c.fetchall()
current_users[:2][(1,
'redacted',
None,
1,
'2017-02-09 09:07:03.936620',
'8ec09f8def774b668e838d1e70e00329',
None,
None),
(2,
'redacted',
None,
1,
'2017-02-09 09:07:03.938610',
'c98f59afc1fe4a70b18b451ac1190d45',
None,
None)]
admins = {user[1] for user in current_users if user[3] == 1}
admins{'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted',
'redacted'}
# I just did this manually in the sqlite3 CLI
# c.execute('DROP FROM users where id > -1')
# c.fetchall()--------------------------------------------------------------------------- OperationalError Traceback (most recent call last) <ipython-input-73-a8eb9e4f2c5d> in <module>() ----> 1 c.execute('DROP FROM users where id > -1') 2 c.fetchall() OperationalError: near "FROM": syntax error
import datetime
import itertools
records = list(zip(
valids['id'].astype(int),
valids['username'],
itertools.repeat('NULL'),
[1 if name in admins else 0 for name in valids['username']],
# itertools.repeat('NULL'), # Pick a random valid value
itertools.repeat('2017-02-09 09:07:03.936620'), # Pick a random valid value
itertools.repeat('thisisadummycookiehopefullyitworks'),
itertools.repeat('NULL'),
itertools.repeat('NULL')
))
records[:3][(205,
'redacted',
'NULL',
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
'NULL',
'NULL'),
(1088,
'redacted',
'NULL',
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
'NULL',
'NULL'),
(389,
'redacted',
'NULL',
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
'NULL',
'NULL')]
# I hate everything
def record_to_sql(record):
return "INSERT INTO users VALUES({}, '{}', {}, '{}', '{}', '{}', {}, {})".format(
*record
)
sql_statements = [record_to_sql(record) for record in records]
record_to_sql(records[0])"INSERT INTO users VALUES(205, 'redacted', NULL, '0', '2017-02-09 09:07:03.936620', 'thisisadummycookiehopefullyitworks', NULL, NULL)"
[(0, 'id', 'INTEGER', 1, None, 1),
(1, 'name', 'VARCHAR(1023)', 0, None, 0),
(2, '_server_id', 'INTEGER', 0, None, 0),
(3, 'admin', 'BOOLEAN', 0, None, 0),
(4, 'last_activity', 'DATETIME', 0, None, 0),
(5, 'cookie_id', 'VARCHAR(1023)', 0, None, 0),
(6, 'state', 'TEXT', 0, None, 0),
(7, 'auth_state', 'TEXT', 0, None, 0)]
with conn:
c = conn.cursor()
for statement in sql_statements:
c.execute(statement)c.execute('SELECT * FROM users LIMIT 20')
c.fetchall()[(2,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(3,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(4,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(5,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(6,
'redacted',
None,
1,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(7,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(8,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(9,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(10,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(11,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(12,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(13,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(15,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(17,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(18,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(19,
'redacted',
None,
1,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(20,
'redacted',
None,
1,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(21,
'redacted',
None,
1,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(22,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None),
(24,
'redacted',
None,
0,
'2017-02-09 09:07:03.936620',
'thisisadummycookiehopefullyitworks',
None,
None)]