Pandas秘籍【第七章】
原文:Chapter 7
# 通常的开头
%matplotlib inlineimport pandas as pd
import matplotlib.pyplot as plt
import numpy as np# 使图表更大更漂亮
pd.set_option('display.mpl_style', 'default')
plt.rcParams['figure.figsize'] = (15, 5)
plt.rcParams['font.family'] = 'sans-serif'# 在 Pandas 0.12 中需要展示大量的列
# 在 Pandas 0.13 中不需要
pd.set_option('display.width', 5000)
pd.set_option('display.max_columns', 60)
杂乱数据的主要问题之一是:你怎么知道它是否杂乱呢?
我们将在这里使用 NYC 311 服务请求数据集,因为它很大,有点不方便。
requests = pd.read_csv('../data/311-service-requests.csv')
7.1 我怎么知道它是否杂乱?
我们在这里查看几列。 我知道邮政编码有一些问题,所以让我们先看看它。
要了解列是否有问题,我通常使用.unique()
来查看所有的值。 如果它是一列数字,我将绘制一个直方图来获得分布的感觉。
当我们看看Incident Zip
中的唯一值时,很快就会清楚这是一个混乱。
一些问题:
- 一些已经解析为字符串,一些是浮点
- 存在
nan
- 部分邮政编码为
29616-0759
或83
- 有一些 Pandas 无法识别的 N/A 值 ,如
'N/A'
和'NO CLUE'
我们可以做的事情:
- 将
N/A
和NO CLUE
规格化为nan
值 - 看看 83 处发生了什么,并决定做什么
- 将一切转化为字符串
requests['Incident Zip'].unique()
array([11432.0, 11378.0, 10032.0, 10023.0, 10027.0, 11372.0, 11419.0,11417.0, 10011.0, 11225.0, 11218.0, 10003.0, 10029.0, 10466.0,11219.0, 10025.0, 10310.0, 11236.0, nan, 10033.0, 11216.0, 10016.0,10305.0, 10312.0, 10026.0, 10309.0, 10036.0, 11433.0, 11235.0,11213.0, 11379.0, 11101.0, 10014.0, 11231.0, 11234.0, 10457.0,10459.0, 10465.0, 11207.0, 10002.0, 10034.0, 11233.0, 10453.0,10456.0, 10469.0, 11374.0, 11221.0, 11421.0, 11215.0, 10007.0,10019.0, 11205.0, 11418.0, 11369.0, 11249.0, 10005.0, 10009.0,11211.0, 11412.0, 10458.0, 11229.0, 10065.0, 10030.0, 11222.0,10024.0, 10013.0, 11420.0, 11365.0, 10012.0, 11214.0, 11212.0,10022.0, 11232.0, 11040.0, 11226.0, 10281.0, 11102.0, 11208.0,10001.0, 10472.0, 11414.0, 11223.0, 10040.0, 11220.0, 11373.0,11203.0, 11691.0, 11356.0, 10017.0, 10452.0, 10280.0, 11217.0,10031.0, 11201.0, 11358.0, 10128.0, 11423.0, 10039.0, 10010.0,11209.0, 10021.0, 10037.0, 11413.0, 11375.0, 11238.0, 10473.0,11103.0, 11354.0, 11361.0, 11106.0, 11385.0, 10463.0, 10467.0,11204.0, 11237.0, 11377.0, 11364.0, 11434.0, 11435.0, 11210.0,11228.0, 11368.0, 11694.0, 10464.0, 11415.0, 10314.0, 10301.0,10018.0, 10038.0, 11105.0, 11230.0, 10468.0, 11104.0, 10471.0,11416.0, 10075.0, 11422.0, 11355.0, 10028.0, 10462.0, 10306.0,10461.0, 11224.0, 11429.0, 10035.0, 11366.0, 11362.0, 11206.0,10460.0, 10304.0, 11360.0, 11411.0, 10455.0, 10475.0, 10069.0,10303.0, 10308.0, 10302.0, 11357.0, 10470.0, 11367.0, 11370.0,10454.0, 10451.0, 11436.0, 11426.0, 10153.0, 11004.0, 11428.0,11427.0, 11001.0, 11363.0, 10004.0, 10474.0, 11430.0, 10000.0,10307.0, 11239.0, 10119.0, 10006.0, 10048.0, 11697.0, 11692.0,11693.0, 10573.0, 83.0, 11559.0, 10020.0, 77056.0, 11776.0, 70711.0,10282.0, 11109.0, 10044.0, '10452', '11233', '10468', '10310','11105', '10462', '10029', '10301', '10457', '10467', '10469','11225', '10035', '10031', '11226', '10454', '11221', '10025','11229', '11235', '11422', '10472', '11208', '11102', '10032','11216', '10473', '10463', '11213', '10040', '10302', '11231','10470', '11204', '11104', '11212', '10466', '11416', '11214','10009', '11692', '11385', '11423', '11201', '10024', '11435','10312', '10030', '11106', '10033', '10303', '11215', '11222','11354', '10016', '10034', '11420', '10304', '10019', '11237','11249', '11230', '11372', '11207', '11378', '11419', '11361','10011', '11357', '10012', '11358', '10003', '10002', '11374','10007', '11234', '10065', '11369', '11434', '11205', '11206','11415', '11236', '11218', '11413', '10458', '11101', '10306','11355', '10023', '11368', '10314', '11421', '10010', '10018','11223', '10455', '11377', '11433', '11375', '10037', '11209','10459', '10128', '10014', '10282', '11373', '10451', '11238','11211', '10038', '11694', '11203', '11691', '11232', '10305','10021', '11228', '10036', '10001', '10017', '11217', '11219','10308', '10465', '11379', '11414', '10460', '11417', '11220','11366', '10027', '11370', '10309', '11412', '11356', '10456','11432', '10022', '10013', '11367', '11040', '10026', '10475','11210', '11364', '11426', '10471', '10119', '11224', '11418','11429', '11365', '10461', '11239', '10039', '00083', '11411','10075', '11004', '11360', '10453', '10028', '11430', '10307','11103', '10004', '10069', '10005', '10474', '11428', '11436','10020', '11001', '11362', '11693', '10464', '11427', '10044','11363', '10006', '10000', '02061', '77092-2016', '10280', '11109','14225', '55164-0737', '19711', '07306', '000000', 'NO CLUE','90010', '10281', '11747', '23541', '11776', '11697', '11788','07604', 10112.0, 11788.0, 11563.0, 11580.0, 7087.0, 11042.0,7093.0, 11501.0, 92123.0, 0.0, 11575.0, 7109.0, 11797.0, '10803','11716', '11722', '11549-3650', '10162', '92123', '23502', '11518','07020', '08807', '11577', '07114', '11003', '07201', '11563','61702', '10103', '29616-0759', '35209-3114', '11520', '11735','10129', '11005', '41042', '11590', 6901.0, 7208.0, 11530.0,13221.0, 10954.0, 11735.0, 10103.0, 7114.0, 11111.0, 10107.0], dtype=object)
7.3 修复nan
值和字符串/浮点混淆
我们可以将na_values
选项传递到pd.read_csv
来清理它们。 我们还可以指定Incident Zip
的类型是字符串,而不是浮点。
na_values = ['NO CLUE', 'N/A', '0']
requests = pd.read_csv('../data/311-service-requests.csv', na_values=na_values, dtype={'Incident Zip': str})
requests['Incident Zip'].unique()
array(['11432', '11378', '10032', '10023', '10027', '11372', '11419','11417', '10011', '11225', '11218', '10003', '10029', '10466','11219', '10025', '10310', '11236', nan, '10033', '11216', '10016','10305', '10312', '10026', '10309', '10036', '11433', '11235','11213', '11379', '11101', '10014', '11231', '11234', '10457','10459', '10465', '11207', '10002', '10034', '11233', '10453','10456', '10469', '11374', '11221', '11421', '11215', '10007','10019', '11205', '11418', '11369', '11249', '10005', '10009','11211', '11412', '10458', '11229', '10065', '10030', '11222','10024', '10013', '11420', '11365', '10012', '11214', '11212','10022', '11232', '11040', '11226', '10281', '11102', '11208','10001', '10472', '11414', '11223', '10040', '11220', '11373','11203', '11691', '11356', '10017', '10452', '10280', '11217','10031', '11201', '11358', '10128', '11423', '10039', '10010','11209', '10021', '10037', '11413', '11375', '11238', '10473','11103', '11354', '11361', '11106', '11385', '10463', '10467','11204', '11237', '11377', '11364', '11434', '11435', '11210','11228', '11368', '11694', '10464', '11415', '10314', '10301','10018', '10038', '11105', '11230', '10468', '11104', '10471','11416', '10075', '11422', '11355', '10028', '10462', '10306','10461', '11224', '11429', '10035', '11366', '11362', '11206','10460', '10304', '11360', '11411', '10455', '10475', '10069','10303', '10308', '10302', '11357', '10470', '11367', '11370','10454', '10451', '11436', '11426', '10153', '11004', '11428','11427', '11001', '11363', '10004', '10474', '11430', '10000','10307', '11239', '10119', '10006', '10048', '11697', '11692','11693', '10573', '00083', '11559', '10020', '77056', '11776','70711', '10282', '11109', '10044', '02061', '77092-2016', '14225','55164-0737', '19711', '07306', '000000', '90010', '11747', '23541','11788', '07604', '10112', '11563', '11580', '07087', '11042','07093', '11501', '92123', '00000', '11575', '07109', '11797','10803', '11716', '11722', '11549-3650', '10162', '23502', '11518','07020', '08807', '11577', '07114', '11003', '07201', '61702','10103', '29616-0759', '35209-3114', '11520', '11735', '10129','11005', '41042', '11590', '06901', '07208', '11530', '13221','10954', '11111', '10107'], dtype=object)
7.4 短横线处发生了什么
rows_with_dashes = requests['Incident Zip'].str.contains('-').fillna(False)
len(requests[rows_with_dashes])
5
requests[rows_with_dashes]
Unique Key | Created Date | Closed Date | Agency | Agency Name | Complaint Type | Descriptor | Location Type | Incident Zip | Incident Address | Street Name | Cross Street 1 | Cross Street 2 | Intersection Street 1 | Intersection Street 2 | Address Type | City | Landmark | Facility Type | Status | Due Date | Resolution Action Updated Date | Community Board | Borough | X Coordinate (State Plane) | Y Coordinate (State Plane) | Park Facility Name | Park Borough | School Name | School Number | School Region | School Code | School Phone Number | School Address | School City | School State | School Zip | School Not Found | School or Citywide Complaint | Vehicle Type | Taxi Company Borough | Taxi Pick Up Location | Bridge Highway Name | Bridge Highway Direction | Road Ramp | Bridge Highway Segment | Garage Lot Name | Ferry Direction | Ferry Terminal Name | Latitude | Longitude | Location |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
29136 | 26550551 | 10/24/2013 06:16:34 PM | NaN | DCA | Department of Consumer Affairs | Consumer Complaint | False Advertising | NaN | 77092-2016 | 2700 EAST SELTICE WAY | EAST SELTICE WAY | NaN | NaN | NaN | NaN | NaN | HOUSTON | NaN | NaN | Assigned | 11/13/2013 11:15:20 AM | 10/29/2013 11:16:16 AM | 0 Unspecified | Unspecified | NaN | NaN | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | N | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
30939 | 26548831 | 10/24/2013 09:35:10 AM | NaN | DCA | Department of Consumer Affairs | Consumer Complaint | Harassment | NaN | 55164-0737 | P.O. BOX 64437 | 64437 | NaN | NaN | NaN | NaN | NaN | ST. PAUL | NaN | NaN | Assigned | 11/13/2013 02:30:21 PM | 10/29/2013 02:31:06 PM | 0 Unspecified | Unspecified | NaN | NaN | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | N | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
70539 | 26488417 | 10/15/2013 03:40:33 PM | NaN | TLC | Taxi and Limousine Commission | Taxi Complaint | Driver Complaint | Street | 11549-3650 | 365 HOFSTRA UNIVERSITY | HOFSTRA UNIVERSITY | NaN | NaN | NaN | NaN | NaN | HEMSTEAD | NaN | NaN | Assigned | 11/30/2013 01:20:33 PM | 10/16/2013 01:21:39 PM | 0 Unspecified | Unspecified | NaN | NaN | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | N | NaN | NaN | NaN | La Guardia Airport | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
85821 | 26468296 | 10/10/2013 12:36:43 PM | 10/26/2013 01:07:07 AM | DCA | Department of Consumer Affairs | Consumer Complaint | Debt Not Owed | NaN | 29616-0759 | PO BOX 25759 | BOX 25759 | NaN | NaN | NaN | NaN | NaN | GREENVILLE | NaN | NaN | Closed | 10/26/2013 09:20:28 AM | 10/26/2013 01:07:07 AM | 0 Unspecified | Unspecified | NaN | NaN | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | N | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
89304 | 26461137 | 10/09/2013 05:23:46 PM | 10/25/2013 01:06:41 AM | DCA | Department of Consumer Affairs | Consumer Complaint | Harassment | NaN | 35209-3114 | 600 BEACON PKWY | BEACON PKWY | NaN | NaN | NaN | NaN | NaN | BIRMINGHAM | NaN | NaN | Closed | 10/25/2013 02:43:42 PM | 10/25/2013 01:06:41 AM | 0 Unspecified | Unspecified | NaN | NaN | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | N | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
我认为这些都是缺失的数据,像这样删除它们:
requests['Incident Zip'][rows_with_dashes] = np.nan
但是我的朋友 Dave 指出,9 位邮政编码是正常的。 让我们看看所有超过 5 位数的邮政编码,确保它们没问题,然后截断它们。
long_zip_codes = requests['Incident Zip'].str.len() > 5
requests['Incident Zip'][long_zip_codes].unique()
array(['77092-2016', '55164-0737', '000000', '11549-3650', '29616-0759','35209-3114'], dtype=object)
这些看起来可以截断:
requests['Incident Zip'] = requests['Incident Zip'].str.slice(0, 5)
就可以了。
早些时候我认为 00083 是一个损坏的邮政编码,但事实证明中央公园的邮政编码是 00083! 显示我知道的吧。 我仍然关心 00000 邮政编码,但是:让我们看看。
requests[requests['Incident Zip'] == '00000']
Unique Key | Created Date | Closed Date | Agency | Agency Name | Complaint Type | Descriptor | Location Type | Incident Zip | Incident Address | Street Name | Cross Street 1 | Cross Street 2 | Intersection Street 1 | Intersection Street 2 | Address Type | City | Landmark | Facility Type | Status | Due Date | Resolution Action Updated Date | Community Board | Borough | X Coordinate (State Plane) | Y Coordinate (State Plane) | Park Facility Name | Park Borough | School Name | School Number | School Region | School Code | School Phone Number | School Address | School City | School State | School Zip | School Not Found | School or Citywide Complaint | Vehicle Type | Taxi Company Borough | Taxi Pick Up Location | Bridge Highway Name | Bridge Highway Direction | Road Ramp | Bridge Highway Segment | Garage Lot Name | Ferry Direction | Ferry Terminal Name | Latitude | Longitude | Location |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
42600 | 26529313 | 10/22/2013 02:51:06 PM | NaN | TLC | Taxi and Limousine Commission | Taxi Complaint | Driver Complaint | NaN | 00000 | EWR EWR | EWR | NaN | NaN | NaN | NaN | NaN | NEWARK | NaN | NaN | Assigned | 12/07/2013 09:53:51 AM | 10/23/2013 09:54:43 AM | 0 Unspecified | Unspecified | NaN | NaN | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | N | NaN | NaN | NaN | Other | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
60843 | 26507389 | 10/17/2013 05:48:44 PM | NaN | TLC | Taxi and Limousine Commission | Taxi Complaint | Driver Complaint | Street | 00000 | 1 NEWARK AIRPORT | NEWARK AIRPORT | NaN | NaN | NaN | NaN | NaN | NEWARK | NaN | NaN | Assigned | 12/02/2013 11:59:46 AM | 10/18/2013 12:01:08 PM | 0 Unspecified | Unspecified | NaN | NaN | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | Unspecified | N | NaN | NaN | NaN | Other | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
这看起来对我来说很糟糕,让我将它们设为NaN
。
zero_zips = requests['Incident Zip'] == '00000'
requests.loc[zero_zips, 'Incident Zip'] = np.nan
太棒了,让我们看看现在在哪里。
unique_zips = requests['Incident Zip'].unique()
unique_zips.sort()
unique_zips
array([nan, '00083', '02061', '06901', '07020', '07087', '07093', '07109','07114', '07201', '07208', '07306', '07604', '08807', '10000','10001', '10002', '10003', '10004', '10005', '10006', '10007','10009', '10010', '10011', '10012', '10013', '10014', '10016','10017', '10018', '10019', '10020', '10021', '10022', '10023','10024', '10025', '10026', '10027', '10028', '10029', '10030','10031', '10032', '10033', '10034', '10035', '10036', '10037','10038', '10039', '10040', '10044', '10048', '10065', '10069','10075', '10103', '10107', '10112', '10119', '10128', '10129','10153', '10162', '10280', '10281', '10282', '10301', '10302','10303', '10304', '10305', '10306', '10307', '10308', '10309','10310', '10312', '10314', '10451', '10452', '10453', '10454','10455', '10456', '10457', '10458', '10459', '10460', '10461','10462', '10463', '10464', '10465', '10466', '10467', '10468','10469', '10470', '10471', '10472', '10473', '10474', '10475','10573', '10803', '10954', '11001', '11003', '11004', '11005','11040', '11042', '11101', '11102', '11103', '11104', '11105','11106', '11109', '11111', '11201', '11203', '11204', '11205','11206', '11207', '11208', '11209', '11210', '11211', '11212','11213', '11214', '11215', '11216', '11217', '11218', '11219','11220', '11221', '11222', '11223', '11224', '11225', '11226','11228', '11229', '11230', '11231', '11232', '11233', '11234','11235', '11236', '11237', '11238', '11239', '11249', '11354','11355', '11356', '11357', '11358', '11360', '11361', '11362','11363', '11364', '11365', '11366', '11367', '11368', '11369','11370', '11372', '11373', '11374', '11375', '11377', '11378','11379', '11385', '11411', '11412', '11413', '11414', '11415','11416', '11417', '11418', '11419', '11420', '11421', '11422','11423', '11426', '11427', '11428', '11429', '11430', '11432','11433', '11434', '11435', '11436', '11501', '11518', '11520','11530', '11549', '11559', '11563', '11575', '11577', '11580','11590', '11691', '11692', '11693', '11694', '11697', '11716','11722', '11735', '11747', '11776', '11788', '11797', '13221','14225', '19711', '23502', '23541', '29616', '35209', '41042','55164', '61702', '70711', '77056', '77092', '90010', '92123'], dtype=object)
太棒了! 这更加干净。 虽然这里有一些奇怪的东西 - 我在谷歌地图上查找 77056,这是在德克萨斯州。
让我们仔细看看:
zips = requests['Incident Zip']
# Let's say the zips starting with '0' and '1' are okay, for now. (this isn't actually true -- 13221 is in Syracuse, and why?)
is_close = zips.str.startswith('0') | zips.str.startswith('1')
# There are a bunch of NaNs, but we're not interested in them right now, so we'll say they're False
is_far = ~(is_close) & zips.notnull()
zips[is_far]
12102 77056
13450 70711
29136 77092
30939 55164
44008 90010
47048 23541
57636 92123
71001 92123
71834 23502
80573 61702
85821 29616
89304 35209
94201 41042
Name: Incident Zip, dtype: object
requests[is_far][['Incident Zip', 'Descriptor', 'City']].sort('Incident Zip')
Incident Zip | Descriptor | City |
---|---|---|
71834 | 23502 | Harassment |
47048 | 23541 | Harassment |
85821 | 29616 | Debt Not Owed |
89304 | 35209 | Harassment |
94201 | 41042 | Harassment |
30939 | 55164 | Harassment |
80573 | 61702 | Billing Dispute |
13450 | 70711 | Contract Dispute |
12102 | 77056 | Debt Not Owed |
29136 | 77092 | False Advertising |
44008 | 90010 | Billing Dispute |
57636 | 92123 | Harassment |
71001 | 92123 | Billing Dispute |
好吧,真的有来自 LA 和休斯敦的请求! 很高兴知道它们。 按邮政编码过滤可能是处理它的一个糟糕的方式 - 我们真的应该看着城市。
requests['City'].str.upper().value_counts()
BROOKLYN 31662
NEW YORK 22664
BRONX 18438
STATEN ISLAND 4766
JAMAICA 2246
FLUSHING 1803
ASTORIA 1568
RIDGEWOOD 1073
CORONA 707
OZONE PARK 693
LONG ISLAND CITY 678
FAR ROCKAWAY 652
ELMHURST 647
WOODSIDE 609
EAST ELMHURST 562
...
MELVILLE 1
PORT JEFFERSON STATION 1
NORWELL 1
EAST ROCKAWAY 1
BIRMINGHAM 1
ROSLYN 1
LOS ANGELES 1
MINEOLA 1
JERSEY CITY 1
ST. PAUL 1
CLIFTON 1
COL.ANVURES 1
EDGEWATER 1
ROSELYN 1
CENTRAL ISLIP 1
Length: 100, dtype: int64
看起来这些是合法的投诉,所以我们只是把它们放在一边。
7.5 把它们放到一起
这里是我们最后所做的事情,用于清理我们的邮政编码,都在一起:
na_values = ['NO CLUE', 'N/A', '0']
requests = pd.read_csv('../data/311-service-requests.csv', na_values=na_values, dtype={'Incident Zip': str})
def fix_zip_codes(zips):# Truncate everything to length 5 zips = zips.str.slice(0, 5)# Set 00000 zip codes to nanzero_zips = zips == '00000'zips[zero_zips] = np.nanreturn zips
requests['Incident Zip'] = fix_zip_codes(requests['Incident Zip'])
requests['Incident Zip'].unique()
array(['11432', '11378', '10032', '10023', '10027', '11372', '11419','11417', '10011', '11225', '11218', '10003', '10029', '10466','11219', '10025', '10310', '11236', nan, '10033', '11216', '10016','10305', '10312', '10026', '10309', '10036', '11433', '11235','11213', '11379', '11101', '10014', '11231', '11234', '10457','10459', '10465', '11207', '10002', '10034', '11233', '10453','10456', '10469', '11374', '11221', '11421', '11215', '10007','10019', '11205', '11418', '11369', '11249', '10005', '10009','11211', '11412', '10458', '11229', '10065', '10030', '11222','10024', '10013', '11420', '11365', '10012', '11214', '11212','10022', '11232', '11040', '11226', '10281', '11102', '11208','10001', '10472', '11414', '11223', '10040', '11220', '11373','11203', '11691', '11356', '10017', '10452', '10280', '11217','10031', '11201', '11358', '10128', '11423', '10039', '10010','11209', '10021', '10037', '11413', '11375', '11238', '10473','11103', '11354', '11361', '11106', '11385', '10463', '10467','11204', '11237', '11377', '11364', '11434', '11435', '11210','11228', '11368', '11694', '10464', '11415', '10314', '10301','10018', '10038', '11105', '11230', '10468', '11104', '10471','11416', '10075', '11422', '11355', '10028', '10462', '10306','10461', '11224', '11429', '10035', '11366', '11362', '11206','10460', '10304', '11360', '11411', '10455', '10475', '10069','10303', '10308', '10302', '11357', '10470', '11367', '11370','10454', '10451', '11436', '11426', '10153', '11004', '11428','11427', '11001', '11363', '10004', '10474', '11430', '10000','10307', '11239', '10119', '10006', '10048', '11697', '11692','11693', '10573', '00083', '11559', '10020', '77056', '11776','70711', '10282', '11109', '10044', '02061', '77092', '14225','55164', '19711', '07306', '90010', '11747', '23541', '11788','07604', '10112', '11563', '11580', '07087', '11042', '07093','11501', '92123', '11575', '07109', '11797', '10803', '11716','11722', '11549', '10162', '23502', '11518', '07020', '08807','11577', '07114', '11003', '07201', '61702', '10103', '29616','35209', '11520', '11735', '10129', '11005', '41042', '11590','06901', '07208', '11530', '13221', '10954', '11111', '10107'], dtype=object)
Pandas秘籍【第七章】相关推荐
- Kali Linux 网络扫描秘籍 第七章 Web 应用扫描(三)
第七章 Web 应用扫描(三) 作者:Justin Hutchens 译者:飞龙 协议:CC BY-NC-SA 4.0 7.13 使用 BurpSuite Sequencer(序列器) Web 应用会 ...
- Kali Linux 网络扫描秘籍 第七章 Web 应用扫描(二)
第七章 Web 应用扫描(二) 作者:Justin Hutchens 译者:飞龙 协议:CC BY-NC-SA 4.0 7.7 使用 BurpSuite Web 代理 虽然它有许多可用工具,BurpS ...
- Kali Linux 网络扫描秘籍 第七章 Web 应用扫描(一)
第七章 Web 应用扫描(一) 作者:Justin Hutchens 译者:飞龙 协议:CC BY-NC-SA 4.0 7.1 使用 Nikto 扫描 Web 应用 Nikto 是 Kali 中的命令 ...
- Kali Linux 秘籍 第七章 权限提升
第七章 权限提升 作者:Willie L. Pritchett, David De Smet 译者:飞龙 协议:CC BY-NC-SA 4.0 简介 我们已经获得了想要攻击的计算机的权限.于是将权限尽 ...
- Pandas 秘籍:1~5
原文:Pandas Cookbook 协议:CC BY-NC-SA 4.0 译者:飞龙 一.Pandas 基础 在本章中,我们将介绍以下内容: 剖析数据帧的结构 访问主要的数据帧组件 了解数据类型 选 ...
- Pandas 秘籍:6~11
原文:Pandas Cookbook 协议:CC BY-NC-SA 4.0 译者:飞龙 六.索引对齐 在本章中,我们将介绍以下主题: 检查索引对象 生成笛卡尔积 索引爆炸 用不相等的索引填充值 追加来 ...
- 第七章 线性回归预测模型
线性回归模型属于经典的统计学模型,该模型的应用场景是根据已知的变量(自变量)来预测某个连续的数值变量(因变量). 一元线性回归模型:y=a+bx+ε import pandas as pd impor ...
- Pandas 秘籍·翻译完成
协议:CC BY-NC-SA 4.0 欢迎任何人参与和完善:一个人可以走的很快,但是一群人却可以走的更远. 在线阅读 ApacheCN 面试求职交流群 724187166 ApacheCN 学习资源 ...
- 第七章 绘制3D图表和统计地图
第七章 绘制3D图表和统计地图 这节课主要学习了如何使用mplot3d工具包绘制3D图表,然后介绍了使用anima tion模块制作动画,最后学习了使用basemap工具包绘制统计地图等. 接下来是课 ...
- JAVA修炼秘籍第六章《鏖战》
往期: JAVA 修炼秘籍第一章:<痛苦的折磨> JAVA 修炼秘籍第二章:<逐渐魔化> JAVA 修炼秘籍第三章:<绝地反击> JAVA 修炼秘籍第四章:< ...
最新文章
- java发送串口命令_如何發送命令到串口(JAVA + RXTX)
- java打开文件出错_打开文件错误太多,java.io.FileNotFoundException
- matlab输入集合,matlab关于集合的操作大全
- So Easy! 让开发人员更轻松的工具和资源
- XP操作系统最优视觉效果
- kamailio,asteisk,realtime 实现负载均衡(集群)完整配置笔记
- [转]深入理解linux内核list_head
- 如何使用ASP建立虚拟的FTP服务器
- android studio dns问题_Android网络优化篇-从DNS开始
- Ubuntu16.04安装Mono4.2.1 C#开发环境
- swift 高级进阶1:类与结构体(上)
- jira权限设置-各个项目组查看不同项目
- 社会工程学——基础认知(补充)
- 如何在Ubuntu18.4中设置ERPNEXT开源ERP生产环境开机运行
- linux apr下载地址,apache apr各版本国内高速下载地址及入门教程
- JUC并发编程学习笔记
- js html 测反应速度游戏,利用JS测试目标网站的打开响应速度
- 软件测试之——性能测试(超级详细)这些你都知道吗?
- C专家编程 --- 书中案例汇编与分析(持续更新中)
- STM32 - Printf重定向使用微库、不使用微库(Keil) (转载)