所以我试图在python中构建一个自然学习处理器,我正在使用我在网上找到的一些代码,然后调整自己的东西。但现在,它只是不想工作。它一直在给我

ValueError: Found array with 0 sample(s) (shape=(0, 262)) while a minimum of 1 is required.

这是我的代码。我道歉,如果它是凌乱的我只是直接从互联网上复制它:

from collections import Counter

import pandas

from nltk.corpus import stopwords

import pandas as pd

import numpy

headlines = []

apps = pd.read_csv('DataUse.csv')

for e in apps['title_lower']:

headlines.append(e)

testdata = pd.read_csv('testdata.csv')

# Find all the unique words in the headlines.

unique_words = list(set(" ".join(headlines).split(" ")))

def make_matrix(headlines, vocab):

matrix = []

for headline in headlines:

# Count each word in the headline, and make a dictionary.

counter = Counter(headline)

# Turn the dictionary into a matrix row using the vocab.

row = [counter.get(w, 0) for w in vocab]

matrix.append(row)

df = pandas.DataFrame(matrix)

df.columns = unique_words

return df

print(make_matrix(headlines, unique_words))

import re

# Lowercase, then replace any non-letter, space, or digit character in the headlines.

new_headlines = [re.sub(r'[^\w\s\d]','',h.lower()) for h in headlines]

# Replace sequences of whitespace with a space character.

new_headlines = [re.sub("\s+", " ", h) for h in new_headlines]

unique_words = list(set(" ".join(new_headlines).split(" ")))

# We've reduced the number of columns in the matrix a bit.

print(make_matrix(new_headlines, unique_words))

stopwords = set(stopwords.words('english'))

stopwords = [re.sub(r'[^\w\s\d]','',s.lower()) for s in stopwords]

unique_words = list(set(" ".join(new_headlines).split(" ")))

# Remove stopwords from the vocabulary.

unique_words = [w for w in unique_words if w not in stopwords]

# We're down to 34 columns, which is way better!

print(make_matrix(new_headlines, unique_words))

##

##

##

##

from sklearn.feature_extraction.text import CountVectorizer

# Construct a bag of words matrix.

# This will lowercase everything, and ignore all punctuation by default.

# It will also remove stop words.

vectorizer = CountVectorizer(lowercase=True, stop_words="english")

matrix = vectorizer.fit_transform(headlines)

# We created our bag of words matrix with far fewer commands.

print(matrix.todense())

# Let's apply the same method to all the headlines in all 100000 submissions.

# We'll also add the url of the submission to the end of the headline so we can take it into account.

full_matrix = vectorizer.fit_transform(apps['title_lower'])

print(full_matrix.shape)

##

##

##

##

##

from sklearn.feature_selection import SelectKBest

from sklearn.feature_selection import chi2

# Convert the upvotes variable to binary so it works with a chi-squared test.

col = apps["total_shares"].copy(deep=True)

col_mean = col.mean()

col[col < col_mean] = 0

col[(col > 0) & (col > col_mean)] = 1

print col

# Find the 1000 most informative columns

selector = SelectKBest(chi2, k='all')

selector.fit(full_matrix, col)

top_words = selector.get_support().nonzero()

# Pick only the most informative columns in the data.

chi_matrix = full_matrix[:,top_words[0]]

##

##

##

##

##

##

import numpy as numpy

transform_functions = [

lambda x: len(x),

lambda x: x.count(" "),

lambda x: x.count("."),

lambda x: x.count("!"),

lambda x: x.count("?"),

lambda x: len(x) / (x.count(" ") + 1),

lambda x: x.count(" ") / (x.count(".") + 1),

lambda x: len(re.findall("\d", x)),

lambda x: len(re.findall("[A-Z]", x)),

]

# Apply each function and put the results into a list.

columns = []

for func in transform_functions:

columns.append(apps["title_lower"].apply(func))

# Convert the meta features to a numpy array.

meta = numpy.asarray(columns).T

##

##

##

##

##

##

##

features = numpy.hstack([chi_matrix.todense()])

from sklearn.linear_model import Ridge

import random

train_rows = 262

# Set a seed to get the same "random" shuffle every time.

random.seed(1)

# Shuffle the indices for the matrix.

indices = list(range(features.shape[0]))

random.shuffle(indices)

# Create train and test sets.

train = features[indices[:train_rows], :]

test = features[indices[train_rows:], :]

print test

train_upvotes = apps['total_shares'].iloc[indices[:train_rows]]

test_upvotes = apps['total_shares'].iloc[indices[train_rows:]]

train = numpy.nan_to_num(train)

print (test)

# Run the regression and generate predictions for the test set.

reg = Ridge(alpha=.1)

reg.fit(train, train_upvotes)

predictions = reg.predict(test)

##

##

##

##

##

### We're going to use mean absolute error as an error metric.

### Our error is about 13.6 upvotes, which means that, on average,

### our prediction is 13.6 upvotes away from the actual number of upvotes.

##print(sum(abs(predictions - test_upvotes)) / len(predictions))

##

### As a baseline, we'll use the average number of upvotes

### across all submissions.

### The error here is 17.2 -- our estimate is better, but not hugely so.

### There either isn't a ton of predictive value encoded in the

### data we have, or we aren't extracting it well.

##average_upvotes = sum(test_upvotes)/len(test_upvotes)

##print(sum(abs(average_upvotes - test_upvotes)) / len(predictions))

##

编辑:这是错误:

Traceback (most recent call last):

File "C:/Users/Tucker Siegel/Desktop/Machines/Test.py", line 156, in

predictions = reg.predict(test)

File "C:\Python27\lib\site-packages\sklearn\linear_model\base.py", line 200, in predict

return self._decision_function(X)

File "C:\Python27\lib\site-packages\sklearn\linear_model\base.py", line 183, in _decision_function

X = check_array(X, accept_sparse=['csr', 'csc', 'coo'])

File "C:\Python27\lib\site-packages\sklearn\utils\validation.py", line 407, in check_array

context))

ValueError: Found array with 0 sample(s) (shape=(0, 262)) while a minimum of 1 is required.

python中shape 0_Python错误:找到包含0个样本(shape =(0,262))的数组,同时至少需要1个...相关推荐

  1. python一直报缩进错误_如何避免Python中的缩进错误

    Python是当今编程界领先和新兴的编程平台之一.凭借其丰富的功能和巨大的灵活性,人们可以在这个平台上实现很多,只要他们知道如何操作它.在Python中的这个缩进错误中,我们将介 Python是当今编 ...

  2. 成功解决Python中导出图片出现错误SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position

    成功解决Python中导出图片出现错误SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position ...

  3. 【Python中的权限错误:无法访问文件】-- 解决方案

    [Python中的权限错误:无法访问文件]-- 解决方案 在Python编程过程中,我们常常会遇到文件读写.创建.删除等操作.有时候当我们试图进行这些操作时,会出现"PermissionEr ...

  4. 关于python语言下列说法错误的是_在Python中下列说法错误的是()

    在Python中下列说法错误的是() 答:Python语言只能用4个空格的缩进来实现程序的强制可读性 比赛中,运动员击出的球压在端线上,则判该运动员失分 答:× 虚证的症状表现是( ) 答:神疲乏力 ...

  5. python中异常和错误是一个概念_Python的异常概念介绍以及处理

    一.什么是异常处理 定义:异常处理就是我们在写Python时,经常看到的报错信息,例如;NameError TypeError ValueError等,这些都是异常. 异常是一个事件,改事件会在程序执 ...

  6. Python 中最强大的错误重试库

    作者 | 费弗里 来源丨Python大数据分析 1 简介 我们在编写程序尤其是与网络请求相关的程序,如调用web接口.运行网络爬虫等任务时,经常会遇到一些偶然发生的请求失败的状况,这种时候如果我们仅仅 ...

  7. Python中datetime类错误

    在使用python的datetime类的时候,如下所示: a=datetime.datetime(2014,08,02) ^ SyntaxError: invalid token 如果转换的是2014 ...

  8. python中表头格式错误导入_python读csv文件时指定行为表头或无表头的方法

    pd.read_csv()方法中header参数,默认为0,标签为0(即第1行)的行为表头.若设置为-1,则无表头.示例如下: (1)不设置header参数(默认)时: df1 = pd.read_c ...

  9. Python中文本文件的读取(包含指针移动)

    一.普通文件的读写 打开文件的步骤:打开 --> 操作 --> 关闭 注意:打开后关闭的原因是因为这样不会浪费文字描述符 文件读写的方式: r:(默认) -只能读,不能写 -读取的文件不存 ...

  10. python16进制表示0xad_在 Python 中 0xad 是合法的十六进制数字表示形式。 (2.0分)_学小易找答案...

    [填空题]表达式 isinstance('4', (int, float, complex)) 的值为 _____________ .( ) (2.0分) [判断题]Python 3.x 完全兼容 P ...

最新文章

  1. 技术不是大数据第一生产力,数据交易才能带来应用爆发
  2. leetcode算法题--一周中的第几天
  3. 杭电c语言课程设计实验7,杭电1072 BFS 大神给看看啊 郁闷整整10个小时了 不知道哪里错wa...
  4. cstring判断是否包含子串_最长子串-滑动窗口
  5. atheros蓝牙设备驱动 小米_小米Air 13笔记本黑苹果WiFi蓝牙硬件改装方案二
  6. 连接查询 左连接 右连接 内连接 1112 sqlserver
  7. 关于kafka的几个问题
  8. CONSUL install 和启动
  9. 如何在计算机管理路由器,怎么查看路由器的管理IP地址?
  10. Sublime中使用livereload插件实时预览html文件
  11. 股票、债券、基金、期权、期货等的异同
  12. 某android广告SDK逆向分析总结
  13. Android 底部导航栏-极致简单版
  14. MySQL中支持的字符集和排序规则
  15. 手机便签内容如何保存到电脑
  16. 2-3 编写一个三角形类
  17. Google代码实验室
  18. Ubuntu18.04 + kinova joca2机械臂 + RealSense D435i深度相机进行eye to hand手眼标定
  19. 5g4g network术语
  20. 【荷兰男孩】githubshare|python练手超百项目

热门文章

  1. 评价微型计算机有哪些主要性能指标,计算机性能指标有哪些
  2. C语言问题,if条件里面按位取反
  3. 英语拼音怎么在计算机上拼出来的,英语拼音怎么写
  4. 【通俗理解】股票、基金、证券、债券、信托、期货、国债、外汇
  5. 洛谷P3709 大爷的字符串题 莫队
  6. 日本人布置工作至少说5遍
  7. 数据结构与算法--回溯的理解以及实现
  8. 修改(移交)微信支付商户平台超级管理员账号流程
  9. 怎样使用Excel填充柄的作用与功能
  10. poj-1260 Pearls